def test_learning_factorization(verbose=False):
    """Check that a learned low-rank factorization matches the SVD reference.

    Builds a toy matrix, fits embeddings by minimising the mean 'quadratic'
    loss with Adam, and asserts that the product of the learned embedding
    slices is within 1e-3 (``np.linalg.norm``) of the matrix returned by
    ``svd_factorize_matrix`` at the same rank.

    The per-iteration training loss is printed by the hook regardless of
    ``verbose``.

    Args:
        verbose: When true, also print the initial losses and the final
            distance to the SVD reference.

    Raises:
        AssertionError: If the distance to the SVD reference is not below
            the tolerance.
    """
    n = 9
    m = 8
    rank = 2
    tolerance = 1e-3
    log_every = 1  # the hook prints every `log_every` iterations (skipping it == 0)

    y_mat = toy_factorization_problem(n=n, m=m, rk=3, noise=1)

    # One minibatch of n * m tuples; presumably this covers every matrix
    # cell in a single batch -- TODO confirm against mat2tuples.
    batch_size = n * m
    tuples = mat2tuples(y_mat)
    tuple_iterable = data_to_batches(tuples, minibatch_size=batch_size)
    # NOTE: positive_and_negative_tuple_sampler(tuples, minibatch_size=batch_size)
    # was previously tried here as an alternative batch source.
    sampler, (x, y) = feed_dict_sampler(tuple_iterable, types=[np.int64, np.float32])

    # Only the first element returned by the scorer is fed to the loss.
    scores = multilinear_tuple_scorer(x, rank=rank, n_emb=n + m)[0]
    loss_op = tf.reduce_mean(loss_func(scores, y, 'quadratic'))

    # Loss of the untrained model on each batch yielded by the sampler.
    initial_losses = [tf_eval(loss_op, f) for _, f in sampler]
    if verbose:
        print(initial_losses)

    hooks = [
        lambda it, b, l: it
        and ((it % log_every) == 0)
        and print("{0}) train loss={1}".format(it, l[0]))
    ]
    # NOTE: GradientDescentOptimizer(learning_rate=0.5) and
    # AdagradOptimizer(0.01, initial_accumulator_value=0.01), each with
    # max_epochs=500, were previously tried in place of Adam.
    emb, = learn(loss_op, sampler, tf.train.AdamOptimizer(learning_rate=0.1), hooks, max_epochs=200)

    # Reference solution at the same rank as the learned model.
    mat0 = svd_factorize_matrix(y_mat, rank=rank)
    # First n rows of emb times the remaining rows transposed; presumably
    # row embeddings followed by column embeddings -- TODO confirm.
    mat = emb[:n, :].dot(emb[n:, :].T)

    error = np.linalg.norm(mat - mat0)
    if verbose:
        print(error)
    assert error < tolerance, (
        "learned factorization differs from SVD reference: "
        "norm={0}, tolerance={1}".format(error, tolerance)
    )
def test_learning_factorization(verbose=False):
    """Verify that training recovers the SVD low-rank approximation.

    A toy matrix is generated, turned into tuples and batched; embeddings
    are then trained with Adam on the mean 'quadratic' loss. The test passes
    when the matrix rebuilt from the learned embeddings lies within 1e-3
    (``np.linalg.norm``) of the ``svd_factorize_matrix`` result computed at
    the same rank.

    Training losses are printed on every iteration after the first one,
    independently of ``verbose``.

    Args:
        verbose: If true, additionally print the losses measured before
            training and the final distance to the SVD reference.

    Raises:
        AssertionError: If the learned matrix is not within the tolerance
            of the SVD reference.
    """
    n, m = 9, 8
    rank = 2
    max_distance = 1e-3
    print_period = 1  # report the training loss every `print_period` iterations

    y_mat = toy_factorization_problem(n=n, m=m, rk=3, noise=1)

    # Batch size equals n * m, so presumably the whole matrix fits in one
    # minibatch -- TODO confirm against mat2tuples / data_to_batches.
    batch_size = n * m
    tuples = mat2tuples(y_mat)
    tuple_iterable = data_to_batches(tuples, minibatch_size=batch_size)
    # NOTE: an earlier variant sampled batches with
    # positive_and_negative_tuple_sampler(tuples, minibatch_size=batch_size).
    sampler, (x, y) = feed_dict_sampler(
        tuple_iterable, types=[np.int64, np.float32])

    # The scorer returns a sequence; only its first element is scored
    # against the targets.
    predictions = multilinear_tuple_scorer(x, rank=rank, n_emb=n + m)[0]
    loss_op = tf.reduce_mean(loss_func(predictions, y, 'quadratic'))

    # Evaluate the loss once per batch before any training step.
    initial_losses = [tf_eval(loss_op, f) for _, f in sampler]
    if verbose:
        print(initial_losses)

    def report_train_loss(it, b, l):
        # Same short-circuit value as the original lambda: falsy for
        # iteration 0, otherwise the result of print() on matching steps.
        return it and (
            (it % print_period) == 0) and print(
                "{0}) train loss={1}".format(it, l[0]))

    hooks = [report_train_loss]

    # NOTE: plain gradient descent (learning_rate=0.5) and Adagrad (0.01,
    # initial_accumulator_value=0.01), both with max_epochs=500, were tried
    # here before settling on Adam.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    emb, = learn(loss_op, sampler, optimizer, hooks, max_epochs=200)

    # Use the same rank for the reference as for the learned model.
    mat0 = svd_factorize_matrix(y_mat, rank=rank)
    # Rows [0, n) times rows [n, end) transposed; presumably row embeddings
    # followed by column embeddings -- TODO confirm.
    mat = emb[:n, :].dot(emb[n:, :].T)

    distance = np.linalg.norm(mat - mat0)
    if verbose:
        print(distance)
    assert distance < max_distance, (
        "learned factorization differs from SVD reference: "
        "norm={0}, tolerance={1}".format(distance, max_distance)
    )
mat0[i,j] = mat[i, j] mat = mat0 print(mat) tuples = [([i, n + j], mat[i, j]) for (i, j) in observed_indices] else: tuples = [([i, n + j], mat[i, j]) for i in range(n) for j in range(m)] tuple_iterable = data_to_batches(tuples, minibatch_size=n * m) batches = [batch for batch in tuple_iterable] print(batches[0]) sampler, (x, y) = feed_dict_sampler(tuple_iterable, types=[np.int64, np.float32]) # sampler is an iterator return (bucket_id, dictionary containing the batch) # x and y are "placeholder" (equivalent of symbolic variables used as input to your model) # print([s for s in sampler]) print(x) emb_var = tf.Variable(tf.cast(np.random.randn(n + m, rank), 'float32', name='embeddings')) # emb_var = tf.Variable(tf.cast(embeddings_true, 'float32', name='embeddings')) # computation graph selected_embeddings = tf.gather(emb_var, x)