import numpy as np
import tensorflow as tf

# Helpers such as toy_factorization_problem, mat2tuples, data_to_batches,
# feed_dict_sampler, loss_func, multilinear_tuple_scorer, tf_eval, learn and
# svd_factorize_matrix are assumed to come from the surrounding library.


def test_learning_factorization(verbose=False):
    n = 9
    m = 8
    y_mat = toy_factorization_problem(n=n, m=m, rk=3, noise=1)  # noisy rank-3 target matrix
    rank = 2
    batch_size = n * m  # full batch: every matrix entry in each minibatch
    tuples = mat2tuples(y_mat)  # observations of the form ([row, n + col], value)
    tuple_iterable = data_to_batches(tuples, minibatch_size=batch_size)
    # tuple_iterable = positive_and_negative_tuple_sampler(mat2tuples(y_mat), minibatch_size=batch_size)
    sampler, (x, y) = feed_dict_sampler(tuple_iterable, types=[np.int64, np.float32])
    # 'quadratic' loss: squared error between predicted scores and observed values
    loss_op = tf.reduce_mean(loss_func(multilinear_tuple_scorer(x, rank=rank, n_emb=n + m)[0], y, 'quadratic'))
    initial_losses = [tf_eval(loss_op, f) for _, f in sampler]  # loss before training, one value per batch
    if verbose:
        print(initial_losses)
    # hooks = [lambda s, e, it, l: it and ((it % 100) == 0) and print("%d) loss=%f" % (it, l))]
    # log the training loss at every iteration
    hooks = [lambda it, b, l: it and ((it % 1) == 0) and print("{0}) train loss={1}".format(it, l[0]))]

    emb, = learn(loss_op, sampler, tf.train.AdamOptimizer(learning_rate=0.1), hooks, max_epochs=200)
    # emb, = learn(loss_op, sampler, tf.train.GradientDescentOptimizer(learning_rate=0.5), hooks, max_epochs=500)
    # emb, = learn(loss_op, sampler, tf.train.AdagradOptimizer(0.01, initial_accumulator_value=0.01), hooks, max_epochs=500)
    mat0 = svd_factorize_matrix(y_mat, rank=2)  # exact rank-2 SVD solution
    mat = emb[:n, :].dot(emb[n:, :].T)  # reconstruction from row/column embeddings
    if verbose:
        print(np.linalg.norm(mat - mat0))
    assert np.linalg.norm(mat - mat0) < 1e-3
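
# A minimal sketch of the closed-form baseline used above, assuming
# svd_factorize_matrix returns the best rank-k approximation of y_mat in the
# least-squares sense (Eckart-Young). svd_rank_k_approx is a hypothetical
# stand-in, not necessarily the library's implementation:
def svd_rank_k_approx(y_mat, rank):
    u, s, vt = np.linalg.svd(y_mat, full_matrices=False)  # thin SVD
    return (u[:, :rank] * s[:rank]).dot(vt[:rank, :])  # keep the top `rank` singular directions
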
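# The walkthrough below comes from an interactive session, so several names
# (n, m, rank, mat) were defined outside the snippet. A hedged reconstruction
# using the same toy problem as above; in particular, mat is assumed to be the
# low-rank matrix whose entries we sample:
n, m, rank = 9, 8, 2
y_mat = toy_factorization_problem(n=n, m=m, rk=3, noise=1)
mat = svd_factorize_matrix(y_mat, rank=rank)  # assumed: rank-2 target matrix
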
missing_values = True

if missing_values:
    # Keep only a random subset of entries: start from an all-NaN matrix and
    # copy over n * m entries sampled with replacement (so some cells repeat
    # and others stay unobserved).
    mat0 = np.nan * np.ones((n, m))
    observed_indices = [(np.random.randint(n), np.random.randint(m)) for _ in range(int(n * m))]
    print(observed_indices)
    for i, j in observed_indices:
        mat0[i, j] = mat[i, j]
    mat = mat0
    print(mat)
    tuples = [([i, n + j], mat[i, j]) for (i, j) in observed_indices]
else:
    tuples = [([i, n + j], mat[i, j]) for i in range(n) for j in range(m)]
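
# Each tuple is ([row_index, n + column_index], value): rows and columns share
# one embedding table of size n + m, so column j is addressed as index n + j.
# E.g. the entry at row 2, column 5 of the 9 x 8 matrix becomes ([2, 9 + 5], mat[2, 5]).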


tuple_iterable = data_to_batches(tuples, minibatch_size=n * m)

# Materialize the batches to inspect the first one (note: if data_to_batches
# returned a one-shot generator, iterating it here would exhaust it before the
# feed_dict_sampler call below).
batches = [batch for batch in tuple_iterable]
print(batches[0])

sampler, (x, y) = feed_dict_sampler(tuple_iterable, types=[np.int64, np.float32])
# sampler is an iterator yielding (bucket_id, feed dictionary containing the batch)
# x and y are placeholders (the equivalent of symbolic variables used as inputs to the model)
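# A hypothetical peek at one batch (assumes sampler yields (bucket_id, feed_dict)
# pairs keyed by the placeholders, as described above; left commented so the
# sampler is not consumed here):
# bucket_id, feed = next(iter(sampler))
# print(bucket_id, feed[x].shape, feed[y].shape)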

# print([s for s in sampler])

print(x)

# name= belongs on the Variable, not on tf.cast (the original named the cast op)
emb_var = tf.Variable(tf.cast(np.random.randn(n + m, rank), 'float32'), name='embeddings')
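
# A minimal sketch of the score this embedding table supports, assuming
# multilinear_tuple_scorer(x, ...) computes the dot product of the two gathered
# embeddings per tuple (the library's version may differ):
selected = tf.gather(emb_var, x)  # [batch, 2, rank]: row and column embeddings
pred = tf.reduce_sum(tf.reduce_prod(selected, axis=1), axis=1)  # <e_row, e_col> per tuple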