Beispiel #1
0
def test_multitask_learning(verbose=False):
    """Recover a low-rank multi-output linear map through a shared embedding table.

    Synthetic targets are ``y_mat = x_mat.dot(w_mat)``, where ``w_mat`` is the
    product of an ``(n_features, rank)`` and a ``(rank, n_labels)`` Gaussian
    matrix.  An embedding table with one leading row, one row per feature and
    one row per label is handed to ``learn``; afterwards the feature rows and
    label rows are multiplied back together and compared against ``y_mat``.

    Args:
        verbose: when true, print half the squared norm of the reconstruction
            error; otherwise assert that the error norm is below ``1e-3``.
    """
    np.random.seed(1)
    n_data, n_features, n_labels, rank = 6, 5, 4, 3
    batch_size = n_data
    n_emb = 1 + n_features + n_labels

    # RNG draw order matters for reproducibility: inputs, then the left
    # factor, then the right factor of the low-rank weight matrix.
    x_mat = np.random.randn(n_data, n_features)
    left_factor = np.random.randn(n_features, rank)
    right_factor = np.random.randn(rank, n_labels)
    w_mat = left_factor.dot(right_factor)
    y_mat = x_mat.dot(w_mat)

    def report_loss(it, e, xy, f):
        # Short-circuits like ``it and cond and print(...)``: nothing is
        # printed at iteration 0 nor when ``it`` is not a multiple of 10.
        if not it:
            return it
        on_tenth = (it % 10) == 0
        if not on_tenth:
            return on_tenth
        return print("%d) loss=%f" % (it, f))

    # Disabled alternative: fit the factorisation directly on dense matrices
    # instead of going through the (question, answer) sample encoding.
    use_dense_formulation = False
    if use_dense_formulation:
        x = tf.placeholder(np.float32, [batch_size, n_features], name='answer')
        y = tf.placeholder(np.float32, [batch_size, n_labels], name='answer')
        sampler = placeholder_feeder((x, y), [(x_mat, y_mat)])  # only one sample
        emb = tf.Variable(tf.cast(np.random.randn(n_features + n_labels, rank), 'float32'))
        feature_part = emb[:n_features, :]
        label_part = emb[n_features:, :]
        w = tf.matmul(feature_part, label_part, transpose_b=True)
        preds = tf.matmul(x, w)
        objective = tf.reduce_sum(tf.square(preds - y))
        hooks = [report_loss]
        (emb_val,) = learn(objective, sampler, hooks=hooks, max_epochs=2000, variables=[emb])
        mat_est = x_mat.dot(emb_val[:n_features, :]).dot(emb_val[n_features:, :].T)
    else:
        # One sample per data row: the context lists every (feature id, value)
        # pair, the question lists every (label id, target) pair.  Feature ids
        # start at 1 and label ids follow them, leaving row 0 of the table free.
        data = [
            ([((NEW_ID, j + 1), x_mat[i, j]) for j in range(n_features)],
             [((NEW_ID, k + 1 + n_features), y_mat[i, k]) for k in range(n_labels)])
            for i in range(n_data)
        ]
        data_arr = vectorize_samples(data)
        batches = data_to_batches(data_arr, minibatch_size=batch_size)

        qc = tf.placeholder(np.int64, [batch_size, n_features, 2], name='question_in_context')
        yc = tf.placeholder(np.float32, [batch_size, n_features], name='answer_in_context')
        q = tf.placeholder(np.int64, [batch_size, n_labels, 2], name='question')
        y = tf.placeholder(np.float32, [batch_size, n_labels], name='answer')
        sampler = placeholder_feeder((qc, yc, q, y), batches)

        # Debug dump of every feed produced by the sampler.
        # NOTE(review): if placeholder_feeder returns a one-shot iterator this
        # loop would exhaust it before training -- confirm it is re-iterable.
        for feed in sampler:
            print(feed)

        emb0 = tf.Variable(tf.cast(np.random.randn(n_emb, rank), 'float32'))

        def reader(context_inputs, context_outputs):
            # Embedding of each context entry, selected by the second id component.
            context_embs = tf.gather(emb0, context_inputs[:, :, 1])
            # Broadcast every observed value across the rank dimension.
            value_weights = tf.tile(
                tf.reshape(context_outputs, (n_data, n_features, 1)),
                (1, 1, rank))
            # The row-0 term is multiplied by zero, so only the value-weighted
            # sum of context embeddings contributes to the result.
            zeroed_base = 0 * tf.tile(tf.reshape(emb0[0, :], (1, rank)), (n_data, 1))
            weighted_sum = tf.reduce_sum(context_embs * value_weights, 1)
            return zeroed_base + weighted_sum

        def answerer(embeddings, question):
            # Repeat the per-sample reading once per label ...
            reading_per_label = tf.tile(
                tf.reshape(embeddings, [n_data, 1, 1, rank]),
                [1, n_labels, 1, 1])
            # ... pair it with the label's own embedding ...
            label_embs = tf.reshape(
                tf.gather(emb0, question[:, :, 1]),
                [n_data, n_labels, 1, rank])
            paired = tf.concat(2, [label_embs, reading_per_label])
            # ... and take the inner product: multiply over the pair axis,
            # then sum over the rank axis.
            return tf.reduce_sum(tf.reduce_prod(paired, 2), 2)

        predictions = answerer(reader(qc, yc), q)
        objective = tf.nn.l2_loss(predictions - y)

        hooks = [report_loss]
        (emb_val,) = learn(objective, sampler, hooks=hooks, max_epochs=2000, variables=[emb0])

        # Rows 1..n_features hold the feature embeddings, the rest the labels.
        feature_embs = emb_val[1:n_features + 1, :]
        label_embs_val = emb_val[n_features + 1:, :]
        mat_est = x_mat.dot(feature_embs).dot(label_embs_val.T)

    residual_norm = np.linalg.norm(mat_est - y_mat)
    if verbose:
        print(0.5 * residual_norm ** 2)  # we should have recovered the low-rank matrix
    else:
        assert residual_norm < 1e-3
Beispiel #2
0
def test_larcqy(verbose=False):
    """Set up a bilinear low-rank regression problem and run ``learn`` on it.

    Two groups of items (``n1`` and ``n2`` of them) get feature vectors
    ``x1_mat`` / ``x2_mat`` generated from rounded Gaussian embeddings.  The
    target for pair ``(i, j)`` is ``a1[i] . a2[j]`` with
    ``a1 = x1_mat.dot(feat_emb1)`` and ``a2 = x2_mat.dot(feat_emb2)``.  Each
    pair becomes one (context, question) sample that is passed through the
    external ``reader`` / ``answerer`` model and scored with
    ``total_loss_quadratic``.

    The function makes no assertion; it only exercises the pipeline.

    Args:
        verbose: when true, print the last element returned by ``learn``.
    """
    # Which kinds of context entries each sample carries.  The sample builder
    # below also handles {'index'} and {'index', 'features'}.
    input_types = {'features'}
    n1, n2, d1, d2, rank_gold = 7, 6, 5, 4, 3

    # random data generation
    np.random.seed(1)
    emb_noise, noise = 1, 0
    batch_size = n1 * n2

    def rounded_gaussian(n_rows):
        # Gaussian embedding matrix rounded to one decimal place.
        return np.round(np.random.randn(n_rows, rank_gold) * emb_noise, 1)

    # Draw order is significant for reproducibility under the fixed seed.
    data_emb1 = rounded_gaussian(n1)
    data_emb2 = rounded_gaussian(n2)
    feat_emb1 = rounded_gaussian(d1)
    feat_emb2 = rounded_gaussian(d2)
    x1_mat = data_emb1.dot(feat_emb1.T)
    x2_mat = data_emb2.dot(feat_emb2.T)
    a1 = x1_mat.dot(feat_emb1)
    a2 = x2_mat.dot(feat_emb2)
    y_mat = a1.dot(a2.T) + np.random.randn(n1, n2) * noise

    # data stuff
    with_features = 'features' in input_types
    with_index = 'index' in input_types
    index_offset = d1 + d2 + 2  # first id used for one-hot item indices

    def context_entries(i, j):
        # Context of the sample for pair (i, j): ids 0 and 1 in the second
        # component are skipped; features of side 1 come first, then side 2.
        entries = []
        if with_features:
            entries += [((0, k + 2), x1_mat[i, k]) for k in range(d1)]
            entries += [((1, k + 2 + d1), x2_mat[j, k]) for k in range(d2)]
        if with_index:
            entries += [((0, index_offset + i), 1.0),
                        ((1, index_offset + n1 + j), 1.0)]
        return entries

    data = [
        (context_entries(i, j), [((0, 1), y_mat[i, j])])
        for i in range(n1)
        for j in range(n2)
    ]

    # Size of the embedding table needed for the chosen input types.
    n_ents = 2
    if with_features:
        n_ents = d1 + d2 + 2
    if with_index:
        n_ents = d1 + d2 + n1 + n2 + 2

    data_arr = vectorize_samples(data)
    batches = data_to_batches(data_arr, batch_size, dtypes=[np.int64, np.float32, np.int64, np.float32])

    # Disabled alternative: feed batches through placeholders.
    # NOTE(review): `sampler` is never passed to `learn` below -- confirm
    # before re-enabling this branch.
    feed_through_placeholders = False
    if feed_through_placeholders:
        qc = tf.placeholder(np.int64, (batch_size, n_ents, 2), name='question_in_context')
        yc = tf.placeholder(np.float32, (batch_size, n_ents), name='answer_in_context')
        q = tf.placeholder(np.int64, (batch_size, 1, 2), name='question')
        y = tf.placeholder(np.float32, (batch_size, 1), name='answer')
        sampler = placeholder_feeder((qc, yc, q, y), batches)
        sampler = list(sampler)
    else:
        # Bake the first batch into the graph as variables.
        qc0, yc0, q0, y0 = list(batches)[0]
        qc, yc, q, y = (tf.Variable(value) for value in (qc0, yc0, q0, y0))

    # model definition
    rank = min(rank_gold, max(min(n1, n2), min(d1, d2)))
    print(rank)

    # Disabled alternative: start near the generating feature embeddings.
    init_near_gold = False
    if init_near_gold:
        emb0_val = np.concatenate((np.zeros((2, rank)), feat_emb1, feat_emb2))
        emb0_val += np.random.randn(n_ents, rank) * 0.1
    else:
        emb0_val = np.random.randn(n_ents, rank)
    # NOTE(review): emb0 is a tf.constant yet is listed under `variables` in
    # the `learn` call -- confirm this is what `learn` expects.
    emb0 = tf.constant(emb0_val.tolist(), dtype=np.float32, shape=(n_ents, rank))

    # reading step
    emb1 = reader(emb0=emb0, context=(qc, yc), n_slots=2)
    # answering step
    objective = total_loss_quadratic(answerer(emb1, q), y)

    # train the model
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)

    def report_loss(it, e, xy, f):
        # Short-circuits like ``it and cond and print(...)``: nothing is
        # printed at iteration 0 nor when ``it`` is not a multiple of 100.
        if not it:
            return it
        on_hundredth = (it % 100) == 0
        if not on_hundredth:
            return on_hundredth
        return print("%d) loss=%f" % (it, f[0]))

    hooks = [report_loss]
    params = learn(objective, optimizer=optimizer, hooks=hooks, max_epochs=300, variables=[emb0])

    if verbose:
        print(params[-1])