Example #1
    def hard_sampler(y_none, y_pred):
        # We obtain the similarity matrix and its diagonal
        v, c, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
        S = cos_similarity(v, c)
        St = tf.transpose(S)
        diagonal = tf.linalg.diag_part(S)
        # Column vector of positive-pair (diagonal) similarities
        reshaped = tf.expand_dims(diagonal, axis=1)

        # Set the diagonal to -1 so a positive pair is never chosen as a hard negative
        S = tf.linalg.set_diag(S, -tf.ones_like(diagonal))
        St = tf.linalg.set_diag(St, -tf.ones_like(diagonal))

        # Subtract the positive (diagonal) similarity from the n hardest negatives
        values_s = tf.math.top_k(S, k=n)[0]
        vid_contrast = values_s - reshaped

        values_st = tf.math.top_k(St, k=n)[0]
        sen_contrast = values_st - reshaped

        b_loss = tf.maximum(0.0, vid_contrast + margin) + tf.maximum(
            0.0, sen_contrast + margin)
        b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
        return tf.reduce_mean(b_sum)
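All of these examples call a shared cos_similarity helper that is not shown on this page. Below is a minimal sketch of one plausible TensorFlow implementation for the matrix case used by the loss functions; note the helper is evidently overloaded elsewhere (Example #2 calls it on 1-D NumPy vectors and Example #5 passes an extra scaling argument), which this sketch does not cover.

import tensorflow as tf

def cos_similarity(a, b):
    # Pairwise cosine similarity between the rows of a and b -> shape (rows_a, rows_b)
    a_norm = tf.math.l2_normalize(a, axis=1)
    b_norm = tf.math.l2_normalize(b, axis=1)
    return tf.matmul(a_norm, b_norm, transpose_b=True)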
Example #2
def gen_topics_embedding(corpus, num_topics, dictionary, word_dim, w2v_model):
    lda_model = train_lda(True, corpus, num_topics, dictionary)

    temp = dictionary[0]  # Access one item so gensim populates dictionary.id2token

    topics_emb = np.zeros((num_topics, word_dim))
    for topic_id in range(num_topics):
        topic = lda_model.get_topic_terms(topicid=topic_id, topn=10)
        topic_emb = np.zeros(word_dim)
        weights = []
        vecs = []
        for idx in range(len(topic)):
            word = dictionary.id2token[topic[idx][0]]
            weights.append(topic[idx][1])
            vecs.append(w2v_model[word])
        weights = np.array(weights)
        weights = weights / weights.sum()
        for idx in range(len(topic)):
            topic_emb += weights[idx] * vecs[idx]
        topics_emb[topic_id] = topic_emb
        # print(topic_emb)

    sim_mat = np.zeros((num_topics, num_topics))
    for i in range(num_topics):
        for j in range(num_topics):
            sim_mat[i][j] = cos_similarity(topics_emb[i], topics_emb[j])
    print("Topics similarity matrix:\n{}".format(sim_mat))
    return topics_emb
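A hypothetical usage sketch, assuming train_lda wraps gensim's LdaModel and w2v_model is a gensim KeyedVectors that covers the topic words; the documents and file name here are illustrative only.

from gensim.corpora import Dictionary
from gensim.models import KeyedVectors

docs = [["graph", "neural", "network"], ["topic", "model", "inference"]]
dictionary = Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]

w2v_model = KeyedVectors.load("word2vec.kv")   # illustrative path
topics_emb = gen_topics_embedding(corpus, num_topics=2, dictionary=dictionary,
                                  word_dim=w2v_model.vector_size,
                                  w2v_model=w2v_model)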
Example #3
def validate_classification(train_idxs, test_idxs, raw_tasks, topics_emb, net,
                            theta, dataset):
    train_ids = [raw_tasks[train_idx] for train_idx in train_idxs]
    test_ids = [raw_tasks[test_idx] for test_idx in test_idxs]

    train_embs = [topics_emb[train_idx] for train_idx in train_idxs]
    test_embs = [topics_emb[test_idx] for test_idx in test_idxs]

    train_emb_distrib = compute_mix_emb(train_ids, train_embs)

    support_shots = num_training_samples_per_class
    query_shots = num_classes_per_task
    # support_shots = num_training_samples_per_class * sample_rate
    # query_shots = num_classes_per_task * sample_rate

    metrics = []

    # total_val_samples_per_task = num_val_samples_per_class * num_classes_per_task
    for idx in range(len(test_ids)):
        task_ids = test_ids[idx]
        task_embs = test_embs[idx]

        net.load_weights(INNER_MODEL_PATH)
        # task_s, task_q = get_support_query_data(task, num_training_samples_per_class)
        # task_sample_ids = sample_task_from_raw_task(task_ids, support_shots, query_shots)
        task_sample_ids = sample_task_from_raw_task_by_label(
            task_ids, support_shots, query_shots, False)
        task_s = load_struct_data([task_sample_ids['s']], dataset)[0]
        task_q = load_struct_data([task_sample_ids['q']], dataset)[0]

        zeta = cos_similarity(train_emb_distrib, task_embs)

        drop_theta(zeta, theta)

        # w_task = adapt_to_task(task=task_s, inner_model=net, w0=theta)
        adapt_to_task(task_s, net, theta)
        WX_q, X_q, cnnX_q, MX_q, y_q = parse_task_data(task_q)
        # # ------------------------------------------------------------------------------
        # # print output of each layer
        # w_layer_names = ['multi_graph_cnn_1', 'embedding_1', 'dense_1',\
        #                 'conv2d_1', 'multi_graph_cnn_2', 'gru_1', \
        #                 'dense_2', 'gru_2', 'cocnnattention_1', 'coattention_1',\
        #                 'batch_normalization_3', 'batch_normalization_2',\
        #                 'batch_normalization_1', 'dense_3', 'dense_4', 'dense_5',\
        #                 'dense_6', 'dense_7']
        # for name in w_layer_names:
        #     print_layer(name, net, WX_q, X_q, cnnX_q, MX_q)
        # # ------------------------------------------------------------------------------

        scores = net.evaluate([WX_q, X_q, cnnX_q, MX_q], y_q, verbose=0)

        y_pred = net.predict([WX_q, X_q, cnnX_q, MX_q])
        print("y_q:{}  y_pred:{}".format(y_q.reshape(-1), y_pred.reshape(-1)))

        metrics.append(scores)

        # if not train_flag:
        #     sys.stdout.write('\033[F')
    return metrics
Example #4
 def proxy_sampler_2(y_true, y_pred):
     proxy = cos_similarity(y_true, y_true)  # computed but unused in this variant
     v, c = tf.split(y_pred, 2, axis=1)
     S = cos_similarity(v, c)
     St = tf.transpose(S)
     diagonal = tf.linalg.diag_part(S)
     # Column vector of positive-pair (diagonal) similarities
     reshaped = tf.expand_dims(diagonal, axis=1)
     # Subtract the positive similarity from every entry and add the margin
     vid_contrast = S - reshaped + margin
     sen_contrast = St - reshaped + margin
     b_loss = tf.maximum(0.0, vid_contrast) + tf.maximum(0.0, sen_contrast)
     b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
     return tf.reduce_mean(b_sum)
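A quick numeric check of the hinge term used by these samplers (the margin value is illustrative). Note that the diagonal itself always contributes exactly margin, which is why variants such as Example #1 and Example #10 explicitly mask it out.

import tensorflow as tf

margin = 0.2
S = tf.constant([[0.9, 0.4],
                 [0.3, 0.8]])                          # toy video-caption similarities
reshaped = tf.expand_dims(tf.linalg.diag_part(S), axis=1)
print(tf.maximum(0.0, S - reshaped + margin).numpy())
# [[0.2 0. ]
#  [0.  0.2]]  -- off-diagonal terms only fire when a negative is within `margin` of the positive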
Example #5
            def task_metalearn(inp, reuse=True):
                """ Perform gradient descent for one task in the meta-batch. """
                img, label, features = inp
                task_outputbs, task_lossesb = [], []

                task_accuraciesb = []
                if FLAGS.dropout_ratio > 0:
                    dropout_indices = tf.random.shuffle(tf.range(FLAGS.dict_size))[:int((1-FLAGS.dropout_ratio)*FLAGS.dict_size)]
                else:
                    dropout_indices = tf.range(FLAGS.dict_size)
                k_dropped = tf.gather(memo_weights['k'], indices=dropout_indices, axis=0) if 'train' in prefix else memo_weights['k']
                v_dropped = tf.gather(memo_weights['v'], indices=dropout_indices, axis=0) if 'train' in prefix else memo_weights['v']

                sim = cos_similarity(features, k_dropped, memo_weights['alpha'])
                task_outputa = self.fc_forward(k_dropped, weights)
                task_lossa = self.loss_func_weighted(task_outputa, v_dropped, sim)
                grads = tf.gradients(task_lossa, list(weights.values()))
                gradients = dict(zip(weights.keys(), grads))
                # One inner-loop step: only 'w1'/'b1' are adapted, using a learned
                # scalar or diagonal learning rate; all other weights are kept fixed.
                fast_weights = dict()
                for key in weights.keys():
                    if key in ['w1', 'b1']:
                        if FLAGS.scalar_lr:
                            fast_weights[key] = weights[key] - memo_weights['adaptive_lr'] * gradients[key]
                        else:
                            fast_weights[key] = weights[key] - tf.matmul(memo_weights['adaptive_lr_diag'], gradients[key])
                    else:
                        fast_weights[key] = weights[key]
                
                output = self.fc_forward(features, fast_weights)
                task_outputbs.append(output)
                task_lossesb.append(self.loss_func(output, label))

                for j in range(num_updates - 1):
                    loss = self.loss_func_weighted(self.fc_forward(k_dropped, fast_weights), v_dropped, sim)
                    grads = tf.gradients(loss, list(fast_weights.values()))
                    gradients = dict(zip(fast_weights.keys(), grads))
                    
                    for key in fast_weights.keys():
                        if key in ['w1','b1']:
                            if FLAGS.scalar_lr:
                                fast_weights[key] =  fast_weights[key] - memo_weights['adaptive_lr']*gradients[key]
                            else:
                                fast_weights[key] =  fast_weights[key] - tf.matmul(memo_weights['adaptive_lr_diag'],gradients[key])
                        else:
                            fast_weights[key] = fast_weights[key]
                    output = self.fc_forward(features, fast_weights)
                    task_outputbs.append(output)
                    task_lossesb.append(self.loss_func(output, label))

                task_output = [task_outputa, task_outputbs, task_lossa, task_lossesb]

                task_accuracya = tf.contrib.metrics.accuracy(tf.argmax(tf.nn.softmax(task_outputa), 1), tf.argmax(v_dropped, 1))
                for j in range(num_updates):
                    task_accuraciesb.append(tf.contrib.metrics.accuracy(tf.argmax(tf.nn.softmax(task_outputbs[j]), 1), tf.argmax(label, 1)))
                task_output.extend([task_accuracya, task_accuraciesb])
                if FLAGS.visualize:
                    task_output.extend([memo_weights['v'], memo_weights['k'], sim, features])
                return task_output
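For reference, a condensed TF2 re-expression of the inner-loop update above, assuming eager execution with tf.GradientTape instead of the TF1 tf.gradients/static-graph setup used in the original; the names mirror the snippet but the function itself is illustrative.

import tensorflow as tf

def inner_step(weights, adaptive_lr, loss_fn):
    # loss_fn maps a weight dict to a scalar loss (e.g. the weighted memory loss above)
    with tf.GradientTape() as tape:
        tape.watch(list(weights.values()))
        loss = loss_fn(weights)
    grads = tape.gradient(loss, weights)
    # Only 'w1'/'b1' are adapted with the learned learning rate; the rest pass through
    return {k: (w - adaptive_lr * grads[k]) if k in ('w1', 'b1') else w
            for k, w in weights.items()}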
Example #6
 def naive_max_ranking_roll(y_none, y_pred):
     vidp, senp, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
     # "Roll": each sample treats its batch neighbour as its negative
     vidn = tf.roll(vidp, shift=1, axis=0)
     senn = tf.roll(senp, shift=1, axis=0)
     vp_sn = cos_similarity(vidp, senn)
     d_vp_sn = tf.linalg.diag_part(vp_sn)
     r_vp_sn = tf.expand_dims(d_vp_sn, axis=1)
     vp_sp = cos_similarity(vidp, senp)
     d_vp_sp = tf.linalg.diag_part(vp_sp)
     r_vp_sp = tf.expand_dims(d_vp_sp, axis=1)
     vn_sp = cos_similarity(vidn, senp)
     d_vn_sp = tf.linalg.diag_part(vn_sp)
     r_vn_sp = tf.expand_dims(d_vn_sp, axis=1)
     # Max ranking loss
     loss = tf.maximum(0.0, margin + r_vp_sn - r_vp_sp) + tf.maximum(
         0.0, margin + r_vn_sp - r_vp_sp)
     loss = tf.reduce_mean(loss) + 1e-12
     return loss
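Because this loss reads the video, sentence and ground-truth embeddings entirely from y_pred, it can be passed straight to Keras. A hypothetical wiring sketch: joint_model, train_inputs and dummy_targets are placeholders for a model whose output concatenates the 2048 + 2048 + 768 vector expected by the tf.split above.

joint_model.compile(optimizer="adam", loss=naive_max_ranking_roll)
joint_model.fit(train_inputs, dummy_targets, batch_size=64, epochs=10)  # targets are ignored by the loss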
Example #7
    def proxy_sampler_3(y_none, y_pred):
        v, c, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
        # Proxy: negate C so top_k returns the indices of the most dissimilar sentences
        C = cos_similarity(y_true, y_true)
        indices_p = tf.math.top_k(-C, k=n)[1]

        S = cos_similarity(v, c)
        St = tf.transpose(S)

        diagonal = tf.linalg.diag_part(S)

        reshaped = tf.expand_dims(diagonal, axis=1)

        values_s = tf.gather(S, indices_p, batch_dims=1)
        values_st = tf.gather(St, indices_p, batch_dims=1)

        vid_contrast = values_s - reshaped + margin
        sen_contrast = values_st - reshaped + margin

        b_loss = tf.maximum(0.0, vid_contrast) + tf.maximum(0.0, sen_contrast)

        b_sum = tf.reduce_sum(b_loss, axis=-1)

        return b_sum  # per-sample loss; this variant applies no batch mean
Example #8
    def proxy_sampler(y_true, y_pred):
        # We obtain the similarity matrix of sentences and its diagonal
        C = cos_similarity(y_true, y_true)
        # Negate C so top_k returns the indices of the most dissimilar sentences
        indices_p = tf.math.top_k(-C, k=n)[1]
        # Now we start with v and c similarity
        v, c = tf.split(y_pred, 2, axis=1)
        S = cos_similarity(v, c)
        St = tf.transpose(S)
        diagonal = tf.linalg.diag_part(S)
        # Column vector of positive-pair (diagonal) similarities
        reshaped = tf.expand_dims(diagonal, axis=1)
        # Extract from the v-c sim matrix the positions obtained by the proxy
        values_s = tf.gather(S, indices_p, batch_dims=1)
        values_st = tf.gather(St, indices_p, batch_dims=1)

        vid_contrast = values_s - reshaped
        sen_contrast = values_st - reshaped

        b_loss = tf.maximum(0.0, vid_contrast + margin) + tf.maximum(
            0.0, sen_contrast + margin)
        b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
        return tf.reduce_mean(b_sum)
Example #9
 def hard_sampler_full(y_none, y_pred):
     # We obtain the similarity matrix and its diagonal
     v, c, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
     S = cos_similarity(v, c)
     St = tf.transpose(S)
     diagonal = tf.linalg.diag_part(S)
     # Column vector of positive-pair (diagonal) similarities
     reshaped = tf.expand_dims(diagonal, axis=1)
     # Subtract the positive similarity from every entry and add the margin
     vid_contrast = S - reshaped + margin
     sen_contrast = St - reshaped + margin
     b_loss = tf.maximum(0.0, vid_contrast) + tf.maximum(0.0, sen_contrast)
     b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
     return tf.reduce_mean(b_sum)
Example #10
    def iterative_loss(y_true, y_p):
        # v and c are the video and caption tensors from the batch, shape (b, repr_dim)
        # n is the number of samples per pair
        # t is the type of pairs to sample

        # For each datapoint pick the n highest-similarity negatives (hardest negatives)
        v, c = tf.split(y_p, 2, axis=1)

        # D is the b x b video-caption similarity matrix
        D = cos_similarity(v, c)
        # vp_cp is just the repetition for n pairs
        s = tf.shape(D)
        # Convert the matrix into vector
        diagonal = tf.linalg.diag_part(D)
        reshaped = tf.reshape(diagonal, (s[0], 1))
        # Repeat each item into n columns
        repeated = tf.tile(reshaped, [1, n])
        # Again convert matrix into vector
        vp_cp = tf.reshape(repeated, [n * s[0]])

        # Rows represent videos.
        # Push the diagonal to -1 so only negatives are chosen when ranking rows and columns
        rows = D - tf.linalg.diag(tf.linalg.diag_part(D)) - tf.eye(s[0])
        # Columns represent captions
        columns = tf.transpose(rows)

        values_r = tf.math.top_k(rows, k=n)[0]
        s_r = tf.shape(values_r)

        values_c = tf.math.top_k(columns, k=n)[0]
        s_c = tf.shape(values_c)

        vp_cn = tf.reshape(values_r, [n * s_r[0]])
        vn_cp = tf.reshape(values_c, [n * s_c[0]])

        loss = tf.maximum(0.0, margin + vp_cn - vp_cp) + tf.maximum(
            0.0, margin + vn_cp - vp_cp)
        loss = tf.reduce_mean(loss) + 1e-12
        return loss
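To see what the diagonal masking in the snippet above does, here is a toy 3x3 similarity matrix run through the same expression.

import tensorflow as tf

D = tf.constant([[0.9, 0.2, 0.4],
                 [0.1, 0.8, 0.3],
                 [0.5, 0.6, 0.7]])
rows = D - tf.linalg.diag(tf.linalg.diag_part(D)) - tf.eye(3)
print(rows.numpy())
# [[-1.   0.2  0.4]
#  [ 0.1 -1.   0.3]
#  [ 0.5  0.6 -1. ]]
# The positives are pushed to -1, so tf.math.top_k over each row can only pick negatives.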
Example #11
def rank_matrix(a, b):
    sm = cos_similarity(a, b)
    return tf.argsort(sm, direction="DESCENDING")
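A toy usage sketch (eager TF2, with cos_similarity as assumed earlier); the shapes are illustrative.

import tensorflow as tf

videos = tf.random.normal((3, 2048))
captions = tf.random.normal((3, 2048))
order = rank_matrix(videos, captions)   # shape (3, 3)
print(order.numpy())                    # row i: caption indices sorted best-to-worst for video i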
Example #12
def meta_train(train_idxs, val_idxs, test_idxs, raw_tasks, topics_emb, net,
               theta, dataset):
    train_ids = [raw_tasks[train_idx] for train_idx in train_idxs]
    val_ids = [raw_tasks[val_idx] for val_idx in val_idxs]
    test_ids = [raw_tasks[test_idx] for test_idx in test_idxs]

    train_embs = [topics_emb[train_idx] for train_idx in train_idxs]
    val_embs = [topics_emb[val_idx] for val_idx in val_idxs]
    test_embs = [topics_emb[test_idx] for test_idx in test_idxs]

    train_emb_distrib = compute_mix_emb(train_ids, train_embs)

    # support_shots = num_training_samples_per_class * num_classes_per_task
    # query_shots = num_classes_per_task * num_classes_per_task
    support_shots = num_training_samples_per_class * sample_rate
    query_shots = num_classes_per_task * sample_rate

    for epoch in range(resume_epoch, resume_epoch + num_epochs):

        print("Meta epoch_{}:".format(epoch + 1))
        # print("theta[0][0]:\n{}".format(theta[0][0]))
        task_num = len(train_ids)
        # initialize gradients with zero
        gradients = []
        for item in theta['mean']:
            gradients.append(np.zeros(item.shape))

        # batch_num = int(task_num/batch_size+0.5)
        batch_num = batch_per_epoch
        for batch in range(batch_num):
            np.random.seed()
            batch_idxs = np.random.choice(np.arange(task_num), size=batch_size, \
                replace=False, p=None)
            batch_ids = [train_ids[batch_idx] for batch_idx in batch_idxs]
            batch_embs = [train_embs[batch_idx] for batch_idx in batch_idxs]
            for idx in range(len(batch_ids)):
                task_ids = batch_ids[idx]
                # run a task
                # load data of a task
                net.load_weights(INNER_MODEL_PATH)
                # task_s, task_q = get_support_query_data(train_task, num_training_samples_per_class)
                # print(task_ids)
                task_sample_ids = sample_task_from_raw_task_by_label(
                    task_ids, support_shots, query_shots)

                task_s = load_struct_data([task_sample_ids['s']], dataset)[0]
                task_q = load_struct_data([task_sample_ids['q']], dataset)[0]

                zeta = cos_similarity(train_emb_distrib, batch_embs[idx])
                drop_theta(zeta, theta)

                adapt_to_task(task_s, net, theta)
                # evaluate the model on this task's query set
                WX_q, X_q, cnnX_q, MX_q, y_q = parse_task_data(task_q)
                # y_pred = net.predict([WX_q, X_q, cnnX_q, MX_q])
                weights_1 = net.get_weights().copy()
                net.fit([WX_q, X_q, cnnX_q, MX_q], y_q, epochs=1)
                weights_2 = net.get_weights().copy()

                # Recover gradients from the weight change of the inner update
                for w_idx in range(len(weights_1)):
                    gradients[w_idx] += ((weights_1[w_idx] - weights_2[w_idx]) /
                                         inner_lr)

            # update theta
            for idx in range(len(theta['mean'])):
                theta['mean'][idx] = theta['mean'][idx] - gradients[idx] * (
                    meta_lr * pow(lr_decay, epoch)) / batch_size
                theta['logSigma'][idx] = theta['logSigma'][idx] - gradients[idx]*(meta_lr*pow(lr_decay, epoch))/batch_size\
                    *np.exp(theta['logSigma'][idx])
        # evaluate the model
        metrics = validate_classification(train_idxs, val_idxs, raw_tasks,
                                          topics_emb, net, theta, dataset)
        for score in metrics:
            print(score)
        print("\nVal average: [loss, accuracy, f1_score, precision, recall]\n{}\n".format(\
                np.mean(metrics, axis=0)))
        with open(log_file, 'a+') as f:
            f.write("Val average epoch={}: {}\n".format(\
               epoch+1, np.mean(metrics, axis=0)))

        # #-------------------------------------------
        # print("test_data:")
        # fixed_weights = net.get_weights().copy()    # avoid updating the meta_model and the inner_model
        # metrics = validate_classification(test_data, net, theta, train_flag=False, csv_flag=False)
        # net.set_weights(fixed_weights)  # avoid updating the meta_model and the inner_model
        # for score in metrics:
        #     print(score)
        # #-------------------------------------------

        if ((epoch + 1) % num_epochs_save == 0):
            checkpoint = {'theta': theta}
            print('SAVING WEIGHTS...')
            checkpoint_filename = 'Epoch_{0:d}.pt'.format(epoch + 1)
            print(checkpoint_filename)
            torch.save(checkpoint, os.path.join(dst_folder,
                                                checkpoint_filename))
        print()
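The meta-update inside the batch loop reduces to a simple first-order rule. Below is a condensed NumPy sketch of one batch member's contribution, assuming the inner optimizer is plain SGD with learning rate inner_lr (which is what dividing the weight change by inner_lr implies); the logSigma branch additionally scales each step by exp(logSigma).

import numpy as np

def recovered_gradient(w_before, w_after, inner_lr):
    # Gradient implied by one inner SGD step: w_after = w_before - inner_lr * g
    return [(b - a) / inner_lr for b, a in zip(w_before, w_after)]

def meta_step(theta_mean, grads, meta_lr, lr_decay, epoch, batch_size):
    step = meta_lr * lr_decay ** epoch / batch_size
    return [t - step * g for t, g in zip(theta_mean, grads)]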