Example #1
    def test_normalize_rows(self):
        arg = np.zeros((3, 2))
        self.assertTrue(np.array_equal(normalize_rows(arg), arg))

        arg = np.array([[0, 3, 4], [1, 6, 4]])

        expected = np.array([[0, 0.6, 0.8],
                             [0.13736056, 0.82416338, 0.54944226]])

        self.assertTrue(np.allclose(normalize_rows(arg), expected))
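
The test above pins down what normalize_rows is expected to do: scale each row to unit L2 norm and leave all-zero rows untouched. A minimal sketch consistent with that contract (not the project's actual implementation; the optional second argument seen in Example #5 is assumed to be the norm order):

import numpy as np

def normalize_rows(x, order=2):
    # Scale every row to unit norm; rows with zero norm are returned unchanged
    # to avoid division by zero (matches the zero-matrix case in the test above).
    x = np.asarray(x, dtype=np.float64)
    norms = np.linalg.norm(x, ord=order, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return x / norms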
Example #2
def compute_metrics(model, loader, k=5, mode='bilinear'):
    global GPU_AVAILABLE

    p_at_1 = 0
    p_at_k = 0
    ndcg = 0

    for X, Y in loader:
        X = Variable(X)
        Y = [Variable(y) for y in Y]
        if GPU_AVAILABLE:
            X = X.cuda()
            Y = [y.cuda() for y in Y]

        if mode == "bilinear":
            outputs = model(X, Y)
            if GPU_AVAILABLE:
                outputs = [out.cpu() for out in outputs]
            outputs = [out.data.numpy().squeeze() for out in outputs]

        elif mode == "project_x":
            X_proj = model.project_x(X).data.numpy()
            X_proj = normalize_rows(X_proj)
            Y = [y.data.numpy() for y in Y]
            outputs = [
                x.reshape(1, -1).dot(np.atleast_2d(y).T).squeeze()
                for x, y in zip(X_proj, Y)
            ]

        elif mode == "project_y":
            Y_proj = [model.project_y(y).data.numpy() for y in Y]
            Y_proj = [normalize_rows(y) for y in Y_proj]
            X = X.data.numpy()
            outputs = [
                x.reshape(1, -1).dot(np.atleast_2d(y).T).squeeze()
                for x, y in zip(X, Y_proj)
            ]

        elif mode == "random":
            outputs = [np.random.random(len(y)) for y in Y]

        else:
            raise ValueError("not a valid mode")

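        # Rank candidates by descending score. The P@1 / P@k sums below appear
        # to assume that the relevant labels occupy the first positions of each
        # candidate list (index < 1 for P@1, index < k for P@k).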
        idxs = [np.argsort(out)[::-1] for out in outputs]
        p_at_1 += sum([np.mean(idx[:1] < 1) for idx in idxs])
        p_at_k += sum([np.mean(idx[:k] < k) for idx in idxs])
        ndcg += sum(
            [ndcg_at_k(out.tolist(), k=k, method=0) for out in outputs])

    N = len(loader.dataset)
    return p_at_1 / N, p_at_k / N, ndcg / N
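
compute_metrics relies on an ndcg_at_k helper that is not shown here. A sketch of one common formulation (an assumption about that helper, not its actual source; method=0 is taken to mean that the first result is undiscounted and later ranks receive a log2 discount):

import numpy as np

def dcg_at_k(r, k, method=0):
    # Discounted cumulative gain over the first k relevance scores.
    r = np.asarray(r, dtype=float)[:k]
    if r.size == 0:
        return 0.0
    if method == 0:
        return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
    return np.sum(r / np.log2(np.arange(2, r.size + 2)))

def ndcg_at_k(r, k, method=0):
    # Normalize by the DCG of the ideal (descending-sorted) ranking.
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    return dcg_at_k(r, k, method) / dcg_max if dcg_max else 0.0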
Example #3
    def load_vectors(self):
        self.db = np.load(self.vectors_path, allow_pickle=True)
        if self.normalize:
            self.db = normalize_rows(self.db)
        self.dim = self.db.shape[1]
        self.index = faiss.IndexFlatIP(self.dim)
        self.db = np.ascontiguousarray(self.db, dtype=np.float32)
        self.index.add(self.db)
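
Because the rows are L2-normalized before being added, inner-product search on this IndexFlatIP index is equivalent to cosine similarity. A hypothetical usage sketch (store stands in for the object whose load_vectors method is shown above; the query count and k are arbitrary):

import numpy as np

queries = np.random.rand(4, store.dim)                       # hypothetical query vectors
queries = np.ascontiguousarray(normalize_rows(queries), dtype=np.float32)
scores, neighbor_ids = store.index.search(queries, 10)       # top-10 most similar rows per query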
Example #4
    def __init__(self, path, tags_file, set_):
        super().__init__()

        self._path = abspath(path)
        if not exists(self._path):
            raise OSError("{} doesn't exist".format(self._path))

        with open(tags_file, 'r') as f:
            self._data = json.load(f)

        self._set = set_
        if self._set not in self._data:
            raise ValueError("{} isn't a valid key".format(set_))

        # sort images for reproducibility reasons
        sorted_keys = sorted(list(self._data[self._set]))
        self._data[self._set] = [self._data[self._set][k] for k in sorted_keys]

        # L2 normalization of word embeddings
        if 'vectors' not in self._data:
            raise RuntimeError('there are no word embeddings for this dataset')
        vectors = self._data['vectors']
        for tag, vec in vectors.items():
            if vec is not None:
                vectors[tag] = normalize_rows(vec).astype(np.float32)
Example #5
    images_test = [images[i] for i in range(1, len(images), 2)]

    tags = sorted(list(tag2im.keys()))

    # --------------------------------------------------------------------------

    print("processing input (training) embeddings ...")
    ebd_x = []
    ebd_x_proj = []
    for im in progressbar(images):
        feat_file = join(args.features_path, im.replace('.jpg', '.dat'))
        x = load(feat_file)
        ebd_x.append(x)
        x_proj = model.project_x(torch.from_numpy(x)).squeeze_(0).data.numpy()
        ebd_x_proj.append(x_proj)
    ebd_x = normalize_rows(np.array(ebd_x), 2)
    ebd_x_proj = normalize_rows(np.array(ebd_x_proj), 2)
    print("{} image features".format(len(ebd_x)))

    # --------------------------------------------------------------------------

    print("processing output embeddings ...")
    ebd_y = []
    ebd_y_proj = []
    for tag in progressbar(tags):
        tag = tag.replace('*', '')
        if vecs[tag] is None:
            y = np.zeros(vec_dim, dtype=np.float32)
        else:
            y = np.array(vecs[tag], dtype=np.float32)
        ebd_y.append(y)
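
    # Hypothetical continuation: once the tag vectors are stacked and row-
    # normalized, ranking tags for an image reduces to a dot product plus an
    # argsort, as in the "project_x" mode of compute_metrics above. This assumes
    # the projected image dimension matches the word-vector dimension.
    ebd_y_arr = normalize_rows(np.array(ebd_y, dtype=np.float32))
    scores = ebd_x_proj[0].dot(ebd_y_arr.T)          # cosine scores for image 0
    top_tags = [tags[i] for i in np.argsort(scores)[::-1][:5]]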
    X, Y = {}, {}

    feat_extractor = AutoEncoder(exp=args.exp, weights_path=args.weights_path)
    print("loading features ... ", end='')
    sys.stdout.flush()
    for set_ in [k for k in anno.keys() if k != "tags"]:
        imid_list = sorted(list(anno[set_].keys()))  # only for reproducibility
        X[set_] = None
        Y[set_] = [None for _ in range(len(imid_list))]

        if args.exp == 1:
            for i, imid in tqdm(enumerate(imid_list), total=len(imid_list)):
                # set image features
                fname = splitext(anno[set_][imid]["file_name"])[0] + ".dat"
                x = load(join(args.features_path, set_, fname))
                x = normalize_rows(x.reshape(1, -1)).squeeze()
                x = feat_extractor.predict(kind="img", x=x)
                if i == 0:
                    n_samples = len(imid_list)
                    n_dim = x.shape[0]
                    X[set_] = np.empty((n_samples, n_dim), dtype=np.float32)
                X[set_][i] = x

                # set word embeddings (OOV tags are set to the zero vector)
                tags = anno[set_][imid]["tags"]
                y = [[0] * vec_dim if vecs[w] is None else vecs[w]
                     for w in tags]
                y = normalize_rows(np.array(y, dtype=np.float32))
                y = [t.reshape(1, -1) for t in y]
                y = feat_extractor.predict(kind="text", y=y)
                Y[set_][i] = y
Example #7
    def load_vectors(self):
        self.db = np.load(self.vectors_path, allow_pickle=True)
        if self.normalize:
            self.db = normalize_rows(self.db)
        self.dim = self.db.shape[1]
Example #8
def test_model(pool_pctg, layer_size_1, layer_size_2):
    tf.reset_default_graph()

    X_train, y_train, X_test, y_test = loadEEG()
    X_val = X_test[:2]
    y_val = y_test[:2]
    X_test = X_test[2:]
    y_test = y_test[2:]

    labels = np.array([0, 1, 2, 3, 4, 5])
    digit_indices = [np.where(y_train == i)[0] for i in labels]
    tr_trip_idxs = create_triplet_idxs(X_train, digit_indices, labels)
    digit_indices = [np.where(y_test == i)[0] for i in labels]
    te_trip_idxs = create_triplet_idxs(X_test, digit_indices, labels)
    print('There are', len(tr_trip_idxs), 'training examples!')
    #p = np.random.permutation(len(tr_trip_idxs))
    #tr_trip_idxs = tr_trip_idxs[p]

    # Initializing the variables
    # the data, shuffled and split between train and test sets
    #"""
    X_train = normalize_rows(X_train)
    X_test = normalize_rows(X_test)
    #"""
    D = X_train.shape[1]
    ts_length = X_train.shape[2]
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.001
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               10,
                                               0.1,
                                               staircase=True)
    pool_width = pool_pctg * ts_length

    # create training+test positive and negative pairs
    anchor = tf.placeholder(tf.float32, shape=([None, D, ts_length]), name='L')
    same = tf.placeholder(tf.float32, shape=([None, D, ts_length]), name='R')
    different = tf.placeholder(tf.float32,
                               shape=([None, D, ts_length]),
                               name='R')
    labels = tf.placeholder(tf.float32, shape=([None]), name='gt')

    dropout_f = tf.placeholder("float")
    bn_train = tf.placeholder(tf.bool)

    with tf.variable_scope("siamese") as scope:
        model1, filters = build_conv_net(anchor, bn_train, dropout_f,
                                         ts_length, embedding_size, pool_width,
                                         layer_size_1, layer_size_2)
        scope.reuse_variables()
        model2, _ = build_conv_net(same, bn_train, dropout_f, ts_length,
                                   embedding_size, pool_width, layer_size_1,
                                   layer_size_2)
        scope.reuse_variables()
        model3, _ = build_conv_net(different, bn_train, dropout_f, ts_length,
                                   embedding_size, pool_width, layer_size_1,
                                   layer_size_2)

    distance = tf.sqrt(
        tf.reduce_sum(tf.pow(tf.subtract(model1, model2), 2),
                      1,
                      keep_dims=True))
    loss = triplet_loss(model1, model2,
                        model3)  #+ regularizer(model1, model2, model3)
    #loss = new_new_loss(model1, model2, model3) + regularizer(model1, model2, model3)

    debug_val = debug_loss(model1, model2, model3)
    regularization = regularizer(model1, model2, model3)
    tr_loss = triplet_loss(model1, model2, model3)

    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'l' in var.name]
    batch = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)

    f1 = open('X_output.txt', 'w')
    f2 = open('X_labels.txt', 'w')
    f1_t = open('X_output_test.txt', 'w')
    f2_t = open('X_labels_test.txt', 'w')
    patience_window = 10
    early_stopping = False
    last_vals = [0 for i in range(patience_window)]
    skippable_batches = []

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # Training cycle
        for epoch in range(400):
            avg_loss = 0.
            avg_r = 0.
            total_batch = int(
                np.ceil(tr_trip_idxs.shape[0] / float(batch_size)))
            start_time = time.time()
            # Loop over all batches
            loss_values = []
            avg_tr = 0.
            for i in range(total_batch):
                if i in skippable_batches:
                    continue
                s = i * batch_size
                e = (i + 1) * batch_size
                # Fit training using batch data
                input1, input2, input3 = next_batch_from_idx(
                    s, e, tr_trip_idxs, X_train)
                #anchor_embedding=model1.eval(feed_dict={anchor:input1,dropout_f:1.0})
                #same_embedding=model1.eval(feed_dict={anchor:input2,dropout_f:1.0})
                #diff_embedding=model1.eval(feed_dict={anchor:input3,dropout_f:1.0})
                _, loss_value, predict, r_loss, tr_val, d = sess.run(
                    [
                        optimizer, loss, distance, regularization, tr_loss,
                        debug_val
                    ],
                    feed_dict={
                        anchor: input1,
                        same: input2,
                        different: input3,
                        dropout_f: 1.0
                    })
                print(loss_value)
                if loss_value < .001:
                    skippable_batches.append(i)
                    if i % 30 == 0:
                        train_embedding = model1.eval(feed_dict={
                            anchor: X_train,
                            dropout_f: 1.0
                        })
                        test_embedding = model1.eval(feed_dict={
                            anchor: X_test,
                            dropout_f: 1.0
                        })
                        #val_embedding = model1.eval(feed_dict={anchor:X_val,dropout_f:1.0})
                        accuracy = evaluate_test_embedding(
                            train_embedding, y_train, test_embedding, y_test)
                        print('ACCURACY:', accuracy, 'EPOCH:', epoch)

                #pdb.set_trace()
                if math.isnan(loss_value):
                    pdb.set_trace()
                avg_loss += loss_value
                loss_values.append(loss_value)
                avg_r += r_loss
                avg_tr += tr_val

            #print('epoch %d loss %0.2f' %(epoch,avg_loss/total_batch))
            duration = time.time() - start_time
            print('epoch %d  time: %f loss %0.5f r_loss %0.5f tr_loss %0.5f' %
                  (epoch, duration, avg_loss / total_batch,
                   avg_r / total_batch, tr_val))

            if epoch % 10 == 0:
                tr_acc = compute_accuracy(tr_trip_idxs)
                print('Training accuracy:', tr_acc)

                print(
                    'epoch %d  time: %f loss %0.5f r_loss %0.5f tr_loss %0.5f'
                    % (epoch, duration, avg_loss /
                       (total_batch), avg_r / total_batch, tr_val))
                train_embedding = model1.eval(feed_dict={
                    anchor: X_train,
                    dropout_f: 1.0
                })
                test_embedding = model1.eval(feed_dict={
                    anchor: X_test,
                    dropout_f: 1.0
                })
                val_embedding = model1.eval(feed_dict={
                    anchor: X_val,
                    dropout_f: 1.0
                })
                accuracy = evaluate_test_embedding(train_embedding, y_train,
                                                   test_embedding, y_test)
                val_accuracy = evaluate_test_embedding(train_embedding, y_train,
                                                       val_embedding, y_val)

                print('Accuracy given NN approach %0.2f' % (100 * accuracy))
                print('Val Accuracy given NN approach %0.2f' %
                      (100 * val_accuracy))

                last_vals[(epoch // 100) % patience_window] = val_accuracy
                if last_vals.count(last_vals[0]) == len(last_vals):
                    early_stopping = True
            """
            if early_stopping:
                print 'Stopping early!'
                break
            """

        train_embedding = model1.eval(feed_dict={
            anchor: X_train,
            dropout_f: 1.0
        })
        test_embedding = model1.eval(feed_dict={
            anchor: X_test,
            dropout_f: 1.0
        })
        accuracy = evaluate_test_embedding(train_embedding, y_train,
                                           test_embedding, y_test)
        print('Accuracy given NN approach %0.2f' % (100 * accuracy))

        filter1_weights = sess.run(filters[0])
        for coord, label in zip(train_embedding, y_train):
            f1.write(' '.join([str(a) for a in coord]) + "\n")
            f2.write(str(label) + "\n")

        for coord, label in zip(test_embedding, y_test):
            f1_t.write(' '.join([str(a) for a in coord]) + "\n")
            f2_t.write(str(label) + "\n")

    return accuracy
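
# A possible shape for the triplet_loss helper used above (it is not shown in
# this example); the margin value and the mean reduction are assumptions.
def triplet_loss(anchor_emb, positive_emb, negative_emb, margin=1.0):
    # Squared Euclidean distances from the anchor to the positive and negative
    # embeddings.
    d_pos = tf.reduce_sum(tf.square(anchor_emb - positive_emb), axis=1)
    d_neg = tf.reduce_sum(tf.square(anchor_emb - negative_emb), axis=1)
    # Hinge: the anchor should be closer to the positive than to the negative
    # by at least the margin.
    return tf.reduce_mean(tf.maximum(d_pos - d_neg + margin, 0.0))
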
def test_model(dataset, pool_pctg=.1, layer_size=40, stride_pct=-1):
    tf.reset_default_graph()
    """
    X_train, y_train, X_test, y_test = loadEEG()
    X_val = X_test[:2]
    y_val = y_test[:2]
    X_test = X_test[2:]
    y_test = y_test[2:]
    """
    def compute_accuracy(X, triplet_idxs):
        n_correct = 0.0
        for triplet in triplet_idxs:
            a = np.expand_dims(X[triplet[0]], 0)
            s = np.expand_dims(X[triplet[1]], 0)
            d = np.expand_dims(X[triplet[2]], 0)
            predict_same = distance.eval(feed_dict={
                anchor: a,
                same: s,
                dropout_f: 1.0
            })
            predict_diff = distance.eval(feed_dict={
                anchor: a,
                same: d,
                dropout_f: 1.0
            })
            if predict_same[0][0] < predict_diff[0][0]:
                n_correct += 1.0
        return n_correct / len(triplet_idxs)

    dataset_list = cv_splits_for_dataset(dataset)
    n_fold = 0  # cross-validation fold to evaluate
    if len(dataset_list) <= n_fold:
        n_fold = 0
    X_train = dataset_list[n_fold].X_train
    y_train = dataset_list[n_fold].y_train
    X_test = dataset_list[n_fold].X_test
    y_test = dataset_list[n_fold].y_test

    if dataset == 'trajectories':
        X_train = [g.T for g in X_train]
        X_test = [g.T for g in X_test]

    n = max([
        np.max([v.shape[0] for v in X_train]),
        np.max([v.shape[0] for v in X_test])
    ])
    X_train = standardize_ts_lengths(X_train, n)
    X_test = standardize_ts_lengths(X_test, n)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    # Hold out a small validation split (mirroring the commented-out loadEEG()
    # block above); X_val / y_val are needed for the validation accuracy below.
    X_val, y_val = X_test[:2], y_test[:2]
    X_test, y_test = X_test[2:], y_test[2:]

    X_train = normalize_rows(X_train)
    X_test = normalize_rows(X_test)
    X_val = normalize_rows(X_val)

    labels = np.unique(y_train)
    digit_indices = [np.where(y_train == i)[0] for i in labels]
    tr_trip_idxs = create_triplet_idxs(X_train, digit_indices, labels)
    digit_indices = [np.where(y_test == i)[0] for i in labels]
    te_trip_idxs = create_triplet_idxs(X_test, digit_indices, labels)

    N = X_train.shape[0]
    Ntest = X_test.shape[0]
    D = X_train.shape[1]

    ts_length = X_train.shape[2]
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.001
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               10,
                                               0.1,
                                               staircase=True)
    pool_width = pool_pctg * ts_length

    labels = np.unique(y_train)
    digit_indices = [np.where(y_train == i)[0] for i in labels]
    tr_pairs, tr_y = create_pairs(X_train, digit_indices, labels)
    pos_ind = np.where(tr_y == SAME_LABEL)[0]
    neg_ind = np.where(tr_y == NEG_LABEL)[0]

    # create training+test positive and negative pairs
    anchor = tf.placeholder(tf.float32, shape=([None, D, ts_length]), name='L')
    same = tf.placeholder(tf.float32, shape=([None, D, ts_length]), name='R')
    different = tf.placeholder(tf.float32,
                               shape=([None, D, ts_length]),
                               name='R')
    labels = tf.placeholder(tf.float32, shape=([None]), name='gt')

    #digit_indices = [np.where(y_val == i)[0] for i in labels]
    #val_pairs, val_y = create_pairs(X_val, digit_indices, labels)
    digit_indices = [np.where(y_test == i)[0] for i in labels]
    te_pairs, te_y = create_pairs(X_test, digit_indices, labels)

    # Initializing the variables
    r = 1
    N = tr_pairs.shape[0]
    # the data, shuffled and split between train and test sets

    batch_size = 24
    num_pos = 30
    num_neg = batch_size - num_pos
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.01
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               10,
                                               0.1,
                                               staircase=True)
    # create training+test positive and negative pairs
    images_L = tf.placeholder(tf.float32, shape=([None, ts_length]), name='L')
    images_R = tf.placeholder(tf.float32, shape=([None, ts_length]), name='R')
    labels = tf.placeholder(tf.float32, shape=([None, 1]), name='gt')
    embedding_size = 10
    dropout_f = tf.placeholder("float")
    bn_train = tf.placeholder(tf.bool)

    pool_width = pool_pctg * ts_length
    with tf.variable_scope("siamese") as scope:
        model1, filters = build_conv_net(images_L, bn_train, dropout_f,
                                         ts_length, embedding_size, pool_width)
        scope.reuse_variables()
        model2, _ = build_conv_net(images_R, bn_train, dropout_f, ts_length,
                                   embedding_size, pool_width)

    normalize_model1 = tf.nn.l2_normalize(model1, 0)
    normalize_model2 = tf.nn.l2_normalize(model2, 0)
    cos_similarity = tf.reduce_sum(tf.multiply(normalize_model1,
                                               normalize_model2),
                                   1,
                                   keep_dims=True)

    distance = tf.sqrt(
        tf.reduce_sum(tf.pow(tf.subtract(model1, model2), 2),
                      1,
                      keep_dims=True))
    #distance = 1-scipy.spatial.distance.cosine(model1, model2)
    #loss = contrastive_loss(labels,distance) + regularizer(model1, model2, r)
    #loss = c_loss(labels, model1, model2) + regularizer(model1, model2, r)
    loss = contrastive_loss(labels, distance) + regularizer(model1, model2, r)

    #ugh = c_loss(labels, model1, model2)
    ugh = contrastive_loss(labels, distance)
    #loss = contrastive_loss(labels,distance) + regularizer(model1, model2, r)
    regularization = regularizer(model1, model2, r)
    #contrastice loss
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'l' in var.name]
    batch = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
    #optimizer = tf.train.RMSPropOptimizer(0.00001,momentum=0.9,epsilon=1e-6).minimize(loss)
    # Launch the graph

    f1 = open('X_output.txt', 'w')
    f2 = open('X_labels.txt', 'w')
    f1_t = open('X_output_test.txt', 'w')
    f2_t = open('X_labels_test.txt', 'w')

    #filter2_summary = tf.summary.image("Filter_2", filters[1])
    patience_window = 5
    last_vals = [0 for i in range(patience_window)]
    early_stopping = False
    with tf.Session() as sess:

        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter('/tmp/logs', sess.graph_def)
        #merged = tf.summary.merge_all()

        # Training cycle
        step = 0
        perf_collect = [[], []]
        for epoch in range(700):

            total_c_loss = 0
            for i in range(max(int(np.ceil(N / float(batch_size))), 1)):

                batch_ind = np.arange(i * batch_size,
                                      min((i + 1) * batch_size, N - 1))
                #pos_ind = np.arange(i*num_pos, min((i+1)*num_pos,N-1))
                #neg_ind = np.arange(i*num_neg, min((i+1)*num_neg,N-1))

                #pos_ind = np.random.choice( np.arange(N), num_pos)
                #neg_ind = np.random.choice( np.arange(N), num_neg)
                #batch_ind = np.concatenate((pos_ind, neg_ind))
                #print('VAL ACCURACY %0.2f' % perf_collect[1][-1])

                input1 = tr_pairs[batch_ind, 0]
                input2 = tr_pairs[batch_ind, 1]
                y = tr_y[batch_ind, np.newaxis]
                _, loss_value, predict, r_loss, c_loss = sess.run(
                    [optimizer, loss, distance, regularization, ugh],
                    feed_dict={
                        images_L: input1,
                        images_R: input2,
                        labels: y,
                        dropout_f: 1.0,
                        bn_train: True
                    })
                total_c_loss += c_loss
                if math.isnan(c_loss):
                    pdb.set_trace()

            if epoch % 400 == 0:

                #tr_acc = compute_accuracy(predict,y)

                print('epoch %d loss %0.5f r_loss %0.5f c_loss %0.5f ' %
                      (epoch, loss_value, r_loss, total_c_loss))

                train_embedding = model1.eval(feed_dict={
                    images_L: X_train,
                    dropout_f: 1.0,
                    bn_train: False
                })
                test_embedding = model1.eval(feed_dict={
                    images_L: X_test,
                    dropout_f: 1.0,
                    bn_train: False
                })
                val_embedding = model1.eval(feed_dict={
                    images_L: X_val,
                    dropout_f: 1.0,
                    bn_train: False
                })
                accuracy = evaluate_test_embedding(train_embedding, y_train,
                                                   test_embedding, y_test)
                val_accuracy = evaluate_test_embedding(train_embedding,
                                                       y_train, val_embedding,
                                                       y_val)
                last_vals[(epoch // 100) % patience_window] = val_accuracy
                if last_vals.count(last_vals[0]) == len(last_vals) and i > 900:
                    early_stopping = True
                print('Accuracy given NN approach %0.2f' % (100 * accuracy))
                print('Val Accuracy given NN approach %0.2f' %
                      (100 * val_accuracy))
            """
            if early_stopping:
                print 'Stopping early'
                break
            """

            #print('epoch %d loss %0.2f' %(epoch,avg_loss/total_batch))

        # Test model
        """
        y = np.reshape(te_y,(te_y.shape[0],1))
        feature1=model1.eval(feed_dict={images_L:te_pairs[:,0],dropout_f:1.0, bn_train:False})
        feature2=model2.eval(feed_dict={images_R:te_pairs[:,1],dropout_f:1.0, bn_train:False})
        te_acc = compute_accuracy_features(feature1, feature2,te_y)
        print('Accuracy test set %0.2f' % (100 * te_acc))
        """
        train_embedding = model1.eval(feed_dict={
            images_L: X_train,
            dropout_f: 1.0,
            bn_train: False
        })
        test_embedding = model1.eval(feed_dict={
            images_L: X_test,
            dropout_f: 1.0,
            bn_train: False
        })

        accuracy = evaluate_test_embedding(train_embedding, y_train,
                                           test_embedding, y_test)
        print('Accuracy given NN approach %0.2f' % (100 * accuracy))
        return accuracy
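
The contrastive_loss helper used in this pairwise variant is also not shown. A minimal sketch of the standard formulation, assuming labels of 1 for similar pairs and 0 for dissimilar pairs and a unit margin:

import tensorflow as tf

def contrastive_loss(labels, distance, margin=1.0):
    # Similar pairs (label 1) are pulled together; dissimilar pairs (label 0)
    # are pushed apart until their distance exceeds the margin.
    pos_term = labels * tf.square(distance)
    neg_term = (1.0 - labels) * tf.square(tf.maximum(margin - distance, 0.0))
    return tf.reduce_mean(pos_term + neg_term)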
Example #10
    print("loading features ... ", end='')
    sys.stdout.flush()
    for set_ in [k for k in anno.keys() if k != "tags"]:
        imid_list = sorted(list(anno[set_].keys()))  # only for reproducibility
        X[set_] = None
        Y[set_] = [None for _ in range(len(imid_list))]

        for i, imid in enumerate(imid_list):
            # set image features
            fname = splitext(anno[set_][imid]["file_name"])[0] + ".dat"
            x = load(join(args.features_path, set_, fname))
            if i == 0:
                n_samples = len(imid_list)
                n_dim = len(x)
                X[set_] = np.empty((n_samples, n_dim), dtype=np.float32)
            X[set_][i] = normalize_rows(x.reshape(1, -1)).squeeze()

            # set word embeddings (OOV tags are set to the zero vector)
            tags = anno[set_][imid]["tags"]
            y = [[0] * vec_dim if vecs[w] is None else vecs[w] for w in tags]
            Y[set_][i] = normalize_rows(np.array(y, dtype=np.float32))

    print("done")

    # shuffle debug
    if args.debug:
        idxs = random_state.permutation(len(X["train2014"]))[:1000]
        X["train2014"] = X["train2014"][idxs]
        Y["train2014"] = [Y["train2014"][i] for i in idxs]

    # run