    def get_data(self, task, nb_unlabelled, nb_labelled):

        if task in self.basic_task_names:

            task_index = self.basic_task_names.index(task)

            # Resample until the labelled split contains both classes.
            nb_tries = 0
            while True:
                nb_tries += 1
                X_train, _, _, _, X_test, y_test = data_utils.get_data_from_TH(self.tile_handler, nb_train=nb_unlabelled, nb_val=0, nb_test=nb_labelled)
                y_test = y_test[:, task_index]

                if len(set(y_test)) == 2:
                    return X_train, X_test, y_test

                assert nb_tries < 10, "ERROR: too many attempts required for good data split"

        elif task in self.bongard_task_names:

            bp_num = task.split("_")[1]

            # Same retry logic for the Bongard-problem tasks.
            nb_tries = 0
            while True:
                nb_tries += 1
                X_train, _, _, _, X_test, y_test = data_utils.get_data(self.BPG, bp_num, nb_train=nb_unlabelled, nb_val=0, nb_test=nb_labelled)

                if len(set(y_test)) == 2:
                    return X_train, X_test, y_test

                assert nb_tries < 10, "ERROR: too many attempts required for good data split"

        else:
            raise ValueError("Unknown task: %s" % task)
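
A minimal usage sketch (hedged: the owning class, a concrete task name, and the exact label values are not shown above, so `sampler` and "task_0" are placeholders):

sampler = ...  # instance of the class that defines get_data above
X_unlabelled, X_labelled, y_labelled = sampler.get_data("task_0", nb_unlabelled=500, nb_labelled=20)
assert len(set(y_labelled)) == 2  # the retry loop guarantees both classes appear in the labelled split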
Example #2
    def visualize_predictions(self):
        """
        :return:
        """
        print('[INFO] Visualization of the results starts')
        if os.path.exists(self.save_path):
            key = input(
                '[INFO] Target directory already exists. You might lose previously saved images. Continue/Abort (y/n): '
            )
            if key.lower() != 'y':
                print(
                    '[ABORT] Script stopped running. Images have not been saved'
                )
                sys.exit()
        else:
            os.makedirs(self.save_path)

        if self.test_loader is None:
            _, test_data = get_data(self.opt, use_train=False, use_test=True)
            print("[INFO] %s dataset has been retrieved" % self.dset_name)

            self.test_loader = torch.utils.data.DataLoader(test_data,
                                                           batch_size=1,
                                                           shuffle=False,
                                                           num_workers=1)
            print("[INFO] Test loader for %s dataset has been created" %
                  self.dset_name)

        _, seg_class_num = next(iter(self.dset_info.items()))

        if self.model is None:
            # Read the FuseNet model path that will be used for prediction and load the weights to the initialized model
            self.model = FuseNet(seg_class_num, self.opt.gpu_id,
                                 self.opt.use_class)

            checkpoint = torch.load(self.model_path)
            self.model.load_state_dict(checkpoint['state_dict'])
            print(
                "[INFO] Weights from the pretrained FuseNet model have been loaded. Checkpoint: %s"
                % self.model_path)

        self.model.eval()

        test_class_labels = None
        test_class_preds = None

        print(
            "[INFO] Prediction starts. Resulting comparison images will be saved under: %s"
            % self.save_path)
        for num, batch in enumerate(self.test_loader):
            test_rgb_inputs = Variable(batch[0].cuda(self.gpu_device))
            test_depth_inputs = Variable(batch[1].cuda(self.gpu_device))
            test_seg_labels = Variable(batch[2].cuda(self.gpu_device))

            if self.opt.use_class:
                test_class_labels = Variable(batch[3].cuda(self.gpu_device))
                # Predict the pixel-wise classification and scene classification results
                test_seg_outputs, test_class_outputs = self.model(
                    test_rgb_inputs, test_depth_inputs)

                # Take the maximum values from the feature maps produced by the output layers for classification
                # Move the tensors to CPU as numpy arrays
                _, test_class_preds = torch.max(test_class_outputs, 1)
                test_class_labels = test_class_labels.data.cpu().numpy()[0]
                test_class_preds = test_class_preds.data.cpu().numpy()[0]
            else:
                test_seg_outputs = self.model(test_rgb_inputs,
                                              test_depth_inputs)

            # Take the maximum values from the feature maps produced by the output layers for segmentation
            # Move the tensors to CPU as numpy arrays
            _, test_seg_preds = torch.max(test_seg_outputs, 1)
            test_seg_preds = test_seg_preds.data.cpu().numpy()[0]
            test_seg_labels = test_seg_labels.data.cpu().numpy()[0]

            # Horizontally stack the predicted and ground-truth semantic segmentation labels
            comparison_images = np.hstack(
                (np.uint8(test_seg_labels), np.uint8(test_seg_preds + 1)))

            # Move the RGB image from GPU to CPU as numpy array and arrange dimensions appropriately
            test_rgb_inputs = test_rgb_inputs.data.cpu().numpy()[0].transpose(
                1, 2, 0)[:, :, ::-1]

            # Color semantic segmentation labels, print scene classification labels, and save comparison images
            self.paint_and_save(comparison_images, np.uint8(test_rgb_inputs),
                                test_class_labels, test_class_preds, num)

        print('[INFO] All %i images have been saved' % len(self.test_loader))
        print(
            '[COMPLETED] Boring prediction images are now nice and colorful!')
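
A small hedged illustration of the argmax step used above (the tensor shapes here are made up; in the function they come from the FuseNet outputs):

import torch

seg_scores = torch.randn(1, 40, 240, 320)   # (batch, num_classes, H, W); e.g. 40 NYU classes
_, seg_preds = torch.max(seg_scores, 1)     # per-pixel class indices, shape (1, 240, 320)
seg_preds = seg_preds.cpu().numpy()[0]      # move to CPU as a numpy array and drop the batch dim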
Example #3
 # Save the data splits to disk
 save_data(configs['all_data_path'], configs['train_data_path'],
           configs['test_data_path'], configs['val_data_path'])
 with codecs.open(configs['all_data_path']) as f:
     content = f.readlines()
 length_list = []
 for line in content:
     line = line.strip()
     if len(line) == 0:
         continue
     length_list.append(len(line))
 # print(length_list)
 max_length = int(np.percentile(length_list, 95))
 logger.info(f"max length: {max_length}")
 json2text(configs['test_data_path'], configs['ptest_x_path'], key='text')
 train_data = get_data(configs['train_data_path'])
 val_data = get_data(configs['val_data_path'])
 train_collections = []
 val_collections = []
 for item in train_data:
     tags = ['O' for _ in range(len(item['text']))]
     label = item['label']
     for tag, info in label.items():
         # info maps entity text -> list of [start, end] spans; only the first span is used here
         temp = list(info.values())[0][0]
         from_index = temp[0]
         to_index = temp[1]
         for index in range(from_index, min(to_index + 1, len(tags))):
             tags[index] = f"I_{tag}"
         tags[from_index] = f"B_{tag}"
     # tags = "".join(tags)
     train_collections.append((item['text'], tags))
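
A hedged worked example of the span-to-BIO conversion above (a hypothetical record in the CLUENER-style layout the loop assumes):

item = {"text": "浙商银行企业信贷部", "label": {"company": {"浙商银行": [[0, 3]]}}}
# -> tags == ['B_company', 'I_company', 'I_company', 'I_company', 'O', 'O', 'O', 'O', 'O']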
Example #4
from sklearn import svm
from sklearn.metrics import mean_absolute_error
from da_models import model_process
from utils.data_utils import get_data, train_test_split
from utils.metrics import rmse_score
import logging
import time


def base_model():
    return svm.SVR(kernel='rbf')


# load datasets
folder = '../data/'
feature_ns1, feature_ds1, rul_ds1 = get_data(folder + 'xBearing1_1.xlsx', 1490)
feature_ns2, feature_ds2, rul_ds2 = get_data(folder + 'xBearing1_2.xlsx', 827)
feature_ns3, feature_ds3, rul_ds3 = get_data(folder + 'xBearing1_3.xlsx', 1684)
feature_ns4, feature_ds4, rul_ds4 = get_data(folder + 'xBearing1_4.xlsx', 1083)
feature_ns5, feature_ds5, rul_ds5 = get_data(folder + 'xBearing1_5.xlsx', 680)
feature_ns6, feature_ds6, rul_ds6 = get_data(folder + 'xBearing1_6.xlsx', 649)
feature_ns7, feature_ds7, rul_ds7 = get_data(folder + 'xBearing1_7.xlsx', 1026)
print('Data loaded')

# train/test split
feature_ns = [
    feature_ns1, feature_ns2, feature_ns3, feature_ns4, feature_ns5,
    feature_ns6, feature_ns7
]
feature_ds = [
    feature_ds1, feature_ds2, feature_ds3, feature_ds4, feature_ds5,
    feature_ds6, feature_ds7
]
Example #5
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from utils.data_utils import get_data
from keras import backend as K
from keras.layers import Activation
from keras.layers import Input, Lambda, Dense, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.models import Sequential, Model
from keras.optimizers import RMSprop

size = 2
total_sample_size = 10000

X, Y = get_data(total_sample_size)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=.25)


def build_base_network(input_shape):
    seq = Sequential()

    nb_filter = [6, 12]
    kernel_size = 3

    # convolutional layer 1 (Keras 1-style arguments: border_mode/dim_ordering
    # correspond to padding/data_format in Keras 2)
    seq.add(Convolution2D(nb_filter[0], kernel_size, kernel_size, input_shape=input_shape,
                          border_mode='valid', dim_ordering='th'))
    seq.add(Activation('relu'))
    seq.add(MaxPooling2D(pool_size=(2, 2)))
    seq.add(Dropout(.25))
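
The example is truncated here; below is a hedged sketch of how such a base network is typically shared between the two inputs of a Siamese model. The pairing code is not shown above, so the input layout, the Euclidean distance head, and the assumption that build_base_network finishes with Flatten/Dense layers are all illustrative:

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))

input_dim = x_train.shape[2:]        # assumes pairs are stored as (n, 2, channels, h, w)
img_a = Input(shape=input_dim)
img_b = Input(shape=input_dim)

base_network = build_base_network(input_dim)
feat_a = base_network(img_a)         # the same weights encode both images
feat_b = base_network(img_b)

distance = Lambda(euclidean_distance)([feat_a, feat_b])
model = Model([img_a, img_b], distance)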
Example #6
        def _fitness(learning_rate):
            """
            Hyper-parameter:
            learning_rate:     Learning-rate for the optimizer.
            """

            # Print the hyper-parameters.
            print('learning rate: {0:.1e}'.format(learning_rate))
            print()

            # Dir-name for the TensorBoard log-files.
            log_dir = _log_dir_name(learning_rate, self.model)

            # Create a callback-function for Keras which will be
            # run after each epoch has ended during training.
            # This saves the log-files for TensorBoard.
            # Note that there are complications when histogram_freq=1.
            # It might give strange errors and it also does not properly
            # support Keras data-generators for the validation-set.
            callback_log = TensorBoard(log_dir=log_dir,
                                       histogram_freq=0,
                                       batch_size=32,
                                       write_graph=True,
                                       write_grads=False,
                                       write_images=False)

            model = None
            history = None
            validation_data = None
            # Create the neural network with these hyper-parameters.
            #K.clear_session()
            if self.model == 'toy':

                X = np.random.randint(0, 6, size=(3000, 50))
                Y = np.random.randint(0, 6, size=(3000, 50, 1))

                model = Sequential()
                model.add(Embedding(6, 50, input_length=50))
                model.add(Dense(300, activation='relu'))
                model.add(Dense(6, activation='softmax'))
                model.compile(optimizer='adam',
                              loss='sparse_categorical_crossentropy',
                              metrics=['accuracy'])
                history = model.fit(X,
                                    Y,
                                    epochs=1,
                                    batch_size=1024,
                                    validation_split=0.2,
                                    validation_data=validation_data,
                                    verbose=1,
                                    callbacks=[callback_log] +
                                    self.custom_metrics)
            else:
                if self.model[:4] == "cap2" or self.model[:4] == "vae2":
                    inputs, outputs = None, None
                    datagen, valgen = None, None
                    cap2 = None
                    callbacks = [callback_log]

                    hparams = HParams(
                        learning_rate=learning_rate,
                        hidden_dim=1024,
                        optimizer='adam',
                        dropout=0.5,
                        max_seq_length=self.data_helper.max_caption_len,
                        embed_dim=self.embedding_matrix.shape[-1],
                        num_embeddings=self.embedding_matrix.shape[0],
                        activation='relu',
                        latent_dim=1000)

                    if self.gen == 'train' or self.gen == 'all':
                        data = get_data(self.model, self.data_helper, gen=True)
                        if self.gen == 'all':
                            val_data = get_data(self.model,
                                                self.val_helper,
                                                gen=True)
                        else:
                            val_data = get_data(self.model, self.val_helper)
                    else:
                        data = get_data(self.model, self.data_helper)
                        val_data = get_data(self.model, self.val_helper)

                        # _, X, Y1, Y2 = self.data_helper.cap2cap()
                        # if self.max_samples is not None:
                        #     X, Y1, Y2, = X[:self.max_samples], Y1[:self.max_samples], Y2[:self.max_samples]
                        # Y2 = np.expand_dims(Y2, axis=2)
                        # validation_data=None
                        # inputs = {'encoder_input': X, 'decoder_input': Y1}
                        # outputs = {'decoder_output': Y2}

                    if self.model != 'cap2img':
                        self.custom_metrics[0].validation_data = val_data
                        callbacks += self.custom_metrics
                        # _, X, Y = self.data_helper.cap2resnet()
                        # Y = Y[:,0,:]
                        # inputs = {'encoder_input': X}
                        # outputs = {'projection_output': Y}

                        # _, X, Y1, Y2, Y3 = self.data_helper.cap2all()
                        # #X, Y1, Y2, Y3 = X[:20], Y1[:20], Y2[:20], Y3[:20]
                        # Y2 = np.expand_dims(Y2, axis=2)
                        # Y3 = Y3[:,0,:]
                        # if self.max_samples is not None:
                        #     X, Y1, Y2, Y3 = X[:self.max_samples], Y1[:self.max_samples], Y2[:self.max_samples], Y3[:self.max_samples]
                        # inputs = {'encoder_input': X, 'decoder_input': Y1}
                        # outputs = {'projection_output': Y3, 'decoder_output': Y2}

                    ModelClass = get_model(self.model)
                    model = ModelClass(hparams,
                                       embeddings=self.embedding_matrix)

                    if self.path_load_model is not None:
                        print("Loading model " + self.path_load_model + " ...")
                        model.load_model(self.path_load_model)

                    model.compile(num_gpu=self.gpu)
                    # history = model.fit(inputs,
                    #                 outputs,
                    #                 epochs=3,
                    #                 batch_size=256,
                    #                 validation_split=0.2,
                    #                 validation_data=validation_data,
                    #                 callbacks=callbacks)
                    if model.gpu_model is None:
                        model_to_run = model.model
                    else:
                        model_to_run = model.gpu_model
                    if isinstance(data, keras.utils.Sequence):
                        history = model_to_run.fit_generator(
                            data,
                            epochs=self.epochs,
                            validation_data=val_data,
                            validation_steps=len(val_data),
                            callbacks=callbacks,
                            workers=4,
                            use_multiprocessing=True)
                    elif isinstance(data, tuple):
                        history = model_to_run.fit(x=data[0],
                                                   y=data[1],
                                                   epochs=self.epochs,
                                                   validation_data=val_data,
                                                   callbacks=callbacks,
                                                   batch_size=self.batch_size)

            # Get the classification accuracy on the validation-set
            # after the last training-epoch.
            if self.model != 'cap2img':
                f1 = self.custom_metrics[0].val_f1s[-1]
                print()
                print("Val F1: {0:.2%}".format(f1))
                print()
            else:
                f1 = history.history['val_acc'][-1]
                print()
                print("Val Acc: {0:.2%}".format(f1))
                print()

            # Save the model if it improves on the best F1 found so far,
            # which is tracked on the instance as self.best_f1.
            print(self.best_f1)
            if f1 > self.best_f1:
                print("saving model at {0}".format(self.path_best_model))
                # Save the new model to harddisk.
                model.model.save(self.path_best_model)
                # Update the classification accuracy.
                self.best_f1 = f1

            # Delete the Keras model with these hyper-parameters from memory.
            del model

            # Clear the Keras session, otherwise it will keep adding new
            # models to the same TensorFlow graph each time we create
            # a model with a different set of hyper-parameters.
            K.clear_session()

            # NOTE: Scikit-optimize does minimization so it tries to
            # find a set of hyper-parameters with the LOWEST fitness-value.
            # Because we are interested in the HIGHEST classification
            # accuracy, we need to negate this number so it can be minimized.
            return -f1
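
A hedged sketch of how a fitness function like this is typically handed to scikit-optimize, which the closing comment refers to (the actual search-space bounds, number of calls, and call site are not shown in the snippet):

from skopt import gp_minimize
from skopt.space import Real

search_space = [Real(1e-5, 1e-1, prior='log-uniform', name='learning_rate')]

# gp_minimize minimizes, which is why _fitness returns the negated F1 score
result = gp_minimize(func=lambda params: _fitness(params[0]),
                     dimensions=search_space,
                     n_calls=20,
                     random_state=0)
print('best learning rate:', result.x[0])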
Example #7
import os
import datetime

import torch

from utils.utils import print_time_info
# (TrainOptions and get_data come from the project's own option/data modules;
#  those import lines are not included in this snippet.)


if __name__ == '__main__':
    opt = TrainOptions().parse()

    dset_name = os.path.basename(opt.dataroot)
    if dset_name.lower().find('nyu') != -1:
        dset_info = {'NYU': 40}
    elif dset_name.lower().find('sun') != -1:
        dset_info = {'SUN': 37}
    else:
        raise NameError('Dataset name should contain either nyu or sun')

    print('[INFO] %s dataset is being processed' % list(dset_info.keys())[0])
    train_data, test_data = get_data(opt, use_train=True, use_test=True)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=opt.num_workers)
    print("[INFO] Data loaders for %s dataset have been created" % list(dset_info.keys())[0])

    if opt.use_class:
        # Grid search for lambda values
        # Lambda is the coefficient of the classification loss
        # i.e.: total_loss = segmentation_loss + lambda * classification_loss
        start, end, steps = opt.lambda_class_range
        lambdas = torch.linspace(start, end, steps=int(steps)).cuda(opt.gpu_id)
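        # Hedged sketch of how the weighted loss would be formed inside the
        # training loop (the loop body itself is not shown in this snippet):
        #   seg_loss = seg_criterion(seg_outputs, seg_labels)
        #   class_loss = class_criterion(class_outputs, class_labels)
        #   total_loss = seg_loss + lam * class_loss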

        for i, lam in enumerate(lambdas):
            start_date_time = datetime.datetime.now().replace(microsecond=0)
            print('[INFO] Training session: [%i of %i]' % (i+1, steps))
Example #8
def word_embed(args):
    Captions = CocoCaptions(args.data, args.max_samples)
    WV = FilteredGloveVectors()
    Captions.initialize_WV(WV)
    embedding_matrix = WV.get_embedding_matrix()

    if args.model[:4] == "cap2" or args.model[:4] == "vae2":
        inputs, outputs = None, None
        datagen, valgen = None, None
        cap2 = None

        hparams = HParams(learning_rate=args.learning_rate,
                          hidden_dim=1024,
                          optimizer='adam',
                          dropout=0.5,
                          max_seq_length=Captions.max_caption_len,
                          embed_dim=embedding_matrix.shape[-1],
                          num_embeddings=embedding_matrix.shape[0],
                          activation='relu',
                          latent_dim=1000)

        if args.gen == 'train' or args.gen == 'all':
            data = get_data(args.model, Captions, gen=True)
        else:
            data = get_data(args.model, Captions)

        ModelClass = get_model(args.model)
        model = ModelClass(hparams, embeddings=embedding_matrix)

        if args.load is not None:
            print("Loading model " + args.load + " ...")
            if args.model == "vae2all":
                model.load_model(
                    args.load,
                    custom_objects={"KLDivergenceLayer": KLDivergenceLayer})
            else:
                model.load_model(args.load)

        model.compile()

        word_encoder = model.get_word_encoder()
        if isinstance(data, keras.utils.Sequence):
            embeddings = word_encoder.predict_generator(data, verbose=1)

        elif isinstance(data, tuple):
            embeddings = word_encoder.predict(x=data[0], verbose=1)

        X = Captions.ordered_IDs
        print("ordered_X1", len(X), " ")

        new_X = []
        for image_id in X:
            captions = Captions.get_captions(image_id)
            X_group, Y_group = Captions.get_caption_convolutions(captions)
            for c, _ in zip(X_group, Y_group):
                new_X.append((c, image_id))

        print("ordered_X2", len(new_X), " ")
        print("Predicted ", embeddings.shape, " preds")

        embeddings_to_txt(new_X, embeddings, WV)
Example #9
def train(args):
    Captions = CocoCaptions(args.data, args.max_samples)
    WV = FilteredGloveVectors()
    Captions.initialize_WV(WV)

    Captions, ValCaptions = Captions.split_train_val()

    embedding_matrix = WV.get_embedding_matrix()
    metrics = Metrics()

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(args.learning_rate))
    print()

    # Dir-name for the TensorBoard log-files.
    log_dir = log_dir_name(args.learning_rate, args.model)

    # Create a callback-function for Keras which will be
    # run after each epoch has ended during training.
    # This saves the log-files for TensorBoard.
    # Note that there are complications when histogram_freq=1.
    # It might give strange errors and it also does not properly
    # support Keras data-generators for the validation-set.
    callback_log = TensorBoard(log_dir=log_dir,
                               histogram_freq=0,
                               batch_size=32,
                               write_graph=True,
                               write_grads=False,
                               write_images=False)

    model = None
    history = None
    validation_data = None

    if args.model == 'toy':

        X = np.random.randint(0, 6, size=(3000, 50))
        Y = np.random.randint(0, 6, size=(3000, 50, 1))

        model = Sequential()
        model.add(Embedding(6, 50, input_length=50))
        model.add(Dense(300, activation='relu'))
        model.add(Dense(6, activation='softmax'))
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(X,
                            Y,
                            epochs=1,
                            batch_size=1024,
                            validation_split=0.2,
                            validation_data=validation_data,
                            callbacks=[callback_log] + [metrics])
    else:
        if args.model[:4] == "cap2" or args.model[:4] == "vae2":
            inputs, ordered_outputs = None, None
            datagen, valgen = None, None
            cap2 = None
            callbacks = [callback_log]

            hparams = HParams(learning_rate=args.learning_rate,
                              hidden_dim=1024,
                              optimizer='adam',
                              dropout=0.5,
                              max_seq_length=Captions.max_caption_len,
                              embed_dim=embedding_matrix.shape[-1],
                              num_embeddings=embedding_matrix.shape[0],
                              activation='relu',
                              latent_dim=1000)

            if args.gen == 'train' or args.gen == 'all':
                data = get_data(args.model, Captions, gen=True)
                if args.gen == 'all':
                    val_data = get_data(args.model, ValCaptions, gen=True)
                else:
                    val_data = get_data(args.model, ValCaptions)
            else:
                data = get_data(args.model, Captions)
                val_data = get_data(args.model, ValCaptions)

            if args.model != 'cap2img':
                metrics.validation_data = val_data
                callbacks += [metrics]

            ModelClass = get_model(args.model)
            model = ModelClass(hparams, embeddings=embedding_matrix)

            if args.load is not None:
                print("Loading model " + args.load + " ...")
                model.load_model(args.load)

            model.compile()

            if isinstance(data, keras.utils.Sequence):
                history = model.model.fit_generator(
                    data,
                    epochs=args.epochs,
                    validation_data=val_data,
                    callbacks=callbacks,
                )
            elif isinstance(data, tuple):
                history = model.model.fit(
                    x=data[0],
                    y=data[1],
                    epochs=args.epochs,
                    validation_data=val_data,
                    callbacks=callbacks,
                )

    # Get the classification accuracy on the validation-set
    # after the last training-epoch.
    if args.model != 'cap2img':
        f1 = metrics.val_f1s[-1]
        print()
        print("Val F1: {0:.2%}".format(f1))
        print()
    else:
        f1 = history.history['val_acc'][-1]
        print()
        print("Val Acc: {0:.2%}".format(f1))
        print()

    # Save the trained model to disk.
    print("saving model at {0}".format(args.path))
    # Save the new model to harddisk.
    model.save(args.path)
    # Update the classification accuracy.

    # Delete the Keras model with these hyper-parameters from memory.
    del model

    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    K.clear_session()
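
A hedged sketch of the command-line wiring this function appears to expect, inferred only from the attributes it reads from args (the real option names, defaults, and help strings in the repo may differ):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data', type=str, required=True)         # path handed to CocoCaptions
parser.add_argument('--max_samples', type=int, default=None)
parser.add_argument('--model', type=str, default='toy')        # 'toy', 'cap2*' or 'vae2*'
parser.add_argument('--learning_rate', type=float, default=1e-3)
parser.add_argument('--epochs', type=int, default=1)
parser.add_argument('--gen', type=str, default='train')        # 'train' or 'all' switches to generators
parser.add_argument('--load', type=str, default=None)          # optional checkpoint to load
parser.add_argument('--path', type=str, required=True)         # where the trained model is saved
args = parser.parse_args()

train(args)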
Example #10
def encode(args):

    Captions = CocoCaptions(args.data, args.max_samples)
    WV = FilteredGloveVectors()
    Captions.initialize_WV(WV)
    embedding_matrix = WV.get_embedding_matrix()

    if args.model[:4] == "cap2" or args.model[:4] == "vae2":
        inputs, outputs = None, None
        datagen, valgen = None, None
        cap2 = None

        hparams = HParams(learning_rate=args.learning_rate,
                          hidden_dim=1024,
                          optimizer='adam',
                          dropout=0.5,
                          max_seq_length=Captions.max_caption_len,
                          embed_dim=embedding_matrix.shape[-1],
                          num_embeddings=embedding_matrix.shape[0],
                          activation='relu',
                          latent_dim=1000)

        if args.gen == 'train' or args.gen == 'all':
            data = get_data(args.model, Captions, gen=True)
        else:
            data = get_data(args.model, Captions)

        ModelClass = get_model(args.model)
        model = ModelClass(hparams, embeddings=embedding_matrix)

        if args.load is not None:
            print("Loading model " + args.load + " ...")
            if args.model == "vae2all":
                model.load_model(
                    args.load,
                    custom_objects={"KLDivergenceLayer": KLDivergenceLayer})
            else:
                model.load_model(args.load)

        model.compile()

        encoder = model.get_encoder()

        if isinstance(data, keras.utils.Sequence):
            if args.model == "vae2all":
                pred_names = [output.name for output in encoder.output_layers]
                preds, mean, variance = encoder.predict_generator(data,
                                                                  verbose=1)
            else:
                preds = encoder.predict_generator(data, verbose=1)

        elif isinstance(data, tuple):
            if args.model == "vae2all":
                pred_names = [output.name for output in encoder.output_layers]
                preds, mean, variance = encoder.predict(x=data[0], verbose=1)
            else:
                preds = encoder.predict(x=data[0], verbose=1)

        X = Captions.ordered_IDs
        print("ordered_X1", len(X), " ")

        new_X = []
        for image_id in X:
            captions = Captions.get_captions(image_id)
            X_group, Y_group = Captions.get_caption_convolutions(captions)
            for c, _ in zip(X_group, Y_group):
                new_X.append((c, image_id))

        print("ordered_X2", len(new_X), " ")
        print("Predicted ", len(preds), " preds")

        output = []

        for x, y in zip(new_X, preds):
            c, image_id = x
            sentence = Captions.WV.indices_to_words(c)
            sentence = " ".join(sentence[1:-1])
            resnet = Captions.get_resnet_output(image_id)
            output.append((sentence, resnet, y))

        print("U ", len(output), " outputs")

        save_loc = base_fp + "/skip-thoughts/our_model_encodings.pkl"
        pkl.dump(output, open(save_loc, "wb+"), 2)
        print("Output saved")