Example #1
0
def main():
    """Evaluate a trained show-attend-tell captioner on the COCO val split."""
    data = load_coco_data(data_path='./data', split='val')
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
        # Model/solver construction stays inside the `with` block (as in the
        # original); the open file is not touched after pickle.load.
        model = CaptionGenerator(word_to_idx,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)
        solver_cfg = dict(n_epochs=20,
                          batch_size=128,
                          update_rule='adam',
                          learning_rate=0.001,
                          print_every=1000,
                          save_every=1,
                          image_path='./image/val2014_resized',
                          pretrained_model=None,
                          model_path='model/lstmval/',
                          test_model='model/lstm/model-10',
                          print_bleu=True,
                          log_path='log/')
        # The same dataset serves as both "train" and "val" arguments here;
        # only solver.test is actually invoked.
        solver = CaptioningSolver(model, data, data, **solver_cfg)

        solver.test(data, split='val')
Example #2
0
def main():
    """Train a caption-style model on precomputed weekly feature pickles."""
    root = "/nfs/private/cas/dataset_0_N/week/"

    def _unpickle(filename):
        # Load one pickled array from the dataset directory.
        with open(root + filename, "rb") as f:
            return pickle.load(f)

    # load train/test datasets
    train_features = _unpickle("train_features.pkl")
    train_labels = _unpickle("train_labels.pkl")
    test_features = _unpickle("test_features.pkl")
    test_labels = _unpickle("test_labels.pkl")

    # Special tokens are mapped to negative indices here (unlike the usual
    # 0-based COCO vocab) — presumably the model offsets them; confirm.
    word_to_idx = {"<START>": -3, "<END>": -2, "<NULL>": -1}
    dim_feature = train_features.shape[1]
    n_time_step = train_labels.shape[1] - 1
    print("n_time_step:%d" % n_time_step)

    model = CaptionGenerator(word_to_idx,
                             V=int(np.max(train_features) + 1),
                             dim_feature=dim_feature,
                             dim_embed=128,
                             dim_hidden=128,
                             n_time_step=n_time_step,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    data = {"features": train_features, "labels": train_labels}
    val_data = {"features": test_features, "labels": test_labels}
    solver = CaptioningSolver(
        model,
        data,
        val_data,
        n_epochs=50000,
        batch_size=100,
        update_rule='adam',
        learning_rate=1e-4,
        print_every=100,
        save_every=10,
        image_path='./image/',
        pretrained_model=None,
        model_path='./model/0_N/cnn/week/',
        test_model=
        '/ais/gobi5/linghuan/basic-attention/model/lstm/lstm/model-19',
        print_bleu=True,
        log_path='./log/')

    solver.train()
def main():
    """Adversarial captioning: train a generator alongside a discriminator."""
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    # Generator and discriminator share every architectural knob except
    # alpha_c (attention regularizer) and the discriminator's learning rate.
    shared_cfg = dict(dim_feature=[196, 512],
                      dim_embed=512,
                      dim_hidden=1024,
                      n_time_step=16,
                      prev2out=True,
                      ctx2out=True,
                      selector=True,
                      dropout=True)

    model = CaptionGenerator(word_to_idx, alpha_c=1.0, **shared_cfg)

    discrim = Discriminator(word_to_idx,
                            alpha_c=0.0,
                            learning_rate=0.01,
                            **shared_cfg)

    # The same dataset is passed as both train and validation data.
    solver = CaptioningSolver(model, discrim, data, data,
                              n_epochs=20,
                              batch_size=128,
                              gpu_list="1,2,3",
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              train_new='./model/lstm/model-20',
                              test_model='model/lstm/model-21',
                              print_bleu=True,
                              log_path='log/')

    solver.train()
Example #4
0
    def __init__(self, sess, model_path, use_inception):
        """Load the vocabulary, build the caption model and its sampler,
        and restore pretrained CNN + LSTM weights into `sess`.

        use_inception selects Inception-v3 (64x2048 features) over
        VGG-19 (196x512 features) as the CNN backbone.
        """
        here = os.path.dirname(os.path.realpath(__file__))

        # word to index mapping
        with open(os.path.join(here, 'data/train/word_to_idx.pkl'),
                  "rb") as f:
            self.word_to_idx = pickle.load(f)

        # Feature-map geometry (L regions of dimension D) and checkpoint
        # path depend on the chosen backbone.
        if use_inception:
            feat_l, feat_d = 64, 2048
            cnn_model_path = os.path.join(here, 'data/inception_v3.ckpt')
        else:
            feat_l, feat_d = 196, 512
            cnn_model_path = os.path.join(
                here, './data/imagenet-vgg-verydeep-19.mat')

        self.batch_size = 128
        self.sess = sess
        self.use_inception = use_inception
        print("Creating model...")
        self.model = CaptionGenerator(
            self.word_to_idx,
            dim_feature=[feat_l, feat_d],
            dim_embed=512,
            dim_hidden=1800,
            n_time_step=16,
            prev2out=True,
            ctx2out=True,
            alpha_c=5.0,
            selector=True,
            dropout=True,
            use_cnn="inception" if use_inception else "vgg",
            cnn_model_path=cnn_model_path)

        print("Loading CNN weights...")
        self.model.cnn.load_weights(sess)
        print("Building sampler...")
        sampler = self.model.build_sampler(max_len=20)
        self.alphas, self.betas, self.generated_captions = sampler

        # Restore only the sampler variables from the saved checkpoint;
        # a global initializer is deliberately not run.
        print("Loading LSTM weights...")
        saver = tf.train.Saver(self.model.sampler_vars)
        saver.restore(sess, model_path)
Example #5
0
def main():
    """Evaluate the captioner on our labeled-object dataset.

    Loads the dataset, builds the generator, and runs `test` against a
    saved checkpoint (`model-2000`); training is left commented out.
    """
    # Raw strings: these Windows-style paths contain backslash sequences
    # (\i, \O, \o) that are invalid escapes in a normal string literal and
    # raise a SyntaxWarning on modern Python. Raw literals produce the
    # exact same characters without the warning.
    data = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    # NOTE(review): the "test" set reloads the *train* split — confirm that
    # evaluating on training data is intended.
    our_test = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    word_to_idx = data['word_to_idx']
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[216, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=26,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=False)

    solver = CaptioningSolver(model,
                              data,
                              our_test,
                              n_epochs=5000,
                              batch_size=64,
                              update_rule='adam',
                              learning_rate=1e-4,
                              print_every=1000,
                              save_every=100,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/our_train0414/',
                              test_model='model/our_train0414/model-2000',
                              print_bleu=False,
                              log_path='log/')

    # solver.train()
    solver.test(our_test)
Example #6
0
def main():
    """Debug run: train the label captioner for one epoch with batch size 1,
    printing the loaded vocabulary and labels along the way."""
    # load train dataset
    print('@@@@@@@@@@@@@')
    data = load_data(data_path='./data', split='train')
    label_to_idx = data['label_to_idx']
    print('@@@@@')
    # Validation is disabled for this debug run; an empty list stands in
    # for the val split.
    val_data = []

    print(label_to_idx)
    print(data['labels'])

    model = CaptionGenerator(label_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=30,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=1,
                              batch_size=1,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=3,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')
    print('model + solver')

    solver.train()
Example #7
0
def main():
    """Resize any newly dropped prediction images, extract their features,
    and run the trained captioner over them; returns the test results."""
    # check if there are new images in pred_folder
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    new_images = [name for name in os.listdir(pred_folder)
                  if name[0] != '.']
    if new_images:
        # Rebuild the resized folder from scratch, then empty pred_folder
        # once its contents have been converted.
        shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for name in new_images:
            with open(os.path.join(pred_folder, name), 'r+b') as f:
                with Image.open(f) as image:
                    resized = resize_image(image)
                    resized.save(os.path.join(resized_folder, name))
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    # Extract CNN features for the 'pred' split.
    prep = PreProcessor(batch_size=5,
                        max_length=15,
                        word_count_threshold=1,
                        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    prep.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    data = load_data(data_path='./data', split='pred')

    model = CaptionGenerator(word_to_idx,
                             feature_dim=[196, 512],
                             embed_dim=512,
                             hidden_dim=1024,
                             len_sent=16,
                             lamda=1.0)
    solver = CaptioningSolver(model, data, None,
                              n_epochs=5,
                              batch_size=5,
                              learning_rate=0.001,
                              print_every=1,
                              save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')

    # display=True prints the predicted captions in the terminal.
    return solver.test(data, display=True)
def main():
    """Train / validate / test the video captioner on pre-extracted VGG
    features, driven by the module-level flags `train`, `validate`, `test`
    and `train_for_test`."""
    with open('train_data_vgg.pkl', 'rb') as handle:
        train_data = pickle.load(handle)
    # Expose the filenames under the key the solver expects.
    train_data['video_ids'] = train_data['new_filename']

    if train_for_test == 1:
        # Fold the validation split into the training set for a final run.
        with open('val_data_vgg.pkl', 'rb') as handle:
            val_data = pickle.load(handle)
        train_data['features'] = np.concatenate(
            (train_data['features'], val_data['features']), axis=0)
        train_data['labels'] = np.concatenate(
            (train_data['labels'], val_data['labels']), axis=0)
        # NOTE(review): concatenates the raw 'new_filename' arrays rather
        # than the 'video_ids' alias set above — same contents at this
        # point, but confirm this is intentional.
        train_data['video_ids'] = np.concatenate(
            (train_data['new_filename'], val_data['new_filename']), axis=0)

    data = {'train_data': train_data}
    label_to_idx = load_pickle('labels_to_idx.pkl')
    num_images_per_video = 17

    model = CaptionGenerator(
        label_to_idx=label_to_idx,
        dim_feature=[196, 512],
        dim_hidden=1024,
        # One timestep per sampled frame of a video.
        n_time_step=num_images_per_video,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=False)

    solver = CaptioningSolver(
        model,
        data,
        n_epochs=300,
        batch_size=15,
        update_rule='adam',
        learning_rate=0.0006,
        print_every=3,
        save_every=10,
        pretrained_model=None,
        model_path=current_path + 'model/lstm/',
        test_model=current_path + 'model/lstm/model-310',
        log_path=current_path + 'log/',
        data_path=current_path + '/data_set/',
        test_result_save_path=current_path +
        'data_set/test/model_test_result/',
        models_val_disp=current_path +
        'model/lstm/models_accuracy_val.txt')

    # Phases are independent; any combination of flags may be enabled.
    if train == 1:
        solver.train()
    if validate == 1:
        solver.all_model_val()
    if test == 1:
        solver.test()

    if train_for_test == 1:
        solver.train()
Example #9
0
def _make_model(loader, params):
    # Build one CaptionGenerator configured from CLI params; shared by all modes.
    return CaptionGenerator(loader.word_to_idx,
                            num_features=params.num_objects,
                            dim_feature=params.dim_features,
                            dim_embed=params.dim_word_emb,
                            dim_hidden=params.rnn_hid_size,
                            dim_attention=params.att_hid_size,
                            n_time_step=loader.seq_length - 1)


def _make_solver(loader, model, params):
    # Build one CaptioningSolver wired to `model`; shared by all modes.
    return CaptioningSolver(data_loader=loader,
                            model=model,
                            ngram_file=params.input_ngram,
                            n_epochs=params.epoch,
                            update_rule=params.optimizer,
                            learning_rate=params.lr,
                            print_every=params.print_every,
                            start_epoch=params.start_epoch,
                            log_path=params.log_path,
                            model_path=params.model_path,
                            pretrained_model=params.pretrained,
                            test_model=params.test_model)


def main(params):
    """Build the data loader and dispatch on params.mode.

    EVAL  -> write results for the external evaluation script
    TEST  -> run the solver's test pass
    TRAIN -> cross-entropy training, then REINFORCE fine-tuning, each in a
             fresh TF graph so variables do not collide between phases.

    The model/solver construction was identical across all four call sites,
    so it is factored into the `_make_model` / `_make_solver` helpers above.
    """
    loader = LoadData(input_json=params.input_labels,
                      input_h5=params.input_caps,
                      feature_path=params.input_feats,
                      batch_img=params.batch_img,
                      seq_per_img=params.seq_per_img,
                      train_only=params.train_only)

    if params.mode == "EVAL":
        solver = _make_solver(loader, _make_model(loader, params), params)
        solver.output_result_for_eval()
    elif params.mode == "TEST":
        solver = _make_solver(loader, _make_model(loader, params), params)
        solver.test()
    elif params.mode == "TRAIN":
        with tf.Graph().as_default():
            solver = _make_solver(loader, _make_model(loader, params), params)
            solver.train()
        with tf.Graph().as_default():
            solver = _make_solver(loader, _make_model(loader, params), params)
            solver.train_reinforce()