コード例 #1
0
    def __init__(self):
        """Create the caption model and copy the training settings onto self.

        The vocabulary is loaded through ``utils.load_pickle`` from
        ``Const.vocab_path``; every run option is read from ``TrainingArg``.
        """
        self.word_to_idx = utils.load_pickle(Const.vocab_path)

        generator_options = dict(
            dim_feature=[196, 512],
            dim_embed=512,
            dim_hidden=1024,
            n_time_step=33,
            prev2out=True,
            ctx2out=True,
            alpha_c=1.0,
            selector=True,
            dropout=True,
        )
        self.model = CaptionGenerator(self.word_to_idx, **generator_options)

        # Mirror each TrainingArg option under the same attribute name.
        for option in ('n_epochs', 'batch_size', 'update_rule',
                       'learning_rate', 'print_bleu', 'print_every',
                       'save_every', 'log_path', 'model_path',
                       'pretrained_model', 'test_model'):
            setattr(self, option, getattr(TrainingArg, option))
        self.max_words_len = 35

        # Data management helper.
        self.pre_mgr = PreData(vgg19_path=TrainingArg.vgg19_path)
コード例 #2
0
    def __init__(self, sess, model_path, use_inception):
        """Build a caption sampler and restore its weights from a checkpoint.

        Args:
            sess: session object handed to the CNN weight loader and to
                ``saver.restore``.
            model_path: checkpoint path passed to ``saver.restore``.
            use_inception: when truthy the model is configured with a
                64 x 2048 feature shape and the "inception" CNN; otherwise
                with a 196 x 512 feature shape and the "vgg" CNN.
        """
        base_dir = os.path.dirname(os.path.realpath(__file__))

        # Word-to-index mapping, stored relative to this source file.
        vocab_file = os.path.join(base_dir, 'data/train/word_to_idx.pkl')
        with open(vocab_file, "rb") as handle:
            self.word_to_idx = pickle.load(handle)

        # Feature shape, CNN name and CNN weight file depend on the backbone.
        if use_inception:
            feature_shape = [64, 2048]
            cnn_name = "inception"
            cnn_model_path = os.path.join(base_dir, 'data/inception_v3.ckpt')
        else:
            feature_shape = [196, 512]
            cnn_name = "vgg"
            cnn_model_path = os.path.join(
                base_dir, './data/imagenet-vgg-verydeep-19.mat')

        self.batch_size = 128
        self.sess = sess
        self.use_inception = use_inception

        print("Creating model...")
        self.model = CaptionGenerator(self.word_to_idx,
                                      dim_feature=feature_shape,
                                      dim_embed=512,
                                      dim_hidden=1800,
                                      n_time_step=16,
                                      prev2out=True,
                                      ctx2out=True,
                                      alpha_c=5.0,
                                      selector=True,
                                      dropout=True,
                                      use_cnn=cnn_name,
                                      cnn_model_path=cnn_model_path)

        print("Loading CNN weights...")
        self.model.cnn.load_weights(sess)

        print("Building sampler...")
        sampler_outputs = self.model.build_sampler(max_len=20)
        self.alphas, self.betas, self.generated_captions = sampler_outputs

        # The sampler variables are restored from the checkpoint; no global
        # variable initialiser is run here.
        print("Loading LSTM weights...")
        saver = tf.train.Saver(self.model.sampler_vars)
        saver.restore(sess, model_path)
コード例 #3
0
def main():
    """Train the caption model on the validation split from a checkpoint.

    Only the 'val' split is loaded and handed to the solver; the vocabulary
    comes from the pickled training word index.
    """
    val_data = load_coco_data(data_path='./data', split='val')

    vocab_file = 'data/train/word_to_idx.pkl'
    with open(vocab_file, 'rb') as handle:
        word_to_idx = pickle.load(handle)

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=21,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    solver_options = dict(
        n_epochs=100,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.0012,
        print_every=100,
        save_every=5,
        image_path='./image/',
        pretrained_model='train_batch/model0.001/model.ckpt-30',
        model_path='train_batch/model0.002/',
        test_model=None,
        print_bleu=True,
        log_path='train_batch/log/',
    )
    solver = CaptioningSolver(model, val_data, **solver_options)
    solver.train()
コード例 #4
0
ファイル: train.py プロジェクト: vhvkhoa/show-attend-and-tell
def main():
    """Train the caption model with every hyper-parameter taken from FLAGS."""
    # Training split; it also carries the vocabulary.
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # Validation split, handed to train() together with the training data.
    val_data = load_coco_data(data_path='./data', split='val')

    feature_shape = [FLAGS.image_feature_size, FLAGS.image_feature_depth]
    model = CaptionGenerator(word_to_idx,
                             dim_feature=feature_shape,
                             dim_embed=FLAGS.embed_dim,
                             dim_hidden=FLAGS.lstm_hidden_size,
                             n_time_step=FLAGS.time_steps,
                             prev2out=FLAGS.prev2out,
                             ctx2out=FLAGS.ctx2out,
                             alpha_c=1.0,
                             enable_selector=FLAGS.enable_selector,
                             dropout=FLAGS.dropout)

    solver_options = dict(
        n_epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        update_rule=FLAGS.optimizer,
        learning_rate=FLAGS.learning_rate,
        metric=FLAGS.metric,
        print_every=FLAGS.snapshot_steps,
        eval_every=FLAGS.eval_steps,
        pretrained_model=FLAGS.pretrained_model,
        start_from=FLAGS.start_from,
        checkpoint_dir=FLAGS.checkpoint_dir,
        log_path=FLAGS.log_path,
    )
    solver = CaptioningSolver(model, **solver_options)

    solver.train(data, val_data, beam_size=FLAGS.beam_size)
コード例 #5
0
def main():
    """Train the caption model on the data under ``./nusdata``.

    The solver receives the data directory path rather than a loaded
    dataset, plus the vocabulary size and the number of time steps.
    """
    word_to_idx = load_word_to_idx(data_path='./nusdata', split='train')

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=64,
        dim_hidden=1024,
        n_time_step=11,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    data_path = './nusdata'
    solver_options = dict(
        n_epochs=50,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.0001,
        print_every=30,
        save_every=1,
        pretrained_model=None,
        model_path='model/lstm/',
        test_model='model/lstm/model-1',
        print_bleu=True,
        log_path='log/',
        V=len(word_to_idx),
        n_time_step=11,
    )
    solver = CaptioningSolver(model, data_path, **solver_options)
    solver.train()
コード例 #6
0
ファイル: train.py プロジェクト: vicely07/FallDetectionSystem
def main():
    """Train on the first 70% of the loaded split, then validate all models.

    The split named 'test' is loaded from ``data_set/`` and its leading 70%
    (measured on the 'video_ids' entry) is used as the training data.
    """
    train_data = load_data(current_path + 'data_set/', 'test')

    # One cutoff, computed before any entry is shortened, applied to every
    # per-video entry.
    cutoff = int(0.7 * len(train_data['video_ids']))
    for key in ('features', 'labels', 'video_ids', 'video_filenames'):
        train_data[key] = train_data[key][:cutoff]

    data = {'train_data': train_data}
    label_to_idx = load_pickle(current_path + 'data_set/label_to_idx.pkl')
    num_images_per_video = 17

    model = CaptionGenerator(label_to_idx=label_to_idx,
                             dim_feature=[196, 512],
                             dim_hidden=1024,
                             n_time_step=num_images_per_video,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=False)

    # NOTE(review): data_path is built with a leading '/' ('/data_set/') while
    # the other paths use 'data_set/' - confirm which form current_path needs.
    solver = CaptioningSolver(
        model,
        data,
        n_epochs=500,
        batch_size=15,
        update_rule='adam',
        learning_rate=0.0006,
        print_every=3,
        save_every=10,
        pretrained_model=None,
        model_path=current_path + 'model/lstm/',
        test_model=current_path + 'model/lstm/model-430',
        log_path=current_path + 'log/',
        data_path=current_path + '/data_set/',
        test_result_save_path=current_path + 'data_set/test/model_test_result/',
        models_val_disp=current_path + 'model/models_accuracy_val.txt')

    solver.train()
    solver.all_model_val()
コード例 #7
0
def main(use_inception):
    """Train the caption model on COCO features.

    Args:
        use_inception: when truthy the model uses a 64 x 2048 feature shape,
            otherwise 196 x 512.
    """
    # Load the training split. print() is written in call form so the module
    # parses on Python 3 and still prints the same text on Python 2.
    print("Loading COCO training data...")
    data = load_coco_data(data_path='./data', split='train')
    print("Done!")
    word_to_idx = data['word_to_idx']
    # Validation split, passed to the solver next to the training data.
    val_data = load_coco_data(data_path='./data', split='val')

    if use_inception:
        L = 64
        D = 2048
    else:
        L = 196
        D = 512

    # Imported here, as in the original, so the core package is only loaded
    # once the data has been read.
    from core.solver import CaptioningSolver
    from core.model import CaptionGenerator
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[L, D],
                             dim_embed=512,
                             dim_hidden=1800,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=5.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=100,
                              batch_size=256,
                              update_rule='adam',
                              learning_rate=0.0005,
                              print_every=1000,
                              summary_every=10000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')

    solver.train()
コード例 #8
0
def main():
    """Train the caption model on the COCO data under ``./data/coco_data``."""
    coco_dir = './data/coco_data'
    # Training split; it also carries the vocabulary.
    data = load_coco_data(data_path=coco_dir, split='train')
    word_to_idx = data['word_to_idx']
    # Validation split, passed to the solver next to the training data.
    val_data = load_coco_data(data_path=coco_dir, split='val')

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    solver_options = dict(
        n_epochs=20,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=10,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/preview_model',
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/preview_model_log/',
    )
    solver = CaptioningSolver(model, data, val_data, **solver_options)
    solver.train()
コード例 #9
0
def main():
    """Run the solver's test pass over the COCO validation data.

    The 'val' split is loaded once and passed to the solver as both its
    second and third positional argument, then to ``solver.test`` with
    ``split='test'``.
    """
    data = load_coco_data(data_path='./data', split='val')

    # Keep the vocabulary file open only while it is unpickled; the model
    # build and the test run do not need the handle.
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)
    solver = CaptioningSolver(model,
                              data,
                              data,
                              n_epochs=20,
                              batch_size=128,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=1,
                              image_path='./image/val2014_resized',
                              pretrained_model=None,
                              model_path='model/lstmval/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')

    # Alternative kept from the original: solver.test(data, split='val')
    solver.test(data, split='test')
コード例 #10
0
def main():
    """Preprocess and test the 'test' split, then dump its reference captions.

    After the solver's test pass, every annotation in
    ``data/annotations/captions_test.json`` is written to ``groundtruth.txt``
    as "<file_name>  <caption>", with a blank line after every fifth entry.
    """
    preprocessor = PreProcessor(
        batch_size=100,
        max_length=15,
        word_count_threshold=1,
        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    preprocessor.run('test')

    with open('./data/train/word_to_idx.pkl', 'rb') as vocab_file:
        word_to_idx = pickle.load(vocab_file)
    data = load_data(data_path='./data', split='test')

    model = CaptionGenerator(word_to_idx,
                             feature_dim=[196, 512],
                             embed_dim=512,
                             hidden_dim=1024,
                             len_sent=16,
                             lamda=1.0)
    solver = CaptioningSolver(model,
                              data,
                              None,
                              n_epochs=20,
                              batch_size=128,
                              learning_rate=0.001,
                              print_every=1,
                              save_every=1,
                              image_path='./image/test_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')

    solver.test(data, get_bleu=True)

    caption_file = 'data/annotations/captions_test.json'
    with open(caption_file) as source:
        groundtruth = json.load(source)
    with open('groundtruth.txt', 'w') as sink:
        for position, ann in enumerate(groundtruth['annotations'], 1):
            sink.write(''.join((ann['file_name'], '  ', ann['caption'], '\n')))
            # Blank separator line after every group of five entries.
            if position % 5 == 0:
                sink.write('\n')
コード例 #11
0
    def __init__(self):
        """Create the caption model, copy the training settings and prepare
        the output directories.

        ``self.optimizer`` is set to a ``tf.train`` optimizer class for the
        update rules 'adam', 'momentum' and 'rmsprop'; for any other rule the
        attribute is not set.
        """
        self.word_to_idx = utils.load_pickle(Const.vocab_path)

        generator_options = dict(
            dim_feature=[196, 512],
            dim_embed=512,
            dim_hidden=1024,
            n_time_step=33,
            prev2out=True,
            ctx2out=True,
            alpha_c=1.0,
            selector=True,
            dropout=True,
        )
        self.model = CaptionGenerator(self.word_to_idx, **generator_options)

        # Mirror each TrainingArg option under the same attribute name.
        for option in ('n_epochs', 'batch_size', 'update_rule',
                       'learning_rate', 'print_bleu', 'print_every',
                       'save_every', 'log_path', 'model_path'):
            setattr(self, option, getattr(TrainingArg, option))
        self.data_dir = Const.resize_train_out_path
        for option in ('pretrained_model', 'test_model'):
            setattr(self, option, getattr(TrainingArg, option))
        self.max_words_len = 35

        # Data management helper.
        self.pre_mgr = PreData(vgg19_path=TrainingArg.vgg19_path)

        # Pick the optimizer class named by the update rule; the class is
        # only looked up on tf.train once its rule has matched.
        for rule, class_name in (('adam', 'AdamOptimizer'),
                                 ('momentum', 'MomentumOptimizer'),
                                 ('rmsprop', 'RMSPropOptimizer')):
            if self.update_rule == rule:
                self.optimizer = getattr(tf.train, class_name)
                break

        # Make sure the checkpoint and log directories exist.
        for directory in (self.model_path, self.log_path):
            if not os.path.exists(directory):
                os.makedirs(directory)

        self.org_decoded = {}
        self.val_data_flag = False
コード例 #12
0
def main():
    """Caption newly dropped images and report cat/dog keyword accuracy.

    Steps:
      1. Resize every non-hidden file found in ``pred_folder`` into a fresh
         ``resized_folder`` and empty ``pred_folder``.
      2. Preprocess the 'pred' split and run the solver's test pass.
      3. Read ``output.txt`` ("<name>#<caption>" per line) and count captions
         that mention the animal implied by the file name (a leading 'C'
         means cat, anything else dog).
    """
    # Check if there are new images in pred_folder.
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    new_images = [img for img in os.listdir(pred_folder) if img[0] != '.']

    if new_images:
        # Start from an empty resized folder; it may not exist on a first run.
        if os.path.isdir(resized_folder):
            shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for img in new_images:
            # The source image is only read, so read-only access is enough.
            with open(os.path.join(pred_folder, img), 'rb') as f:
                with Image.open(f) as image:
                    resized = resize_image(image)
                    resized.save(os.path.join(resized_folder, img))
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    prep = PreProcessor(batch_size=100,
                        max_length=15,
                        word_count_threshold=1,
                        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    prep.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    data = load_data(data_path='./data', split='pred')

    model = CaptionGenerator(word_to_idx,
                             feature_dim=[196, 512],
                             embed_dim=512,
                             hidden_dim=1024,
                             len_sent=16,
                             lamda=1.0)
    solver = CaptioningSolver(model,
                              data,
                              None,
                              n_epochs=5,
                              batch_size=128,
                              learning_rate=0.001,
                              print_every=1,
                              save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')

    solver.test(data)

    correct_cap = {'cat': 0, 'dog': 0}
    with open('output.txt') as f:
        for line in f:
            fields = line.rstrip('\n').split('#')
            # Skip blank or malformed lines instead of raising IndexError.
            if len(fields) < 2 or not fields[0]:
                continue
            name, cap = fields[0], fields[1]
            animal = 'cat' if name[0] == 'C' else 'dog'
            if animal in cap:
                correct_cap[animal] += 1

    # NOTE(review): the denominator is fixed at 100 - presumably 100 images
    # per class; confirm against the evaluation set.
    print('Accuracy on cat images: ' + str(correct_cap['cat'] / float(100)))
    print('Accuracy on dog images: ' + str(correct_cap['dog'] / float(100)))
コード例 #13
0
def main():
    """Generate captions for each entry of the pickled top-1k list.

    For every ``(wnid, word)`` pair the features stored in
    ``./data/imagenet/features/<wnid>.hkl`` are loaded, a model and solver
    are built, and the result of ``solver.test_imagenet`` is stored under
    the wnid. All results are saved to
    ``./data/imagenet_top1k_captions.pkl``. Elapsed seconds are printed
    after every entry and once more at the end.
    """
    start = time.time()
    with open('./data/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    with open('./data/top1k.pkl', 'rb') as f:
        top1k = pickle.load(f)
    captions = {}

    for wnid_idx, (wnid, word) in enumerate(top1k):
        # Call form so the line parses on Python 3; output is unchanged on
        # Python 2 because a single formatted string is printed.
        print(' ----- Processing %s, %s, %d / %d' % (wnid, word, wnid_idx,
                                                     len(top1k)))

        save_path = './data/imagenet/features/%s.hkl' % wnid
        all_feats = hickle.load(save_path)
        data = {'features': all_feats}

        model = CaptionGenerator(word_to_idx,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)
        solver = CaptioningSolver(model,
                                  data,
                                  data,
                                  n_epochs=15,
                                  batch_size=256,
                                  update_rule='adam',
                                  learning_rate=0.0025,
                                  print_every=2000,
                                  save_every=1,
                                  image_path='./data/imagenet/features/%s' %
                                  wnid,
                                  pretrained_model=None,
                                  model_path='./data/model/attention',
                                  test_model='./data/model/attention/model-18',
                                  print_bleu=False,
                                  log_path='./log/')

        captions[wnid] = solver.test_imagenet(all_feats)
        # The model is rebuilt on the next iteration, so switch the current
        # variable scope to reuse mode before that happens.
        tf.get_variable_scope().reuse_variables()
        end = time.time()
        print(end - start)

    save_pickle(captions, './data/imagenet_top1k_captions.pkl')
    end = time.time()
    print(end - start)
コード例 #14
0
def main():
    """Train the model on the pickled feature/label arrays of the 'week' set.

    ``dim_feature`` is the second dimension of the training features and
    ``n_time_step`` is one less than the second dimension of the training
    labels. The vocabulary only holds the three special tokens.
    """

    def _load(path):
        # Unpickle one array file and close the handle straight away.
        with open(path, "rb") as f:
            return pickle.load(f)

    # Load the train and test datasets.
    train_features = _load(
        "/nfs/private/cas/dataset_0_N/week/train_features.pkl")
    train_labels = _load("/nfs/private/cas/dataset_0_N/week/train_labels.pkl")
    test_features = _load(
        "/nfs/private/cas/dataset_0_N/week/test_features.pkl")
    test_labels = _load("/nfs/private/cas/dataset_0_N/week/test_labels.pkl")

    word_to_idx = {"<START>": -3, "<END>": -2, "<NULL>": -1}
    dim_feature = train_features.shape[1]
    n_time_step = train_labels.shape[1] - 1
    # Call form so the line parses on Python 3; same output on Python 2.
    print("n_time_step:%d" % n_time_step)

    model = CaptionGenerator(word_to_idx,
                             V=int(np.max(train_features) + 1),
                             dim_feature=dim_feature,
                             dim_embed=128,
                             dim_hidden=128,
                             n_time_step=n_time_step,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    data = {"features": train_features, "labels": train_labels}
    val_data = {"features": test_features, "labels": test_labels}
    solver = CaptioningSolver(
        model,
        data,
        val_data,
        n_epochs=50000,
        batch_size=100,
        update_rule='adam',
        learning_rate=1e-4,
        print_every=100,
        save_every=10,
        image_path='./image/',
        pretrained_model=None,
        model_path='./model/0_N/cnn/week/',
        test_model=
        '/ais/gobi5/linghuan/basic-attention/model/lstm/lstm/model-19',
        print_bleu=True,
        log_path='./log/')

    solver.train()
コード例 #15
0
def main():
    """Run the solver's test pass on the 'val' split under ``./nusdata``.

    Reads the module-level names ``modelname``, ``filename`` and ``thres``
    for the checkpoint to test, the ``filename`` argument and the threshold.
    """
    word_to_idx = load_word_to_idx(data_path='./nusdata', split='train')
    val_data = load_coco_data(data_path='./nusdata', split='val')

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=64,
        dim_hidden=1024,
        n_time_step=11,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    data_path = './nusdata'
    solver = CaptioningSolver(model,
                              data_path,
                              n_epochs=100,
                              batch_size=1,
                              update_rule='adam',
                              learning_rate=0.0001,
                              print_every=100,
                              save_every=1,
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/%s' % modelname,
                              print_bleu=True,
                              log_path='log/',
                              V=len(word_to_idx))

    solver.test(val_data,
                split='val',
                filename=filename,
                attention_visualization=False,
                thres=float(thres))
コード例 #16
0
def main():
    """Train a caption generator together with a discriminator.

    The training split is passed to the solver twice (third and fourth
    positional argument); no separate validation split is loaded.
    """
    # Training split; it also carries the vocabulary.
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    # Same layout as the generator, but with alpha_c=0.0 and its own
    # learning rate.
    discriminator_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=0.0,
        selector=True,
        dropout=True,
        learning_rate=0.01,
    )
    discrim = Discriminator(word_to_idx, **discriminator_options)

    solver_options = dict(
        n_epochs=20,
        batch_size=128,
        gpu_list="1,2,3",
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        train_new='./model/lstm/model-20',
        test_model='model/lstm/model-21',
        print_bleu=True,
        log_path='log/',
    )
    solver = CaptioningSolver(model, discrim, data, data, **solver_options)
    solver.train()
コード例 #17
0
def main():
    """Preprocess the 'train' and 'dev' splits, then train the caption model."""
    # Preprocessing.
    preprocessor = PreProcessor(
        batch_size=100,
        max_length=15,
        word_count_threshold=1,
        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    for split in ['train', 'dev']:
        # cat_img_num sets the number of additional training cat images.
        preprocessor.run(split, cat_img_num=0)

    # Load the prepared datasets; the training split carries the vocabulary.
    train_data = load_data(data_path='./data', split='train')
    dev_data = load_data(data_path='./data', split='dev')
    word_to_idx = train_data['word_to_idx']

    model = CaptionGenerator(word_to_idx,
                             feature_dim=[196, 512],
                             embed_dim=512,
                             hidden_dim=1024,
                             len_sent=16,
                             lamda=1.0)

    solver = CaptioningSolver(model,
                              train_data,
                              dev_data,
                              n_epochs=5,
                              batch_size=128,
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=5,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-5')

    solver.train()
コード例 #18
0
def main():
  """Train the caption model on the 'train'/'val' groups of an HDF5 file.

  The dataset file stays open for the whole training run and is closed when
  training returns or raises, instead of being left open until exit.
  """
  with h5py.File(dataset_path, 'r') as data:
    train_data = data.get('train')
    val_data = data.get('val')

    word_to_index = load_pickle(word_to_index_path)

    model = CaptionGenerator(word_to_index, alpha_c=1.0)

    solver = CaptioningSolver(model, train_data, val_data,
                              n_epochs=20, batch_size=128, update_rule='adam',
                              learning_rate=0.001, print_every=1000, save_every=1,
                              model_path='model/lstm/', test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')

    solver.train()
コード例 #19
0
ファイル: train.py プロジェクト: saddiesh/Image_captioning
def main():
    """Run the solver's test pass on the project's own labelled data.

    Both ``data`` and ``our_test`` are loaded from the same directory with
    ``split='train'``. Training is disabled; only ``solver.test`` runs.
    """
    # Raw string: the backslashes are path separators, not escapes. The
    # non-raw form relied on invalid escape sequences (\i, \O, \o) that
    # newer Python versions warn about; the resulting path is identical.
    data_dir = r'.\image_data_to_be_labeled\Object_feature\our_data'

    # Earlier runs used load_coco_data(data_path='./our_data', ...) with the
    # 'train' and 'test' splits instead.
    data = load_coco_data(data_path=data_dir, split='train')
    # NOTE(review): the test set is also loaded with split='train' - confirm
    # this is intended.
    our_test = load_coco_data(data_path=data_dir, split='train')
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[216, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=26,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=False)

    solver = CaptioningSolver(model,
                              data,
                              our_test,
                              n_epochs=5000,
                              batch_size=64,
                              update_rule='adam',
                              learning_rate=1e-4,
                              print_every=1000,
                              save_every=100,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/our_train0414/',
                              test_model='model/our_train0414/model-2000',
                              print_bleu=False,
                              log_path='log/')

    # solver.train()
    solver.test(our_test)
コード例 #20
0
ファイル: train.py プロジェクト: chanmin92/multi-media
def test(toy=None):
    """Run the solver's test pass on the validation split.

    Args:
        toy: when equal to ``True`` the "toy_" prefix is applied to the
            vocabulary file name and passed on to ``load_coco_data``;
            otherwise an empty prefix is used.
    """
    # Equality (not truthiness) is kept on purpose: only values equal to
    # True select the toy data.
    prefix = "toy_" if toy == True else ""

    vocab_dir = os.path.join('./data', 'train')
    vocab_file = os.path.join(vocab_dir, '%sword_to_idx.pkl' % prefix)
    with open(vocab_file, 'rb') as handle:
        word_to_idx = pickle.load(handle)

    val_data = load_coco_data(data_path='./data', split='val', toy=prefix)

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=512,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    solver_options = dict(
        n_epochs=20,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/HighwayLSTM01_lstm/',
        test_model='model/HighwayLSTM01_lstm/model-20',
        print_bleu=True,
        log_path='log/',
    )
    solver = CaptioningSolver(model, None, val_data, **solver_options)

    solver.test(val_data)
コード例 #21
0
def main():
    """Train the model on the 'train' split with an empty validation set.

    Progress markers, the label index and the labels are printed before the
    model is built. Every print uses the call form so the module parses on
    Python 3 while printing the same text on Python 2.
    """
    # Load the training split.
    print('@@@@@@@@@@@@@')
    data = load_data(data_path='./data', split='train')
    label_to_idx = data['label_to_idx']
    print('@@@@@')
    # No validation split is loaded; the solver receives an empty list.
    # Previously: val_data = load_data(data_path='./data', split='val')
    val_data = []

    print(label_to_idx)
    print(data['labels'])

    model = CaptionGenerator(label_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=30,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=1,
                              batch_size=1,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=3,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')
    print('model + solver')

    solver.train()
コード例 #22
0
def main():
    """Caption newly dropped images and return the solver's test result.

    Every non-hidden file found in ``pred_folder`` is resized into a fresh
    ``resized_folder`` and ``pred_folder`` is emptied. The 'pred' split is
    then preprocessed and passed to ``solver.test`` with ``display=True``.

    Returns:
        Whatever ``solver.test`` returns.
    """
    # Check if there are new images in pred_folder.
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    new_images = [img for img in os.listdir(pred_folder) if img[0] != '.']

    if new_images:
        # Start from an empty resized folder; it may not exist on a first run.
        if os.path.isdir(resized_folder):
            shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for img in new_images:
            # The source image is only read, so read-only access is enough.
            with open(os.path.join(pred_folder, img), 'rb') as f:
                with Image.open(f) as image:
                    resized = resize_image(image)
                    resized.save(os.path.join(resized_folder, img))
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    prep = PreProcessor(batch_size=5,
                        max_length=15,
                        word_count_threshold=1,
                        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    prep.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    data = load_data(data_path='./data', split='pred')

    model = CaptionGenerator(word_to_idx,
                             feature_dim=[196, 512],
                             embed_dim=512,
                             hidden_dim=1024,
                             len_sent=16,
                             lamda=1.0)
    solver = CaptioningSolver(model,
                              data,
                              None,
                              n_epochs=5,
                              batch_size=5,
                              learning_rate=0.001,
                              print_every=1,
                              save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')

    # display=True prints the results in the terminal.
    return solver.test(data, display=True)
コード例 #23
0
def main(params):
    """Train the caption model using settings taken from ``params``.

    Args:
        params: Mapping that must provide the keys ``'batch_size'``,
            ``'epoch'``, ``'n_time_step'``, ``'lr'``, ``'model_path'`` and
            ``'log_path'``.
    """
    # Read every setting first, in a fixed order, before any data is loaded.
    batch_size, n_epochs, n_time_step = (
        params['batch_size'], params['epoch'], params['n_time_step'])
    learning_rate, model_path, log_path = (
        params['lr'], params['model_path'], params['log_path'])

    # Training split (also the source of the vocabulary), then validation.
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    val_data = load_coco_data(data_path='./data', split='val')

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=256,
        dim_hidden=1024,
        n_time_step=n_time_step,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    solver_options = dict(
        n_epochs=n_epochs,
        batch_size=batch_size,
        update_rule='adam',
        learning_rate=learning_rate,
        print_every=3000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path=model_path,
        test_model='./model/lstm/model-10',
        print_bleu=True,
        log_path=log_path,
    )
    solver = CaptioningSolver(model, data, val_data, **solver_options)

    solver.train()
コード例 #24
0
def main():
    """Load the train/val splits from ``./data``, build the model and train it.

    Progress messages are written with single-argument ``print(...)`` calls,
    which produce the same output on Python 2 and Python 3.
    """
    # load train dataset
    print("start loading data")
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')
    print("data loaded")
    # Elapsed time noted by the original author: 15.95 (units not stated)

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,  # n_time_step is max length + 1
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True)

    # change model path accordingly
    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=20,
                              batch_size=128,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')

    solver.train()
コード例 #25
0
def main():
    """Parse the command line, build datasets, model and solver, then train.

    All tunable values are read from the parsed ``args`` namespace; only the
    two annotation file paths are fixed here.
    """
    args = parser.parse_args()

    # load train dataset
    train_data = CocoCaptionDataset(
        caption_file='./data/annotations/captions_train2017.json',
        split='train')
    # load val dataset to print out scores every epoch
    val_data = CocoCaptionDataset(
        caption_file='./data/annotations/captions_val2017.json',
        split='val')
    # The vocabulary comes from the training dataset.
    word_to_idx = train_data.get_vocab_dict()

    generator_options = dict(
        feature_dim=[args.image_feature_size, args.image_feature_depth],
        embed_dim=args.embed_dim,
        hidden_dim=args.lstm_hidden_size,
        prev2out=args.prev2out,
        len_vocab=len(word_to_idx),
        ctx2out=args.ctx2out,
        enable_selector=args.enable_selector,
        dropout=args.dropout,
    )
    model = CaptionGenerator(**generator_options).to(device=args.device)

    solver_options = dict(
        n_time_steps=args.time_steps,
        batch_size=args.batch_size,
        beam_size=args.beam_size,
        optimizer=args.optimizer,
        learning_rate=args.learning_rate,
        metric=args.metric,
        snapshot_steps=args.snapshot_steps,
        eval_every=args.eval_steps,
        checkpoint=args.checkpoint,
        checkpoint_dir=args.checkpoint_dir,
        log_path=args.log_path,
        device=args.device,
    )
    solver = CaptioningSolver(model, word_to_idx, train_data, val_data,
                              **solver_options)

    solver.train(num_epochs=args.num_epochs)
コード例 #26
0
def main():
    """Load the train/val splits from ``./new_data``, build the model and train.

    Besides the model and the two splits, the solver also receives the
    inverse vocabulary (index -> word) built from ``word_to_idx``.
    """
    # load train dataset
    data = load_coco_data(data_path='./new_data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./new_data', split='val')

    model = CaptionGenerator(word_to_idx,
                             dim_att=[4, 512],
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    # dict.iteritems() exists only on Python 2; items() yields the same
    # pairs on both Python 2 and Python 3.
    idx_to_word = {idx: word for word, idx in word_to_idx.items()}

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              idx_to_word,
                              n_epochs=15,
                              batch_size=64,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=50,
                              save_every=5,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm2/',
                              test_model='model/lstm2/model-15',
                              print_bleu=True,
                              log_path='log/')

    solver.train()
コード例 #27
0
def main():
    """Build the caption generator and a discriminator, then train adversarially.

    Only the training split is loaded; the entry point called at the end is
    ``solver.train_adversarial()``.
    """
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    generator_options = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    # Discriminator settings: the two lists have one entry per filter size.
    dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 12, 16]
    dis_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100]
    dis_l2_reg_lambda = 0.2

    discrim = Discriminator(
        sequence_length=16,
        num_classes=2,
        vocab_size=len(word_to_idx),
        embedding_size=128,
        filter_sizes=dis_filter_sizes,
        num_filters=dis_num_filters,
        l2_reg_lambda=dis_l2_reg_lambda,
    )

    solver_options = dict(
        n_epochs=20,
        batch_size=64,
        gpu_list="0,1,2",
        update_rule='adam',
        learning_rate=0.0025,
        print_every=20,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        train_new=None,
        test_model='model/lstm/model-42',
        print_bleu=False,
        log_path='log/',
        num_rollout=10,
    )
    # NOTE(review): the same `data` object is passed in both data positions
    # -- confirm this is intentional.
    solver = CaptioningSolver(model, discrim, data, data, **solver_options)

    solver.train_adversarial()
コード例 #28
0
ファイル: infer.py プロジェクト: vhvkhoa/show-attend-and-tell
def main():
    """Run ``solver.test`` on the split named by ``FLAGS.split``.

    Model and solver settings are read from ``FLAGS``; the vocabulary is
    unpickled from ``./data/train/word_to_idx.pkl``.
    """
    # load dataset and vocab
    data = load_coco_data(data_path='./data', split=FLAGS.split)
    with open('./data/train/word_to_idx.pkl', 'rb') as vocab_file:
        word_to_idx = pickle.load(vocab_file)

    generator_options = dict(
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout,
    )
    model = CaptionGenerator(word_to_idx, **generator_options)

    solver = CaptioningSolver(
        model,
        batch_size=FLAGS.batch_size,
        test_checkpoint=FLAGS.test_checkpoint,
    )

    solver.test(data, beam_size=3, attention_visualization=FLAGS.att_vis)
コード例 #29
0
def main():
    """Run ``solver.test`` on the pre-extracted ``test1`` features.

    The features and file names are loaded from ``./data/test1/`` into a
    plain dict, which is handed to the solver both at construction time and
    in the ``test`` call.
    """
    test1_data = {
        'features': hickle.load('./data/test1/test1.features.hkl'),
        'file_names': load_pickle('./data/test1/test1.file.names.pkl'),
    }
    # Single-argument print(...) behaves the same on Python 2 and Python 3;
    # the message text is kept exactly as it was.
    print("Fnished loading...")

    with open('data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=21,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(
        model,
        test1_data,
        n_epochs=100,
        batch_size=50,
        update_rule='adam',
        learning_rate=0.001,
        print_every=100,
        save_every=5,
        image_path='./image/',
        test_model='./train_batch/model0.001/model.ckpt-30',
        print_bleu=True,
        log_path='train_batch/log_test/')

    solver.test(test1_data,
                split='test1',
                attention_visualization=False,
                save_sampled_captions=True)
コード例 #30
0
ファイル: test.py プロジェクト: leejk526/show-attend-and-tell
def main():
    """Build the model and solver from the val split, then test on the test split.

    The ``'val'`` split is loaded with ``if_train=True`` and supplies the
    vocabulary; the ``'test'`` split is loaded with ``if_train=False`` and
    passed to ``solver.test``.
    """
    data = load_coco_data(data_path='./data', split='val', if_train=True)
    word_to_idx = data['word_to_idx']
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)
    # The same `data` object is passed in both data positions of the solver.
    solver = CaptioningSolver(model,
                              data,
                              data,
                              n_epochs=10,
                              batch_size=100,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=5,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm_hard/',
                              test_model='model/lstm_hard/model-40',
                              print_bleu=True,
                              log_path='log/')

    test_data = load_coco_data(data_path='./data',
                               split='test',
                               if_train=False)
    solver.test(test_data, split='test')


# The entry-point guard belongs at module level: nested inside main() it
# never starts the script and would call main() recursively without end.
if __name__ == "__main__":
    main()