def main():
    """Run the caption-generation test pipeline and dump ground-truth captions.

    Preprocesses the 'test' split, restores the trained LSTM captioner,
    evaluates it with BLEU, then writes every reference annotation to
    groundtruth.txt with a blank line after each group of five captions.
    """
    # Extract CNN features for the test split.
    preprocessor = PreProcessor(batch_size=100,
                                max_length=15,
                                word_count_threshold=1,
                                cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    preprocessor.run('test')

    with open('./data/train/word_to_idx.pkl', 'rb') as vocab_file:
        vocab = pickle.load(vocab_file)
    test_data = load_data(data_path='./data', split='test')

    captioner = CaptionGenerator(vocab,
                                 feature_dim=[196, 512],
                                 embed_dim=512,
                                 hidden_dim=1024,
                                 len_sent=16,
                                 lamda=1.0)
    solver = CaptioningSolver(captioner, test_data, None,
                              n_epochs=20,
                              batch_size=128,
                              learning_rate=0.001,
                              print_every=1,
                              save_every=1,
                              image_path='./image/test_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')
    solver.test(test_data, get_bleu=True)

    # Dump reference captions so predictions can be compared by eye.
    with open('data/annotations/captions_test.json') as ann_file:
        groundtruth = json.load(ann_file)
    with open('groundtruth.txt', 'w') as out:
        for count, ann in enumerate(groundtruth['annotations'], start=1):
            out.write(ann['file_name'] + '  ' + ann['caption'] + '\n')
            if count % 5 == 0:
                out.write('\n')
Beispiel #2
0
def main():
    """Resume training the soft-attention captioner from a saved checkpoint,
    using the COCO val split as training data for this run."""
    validation_split = load_coco_data(data_path='./data', split='val')

    with open(os.path.join('data/train/word_to_idx.pkl'), 'rb') as fh:
        vocab = pickle.load(fh)

    generator = CaptionGenerator(vocab,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=21,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)

    # Gather solver settings in one place, then hand them over as kwargs.
    solver_config = dict(n_epochs=100,
                         batch_size=128,
                         update_rule='adam',
                         learning_rate=0.0012,
                         print_every=100,
                         save_every=5,
                         image_path='./image/',
                         pretrained_model='train_batch/model0.001/model.ckpt-30',
                         model_path='train_batch/model0.002/',
                         test_model=None,
                         print_bleu=True,
                         log_path='train_batch/log/')
    solver = CaptioningSolver(generator, validation_split, **solver_config)
    solver.train()
Beispiel #3
0
def main():
    """Evaluate a saved checkpoint ('model/lstm/model-10') on COCO features.

    NOTE(review): everything below the pickle load is indented inside the
    ``with`` block, so the vocabulary file stays open for the whole run —
    harmless but presumably unintentional; confirm before refactoring.
    """
    data = load_coco_data(data_path='./data', split='val')
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
        model = CaptionGenerator(word_to_idx,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)
        # The same val data is passed as both train and val positional args;
        # only test() is called, so the training slots are effectively unused.
        solver = CaptioningSolver(model,
                                  data,
                                  data,
                                  n_epochs=20,
                                  batch_size=128,
                                  update_rule='adam',
                                  learning_rate=0.001,
                                  print_every=1000,
                                  save_every=1,
                                  image_path='./image/val2014_resized',
                                  pretrained_model=None,
                                  model_path='model/lstmval/',
                                  test_model='model/lstm/model-10',
                                  print_bleu=True,
                                  log_path='log/')

        #solver.test(data, split='val')
        # split='test' selects the output naming/paths, but the features
        # evaluated are the val features loaded above.
        solver.test(data, split='test')
def main():
    """Train the attention captioner on COCO train, scoring BLEU on val each epoch."""
    # The training split carries the vocabulary built during preprocessing.
    train_split = load_coco_data(data_path='./data/coco_data', split='train')
    vocab = train_split['word_to_idx']
    # Validation split is only used for per-epoch BLEU reporting.
    validation_split = load_coco_data(data_path='./data/coco_data', split='val')

    generator = CaptionGenerator(vocab,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)

    training_options = dict(n_epochs=20,
                            batch_size=128,
                            update_rule='adam',
                            learning_rate=0.001,
                            print_every=1000,
                            save_every=10,
                            image_path='./image/',
                            pretrained_model=None,
                            model_path='model/preview_model',
                            test_model='model/lstm/model-10',
                            print_bleu=True,
                            log_path='log/preview_model_log/')
    trainer = CaptioningSolver(generator, train_split, validation_split,
                               **training_options)
    trainer.train()
def main(use_inception):
    """Train the captioner on COCO, with feature geometry chosen per backbone.

    Args:
        use_inception: if truthy, assume Inception features (64 locations x
            2048 channels); otherwise VGG conv features (196 x 512).

    Fix: Python 2 print statements converted to ``print()`` calls, matching
    the function-call style used elsewhere in this file and keeping identical
    output on both Python 2 and 3 (single parenthesized argument).
    """
    # load train dataset
    print("Loading COCO training data...")
    data = load_coco_data(data_path='./data', split='train')
    print("Done!")
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    if use_inception:
        L = 64      # number of spatial feature locations
        D = 2048    # feature depth per location
    else:
        L = 196
        D = 512

    # Deferred imports kept from the original code.
    from core.solver import CaptioningSolver
    from core.model import CaptionGenerator
    model = CaptionGenerator(word_to_idx, dim_feature=[L, D], dim_embed=512,
                             dim_hidden=1800, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=5.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=100, batch_size=256,
                              update_rule='adam', learning_rate=0.0005,
                              print_every=1000, summary_every=10000, save_every=1,
                              image_path='./image/', pretrained_model=None,
                              model_path='model/lstm/', test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')

    solver.train()
def main():
    """Train the NUS-WIDE caption model from the on-disk vocabulary."""
    vocab = load_word_to_idx(data_path='./nusdata', split='train')
    dataset_dir = './nusdata'

    tagger = CaptionGenerator(vocab,
                              dim_feature=[196, 512],
                              dim_embed=64,
                              dim_hidden=1024,
                              n_time_step=11,
                              prev2out=True,
                              ctx2out=True,
                              alpha_c=1.0,
                              selector=True,
                              dropout=True)

    trainer = CaptioningSolver(tagger,
                               dataset_dir,
                               n_epochs=50,
                               batch_size=128,
                               update_rule='adam',
                               learning_rate=0.0001,
                               print_every=30,
                               save_every=1,
                               pretrained_model=None,
                               model_path='model/lstm/',
                               test_model='model/lstm/model-1',
                               print_bleu=True,
                               log_path='log/',
                               V=len(vocab),
                               n_time_step=11)
    trainer.train()
Beispiel #7
0
def main():
    """Train the captioner with every hyper-parameter taken from FLAGS."""
    # load train dataset
    train_set = load_coco_data(data_path='./data', split='train')
    vocab = train_set['word_to_idx']
    # load val dataset to print out scores every epoch
    eval_set = load_coco_data(data_path='./data', split='val')

    generator = CaptionGenerator(
        vocab,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    trainer = CaptioningSolver(generator,
                               n_epochs=FLAGS.num_epochs,
                               batch_size=FLAGS.batch_size,
                               update_rule=FLAGS.optimizer,
                               learning_rate=FLAGS.learning_rate,
                               metric=FLAGS.metric,
                               print_every=FLAGS.snapshot_steps,
                               eval_every=FLAGS.eval_steps,
                               pretrained_model=FLAGS.pretrained_model,
                               start_from=FLAGS.start_from,
                               checkpoint_dir=FLAGS.checkpoint_dir,
                               log_path=FLAGS.log_path)

    trainer.train(train_set, eval_set, beam_size=FLAGS.beam_size)
Beispiel #8
0
def main():
    """Caption new images from pred_folder and score cat/dog accuracy.

    New images are resized into resized_folder, features extracted, the
    trained model captions them, and the captions written to output.txt are
    checked for the expected animal word.

    Fix: the original parsing of output.txt raised IndexError on blank or
    '#'-less lines; such lines are now skipped.
    """
    # Move any newly dropped images into the resized folder, clearing old ones.
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    new_images = []
    for img in os.listdir(pred_folder):
        if img[0] == '.':   # skip hidden files
            continue
        new_images.append(img)
    if new_images:
        shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for img in new_images:
            with open(os.path.join(pred_folder, img), 'r+b') as f:
                with Image.open(f) as image:
                    image = resize_image(image)
                    image.save(os.path.join(resized_folder, img))
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    prep = PreProcessor(batch_size=100,
                        max_length=15,
                        word_count_threshold=1,
                        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    prep.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    data = load_data(data_path='./data', split='pred')

    model = CaptionGenerator(word_to_idx,
                             feature_dim=[196, 512],
                             embed_dim=512,
                             hidden_dim=1024,
                             len_sent=16,
                             lamda=1.0)
    solver = CaptioningSolver(model, data, None, n_epochs=5, batch_size=128,
                              learning_rate=0.001, print_every=1, save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')

    solver.test(data)

    # Parse "name#caption" lines; skip malformed lines instead of crashing.
    correct_cap = {'cat': 0, 'dog': 0}
    with open('output.txt') as f:
        for line in f:
            parts = line.rstrip('\n').split('#')
            if len(parts) < 2 or not parts[0]:
                continue  # was an IndexError on blank or '#'-less lines
            name, cap = parts[0], parts[1]
            # Convention in this dataset: filenames starting with 'C' are cats.
            animal = 'cat' if name[0] == 'C' else 'dog'
            if animal in cap:
                correct_cap[animal] += 1
    # NOTE(review): denominators assume exactly 100 images per class — confirm.
    print('Accuracy on cat images: ' + str(correct_cap['cat'] / float(100)))
    print('Accuracy on dog images: ' + str(correct_cap['dog'] / float(100)))
Beispiel #9
0
def main():
    """Caption pre-extracted ImageNet features for each of the top-1k synsets.

    For every (wnid, word) pair, loads that synset's hickle feature file,
    runs the saved attention model over it, and collects the captions into
    one pickle at the end.

    Fix: the Python 2 print statement converted to a ``print()`` call, so the
    block matches the ``print(...)`` style already used below and runs on
    Python 3 (identical output on Python 2 for a single parenthesized arg).
    """
    start = time.time()
    with open('./data/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    with open('./data/top1k.pkl', 'rb') as f:
        top1k = pickle.load(f)
    captions = {}

    for wnid_idx, (wnid, word) in enumerate(top1k):
        print(' ----- Processing %s, %s, %d / %d' % (wnid, word, wnid_idx,
                                                     len(top1k)))

        save_path = './data/imagenet/features/%s.hkl' % wnid
        all_feats = hickle.load(save_path)
        data = {}
        data['features'] = all_feats

        model = CaptionGenerator(word_to_idx,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)
        solver = CaptioningSolver(model,
                                  data,
                                  data,
                                  n_epochs=15,
                                  batch_size=256,
                                  update_rule='adam',
                                  learning_rate=0.0025,
                                  print_every=2000,
                                  save_every=1,
                                  image_path='./data/imagenet/features/%s' %
                                  wnid,
                                  pretrained_model=None,
                                  model_path='./data/model/attention',
                                  test_model='./data/model/attention/model-18',
                                  print_bleu=False,
                                  log_path='./log/')

        captions[wnid] = solver.test_imagenet(all_feats)
        # Rebuilding the model each iteration requires reusing TF variables.
        tf.get_variable_scope().reuse_variables()
        end = time.time()
        print(end - start)

    save_pickle(captions, './data/imagenet_top1k_captions.pkl')
    end = time.time()
    print(end - start)
Beispiel #10
0
def main():
    """Train the caption model on pickled sequence features/labels.

    Fix: the Python 2 print statement converted to a ``print()`` call
    (identical output, and valid on Python 3 like the other ``print(...)``
    calls in this file).
    """
    # load train dataset
    with open("/nfs/private/cas/dataset_0_N/week/train_features.pkl",
              "rb") as f:
        train_features = pickle.load(f)
    with open("/nfs/private/cas/dataset_0_N/week/train_labels.pkl", "rb") as f:
        train_labels = pickle.load(f)
    with open("/nfs/private/cas/dataset_0_N/week/test_features.pkl",
              "rb") as f:
        test_features = pickle.load(f)
    with open("/nfs/private/cas/dataset_0_N/week/test_labels.pkl", "rb") as f:
        test_labels = pickle.load(f)
    # Minimal vocabulary: only the special tokens; real symbols come from the
    # feature values themselves (V is derived from max(train_features) below).
    word_to_idx = {"<START>": -3, "<END>": -2, "<NULL>": -1}
    # NOTE(review): dim_feature here is a scalar (shape[1]), unlike the
    # [L, D] lists used elsewhere in this file — confirm the solver expects it.
    dim_feature = train_features.shape[1]
    n_time_step = train_labels.shape[1] - 1
    print("n_time_step:%d" % n_time_step)

    model = CaptionGenerator(word_to_idx,
                             V=int(np.max(train_features) + 1),
                             dim_feature=dim_feature,
                             dim_embed=128,
                             dim_hidden=128,
                             n_time_step=n_time_step,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    data = {"features": train_features, "labels": train_labels}
    val_data = {"features": test_features, "labels": test_labels}
    solver = CaptioningSolver(
        model,
        data,
        val_data,
        n_epochs=50000,
        batch_size=100,
        update_rule='adam',
        learning_rate=1e-4,
        print_every=100,
        save_every=10,
        image_path='./image/',
        pretrained_model=None,
        model_path='./model/0_N/cnn/week/',
        test_model=
        '/ais/gobi5/linghuan/basic-attention/model/lstm/lstm/model-19',
        print_bleu=True,
        log_path='./log/')

    solver.train()
Beispiel #11
0
def main():
    """Evaluate a saved NUS model on the val split with a score threshold.

    NOTE(review): ``modelname``, ``filename`` and ``thres`` are not defined in
    this function — presumably module-level globals (e.g. parsed from argv);
    confirm they are bound before calling.
    """
    word_to_idx = load_word_to_idx(data_path='./nusdata', split='train')
    val_data = load_coco_data(data_path='./nusdata', split='val')
    # test_data = load_coco_data(data_path='./nusdata', split='test')
    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=64,
                            dim_hidden=1024, n_time_step=11, prev2out=True,
                            ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    data_path = './nusdata'
    solver = CaptioningSolver(model, data_path, n_epochs=100, batch_size=1,
                update_rule='adam', learning_rate=0.0001, print_every=100, save_every=1,
                pretrained_model=None, model_path='model/lstm/',
                test_model=('model/lstm/%s' %modelname), print_bleu=True, log_path='log/', 
                V=len(word_to_idx))
    solver.test(val_data, split='val', filename=filename, attention_visualization=False, \
                thres=float(thres))
def main():
    """Adversarially train the captioner: generator plus caption discriminator."""
    # load train dataset
    train_split = load_coco_data(data_path='./data', split='train')
    vocab = train_split['word_to_idx']

    generator = CaptionGenerator(vocab,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)

    # Discriminator mirrors the generator's geometry but skips the alpha
    # regularizer and carries its own learning rate.
    critic = Discriminator(vocab,
                           dim_feature=[196, 512],
                           dim_embed=512,
                           dim_hidden=1024,
                           n_time_step=16,
                           prev2out=True,
                           ctx2out=True,
                           alpha_c=0.0,
                           selector=True,
                           dropout=True,
                           learning_rate=0.01)

    solver_options = dict(n_epochs=20,
                          batch_size=128,
                          gpu_list="1,2,3",
                          update_rule='adam',
                          learning_rate=0.001,
                          print_every=1000,
                          save_every=1,
                          image_path='./image/',
                          pretrained_model=None,
                          model_path='model/lstm/',
                          train_new='./model/lstm/model-20',
                          test_model='model/lstm/model-21',
                          print_bleu=True,
                          log_path='log/')
    trainer = CaptioningSolver(generator, critic, train_split, train_split,
                               **solver_options)
    trainer.train()
def main():
    """Preprocess train/dev splits, then train the caption model for 5 epochs."""
    # Feature extraction for both splits; cat_img_num controls how many extra
    # training cat images are added (0 here).
    preprocessor = PreProcessor(batch_size=100, max_length=15,
                                word_count_threshold=1,
                                cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    for split_name in ['train', 'dev']:
        preprocessor.run(split_name, cat_img_num=0)

    # load train dataset
    train_split = load_data(data_path='./data', split='train')
    dev_split = load_data(data_path='./data', split='dev')
    vocab = train_split['word_to_idx']

    captioner = CaptionGenerator(vocab, feature_dim=[196, 512], embed_dim=512,
                                 hidden_dim=1024, len_sent=16, lamda=1.0)

    trainer = CaptioningSolver(captioner, train_split, dev_split,
                               n_epochs=5, batch_size=128, learning_rate=0.001,
                               print_every=1000, save_every=5,
                               model_path='model/lstm/',
                               test_model='model/lstm/model-5')
    trainer.train()
def main():
    """Train the soft-attention captioner on COCO train with per-epoch val BLEU."""
    # load train dataset
    train_split = load_coco_data(data_path='./data', split='train')
    vocab = train_split['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    eval_split = load_coco_data(data_path='./data', split='val')

    generator = CaptionGenerator(vocab,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)

    trainer = CaptioningSolver(generator, train_split, eval_split,
                               n_epochs=20, batch_size=128, update_rule='adam',
                               learning_rate=0.001, print_every=1000,
                               save_every=1, image_path='./image/',
                               pretrained_model=None, model_path='model/lstm/',
                               test_model='model/lstm/model-10',
                               print_bleu=True, log_path='log/')
    trainer.train()
Beispiel #15
0
def main():
    """Train from an HDF5 dataset containing 'train' and 'val' groups."""
    dataset = h5py.File(dataset_path, 'r')
    training_group = dataset.get('train')
    validation_group = dataset.get('val')

    vocab = load_pickle(word_to_index_path)

    captioner = CaptionGenerator(vocab, alpha_c=1.0)

    trainer = CaptioningSolver(captioner, training_group, validation_group,
                               n_epochs=20, batch_size=128, update_rule='adam',
                               learning_rate=0.001, print_every=1000, save_every=1,
                               model_path='model/lstm/',
                               test_model='model/lstm/model-10',
                               print_bleu=True, log_path='log/')
    trainer.train()
Beispiel #16
0
def main():
    """Evaluate the captioner on the locally labeled object-feature data.

    Both ``data`` and ``our_test`` load the same 'train' split — there is no
    separate labeled test set here, so the model is evaluated on the data it
    was trained on.  NOTE(review): confirm that is intentional.

    Fix: the Windows-style path literals relied on ``\\i``/``\\O``/``\\o``
    being invalid escape sequences that fall through verbatim — a
    DeprecationWarning/SyntaxWarning on modern Python.  Raw strings produce
    the byte-identical path without the warning.
    """
    data = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    our_test = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    word_to_idx = data['word_to_idx']
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[216, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=26,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=False)

    solver = CaptioningSolver(model,
                              data,
                              our_test,
                              n_epochs=5000,
                              batch_size=64,
                              update_rule='adam',
                              learning_rate=1e-4,
                              print_every=1000,
                              save_every=100,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/our_train0414/',
                              test_model='model/our_train0414/model-2000',
                              print_bleu=False,
                              log_path='log/')

    solver.test(our_test)
Beispiel #17
0
def test(toy=None):
    """Evaluate a saved HighwayLSTM checkpoint on the (possibly toy) val split.

    Original semantics preserved: only the literal ``True`` selects the
    toy-prefixed vocabulary/data files; any other value selects the full set.
    """
    prefix = "toy_" if toy == True else ""
    vocab_dir = os.path.join('./data', 'train')
    with open(os.path.join(vocab_dir, '%sword_to_idx.pkl' % prefix), 'rb') as fh:
        vocab = pickle.load(fh)

    eval_split = load_coco_data(data_path='./data', split='val', toy=prefix)
    captioner = CaptionGenerator(vocab, dim_feature=[196, 512], dim_embed=512,
                                 dim_hidden=512, n_time_step=16, prev2out=True,
                                 ctx2out=True, alpha_c=1.0, selector=True,
                                 dropout=True)
    evaluator = CaptioningSolver(captioner, None, eval_split,
                                 n_epochs=20, batch_size=128, update_rule='adam',
                                 learning_rate=0.001, print_every=1000,
                                 save_every=1, image_path='./image/',
                                 pretrained_model=None,
                                 model_path='model/HighwayLSTM01_lstm/',
                                 test_model='model/HighwayLSTM01_lstm/model-20',
                                 print_bleu=True, log_path='log/')

    evaluator.test(eval_split)
Beispiel #18
0
def main():
    """Train the label-sequence model, printing debug state along the way.

    Fix: Python 2 print statements converted to ``print()`` calls — identical
    output (single parenthesized argument) and consistent with the
    ``print(...)`` calls elsewhere in this file.
    """
    # load train dataset
    print('@@@@@@@@@@@@@')
    data = load_data(data_path='./data', split='train')
    label_to_idx = data['label_to_idx']
    print('@@@@@')
    # load val dataset to print out bleu scores every epoch
    #val_data = load_data(data_path='./data', split='val')
    # Validation disabled: an empty list stands in for the val split.
    val_data = []

    print(label_to_idx)
    print(data['labels'])

    model = CaptionGenerator(label_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=30,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=1,
                              batch_size=1,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=3,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')
    print('model + solver')

    solver.train()
Beispiel #19
0
def main():
    """Resize any new prediction images, preprocess them, and caption them.

    Returns the solver's test output; display=True also prints each result
    to the terminal.
    """
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    # Hidden files (leading dot) are not treated as images.
    pending = [name for name in os.listdir(pred_folder)
               if not name.startswith('.')]
    if pending:
        shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for name in pending:
            with open(os.path.join(pred_folder, name), 'r+b') as handle:
                with Image.open(handle) as picture:
                    resize_image(picture).save(os.path.join(resized_folder, name))
        # Consume the inputs so they are not reprocessed next run.
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    preprocessor = PreProcessor(batch_size=5,
                                max_length=15,
                                word_count_threshold=1,
                                cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    preprocessor.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as handle:
        vocab = pickle.load(handle)
    pred_data = load_data(data_path='./data', split='pred')

    captioner = CaptionGenerator(vocab,
                                 feature_dim=[196, 512],
                                 embed_dim=512,
                                 hidden_dim=1024,
                                 len_sent=16,
                                 lamda=1.0)
    solver = CaptioningSolver(captioner, pred_data, None, n_epochs=5, batch_size=5,
                              learning_rate=0.001, print_every=1, save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')

    # display True to print results in terminal
    return solver.test(pred_data, display=True)
Beispiel #20
0
def main(params):
    """Train the captioner using hyper-parameters from the ``params`` dict.

    Expected keys: 'batch_size', 'epoch', 'n_time_step', 'lr', 'model_path',
    'log_path'.
    """
    # Pull every tunable setting out of the dict up front, so a missing key
    # fails before any data is loaded.
    batch = params['batch_size']
    epochs = params['epoch']
    steps = params['n_time_step']
    lr = params['lr']
    ckpt_dir = params['model_path']
    log_dir = params['log_path']

    train_split = load_coco_data(data_path='./data', split='train')
    vocab = train_split['word_to_idx']
    eval_split = load_coco_data(data_path='./data', split='val')

    generator = CaptionGenerator(vocab,
                                 dim_feature=[196, 512],
                                 dim_embed=256,
                                 dim_hidden=1024,
                                 n_time_step=steps,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)

    trainer = CaptioningSolver(generator,
                               train_split,
                               eval_split,
                               n_epochs=epochs,
                               batch_size=batch,
                               update_rule='adam',
                               learning_rate=lr,
                               print_every=3000,
                               save_every=1,
                               image_path='./image/',
                               pretrained_model=None,
                               model_path=ckpt_dir,
                               test_model='./model/lstm/model-10',
                               print_bleu=True,
                               log_path=log_dir)
    trainer.train()
Beispiel #21
0
def main():
    """Train the video captioner on the first 70% of the loaded set,
    then run validation across all saved models."""
    training_set = load_data(current_path + 'data_set/', 'test')
    # Truncate every aligned array to the leading 70%.
    cutoff = int(0.7 * len(training_set['video_ids']))
    for key in ('features', 'labels', 'video_ids', 'video_filenames'):
        training_set[key] = training_set[key][:cutoff]

    data = {'train_data': training_set}
    label_to_idx = load_pickle(current_path + 'data_set/label_to_idx.pkl')
    # One caption time-step per sampled frame.
    num_images_per_video = 17

    model = CaptionGenerator(label_to_idx=label_to_idx, dim_feature=[196, 512],
                             dim_hidden=1024, n_time_step=num_images_per_video,
                             ctx2out=True, alpha_c=1.0, selector=True,
                             dropout=False)

    solver = CaptioningSolver(model, data, n_epochs=500, batch_size=15,
                              update_rule='adam', learning_rate=0.0006,
                              print_every=3, save_every=10,
                              pretrained_model=None,
                              model_path=current_path + 'model/lstm/',
                              test_model=current_path + 'model/lstm/model-430',
                              log_path=current_path + 'log/',
                              data_path=current_path + '/data_set/',
                              test_result_save_path=current_path + 'data_set/test/model_test_result/',
                              models_val_disp=current_path + 'model/models_accuracy_val.txt')

    solver.train()
    solver.all_model_val()
Beispiel #22
0
def main():
    """Train the soft-attention captioner on COCO train, validating on val.

    Fix: Python 2 print statements converted to ``print()`` calls — identical
    output and consistent with the rest of the file.
    """
    # load train dataset
    print("start loading data")
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')
    print("data loaded")
    # Elapse time: 15.95

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,  # n_time_step is max length + 1
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=20,
                              batch_size=128,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')
    # change model path accordingly

    solver.train()
Beispiel #23
0
def main():
    """Train the attention caption generator on the ./new_data splits."""
    # Training split supplies the vocabulary; val split is used for the
    # per-epoch BLEU report.
    train_data = load_coco_data(data_path='./new_data', split='train')
    vocab = train_data['word_to_idx']
    val_data = load_coco_data(data_path='./new_data', split='val')

    caption_model = CaptionGenerator(
        vocab,
        dim_att=[4, 512],
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True)

    # Inverse vocabulary (index -> word), needed by the solver for decoding.
    idx_to_word = dict((idx, word) for word, idx in vocab.iteritems())

    trainer = CaptioningSolver(
        caption_model, train_data, val_data, idx_to_word,
        n_epochs=15,
        batch_size=64,
        update_rule='adam',
        learning_rate=0.001,
        print_every=50,
        save_every=5,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm2/',
        test_model='model/lstm2/model-15',
        print_bleu=True,
        log_path='log/')

    trainer.train()
def main():
    """Parse CLI flags, build the model and solver, and train (PyTorch port)."""
    args = parser.parse_args()

    # COCO 2017 caption datasets; the vocabulary comes from the train split.
    train_data = CocoCaptionDataset(
        caption_file='./data/annotations/captions_train2017.json',
        split='train')
    val_data = CocoCaptionDataset(
        caption_file='./data/annotations/captions_val2017.json',
        split='val')
    vocab = train_data.get_vocab_dict()

    caption_model = CaptionGenerator(
        feature_dim=[args.image_feature_size, args.image_feature_depth],
        embed_dim=args.embed_dim,
        hidden_dim=args.lstm_hidden_size,
        prev2out=args.prev2out,
        len_vocab=len(vocab),
        ctx2out=args.ctx2out,
        enable_selector=args.enable_selector,
        dropout=args.dropout).to(device=args.device)

    trainer = CaptioningSolver(
        caption_model,
        vocab,
        train_data,
        val_data,
        n_time_steps=args.time_steps,
        batch_size=args.batch_size,
        beam_size=args.beam_size,
        optimizer=args.optimizer,
        learning_rate=args.learning_rate,
        metric=args.metric,
        snapshot_steps=args.snapshot_steps,
        eval_every=args.eval_steps,
        checkpoint=args.checkpoint,
        checkpoint_dir=args.checkpoint_dir,
        log_path=args.log_path,
        device=args.device)

    trainer.train(num_epochs=args.num_epochs)
Beispiel #25
0
def main():
    """Load a data split and a trained checkpoint, then run beam-search decoding."""
    # Split to evaluate is selected via FLAGS; the vocabulary was pickled
    # during preprocessing of the train split.
    data = load_coco_data(data_path='./data', split=FLAGS.split)
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        vocab = pickle.load(f)

    caption_model = CaptionGenerator(
        vocab,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    tester = CaptioningSolver(
        caption_model,
        batch_size=FLAGS.batch_size,
        test_checkpoint=FLAGS.test_checkpoint)

    tester.test(data, beam_size=3, attention_visualization=FLAGS.att_vis)
def main():
    """Adversarial training: caption generator vs. a CNN discriminator."""
    # Training split; its vocabulary sizes both generator and discriminator.
    data = load_coco_data(data_path='./data', split='train')
    vocab = data['word_to_idx']

    generator = CaptionGenerator(
        vocab, dim_feature=[196, 512], dim_embed=512,
        dim_hidden=1024, n_time_step=16, prev2out=True,
        ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    # Discriminator CNN hyper-parameters: one conv filter width per entry,
    # with the matching number of filters.
    filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 12, 16]
    filter_counts = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100]
    l2_reg = 0.2

    discrim = Discriminator(
        sequence_length=16, num_classes=2, vocab_size=len(vocab),
        embedding_size=128, filter_sizes=filter_sizes,
        num_filters=filter_counts, l2_reg_lambda=l2_reg)

    solver = CaptioningSolver(
        generator, discrim, data, data, n_epochs=20, batch_size=64,
        gpu_list="0,1,2", update_rule='adam', learning_rate=0.0025,
        print_every=20, save_every=1, image_path='./image/',
        pretrained_model=None, model_path='model/lstm/', train_new=None,
        test_model='model/lstm/model-42', print_bleu=False,
        log_path='log/', num_rollout=10)

    solver.train_adversarial()
Beispiel #27
0
def main():
    """Build a hard-attention caption model from the val split, then run
    evaluation on the test split with a saved checkpoint.

    Note: the `val` split is used for model construction/training data here
    (if_train=True) — presumably intentional in this example; the actual
    scoring happens on the `test` split below.
    """
    data = load_coco_data(data_path='./data', split='val', if_train=True)
    word_to_idx = data['word_to_idx']
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)
    solver = CaptioningSolver(model,
                              data,
                              data,
                              n_epochs=10,
                              batch_size=100,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=5,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm_hard/',
                              test_model='model/lstm_hard/model-40',
                              print_bleu=True,
                              log_path='log/')

    test_data = load_coco_data(data_path='./data',
                               split='test',
                               if_train=False)
    solver.test(test_data, split='test')


# BUG FIX: this guard was previously indented inside main(), which meant the
# script never started on its own and, once main() was invoked, called itself
# recursively without bound.  It belongs at module level.
if __name__ == "__main__":
    main()
Beispiel #28
0
def main():
    """Generate captions for the test1 split using a trained checkpoint.

    Loads precomputed CNN features (hickle) and file names (pickle), builds
    the caption model with the saved vocabulary, and writes sampled captions
    (no attention visualization).
    """
    # Features are stored with hickle, file names with a pickle helper.
    test1_data = {}
    test1_data['features'] = hickle.load('./data/test1/test1.features.hkl')
    test1_data['file_names'] = load_pickle('./data/test1/test1.file.names.pkl')
    # Fixed typo in the progress message ("Fnished" -> "Finished").
    print("Finished loading...")

    # os.path.join with a single argument was a no-op; use the path directly.
    with open('data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=21,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(
        model,
        test1_data,
        n_epochs=100,
        batch_size=50,
        update_rule='adam',
        learning_rate=0.001,
        print_every=100,
        save_every=5,
        image_path='./image/',
        test_model='./train_batch/model0.001/model.ckpt-30',
        print_bleu=True,
        log_path='train_batch/log_test/')

    solver.test(test1_data,
                split='test1',
                attention_visualization=False,
                save_sampled_captions=True)
Beispiel #29
0
                         prev2out=True,
                         ctx2out=True,
                         alpha_c=1.0,
                         selector=True,
                         dropout=True)

# In[4]:

# NOTE(review): notebook-export fragment.  `model`, `data`, and `word_to_idx`
# are defined in earlier cells that are not visible here — TODO confirm their
# shapes against the original notebook before reusing this script.

solver = CaptioningSolver(model,
                          data,
                          data,
                          n_epochs=15,
                          batch_size=128,
                          update_rule='adam',
                          learning_rate=0.0025,
                          print_every=2000,
                          save_every=1,
                          image_path='./image/val2014_resized',
                          pretrained_model=None,
                          model_path='./model/lstm',
                          test_model='./model/lstm3/model-18',
                          print_bleu=False,
                          log_path='./log/')

# In[7]:

# Evaluate the checkpoint above on the 'val' split.
solver.test(data, split='val')

# In[8]:

test = load_coco_data(data_path='./data', split='test')
# In[3]:

# NOTE(review): the first positional argument 'h' is unusual — presumably a
# mode flag consumed by CaptionGenerator; verify against its signature.
model = CaptionGenerator('h', word_to_idx, dim_feature=[196, 512], dim_embed=512,
                                   dim_hidden=1024, n_time_step=16, prev2out=True, 
                                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)


# In[4]:
'''
solver = CaptioningSolver(model, data, data, n_epochs=15, batch_size=128, update_rule='adam',
                                      learning_rate=0.0025, print_every=2000, save_every=1, image_path='./image/val2014_resized',
                                pretrained_model=None, model_path='./model/lstm', test_model='./model/lstm/model-10',
                                 print_bleu=False, log_path='./log/')
'''
# This second solver rebinds `solver`, replacing the one built above.
solver = CaptioningSolver(model, data, data, n_epochs=10, batch_size=50, update_rule='adam',
                                          learning_rate=0.001, print_every=1000, save_every=1, image_path='./image/',
                                    pretrained_model=None, model_path='model/lstm/', test_model='model/lstm/model-10',
                                     print_bleu=True, log_path='log/')

# In[7]:

#solver.test(data, split='val')


# In[8]:

# NOTE(review): this re-loads the same 'test' split as cell In[8] above and
# rebinds `test` — the earlier load is effectively dead.
test = load_coco_data(data_path='./data', split='test')


# In[13]:

#tf.get_variable_scope().reuse_variables()
def main():
    """Train, validate, and/or test the video-label model.

    Which phases run is controlled by module-level integer flags (`train`,
    `validate`, `test`, `train_for_test`); a value of 1 enables the phase.
    Reads pickled VGG feature dicts from the working directory.
    """
    # train_data = load_data(current_path + 'data_set/', 'test')
    # length = len(train_data['video_ids'])
    # train_data['features'] = train_data['features'][:int(0.7 * length)]
    # train_data['labels'] = train_data['labels'][:int(0.7 * length)]
    # train_data['video_ids'] = train_data['video_ids'][:int(0.7 * length)]
    # train_data['video_filenames'] = train_data['video_filenames'][:int(0.7 * length)]
    with open('train_data_vgg.pkl', 'rb') as handle:
        train_data = pickle.load(handle)
    # length = len(train_data['new_filename'])
    # NOTE(review): the next two assignments are no-ops (left over from the
    # commented-out 70% slicing above); 'video_ids' is taken from
    # 'new_filename' — presumably file names double as video ids; confirm.
    train_data['features'] = train_data['features']
    train_data['labels'] = train_data['labels']
    train_data['video_ids'] = train_data['new_filename']
    # train_data['video_filenames'] = train_data['video_filenames'][:int(0.7 * length)]
    if train_for_test == 1:
        # Final-training mode: fold the validation set into the training set.
        with open('val_data_vgg.pkl', 'rb') as handle:
            val_data = pickle.load(handle)
        # length = len(train_data['new_filename'])
        train_data['features'] = np.concatenate(
            (train_data['features'], val_data['features']), axis=0)
        train_data['labels'] = np.concatenate(
            (train_data['labels'], val_data['labels']), axis=0)
        train_data['video_ids'] = np.concatenate(
            (train_data['new_filename'], val_data['new_filename']), axis=0)
    # train_data = {}

    data = {'train_data': train_data}
    # label_to_idx = load_pickle(current_path + 'data_set/label_to_idx.pkl')
    label_to_idx = load_pickle('labels_to_idx.pkl')
    # Frames sampled per video; also the model's number of time steps.
    num_images_per_video = 17

    model = CaptionGenerator(
        label_to_idx=label_to_idx,
        # dim_feature=[49, 1280],
        dim_feature=[196, 512],
        dim_hidden=1024,
        n_time_step=num_images_per_video,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=False)

    # NOTE(review): data_path uses a leading '/' unlike the other paths —
    # yields a '//' if current_path ends with '/'; harmless on POSIX but
    # inconsistent; verify intended.
    solver = CaptioningSolver(model,
                              data,
                              n_epochs=300,
                              batch_size=15,
                              update_rule='adam',
                              learning_rate=0.0006,
                              print_every=3,
                              save_every=10,
                              pretrained_model=None,
                              model_path=current_path + 'model/lstm/',
                              test_model=current_path + 'model/lstm/model-310',
                              log_path=current_path + 'log/',
                              data_path=current_path + '/data_set/',
                              test_result_save_path=current_path +
                              'data_set/test/model_test_result/',
                              models_val_disp=current_path +
                              'model/lstm/models_accuracy_val.txt')

    if train == 1:
        solver.train()
    if validate == 1:
        solver.all_model_val()
    if test == 1:
        solver.test()

    if train_for_test == 1:
        solver.train()