def main(use_inception):
    """Train the caption generator on the COCO data stored under ``./data``.

    Args:
        use_inception: When truthy the model is built for 64 x 2048
            features, otherwise for 196 x 512 features. (Presumably this
            selects between an Inception and a VGG-style extractor --
            TODO confirm against the feature-extraction pipeline.)
    """
    # print(...) with a single argument emits the same text on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print("Loading COCO training data...")
    data = load_coco_data(data_path='./data', split='train')
    print("Done!")
    word_to_idx = data['word_to_idx']

    # The validation split is passed to the solver together with the
    # training split (the solver is created with print_bleu=True).
    val_data = load_coco_data(data_path='./data', split='val')

    feature_shape = [64, 2048] if use_inception else [196, 512]

    # Imported here rather than at module level, as in the original code.
    from core.solver import CaptioningSolver
    from core.model import CaptionGenerator

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=feature_shape,
        dim_embed=512,
        dim_hidden=1800,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=5.0,
        selector=True,
        dropout=True,
    )

    solver = CaptioningSolver(
        model, data, val_data,
        n_epochs=100,
        batch_size=256,
        update_rule='adam',
        learning_rate=0.0005,
        print_every=1000,
        summary_every=10000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/',
    )
    solver.train()
def main():
    """Build the model and solver from ``FLAGS`` and run training.

    Both COCO splits are read from ``./data``. Model and solver
    hyper-parameters come from the module-level ``FLAGS`` object; the two
    splits and the beam size are handed to ``solver.train``.
    """
    train_split = load_coco_data(data_path='./data', split='train')
    vocabulary = train_split['word_to_idx']

    # Validation split, passed to train() next to the training split.
    val_split = load_coco_data(data_path='./data', split='val')

    model_options = dict(
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout,
    )
    captioner = CaptionGenerator(vocabulary, **model_options)

    solver_options = dict(
        n_epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        update_rule=FLAGS.optimizer,
        learning_rate=FLAGS.learning_rate,
        metric=FLAGS.metric,
        print_every=FLAGS.snapshot_steps,
        eval_every=FLAGS.eval_steps,
        pretrained_model=FLAGS.pretrained_model,
        start_from=FLAGS.start_from,
        checkpoint_dir=FLAGS.checkpoint_dir,
        log_path=FLAGS.log_path,
    )
    trainer = CaptioningSolver(captioner, **solver_options)
    trainer.train(train_split, val_split, beam_size=FLAGS.beam_size)
def main():
    """Train a caption generator on the data set located at ``./nusdata``.

    Only the vocabulary is loaded here; the solver receives the data path
    itself (how it reads the data is not visible from this function).
    """
    data_path = './nusdata'
    n_time_step = 11  # used for both the model and the solver

    word_to_idx = load_word_to_idx(data_path=data_path, split='train')

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[196, 512],
        dim_embed=64,
        dim_hidden=1024,
        n_time_step=n_time_step,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    solver = CaptioningSolver(
        model,
        data_path,
        n_epochs=50,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.0001,
        print_every=30,
        save_every=1,
        pretrained_model=None,
        model_path='model/lstm/',
        test_model='model/lstm/model-1',
        print_bleu=True,
        log_path='log/',
        V=len(word_to_idx),  # vocabulary size
        n_time_step=n_time_step,
    )
    solver.train()
def main():
    """Train on the first 70% of the loaded samples, then validate all models.

    NOTE(review): the data is loaded with the split name 'test' although it
    is used for training -- confirm this is intentional.
    """
    train_data = load_data(current_path + 'data_set/', 'test')

    # Truncate every per-sample field to the same leading 70% of the videos.
    cutoff = int(0.7 * len(train_data['video_ids']))
    for field in ('features', 'labels', 'video_ids', 'video_filenames'):
        train_data[field] = train_data[field][:cutoff]

    data = {'train_data': train_data}
    label_to_idx = load_pickle(current_path + 'data_set/label_to_idx.pkl')

    num_images_per_video = 17
    model = CaptionGenerator(
        label_to_idx=label_to_idx,
        dim_feature=[196, 512],
        dim_hidden=1024,
        n_time_step=num_images_per_video,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=False,
    )

    lstm_dir = current_path + 'model/lstm/'
    solver = CaptioningSolver(
        model,
        data,
        n_epochs=500,
        batch_size=15,
        update_rule='adam',
        learning_rate=0.0006,
        print_every=3,
        save_every=10,
        pretrained_model=None,
        model_path=lstm_dir,
        test_model=lstm_dir + 'model-430',
        log_path=current_path + 'log/',
        data_path=current_path + '/data_set/',
        test_result_save_path=current_path + 'data_set/test/model_test_result/',
        models_val_disp=current_path + 'model/models_accuracy_val.txt',
    )

    solver.train()
    solver.all_model_val()
def main():
    """Train the caption generator on the COCO data in ``./data/coco_data``.

    Checkpoints go to ``model/preview_model`` and logs to
    ``log/preview_model_log/``.
    """
    coco_dir = './data/coco_data'

    train_split = load_coco_data(data_path=coco_dir, split='train')
    vocab = train_split['word_to_idx']

    # Validation split, given to the solver (created with print_bleu=True).
    val_split = load_coco_data(data_path=coco_dir, split='val')

    generator = CaptionGenerator(
        vocab,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    trainer = CaptioningSolver(
        generator, train_split, val_split,
        n_epochs=20,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=10,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/preview_model',
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/preview_model_log/',
    )
    trainer.train()
def main():
    """Continue training from a saved checkpoint using the COCO 'val' split.

    The vocabulary is read from the pickled training vocabulary file, and
    the solver starts from ``train_batch/model0.001/model.ckpt-30``.
    The 'val' split is the only data set passed to the solver.
    """
    val_data = load_coco_data(data_path='./data', split='val')

    vocab_file = os.path.join('data/train/word_to_idx.pkl')
    with open(vocab_file, 'rb') as vocab_handle:
        word_to_idx = pickle.load(vocab_handle)

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=21,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    solver_settings = dict(
        n_epochs=100,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.0012,
        print_every=100,
        save_every=5,
        image_path='./image/',
        pretrained_model='train_batch/model0.001/model.ckpt-30',
        model_path='train_batch/model0.002/',
        test_model=None,
        print_bleu=True,
        log_path='train_batch/log/',
    )
    solver = CaptioningSolver(model, val_data, **solver_settings)
    solver.train()
def main():
    """Train on pickled feature/label arrays for the ``0_N/week`` data set.

    Four pickles (train/test features and labels) are loaded from a fixed
    NFS location. The model input width and number of time steps are
    derived from the second dimension of the training arrays.
    """

    def _unpickle(path):
        # One place for the open/load/close sequence used for all four files.
        with open(path, "rb") as f:
            return pickle.load(f)

    train_features = _unpickle("/nfs/private/cas/dataset_0_N/week/train_features.pkl")
    train_labels = _unpickle("/nfs/private/cas/dataset_0_N/week/train_labels.pkl")
    test_features = _unpickle("/nfs/private/cas/dataset_0_N/week/test_features.pkl")
    test_labels = _unpickle("/nfs/private/cas/dataset_0_N/week/test_labels.pkl")

    # Only the special tokens are mapped; they get negative ids here.
    word_to_idx = {"<START>": -3, "<END>": -2, "<NULL>": -1}

    dim_feature = train_features.shape[1]
    n_time_step = train_labels.shape[1] - 1
    # Parenthesised form prints identically on Python 2 and Python 3; the
    # bare print statement is a SyntaxError on Python 3.
    print("n_time_step:%d" % n_time_step)

    model = CaptionGenerator(
        word_to_idx,
        # V is one more than the largest value found in the training features.
        V=int(np.max(train_features) + 1),
        dim_feature=dim_feature,
        dim_embed=128,
        dim_hidden=128,
        n_time_step=n_time_step,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    data = {"features": train_features, "labels": train_labels}
    val_data = {"features": test_features, "labels": test_labels}

    solver = CaptioningSolver(
        model, data, val_data,
        n_epochs=50000,
        batch_size=100,
        update_rule='adam',
        learning_rate=1e-4,
        print_every=100,
        save_every=10,
        image_path='./image/',
        pretrained_model=None,
        model_path='./model/0_N/cnn/week/',
        test_model='/ais/gobi5/linghuan/basic-attention/model/lstm/lstm/model-19',
        print_bleu=True,
        log_path='./log/',
    )
    solver.train()
def main():
    """Train a caption generator together with a discriminator on COCO.

    NOTE(review): the training split is passed to the solver twice (once in
    each of the two data positions) and no separate validation split is
    loaded -- confirm this is intended.
    """
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    # Settings shared by the generator and the discriminator.
    shared_arch = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        selector=True,
        dropout=True,
    )

    model = CaptionGenerator(word_to_idx, alpha_c=1.0, **shared_arch)
    discrim = Discriminator(
        word_to_idx, alpha_c=0.0, learning_rate=0.01, **shared_arch
    )

    solver = CaptioningSolver(
        model, discrim, data, data,
        n_epochs=20,
        batch_size=128,
        gpu_list="1,2,3",
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        train_new='./model/lstm/model-20',
        test_model='model/lstm/model-21',
        print_bleu=True,
        log_path='log/',
    )
    solver.train()
def main():
    """Preprocess the 'train' and 'dev' splits, then train the captioner."""
    preprocessor = PreProcessor(
        batch_size=100,
        max_length=15,
        word_count_threshold=1,
        cnn_model_path='data/imagenet-vgg-verydeep-19.mat',
    )
    # Per the original author's note, cat_img_num sets the number of
    # additional training cat images.
    for split_name in ('train', 'dev'):
        preprocessor.run(split_name, cat_img_num=0)

    # Load the preprocessed splits back from disk.
    train_data = load_data(data_path='./data', split='train')
    dev_data = load_data(data_path='./data', split='dev')
    vocab = train_data['word_to_idx']

    model = CaptionGenerator(
        vocab,
        feature_dim=[196, 512],
        embed_dim=512,
        hidden_dim=1024,
        len_sent=16,
        lamda=1.0,
    )

    solver = CaptioningSolver(
        model, train_data, dev_data,
        n_epochs=5,
        batch_size=128,
        learning_rate=0.001,
        print_every=1000,
        save_every=5,
        model_path='model/lstm/',
        test_model='model/lstm/model-5',
    )
    solver.train()
def main():
    """Train the caption generator on the COCO splits found in ``./data``.

    Checkpoints are written under ``model/lstm/`` and logs under ``log/``.
    """
    training_set = load_coco_data(data_path='./data', split='train')
    validation_set = load_coco_data(data_path='./data', split='val')
    vocab = training_set['word_to_idx']

    generator_config = dict(
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )
    generator = CaptionGenerator(vocab, **generator_config)

    solver_config = dict(
        n_epochs=20,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/',
    )
    CaptioningSolver(
        generator, training_set, validation_set, **solver_config
    ).train()
def main():
    """Train the caption generator on the 'train'/'val' groups of an HDF5 file.

    The file at the module-level ``dataset_path`` is opened read-only and is
    kept open for the whole training run, because the objects returned by
    ``data.get(...)`` are only usable while the file is open. The ``with``
    block guarantees the handle is closed afterwards, including when
    training raises; previously the file was never closed.
    """
    word_to_index = load_pickle(word_to_index_path)

    with h5py.File(dataset_path, 'r') as data:
        train_data = data.get('train')
        val_data = data.get('val')

        model = CaptionGenerator(word_to_index, alpha_c=1.0)
        solver = CaptioningSolver(
            model, train_data, val_data,
            n_epochs=20,
            batch_size=128,
            update_rule='adam',
            learning_rate=0.001,
            print_every=1000,
            save_every=1,
            model_path='model/lstm/',
            test_model='model/lstm/model-10',
            print_bleu=True,
            log_path='log/',
        )
        solver.train()
def main():
    """Run a one-epoch, batch-size-1 training pass with debug output.

    Validation loading is disabled: an empty list is handed to the solver
    in place of a validation split.
    """
    # All print calls take a single argument, so the output is identical on
    # Python 2 and Python 3; the original print statements are a SyntaxError
    # on Python 3.
    print('@@@@@@@@@@@@@')
    data = load_data(data_path='./data', split='train')
    label_to_idx = data['label_to_idx']
    print('@@@@@')

    val_data = []  # no validation split is loaded in this configuration

    # Debug dump of the label mapping and the raw labels.
    print(label_to_idx)
    print(data['labels'])

    model = CaptionGenerator(
        label_to_idx,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=30,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    solver = CaptioningSolver(
        model, data, val_data,
        n_epochs=1,
        batch_size=1,
        update_rule='adam',
        learning_rate=0.001,
        print_every=3,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/',
    )
    print('model + solver')
    solver.train()
def main():
    """Train the caption generator on the COCO splits found in ``./data``.

    Progress messages are printed before and after the data is loaded.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the original print statements are a SyntaxError on Python 3.
    print("start loading data")
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    # Validation split, given to the solver (created with print_bleu=True).
    val_data = load_coco_data(data_path='./data', split='val')
    print("data loaded")  # original author's note: "Elapse time: 15.95"

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,  # original author's note: max length + 1
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    solver = CaptioningSolver(
        model, data, val_data,
        n_epochs=20,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.001,
        print_every=1000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm/',  # change the model path as needed
        test_model='model/lstm/model-10',
        print_bleu=True,
        log_path='log/',
    )
    solver.train()
def main(params):
    """Train the caption generator with hyper-parameters taken from ``params``.

    Args:
        params: Mapping that must provide the keys ``'batch_size'``,
            ``'epoch'``, ``'n_time_step'``, ``'lr'``, ``'model_path'`` and
            ``'log_path'``. All six are read before any data is loaded.
    """
    # Read every setting up front so a missing key fails before data loading.
    batch = params['batch_size']
    epochs = params['epoch']
    steps = params['n_time_step']
    lr = params['lr']
    checkpoint_dir = params['model_path']
    log_dir = params['log_path']

    train_split = load_coco_data(data_path='./data', split='train')
    vocab = train_split['word_to_idx']
    val_split = load_coco_data(data_path='./data', split='val')

    generator = CaptionGenerator(
        vocab,
        dim_feature=[196, 512],
        dim_embed=256,
        dim_hidden=1024,
        n_time_step=steps,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    trainer = CaptioningSolver(
        generator, train_split, val_split,
        n_epochs=epochs,
        batch_size=batch,
        update_rule='adam',
        learning_rate=lr,
        print_every=3000,
        save_every=1,
        image_path='./image/',
        pretrained_model=None,
        model_path=checkpoint_dir,
        test_model='./model/lstm/model-10',
        print_bleu=True,
        log_path=log_dir,
    )
    trainer.train()
def main():
    """Train the caption generator on the COCO splits found in ``./new_data``.

    In addition to the two splits, the solver receives the inverse
    vocabulary (index -> word).
    """
    data = load_coco_data(data_path='./new_data', split='train')
    word_to_idx = data['word_to_idx']

    # Validation split, given to the solver (created with print_bleu=True).
    val_data = load_coco_data(data_path='./new_data', split='val')

    model = CaptionGenerator(
        word_to_idx,
        dim_att=[4, 512],
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True,
    )

    # items() exists on both Python 2 and Python 3; iteritems() is
    # Python-2-only and raises AttributeError on Python 3.
    idx_to_word = {idx: word for word, idx in word_to_idx.items()}

    solver = CaptioningSolver(
        model, data, val_data, idx_to_word,
        n_epochs=15,
        batch_size=64,
        update_rule='adam',
        learning_rate=0.001,
        print_every=50,
        save_every=5,
        image_path='./image/',
        pretrained_model=None,
        model_path='model/lstm2/',
        test_model='model/lstm2/model-15',
        print_bleu=True,
        log_path='log/',
    )
    solver.train()
def main():
    """Parse command-line arguments, build the model and solver, and train.

    The vocabulary comes from the training data set; the model is moved to
    ``args.device`` before being handed to the solver.
    """
    args = parser.parse_args()

    # COCO 2017 caption annotations for both splits.
    train_data = CocoCaptionDataset(
        caption_file='./data/annotations/captions_train2017.json',
        split='train',
    )
    val_data = CocoCaptionDataset(
        caption_file='./data/annotations/captions_val2017.json',
        split='val',
    )
    word_to_idx = train_data.get_vocab_dict()

    generator = CaptionGenerator(
        feature_dim=[args.image_feature_size, args.image_feature_depth],
        embed_dim=args.embed_dim,
        hidden_dim=args.lstm_hidden_size,
        prev2out=args.prev2out,
        len_vocab=len(word_to_idx),
        ctx2out=args.ctx2out,
        enable_selector=args.enable_selector,
        dropout=args.dropout,
    )
    model = generator.to(device=args.device)

    solver_kwargs = dict(
        n_time_steps=args.time_steps,
        batch_size=args.batch_size,
        beam_size=args.beam_size,
        optimizer=args.optimizer,
        learning_rate=args.learning_rate,
        metric=args.metric,
        snapshot_steps=args.snapshot_steps,
        eval_every=args.eval_steps,
        checkpoint=args.checkpoint,
        checkpoint_dir=args.checkpoint_dir,
        log_path=args.log_path,
        device=args.device,
    )
    solver = CaptioningSolver(
        model, word_to_idx, train_data, val_data, **solver_kwargs
    )
    solver.train(num_epochs=args.num_epochs)
from core.utils import load_coco_data
import numpy as np
import pickle

# Script: train the caption generator from two pre-pickled data sets.
#
# Pickle data must be read from a file opened in binary mode; text mode
# ('r') breaks on Python 3 and on platforms that translate line endings.
# The ``with`` blocks also close the handles, which the original
# ``pickle.load(open(...))`` form never did.
with open('data.txt', 'rb') as train_file:
    data = pickle.load(train_file)
word_to_idx = data['word_to_idx']

# Validation data, given to the solver (created with print_bleu=True).
with open('val_data.txt', 'rb') as val_file:
    val_data = pickle.load(val_file)

from core.model import CaptionGenerator
from core.solver import CaptioningSolver

model = CaptionGenerator(
    word_to_idx,
    dim_feature=[196, 512],
    dim_embed=512,
    dim_hidden=1024,
    n_time_step=16,
    prev2out=True,
    ctx2out=True,
    alpha_c=1.0,
    selector=True,
    dropout=True,
)

solver = CaptioningSolver(
    model, data, val_data,
    n_epochs=20,
    batch_size=128,
    update_rule='adam',
    learning_rate=0.001,
    print_every=1000,
    save_every=1,
    image_path='./image/',
    pretrained_model=None,
    model_path='model/lstm/',
    test_model='model/lstm/model-10',
    print_bleu=True,
    log_path='log/',
)

solver.train()
def main(params):
    """Run evaluation, testing, or training depending on ``params.mode``.

    Args:
        params: Configuration object. ``params.mode`` selects the action:

            * ``"EVAL"``  -- ``solver.output_result_for_eval()``
            * ``"TEST"``  -- ``solver.test()``
            * ``"TRAIN"`` -- ``solver.train()`` followed by
              ``solver.train_reinforce()``, each with a freshly built
              model/solver inside its own default ``tf.Graph``.

            Any other mode value does nothing (only the data loader is
            created), as before.
    """
    loader = LoadData(
        input_json=params.input_labels,
        input_h5=params.input_caps,
        feature_path=params.input_feats,
        batch_img=params.batch_img,
        seq_per_img=params.seq_per_img,
        train_only=params.train_only,
    )

    if params.mode == "EVAL":
        _build_solver(params, loader).output_result_for_eval()
    elif params.mode == "TEST":
        _build_solver(params, loader).test()
    elif params.mode == "TRAIN":
        # NOTE(review): the source had lost its indentation; the training
        # calls are assumed to run inside their graph contexts -- confirm.
        with tf.Graph().as_default():
            _build_solver(params, loader).train()
        # The second phase rebuilds the model and solver in a new graph.
        with tf.Graph().as_default():
            _build_solver(params, loader).train_reinforce()


def _build_solver(params, loader):
    """Build a CaptionGenerator and return a CaptioningSolver wrapping it.

    This is the construction sequence that every mode of ``main`` used to
    repeat inline; it must be called inside whatever graph context the
    caller wants the model created in.
    """
    model = CaptionGenerator(
        loader.word_to_idx,
        num_features=params.num_objects,
        dim_feature=params.dim_features,
        dim_embed=params.dim_word_emb,
        dim_hidden=params.rnn_hid_size,
        dim_attention=params.att_hid_size,
        n_time_step=loader.seq_length - 1,
    )
    return CaptioningSolver(
        data_loader=loader,
        model=model,
        ngram_file=params.input_ngram,
        n_epochs=params.epoch,
        update_rule=params.optimizer,
        learning_rate=params.lr,
        print_every=params.print_every,
        start_epoch=params.start_epoch,
        log_path=params.log_path,
        model_path=params.model_path,
        pretrained_model=params.pretrained,
        test_model=params.test_model,
    )
def main():
    """Build the video model/solver and run the phases enabled by module flags.

    Training data is read from ``train_data_vgg.pkl``. When the module-level
    flag ``train_for_test`` equals 1, the contents of ``val_data_vgg.pkl``
    are appended to the training arrays. The module-level flags ``train``,
    ``validate``, ``test`` and ``train_for_test`` then decide which solver
    methods run, in that order.

    The no-op self-assignments of ``train_data['features']`` and
    ``train_data['labels']`` and the commented-out legacy loading code
    were removed.
    """
    with open('train_data_vgg.pkl', 'rb') as handle:
        train_data = pickle.load(handle)
    # The solver-facing key 'video_ids' is filled from 'new_filename'.
    train_data['video_ids'] = train_data['new_filename']

    if train_for_test == 1:
        with open('val_data_vgg.pkl', 'rb') as handle:
            val_data = pickle.load(handle)
        # Append the validation samples to the training samples.
        train_data['features'] = np.concatenate(
            (train_data['features'], val_data['features']), axis=0)
        train_data['labels'] = np.concatenate(
            (train_data['labels'], val_data['labels']), axis=0)
        train_data['video_ids'] = np.concatenate(
            (train_data['new_filename'], val_data['new_filename']), axis=0)

    data = {'train_data': train_data}
    label_to_idx = load_pickle('labels_to_idx.pkl')

    num_images_per_video = 17
    model = CaptionGenerator(
        label_to_idx=label_to_idx,
        dim_feature=[196, 512],
        dim_hidden=1024,
        n_time_step=num_images_per_video,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=False,
    )

    solver = CaptioningSolver(
        model,
        data,
        n_epochs=300,
        batch_size=15,
        update_rule='adam',
        learning_rate=0.0006,
        print_every=3,
        save_every=10,
        pretrained_model=None,
        model_path=current_path + 'model/lstm/',
        test_model=current_path + 'model/lstm/model-310',
        log_path=current_path + 'log/',
        data_path=current_path + '/data_set/',
        test_result_save_path=current_path + 'data_set/test/model_test_result/',
        models_val_disp=current_path + 'model/lstm/models_accuracy_val.txt',
    )

    # Each phase is independent; train() runs twice if both `train` and
    # `train_for_test` are set, exactly as before.
    if train == 1:
        solver.train()
    if validate == 1:
        solver.all_model_val()
    if test == 1:
        solver.test()
    if train_for_test == 1:
        solver.train()