def main():
    """Preprocess the test split, score the caption model on it, then dump
    the ground-truth captions to groundtruth.txt."""
    preprocessor = PreProcessor(batch_size=100, max_length=15, word_count_threshold=1,
                                cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    preprocessor.run('test')

    # Vocabulary built during training preprocessing.
    with open('./data/train/word_to_idx.pkl', 'rb') as vocab_file:
        word_to_idx = pickle.load(vocab_file)

    data = load_data(data_path='./data', split='test')
    model = CaptionGenerator(word_to_idx, feature_dim=[196, 512], embed_dim=512,
                             hidden_dim=1024, len_sent=16, lamda=1.0)
    solver = CaptioningSolver(model, data, None, n_epochs=20, batch_size=128,
                              learning_rate=0.001, print_every=1, save_every=1,
                              image_path='./image/test_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')
    solver.test(data, get_bleu=True)

    caption_file = 'data/annotations/captions_test.json'
    with open(caption_file) as ann_file:
        groundtruth = json.load(ann_file)

    # One "<file_name> <caption>" line per annotation; a blank line closes
    # every group of five captions.
    with open('groundtruth.txt', 'w') as out:
        for idx, ann in enumerate(groundtruth['annotations']):
            out.write(ann['file_name'] + ' ' + ann['caption'] + '\n')
            if (idx + 1) % 5 == 0:
                out.write('\n')
def main():
    """Evaluate a trained attention caption model on held-out COCO data."""
    data = load_coco_data(data_path='./data', split='val')

    # Vocabulary comes from the training-split preprocessing artifacts.
    with open('./data/train/word_to_idx.pkl', 'rb') as vocab_file:
        word_to_idx = pickle.load(vocab_file)

    model = CaptionGenerator(
        word_to_idx, dim_feature=[196, 512], dim_embed=512, dim_hidden=1024,
        n_time_step=16, prev2out=True, ctx2out=True, alpha_c=1.0,
        selector=True, dropout=True)
    solver = CaptioningSolver(
        model, data, data, n_epochs=20, batch_size=128, update_rule='adam',
        learning_rate=0.001, print_every=1000, save_every=1,
        image_path='./image/val2014_resized', pretrained_model=None,
        model_path='model/lstmval/', test_model='model/lstm/model-10',
        print_bleu=True, log_path='log/')

    # solver.test(data, split='val')
    solver.test(data, split='test')
def main():
    """Resize new images from pred_folder, caption them, and report how often
    cat/dog captions mention the right animal.

    Fixes over the original: the scoring loop no longer raises IndexError on
    blank or '#'-less lines in output.txt, nor on an empty name field.
    """
    # Collect any new (non-hidden) images awaiting prediction.
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    new_images = [img for img in os.listdir(pred_folder) if not img.startswith('.')]

    if new_images:
        # Rebuild the resized folder from scratch for this batch, then empty
        # the inbox once its images have been resized.
        shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for img in new_images:
            with open(os.path.join(pred_folder, img), 'r+b') as f:
                with Image.open(f) as image:
                    image = resize_image(image)
                    image.save(os.path.join(resized_folder, img))
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    prep = PreProcessor(batch_size=100, max_length=15, word_count_threshold=1,
                        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    prep.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    data = load_data(data_path='./data', split='pred')
    model = CaptionGenerator(word_to_idx, feature_dim=[196, 512], embed_dim=512,
                             hidden_dim=1024, len_sent=16, lamda=1.0)
    solver = CaptioningSolver(model, data, None, n_epochs=5, batch_size=128,
                              learning_rate=0.001, print_every=1, save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')
    solver.test(data)

    # A caption counts as correct when it mentions its animal. Lines look like
    # "<name>#<caption>"; names starting with 'C' are cat images.
    correct_cap = {'cat': 0, 'dog': 0}
    with open('output.txt') as f:
        for line in f:
            line = line.split('\n')[0]
            if '#' not in line:
                continue  # skip blank/malformed lines instead of crashing
            parts = line.split('#')
            name, cap = parts[0], parts[1]
            animal = 'cat' if name.startswith('C') else 'dog'
            if animal in cap:
                correct_cap[animal] += 1
    # NOTE(review): the denominator 100 is hard-coded -- this assumes exactly
    # 100 cat and 100 dog images per run; confirm against the dataset.
    print('Accuracy on cat images: ' + str(correct_cap['cat'] / float(100)))
    print('Accuracy on dog images: ' + str(correct_cap['dog'] / float(100)))
def main():
    """Evaluate the NUS-trained caption generator on the validation split."""
    word_to_idx = load_word_to_idx(data_path='./nusdata', split='train')
    val_data = load_coco_data(data_path='./nusdata', split='val')
    # test_data = load_coco_data(data_path='./nusdata', split='test')

    model = CaptionGenerator(
        word_to_idx, dim_feature=[196, 512], dim_embed=64, dim_hidden=1024,
        n_time_step=11, prev2out=True, ctx2out=True, alpha_c=1.0,
        selector=True, dropout=True)

    # `modelname`, `filename` and `thres` are expected to be module-level
    # globals (e.g. parsed from the command line) -- confirm before running.
    data_path = './nusdata'
    solver = CaptioningSolver(
        model, data_path, n_epochs=100, batch_size=1, update_rule='adam',
        learning_rate=0.0001, print_every=100, save_every=1,
        pretrained_model=None, model_path='model/lstm/',
        test_model=('model/lstm/%s' % modelname), print_bleu=True,
        log_path='log/', V=len(word_to_idx))
    solver.test(val_data, split='val', filename=filename,
                attention_visualization=False, thres=float(thres))
def main():
    """Train/evaluate the CNN-feature caption model on the weekly 0_N dataset.

    Fix: the original used the Python-2-only `print` statement, which is a
    SyntaxError on Python 3; the parenthesized form below produces identical
    output on both interpreters. The four pickle loads are also deduplicated.
    """
    def _load_pickle(path):
        # Load one pickled object from the shared NFS dataset directory.
        with open(path, "rb") as f:
            return pickle.load(f)

    base = "/nfs/private/cas/dataset_0_N/week/"
    train_features = _load_pickle(base + "train_features.pkl")
    train_labels = _load_pickle(base + "train_labels.pkl")
    test_features = _load_pickle(base + "test_features.pkl")
    test_labels = _load_pickle(base + "test_labels.pkl")

    # Special tokens use negative ids so they cannot collide with the
    # non-negative feature values used as vocabulary entries.
    word_to_idx = {"<START>": -3, "<END>": -2, "<NULL>": -1}
    dim_feature = test_features.shape[1]
    n_time_step = test_labels.shape[1] - 1  # labels include the <START> step
    print("n_time_step:%d" % n_time_step)

    model = CaptionGenerator(word_to_idx, V=int(np.max(train_features) + 1),
                             dim_feature=dim_feature, dim_embed=128,
                             dim_hidden=128, n_time_step=n_time_step,
                             prev2out=True, ctx2out=True, alpha_c=1.0,
                             selector=True, dropout=True)
    data = {"features": train_features, "labels": train_labels}
    val_data = {"features": test_features, "labels": test_labels}
    solver = CaptioningSolver(model, data, val_data, n_epochs=50000,
                              batch_size=100, update_rule='adam',
                              learning_rate=1e-4, print_every=100,
                              save_every=10, image_path='./image/',
                              pretrained_model=None,
                              model_path='./model/0_N/cnn/week/',
                              test_model='./model/0_N/cnn/week/model-255',
                              print_bleu=True, log_path='./log/')
    solver.test(val_data, split='val', save_sampled_labels=True)
def main():
    """Evaluate the attention caption model on the COCO validation split."""
    # The training split supplies the vocabulary mapping.
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    # Validation data is what actually gets scored below.
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(
        word_to_idx, dim_feature=[196, 512], dim_embed=512, dim_hidden=1024,
        n_time_step=16, prev2out=True, ctx2out=True, alpha_c=1.0,
        selector=True, dropout=True)

    # `test_model_path` is expected to be defined at module level.
    solver = CaptioningSolver(
        model, data, val_data, n_epochs=10, batch_size=128, update_rule='adam',
        learning_rate=0.001, print_every=10, save_every=1, image_path='./image/',
        pretrained_model=None, model_path='model/lstm/',
        test_model=test_model_path, print_bleu=True, log_path='log/')

    # solver.train()
    solver.test(val_data, split='val')
def main():
    """Evaluate the custom-object caption model on its own training images.

    Fix: the Windows-style paths were plain string literals containing the
    invalid escape sequences ``\\i`` and ``\\O`` (deprecated in Python 3,
    slated to become SyntaxError). Raw strings yield byte-identical paths.
    """
    # (Older COCO-based loading kept from the original for reference.)
    # data = load_coco_data(data_path='./our_data', split='train')
    # word_to_idx = data['word_to_idx']
    # test_data = load_coco_data(data_path='./our_data', split='test')

    data = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    # NOTE(review): `our_test` also loads split='train' -- this looks like it
    # deliberately captions the training images, but confirm it shouldn't be
    # split='test'.
    our_test = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx, dim_feature=[216, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=26, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True,
                             dropout=False)
    solver = CaptioningSolver(model, data, our_test, n_epochs=5000,
                              batch_size=64, update_rule='adam',
                              learning_rate=1e-4, print_every=1000,
                              save_every=100, image_path='./image/',
                              pretrained_model=None,
                              model_path='model/our_train0414/',
                              test_model='model/our_train0414/model-2000',
                              print_bleu=False, log_path='log/')
    # solver.train()
    solver.test(our_test)
def test(toy=None):
    """Evaluate the highway-LSTM caption model on the validation split.

    Args:
        toy: pass True to use the 'toy_'-prefixed subset of the preprocessed
            data files; anything else (including the default None) selects
            the full dataset.
    """
    # Map the flag onto the filename prefix used by the preprocessed data.
    # `is True` preserves the original `== True` semantics for the intended
    # bool/None inputs while avoiding the equality-comparison-to-True smell.
    toy = "toy_" if toy is True else ""

    data_path = os.path.join('./data', 'train')
    with open(os.path.join(data_path, '%sword_to_idx.pkl' % toy), 'rb') as f:
        word_to_idx = pickle.load(f)
    # The same prefix string is forwarded so the loader picks matching files.
    val_data = load_coco_data(data_path='./data', split='val', toy=toy)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=512, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True,
                             dropout=True)
    solver = CaptioningSolver(model, None, val_data, n_epochs=20,
                              batch_size=128, update_rule='adam',
                              learning_rate=0.001, print_every=1000,
                              save_every=1, image_path='./image/',
                              pretrained_model=None,
                              model_path='model/HighwayLSTM01_lstm/',
                              test_model='model/HighwayLSTM01_lstm/model-20',
                              print_bleu=True, log_path='log/')
    solver.test(val_data)
def main():
    """Run beam-search caption inference on the split selected via FLAGS."""
    # Dataset and vocabulary.
    data = load_coco_data(data_path='./data', split=FLAGS.split)
    with open('./data/train/word_to_idx.pkl', 'rb') as vocab_file:
        word_to_idx = pickle.load(vocab_file)

    # All hyperparameters come from command-line FLAGS.
    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    solver = CaptioningSolver(model,
                              batch_size=FLAGS.batch_size,
                              test_checkpoint=FLAGS.test_checkpoint)
    solver.test(data, beam_size=3, attention_visualization=FLAGS.att_vis)
def main():
    """Generate and save captions for the test1 image set.

    Fixes: the original used the Python-2-only `print` statement (SyntaxError
    on Python 3) -- the parenthesized form prints identically on both -- and
    the "Fnished" typo in the progress message is corrected.
    """
    test1_data = {
        'features': hickle.load('./data/test1/test1.features.hkl'),
        'file_names': load_pickle('./data/test1/test1.file.names.pkl'),
    }
    print("Finished loading...")

    with open(os.path.join('data/train/word_to_idx.pkl'), 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=21, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True,
                             dropout=True)
    solver = CaptioningSolver(
        model, test1_data, n_epochs=100, batch_size=50, update_rule='adam',
        learning_rate=0.001, print_every=100, save_every=5,
        image_path='./image/',
        test_model='./train_batch/model0.001/model.ckpt-30',
        print_bleu=True, log_path='train_batch/log_test/')
    solver.test(test1_data, split='test1', attention_visualization=False,
                save_sampled_captions=True)
def main():
    """Build the hard-attention caption model and score it on the test split."""
    # Validation data (in training mode) supplies the vocabulary and feeds
    # the solver's train/val slots.
    data = load_coco_data(data_path='./data', split='val', if_train=True)
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(
        word_to_idx, dim_feature=[196, 512], dim_embed=512, dim_hidden=1024,
        n_time_step=16, prev2out=True, ctx2out=True, alpha_c=1.0,
        selector=True, dropout=True)
    solver = CaptioningSolver(
        model, data, data, n_epochs=10, batch_size=100, update_rule='adam',
        learning_rate=0.001, print_every=1000, save_every=5,
        image_path='./image/', pretrained_model=None,
        model_path='model/lstm_hard/', test_model='model/lstm_hard/model-40',
        print_bleu=True, log_path='log/')

    # Final scoring happens on the held-out test split.
    test_data = load_coco_data(data_path='./data', split='test', if_train=False)
    solver.test(test_data, split='test')


if __name__ == "__main__":
    main()
def main():
    """Resize any new images dropped into pred_folder, run the caption model
    over them, and return the prediction results."""
    if not os.path.exists(pred_folder):
        os.makedirs(pred_folder)
    # Only non-hidden files count as new images.
    new_images = [name for name in os.listdir(pred_folder)
                  if not name.startswith('.')]

    if new_images:
        # Start the resized folder fresh for this batch, then empty the inbox
        # once its images have been processed.
        shutil.rmtree(resized_folder)
        os.makedirs(resized_folder)
        for name in new_images:
            with open(os.path.join(pred_folder, name), 'r+b') as fh:
                with Image.open(fh) as image:
                    image = resize_image(image)
                    image.save(os.path.join(resized_folder, name))
        shutil.rmtree(pred_folder)
        os.makedirs(pred_folder)

    prep = PreProcessor(batch_size=5, max_length=15, word_count_threshold=1,
                        cnn_model_path='data/imagenet-vgg-verydeep-19.mat')
    prep.run('pred')

    with open('./data/train/word_to_idx.pkl', 'rb') as fh:
        word_to_idx = pickle.load(fh)

    data = load_data(data_path='./data', split='pred')
    model = CaptionGenerator(word_to_idx, feature_dim=[196, 512], embed_dim=512,
                             hidden_dim=1024, len_sent=16, lamda=1.0)
    solver = CaptioningSolver(model, data, None, n_epochs=5, batch_size=5,
                              learning_rate=0.001, print_every=1, save_every=1,
                              image_path='./image/pred_resized',
                              model_path='./model/lstm',
                              test_model='./model/lstm/model-5')
    # display=True prints the results in the terminal as well.
    return solver.test(data, display=True)
# NOTE(review): this chunk begins mid-argument-list -- the CaptioningSolver(...)
# call it completes starts outside this view, so it cannot be safely reformatted
# or rewritten in isolation. It finishes the solver construction, then runs
# val/test evaluation (the `# In[..]:` markers are notebook-cell residue).
n_epochs=15, batch_size=128, update_rule='adam', learning_rate=0.0025, print_every=2000, save_every=1, image_path='./image/val2014_resized', pretrained_model=None, model_path='./model/lstm', test_model='./model/lstm3/model-18', print_bleu=False, log_path='./log/') # In[7]: solver.test(data, split='val') # In[8]: test = load_coco_data(data_path='./data', split='test') # In[13]: tf.get_variable_scope().reuse_variables() solver.test(test, split='test') # In[14]: evaluate(data_path='./data', split='val') # In[15]:
def main(params):
    """Dispatch an EVAL / TEST / TRAIN run of the caption model per params.mode.

    Fix: the original constructed CaptionGenerator and CaptioningSolver with
    byte-identical argument lists in four separate places; the `_build` helper
    removes that duplication while preserving when and where each pair is
    constructed (in particular, inside the per-phase tf.Graph contexts for
    TRAIN).

    Args:
        params: parsed options object carrying data paths, model sizes,
            optimizer settings, and the run mode ("EVAL" / "TEST" / "TRAIN").
    """
    def _build(loader):
        # Model and solver were built identically in every mode; build once here.
        model = CaptionGenerator(loader.word_to_idx,
                                 num_features=params.num_objects,
                                 dim_feature=params.dim_features,
                                 dim_embed=params.dim_word_emb,
                                 dim_hidden=params.rnn_hid_size,
                                 dim_attention=params.att_hid_size,
                                 n_time_step=loader.seq_length - 1)
        return CaptioningSolver(data_loader=loader, model=model,
                                ngram_file=params.input_ngram,
                                n_epochs=params.epoch,
                                update_rule=params.optimizer,
                                learning_rate=params.lr,
                                print_every=params.print_every,
                                start_epoch=params.start_epoch,
                                log_path=params.log_path,
                                model_path=params.model_path,
                                pretrained_model=params.pretrained,
                                test_model=params.test_model)

    loader = LoadData(input_json=params.input_labels, input_h5=params.input_caps,
                      feature_path=params.input_feats, batch_img=params.batch_img,
                      seq_per_img=params.seq_per_img,
                      train_only=params.train_only)

    if params.mode == "EVAL":
        _build(loader).output_result_for_eval()
    elif params.mode == "TEST":
        _build(loader).test()
    elif params.mode == "TRAIN":
        # Cross-entropy pre-training and REINFORCE fine-tuning each run in a
        # fresh TensorFlow graph, exactly as in the original.
        with tf.Graph().as_default():
            _build(loader).train()
        with tf.Graph().as_default():
            _build(loader).train_reinforce()
import cPickle as pickle
import tensorflow as tf
from core.solver import CaptioningSolver
from core.model import CaptionGenerator
from core.utils import load_coco_data
from core.bleu import evaluate

# This file is a notebook export. The IPython magics below are syntax errors
# in a plain .py module, so they are preserved as comments to keep the file
# parseable outside a notebook.
# %matplotlib inline
# %load_ext autoreload
# %autoreload 2

# NOTE(review): `plt` is never imported in this chunk -- the notebook
# presumably ran `import matplotlib.pyplot as plt` in an earlier cell;
# confirm before running as a script.
plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the validation split and the training-time vocabulary.
data = load_coco_data(data_path='./data', split='val')
with open('./data/train/word_to_idx.pkl', 'rb') as f:
    word_to_idx = pickle.load(f)

model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                         dim_hidden=1024, n_time_step=16, prev2out=True,
                         ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
solver = CaptioningSolver(model, data, data, n_epochs=15, batch_size=128,
                          update_rule='adam', learning_rate=0.0025,
                          print_every=2000, save_every=1,
                          image_path='./image/val2014_resized',
                          pretrained_model=None, model_path='./model/lstm',
                          test_model='./model/lstm/model-2',
                          print_bleu=False, log_path='./log/')
solver.test(data, split='val')
# Evaluate the Chinese-caption model on the custom test split.
# test = load_coco_data(data_path='./data', split='test')
test = load_my_data(data_path='./data', split='X_test')

# Vocabulary produced by the ch_data training preprocessing.
with open('./ch_data/train/word_to_idx.pkl', 'rb') as vocab_file:
    word_to_idx = pickle.load(vocab_file)

model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                         dim_hidden=1024, n_time_step=16, prev2out=True,
                         ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
solver = CaptioningSolver(model, None, None, n_epochs=15, batch_size=128,
                          update_rule='adam', learning_rate=0.0025,
                          print_every=2000, save_every=1,
                          image_path='./image/val2014_resized',
                          pretrained_model=None, model_path='./ch_model/lstm',
                          test_model='./ch_model/lstm/model-20',
                          print_bleu=False, log_path='./log/')
solver.test(test, split='test')
# Build the solver around `model` and `data` defined earlier in the notebook,
# then score the hard-attention variant on the test split.
solver = CaptioningSolver(model, data, data, n_epochs=10, batch_size=50,
                          update_rule='adam', learning_rate=0.001,
                          print_every=1000, save_every=1,
                          image_path='./image/', pretrained_model=None,
                          model_path='model/lstm/',
                          test_model='model/lstm/model-10',
                          print_bleu=True, log_path='log/')

# In[7]:
# solver.test(data, split='val')

# In[8]:
test = load_coco_data(data_path='./data', split='test')

# In[13]:
# tf.get_variable_scope().reuse_variables()
solver.test('hard', test, split='test')

# In[14]:
# evaluate(data_path='./data', split='val')

# In[15]:
evaluate(data_path='./data', split='test')
def main():
    """Train / validate / test the video-frame caption model according to the
    module-level integer flags (`train`, `validate`, `test`, `train_for_test`)."""
    # Older data_set/-based loader kept from the original for reference:
    # train_data = load_data(current_path + 'data_set/', 'test')

    with open('train_data_vgg.pkl', 'rb') as handle:
        train_data = pickle.load(handle)
    # Video ids come from the pickle's 'new_filename' field.
    train_data['video_ids'] = train_data['new_filename']

    if train_for_test == 1:
        # Fold the validation set into training for the final test run.
        with open('val_data_vgg.pkl', 'rb') as handle:
            val_data = pickle.load(handle)
        train_data['features'] = np.concatenate(
            (train_data['features'], val_data['features']), axis=0)
        train_data['labels'] = np.concatenate(
            (train_data['labels'], val_data['labels']), axis=0)
        train_data['video_ids'] = np.concatenate(
            (train_data['new_filename'], val_data['new_filename']), axis=0)

    data = {'train_data': train_data}
    # label_to_idx = load_pickle(current_path + 'data_set/label_to_idx.pkl')
    label_to_idx = load_pickle('labels_to_idx.pkl')
    num_images_per_video = 17  # one LSTM time step per sampled frame

    model = CaptionGenerator(
        label_to_idx=label_to_idx,
        # dim_feature=[49, 1280],
        dim_feature=[196, 512],
        dim_hidden=1024,
        n_time_step=num_images_per_video,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=False)
    solver = CaptioningSolver(
        model, data, n_epochs=300, batch_size=15, update_rule='adam',
        learning_rate=0.0006, print_every=3, save_every=10,
        pretrained_model=None,
        model_path=current_path + 'model/lstm/',
        test_model=current_path + 'model/lstm/model-310',
        log_path=current_path + 'log/',
        data_path=current_path + '/data_set/',
        test_result_save_path=current_path + 'data_set/test/model_test_result/',
        models_val_disp=current_path + 'model/lstm/models_accuracy_val.txt')

    if train == 1:
        solver.train()
    if validate == 1:
        solver.all_model_val()
    if test == 1:
        solver.test()
    if train_for_test == 1:
        solver.train()
# NOTE(review): this chunk begins mid-argument-list -- the CaptionGenerator(...)
# call it completes starts outside this view, so it cannot be safely reformatted
# or rewritten in isolation. It finishes the model construction, builds the
# solver for the preview checkpoint, then evaluates on the test split.
alpha_c=1.0, selector=True, dropout=True) solver = CaptioningSolver(model, data, data, n_epochs=20, batch_size=128, update_rule='adam', learning_rate=0.0025, print_every=2000, save_every=1, image_path='./image/val2014_resized', pretrained_model=None, model_path='./model/preview_model/', test_model='./model/preview_model/model-20', print_bleu=False, log_path='./log/') #solver.test(data, split='val') #test = load_coco_data(data_path='./data/coco_data', split='test') #tf.get_variable_scope().reuse_variables() solver.test(data, split='test') #evaluate(data_path='./data/coco_data', split='val') evaluate(data_path='./data/coco_data', split='test') #solver.test(data, split='test') # #evaluate(data_path='./data', split='test')