def main(params):
    # Main training and validation loop goes here.
    # This code should be independent of which model we use.
    batch_size = params["batch_size"]
    max_epochs = params["max_epochs"]

    # Fetch the data provider object
    dp = DataProvider(params)

    # Get the solver object
    solver = Solver(params["solver"])

    ## Add the model initialization code here
    modelObj = getModelObj(params)

    # Now let's build a gradient computation graph and RMSProp update mechanism
    # grads = tensor.grad(cost, wrt=model.values())
    # lr = tensor.scalar(name='lr', dtype=config.floatX)
    # f_grad_shared, f_update, zg, rg, ud = solver.build_solver_model(lr, model, grads,
    #                                                                 inp_list, cost, params)

    num_frames_total = dp.getSplitSize("train")
    num_iters_one_epoch = num_frames_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch

    for it in xrange(max_iters):
        batch = dp.getBatch(batch_size)
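# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): the commented-out lines
# in main() above refer to building a gradient graph and an RMSProp update via
# solver.build_solver_model(). A minimal single-function version of that idea
# in plain Theano could look like the following; the name build_rmsprop and its
# keyword arguments (decay, epsilon) are placeholders, not the project's actual
# solver API, and it assumes tparams is an OrderedDict of Theano shared
# variables with grads ordered like tparams.values().
import theano
import theano.tensor as tensor


def build_rmsprop(lr, tparams, grads, inp_list, cost, decay=0.9, epsilon=1e-6):
    # One running average of squared gradients per parameter.
    accs = [theano.shared(p.get_value() * 0., name='%s_acc' % k)
            for k, p in tparams.items()]
    new_accs = [decay * a + (1. - decay) * g ** 2 for a, g in zip(accs, grads)]
    # Update the accumulators, then scale each gradient step by 1/sqrt(acc).
    updates = list(zip(accs, new_accs))
    updates += [(p, p - lr * g / tensor.sqrt(a + epsilon))
                for (k, p), g, a in zip(tparams.items(), grads, new_accs)]
    # A single compiled function computes the cost and applies the update.
    return theano.function(inp_list + [lr], cost, updates=updates)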
def main(params):
    # Main training and validation loop goes here.
    # This code should be independent of which model we use.
    batch_size = params['batch_size']
    max_epochs = params['max_epochs']

    # Fetch the data provider object
    dp = DataProvider(params)
    params['feat_size'] = dp.feat_size
    params['phone_vocab_size'] = dp.phone_vocab

    # Get the solver object; optional, not needed for Keras
    # solver = Solver(params['solver'])

    ## Add the model initialization code here
    modelObj = getModelObj(params)

    # Build the model architecture
    f_train = modelObj.build_model(params)

    if params['saved_model'] is not None:
        cv = json.load(open(params['saved_model'], 'r'))
        modelObj.model.load_weights(cv['weights_file'])
        print 'Continuing training from model %s' % (params['saved_model'])

    train_x, train_y, val_x, val_y = dp.get_data_array(params['model_type'],
                                                       ['train', 'devel'],
                                                       cntxt=params['context'])
    fname, best_val_loss = modelObj.train_model(train_x, train_y, val_x, val_y, params)

    checkpoint = {}
    checkpoint['params'] = params
    checkpoint['weights_file'] = fname.format(val_loss=best_val_loss)

    filename = 'model_%s_%s_%s_%.2f.json' % (params['dataset'], params['model_type'],
                                             params['out_file_append'], best_val_loss)
    filename = os.path.join(params['out_dir'], filename)
    print 'Saving to File %s' % (filename)
    json.dump(checkpoint, open(filename, 'w'))

    ## Now let's build a gradient computation graph and RMSProp update mechanism
    ## grads = tensor.grad(cost, wrt=model.values())
    ## lr = tensor.scalar(name='lr', dtype=config.floatX)
    ## f_grad_shared, f_update, zg, rg, ud = solver.build_solver_model(lr, model, grads,
    ##                                                                 inp_list, cost, params)
    # num_frames_total = dp.getSplitSize('train')
    # num_iters_one_epoch = num_frames_total / batch_size
    # max_iters = max_epochs * num_iters_one_epoch
    #
    # for it in xrange(max_iters):
    #     batch = dp.getBatch(batch_size)
    #     cost = f_train(*batch)
    #     cost = f_grad_shared(inp_list)
    #     f_update(params['learning_rate'])
    #     # Save model periodically

    return modelObj
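# ---------------------------------------------------------------------------
# Illustrative usage sketch (hypothetical values, not a tested configuration):
# main() reads a flat params dict. The keys below are the ones this function
# itself touches; DataProvider, getModelObj and train_model read further keys
# not shown here. All values are placeholders.
if __name__ == '__main__':
    example_params = {
        'batch_size': 128,
        'max_epochs': 20,
        'model_type': 'MLP',          # forwarded to getModelObj()
        'context': 4,                 # passed as cntxt to get_data_array()
        'saved_model': None,          # path to a checkpoint json to resume from
        'dataset': 'dataset_name',
        'out_file_append': 'run1',
        'out_dir': 'models',
    }
    main(example_params)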
def testModel(modelidx):
    dp = DataProvider(
        config.batchsize,
        config.video_frames, config.video_feature_num, config.video_feature_dim,
        config.word_len_before, config.word_len_after,
        video_dir=config.video_data_dir,
        dataset_dir=config.text_data_dir,
        word2vec_dir=config.word2vec_model_dir,
        wordemb_dim=config.word_emb
    )

    modelFile = os.path.join(config.modelsave_dir, str(modelidx) + '_pool5.npy')
    model = np.load(modelFile)
    net, train_fun, test_fun, val_fun, sh_lr = compile_func(model)

    # test
    test_sample = 0
    total_acc = 0
    test_data = dp.load_dataset(datatype='test')
    for batch in dp.iterator(test_data, shuffle=False):
        (num_samples,
         videos, video_masks,
         befores, before_masks, afters, afters_masks,
         labels) = dp.loadOneBatch(batch)
        prediction, acc = test_fun(
            videos, video_masks,
            befores, before_masks, afters, afters_masks,
            labels
        )
        total_acc += acc * num_samples
        test_sample += num_samples
        # fp_test.write('epoch: ' + str(epoch) + ' err: ' + str(total_err) + '\n')

    total_acc /= test_sample
    print 'acc: ' + str(total_acc)
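# ---------------------------------------------------------------------------
# Note: weighting each batch accuracy by num_samples before dividing by the
# total sample count gives the exact dataset-level accuracy even when the
# final batch is smaller than config.batchsize.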
def main(argv): print("Testing the model ...") config = Config() config.beam_size = FLAGS.beam_size config.phase = 'test' if not os.path.exists(config.test_result_dir): os.mkdir(config.test_result_dir) print("Building the vocabulary...") vocabulary = Vocabulary(config.vocabulary_size) vocabulary.load(config.vocabulary_file) print("Vocabulary built.") print("Number of words = %d" % (vocabulary.size)) test_data = DataProvider(config) test_gt_coco = test_data.returncoco() model = ShowAttendTell(config) model.build() with tf.Session() as sess: model.setup_graph_from_checkpoint(sess, config.caption_checkpoint_dir) tf.get_default_graph().finalize() captiongen = CaptionGenerator(model, vocabulary, config.beam_size, config.max_caption_length, config.batch_size) # Generate the captions for the images results = [] idx = 0 for k in tqdm(list(range(test_data.num_batches)), desc='batch'): batch, images = test_data.next_batch_and_images() caption_data = captiongen.beam_search(sess, images, vocabulary) fake_cnt = 0 if k<test_data.num_batches-1 \ else test_data.fake_count for l in range(test_data.batch_size - fake_cnt): word_idxs = caption_data[l][0].sentence score = caption_data[l][0].score caption = vocabulary.get_sentence(word_idxs) results.append({ 'image_id': test_data.image_ids[idx], 'caption': caption }) idx += 1 # Save the result in an image file, if requested if config.save_test_result_as_image: image_file = batch[l] image_name = image_file.split(os.sep)[-1] image_name = os.path.splitext(image_name)[0] img = plt.imread(image_file) plt.switch_backend('agg') plt.imshow(img) plt.axis('off') plt.title(caption) plt.savefig( os.path.join(config.test_result_dir, image_name + '_result.png')) fp = open(config.test_result_file, 'wb') json.dump(results, fp) fp.close() # Evaluate these captions test_result_coco = test_gt_coco.loadRes(config.test_result_file) scorer = COCOEvalCap(test_gt_coco, test_result_coco) scorer.evaluate() print("Evaluation complete.")
def main(params):
    # Check whether a model_list was given
    if params['model_list'] is not None:
        with open(params['model_list']) as f:
            model_file_list = f.readlines()
    else:
        model_file_list = [params['saved_model']]

    # Only load the data provider once
    dp_loaded = False
    for m in model_file_list:
        m = re.sub("\n", "", m)
        cv = json.load(open(m, 'r'))
        cv_params = cv['params']
        if params['dataset'] is not None:
            cv_params['dataset'] = params['dataset']
            cv_params['dataset_desc'] = params['dataset_desc']
        if not dp_loaded:
            dp_loaded = True
            dp = DataProvider(cv_params)
        cv_params['feat_size'] = dp.feat_size
        cv_params['phone_vocab_size'] = dp.phone_vocab

        # Get the model object and build the model architecture
        modelObj = getModelObj(cv_params)
        f_train = modelObj.build_model(cv_params)
        modelObj.model.load_weights(cv['weights_file'])

        splt = params['split']
        if splt == 'eval':
            dataDesc = json.load(open(os.path.join('data', cv_params['dataset'],
                                                   cv_params['dataDesc']), 'r'))
            ph2bin = dataDesc['ph2bin']
            phoneList = [''] * len(ph2bin)
            for ph in ph2bin:
                phoneList[ph2bin[ph].split().index('1')] = ph

            in_dim = cv_params['in_dim']
            in_file_list = dataDesc[splt + '_x']
            out_file_list = dataDesc[splt + '_y']
            for i in xrange(len(in_file_list)):
                test_x = np.fromfile(in_file_list[i], dtype=np.float32, sep=' ', count=-1)
                test_x.resize(len(test_x) / in_dim, in_dim)
                pred_y = modelObj.model.predict_classes(test_x, batch_size=16)
                out_file = os.path.basename(out_file_list[i])
                with io.open(os.path.join('eval_out', out_file), 'w', encoding='utf-8') as f:
                    out_labels = [phoneList[p] for p in pred_y]
                    f.write('\n'.join(out_labels))
                print out_file
        else:
            inpt_x, inpt_y = dp.get_data_array(cv_params['model_type'], [params['split']],
                                               cntxt=cv_params['context'])
            # predOut = modelObj.model.predict_classes(inpt_x, batch_size=100)
            predOut = modelObj.model.predict(inpt_x, batch_size=100)
from config.config import Config
from utils.dataprovider import DataProvider
from utils.metrics.pycocoevalcap.eval import COCOEvalCap

print("Evaluating the model ...")
config = Config()
config.phase = 'eval'
eval_data = DataProvider(config)
eval_gt_coco = eval_data.returncoco()

# Evaluate these captions
eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
scorer.evaluate()
def main(params):
    # Check whether a model_list was given
    if params['model_list'] is not None:
        with open(params['model_list']) as f:
            model_file_list = f.readlines()
    else:
        model_file_list = [params['saved_model']]

    # Only load the data provider once
    dp_loaded = False
    for m in model_file_list:
        m = re.sub("\n", "", m)
        cv = json.load(open(m, 'r'))
        cv_params = cv['params']
        if params['dataset'] is not None:
            cv_params['dataset'] = params['dataset']
            cv_params['dataset_desc'] = params['dataset_desc']
        if not dp_loaded:
            dp_loaded = True
            dp = DataProvider(cv_params)
        cv_params['feat_size'] = dp.feat_size
        cv_params['phone_vocab_size'] = dp.phone_vocab

        # Get the model object and build the model architecture
        if cv_params['model_type'] != 'DBN':
            modelObj = getModelObj(cv_params)
            f_train = modelObj.build_model(cv_params)
            modelObj.model.load_weights(cv['weights_file'])
        else:
            modelObj = cPickle.load(open(cv['weights_file']))

        inpt_x, inpt_y = dp.get_data_array(cv_params['model_type'], [params['split']],
                                           cntxt=cv_params['context'])
        predOut = modelObj.model.predict_classes(inpt_x, batch_size=100)
        accuracy = 100.0 * np.sum(predOut == inpt_y.nonzero()[1]) / predOut.shape[0]
        print('Accuracy of %s on the %s set is %0.2f' % (params['saved_model'],
                                                         params['split'], accuracy))

        # Get the phone order
        ph2bin = dp.dataDesc['ph2bin']
        phoneList = [''] * len(ph2bin)
        for ph in ph2bin:
            phoneList[ph2bin[ph].split().index('1')] = ph

        # Plot the confusion matrix
        if params['plot_confmat'] != 0:
            cm = confusion_matrix(inpt_y.nonzero()[1], predOut)
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            plt.figure()
            plot_confusion_matrix(cm, phoneList)
            plt.show()

        if params['dump_lna_dir'] is not None:
            spt = params['split']
            phones_targ = [l.strip() for l in codecs.open(params['lna_ph_order'],
                                                          encoding='utf-8')]
            assert set(phones_targ) == set(phoneList)
            shuffle_order = np.zeros(len(phones_targ), dtype=np.int32)
            for i, ph in enumerate(phones_targ):
                shuffle_order[i] = phoneList.index(ph)

            ## Now, for every utterance, predict probabilities and dump lna files
            for i, inp_file in enumerate(dp.dataDesc[spt + '_x']):
                lna_file = os.path.join(params['dump_lna_dir'],
                                        os.path.basename(inp_file).split('.')[0] + '.lna')
                inpt_x, inp_y = dp.get_data_array(cv_params['model_type'], [params['split']],
                                                  cntxt=cv_params['context'],
                                                  shufdata=0, idx=i)
                probs = modelObj.model.predict(inpt_x, batch_size=100)
                # dump_lna(inp_y[:, shuffle_order].flatten(), lna_file, probs.shape[1])
                dump_lna(probs[:, shuffle_order].flatten(), lna_file, probs.shape[1])
                print lna_file
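# ---------------------------------------------------------------------------
# Note (illustrative): the confusion-matrix normalisation above divides each
# row by its sum, so entry (i, j) becomes the fraction of frames whose true
# phone is i that were predicted as phone j; the diagonal is then the
# per-phone recall.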
def main():
    dp = DataProvider(
        config.batchsize,
        config.video_frames, config.video_feature_num, config.video_feature_dim,
        config.word_len_before, config.word_len_after,
        video_dir=config.video_data_dir,
        dataset_dir=config.text_data_dir,
        word2vec_dir=config.word2vec_model_dir,
        wordemb_dim=config.word_emb
    )
    train_data = dp.load_dataset(datatype='train')
    val_data = dp.load_dataset(datatype='val')

    model = None
    if config.startidx > 0:
        modelFile = os.path.join(config.modelsave_dir, str(config.startidx - 1) + '_pool5.npy')
        model = np.load(modelFile)

    print '----------compile-------------'
    net, train_fun, test_fun, val_fun, sh_lr = compile_func(model)
    print '-------compile end------------'

    train_file = os.path.join(config.performance_dir, 'train_pool5.txt')
    val_file = os.path.join(config.performance_dir, 'val_pool5.txt')

    fp_train = open(train_file, 'a+')
    fp_train.write('--------------\n')
    fp_train.write(time.strftime('%Y-%m-%d\t%H:%M:%S', time.localtime(time.time())) + '\n')
    fp_train.write(str(config.batchsize) + '\n')
    fp_train.write(str(config.num_epoches) + '\n')
    fp_train.write(str(config.weight_decay) + '\n')
    fp_train.write(str(config.lr) + '\n')
    fp_train.write('-------result-------\n')
    fp_train.close()

    fp_val = open(val_file, 'a+')
    fp_val.write('--------------\n')
    fp_val.write(time.strftime('%Y-%m-%d\t%H:%M:%S', time.localtime(time.time())) + '\n')
    fp_val.write(str(config.batchsize) + '\n')
    fp_val.write(str(config.num_epoches) + '\n')
    fp_val.write(str(config.weight_decay) + '\n')
    fp_val.write(str(config.lr) + '\n')
    fp_val.write('-------result-------\n')
    fp_val.close()

    for epoch in range(config.num_epoches):
        if epoch < config.startidx:
            continue
        print '-----------epoch ' + str(epoch) + '-------------'

        # train
        train_sample = 0
        total_err = 0
        i = 0
        for batch in dp.iterator(train_data, shuffle=True):
            (num_samples,
             videos, video_masks,
             befores, before_masks, afters, afters_masks,
             labels) = dp.loadOneBatch(batch)
            err = train_fun(
                videos, video_masks,
                befores, before_masks, afters, afters_masks,
                labels
            )
            print 'train ' + str(epoch) + ':' + str(i) + ':' + str(err)
            i += 1
            total_err += err * num_samples
            train_sample += num_samples

        model_train = get_all_param_values(net['fill'])
        model_train_file = os.path.join(config.modelsave_dir, str(epoch) + '_pool5.npy')
        np.save(model_train_file, model_train)

        total_err /= train_sample
        fp_train = open(train_file, 'a+')
        fp_train.write('epoch: ' + str(epoch) + '\t err: ' + str(total_err) + '\n')
        fp_train.close()
        print 'train:\t epoch: ' + str(epoch) + '\t err: ' + str(total_err)

        # val
        val_sample = 0
        total_err = 0
        total_acc = 0
        i = 0
        for batch in dp.iterator(val_data, shuffle=False):
            (num_samples,
             videos, video_masks,
             befores, before_masks, afters, afters_masks,
             labels) = dp.loadOneBatch(batch)
            prediction, err, acc = val_fun(
                videos, video_masks,
                befores, before_masks, afters, afters_masks,
                labels
            )
            print 'val ' + str(epoch) + ':' + str(i)
            i += 1
            total_err += err * num_samples
            total_acc += acc * num_samples
            val_sample += num_samples

        total_err /= val_sample
        total_acc /= val_sample
        fp_val = open(val_file, 'a+')
        fp_val.write('epoch: ' + str(epoch) + '\t err: ' + str(total_err) +
                     '\t acc: ' + str(total_acc) + '\n')
        fp_val.close()
        print 'val:\t epoch: ' + str(epoch) + '\t err: ' + str(total_err) + '\t acc: ' + str(total_acc)

        if (epoch + 1) % config.lr_change == 0:
            sh_lr.set_value(sh_lr.get_value() / 10)
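# ---------------------------------------------------------------------------
# Schedule note (illustrative numbers): the last two lines above divide the
# shared learning rate by 10 every config.lr_change epochs. For example, with
# config.lr = 0.01 and config.lr_change = 10, epochs 0-9 run at 0.01,
# epochs 10-19 at 0.001, epochs 20-29 at 0.0001, and so on.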
from config.config import Config
from utils.dataprovider import DataProvider
from utils.metrics.pycocoevalcap.eval import COCOEvalCap

print("Evaluating the model ...")
config = Config()
config.phase = 'test'
test_data = DataProvider(config)
test_gt_coco = test_data.returncoco()

# Evaluate these captions
test_result_coco = test_gt_coco.loadRes(config.test_result_file)
scorer = COCOEvalCap(test_gt_coco, test_result_coco)
scorer.evaluate()