# ---------------------------------------------------------------------------
# Tester script: evaluate a trained W2VV_MS model on testCollection. (This
# `process` and the training `process` below come from two separate scripts;
# both are kept here under their original names.)
# ---------------------------------------------------------------------------
import os
import sys

# Project-local helpers are assumed importable in this script: readPkl,
# writePkl, checkToSkip, makedirsforfile, get_text_encoder, get_losser,
# BigFile, W2VV_MS_pred, pred_mutual_error_ms, readImgSents, i2t.


def process(option, trainCollection, valCollection, testCollection):
    rootpath = option.rootpath
    overwrite = option.overwrite

    # Restore the options the model was trained with, then override the
    # fields that matter for evaluation.
    opt_pkl = os.path.join(option.model_path, 'option.pkl')
    opt = readPkl(opt_pkl)
    opt.n_caption = option.n_caption
    opt.model_path = option.model_path
    opt.weight_name = option.weight_name

    # Result file info: derive the output directory by swapping the train
    # collection in the checkpoint path for the test collection. (The splits
    # use Windows-style separators, matching how the checkpoint paths were
    # written.)
    assert trainCollection in option.model_path
    assert valCollection in option.model_path
    model_path_1, model_path_2 = option.model_path.strip().split(
        '\\' + trainCollection + '\\')
    model_path = os.path.join(model_path_1, testCollection, 'results',
                              model_path_2)
    model_path = model_path.replace('\\%s\\' % opt.checkpoint, '\\')
    output_dir = os.path.join(model_path, option.weight_name)
    print(output_dir)

    result_perf = os.path.join(output_dir, 'perf.txt')
    result_pkl = os.path.join(output_dir, 'test_errors.pkl')
    if checkToSkip(result_perf, overwrite):
        sys.exit(0)
    makedirsforfile(result_perf)

    # Text style: the multi-space model expects "rnn@bow@w2v".
    if '@' in opt.text_style and opt.model_name.endswith('_ms'):
        rnn_style, bow_style, w2v_style = opt.text_style.strip().split('@')
        text_data_path = os.path.join(rootpath, trainCollection, "TextData",
                                      "vocabulary", "bow", opt.rnn_vocab)
        bow_data_path = os.path.join(rootpath, trainCollection, "TextData",
                                     "vocabulary", bow_style, opt.bow_vocab)
        w2v_data_path = os.path.join(rootpath, "word2vec", opt.corpus,
                                     opt.word2vec)
    else:
        print(opt.text_style +
              " is not supported, please check the 'text_style' parameter")
        sys.exit(0)

    # Text embedding (text representation)
    text2vec = get_text_encoder(rnn_style)(text_data_path)
    bow2vec = get_text_encoder(bow_style)(bow_data_path)
    w2v2vec = get_text_encoder(w2v_style)(w2v_data_path)

    # img2vec
    img_feats_path = os.path.join(rootpath, testCollection, 'FeatureData',
                                  opt.img_feature)
    img_feats = BigFile(img_feats_path)

    # Similarity function
    losser = get_losser(opt.simi_fun)()

    # Load the trained model structure and the selected weights.
    abs_model_path = os.path.join(opt.model_path, 'model.json')
    weight_path = os.path.join(opt.model_path, opt.weight_name)
    predictor = W2VV_MS_pred(abs_model_path, weight_path)

    test_sent_file = os.path.join(rootpath, testCollection, 'TextData',
                                  '%s.caption.txt' % testCollection)
    img_list, sents_id, sents = readImgSents(test_sent_file)

    all_errors = pred_mutual_error_ms(img_list, sents, predictor, text2vec,
                                      bow2vec, w2v2vec, img_feats, losser,
                                      opt=opt)

    # Compute performance (the last two numbers are median and mean rank).
    (r1i, r5i, r10i, medri, meanri) = i2t(all_errors, n_caption=opt.n_caption)
    perf_line = ("Image to text: recall@1 %.1f, recall@5 %.1f, "
                 "recall@10 %.1f, med_r %.1f, mean_r %.1f"
                 % (r1i, r5i, r10i, medri, meanri))
    print(perf_line)
    with open(result_perf, 'w') as fout_perf:
        fout_perf.write(perf_line + '\n')
    writePkl({'errors': all_errors}, result_pkl)
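
# For reference, a minimal sketch of the ranking metric that i2t reports. It
# assumes a hypothetical layout in which errors[i, j] is the distance between
# image i and sentence j, with the n_caption ground-truth sentences of image i
# at columns i*n_caption .. (i+1)*n_caption - 1; i2t_sketch and that layout
# are illustrative, not the repo's actual code.
import numpy as np

def i2t_sketch(errors, n_caption=5):
    """Recall@{1,5,10}, median rank and mean rank for image-to-text search."""
    n_images = errors.shape[0]
    ranks = np.zeros(n_images)
    for i in range(n_images):
        order = np.argsort(errors[i])  # best-matching sentences first
        gt = np.arange(i * n_caption, (i + 1) * n_caption)
        # 0-based rank of the best-ranked ground-truth sentence
        ranks[i] = np.where(np.in1d(order, gt))[0].min()
    r1 = 100.0 * np.mean(ranks < 1)
    r5 = 100.0 * np.mean(ranks < 5)
    r10 = 100.0 * np.mean(ranks < 10)
    return r1, r5, r10, np.median(ranks) + 1, ranks.mean() + 1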
# ---------------------------------------------------------------------------
# Trainer script: train W2VV_MS on trainCollection, pick the best epoch on
# valCollection, and generate a test script for testCollection.
# ---------------------------------------------------------------------------
import tensorboard_logger as tb_logger
from keras.utils import generic_utils

# Additional project-local helpers assumed importable here: INFO, get_model,
# get_we_parameter, PairDataSet_MS, cal_val_perf.


def process(opt, trainCollection, valCollection, testCollection):
    rootpath = opt.rootpath
    overwrite = opt.overwrite

    opt.n_text_layers = list(map(int, opt.n_text_layers.strip().split('-')))

    if opt.init_model_from != '':
        assert opt.img_feature in opt.init_model_from
        init_model_name = opt.init_model_from.strip().split("/")[-1]
        train_style = opt.model_name + "_" + INFO + "_ft_" + init_model_name
    else:
        train_style = opt.model_name + "_" + INFO

    # Text embedding style: the multi-space model expects "rnn@bow@w2v".
    if '@' in opt.text_style and opt.model_name.endswith('_ms'):
        rnn_style, bow_style, w2v_style = opt.text_style.strip().split('@')
        opt.rnn_style = rnn_style
        text_data_path = os.path.join(rootpath, trainCollection, "TextData",
                                      "vocabulary", "bow", opt.rnn_vocab)
        bow_data_path = os.path.join(rootpath, trainCollection, "TextData",
                                     "vocabulary", bow_style, opt.bow_vocab)
        w2v_data_path = os.path.join(rootpath, "word2vec", opt.corpus,
                                     opt.word2vec)
        text_name = opt.bow_vocab + "_rnn_%d_%s_sent_%d" % (
            opt.rnn_size, opt.rnn_vocab, opt.sent_maxlen)
    else:
        print(opt.text_style +
              " is not supported, please check the 'text_style' parameter")
        sys.exit(0)

    optm_style = opt.optimizer + \
        '_clipnorm_%.1f_lr_%.5f_dp_%.2f_l2_%.5f_%s_bs_%d' % \
        (opt.clipnorm, opt.lr, opt.dropout, opt.l2_p, opt.loss_fun,
         opt.batch_size)
    model_style = "-".join(map(str, opt.n_text_layers)) + \
        '_' + opt.hidden_act + '_' + opt.simi_fun

    checkpoint_dir = os.path.join(rootpath, trainCollection, opt.checkpoint,
                                  'w2vv', valCollection, train_style,
                                  opt.text_style + '_' + text_name,
                                  opt.img_feature, optm_style, model_style,
                                  opt.postfix)

    # Output a TensorBoard visualization script.
    runfile_vis = 'do_visual.sh'
    with open(runfile_vis, 'w') as fout:
        fout.write('port=$1\ntensorboard --logdir %s --port $port'
                   % checkpoint_dir)
    os.system('chmod +x %s' % runfile_vis)

    val_per_hist_file = os.path.join(checkpoint_dir, 'val_per_hist.txt')
    if checkToSkip(val_per_hist_file, overwrite):
        sys.exit(0)
    makedirsforfile(val_per_hist_file)

    model_file_name = os.path.join(checkpoint_dir, 'model.json')
    model_img_name = os.path.join(checkpoint_dir, 'model.png')

    tb_logger.configure(checkpoint_dir, flush_secs=5)

    # Text embedding (text representation)
    if '@' in opt.text_style and opt.model_name.endswith('_ms'):
        text2vec = get_text_encoder(rnn_style)(text_data_path)
        bow2vec = get_text_encoder(bow_style)(bow_data_path)
        w2v2vec = get_text_encoder(w2v_style)(w2v_data_path)
        # The first text layer consumes the concatenated BoW + w2v vector.
        if opt.n_text_layers[0] == 0:
            opt.n_text_layers[0] = bow2vec.ndims + w2v2vec.ndims
        else:
            assert opt.n_text_layers[0] == bow2vec.ndims + w2v2vec.ndims
        opt.vocab_size = text2vec.n_vocab
        opt.embed_size = w2v2vec.ndims
    else:
        text2vec = get_text_encoder(opt.text_style)(
            text_data_path, ndims=opt.n_text_layers[0])
        if opt.n_text_layers[0] == 0:
            opt.n_text_layers[0] = text2vec.ndims

    # img2vec
    img_feat_path = os.path.join(rootpath, trainCollection, 'FeatureData',
                                 opt.img_feature)
    img_feats = BigFile(img_feat_path)

    val_img_feat_path = os.path.join(rootpath, valCollection, 'FeatureData',
                                     opt.img_feature)
    val_img_feats = BigFile(val_img_feat_path)

    # Write out options for evaluation.
    pkl_file = os.path.join(checkpoint_dir, 'option.pkl')
    writePkl(opt, pkl_file)

    # Define the word2visualvec model.
    if opt.model_name.endswith('_ms'):
        we_weights = get_we_parameter(text2vec.vocab, w2v_data_path)
        print(we_weights.shape)
        model = get_model(opt.model_name)(opt, we_weights=we_weights)
    else:
        model = get_model(opt.model_name)(opt)
    model.save_json_model(model_file_name)
    model.plot(model_img_name)
    model.compile_model(opt.loss_fun, opt=opt)
    if opt.init_model_from != '':
        print('*' * 20)
        print('initialize the model from ' + opt.init_model_from)
        print('*' * 20)
        model.init_model(opt.init_model_from)

    # Training set
    caption_file = os.path.join(rootpath, trainCollection, 'TextData',
                                '%s.caption.txt' % trainCollection)
    trainData = PairDataSet_MS(caption_file, opt.batch_size, text2vec,
                               bow2vec, w2v2vec, img_feats,
                               flag_maxlen=True, maxlen=opt.sent_maxlen)

    val_sent_file = os.path.join(rootpath, valCollection, 'TextData',
                                 '%s.caption.txt' % valCollection)
    val_img_list, val_sents_id, val_sents = readImgSents(val_sent_file)

    losser = get_losser(opt.simi_fun)()

    best_validation_perf = 0
    n_step = 0
    count = 0
    lr_count = 0
    best_epoch = -1
    val_per_hist = []
    for epoch in range(opt.max_epochs):
        print('\nEpoch %d' % epoch)
        print("Training...")
        print("learning rate: %s" % model.get_lr())
        tb_logger.log_value('lr', model.get_lr(), step=n_step)

        train_progbar = generic_utils.Progbar(trainData.datasize)
        trainBatchIter = trainData.getBatchData()
        for minibatch_index in range(trainData.max_batch_size):
            n_step += 1
            img_X_batch, text_X_batch = next(trainBatchIter)
            loss_batch = model.model.train_on_batch(text_X_batch, img_X_batch)
            train_progbar.add(img_X_batch.shape[0],
                              values=[("loss", loss_batch)])
            tb_logger.log_value('loss', loss_batch, step=n_step)
            tb_logger.log_value('n_step', n_step, step=n_step)

        print("\nValidating...")
        all_errors = pred_mutual_error_ms(val_img_list, val_sents, model,
                                          text2vec, bow2vec, w2v2vec,
                                          val_img_feats, losser, opt=opt)
        this_validation_perf = cal_val_perf(all_errors, opt=opt)
        tb_logger.log_value('val_accuracy', this_validation_perf, step=n_step)

        val_per_hist.append(this_validation_perf)
        print('previous_best_performance: %.3f' % best_validation_perf)
        print('current_performance: %.3f' % this_validation_perf)

        fout_file = os.path.join(checkpoint_dir, 'epoch_%d.h5' % epoch)
        lr_count += 1
        if this_validation_perf > best_validation_perf:
            best_validation_perf = this_validation_perf
            count = 0
            # Save the new best model and drop the previous best checkpoint.
            model.model.save_weights(fout_file)
            if best_epoch != -1:
                os.system('rm ' + os.path.join(checkpoint_dir,
                                               'epoch_%d.h5' % best_epoch))
            best_epoch = epoch
        else:
            # When the validation performance has decreased after an epoch,
            # we divide the learning rate by 2 and continue training, but we
            # use each learning rate for at least 3 epochs.
            if lr_count > 2:
                model.decay_lr(0.5)
                lr_count = 0
            count += 1
            if count > 10:
                print("Early stopping happened")
                break

    # Write the per-epoch validation history, best epoch first.
    sorted_epoch_perf = sorted(zip(range(len(val_per_hist)), val_per_hist),
                               key=lambda x: x[1], reverse=True)
    with open(val_per_hist_file, 'w') as fout:
        for i, perf in sorted_epoch_perf:
            fout.write("epoch_" + str(i) + " " + str(perf) + "\n")

    # Generate the shell script for testing from the template.
    with open('TEMPLATE_do_test.sh') as fin:
        template = fin.read()
    scriptStr = template.replace('@@@rootpath@@@', rootpath)
    scriptStr = scriptStr.replace('@@@trainCollection@@@', trainCollection)
    scriptStr = scriptStr.replace('@@@valCollection@@@', valCollection)
    scriptStr = scriptStr.replace('@@@testCollection@@@', testCollection)
    scriptStr = scriptStr.replace('@@@model_path@@@', checkpoint_dir)
    scriptStr = scriptStr.replace('@@@weight_name@@@',
                                  'epoch_%d.h5' % sorted_epoch_perf[0][0])
    scriptStr = scriptStr.replace('@@@n_caption@@@', str(opt.n_caption))
    print(os.path.join(checkpoint_dir,
                       'epoch_%d.h5' % sorted_epoch_perf[0][0]))

    runfile = 'do_test_%s.sh' % testCollection
    with open(runfile, 'w') as fout:
        fout.write(scriptStr + '\n')
    os.system('chmod +x %s' % runfile)

    # Keep a stable alias for the best checkpoint.
    os.system('cp %s/epoch_%d.h5 %s/best_model.h5' %
              (checkpoint_dir, sorted_epoch_perf[0][0], checkpoint_dir))
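
# The loop above implements a patience-style schedule: on a non-improving
# epoch the learning rate is halved, but only after it has been used for at
# least 3 epochs, and training stops after more than 10 consecutive
# non-improving epochs. A self-contained sketch of that bookkeeping
# (schedule_step and its names are illustrative, not part of the repo):
def schedule_step(improved, lr, epochs_at_lr, stalled):
    """Return (lr, epochs_at_lr, stalled, stop) after one epoch."""
    epochs_at_lr += 1
    if improved:
        return lr, epochs_at_lr, 0, False       # reset the stall counter
    if epochs_at_lr > 2:                        # lr used for >= 3 epochs
        lr, epochs_at_lr = lr * 0.5, 0
    stalled += 1
    return lr, epochs_at_lr, stalled, stalled > 10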