def main():
    h = ''
    while h != '-h':
        h = input('enter -h for usage\n')
    with open('README.txt', 'r') as f:
        for line in f:
            print(line)
    x = int(input("Enter option number to see usage and -1 to exit: \n"))
    while x != -1:
        with open(str(x) + '.txt', 'r') as f:
            for line in f:
                print(line)
        x = int(input("Enter option number to see usage and -1 to exit: \n"))
    execute = evaluate.Evaluate()
    parser = parse.parse
    mydict = execute.modified_eval()
    while True:
        try:
            prog = input('interactive@LispInter>>> ')
            retvalue = execute.fund(parser(prog), mydict)
            if retvalue is not None:
                print(PyToLisp(retvalue))
        except Exception as e:
            print('Not valid!', 'reason: ' + str(e))
def Run(output, test, gold, rule, dicts):
    print("Running...")
    train.Train(output, rule, dicts)
    analyze.Analyze(output, test, output + '.fore', rule)
    synthesize.Synthesize(output, output + '.phon', output + '.back', rule)
    clean.Clean(output, output + '.raw', rule)
    evaluate.Evaluate(output, output + '.parsed', gold)
def fitness(self):
    popfitness = []
    for i in self.pop:  # i is one individual
        j = i[:self.baseusingnum]  # j: all bases used by this individual
        # parameter = [basex, basey, baseh, x, y, h, fc, Tx, G, htb, hre, Noise,
        #              rsrpthre, sinrthre, coverthre, nbaseallx, nbaseally, ncost, ocost]
        parameter1 = []
        for k in range(3):
            parameter10 = []
            for l in j:
                parameter10.append(self.parameter[k][l])
            parameter1.append(parameter10)
        # parameter1 now holds the basex, basey and baseh values of the
        # self.baseusingnum selected bases
        parameter2 = []
        parameter2.extend(parameter1)
        parameter2.extend(self.parameter[3:])
        popfitness.append(evaluate.Evaluate(parameter2).Evaluate_main())
    fmax = max(popfitness)
    popfitness1 = popfitness[:]
    popfitness2 = np.array(popfitness)
    popfitness2 = -popfitness2 + fmax + 5
    popfitness2 = list(popfitness2)
    return popfitness1, popfitness2  # fitness values of each individual in pop
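# Worked illustration of the fitness flip at the end of fitness() above
# (illustrative numbers, not from the project): for raw costs
# popfitness = [10, 30] and fmax = 30, the selection weights become
# -[10, 30] + 30 + 5 = [25, 5], so the cheaper individual receives the larger
# weight, and the +5 offset keeps every weight strictly positive.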
def total_report():
    import define
    import prepare
    import fselect
    import evaluate
    import improve

    # label = 'LocalizationNew_Tx'
    label = 'BQ_AQUARIS_E5_'
    # for i in range(7, 8):
    for i in ['20', '30', '40']:
        data_name = "./uploads/" + label + str(i) + ".csv"
        print(data_name)
        # data_name = "iris.csv"
        class_name = "class"
        definer = define.Define(data_path=data_name, header=None, response=class_name).pipeline()
        preparer = prepare.Prepare(definer).pipeline()
        selector = fselect.Select(definer).pipeline()
        evaluator = evaluate.Evaluate(definer, preparer, selector)
        improver = improve.Improve(evaluator).pipeline()
        improver.save_full_report('./market/' + label + str(i))
        improver.save_score_report('./market/' + label + str(i))
def EvaluateChild1(self, i):
    j = 0
    while j < self._nv:
        v = Grey2Dec(self.Pop2[i], self.vindicesvar[j], self.vtam[j], self.vw)
        self.vv[j] = self.delta[j] * v + self.vmin[j]
        j += 1
    self.vfobj2[i] = evaluate.Evaluate(self._nv, self.vv)
def report_improve(data_path, data_name, problem_type, optimizer, modelos):
    definer = define.Define(data_path=data_path, data_name=data_name, problem_type=problem_type).pipeline()
    preparer = prepare.Prepare(definer).pipeline()
    selector = fselect.Select(definer).pipeline()
    evaluator = evaluate.Evaluate(definer, preparer, selector)
    improver = improve.Improve(evaluator, optimizer, modelos).pipeline()
    plot = improver.plot_models()
    table = improver.report
    dict_report = {'plot': plot, 'table': table}
    # dict_report = {'table': table}
    return dict_report
def Gene_main(self):
    # Large initial value (number of grid points times the new-base cost), so
    # the first generation always improves on it.
    minfit = len(self.parameter[3]) * self.parameter[17]
    bestindividual = []
    for i in range(self.iterationtime):
        self.cross()
        self.mutation()
        self.popleft()
        print("the best fitness of generation %d is %f" % (i, min(self.E)))
        print("the best individual of generation %d is" % i)
        print(self.pop[0][:self.baseusingnum])
        print("\n\n\n")
        if min(self.E) < minfit:
            minfit = min(self.E)
            bestindividual = self.pop[0]
    print("the best fitness is %f" % minfit)
    print("the best individual is")
    print(bestindividual[:self.baseusingnum])
    parameter1 = []
    # parameter = [basex, basey, baseh, x, y, h, fc, Tx, G, htb, hre, Noise,
    #              rsrpthre, sinrthre, coverthre, nbaseallx, nbaseally, ncost, ocost]
    for k in range(3):
        parameter10 = []
        for l in bestindividual[:self.baseusingnum]:
            parameter10.append(self.parameter[k][l])
        parameter1.append(parameter10)
    print("parameter1 is %r" % parameter1)
    parameter2 = []
    parameter2.extend(parameter1)
    parameter2.extend(self.parameter[3:])
    cost = evaluate.Evaluate(parameter2).cost()
    cover = evaluate.Evaluate(parameter2).coverage()
    print("the cost of this individual is %r" % cost)
    print("the cover of this individual is %r" % cover)
    print("\n\n\n")
    return minfit, bestindividual[:self.baseusingnum], cost
def evaluate(self, test_labels):
    # Method names below follow the spelling used by the underlying module.
    evaluator = ev.Evaluate(self.predict_labels, test_labels, self.rtl)
    hamming_loss = evaluator.hanmming_loss()
    one_error = evaluator.one_error()
    rank_loss = evaluator.rank_loss()
    coverage = evaluator.coverage()
    avg_precision = evaluator.avg_precison()
    return hamming_loss, one_error, rank_loss, coverage, avg_precision
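# A minimal sketch of the Hamming-loss metric returned above: the fraction of
# label slots predicted incorrectly. This is illustrative only; the project's
# own implementation lives in the ev.Evaluate class.
import numpy as np

def hamming_loss_sketch(y_pred, y_true):
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # Mean disagreement over all (instance, label) pairs.
    return float(np.mean(y_pred != y_true))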
def EvaluatePopulation1(self):
    i = 0
    while i < self._pop:
        j = 0
        while j < self._nv:
            v = Grey2Dec(self.Pop1[i], self.vindicesvar[j], self.vtam[j], self.vw)
            self.vv[j] = self.delta[j] * v + self.vmin[j]
            j += 1
        self.vfobj1[i] = evaluate.Evaluate(self._nv, self.vv)
        i += 1
    # The best individual is initialized as the first individual
    self._best = self.vfobj1[0]
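# A minimal sketch of the Grey2Dec helper assumed by EvaluateChild1 and
# EvaluatePopulation1 above: extract the bits of one variable from the
# chromosome and convert Gray code to a plain binary integer. The argument
# meanings (start index, field length, word width) are assumptions.
def Grey2Dec_sketch(chromosome, start, length, word_width=None):
    bits = chromosome[start:start + length]
    value, prev = 0, 0
    for g in bits:
        prev ^= g                   # b[k] = b[k-1] XOR g[k]
        value = (value << 1) | prev
    return value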
def StrategyEvaluation(self, daily_report):
    # Sort by trade date, earliest to latest.
    daily_report = daily_report.sort_values(by=["trade_date"], ascending=True).reset_index(drop=True)
    if not daily_report.empty:
        self.evaluate = evaluate.Evaluate(daily_report=daily_report)
        average_daily_net_rise = self.evaluate.CalcAverageDailyNetRise()  # 001
        max_period_return, min_period_return = self.evaluate.CalcMaxMinPeriodReturn()  # 002
        go_up_probability = self.evaluate.CalcGoUpProbability()  # 003
        max_days_keep_up, max_days_keep_down = self.evaluate.CalcMaxDaysKeepUpOrDown()  # 004
        max_drawdown_value, max_drawdown_date, drawdown_start_date = self.evaluate.CalcMaxDrawdown()  # 005
        annual_return_rate, index_annual_return_rate = self.evaluate.CalcAnnualReturnRate()  # 006
        return_volatility = self.evaluate.CalcReturnVolatility()  # 007
        sharpe_ratio = self.evaluate.CalcSharpeRatio(annual_return_rate, return_volatility)  # 008
        beta_value = self.evaluate.CalcBetaValue()  # 009
        alpha_value = self.evaluate.CalcAlphaValue(annual_return_rate, index_annual_return_rate, beta_value)  # 010
        info_ratio = self.evaluate.CalcInfoRatio()  # 011
        print("Average daily net-value rise: %f" % average_daily_net_rise)
        print("Max single-period gain: %f," % max_period_return, "max single-period loss: %f" % min_period_return)
        print("Probability of rising: %f" % go_up_probability)
        print("Max consecutive up days: %f," % max_days_keep_up, "max consecutive down days: %f" % max_days_keep_down)
        print("Max drawdown: %f," % max_drawdown_value,
              "max drawdown date: %s," % max_drawdown_date.strftime("%Y-%m-%d"),
              "drawdown start date: %s" % drawdown_start_date.strftime("%Y-%m-%d"))
        print("Annualized return: %f," % annual_return_rate,
              "benchmark index annualized return: %f" % index_annual_return_rate)
        print("Return volatility: %f" % return_volatility)
        print("Sharpe ratio: %f" % sharpe_ratio)
        print("Beta: %f" % beta_value)
        print("Alpha: %f" % alpha_value)
        print("Information ratio: %f" % info_ratio)
        self.evaluate.MakeNetValueCompare(self.rets_folder)  # 012
        return True
    else:
        return False
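# For reference, given its arguments CalcSharpeRatio above presumably follows
# the usual definition
#   Sharpe = (annualized return - risk-free rate) / return volatility;
# the risk-free-rate assumption is internal to the Evaluate class.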
def report_model(response, data_path, data_name, problem_type):
    definer = define.Define(data_path=data_path, data_name=data_name, problem_type=problem_type).pipeline()
    preparer = prepare.Prepare(definer).pipeline()  # scaler
    selector = fselect.Select(definer).pipeline()  # pca
    evaluator = evaluate.Evaluate(definer, preparer, selector).pipeline()
    plot = evaluator.plot_models()
    table = evaluator.report
    data_name = data_name.replace(".csv", "")
    plot_path = os.path.join(app.config['MARKET_DIR'], data_name, 'model')
    tools.path_exists(plot_path)
    plot_path_plot = os.path.join(plot_path, 'boxplot.html')
    evaluator.save_plot(plot_path_plot)
    plot_path_report = os.path.join(plot_path, 'report.csv')
    evaluator.save_report(plot_path_report)
    dict_report = {'plot': plot, 'table': table}
    return dict_report
# smaller value for "tight graphs" such as ukbench (i.e., near-duplicate),
# larger for "loose graphs" such as corel (i.e., category classification);
# it is okay to choose the same value for search_region and kNN -- the
# difference is very subtle
search_region = 40
kNN = 20
retri_amount = 25

# Load data (retrieval results for all images)
img_name, result_idx, result_length = load_data(fn_result)

# Generate reciprocal graphs for all images
if re.search('voc', fn_result):
    feature_type = '.voc'
elif re.search('hsv', fn_result):
    feature_type = '.hsv'
elif re.search('gist', fn_result):
    feature_type = '.gist'
else:
    feature_type = '.unknown'
find_reciprocal_neighbors(img_name, result_idx, result_length, fn_result_reranking,
                          fn_folder_graph, search_region, kNN, retri_amount, feature_type)

# Evaluate the accuracy
import evaluate
print "Before building reciprocal kNN graphs:"
evaluate.Evaluate(fn_label, fn_result, retri_amount - 2)
print "After building reciprocal kNN graphs:"
evaluate.Evaluate(fn_label, fn_result_reranking, retri_amount - 2)
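# A minimal sketch of the reciprocal-neighbor test that
# find_reciprocal_neighbors above relies on: image b is a reciprocal kNN of
# image a when each appears in the other's top-k retrieval list. result_idx
# is assumed to map an image index to its ranked neighbor indices.
def is_reciprocal_sketch(result_idx, a, b, k):
    return b in result_idx[a][:k] and a in result_idx[b][:k]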
def main():
    args = parse_args()
    config = configparser.ConfigParser()

    """ARGS DETAIL"""
    config_file = args.config_file
    batch_size = args.batch
    n_epoch = args.epoch
    pretrain_epoch = args.pretrain_epoch
    gpu_id = args.gpu
    model_type = args.model
    pretrain_w2v = args.pretrain_w2v
    data_path = args.data_path
    load_model = args.load_model

    """DIR PREPARE"""
    config.read(config_file)
    vocab_size = int(config['Parameter']['vocab_size'])
    coefficient = float(config['Parameter']['coefficient'])
    shuffle_data = bool(config['Parameter']['shuffle'])
    if pretrain_w2v:
        vocab_size = 'p' + str(vocab_size)
    if model_type == 'multi':
        if shuffle_data:
            base_dir = './pseudo_{}_{}_{}_c{}_shuffle/'.format(model_type, vocab_size, data_path[0], coefficient)
        else:
            base_dir = './pseudo_{}_{}_{}_c{}/'.format(model_type, vocab_size, data_path[0], coefficient)
    else:
        if shuffle_data:
            base_dir = './pseudo_{}_{}_{}_shuffle/'.format(model_type, vocab_size, data_path[0])
        else:
            base_dir = './pseudo_{}_{}_{}/'.format(model_type, vocab_size, data_path[0])
    model_save_dir = base_dir
    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
    shutil.copyfile(config_file, base_dir + config_file)
    config_file = base_dir + config_file
    config.read(config_file)

    """PARAMETER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    weight_decay = float(config['Parameter']['weight_decay'])
    gradclip = float(config['Parameter']['gradclip'])
    vocab_size = int(config['Parameter']['vocab_size'])
    valid_num = int(config['Parameter']['valid_num'])
    shuffle_data = bool(config['Parameter']['shuffle'])

    """LOGGER"""
    log_file = model_save_dir + 'log.txt'
    logger = dataset.prepare_logger(log_file)
    logger.info(args)  # record the command-line arguments
    logger.info('[Training start] logging to {}'.format(log_file))

    """DATASET"""
    train_src_file = config[data_path]['train_src_file']
    train_trg_file = config[data_path]['train_trg_file']
    valid_src_file = config[data_path]['valid_src_file']
    valid_trg_file = config[data_path]['valid_trg_file']
    test_src_file = config[data_path]['single_src_file']
    test_trg_file = config[data_path]['single_trg_file']
    src_w2v_file = config[data_path]['src_w2v_file']
    trg_w2v_file = config[data_path]['trg_w2v_file']

    train_data = dataset.load_label_corpus_file(train_src_file, train_trg_file)
    qa_data_sub_lit = dataset.split_valid_data(train_data, valid_num)
    valid_data = dataset.load_label_corpus_file(valid_src_file, valid_trg_file)
    test_data = dataset.load_label_corpus_file(test_src_file, test_trg_file)
    test_data_sub_lit = dataset.split_valid_data(test_data, valid_num)

    """VOCABULARY"""
    src_vocab, trg_vocab, sos, eos = dataset.prepare_vocab(base_dir, train_data, vocab_size, gpu_id)
    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)
    src_initialW, trg_initialW = None, None
    if pretrain_w2v:
        w2v = word2vec.Word2Vec()
        src_initialW, vector_size, src_match_word_count = w2v.make_initialW(src_vocab.vocab, src_w2v_file)
        trg_initialW, vector_size, trg_match_word_count = w2v.make_initialW(trg_vocab.vocab, trg_w2v_file)
        logger.info('Initialize w2v embedding. Match: src {}/{}, trg {}/{}'.format(
            src_match_word_count, src_vocab_size, trg_match_word_count, trg_vocab_size))
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(src_vocab_size, trg_vocab_size))

    evaluater = evaluate.Evaluate()

    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()

    cross_valid_result = []
    for ite in range(1, valid_num + 1):
        model_valid_dir = base_dir + 'valid{}/'.format(ite)
        if not os.path.exists(model_valid_dir):
            os.mkdir(model_valid_dir)

        qa_train_data, qa_dev_data, qa_test_data = dataset.separate_train_dev_test(qa_data_sub_lit, ite)
        train_data, dev_data, test_data = dataset.separate_train_dev_test(test_data_sub_lit, ite)
        test_data_id = [t['id'] for t in test_data]

        qa_iter = dataset.Iterator(qa_train_data, src_vocab, trg_vocab, batch_size, gpu_id, sort=True, shuffle=True)
        valid_iter = dataset.Iterator(valid_data, src_vocab, trg_vocab, batch_size, gpu_id, sort=False, shuffle=False)
        train_iter = dataset.Iterator(train_data, src_vocab, trg_vocab, batch_size, gpu_id, sort=True, shuffle=True)
        dev_iter = dataset.Iterator(dev_data, src_vocab, trg_vocab, batch_size, gpu_id, sort=False, shuffle=False)
        test_iter = dataset.Iterator(test_data, src_vocab, trg_vocab, batch_size, gpu_id, sort=False, shuffle=False)
        qa_size = len(qa_train_data)
        train_size = len(train_data)
        logger.info('V{} ## QA:{}, train:{}, dev:{}, test:{}'.format(
            ite, qa_size, train_size, len(dev_data), len(test_data)))

        """MODEL"""
        # Bind the network to a separate name so the `model` module is not
        # shadowed on later folds.
        if model_type == 'multi':
            net = model.Multi(src_vocab_size, trg_vocab_size, embed_size, hidden_size, class_size,
                              dropout_ratio, coefficient, src_initialW, trg_initialW)
        elif model_type in ['label', 'pretrain']:
            net = model.Label(src_vocab_size, trg_vocab_size, embed_size, hidden_size, class_size,
                              dropout_ratio, src_initialW, trg_initialW)
        else:
            net = model.EncoderDecoder(src_vocab_size, trg_vocab_size, embed_size, hidden_size,
                                       dropout_ratio, src_initialW, trg_initialW)
        if gpu_id >= 0:
            net.to_gpu()

        """OPTIMIZER"""
        optimizer = chainer.optimizers.Adam()
        optimizer.setup(net)
        optimizer.add_hook(chainer.optimizer.GradientClipping(gradclip))
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

        """PRETRAIN"""
        if model_type == 'pretrain' and load_model is None:
            logger.info('Pre-train start')
            pretrain_loss_dic = {}
            for epoch in range(1, pretrain_epoch + 1):
                train_loss = 0
                for i, batch in enumerate(train_iter.generate(), start=1):
                    try:
                        loss = net.pretrain(*batch)
                        train_loss += loss.data
                        optimizer.target.cleargrads()
                        loss.backward()
                        optimizer.update()
                    except Exception as e:
                        logger.info('P{} ## train iter: {}, {}'.format(epoch, i, e))
                chainer.serializers.save_npz(model_save_dir + 'p_model_epoch_{}.npz'.format(epoch), net)

                """EVALUATE"""
                valid_loss = 0
                for batch in valid_iter.generate():
                    with chainer.no_backprop_mode(), chainer.using_config('train', False):
                        valid_loss += net.pretrain(*batch).data
                logger.info('P{} ## train loss: {}, val loss:{}'.format(epoch, train_loss, valid_loss))
                pretrain_loss_dic[epoch] = valid_loss

            """MODEL SAVE & LOAD"""
            best_epoch = min(pretrain_loss_dic, key=(lambda x: pretrain_loss_dic[x]))
            logger.info('best_epoch:{}, val loss: {}'.format(best_epoch, pretrain_loss_dic[best_epoch]))
            shutil.copyfile(model_save_dir + 'p_model_epoch_{}.npz'.format(best_epoch),
                            model_save_dir + 'p_best_model.npz')
            logger.info('Pre-train finish')

        if load_model:
            logger.info('load model: {}'.format(load_model))
            chainer.serializers.load_npz(base_dir + load_model, net)

        """TRAIN"""
        epoch_info = {}
        for epoch in range(1, n_epoch + 1):
            train_loss = 0
            mix_train_iter = dataset.MixIterator(qa_iter, train_iter, seed=0, shuffle=shuffle_data)
            for i, batch in enumerate(mix_train_iter.generate(), start=1):
                try:
                    loss = optimizer.target(*batch[0])
                    train_loss += loss.data
                    optimizer.target.cleargrads()
                    loss.backward()
                    optimizer.update()
                except Exception as e:
                    logger.info('V{} ## E{} ## train iter: {}, {}'.format(ite, epoch, i, e))
            chainer.serializers.save_npz(model_valid_dir + 'model_epoch_{}.npz'.format(epoch), net)

            """DEV"""
            labels, alignments = [], []
            for i, batch in enumerate(dev_iter.generate(), start=1):
                try:
                    with chainer.no_backprop_mode(), chainer.using_config('train', False):
                        _, label, align = net.predict(batch[0], sos, eos)
                except Exception as e:
                    logger.info('V{} ## E{} ## dev iter: {}, {}'.format(ite, epoch, i, e))
                if model_type == 'multi':
                    for l, a in zip(label, align):
                        labels.append(chainer.cuda.to_cpu(l))
                        alignments.append(chainer.cuda.to_cpu(a))
                elif model_type in ['label', 'pretrain']:
                    for l in label:
                        labels.append(chainer.cuda.to_cpu(l))
                else:
                    for a in align:
                        alignments.append(chainer.cuda.to_cpu(a))
            best_param_dic = evaluater.param_search(labels, alignments, dev_data)
            param = max(best_param_dic, key=lambda x: best_param_dic[x]['macro'])
            init, mix = evaluate.key_to_param(param)
            dev_score = round(best_param_dic[param]['macro'], 3)

            """TEST"""
            outputs, labels, alignments = [], [], []
            for i, batch in enumerate(test_iter.generate(), start=1):
                try:
                    with chainer.no_backprop_mode(), chainer.using_config('train', False):
                        output, label, align = net.predict(batch[0], sos, eos)
                except Exception as e:
                    logger.info('V{} ## E{} ## test iter: {}, {}'.format(ite, epoch, i, e))
                if model_type == 'multi':
                    for l, a in zip(label, align):
                        labels.append(chainer.cuda.to_cpu(l))
                        alignments.append(chainer.cuda.to_cpu(a))
                elif model_type in ['label', 'pretrain']:
                    for l in label:
                        labels.append(chainer.cuda.to_cpu(l))
                else:
                    for a in align:
                        alignments.append(chainer.cuda.to_cpu(a))
            rate, count, tf_lit, macro, micro = evaluater.eval_param(labels, alignments, test_data, init, mix)
            test_macro_score = round(macro, 3)
            test_micro_score = round(micro, 3)
            logger.info('V{} ## E{} ## loss: {}, dev: {}, param: {}, micro: {}, macro: {}'.format(
                ite, epoch, train_loss, dev_score, param, test_micro_score, test_macro_score))
            epoch_info[epoch] = {
                'id': test_data_id,
                'label': labels,
                'align': alignments,
                'hypo': outputs,
                'epoch': epoch,
                'dev_score': dev_score,
                'param': param,
                'rate': rate,
                'count': count,
                'tf': tf_lit,
                'macro': test_macro_score,
                'micro': test_micro_score
            }
            dataset.save_output(model_valid_dir, epoch_info[epoch])

        """MODEL SAVE"""
        best_epoch = max(epoch_info, key=(lambda x: epoch_info[x]['dev_score']))
        cross_valid_result.append(epoch_info[best_epoch])
        logger.info('V{} ## best_epoch: {}, dev: {}, micro: {}, macro: {}'.format(
            ite, best_epoch, epoch_info[best_epoch]['dev_score'],
            epoch_info[best_epoch]['micro'], epoch_info[best_epoch]['macro']))
        shutil.copyfile(model_valid_dir + 'model_epoch_{}.npz'.format(best_epoch),
                        model_valid_dir + 'best_model.npz')
        logger.info('')

    ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
    ave_test_score = [0 for _ in range(len(cross_valid_result[0]['rate']))]
    id_total, label_total, align_total, tf_total = [], [], [], []
    for v, r in enumerate(cross_valid_result, start=1):
        ave_dev_score += r['dev_score']
        ave_macro_score += r['macro']
        ave_micro_score += r['micro']
        for i, rate in enumerate(r['rate']):
            ave_test_score[i] += rate
        logger.info(' {}: e{}, {}\tdev: {}, micro: {}, macro: {} {}'.format(
            v, r['epoch'], r['param'], r['dev_score'], r['micro'],
            dataset.float_to_str(r['rate']), r['macro']))
        id_total.extend(r['id'])
        label_total.extend(r['label'])
        align_total.extend(r['align'])
        tf_total.extend(r['tf'])
    ave_dev_score = round(ave_dev_score / valid_num, 3)
    ave_macro_score = round(ave_macro_score / valid_num, 3)
    ave_micro_score = round(ave_micro_score / valid_num, 3)
    ave_test_score = [ave_test_score[i] / valid_num for i in range(len(ave_test_score))]
    logger.info('dev: {}, micro: {}, macro: {} {}'.format(
        ave_dev_score, ave_micro_score, dataset.float_to_str(ave_test_score), ave_macro_score))

    label, align, tf = dataset.sort_multi_list(id_total, label_total, align_total, tf_total)
    dataset.save_list(base_dir + 'label.txt', label)
    dataset.save_list(base_dir + 'align.txt', align)
    dataset.save_list(base_dir + 'tf.txt', tf)
# bearings["llama"] = ... # Now you need to convert this to a horizontal bearing as an angle. # Use the camera matrix for this! # There are ways to get much better bearings. # Try and think of better solutions than just averaging. for animal in bearings: bearing_dict = { "pose": self.pose.tolist(), "animal": animal, "bearing": bearings[animal] } bearing_line = json.dumps(bearing_dict) if __name__ == "__main__": # Set up the network exp = evaluate.Evaluate() # Read in the images images_fname = "../system_output/images.txt" with open(images_fname, 'r') as images_file: posed_images = [PosedImage(line) for line in images_file] # Compute bearings and write to file bearings_fname = "../system_output/bearings.txt" with open(bearings_fname, 'w') as bearings_file: for posed_image in posed_images: posed_image.write_bearings(exp, bearings_file, "../system_output/")
def main(): """ model1: label model2: encdec を指定する """ args = parse_args() model_name1 = args.label_model model_dir1 = re.search(r'^(.*/)', model_name1).group(1) model_name2 = args.encdec_model model_dir2 = re.search(r'^(.*/)', model_name2).group(1) valid_type = args.valid # 結果保存用ディレクトリ作成 output_dir = model_dir1 + model_dir2 if not os.path.exists(output_dir): os.mkdir(output_dir) # 評価データ準備 config = configparser.ConfigParser() config_files = glob.glob(os.path.join(model_dir1, '*.ini')) config.read(config_files[0]) valid_num = int(config['Parameter']['valid_num']) test_src_file = config['server']['single_src_file'] test_trg_file = config['server']['single_trg_file'] data = dataset.load_label_corpus_file(test_src_file, test_trg_file) data_sub_lit = dataset.split_valid_data(data, valid_num) evaluater = evaluate.Evaluate() result_dic = {} # validファイルに分割されている時 if valid_type == 'TT': """ model1: validファイルあり model2: validファイルあり """ model_file_num = len( glob.glob(os.path.join(model_dir1, 'valid1/model_epoch_*.npz'))) label_dic = {} align_dic = {} for i in range(1, model_file_num + 1): label_dic[i] = [] align_dic[i] = [] for valid in [2, 3, 4, 5, 1]: label, _ = dataset.load_score_file( model_dir1 + 'valid{}/model_epoch_{}'.format(valid, i)) label_dic[i].append(label) _, align = dataset.load_score_file( model_dir2 + 'valid{}/model_epoch_{}'.format(valid, i)) align_dic[i].append(align) order = {1: [4, 5], 2: [5, 1], 3: [1, 2], 4: [2, 3], 5: [3, 4]} for i in tqdm(range(1, model_file_num + 1)): for j in range(1, model_file_num + 1): info = [] for ite, v in order.items(): _, dev_data, test_data = dataset.separate_train_dev_test( data_sub_lit, ite) dev_label = label_dic[i][v[0] - 1] test_label = label_dic[i][v[1] - 1] dev_align = align_dic[j][v[0] - 1] test_align = align_dic[j][v[1] - 1] best_param_dic = evaluater.param_search( dev_label, dev_align, dev_data) param = max(best_param_dic, key=lambda x: best_param_dic[x]['macro']) init, mix = evaluate.key_to_param(param) dev_score = round(best_param_dic[param]['macro'], 3) rate, count, tf_lit, macro, micro = evaluater.eval_param( test_label, test_align, test_data, init, mix) test_macro_score = round(macro, 3) test_micro_score = round(micro, 3) info.append({ 'dev_score': dev_score, 'param': param, 'macro': test_macro_score, 'micro': test_micro_score, 'tf': tf_lit }) ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0 param = [] tf_lit = [] for v, r in enumerate(info, start=1): ave_dev_score += r['dev_score'] ave_macro_score += r['macro'] ave_micro_score += r['micro'] param.append(r['param']) tf_lit.extend(r['tf']) ave_dev_score = round(ave_dev_score / valid_num, 3) ave_macro_score = round(ave_macro_score / valid_num, 3) ave_micro_score = round(ave_micro_score / valid_num, 3) key = 'label{}_enc{}'.format(i, j) result_dic[key] = { 'dev': ave_dev_score, 'micro': ave_micro_score, 'macro': ave_macro_score, 'param': ' '.join(param), 'tf': tf_lit } best_score = max(result_dic, key=lambda x: result_dic[x]['dev']) with open(output_dir + 'merge.txt', 'w') as f: [ f.write('{}: {}\n'.format(k, v)) for k, v in sorted(result_dic.items()) ] f.write('best score\n{}: {}\n'.format(best_score, result_dic[best_score])) with open(output_dir + 'tf.txt', 'w') as f: [f.write(r + '\n') for r in result_dic[best_score]['tf']] elif valid_type == 'FF': """ model1: validファイルなし model2: validファイルなし """ model_file_num = len( glob.glob(os.path.join(model_dir1, 'model_epoch_*.npz'))) for i in tqdm(range(1, model_file_num + 1)): label, _ = dataset.load_score_file(model_dir1 + 
'model_epoch_{}'.format(i)) label_sub_lit = dataset.split_valid_data(label, valid_num) for j in range(1, model_file_num + 1): _, align = dataset.load_score_file(model_dir2 + 'model_epoch_{}'.format(j)) align_sub_lit = dataset.split_valid_data(align, valid_num) info = [] for ite in range(1, valid_num + 1): _, dev_data, test_data = dataset.separate_train_dev_test( data_sub_lit, ite) _, dev_label, test_label = dataset.separate_train_dev_test( label_sub_lit, ite) _, dev_align, test_align = dataset.separate_train_dev_test( align_sub_lit, ite) best_param_dic = evaluater.param_search( dev_label, dev_align, dev_data) param = max(best_param_dic, key=lambda x: best_param_dic[x]['macro']) init, mix = evaluate.key_to_param(param) dev_score = round(best_param_dic[param]['macro'], 3) rate, count, tf_lit, macro, micro = evaluater.eval_param( test_label, test_align, test_data, init, mix) test_macro_score = round(macro, 3) test_micro_score = round(micro, 3) info.append({ 'dev_score': dev_score, 'param': param, 'macro': test_macro_score, 'micro': test_micro_score, 'tf': tf_lit }) ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0 param = [] for v, r in enumerate(info, start=1): ave_dev_score += r['dev_score'] ave_macro_score += r['macro'] ave_micro_score += r['micro'] param.append(r['param']) tf_lit.extend(r['tf']) ave_dev_score = round(ave_dev_score / valid_num, 3) ave_macro_score = round(ave_macro_score / valid_num, 3) ave_micro_score = round(ave_micro_score / valid_num, 3) key = 'label{}_enc{}'.format(i, j) result_dic[key] = { 'dev': ave_dev_score, 'micro': ave_micro_score, 'macro': ave_macro_score, 'param': ' '.join(param), 'tf': tf_lit } best_score = max(result_dic, key=lambda x: result_dic[x]['dev']) with open(output_dir + 'merge.txt', 'w') as f: [ f.write('{}: {}\n'.format(k, v)) for k, v in sorted(result_dic.items()) ] f.write('best score\n{}: {}\n'.format(best_score, result_dic[best_score])) with open(output_dir + 'tf.txt', 'w') as f: [f.write(r + '\n') for r in result_dic[best_score]['tf']] elif valid_type == 'TF': """ model1: validファイルなし model2: validファイルなし """ model_file_num = len( glob.glob(os.path.join(model_dir1, 'valid1/model_epoch_*.npz'))) label_dic = {} for i in range(1, model_file_num + 1): label_dic[i] = [] for valid in [2, 3, 4, 5, 1]: label, _ = dataset.load_score_file( model_dir1 + 'valid{}/model_epoch_{}'.format(valid, i)) label_dic[i].append(label) for j in range(1, model_file_num + 1): _, align = dataset.load_score_file(model_dir2 + 'model_epoch_{}'.format(j)) align_sub_lit = dataset.split_valid_data(align, valid_num) # 5-fold crossvalidationでvalid, testのインデックスを指定している # 1: [4, 5]は1回目のテストでは4番目のデータをvalidation用,5番目のデータをテストで使用する order = {1: [4, 5], 2: [5, 1], 3: [1, 2], 4: [2, 3], 5: [3, 4]} for i in tqdm(range(1, model_file_num + 1)): for j in range(1, model_file_num + 1): info = [] for ite, v in order.items(): _, dev_data, test_data = dataset.separate_train_dev_test( data_sub_lit, ite) dev_label = label_dic[i][v[0] - 1] test_label = label_dic[i][v[1] - 1] _, dev_align, test_align = dataset.separate_train_dev_test( align_sub_lit, ite) best_param_dic = evaluater.param_search( dev_label, dev_align, dev_data) param = max(best_param_dic, key=lambda x: best_param_dic[x]['macro']) init, mix = evaluate.key_to_param(param) dev_score = round(best_param_dic[param]['macro'], 3) rate, count, tf_lit, macro, micro = evaluater.eval_param( test_label, test_align, test_data, init, mix) test_macro_score = round(macro, 3) test_micro_score = round(micro, 3) info.append({ 'dev_score': dev_score, 
'param': param, 'macro': test_macro_score, 'micro': test_micro_score, 'tf': tf_lit }) ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0 param = [] tf_lit = [] for v, r in enumerate(info, start=1): ave_dev_score += r['dev_score'] ave_macro_score += r['macro'] ave_micro_score += r['micro'] param.append(r['param']) tf_lit.extend(r['tf']) ave_dev_score = round(ave_dev_score / valid_num, 3) ave_macro_score = round(ave_macro_score / valid_num, 3) ave_micro_score = round(ave_micro_score / valid_num, 3) key = 'label{}_enc{}'.format(i, j) result_dic[key] = { 'dev': ave_dev_score, 'micro': ave_micro_score, 'macro': ave_macro_score, 'param': ' '.join(param), 'tf': tf_lit } best_score = max(result_dic, key=lambda x: result_dic[x]['dev']) with open(output_dir + 'merge.txt', 'w') as f: [ f.write('{}: {}\n'.format(k, v)) for k, v in sorted(result_dic.items()) ] f.write('best score\n{}: {}\n'.format(best_score, result_dic[best_score])) with open(output_dir + 'tf.txt', 'w') as f: [f.write(r + '\n') for r in result_dic[best_score]['tf']]
# Load Source Embeddings
src = FastTextEmb(hparams.data_dir, hparams.src_lang, hparams.vocab_size)
src_dict, src_vec = src.load_embeddings()
# Load Target Embeddings
tgt = FastTextEmb(hparams.data_dir, hparams.tgt_lang, hparams.vocab_size)
tgt_dict, tgt_vec = tgt.load_embeddings()
# GAN instance
train_model = WordTranslator(hparams, src_vec, tgt_vec, hparams.vocab_size)
# Copy embeddings so evaluation sees the original, unmodified vectors
src_vec_eval = copy.deepcopy(src_vec)
tgt_vec_eval = copy.deepcopy(tgt_vec)
# Evaluator instance
eval_model = evaluate.Evaluate(train_model.generator.W, src_vec_eval, tgt_vec_eval,
                               src_dict, tgt_dict, hparams.src_lang, hparams.tgt_lang,
                               hparams.eval_dir, hparams.vocab_size)
# TensorFlow session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    local_lr = hparams.lr
    for epoch in range(hparams.epochs):
        # Train the model
        train_model.run(sess, local_lr)
        # Evaluate using the nearest-neighbor measure
        eval_model.calc_nn_acc(sess)
        # Evaluate using the CSLS similarity measure
        eval_model.run_csls_metrics(sess)
        # Drop the learning rate
        local_lr = train_model.set_lr(local_lr, eval_model.drop_lr)
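# For reference, the CSLS measure that run_csls_metrics above evaluates is
# commonly defined (Conneau et al., 2018) as
#   CSLS(Wx, y) = 2 * cos(Wx, y) - r_T(Wx) - r_S(y)
# where r_T and r_S are the mean cosine similarities of a point to its k
# nearest neighbors in the target and source embedding spaces.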
def ctrl(x_tr, y_tr, x_te, x_va=None):
    p = np.size(y_tr, 1)
    thr = 0.3
    m = 5 if p <= 20 else 7 if p <= 100 else 9
    n_tr = np.size(x_tr, 0)
    n_tr_va = int(n_tr / 5)
    x_tr = np.array(x_tr)
    y_tr = np.int32(y_tr)
    x_tr_va = x_tr[0:n_tr_va, :]
    y_tr_va = y_tr[0:n_tr_va, :]
    x_tr = x_tr[n_tr_va:, :]
    y_tr = y_tr[n_tr_va:, :]
    pred_base = pt.BinaryRelevance(svm.LinearSVC())
    pred_base.fit(x_tr, y_tr)
    y_tr_va_ = sparse.dok_matrix.toarray(pred_base.predict(x_tr_va))
    # filter 1: keep labels whose base classifier reaches the F1 threshold
    ev = evaluate.Evaluate()
    f1 = np.zeros([p], dtype=np.float64)
    for j in range(p):
        f1[j] = ev.eval_macro_f1(y_tr_va[:, j], y_tr_va_[:, j])
    yc = np.where(f1 >= thr, True, False)
    yc_index = np.array(list(range(p)))[yc]
    y_tr_c = y_tr[:, yc]
    # filter 2: pick the top-m correlated labels for each label via chi-squared
    r = []
    for j in range(p):
        yc_j = np.where(yc_index == j, False, True)
        yc_index_j = yc_index[yc_j].tolist()
        y_tr_c_j = y_tr_c[:, yc_j]
        y_tr_j = y_tr[:, j]
        chi2, _ = fs.chi2(y_tr_c_j, y_tr_j)
        chi2 = chi2.tolist()
        r_j = []
        for k in range(m):
            if not chi2:
                break
            index = np.argmax(chi2)
            r_j.append(yc_index_j[index])
            yc_index_j.remove(yc_index_j[index])
            chi2.remove(chi2[index])
        r.append(r_j)
    # predict the test and validation sets together
    n_te, n_va = 0, 0
    if x_va is not None:
        n_te = np.size(x_te, axis=0)
        n_va = np.size(x_va, axis=0)
        x_te = np.vstack((x_te, x_va))
    else:
        x_te = np.array(x_te)
    # get the original prediction
    y_te_ori = sparse.dok_matrix.toarray(pred_base.predict(x_te))
    y_te_adv = np.zeros([np.size(x_te, 0), p])
    # train meta-classifiers and predict, one label at a time (to reduce storage usage)
    for j in range(p):
        pred_meta = [svm.LinearSVC() for _ in r[j]]
        # train meta-classifiers for label l_j
        for k, index in enumerate(r[j]):
            x = np.hstack([x_tr, y_tr[:, index][:, np.newaxis]])
            y = y_tr[:, j]
            pred_meta[k].fit(x, y)
        # predict phase: majority vote over the meta-classifiers
        votes = np.zeros([np.size(x_te, 0)])
        for k, index in enumerate(r[j]):
            x = np.hstack([x_te, y_te_ori[:, index][:, np.newaxis]])
            y = pred_meta[k].predict(x)
            votes += np.where(y == 1, 1, -1)
        y_te_adv[:, j] = np.where(votes > 0, 1, 0)
    y_te_ = y_te_adv
    # split the joint prediction back into test and validation parts
    if x_va is not None:
        y_va_ = y_te_[n_te:, :]
        y_te_ = y_te_[0:n_te, :]
        return y_te_, y_va_
    else:
        return y_te_
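# Worked illustration of the voting step in ctrl above (illustrative values):
# if three meta-classifiers output y = [1, 0, 1] for one instance, then
# votes = (+1) + (-1) + (+1) = +1 > 0, so the final prediction for that label
# is 1.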
def main():
    args = parse_args()
    model_dir = args.model_dir

    """LOAD CONFIG FILE"""
    config_files = glob.glob(os.path.join(model_dir, '*.ini'))
    assert len(config_files) == 1, 'Put only one config file in the directory'
    config_file = config_files[0]
    config = configparser.ConfigParser()
    config.read(config_file)

    """LOGGER"""
    logger = getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s')
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh.setFormatter(formatter)
    logger.addHandler(sh)
    log_file = model_dir + 'log.txt'
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.info('[Test start] logging to {}'.format(log_file))

    """PARAMETER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    vocab_type = config['Parameter']['vocab_type']
    coefficient = float(config['Parameter']['coefficient'])

    """TEST DETAIL"""
    gpu_id = args.gpu
    batch_size = args.batch
    model_file = args.model

    """DATASET"""
    test_src_file = config['Dataset']['test_src_file']
    correct_txt_file = config['Dataset']['correct_txt_file']
    test_data_size = dataset.data_size(test_src_file)
    logger.info('test size: {0}'.format(test_data_size))

    if vocab_type == 'normal':
        src_vocab = dataset.VocabNormal()
        src_vocab.load(model_dir + 'src_vocab.normal.pkl')
        src_vocab.set_reverse_vocab()
        trg_vocab = dataset.VocabNormal()
        trg_vocab.load(model_dir + 'trg_vocab.normal.pkl')
        trg_vocab.set_reverse_vocab()
        sos = np.array([src_vocab.vocab['<s>']], dtype=np.int32)
        eos = np.array([src_vocab.vocab['</s>']], dtype=np.int32)
    elif vocab_type == 'subword':
        src_vocab = dataset.VocabSubword()
        src_vocab.load(model_dir + 'src_vocab.sub.model')
        trg_vocab = dataset.VocabSubword()
        trg_vocab.load(model_dir + 'trg_vocab.sub.model')
        sos = np.array([src_vocab.vocab.PieceToId('<s>')], dtype=np.int32)
        eos = np.array([src_vocab.vocab.PieceToId('</s>')], dtype=np.int32)

    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(src_vocab_size, trg_vocab_size))

    evaluater = evaluate.Evaluate(correct_txt_file)
    test_iter = dataset.Iterator(test_src_file, test_src_file, src_vocab, trg_vocab, batch_size,
                                 sort=False, shuffle=False, include_label=False)

    """MODEL"""
    model = Multi(src_vocab_size, trg_vocab_size, embed_size, hidden_size,
                  class_size, dropout_ratio, coefficient)
    chainer.serializers.load_npz(model_file, model)

    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()

    """TEST"""
    outputs = []
    labels = []
    for i, batch in enumerate(test_iter.generate(), start=1):
        batch = convert.convert(batch, gpu_id)
        output, label = model.predict(batch[0], sos, eos)
        for o, l in zip(output, label):
            outputs.append(trg_vocab.id2word(o))
            labels.append(l)
    rank_list = evaluater.rank(labels)
    single = evaluater.single(rank_list)
    multiple = evaluater.multiple(rank_list)
    logger.info('single: {} | {}'.format(single[0], single[1]))
    logger.info('multi : {} | {}'.format(multiple[0], multiple[1]))

    with open(model_file + '.hypo', 'w') as f:
        [f.write(o + '\n') for o in outputs]
    with open(model_file + '.attn', 'w') as f:
        [f.write('{}\n'.format(l)) for l in labels]
x = [0] * 100
for i in range(10):
    for j in range(10):
        x[i * 10 + j] = j
y = [0] * 100
for i in range(10):
    for j in range(10):
        y[i * 10 + j] = i
basex = [3, 7, 7]
basey = [5, 3, 7]
baseh = 10
h = 1.7
fc = 2900
Tx = 38.2
G = 10
htb = 10
hre = 1.7
Noise = -110
rsrpthre = -88
sinrthre = 50
coverthre = 0.7
nbaseallx = [3]
nbaseally = [5]
ncost = 300
ocost = 100
parameter1 = [basex, basey, x, y, baseh, h, fc, Tx, G, htb, hre, Noise,
              rsrpthre, sinrthre, coverthre, nbaseallx, nbaseally, ncost, ocost]
evaluator = evaluate.Evaluate(parameter=parameter1)
a = evaluator.Evaluate_main()
print(a)
        build_cknn_graphs.BuildKNNGraphs(fn_result, fn_result_reranking, fn_label,
                                         kNN, retri_amount, search_region, lam))
print 'Graph Build Step; Total Time is ', time.time() - T, 's'

print '###################### Graph Fusion Step ##########################'
T = time.time()
retrieval_length = 60000
for i in range(retrieval_length):
    graph_list = []
    for j in range(num_ranks):
        graph_list.append(graph_lists[j][i])
    graph_list_copy = copy.deepcopy(graph_list)
    weight, graph = graphfusion.DEMFR(graph_list_copy, num_ranks, kNN, retri_amount)
    weights[graph[0]] = weight
    graphs[graph[0]] = graph
    vectexS.append(graph[0])
print 'Graph Fusion Step; Total Time is ', time.time() - T, 's'

print '###################### Re-Rank Step ##########################'
T = time.time()
graphfusion.Expectation_Rank(vectexS, graphs, fn_fusion_result, kNN, retri_amount, weights)
print 'Re-Rank Step; Each retrieval image time is ', \
    (time.time() - T) / result_length * 1000 / retri_amount, 'ms'

print "After graphs fusion:"
evaluate.Evaluate(fn_label, fn_fusion_result)
for line in fd_stdin:
    line = line.rstrip()
    line = line.split()
    fn_graph = data_directory + line[0]
    if numpy.mod(count, num_ranks) != 0:
        graph_list.append(cPickle.load(open(fn_graph, 'rb')))
        count += 1
        continue
    else:
        graph_list.append(cPickle.load(open(fn_graph, 'rb')))
        count += 1
    graph_list_copy = copy.deepcopy(graph_list)
    selected_images = graphfusion.Fusion_Density_Subgraph(graph_list_copy, num_ranks, retri_amount)
    # Uncomment the next line to use the PageRank-based method, but keep the
    # line above as well: "selected_images[0]" is needed to define the center
    # of the graph.
    # selected_images = graphfusion.Fusion_Graph_Laplacian(graph_list, num_ranks, retri_amount, selected_images[0])
    fd_stdin_fusion.write(line[0] + ' ')
    for img_id in selected_images:
        fd_stdin_fusion.write(str(img_id) + ' ')
    fd_stdin_fusion.write('\n')
    graph_list = []
fd_stdin_fusion.close()

import evaluate
print "After fusing VOC and HSV graphs:"
evaluate.Evaluate(fn_label, fn_fusion_result, retri_amount - 2)
def main():
    args = parse_args()
    model_dir = args.model_dir

    """LOAD CONFIG FILE"""
    config_files = glob.glob(os.path.join(model_dir, '*.ini'))
    assert len(config_files) == 1, 'Put only one config file in the directory'
    config_file = config_files[0]
    config = configparser.ConfigParser()
    config.read(config_file)

    """LOGGER"""
    logger = getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s')
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh.setFormatter(formatter)
    logger.addHandler(sh)
    log_file = model_dir + 'log.txt'
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.info('[Training start] logging to {}'.format(log_file))

    """PARAMETER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    weight_decay = float(config['Parameter']['weight_decay'])
    gradclip = float(config['Parameter']['gradclip'])
    vocab_type = config['Parameter']['vocab_type']
    vocab_size = int(config['Parameter']['vocab_size'])
    coefficient = float(config['Parameter']['coefficient'])

    """TRAINING DETAIL"""
    gpu_id = args.gpu
    n_epoch = args.epoch
    batch_size = args.batch
    interval = args.interval
    reg = False if args.type == 'l' or args.type == 's' else True

    """DATASET"""
    if args.type == 'l':
        section = 'Local'
    elif args.type == 'lr':
        section = 'Local_Reg'
    elif args.type == 's':
        section = 'Server'
    else:
        section = 'Server_Reg'
    train_src_file = config[section]['train_src_file']
    train_trg_file = config[section]['train_trg_file']
    valid_src_file = config[section]['valid_src_file']
    valid_trg_file = config[section]['valid_trg_file']
    test_src_file = config[section]['test_src_file']
    correct_txt_file = config[section]['correct_txt_file']

    train_data_size = dataset.data_size(train_src_file)
    valid_data_size = dataset.data_size(valid_src_file)
    logger.info('train size: {0}, valid size: {1}'.format(train_data_size, valid_data_size))

    if vocab_type == 'normal':
        src_vocab = dataset.VocabNormal(reg)
        trg_vocab = dataset.VocabNormal(reg)
        if os.path.isfile(model_dir + 'src_vocab.normal.pkl') and os.path.isfile(model_dir + 'trg_vocab.normal.pkl'):
            src_vocab.load(model_dir + 'src_vocab.normal.pkl')
            trg_vocab.load(model_dir + 'trg_vocab.normal.pkl')
        else:
            init_vocab = {'<pad>': 0, '<unk>': 1, '<s>': 2, '</s>': 3}
            src_vocab.build(train_src_file, True, init_vocab, vocab_size)
            trg_vocab.build(train_trg_file, False, init_vocab, vocab_size)
            dataset.save_pickle(model_dir + 'src_vocab.normal.pkl', src_vocab.vocab)
            dataset.save_pickle(model_dir + 'trg_vocab.normal.pkl', trg_vocab.vocab)
        src_vocab.set_reverse_vocab()
        trg_vocab.set_reverse_vocab()
        sos = convert.convert_list(np.array([src_vocab.vocab['<s>']], dtype=np.int32), gpu_id)
        eos = convert.convert_list(np.array([src_vocab.vocab['</s>']], dtype=np.int32), gpu_id)
    elif vocab_type == 'subword':
        src_vocab = dataset.VocabSubword()
        trg_vocab = dataset.VocabSubword()
        if os.path.isfile(model_dir + 'src_vocab.sub.model') and os.path.isfile(model_dir + 'trg_vocab.sub.model'):
            src_vocab.load(model_dir + 'src_vocab.sub.model')
            trg_vocab.load(model_dir + 'trg_vocab.sub.model')
        else:
            src_vocab.build(train_src_file, model_dir + 'src_vocab.sub', vocab_size)
            trg_vocab.build(train_trg_file, model_dir + 'trg_vocab.sub', vocab_size)
        sos = convert.convert_list(np.array([src_vocab.vocab.PieceToId('<s>')], dtype=np.int32), gpu_id)
        eos = convert.convert_list(np.array([src_vocab.vocab.PieceToId('</s>')], dtype=np.int32), gpu_id)

    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(src_vocab_size, trg_vocab_size))

    train_iter = dataset.Iterator(train_src_file, train_trg_file, src_vocab, trg_vocab, batch_size,
                                  sort=True, shuffle=True, reg=reg)
    # train_iter = dataset.Iterator(train_src_file, train_trg_file, src_vocab, trg_vocab, batch_size,
    #                               sort=False, shuffle=False, reg=reg)
    valid_iter = dataset.Iterator(valid_src_file, valid_trg_file, src_vocab, trg_vocab, batch_size,
                                  sort=False, shuffle=False, reg=reg)
    evaluater = evaluate.Evaluate(correct_txt_file)
    test_iter = dataset.Iterator(test_src_file, test_src_file, src_vocab, trg_vocab, batch_size,
                                 sort=False, shuffle=False)

    """MODEL"""
    if reg:
        class_size = 1
        model = MultiReg(src_vocab_size, trg_vocab_size, embed_size, hidden_size,
                         class_size, dropout_ratio, coefficient)
    else:
        model = Multi(src_vocab_size, trg_vocab_size, embed_size, hidden_size,
                      class_size, dropout_ratio, coefficient)

    """OPTIMIZER"""
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(gradclip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()

    """TRAIN"""
    sum_loss = 0
    loss_dic = {}
    result = []
    for epoch in range(1, n_epoch + 1):
        for i, batch in enumerate(train_iter.generate(), start=1):
            try:
                batch = convert.convert(batch, gpu_id)
                loss = optimizer.target(*batch)
                sum_loss += loss.data
                optimizer.target.cleargrads()
                loss.backward()
                optimizer.update()
                if i % interval == 0:
                    logger.info('E{} ## iteration:{}, loss:{}'.format(epoch, i, sum_loss))
                    sum_loss = 0
            except Exception as e:
                logger.info(traceback.format_exc())
                logger.info('iteration: {}'.format(i))
                for b in batch[0]:
                    for bb in b:
                        logger.info(src_vocab.id2word(bb))
        chainer.serializers.save_npz(model_dir + 'model_epoch_{}.npz'.format(epoch), model)

        """EVALUATE"""
        valid_loss = 0
        for batch in valid_iter.generate():
            batch = convert.convert(batch, gpu_id)
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                valid_loss += optimizer.target(*batch).data
        logger.info('E{} ## val loss:{}'.format(epoch, valid_loss))
        loss_dic[epoch] = valid_loss

        """TEST"""
        outputs = []
        labels = []
        for i, batch in enumerate(test_iter.generate(), start=1):
            batch = convert.convert(batch, gpu_id)
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                output, label = model.predict(batch[0], sos, eos)
            # for o, l in zip(output, label):
            #     o = chainer.cuda.to_cpu(o)
            #     outputs.append(trg_vocab.id2word(o))
            #     labels.append(l)
            for l in label:
                labels.append(l)
        rank_list = evaluater.rank(labels)
        s_rate, s_count = evaluater.single(rank_list)
        m_rate, m_count = evaluater.multiple(rank_list)
        logger.info('E{} ## s: {} | {}'.format(epoch, ' '.join(x for x in s_rate), ' '.join(x for x in s_count)))
        logger.info('E{} ## m: {} | {}'.format(epoch, ' '.join(x for x in m_rate), ' '.join(x for x in m_count)))
        # with open(model_dir + 'model_epoch_{}.hypo'.format(epoch), 'w') as f:
        #     [f.write(o + '\n') for o in outputs]
        with open(model_dir + 'model_epoch_{}.attn'.format(epoch), 'w') as f:
            [f.write('{}\n'.format(l)) for l in labels]
        result.append('{},{},{},{}'.format(epoch, valid_loss, s_rate[-1], m_rate[-1]))

    """MODEL SAVE"""
    best_epoch = min(loss_dic, key=(lambda x: loss_dic[x]))
    logger.info('best_epoch:{0}'.format(best_epoch))
    chainer.serializers.save_npz(model_dir + 'best_model.npz', model)

    with open(model_dir + 'result.csv', 'w') as f:
        f.write('epoch,valid_loss,single,multiple\n')
        [f.write(r + '\n') for r in result]
def eval(self):
    ev = evaluate.Evaluate(self.results['labels'])
    ev.score()
    print util.list_as_dec_str(ev.match_at_k)
    print util.list_as_dec_str(ev.mrr_at_k)