float(tokens[1]) init_list += tokens[1:] except ValueError: pass if args.no_add_weight == False: init_list.append(args.init_value) dim = len(init_list) init_opt.write(' '.join(init_list) + '\n') init_opt.write(' '.join(['0' for i in range(dim)]) + '\n') init_opt.write(' '.join(['1' for i in range(dim)]) + '\n') seed_arg = '' if args.pred_seed: seed_arg = ' -r 1 ' #seed_arg = ' -r 1500 ' if (args.alg == 'mert'): logger.info('Running MERT') cmd = args.moses_dir + '/bin/mert -d ' + str( dim ) + ' -S ' + args.out_dir + '/statscore.data -F ' + args.out_dir + '/features.data --ifile ' + args.out_dir + '/init.opt --threads ' + str( args.threads) + seed_arg + fscore_arg # + "-m 50 -n 20" logger.info("Command: " + cmd) os.system(cmd) else: logger.error('Invalid tuning algorithm: ' + args.alg) logger.info(L.green("Optimization complete.")) assert os.path.isfile('weights.txt') shutil.move('weights.txt', args.out_dir + '/weights.txt')
# Rerank every n-best group with the tuned weights.
#
# Each hypothesis is scored as the dot product of its numeric feature values
# and `weights`. The scored hypotheses of a group are written to
# `output_nbest` from highest to lowest score, and the `hyp` text of the
# top-scoring one is written as a single line to `output_1best`.
counter = 0
try:
    for group in input_aug_nbest:
        scores = dict()
        for index, item in enumerate(group):
            # Keep only the tokens accepted by is_number(); presumably this
            # drops the feature-name labels in the feature string -- verify
            # against the n-best format.
            features = np.asarray(
                [x for x in item.features.split() if is_number(x)],
                dtype=float)
            try:
                scores[index] = np.dot(features, weights)
            except ValueError:
                # np.dot rejects vectors of different lengths. The hypothesis
                # stays unscored and is therefore left out of the output.
                logger.error(
                    'Number of features in the nbest and the weights file are not the same'
                )

        sorted_indices = sorted(scores, key=scores.get, reverse=True)
        for idx in sorted_indices:
            output_nbest.write(group[idx])

        if sorted_indices:
            output_1best.write(group[sorted_indices[0]].hyp + "\n")
        else:
            # Previously this case crashed with an IndexError on
            # sorted_indices[0]. Emit an empty line instead so the 1-best
            # file keeps one line per group.
            logger.error(
                "No hypothesis could be scored for group %i; writing an empty 1-best line"
                % (counter + 1))
            output_1best.write("\n")

        counter += 1
        if counter % 100 == 0:
            logger.info(L.b_yellow(str(counter)) + " groups processed")

    # NOTE(review): the original indentation of the next statement was lost;
    # it is assumed to run once after the loop -- confirm.
    logger.info("%i groups processed" % (counter))
    logger.info("Finished processing %i groups" % (counter))
    logger.info(L.green('Reranking completed.'))
finally:
    # Close both outputs even if reranking fails part-way through.
    try:
        output_nbest.close()
    finally:
        output_1best.close()

# Only reached when reranking succeeded: optionally delete the file at
# output_nbest_path.
if args.clean_up:
    os.remove(output_nbest_path)
required=True, help="Input n-best file") parser.add_argument("-o", "--output-nbest", dest="output_nbest_path", required=True, help="Output n-best file") parser.add_argument( "-f", "--feature", dest="feature_string", required=True, help= "feature initializer, e.g. LM('LM0','/path/to/lm_file', normalize=True)") args = parser.parse_args() L.set_logger(os.path.abspath(os.path.dirname(args.output_nbest_path)), 'augment_log.txt') L.print_args(args) print( '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++' ) print('[' + args.feature_string + ']') print( '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++' ) features = eval('[' + args.feature_string + ']') augment(features, args.source_path, args.input_nbest_path, args.output_nbest_path) logger.info(L.green('Augmenting done.'))