def augment(features, source_path, input_nbest_path, output_nbest_path):
    '''
    Function to augment the n-best list with feature functions

    Groups from the input n-best list are paired with source sentences in
    order (iteration stops at whichever runs out first). Every feature
    function scores every candidate, the score is appended to the candidate,
    and the candidate is written to the output n-best list.

    :param features: Iterable of feature function objects; each must expose
                     ``name`` and ``get_score(src_sent, hyp, (sent_idx, cand_idx))``
    :param source_path: Path to the original source sentences (maybe required for the feature function)
    :param input_nbest_path: Path to the n-best file
    :param output_nbest_path: Path to the output n-best file
    '''
    # Initialize NBestList objects
    logger.info('Initializing Nbest lists')
    input_nbest = NBestList(input_nbest_path, mode='r')
    output_nbest = NBestList(output_nbest_path, mode='w')

    try:
        # Load the source sentences; the context manager guarantees the
        # file handle is released even when a feature function raises.
        logger.info('Loading source sentences')
        with codecs.open(source_path, mode='r', encoding='UTF-8') as src_sents:
            # For each of the item in the n-best list, append the feature
            for sent_idx, (group, src_sent) in enumerate(zip(input_nbest, src_sents)):
                for cand_idx, item in enumerate(group):
                    for feature in features:
                        # The (sentence, candidate) index pair is zero-based.
                        item.append_feature(
                            feature.name,
                            feature.get_score(src_sent, item.hyp,
                                              (sent_idx, cand_idx)))
                    output_nbest.write(item)
                processed = sent_idx + 1
                if processed % 100 == 0:
                    logger.info('Augmented ' + L.b_yellow(str(processed)) +
                                ' sentences.')
    finally:
        # Always close the output so already-written candidates are not
        # left in an unclosed file after a failure.
        output_nbest.close()
# Rerank every n-best group by the dot product of its numeric features with
# `weights`, write the reordered group and its top hypothesis, then clean up.
# NOTE(review): this chunk was received collapsed onto a single line, so the
# nesting below is reconstructed from statement order — confirm it against
# the original layout, especially where the "%i groups processed" message sits.
counter = 0
for group in input_aug_nbest:
    index = 0
    # Maps a candidate's position inside the group to its score.
    scores = dict()
    for item in group:
        # Keep only the whitespace-separated tokens that is_number accepts
        # (presumably this drops feature-name labels — verify against is_number).
        features = np.asarray(
            [x for x in item.features.split() if is_number(x)], dtype=float)
        try:
            scores[index] = np.dot(features, weights)
        except ValueError:
            # The failure is only logged; this candidate gets no entry in
            # `scores` and is therefore not written to the reranked output.
            logger.error(
                'Number of features in the nbest and the weights file are not the same'
            )
        index += 1
    # Candidate positions ordered from highest to lowest score.
    sorted_indices = sorted(scores, key=scores.get, reverse=True)
    for idx in sorted_indices:
        output_nbest.write(group[idx])
    # NOTE(review): if no candidate in the group was scored, sorted_indices
    # is empty and the indexing below raises IndexError.
    output_1best.write(group[sorted_indices[0]].hyp + "\n")
    counter += 1
    # Progress message every 100 groups.
    if counter % 100 == 0:
        logger.info(L.b_yellow(str(counter)) + " groups processed")
logger.info("%i groups processed" % (counter))
logger.info("Finished processing %i groups" % (counter))
logger.info(L.green('Reranking completed.'))
output_nbest.close()
output_1best.close()

# Remove the file at output_nbest_path when the clean_up option is set.
if args.clean_up:
    os.remove(output_nbest_path)
# Final command-line options, argument parsing and output-directory setup.
# NOTE(review): this chunk was received collapsed onto a single line; the
# nesting of the statements after each `if` is reconstructed — confirm it
# against the original layout.
parser.add_argument("-s", "--predictable-seed", dest="pred_seed",
                    action='store_true',
                    help="Tune with predictable seed to avoid randomness")
parser.add_argument("--moses-dir", dest="moses_dir", required=True,
                    help="Path to Moses. Required for tuning scripts")
args = parser.parse_args()

# Extra scorer flags; stays empty unless the metric is 'm2'
# (presumably appended to the tuning command later — not visible here).
fscore_arg = ""
if args.metric == 'm2':
    fscore_arg = " --sctype M2SCORER --scconfig ignore_whitespace_casing:true "
    logger.info("Using M2 Tuning")
logger.info(L.b_yellow('Arguments: ') + fscore_arg)

# Create the output directory on first use.
if not os.path.exists(args.out_dir):
    os.makedirs(args.out_dir)
# Presumably directs logging to train_log.txt inside the output directory —
# verify against L.set_logger.
L.set_logger(os.path.abspath(args.out_dir), 'train_log.txt')
L.print_args(args)

logger.info("Reading weights from config file")
features = configreader.parse_ini(args.input_config)
logger.info("Feature weights: " + str(features))

# Work on a copy of the input n-best list stored in the output directory.
output_nbest_path = args.out_dir + '/augmented.nbest'
shutil.copy(args.input_nbest, output_nbest_path)
logger.info('Extracting stats and features')