def _parse_bool(value):
    """Interpret a command-line string as a boolean flag value.

    ``argparse`` hands the raw string to the ``type`` callable.  Using the
    builtin ``bool`` there is a trap: every non-empty string is truthy, so
    ``-features_only False`` would switch the option *on*.  This converter
    treats the usual "false" spellings (and the empty string) as ``False``
    and, to stay compatible with previous invocations, any other value as
    ``True``.
    """
    return value.lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def main():
    """Command-line entry point: add features to an n-best list.

    Positional arguments: the n-best file, the alignments file, the source
    file and the name of a config module (imported with ``__import__``, so
    it must be importable from the current path).

    For every hypothesis the feature string returned by ``add_features`` is
    computed, then either

    * the default: one ``sid ||| target ||| features`` line is printed, or
    * with ``-features_only <true value>``: only the feature names are
      printed, one per line (the part of each space-separated item that
      precedes the first ``=``).

    All output is encoded as UTF-8.
    """
    parser = argparse.ArgumentParser(description='Add features to n-best list')
    parser.add_argument('nbest', help='n-best file')
    parser.add_argument('align', help='alignments file')
    parser.add_argument('source', help='source file')
    parser.add_argument('config', help='config file')
    # type=bool would turn the string "False" into True; parse it properly.
    parser.add_argument('-features_only', type=_parse_bool, default=False,
                        help='print features only')
    args = parser.parse_args()

    # The config argument is a module name, not a path.
    config = __import__(args.config)

    # groupby only merges *consecutive* records sharing the same first field,
    # so this relies on the n-best records arriving grouped by that field.
    nbests = groupby(read_nbest(args.nbest, args.align), key=lambda x: x[0])

    # Each group of hypotheses is paired with the next source entry; izip
    # stops silently at the shorter of the two streams.
    for (_, group), source in izip(nbests, read_source(args.source)):
        for sid, target, alignments, init_features in group:
            features = add_features(
                extract_features(source, target, alignments, config),
                init_features)
            if args.features_only:
                for f in features.split(' '):
                    # Single-argument parenthesised form: same output as the
                    # Python 2 print statement, and matches the branch below.
                    print(f.split('=')[0].encode('utf8'))
            else:
                print(u'{} ||| {} ||| {}'.format(sid, target, features).encode('utf8'))
def main():
    """Command-line entry point: re-rank n-best lists read from stdin.

    Takes one positional argument, the path of a pickled weights object.
    Records are read with ``read_nbest('/dev/stdin')`` and grouped by their
    first field; for each group the record maximising the key function
    returned by ``select_best(weights)`` is chosen and its second field (the
    sentence) is printed, UTF-8 encoded.

    Afterwards some diagnostics are logged at INFO level: the number of
    observed features and, when the weight keys and the observed features
    differ by fewer than 20 names, the names that differ.
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    parser = argparse.ArgumentParser(description='Re-rank n-best lists')
    parser.add_argument('weights', help='weight file to use for ranking')
    args = parser.parse_args()

    # NOTE(security): unpickling can execute arbitrary code; only pass weight
    # files that come from a trusted source.
    # Pickle data is binary, so the file is opened in 'rb' mode.
    with open(args.weights, 'rb') as f:
        weights = pickle.load(f)
    logging.info('Reranking with %d weights', len(weights))

    # groupby only merges consecutive records with the same first field.
    for _, group in groupby(read_nbest('/dev/stdin'), key=lambda t: t[0]):
        # Records are 4-tuples; only the sentence is needed for output.
        _, sentence, _, _ = max(group, key=select_best(weights))
        print(sentence.encode('utf8'))

    # NOTE(review): all_observed_features is not defined in this function;
    # presumably a module-level set filled while scoring - confirm.
    logging.info('Observed %d features in test set', len(all_observed_features))
    all_weights = set(weights.iterkeys())
    if all_weights != all_observed_features:
        # Names present on exactly one side (symmetric difference).
        diff = all_weights ^ all_observed_features
        if len(diff) < 20:
            logging.info('Missing features: %s', ' '.join(diff))