Ejemplo n.º 1
0
def _parse_bool_flag(value):
    """Convert a command-line string into a boolean.

    ``type=bool`` is a trap with argparse: ``bool('False')`` is ``True``
    because every non-empty string is truthy.  The usual "false" spellings
    (and the empty string) map to ``False`` here; any other value maps to
    ``True``, which is what ``bool`` returned for it before.
    """
    return value.lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def main():
    """Add extracted features to every hypothesis of an n-best list.

    Command-line arguments: the n-best file, the alignments file, the source
    file, the name of a config module to import and an optional
    ``-features_only`` value.

    Hypotheses are grouped by their first field and each group is paired with
    the next item produced by ``read_source``.  For every hypothesis the
    result of ``extract_features`` is combined with its initial features via
    ``add_features``; then either the full ``sid ||| target ||| features``
    line is printed or, when ``-features_only`` is true, only the part of
    each space-separated feature that precedes ``=`` (one per line).
    """
    # Function-scope import keeps this block self-contained.
    import importlib

    parser = argparse.ArgumentParser(description='Add features to n-best list')
    parser.add_argument('nbest', help='n-best file')
    parser.add_argument('align', help='alignments file')
    parser.add_argument('source', help='source file')
    parser.add_argument('config', help='config file')
    # The option still takes one value (e.g. "-features_only True"), but the
    # value is now parsed properly so "False"/"0"/"no" really disable it.
    parser.add_argument('-features_only', type=_parse_bool_flag, default=False,
                        help='print features only')
    args = parser.parse_args()

    # import_module returns the named module itself, even for dotted names;
    # __import__('pkg.mod') would return the top-level package 'pkg'.
    config = importlib.import_module(args.config)

    nbests = groupby(read_nbest(args.nbest, args.align), key=lambda x: x[0])
    # NOTE(review): izip stops at the shorter input, so a source file with
    # fewer entries than n-best groups is truncated silently.
    for (_, group), source in izip(nbests, read_source(args.source)):
        for sid, target, alignments, init_features in group:
            features = add_features(
                extract_features(source, target, alignments, config),
                init_features)
            if args.features_only:
                for f in features.split(' '):
                    # Keep only the feature name, i.e. the text before '='.
                    print(f.split('=')[0].encode('utf8'))
            else:
                print(u'{} ||| {} ||| {}'.format(sid, target, features).encode('utf8'))
Ejemplo n.º 2
0
def main():
    """Re-rank n-best lists read from standard input.

    Takes one command-line argument: the path of a pickled weights object.
    Hypotheses from ``read_nbest('/dev/stdin')`` are grouped by their first
    field; for each group the hypothesis that maximises the key function
    returned by ``select_best(weights)`` is chosen and its sentence is
    printed, UTF-8 encoded.  Afterwards the feature names in the weights are
    compared with ``all_observed_features`` and, when fewer than 20 names
    differ, the differing names are logged.
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser(description='Re-rank n-best lists')
    parser.add_argument('weights', help='weight file to use for ranking')
    args = parser.parse_args()

    # Pickle data is binary, so the file must be opened in 'rb' mode; text
    # mode can corrupt it on Windows and fails outright on Python 3.
    # NOTE(review): unpickling can execute arbitrary code -- only load weight
    # files from a trusted source.
    with open(args.weights, 'rb') as f:
        weights = pickle.load(f)

    logging.info('Reranking with %d weights', len(weights))

    for _, group in groupby(read_nbest('/dev/stdin'), key=lambda t: t[0]):
        # Each hypothesis is a 4-tuple; only the sentence (second field) is used.
        _, sentence, _, _ = max(group, key=select_best(weights))
        print(sentence.encode('utf8'))

    # NOTE(review): all_observed_features is not defined in this function;
    # presumably a module-level set filled while scoring -- confirm.
    logging.info('Observed %d features in test set', len(all_observed_features))
    all_weights = set(weights)
    if all_weights != all_observed_features:
        # Symmetric difference: names present on exactly one of the two sides.
        diff = all_weights ^ all_observed_features
        if len(diff) < 20:
            logging.info('Missing features: %s', ' '.join(diff))