Example #1
def preprocess_feature(obj):
    # preprocess sentence
    tokens = tokenizer.tokenize(obj['snt'])
    snt = ' '.join(tokens)
    # preprocess amr
    linear_amr = linearize(obj['doc'])
    return (snt, linear_amr, obj)
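
A minimal usage sketch for preprocess_feature; the sample dict below is hypothetical, and tokenizer and linearize are assumed to be the module-level objects the function already references:

sample = {'id': 'ex-1',
          'snt': 'The boy wants to go.',
          'doc': '(w / want-01 :ARG0 (b / boy))'}
snt, linear_amr, obj = preprocess_feature(sample)
print(snt)         # space-joined tokens of the sentence
print(linear_amr)  # linearized form of the AMR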
Example #2
def lin_deline(amr_obj):
    # Round-trip the AMR through linearize/delinearize to normalize
    # its serialized form, updating the object in place.
    amr = amr_obj['doc']
    amr = linearize(amr)
    amr = delinearize(amr)
    amr_obj['doc'] = amr
    return amr_obj
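
Round-tripping discards formatting quirks in the input AMR and re-emits it in the serializer's canonical form. A minimal usage sketch, assuming the module's linearize/delinearize and a hypothetical AMR object:

obj = {'doc': '(w / want-01 :ARG0 (b / boy))'}
obj = lin_deline(obj)
print(obj['doc'])  # the same graph, re-serialized canonically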
Example #3
from multiprocessing import Pool
from os.path import dirname, join
import io

args = parser.parse_args()

if args.linearize:
    print('Linearize file: %s' % (args.input))
    directory = dirname(args.input)
    data = read_amr_format(args.input, return_dict=False)
    sentences = [x['snt'] for x in data]

    # Linearize serially so that failures can be reported per AMR id.
    amrs_linearized = []
    for x in data:
        try:
            amrs_linearized.append(linearize(x['doc']))
        except Exception:
            print('Error at linearizing: ' + x['id'])
    save(sentences, join(directory, '%s.snt' % (args.output)))
    save(amrs_linearized, join(directory, '%s.amr' % (args.output)))

elif args.delinearize:
    # Delinearization is independent per line, so it parallelizes cleanly.
    p = Pool(20)
    print('Delinearize file: %s' % (args.input))
    directory = dirname(args.input)
    with io.open(args.input) as f:
        lines = f.readlines()
    amrs = p.map(delinearize, lines)
    data = {}
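
A hypothetical invocation of the script in Example #3; the script name is assumed, and the flag names are inferred from the argparse attributes used above (args.linearize, args.input, args.output):

python preprocess.py --linearize --input data/train.txt --output train
# writes train.snt and train.amr next to the input file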