def preprocess_feature(obj):
    # preprocess sentence
    tokens = tokenizer.tokenize(obj['snt'])
    snt = ' '.join(tokens)
    # preprocess amr
    linear_amr = linearize(obj['doc'])
    return (snt, linear_amr, obj)
def lin_deline(amr_obj):
    # round-trip the AMR graph through linearize/delinearize
    amr = amr_obj['doc']
    amr = linearize(amr)
    amr = delinearize(amr)
    amr_obj['doc'] = amr
    return amr_obj
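# Illustrative sketch (not part of the original pipeline): how the two helpers
# above might be driven in parallel. The file name 'example.amr' and the pool
# size are hypothetical; read_amr_format and tokenizer are assumed to be set up
# elsewhere in this module.
#
#   data = read_amr_format('example.amr', return_dict=False)
#   with Pool(20) as p:
#       features = p.map(preprocess_feature, data)   # [(snt, linear_amr, obj), ...]
#       round_trip = p.map(lin_deline, data)         # linearize/delinearize sanity check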
args = parser.parse_args()

if args.linearize:
    # Linearize an AMR file into parallel .snt / .amr text files.
    p = Pool(20)
    print('Linearize file: %s' % (args.input))
    filename = basename(args.input)
    directory = args.input[:-len(filename)]
    data = read_amr_format(args.input, return_dict=False)
    sentences = [x['snt'] for x in data]
    amrs = [x['doc'] for x in data]
    amrs_linearized = []
    for x in data:
        try:
            amrs_linearized.append(linearize(x['doc']))
        except Exception:
            print('Error at linearizing: ' + x['id'])
    prefix = filename.split('.')[0]
    save(sentences, join(directory, '%s.snt' % (args.output)))
    save(amrs_linearized, join(directory, '%s.amr' % (args.output)))
elif args.delinearize:
    # Delinearize a file of linearized AMRs back into graph form.
    p = Pool(20)
    print('Delinearize file: %s' % (args.input))
    filename = basename(args.input)
    directory = args.input[:-len(filename)]
    with io.open(args.input) as f:
        lines = f.readlines()
    amrs = p.map(delinearize, lines)
    data = {}