# coding=utf-8
"""Turn seq2seq decoder output into a JSON list of code snippets.

Usage: <this script> SEQ2SEQ_OUTPUT DATASET_JSON CODE_OUTPUT

SEQ2SEQ_OUTPUT is read line by line, each line holding space-separated
encoded code tokens.  DATASET_JSON is loaded with ``json.load`` and iterated
in lockstep with those lines.  CODE_OUTPUT receives the decoded snippets as
an indented JSON list.
"""
from __future__ import print_function

import json
import sys

from util import encoded_code_tokens_to_code
from canonicalize import decanonicalize_code


def _decode_predictions(seq2seq_output, dataset_path, code_output):
    """Decode every prediction line and dump the resulting code list as JSON.

    Args:
        seq2seq_output: path of the text file with one encoded prediction
            per line.
        dataset_path: path of the JSON dataset whose entries are paired with
            the prediction lines by position.
        code_output: path of the JSON file to write.
    """
    # Context managers guarantee every handle is closed (and the output is
    # flushed) even if decoding raises part-way through.
    with open(dataset_path) as dataset_file:
        dataset = json.load(dataset_file)

    code_list = []
    with open(seq2seq_output) as predictions:
        # zip() stops at the shorter input, so surplus lines or surplus
        # examples are ignored.
        for line, example in zip(predictions, dataset):
            encoded_tokens = line.strip().split(' ')
            code = encoded_code_tokens_to_code(encoded_tokens)

            # Only examples carrying a slot map get their placeholders
            # substituted back.
            if 'slot_map' in example:
                code = decanonicalize_code(code, example['slot_map'])

            code_list.append(code)

    # The output file is opened only after all decoding succeeded, so a
    # failure above never truncates an existing output file.
    with open(code_output, 'w') as output_file:
        json.dump(code_list, output_file, indent=2)


if __name__ == '__main__':
    _decode_predictions(sys.argv[1], sys.argv[2], sys.argv[3])
try: canonical_intent, slot_map = canonicalize_intent( rewritten_intent) snippet = snippet canonical_snippet = canonicalize_code(snippet, slot_map) intent_tokens = nltk.word_tokenize(canonical_intent) decanonical_snippet = decanonicalize_code( canonical_snippet, slot_map) snippet_reconstr = astor.to_source( ast.parse(snippet)).strip() decanonical_snippet_reconstr = astor.to_source( ast.parse(decanonical_snippet)).strip() encoded_reconstr_code = get_encoded_code_tokens( decanonical_snippet_reconstr) decoded_reconstr_code = encoded_code_tokens_to_code( encoded_reconstr_code) if not compare_ast(ast.parse(decoded_reconstr_code), ast.parse(snippet)): # tqdm.write(str(i)) # tqdm.write('Original Snippet: %s' % snippet_reconstr) # tqdm.write('Tokenized Snippet: %s' % ' '.join(encoded_reconstr_code)) # tqdm.write('decoded_reconstr_code: %s' % decoded_reconstr_code) not_same_count += 1 except Exception as e: failed_count += 1 finally: example['slot_map'] = slot_map if rewritten_intent is None: