def canonicalize_example(query, code):
    """Canonicalize a (query, code) pair and sanity-check the resulting code.

    ``canonicalize_query`` turns ``query`` into a canonical query string plus
    a mapping from string literals to their replacement representations.
    Every occurrence of such a literal in ``code`` is replaced by its
    representation wrapped in single quotes, and the result is passed through
    ``lang.py.parse.canonicalize_code``.

    As a sanity check, the canonical code is parsed with ``parse_raw``,
    converted back to a Python AST and rendered with ``astor``; the rendering
    must equal the one obtained from the first top-level statement of
    ``ast.parse(canonical_code)``.

    Args:
        query: the query string handed to ``canonicalize_query``.
        code: the source code string paired with the query.

    Returns:
        A tuple ``(query_tokens, canonical_code, str_map)`` where
        ``query_tokens`` is the canonical query split on single spaces.

    Raises:
        AssertionError: if the two rendered sources differ.
    """
    # NOTE(review): SOURCE appears to define `canonicalize_example` more than
    # once in this file; if so, the last definition wins at import time.
    from lang.py.parse import parse_raw, parse_tree_to_python_ast, canonicalize_code as make_it_compilable
    import astor
    import ast

    canonical_query, str_map = canonicalize_query(query)

    canonical_code = code
    # `.items()` instead of the Python-2-only `.iteritems()`: identical
    # iteration on Python 2, and it does not raise AttributeError on Python 3.
    for str_literal, str_repr in str_map.items():
        canonical_code = canonical_code.replace(str_literal, '\'' + str_repr + '\'')

    canonical_code = make_it_compilable(canonical_code)

    # sanity check: the project's parse tree must render to the same source
    # as Python's own AST for the first top-level statement.
    parse_tree = parse_raw(canonical_code)
    gold_ast_tree = ast.parse(canonical_code).body[0]
    gold_source = astor.to_source(gold_ast_tree)
    ast_tree = parse_tree_to_python_ast(parse_tree)
    source = astor.to_source(ast_tree)

    assert gold_source == source, 'sanity check fails: gold=[%s], actual=[%s]' % (gold_source, source)

    query_tokens = canonical_query.split(' ')

    return query_tokens, canonical_code, str_map
def canonicalize_code(code):
    """Return the canonical form of ``code`` after a round-trip sanity check.

    ``code`` is first passed through ``lang.py.parse.canonicalize_code``.
    The result is then parsed with ``parse_raw``, converted back to a Python
    AST and rendered with ``astor``; that rendering must match the rendering
    of the first top-level statement of ``ast.parse`` on the same text.

    Args:
        code: the source code string to canonicalize.

    Returns:
        The canonical code string.

    Raises:
        AssertionError: if the two rendered sources differ.
    """
    from lang.py.parse import parse_raw, parse_tree_to_python_ast, canonicalize_code as make_it_compilable
    import astor
    import ast

    compilable = make_it_compilable(code)

    # Parse with the project parser first, then build the reference rendering
    # from Python's own AST (first top-level statement only).
    raw_tree = parse_raw(compilable)
    expected = astor.to_source(ast.parse(compilable).body[0])

    # Render the project's parse tree through the same pretty-printer.
    actual = astor.to_source(parse_tree_to_python_ast(raw_tree))

    assert expected == actual, 'sanity check fails: gold=[%s], actual=[%s]' % (expected, actual)

    return compilable
def canonicalize_example(query, code):
    """Canonicalize a query/code example and verify the code round-trips.

    Steps performed:

    1. ``canonicalize_query(query)`` yields the canonical query and a mapping
       of string literals to replacement representations.
    2. Each mapped literal occurring in ``code`` is replaced by its
       representation enclosed in single quotes.
    3. The substituted code goes through ``lang.py.parse.canonicalize_code``.
    4. Sanity check: the code is parsed with ``parse_raw``, converted to a
       Python AST and rendered with ``astor``; the text must equal the
       rendering of the first top-level statement from ``ast.parse``.

    Args:
        query: the query string handed to ``canonicalize_query``.
        code: the source code string paired with the query.

    Returns:
        ``(query_tokens, canonical_code, str_map)``; ``query_tokens`` is the
        canonical query split on single spaces.

    Raises:
        AssertionError: if the two rendered sources differ.
    """
    # NOTE(review): SOURCE appears to contain another definition of
    # `canonicalize_example` in this file; the last one bound wins, so one
    # of them is likely redundant.
    from lang.py.parse import parse_raw, parse_tree_to_python_ast, canonicalize_code as make_it_compilable
    import astor
    import ast

    canonical_query, str_map = canonicalize_query(query)

    canonical_code = code
    # `.items()` works on both Python 2 and Python 3 dicts, whereas the
    # previous `.iteritems()` raises AttributeError on Python 3.
    for str_literal, str_repr in str_map.items():
        canonical_code = canonical_code.replace(str_literal, '\'' + str_repr + '\'')

    canonical_code = make_it_compilable(canonical_code)

    # sanity check: compare the rendering of the project's parse tree with
    # the rendering of Python's AST for the first top-level statement.
    parse_tree = parse_raw(canonical_code)
    gold_ast_tree = ast.parse(canonical_code).body[0]
    gold_source = astor.to_source(gold_ast_tree)
    ast_tree = parse_tree_to_python_ast(parse_tree)
    source = astor.to_source(ast_tree)

    assert gold_source == source, 'sanity check fails: gold=[%s], actual=[%s]' % (gold_source, source)

    query_tokens = canonical_query.split(' ')

    return query_tokens, canonical_code, str_map