def test_make_copy_optional_arg():
    tc = TypeContext()
    ft = AInixType(tc, "ft")
    bt = AInixType(tc, "bt")
    arg1 = AInixArgument(tc, "arg1", "bt", required=False, parent_object_name="fo")
    fo = AInixObject(
        tc, "fo", "ft", [arg1],
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "masfoo_parser", '"foo" arg1?').name)
    bo = AInixObject(
        tc, "bo", "bt", None,
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "masdfo_parser", '"bar"').name)
    tc.finalize_data()
    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    ast = parser.create_parse_tree("foobar", "ft")
    tokenizer = SpaceTokenizer()
    in_str = "Hello bar sdf cow"
    tokens, metadata = tokenizer.tokenize(in_str)
    unpar_res = unparser.to_string(ast)
    assert unpar_res.total_string == "foobar"
    result = make_copy_version_of_tree(ast, unparser, metadata)
    assert result.next_node_not_copy.get_choice_node_for_arg("arg1").copy_was_chosen
def fac():
    type_context = TypeContext()
    for tf in type_files:
        loader.load_path(tf, type_context)
    type_context.finalize_data()
    index = ainix_kernel.indexing.exampleindex.ExamplesIndex(type_context)
    for ef in example_files:
        exampleloader.load_path(ef, index)
    return index
def numbers_type_context():
    type_context = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", type_context,
                     up_search_limit=3)
    loader.load_path("builtin_types/numbers.ainix.yaml", type_context,
                     up_search_limit=3)
    type_context.finalize_data()
    return type_context
def get_toy_strings_context() -> TypeContext:
    context = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", context,
                     up_search_limit=4)
    loader.load_path(f"{LOAD_PATH_ROOT}/twostr.ainix.yaml", context,
                     up_search_limit=4)
    context.finalize_data()
    return context
def get_a_tc() -> TypeContext:
    type_context = TypeContext()
    loader = TypeContextDataLoader(type_context, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(type_context)
    for f in ALL_EXAMPLE_NAMES:
        loader.load_path(f"builtin_types/{f}.ainix.yaml")
    type_context.finalize_data()
    return type_context
def tc():
    context = TypeContext()
    loader.load_path("builtin_types/paths.ainix.yaml", context, up_search_limit=2)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", context,
                     up_search_limit=2)
    generic_strings.create_generic_strings(context)
    context.finalize_data()
    return context
def test_partial_copy_numbers():
    tc = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", tc, up_search_limit=3)
    loader.load_path("builtin_types/numbers.ainix.yaml", tc, up_search_limit=3)
    tc.finalize_data()
    parser = StringParser(tc)
    tokenizer = NonLetterTokenizer()
    unparser = AstUnparser(tc, tokenizer)
    ast = parser.create_parse_tree("1000", "Number")
def toy_context() -> TypeContext:
    tc = TypeContext()
    AInixType(tc, "Foo")
    AInixObject(tc, "oFoo1", "Foo")
    AInixObject(tc, "oFoo2", "Foo")
    AInixObject(tc, "oFoo3", "Foo")
    AInixObject(tc, "oFoo4", "Foo")
    AInixType(tc, "Bar")
    AInixObject(tc, "oBar1", "Bar")
    AInixObject(tc, "oBar2", "Bar")
    AInixType(tc, "Baz")
    AInixObject(tc, "oBaz1", "Baz")
    tc.finalize_data()
    return tc
def build_types() -> Tuple[TypeImplTensorMap, TypeContext]:
    tc = TypeContext()
    AInixType(tc, "Foo")
    AInixObject(tc, "oFoo1", "Foo")
    AInixObject(tc, "oFoo2", "Foo")
    AInixObject(tc, "oFoo3", "Foo")
    AInixObject(tc, "oFoo4", "Foo")
    AInixType(tc, "Zar")
    AInixObject(tc, "oZar1", "Zar")
    AInixObject(tc, "oZar2", "Zar")
    AInixType(tc, "Zaz")
    AInixObject(tc, "oZaz1", "Zaz")
    tc.finalize_data()
    return TypeImplTensorMap(tc), tc
def test_file_replacer():
    replacements = _load_replacer_relative(
        "../../../training/augmenting/data/FILENAME.tsv")
    tc = TypeContext()
    loader = TypeContextDataLoader(tc, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(tc)
    tc.finalize_data()
    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    for repl in replacements:
        x, y = repl.get_replacement()
        assert x == y
        # Every filename replacement should round-trip: parse as a Path and
        # unparse back to the identical string.
        ast = parser.create_parse_tree(x, "Path")
        result = unparser.to_string(ast)
        assert result.total_string == x
def get_examples(split_proportions: SPLIT_PROPORTIONS_TYPE = DEFAULT_SPLITS,
                 randomize_seed: bool = False):
    type_context = TypeContext()
    loader = TypeContextDataLoader(type_context, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(type_context)
    for f in ALL_EXAMPLE_NAMES:
        loader.load_path(f"builtin_types/{f}.ainix.yaml")
    type_context.finalize_data()
    split_seed = None if not randomize_seed else random.randint(1, int(1e8))
    index = load_all_examples(type_context, split_proportions, split_seed)
    #index = load_tellina_examples(type_context)
    #index = load_all_and_tellina(type_context)
    #print("num docs", index.get_num_x_values())
    #print("num train", len(list(index.get_all_x_values((DataSplits.TRAIN, )))))
    replacers = get_all_replacers()
    return type_context, index, replacers, loader
def test_multi_copy():
    tc = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", tc, up_search_limit=3)
    ft = AInixType(tc, "ft")
    bt = AInixType(tc, "bt", default_type_parser_name="max_munch_type_parser")
    arg1 = AInixArgument(tc, "lhs", "bt", required=True, parent_object_name="fo")
    arg2 = AInixArgument(tc, "right", "bt", required=True, parent_object_name="sg")
    fo = AInixObject(
        tc, "fo", "ft", [arg1, arg2],
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "mp", 'lhs right').name)
    bfoo = AInixObject(
        tc, "bfoo", "bt", None,
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "masdfo_parser", '"foo"').name)
    bbar = AInixObject(
        tc, "bbar", "bt", None,
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "mdf", '"bar"').name)
    tc.finalize_data()
    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    ast = parser.create_parse_tree("foofoo", "ft")
    tokenizer = SpaceTokenizer()
    in_str = "Hello foo"
    tokens, metadata = tokenizer.tokenize(in_str)
    unpar_res = unparser.to_string(ast)
    assert unpar_res.total_string == "foofoo"
    cset = AstObjectChoiceSet(ft)
    cset.add(ast, True, 1, 1)
    assert cset.is_node_known_valid(ast)
    add_copies_to_ast_set(ast, cset, unparser, metadata)
    # After adding copies, the set should accept a copy of the "foo" token
    # (index 1 in "Hello foo") in either argument position, or in both.
    copy_left = ObjectChoiceNode(
        ft, ObjectNode(
            fo, pmap({
                "lhs": ObjectChoiceNode(bt, CopyNode(bt, 1, 1)),
                "right": ObjectChoiceNode(bt, ObjectNode(bfoo, pmap()))
            })))
    assert cset.is_node_known_valid(copy_left)
    copy_right = ObjectChoiceNode(
        ft, ObjectNode(
            fo, pmap({
                "lhs": ObjectChoiceNode(bt, ObjectNode(bfoo, pmap())),
                "right": ObjectChoiceNode(bt, CopyNode(bt, 1, 1))
            })))
    assert cset.is_node_known_valid(copy_right)
    copy_both = ObjectChoiceNode(
        ft, ObjectNode(
            fo, pmap({
                "lhs": ObjectChoiceNode(bt, CopyNode(bt, 1, 1)),
                "right": ObjectChoiceNode(bt, CopyNode(bt, 1, 1))
            })))
    assert cset.is_node_known_valid(copy_both)
        x_tokenizer, query_vocab, output_size, freeze_base=True)
    model.eval()
    type_context = TypeContext()
    loader = TypeContextDataLoader(type_context, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(type_context)
    for f in ALL_EXAMPLE_NAMES:
        loader.load_path(f"builtin_types/{f}.ainix.yaml")
    type_context.finalize_data()
    index = load_all_examples(type_context)
    #index = load_tellina_examples(type_context)
    print("num docs", index.backend.index.doc_count())
    replacers = get_all_replacers()
    train_splits = (DataSplits.TRAIN, )
    all_ex_list = list(index.get_all_x_values(train_splits))
    random.shuffle(all_ex_list)
    processed_x_raws = set()
    summaries = []
    examples = []
    for example in tqdm(all_ex_list):
        if example.xquery in processed_x_raws: