def test_make_copy_optional_arg():
    tc = TypeContext()
    ft = AInixType(tc, "ft")
    bt = AInixType(tc, "bt")
    arg1 = AInixArgument(tc, "arg1", "bt", required=False, parent_object_name="fo")
    fo = AInixObject(
        tc, "fo", "ft", [arg1],
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "masfoo_parser", '"foo" arg1?').name)
    bo = AInixObject(
        tc, "bo", "bt", None,
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "masdfo_parser", '"bar"').name)
    tc.finalize_data()
    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    ast = parser.create_parse_tree("foobar", "ft")
    tokenizer = SpaceTokenizer()
    in_str = "Hello bar sdf cow"
    tokens, metadata = tokenizer.tokenize(in_str)
    unpar_res = unparser.to_string(ast)
    assert unpar_res.total_string == "foobar"
    result = make_copy_version_of_tree(ast, unparser, metadata)
    assert result.next_node_not_copy.get_choice_node_for_arg("arg1").copy_was_chosen
def fac():
    type_context = TypeContext()
    for tf in type_files:
        loader.load_path(tf, type_context)
    type_context.finalize_data()
    index = ainix_kernel.indexing.exampleindex.ExamplesIndex(type_context)
    for ef in example_files:
        exampleloader.load_path(ef, index)
    return index
def get_toy_strings_context() -> TypeContext:
    context = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", context,
                     up_search_limit=4)
    loader.load_path(f"{LOAD_PATH_ROOT}/twostr.ainix.yaml", context,
                     up_search_limit=4)
    context.finalize_data()
    return context
def numbers_type_context():
    type_context = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", type_context,
                     up_search_limit=3)
    loader.load_path("builtin_types/numbers.ainix.yaml", type_context,
                     up_search_limit=3)
    type_context.finalize_data()
    return type_context
def tc():
    context = TypeContext()
    loader.load_path("builtin_types/paths.ainix.yaml", context, up_search_limit=2)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", context,
                     up_search_limit=2)
    generic_strings.create_generic_strings(context)
    context.finalize_data()
    return context
def get_a_tc() -> TypeContext:
    type_context = TypeContext()
    loader = TypeContextDataLoader(type_context, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(type_context)
    for f in ALL_EXAMPLE_NAMES:
        loader.load_path(f"builtin_types/{f}.ainix.yaml")
    type_context.finalize_data()
    return type_context
def create_from_save_state_dict(
        cls,
        save_state: dict,
        new_type_context: TypeContext
) -> 'TypeContextWrapperVocab':
    instance = cls.__new__(cls)
    instance.itos = np.array([
        new_type_context.get_type_by_name(name) if is_type
        else new_type_context.get_object_by_name(name)
        for is_type, name in save_state['itos']
    ])
    instance._finish_init()
    return instance
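# A minimal sketch (illustrative, not from the source) of the save-state shape
# the comprehension above assumes: save_state['itos'] is a sequence of
# (is_type, name) pairs, so types and objects can share one index-to-string
# table. The names below are hypothetical.
#
#   example_save_state = {
#       'itos': [
#           (True, "SomeTypeName"),     # resolved via get_type_by_name
#           (False, "SomeObjectName"),  # resolved via get_object_by_name
#       ],
#   }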
def test_partial_copy_numbers():
    tc = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", tc,
                     up_search_limit=3)
    loader.load_path("builtin_types/numbers.ainix.yaml", tc, up_search_limit=3)
    tc.finalize_data()
    parser = StringParser(tc)
    tokenizer = NonLetterTokenizer()
    unparser = AstUnparser(tc, tokenizer)
    ast = parser.create_parse_tree("1000", "Number")
def toy_context() -> TypeContext:
    tc = TypeContext()
    AInixType(tc, "Foo")
    AInixObject(tc, "oFoo1", "Foo")
    AInixObject(tc, "oFoo2", "Foo")
    AInixObject(tc, "oFoo3", "Foo")
    AInixObject(tc, "oFoo4", "Foo")
    AInixType(tc, "Bar")
    AInixObject(tc, "oBar1", "Bar")
    AInixObject(tc, "oBar2", "Bar")
    AInixType(tc, "Baz")
    AInixObject(tc, "oBaz1", "Baz")
    tc.finalize_data()
    return tc
def build_types() -> Tuple[TypeImplTensorMap, TypeContext]:
    tc = TypeContext()
    AInixType(tc, "Foo")
    AInixObject(tc, "oFoo1", "Foo")
    AInixObject(tc, "oFoo2", "Foo")
    AInixObject(tc, "oFoo3", "Foo")
    AInixObject(tc, "oFoo4", "Foo")
    AInixType(tc, "Zar")
    AInixObject(tc, "oZar1", "Zar")
    AInixObject(tc, "oZar2", "Zar")
    AInixType(tc, "Zaz")
    AInixObject(tc, "oZaz1", "Zaz")
    tc.finalize_data()
    return TypeImplTensorMap(tc), tc
def _get_root_parser(
        type_context: typecontext.TypeContext,
        type_name: str,
        parser_name: Optional[str]
) -> ainix_common.parsing.parse_primitives.TypeParser:
    """The public interface accepts a type name to parse as the root AST node
    for the string. This method converts that type name into an actual parser
    instance."""
    if parser_name:
        return type_context.get_type_parser_by_name(parser_name)
    else:
        type_instance = type_context.get_type_by_name(type_name)
        if type_instance.default_type_parser is None:
            raise ValueError(
                f"No default type parser available for {type_instance}")
        return type_instance.default_type_parser
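# Hedged usage sketch (call site assumed, not shown in this excerpt): callers
# go through the StringParser facade and pass only a type name; internally
# _get_root_parser resolves it to a parser, raising if the type has no
# default type parser.
#
#   parser = StringParser(type_context)
#   ast = parser.create_parse_tree("foobar", "ft")  # "ft" resolved internally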
def test_objectnode_copy_simple():
    """Copy with no children"""
    tc = TypeContext()
    AInixType(tc, "footype")
    foo_object = AInixObject(tc, "foo_object", "footype")
    instance = ObjectNode(foo_object)
    # Unfrozen
    clone, path = instance.path_clone()
    assert id(clone) != id(instance)
    assert clone.implementation == foo_object
    assert instance == clone
    assert path is None
    # Frozen
    instance.freeze()
    clone, path = instance.path_clone()
    assert id(clone) == id(instance)
    assert clone == instance
    assert path is None
    assert clone.is_frozen
    # Frozen but on unfreeze path
    clone, path = instance.path_clone([instance])
    assert id(clone) != id(instance)
    assert instance == clone
    assert clone.implementation == foo_object
    assert not clone.is_frozen
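# As the assertions above suggest (semantics inferred from the tests, not from
# separate documentation): path_clone() behaves copy-on-write. Unfrozen nodes
# are always duplicated; frozen nodes are returned shared, unless they lie on
# the supplied unfreeze path, in which case fresh unfrozen copies are made
# along that path.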
def test_pointer_change_here(freeze_first):
    """Test an arg change"""
    # Establish types
    tc = TypeContext()
    AInixType(tc, "footype")
    bartype = AInixType(tc, "bartype")
    arg1 = AInixArgument(tc, "arg1", "bartype", required=True)
    foo_object = AInixObject(tc, "foo_object", "footype", [arg1])
    bar_object = AInixObject(tc, "bar_object", "bartype")
    other_bar_obj = AInixObject(tc, "other_bar_ob", "bartype")
    # Make an ast
    arg_choice = ObjectChoiceNode(bartype)
    ob_chosen = ObjectNode(bar_object)
    arg_choice.set_choice(ob_chosen)
    instance = ObjectNode(foo_object)
    instance.set_arg_value("arg1", arg_choice)
    if freeze_first:
        instance.freeze()
    # Try change
    deepest = list(instance.depth_first_iter())[-1]
    assert deepest.cur_node == ob_chosen
    new_node = ObjectNode(other_bar_obj)
    new_point = deepest.change_here(new_node)
    assert new_point.cur_node == new_node
    assert new_point.cur_node.is_frozen == freeze_first
def test_objectnode_copy_simple_with_arg():
    """Copy with an arg"""
    # Establish types
    tc = TypeContext()
    AInixType(tc, "footype")
    bartype = AInixType(tc, "bartype")
    arg1 = AInixArgument(tc, "arg1", "bartype", required=True)
    foo_object = AInixObject(tc, "foo_object", "footype", [arg1])
    bar_object = AInixObject(tc, "bar_object", "bartype")
    # Make an ast
    arg_choice = ObjectChoiceNode(bartype)
    ob_chosen = ObjectNode(bar_object)
    arg_choice.set_choice(ob_chosen)
    instance = ObjectNode(foo_object)
    instance.set_arg_value("arg1", arg_choice)
    # Do the tests:
    # Unfrozen
    clone, leaf = instance.path_clone()
    assert id(clone) != id(instance)
    assert leaf is None
    # with a path
    clone, leaf = instance.path_clone([instance])
    assert id(clone) != id(instance)
    assert leaf.cur_node == clone
    assert leaf.parent is None
    # with a deep path
    clone, leaf = instance.path_clone([instance, arg_choice])
    assert id(clone) != id(instance)
    assert leaf.cur_node.choice is None
    # with a deeper path
    clone, leaf = instance.path_clone([instance, arg_choice, ob_chosen])
    assert id(clone) != id(instance)
    assert clone == instance
    assert leaf.cur_node == ob_chosen
    assert leaf.parent.cur_node.choice == ob_chosen
def test_max_munch():
    tc = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", tc,
                     up_search_limit=3)
    foo_type = "MMTestType"
    AInixType(tc, foo_type, default_type_parser_name="max_munch_type_parser")

    def make_mock_with_parse_rep(representation: str):
        loader._load_object(
            {
                "name": representation,
                "type": foo_type,
                "preferred_object_parser": {
                    "grammar": f'"{representation}"'
                }
            },
            tc, "foopathsdf")
        assert tc.get_object_by_name(
            representation).preferred_object_parser_name is not None

    # A plain loop: make_mock_with_parse_rep is called for its side effects
    # and returns nothing, so collecting the results served no purpose.
    for rep in ("fo", "bar", "f", "foo", "foot", "baz"):
        make_mock_with_parse_rep(rep)
    parser = StringParser(tc)
    ast = parser.create_parse_tree("foobar", foo_type, allow_partial_consume=True)
    assert ast.next_node_not_copy.implementation.name == "foo"
def test_parse_set_optional():
    """Manually build up an ast, and make sure the ast set is representing
    optional args correctly."""
    tc = TypeContext()
    foo_type = AInixType(tc, "FooType")
    test_arg = AInixArgument(tc, "test_arg", "FooType", parent_object_name="foo_ob")
    foo_ob = AInixObject(tc, "foo_ob", "FooType", [test_arg])
    ast_set = AstObjectChoiceSet(foo_type, None)
    ast = ObjectChoiceNode(foo_type)
    next_o = ObjectNode(foo_ob)
    ast.set_choice(next_o)
    arg_choice_node = ObjectChoiceNode(test_arg.present_choice_type)
    arg_choice_node.set_choice(ObjectNode(test_arg.not_present_object))
    next_o.set_arg_value("test_arg", arg_choice_node)
    ast.freeze()
    ast_set.add(ast, True, 1, 1)
    data = ast_set._impl_name_to_data["foo_ob"].next_node.get_arg_set_data(
        next_o.as_childless_node())
    assert data is not None
    assert data.arg_to_choice_set["test_arg"].type_to_choose_name == \
        test_arg.present_choice_type.name
def test_objectnode_copy_with_child():
    """Copy with an arg"""
    # Establish types
    tc = TypeContext()
    AInixType(tc, "footype")
    bartype = AInixType(tc, "bartype")
    arg1 = AInixArgument(tc, "arg1", "bartype", parent_object_name="foo_object")
    foo_object = AInixObject(tc, "foo_object", "footype", [arg1])
    bar_object = AInixObject(tc, "bar_object", "bartype")
    # Make an ast
    fin_choice = ObjectNode(bar_object)
    is_pres = ObjectChoiceNode(bartype)
    is_pres.set_choice(fin_choice)
    arg_node = ObjectNode(arg1.is_present_object)
    arg_node.set_arg_value(OPTIONAL_ARGUMENT_NEXT_ARG_NAME, is_pres)
    is_pres_top = ObjectChoiceNode(arg1.present_choice_type)
    is_pres_top.set_choice(arg_node)
    instance = ObjectNode(foo_object)
    instance.set_arg_value("arg1", is_pres_top)
    # Do the tests:
    # Unfrozen
    clone, leaf = instance.path_clone()
    assert id(clone) != id(instance)
    assert leaf is None
    # Freeze part
    is_pres_top.freeze()
    clone, leaf = instance.path_clone()
    assert id(clone) != id(instance)
    assert not clone.is_frozen
    assert clone == instance
    assert id(clone.get_choice_node_for_arg("arg1")) == id(is_pres_top)
    # Freeze all
    instance.freeze()
    clone, leaf = instance.path_clone()
    assert id(clone) == id(instance)
    assert clone == instance
    assert id(clone.get_choice_node_for_arg("arg1")) == id(is_pres_top)
    # Full unfreeze path
    clone, leaf = instance.path_clone(
        [instance, is_pres_top, arg_node, is_pres, fin_choice])
    assert id(clone) != id(instance)
    assert not clone.is_frozen
    assert clone == instance
    assert leaf.get_nodes_to_here() == [
        instance, is_pres_top, arg_node, is_pres, fin_choice
    ]
    # Partial unfreeze path (stop early)
    clone, leaf = instance.path_clone([instance, is_pres_top, arg_node])
    assert id(clone) != id(instance)
    assert not clone.is_frozen
    assert clone != instance
    path = leaf.get_nodes_to_here()
    assert len(path) == 3
    new_arg_node: ObjectNode = leaf.cur_node
    assert new_arg_node.get_choice_node_for_arg(
        OPTIONAL_ARGUMENT_NEXT_ARG_NAME) is None
def get_default_nonretrieval_decoder(
        type_context: TypeContext,
        rnn_hidden_size: int
) -> TreeDecoder:
    object_vectorizer = vectorizers.TorchDeepEmbed(
        type_context.get_object_count(), rnn_hidden_size)
    ast_embed_size = int(rnn_hidden_size / 2)
    type_vectorizer = vectorizers.TorchDeepEmbed(
        type_context.get_type_count(), ast_embed_size)
    rnn_cell = TreeRNNCellLSTM(ast_embed_size, rnn_hidden_size)
    # rnn_cell = TreeCellOnlyAttn(rnn_hidden_size, rnn_hidden_size)
    # rnn_cell = TreeRNNCellGRU(rnn_hidden_size, rnn_hidden_size)
    action_selector = SimpleActionSelector(
        rnn_cell.output_size,
        objectselector.get_default_object_selector(type_context, object_vectorizer),
        type_context)
    return TreeRNNDecoder(rnn_cell, action_selector, type_vectorizer, type_context)
def __init__(self, type_context: TypeContext):
    super().__init__()
    self.lookup = torch.zeros(type_context.get_object_count(), 3)
    self.lookup[0] = torch.Tensor([1, 1, 1])
    self.lookup[1] = torch.Tensor([-1, -1, -1])
    self.lookup[2] = torch.Tensor([3, 3, 3])
    self.lookup[3] = torch.Tensor([10, 0, 1])
    self.lookup[6] = torch.Tensor([3, 5, 1])
def test_file_replacer():
    replacements = _load_replacer_relative(
        "../../../training/augmenting/data/FILENAME.tsv")
    tc = TypeContext()
    loader = TypeContextDataLoader(tc, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(tc)
    tc.finalize_data()
    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    for repl in replacements:
        x, y = repl.get_replacement()
        assert x == y
        ast = parser.create_parse_tree(x, "Path")
        result = unparser.to_string(ast)
        assert result.total_string == x
def get_default_retrieval_decoder(
        type_context: TypeContext,
        rnn_hidden_size: int,
        examples: ExamplesStore,
        replacer: Replacer,
        parser: StringParser,
        unparser: AstUnparser
) -> TreeDecoder:
    type_vectorizer = vectorizers.TorchDeepEmbed(
        type_context.get_type_count(), rnn_hidden_size)
    rnn_cell = TreeRNNCell(rnn_hidden_size, rnn_hidden_size)
    latent_store = make_latent_store_from_examples(
        examples, rnn_hidden_size, replacer, parser, unparser)
    action_selector = RetrievalActionSelector(latent_store, type_context, 0.25)
    return TreeRNNDecoder(rnn_cell, action_selector, type_vectorizer, type_context)
def __init__(
        self,
        rnn_cell: TreeRNNCell,
        action_selector: ActionSelector,
        type_vectorizer: VectorizerBase,
        type_context: TypeContext
        # , bce_pos_weight=1.0
):
    super().__init__()
    self.rnn_cell = rnn_cell
    self.action_selector = action_selector
    self.type_vectorizer = type_vectorizer
    self.type_context = type_context
    self.object_embeddings = nn.Embedding(
        type_context.get_object_count(), rnn_cell.hidden_size)
def get_examples(split_proportions: SPLIT_PROPORTIONS_TYPE = DEFAULT_SPLITS,
                 randomize_seed: bool = False):
    type_context = TypeContext()
    loader = TypeContextDataLoader(type_context, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(type_context)
    for f in ALL_EXAMPLE_NAMES:
        loader.load_path(f"builtin_types/{f}.ainix.yaml")
    type_context.finalize_data()
    # random.randint requires integer bounds; 1e8 is a float.
    split_seed = None if not randomize_seed else random.randint(1, int(1e8))
    index = load_all_examples(type_context, split_proportions, split_seed)
    # index = load_tellina_examples(type_context)
    # index = load_all_and_tellina(type_context)
    # print("num docs", index.get_num_x_values())
    # print("num train", len(list(index.get_all_x_values((DataSplits.TRAIN, )))))
    replacers = get_all_replacers()
    return type_context, index, replacers, loader
def _create_all_word_parts(tc: TypeContext, word_part_strs: List[Tuple[str, bool]]):
    symb_trie = pygtrie.CharTrie()
    for symb, allow_mod in word_part_strs:
        new_part = _create_word_part_obj(tc, symb, allow_mod)
        symb_trie[symb] = new_part
        if allow_mod:
            symb_trie[symb.upper()] = new_part
            first_letter_upper_version = symb[0].upper() + symb[1:]
            symb_trie[first_letter_upper_version] = new_part
    symb_trie[""] = tc.get_object_by_name(WORD_PART_TERMINAL_NAME)

    def word_parser_func(run: parse_primitives.TypeParserRun, string: str,
                         result: parse_primitives.TypeParserResult):
        result.set_valid_implementation(symb_trie.longest_prefix(string).value)
        result.set_next_slice(0, len(string))

    TypeParser(tc, WORD_PART_TYPE_PARSER_NAME, word_parser_func)
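# A minimal runnable sketch (added for illustration, not part of the source)
# of the CharTrie behavior the parser above relies on: longest_prefix does
# "max munch" matching over overlapping keys, with the "" entry acting as a
# fallback, mirroring the WORD_PART_TERMINAL_NAME entry.
def _demo_chartrie_longest_prefix():
    import pygtrie
    t = pygtrie.CharTrie()
    t[""] = "terminal"  # catch-all, matched when nothing longer fits
    t["f"] = "f"
    t["foo"] = "foo"
    assert t.longest_prefix("foobar").value == "foo"  # longest match wins
    assert t.longest_prefix("zzz").value == "terminal"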
def test_objectnode_copy_with_2children():
    """Copypasta of the above test, just with an extra arg thrown in"""
    # Establish types
    tc = TypeContext()
    AInixType(tc, "footype")
    bartype = AInixType(tc, "bartype")
    arg1 = AInixArgument(tc, "arg1", "bartype", parent_object_name="foo_obj")
    arg2 = AInixArgument(tc, "arg2", "bartype", parent_object_name="bar_obj")
    foo_object = AInixObject(tc, "foo_object", "footype", [arg1, arg2])
    bar_object = AInixObject(tc, "bar_object", "bartype")
    # Make an ast
    fin_choice = ObjectNode(bar_object)
    is_pres = ObjectChoiceNode(bartype)
    is_pres.set_choice(fin_choice)
    arg_node = ObjectNode(arg1.is_present_object)
    arg_node.set_arg_value(OPTIONAL_ARGUMENT_NEXT_ARG_NAME, is_pres)
    is_pres_top = ObjectChoiceNode(arg1.present_choice_type)
    is_pres_top.set_choice(arg_node)
    instance = ObjectNode(foo_object)
    instance.set_arg_value("arg1", is_pres_top)
    fin_choice2 = ObjectNode(bar_object)
    is_pres2 = ObjectChoiceNode(bartype)
    is_pres2.set_choice(fin_choice2)
    arg_node2 = ObjectNode(arg2.is_present_object)
    arg_node2.set_arg_value(OPTIONAL_ARGUMENT_NEXT_ARG_NAME, is_pres2)
    is_pres_top2 = ObjectChoiceNode(arg2.present_choice_type)
    is_pres_top2.set_choice(arg_node2)
    instance.set_arg_value("arg2", is_pres_top2)
    # Do the tests:
    # Unfrozen
    clone, leaf_pointer = instance.path_clone()
    assert id(clone) != id(instance)
    assert clone == instance
    # Freeze part
    is_pres_top.freeze()
    clone, leaf_pointer = instance.path_clone()
    assert id(clone) != id(instance)
    assert not clone.is_frozen
    assert clone == instance
    assert id(clone.get_choice_node_for_arg("arg1")) == id(is_pres_top)
    assert id(clone.get_choice_node_for_arg("arg2")) != id(is_pres_top2)
    # Freeze all
    instance.freeze()
    clone, leaf_pointer = instance.path_clone()
    assert id(clone) == id(instance)
    assert clone == instance
    assert id(clone.get_choice_node_for_arg("arg1")) == id(is_pres_top)
    assert id(clone.get_choice_node_for_arg("arg2")) == id(is_pres_top2)
    # Full unfreeze path
    clone, leaf_pointer = instance.path_clone(
        [instance, is_pres_top, arg_node, is_pres, fin_choice])
    assert id(clone) != id(instance)
    assert not clone.is_frozen
    assert clone == instance
    assert leaf_pointer.get_nodes_to_here() == \
        [instance, is_pres_top, arg_node, is_pres, fin_choice]
    assert id(clone.get_choice_node_for_arg("arg2")) == id(is_pres_top2)
    assert clone.get_choice_node_for_arg("arg2").is_frozen
    # Partial unfreeze path (stop early)
    clone, leaf_pointer = instance.path_clone([instance, is_pres_top, arg_node])
    assert id(clone) != id(instance)
    assert not clone.is_frozen
    assert clone != instance
    assert len(leaf_pointer.get_nodes_to_here()) == 3
    new_arg_node: ObjectNode = leaf_pointer.cur_node
    assert new_arg_node.get_choice_node_for_arg(
        OPTIONAL_ARGUMENT_NEXT_ARG_NAME) is None
    assert clone.get_choice_node_for_arg("arg2") == is_pres_top2
    assert id(clone.get_choice_node_for_arg("arg2")) == id(is_pres_top2)
def test_multi_copy():
    tc = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml", tc,
                     up_search_limit=3)
    ft = AInixType(tc, "ft")
    bt = AInixType(tc, "bt", default_type_parser_name="max_munch_type_parser")
    arg1 = AInixArgument(tc, "lhs", "bt", required=True, parent_object_name="fo")
    arg2 = AInixArgument(tc, "right", "bt", required=True, parent_object_name="sg")
    fo = AInixObject(
        tc, "fo", "ft", [arg1, arg2],
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "mp", 'lhs right').name)
    bfoo = AInixObject(
        tc, "bfoo", "bt", None,
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "masdfo_parser", '"foo"').name)
    bbar = AInixObject(
        tc, "bbar", "bt", None,
        preferred_object_parser_name=create_object_parser_from_grammar(
            tc, "mdf", '"bar"').name)
    tc.finalize_data()
    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    ast = parser.create_parse_tree("foofoo", "ft")
    tokenizer = SpaceTokenizer()
    in_str = "Hello foo"
    tokens, metadata = tokenizer.tokenize(in_str)
    unpar_res = unparser.to_string(ast)
    assert unpar_res.total_string == "foofoo"
    cset = AstObjectChoiceSet(ft)
    cset.add(ast, True, 1, 1)
    assert cset.is_node_known_valid(ast)
    add_copies_to_ast_set(ast, cset, unparser, metadata)
    copy_left = ObjectChoiceNode(
        ft,
        ObjectNode(
            fo,
            pmap({
                "lhs": ObjectChoiceNode(bt, CopyNode(bt, 1, 1)),
                "right": ObjectChoiceNode(bt, ObjectNode(bfoo, pmap()))
            })))
    assert cset.is_node_known_valid(copy_left)
    copy_right = ObjectChoiceNode(
        ft,
        ObjectNode(
            fo,
            pmap({
                "lhs": ObjectChoiceNode(bt, ObjectNode(bfoo, pmap())),
                "right": ObjectChoiceNode(bt, CopyNode(bt, 1, 1))
            })))
    assert cset.is_node_known_valid(copy_right)
    copy_both = ObjectChoiceNode(
        ft,
        ObjectNode(
            fo,
            pmap({
                "lhs": ObjectChoiceNode(bt, CopyNode(bt, 1, 1)),
                "right": ObjectChoiceNode(bt, CopyNode(bt, 1, 1))
            })))
    assert cset.is_node_known_valid(copy_both)
def type_context():
    context = TypeContext()
    import os
    dirname, _ = os.path.split(os.path.abspath(__file__))
    loader.load_path(f"{dirname}/command.ainix.yaml", context)
    return context
if __name__ == "__main__":
    pretrained_checkpoint_path = "../../checkpoints/" \
                                 "lmchkp_iter152k_200_2rnn_total3.29_ns0.47_lm2.82.pt"
    output_size = 200
    (x_tokenizer, query_vocab), y_tokenizer = _get_default_tokenizers()
    base_enc = make_default_cookie_monster_base(query_vocab, output_size)
    model = PretrainPoweredQueryEncoder.create_with_pretrained_checkpoint(
        pretrained_checkpoint_path, x_tokenizer, query_vocab, output_size,
        freeze_base=True)
    model.eval()
    type_context = TypeContext()
    loader = TypeContextDataLoader(type_context, up_search_limit=4)
    loader.load_path("builtin_types/generic_parsers.ainix.yaml")
    loader.load_path("builtin_types/command.ainix.yaml")
    loader.load_path("builtin_types/paths.ainix.yaml")
    allspecials.load_all_special_types(type_context)
    for f in ALL_EXAMPLE_NAMES:
        loader.load_path(f"builtin_types/{f}.ainix.yaml")
    type_context.finalize_data()
    index = load_all_examples(type_context)
    # index = load_tellina_examples(type_context)
    print("num docs", index.backend.index.doc_count())
    replacers = get_all_replacers()
def base_type_context():
    type_context = TypeContext()
    loader.load_path(f"{BUILTIN_TYPES_PATH}/generic_parsers.ainix.yaml",
                     type_context)
    return type_context
def __init__(self, type_context: TypeContext):
    self._type_to_impl_tensor = [None] * type_context.get_type_count()
    for typ in type_context.get_all_types():
        self._type_to_impl_tensor[typ.ind] = torch.LongTensor(
            [impl.ind for impl in type_context.get_implementations(typ)])
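# Hedged usage sketch (shown as comments; the private attribute access is for
# illustration only, and a public accessor may exist on the real class). With
# the toy context from build_types above, each type's tensor holds the .ind of
# every implementation of that type:
#
#   tensor_map, tc = build_types()
#   foo = tc.get_type_by_name("Foo")
#   impls = tensor_map._type_to_impl_tensor[foo.ind]  # LongTensor of 4 indices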