Ejemplo n.º 1
0
def test_string_parse_e2e_sequence(type_context):
    """Round-trip piped ``hello`` invocations through parser and unparser.

    Registers a ``FooProgram`` object (invoke name ``hello``) carrying two
    arguments with the short names ``a`` and ``b``. Each input is parsed as a
    ``CommandSequence`` and the unparsed text is compared with the expected
    string.
    """
    flag_a = AInixArgument(type_context, "a", None,
                           arg_data={"short_name": "a"},
                           parent_object_name="sdf")
    flag_b = AInixArgument(type_context, "barg", None,
                           arg_data={"short_name": "b"},
                           parent_object_name="bw")
    foo_program = AInixObject(type_context, "FooProgram", "Program",
                              [flag_a, flag_b],
                              type_data={"invoke_name": "hello"})
    parser = StringParser(type_context)
    unparser = AstUnparser(type_context)

    spaced_pipe = "hello -a | hello -b"
    # (input, expected unparse). The pipe written without surrounding spaces
    # is expected to come back in the spaced form.
    cases = (
        (spaced_pipe, spaced_pipe),
        ("hello -a|hello -b", spaced_pipe),
        ("hello -a | hello", "hello -a | hello"),
    )
    for command, expected in cases:
        tree = parser.create_parse_tree(command, "CommandSequence")
        unparsed = unparser.to_string(tree)
        assert unparsed.total_string == expected
Ejemplo n.º 2
0
def test_string_parse_e2e_pos_unparse(type_context):
    """Check that a positional argument's node maps back to its substring.

    Builds a ``FooType`` whose only object parses the literal ``foo``, and a
    ``hello`` program taking that type as a required positional argument at
    position 0. After parsing and unparsing ``"hello foo"``, the first
    ``ObjectChoiceNode`` choosing ``FooType`` (in depth-first order) must
    unparse to ``"foo"``.
    """
    fooType = AInixType(type_context, "FooType")
    foo_parser = create_object_parser_from_grammar(
        type_context, "fooname", '"foo"')
    fo = AInixObject(type_context, "fo", "FooType", [],
                     preferred_object_parser_name=foo_parser.name)
    positional = AInixArgument(type_context,
                               "p1",
                               "FooType",
                               arg_data={
                                   POSITION: 0,
                                   MULTIWORD_POS_ARG: False
                               },
                               parent_object_name="bw",
                               required=True)
    twoargs = AInixObject(type_context, "FooProgram", "Program", [positional],
                          type_data={"invoke_name": "hello"})
    type_context.finalize_data()

    parser = StringParser(type_context)
    ast = parser.create_parse_tree("hello foo", "Program")
    unparser = AstUnparser(type_context)
    unparse_result = unparser.to_string(ast)
    assert unparse_result.total_string == "hello foo"

    # Default to None so a missing node is reported by the assertion below
    # instead of surfacing as a NameError on an unbound local.
    arg_node_pointer = next(
        (pointer for pointer in ast.depth_first_iter()
         if isinstance(pointer.cur_node, ObjectChoiceNode)
         and pointer.cur_node.type_to_choose.name == "FooType"),
        None)
    assert arg_node_pointer is not None, \
        "parsed AST contains no ObjectChoiceNode for FooType"
    assert unparse_result.pointer_to_string(arg_node_pointer) == "foo"
Ejemplo n.º 3
0
def test_string_parse_e2e_multiword2(type_context):
    """Unparse a multiword positional argument together with a flag.

    ``FooType`` is parsed from the literal ``foo bar``; the ``hello`` program
    takes a ``-a`` argument and a multiword positional at position 0. The
    input places the positional first, and the expected unparse places the
    flag first.
    """
    fooType = AInixType(type_context, "FooType")
    foo_bar_parser = create_object_parser_from_grammar(
        type_context, "fooname", '"foo bar"')
    fo = AInixObject(type_context, "fo", "FooType", [],
                     preferred_object_parser_name=foo_bar_parser.name)

    flag_a = AInixArgument(type_context, "a", None,
                           arg_data={"short_name": "a"},
                           parent_object_name="sdf")
    multiword_positional = AInixArgument(
        type_context, "p1", "FooType",
        arg_data={POSITION: 0, MULTIWORD_POS_ARG: True},
        parent_object_name="sdf")
    twoargs = AInixObject(type_context, "FooProgram", "Program",
                          [flag_a, multiword_positional],
                          type_data={"invoke_name": "hello"})
    type_context.finalize_data()

    tree = StringParser(type_context).create_parse_tree(
        "hello foo bar -a", "Program")
    unparsed = AstUnparser(type_context).to_string(tree)
    assert unparsed.total_string == "hello -a foo bar"
Ejemplo n.º 4
0
def test_string_parse_e2e_multiword3(type_context):
    """Round-trip ``hello -a`` parsed as a ``CommandSequence``.

    The ``hello`` program is declared with arguments short-named ``a`` and
    ``b`` plus whatever ``_make_positional()`` returns; only ``-a`` is
    supplied in the input string.
    """
    fooType = AInixType(type_context, "FooType")
    foo_parser = create_object_parser_from_grammar(
        type_context, "fooname", '"foo"')
    fo = AInixObject(type_context, "fo", "FooType", [],
                     preferred_object_parser_name=foo_parser.name)

    # Build the arguments in declaration order before registering the program.
    program_args = [
        AInixArgument(type_context, "a", None,
                      arg_data={"short_name": "a"},
                      parent_object_name="sdf"),
        AInixArgument(type_context, "barg", None,
                      arg_data={"short_name": "b"},
                      parent_object_name="bw"),
        _make_positional(),
    ]
    twoargs = AInixObject(type_context, "FooProgram", "Program", program_args,
                          type_data={"invoke_name": "hello"})
    type_context.finalize_data()

    command = "hello -a"
    tree = StringParser(type_context).create_parse_tree(
        command, "CommandSequence")
    unparsed = AstUnparser(type_context).to_string(tree)
    assert unparsed.total_string == "hello -a"
Ejemplo n.º 5
0
class Interface():
    """Pairs a trained model with an unparser to answer prediction requests."""

    def __init__(self, file_name):
        """Train a model and build the unparser used by ``predict``.

        ``file_name`` is currently unused: loading saved state through
        ``restore(file_name)`` is disabled and the model is trained on
        construction instead.
        """
        # hacks
        from ainix_kernel.training import fullret_try
        trained = fullret_try.train_the_thing()
        model, index, replacers, type_context, loader = trained
        self.type_context = type_context
        self.model = model
        self.example_store = index

        tokenizer = self.model.get_string_tokenizer()
        self.unparser = AstUnparser(self.type_context, tokenizer)

    def predict(self, utterance: str, ytype: str) -> PredictReturn:
        """Predict an AST for ``utterance`` and unparse it.

        Returns a successful ``PredictReturn`` holding the AST, its unparse
        and the model metadata. If a ``ModelException`` is raised, returns a
        ``PredictReturn`` whose first field is ``False`` and whose last field
        is the exception text.
        """
        try:
            predicted_ast, metadata = self.model.predict(
                utterance, ytype, False)
            assert predicted_ast.is_frozen
            unparsed = self.unparser.to_string(predicted_ast, utterance)
            return PredictReturn(success=True,
                                 ast=predicted_ast,
                                 unparse=unparsed,
                                 metad=metadata,
                                 error_message=None)
        except ModelException as err:
            failure = PredictReturn(False, None, None, None, str(err))
            return failure
Ejemplo n.º 6
0
def make_copy_version_of_tree(
        ast: ObjectChoiceNode, unparser: AstUnparser,
        token_metadata: StringTokensMetadata) -> ObjectChoiceNode:
    """Return a version of ``ast`` with copy nodes substituted where possible.

    Goes through the tree and replaces anything that can be copied with a
    copy at the earliest possible opportunity. Makes a copy and does not
    mutate the original ast.

    Args:
        ast: Root choice node of the tree to convert.
        unparser: Used to get the string each node of ``ast`` unparses to.
        token_metadata: Token metadata that node strings are searched in
            (passed to ``string_in_tok_list``).

    Returns:
        The ``cur_node`` at the root of the last pointer visited.
    """
    # Unparse once so the substring for each pointer can be looked up.
    unparse = unparser.to_string(ast)
    cur_pointer = AstIterPointer(ast, None, None)
    last_pointer = None
    while cur_pointer:
        if isinstance(cur_pointer.cur_node, ObjectChoiceNode):
            this_node_str = unparse.pointer_to_string(cur_pointer)
            # Nodes that unparse to an empty string are never copy candidates.
            if this_node_str:
                # NOTE(review): the first two items of copy_pos are handed to
                # CopyNode below; presumably a start/end token range - confirm.
                copy_pos = string_in_tok_list(this_node_str, token_metadata)
                # Questionable hacky skip?
                # Need to avoid being overly generous with copying stuff
                other_no_copy_reason = this_node_str in STOP_WORDS or \
                                       this_node_str in ('"', ".", ",", "?")
                if copy_pos and not other_no_copy_reason:
                    copy_node = CopyNode(cur_pointer.cur_node.type_to_choose,
                                         copy_pos[0], copy_pos[1])
                    # The replacement is applied at the pointer following this
                    # choice node in DFS order, and iteration resumes from the
                    # pointer that change_here returns.
                    cur_pointer = cur_pointer.dfs_get_next().change_here(
                        copy_node, always_clone=True)
        # Remember the last pointer visited so its root can be returned once
        # dfs_get_next() yields a falsy value.
        last_pointer = cur_pointer
        cur_pointer = cur_pointer.dfs_get_next()
    return last_pointer.get_root().cur_node
Ejemplo n.º 7
0
 def __init__(self,
              prediction: ObjectChoiceNode,
              ground_truth: AstObjectChoiceSet,
              y_texts: Set[str],
              x_text: str,
              exception,
              unparser: AstUnparser,
              know_pred_str: str = None):
     """Record one prediction and decide whether it counts as correct.

     The prediction is correct when the ground truth set reports it as known
     valid, or when its unparsed text is one of ``y_texts``. When
     ``prediction`` is ``None`` the predicted text is a placeholder built
     from ``exception``; when unparsing hits the recursion limit a fixed
     placeholder string is stored instead.
     """
     self.data = {}
     self.prediction = prediction
     self.ground_truth = ground_truth
     self.y_texts = y_texts
     self.x_text = x_text
     self.p_exception = exception

     if self.prediction is None:
         self.predicted_y = f"EXCEPTION {str(self.p_exception)}"
     else:
         try:
             unparsed = unparser.to_string(self.prediction, self.x_text)
             self.predicted_y = unparsed.total_string
         except RecursionError:
             self.predicted_y = "UNPARSE RECURSION LIMIT HIT"

     self.in_ast_set = self.ground_truth.is_node_known_valid(
         self.prediction)
     self.correct = self.in_ast_set or self.predicted_y in self.y_texts
     self.known_pred_str = know_pred_str

     # A text-only match means the AST set does not know this prediction.
     matched_by_text_only = self.correct and not self.in_ast_set
     if matched_by_text_only:
         message = (
             f"The prediction is not in ground truth but value "
             f"matches a y string. "
             f"Prediction text {self.predicted_y} actuals {self.y_texts}")
         warnings.warn(message)
     self._fill_stats()
Ejemplo n.º 8
0
def test_not_fail_find_expr(all_the_stuff_context, string):
    """Parse ``string`` as a ``FindExpression`` and expect an exact round trip."""
    tree = StringParser(all_the_stuff_context).create_parse_tree(
        string, "FindExpression")
    find_unparser = AstUnparser(all_the_stuff_context, NonLetterTokenizer())
    unparsed = find_unparser.to_string(tree)
    assert unparsed.total_string == string
Ejemplo n.º 9
0
def test_word_parts_2():
    """Parse ``fooBarBaz`` as word parts with partial consumption allowed.

    Checks the parsed structure (``foo`` with the lower modifier, followed by
    ``bar``), that the tree unparses to ``fooBar``, and that the pointers at
    depth-first positions 1-5 map to the expected substrings.
    """
    tc = TypeContext()
    _create_root_types(tc)
    known_parts = [('foo', True), ("bar", True), ("fo", True), ("!", False)]
    _create_all_word_parts(tc, known_parts)
    tc.finalize_data()

    ast = StringParser(tc).create_parse_tree(
        "fooBarBaz", WORD_PART_TYPE_NAME, allow_partial_consume=True)

    # Structure of the first word part.
    word_part_o = ast.next_node_not_copy
    assert word_part_o.implementation.name == _name_for_word_part("foo")
    mod_type_choice = word_part_o.get_choice_node_for_arg(
        WORD_PART_MODIFIER_ARG_NAME)
    mod_type_object = mod_type_choice.next_node_not_copy
    assert mod_type_object.implementation.name == MODIFIER_LOWER_NAME

    # Structure of the part that follows it.
    next_type_choice = word_part_o.get_choice_node_for_arg(
        WORD_PART_NEXT_ARG_NAME)
    next_part_o = next_type_choice.next_node_not_copy
    assert next_part_o.implementation.name == _name_for_word_part("bar")

    ### Unparse
    result = AstUnparser(tc).to_string(ast)
    assert result.total_string == "fooBar"
    pointers = list(ast.depth_first_iter())
    # (depth-first index, node expected at that pointer, expected substring)
    expected_parts = (
        (1, word_part_o, "fooBar"),
        (2, mod_type_choice, "foo"),
        (3, mod_type_object, ""),
        (4, next_type_choice, "Bar"),
        (5, next_part_o, "Bar"),
    )
    for position, node, text in expected_parts:
        found = get_str_and_assert_same_part(result, pointers[position], node)
        assert found == text
Ejemplo n.º 10
0
def test_word_parts_upper():
    """Parse the all-caps word ``FOO`` as a word part and round-trip it.

    Checks that ``foo`` is selected with the all-upper modifier and a terminal
    next part, that the unparse is ``FOO``, and that the first four
    depth-first pointers hold the expected nodes and substrings.
    """
    tc = TypeContext()
    _create_root_types(tc)
    known_parts = [('foo', True), ("bar", True), ("fo", True), ("!", False)]
    _create_all_word_parts(tc, known_parts)
    tc.finalize_data()

    ast = StringParser(tc).create_parse_tree("FOO", WORD_PART_TYPE_NAME)

    word_part_o = ast.next_node_not_copy
    assert word_part_o.implementation.name == _name_for_word_part("foo")
    mod_type_choice = word_part_o.get_choice_node_for_arg(
        WORD_PART_MODIFIER_ARG_NAME)
    mod_type_object = mod_type_choice.next_node_not_copy
    assert mod_type_object.implementation.name == MODIFIER_ALL_UPPER
    next_type_choice = word_part_o.get_choice_node_for_arg(
        WORD_PART_NEXT_ARG_NAME)
    next_part_o = next_type_choice.next_node_not_copy
    assert next_part_o.implementation.name == WORD_PART_TERMINAL_NAME

    ### Unparse
    result = AstUnparser(tc).to_string(ast)
    assert result.total_string == "FOO"
    pointers = list(ast.depth_first_iter())
    # Node and substring expected at depth-first positions 0..3, in order.
    expectations = (
        (ast, "FOO"),
        (word_part_o, "FOO"),
        (mod_type_choice, "FOO"),
        (mod_type_object, ""),
    )
    for position, (node, text) in enumerate(expectations):
        pointer = pointers[position]
        assert node == pointer.cur_node
        assert result.pointer_to_string(pointer) == text
Ejemplo n.º 11
0
def test_touch_set(all_the_stuff_context):
    """Exercise copy-node handling for ``touch out.txt``.

    Covers three things: the plain parse/unparse round trip, that the copy
    version of the tree is known valid in the AST set once copies are added,
    and that a hand-built tree whose ``file_list`` is a ``CopyNode`` unparses
    to the same command and is known valid too.
    """
    tc = all_the_stuff_context
    utterance = 'set the last mod time of out.txt to now'
    command = "touch out.txt"

    parser = StringParser(tc)
    ast = parser.create_parse_tree(command, "Program")
    unparser = AstUnparser(tc, NonLetterTokenizer())
    round_trip = unparser.to_string(ast, utterance)
    assert round_trip.total_string == command

    # A fresh parse of the same command must be recognised by the set.
    cset = AstObjectChoiceSet(tc.get_type_by_name("Program"))
    cset.add(ast, True, 1, 1)
    reparsed = parser.create_parse_tree(command, "Program")
    assert cset.is_node_known_valid(reparsed)

    _, tok_metadata = NonLetterTokenizer().tokenize(utterance)
    ast_copies = make_copy_version_of_tree(ast, unparser, tok_metadata)
    add_copies_to_ast_set(ast, cset, unparser, tok_metadata)
    assert cset.is_node_known_valid(ast_copies)
    assert cset.is_node_known_valid(ast)

    # Scary complicated reconstruction of something that broke it.
    # could be made into a simpler unit test in copy_tools
    touch_o = tc.get_object_by_name("touch")
    file_list = tc.get_type_by_name("PathList")
    r_arg = touch_o.get_arg_by_name("r")
    m_arg = touch_o.get_arg_by_name("m")

    def not_present(arg):
        """Build the choice node that selects ``arg``'s not-present object."""
        return ObjectChoiceNode(arg.present_choice_type,
                                ObjectNode(arg.not_present_object, pmap()))

    touch_children = pmap({
        "r": not_present(r_arg),
        "m": not_present(m_arg),
        "file_list": ObjectChoiceNode(file_list,
                                      CopyNode(file_list, 12, 14)),
    })
    other_copy = ObjectChoiceNode(tc.get_type_by_name("Program"),
                                  ObjectNode(touch_o, touch_children))
    other_result = unparser.to_string(other_copy, utterance)
    assert other_result.total_string == command
    assert cset.is_node_known_valid(other_copy)
Ejemplo n.º 12
0
def test_cp(all_the_stuff_context, string):
    """Round-trip ``string`` as a ``Program`` and check the root pointer text."""
    tree = StringParser(all_the_stuff_context).create_parse_tree(
        string, "Program")
    program_unparser = AstUnparser(all_the_stuff_context,
                                   NonLetterTokenizer())
    unparsed = program_unparser.to_string(tree)
    assert unparsed.total_string == string
    # The first depth-first pointer must map to the whole string.
    all_pointers = list(tree.depth_first_iter())
    root_pointer = all_pointers[0]
    assert unparsed.pointer_to_string(root_pointer) == string
Ejemplo n.º 13
0
def test_file_replacer():
    """Check every FILENAME replacement parses as a ``Path`` and round-trips.

    Each replacement must yield identical x and y values, and the x value
    must unparse back to itself.
    """
    replacements = _load_replacer_relative(
        "../../../training/augmenting/data/FILENAME.tsv")

    tc = TypeContext()
    loader = TypeContextDataLoader(tc, up_search_limit=4)
    type_files = (
        "builtin_types/generic_parsers.ainix.yaml",
        "builtin_types/command.ainix.yaml",
        "builtin_types/paths.ainix.yaml",
    )
    for type_file in type_files:
        loader.load_path(type_file)
    allspecials.load_all_special_types(tc)
    tc.finalize_data()

    parser = StringParser(tc)
    unparser = AstUnparser(tc)
    for replacement in replacements:
        x_value, y_value = replacement.get_replacement()
        assert x_value == y_value
        tree = parser.create_parse_tree(x_value, "Path")
        assert unparser.to_string(tree).total_string == x_value
Ejemplo n.º 14
0
def test_generic_word():
    """Parse the single-letter word ``a`` and verify structure and round trip.

    The word must resolve to the generic word object whose ``parts`` argument
    is ``word_part_a`` with the lower modifier, and must unparse to ``a``.
    """
    context = TypeContext()
    loader.load_path("builtin_types/generic_parsers.ainix.yaml",
                     context,
                     up_search_limit=4)
    generic_strings.create_generic_strings(context)
    context.finalize_data()

    tree = StringParser(context).create_parse_tree("a", WORD_TYPE_NAME)

    word_object = tree.next_node_not_copy
    assert word_object.implementation.name == WORD_OBJ_NAME
    parts_choice = word_object.get_choice_node_for_arg("parts")
    first_part = parts_choice.next_node_not_copy
    assert first_part.implementation.name == "word_part_a"
    modifier_choice = first_part.get_choice_node_for_arg(
        WORD_PART_MODIFIER_ARG_NAME)
    modifier_object = modifier_choice.next_node_not_copy
    assert modifier_object.implementation.name == MODIFIER_LOWER_NAME

    unparsed = AstUnparser(context).to_string(tree)
    assert unparsed.total_string == "a"
Ejemplo n.º 15
0
def add_copies_to_ast_set(ast: ObjectChoiceNode,
                          ast_set: AstObjectChoiceSet,
                          unparser: AstUnparser,
                          token_metadata: StringTokensMetadata,
                          copy_node_weight: float = 1) -> None:
    """Add copy nodes where appropriate to an AST set containing ``ast``.

    Walks ``ast`` depth-first alongside the matching path through
    ``ast_set`` and, at every ``ObjectChoiceNode``, delegates to
    ``_try_add_copy_node_at_object_choice``.

    Args:
        ast: An AST that has already been parsed.
        ast_set: An AST set that contains ``ast``.
        unparser: Used to unparse ``ast`` once up front.
        token_metadata: Token metadata forwarded to the per-node helper.
        copy_node_weight: Weight forwarded to the per-node helper.

    Raises:
        ValueError: If a node is neither an ``ObjectChoiceNode`` nor an
            ``ObjectNode``.
    """
    unparse = unparser.to_string(ast)
    df_ast_pointers = list(ast.depth_first_iter())
    # Derive the node list from the pointers already collected instead of
    # running a second depth-first traversal of the tree.
    df_ast_nodes = [pointer.cur_node for pointer in df_ast_pointers]
    df_ast_set = list(
        depth_first_iterate_ast_set_along_path(ast_set, df_ast_nodes))
    assert len(df_ast_nodes) == len(df_ast_set)
    for pointer, cur_set in zip(df_ast_pointers, df_ast_set):
        node = pointer.cur_node
        if isinstance(node, ObjectChoiceNode):
            # TODO (DNGros): Figure out if we are handling weight and probability right
            # I think works fine now if known valid
            _try_add_copy_node_at_object_choice(pointer, cur_set, True,
                                                copy_node_weight, 1, unparse,
                                                token_metadata)
        elif not isinstance(node, ObjectNode):
            # ObjectNodes need no work; anything else is unexpected.
            raise ValueError("Unrecognized node?")
Ejemplo n.º 16
0
    non_ascii_tokenizer = NonLetterTokenizer()

    def non_asci_do(string):
        """Tokenize ``string`` and return its tokens joined by single spaces."""
        tokens, _ = non_ascii_tokenizer.tokenize(string)
        return " ".join(tokens)

    # Maps each example's split value to the list of
    # (tokenized x, tokenized y, y_set_id, rsample) tuples collected for it.
    split_to_sentences = defaultdict(list)
    # Total number of pairs to draw: one per x value per replacement sample.
    num_to_do = args.replace_samples*index.get_num_x_values()
    # Chain one pass of iterate_data_pairs per replacement sample, then cap
    # the combined stream at num_to_do items.
    data_iterator = itertools.chain.from_iterable(
        (iterate_data_pairs(
            index, replacers, string_parser, tokenizer, unparser, None)
         for epoch in range(args.replace_samples))
    )
    data_iterator = itertools.islice(data_iterator, num_to_do)
    for (example, this_example_replaced_x, y_ast_set,
         teacher_force_path_ast, y_texts, rsample) in tqdm(data_iterator, total=num_to_do):
            # Unparse the teacher-forcing AST against the replaced x string to
            # get the target text for this pair.
            teach_force_str = unparser.to_string(
                teacher_force_path_ast, this_example_replaced_x).total_string
            split_to_sentences[example.split].append((
                non_asci_do(this_example_replaced_x),
                non_asci_do(teach_force_str),
                example.y_set_id,
                rsample
             ))
    for split, datas in split_to_sentences.items():
        # Only split values 0 and 1 are mapped; any other value raises KeyError.
        split = {0: 'train', 1: 'val'}[split]
        with open(f'{args.prefix}{split}_x.txt', 'w') as xf, \
                open(f'{args.prefix}{split}_y.txt', 'w') as yf, \
                open(f'{args.prefix}{split}_yids.txt', 'w') as yidsf:
            # Transpose the collected tuples into per-field sequences.
            xs, ys, y_set_ids, rsamples = zip(*datas)
            # One sentence per line, with no trailing newline after the last.
            xf.write("\n".join(xs))
            yf.write("\n".join(ys))
            # NOTE(review): yidsf is not written in the lines visible here;
            # presumably that happens further down - confirm.
            #if split == "val":
Ejemplo n.º 17
0
def test_dot_separated_words(tc, in_str):
    """Parse ``in_str`` as ``DotSeparatedWords`` and expect an exact round trip."""
    tree = StringParser(tc).create_parse_tree(in_str, "DotSeparatedWords")
    unparsed = AstUnparser(tc).to_string(tree)
    assert unparsed.total_string == in_str
Ejemplo n.º 18
0
    # Convert the training percentage to a fraction; the remainder of the
    # data is assigned to the validation split.
    train_frac = args.train_percent / 100.0
    split_proportions = ((train_frac, DataSplits.TRAIN),
                         (1 - train_frac, DataSplits.VALIDATION))

    model, index, replacers, type_context, loader = train_the_thing(
        split_proportions, args.randomize_seed, args.replace_samples,
        args.encoder_name)
    # The unparser uses the model's own string tokenizer.
    unparser = AstUnparser(type_context, model.get_string_tokenizer())

    # Evaluate the trained model and print the resulting log.
    tran_trainer = TypeTranslateCFTrainer(model,
                                          index,
                                          replacer=replacers,
                                          loader=loader)
    logger = EvaluateLogger()
    tran_trainer.evaluate(logger,
                          dump_each=True,
                          num_replace_samples=args.eval_replace_samples)
    print_ast_eval_log(logger)

    # Interactive loop: runs until interrupted, there is no exit condition.
    if not args.nointeractive:
        while True:
            q = input("Query: ")
            ast, metad = model.predict(q, "CommandSequence", True)
            unparse_result = unparser.to_string(ast, q)
            print(unparse_result.total_string)
            # Exponentiate the summed log confidences before printing.
            print(math.exp(sum(metad.log_confidences)))
            # Only the first retrieval explanation is reported.
            retr_explan = metad.example_retrieve_explanations[0]
            for sim, example_id in zip(retr_explan.reference_confidence,
                                       retr_explan.reference_example_ids):
                print(math.exp(sim), index.get_example_by_id(example_id))
Ejemplo n.º 19
0
def test_path_list_parse_and_unparse_without_error(tc, in_str):
    """Parse ``in_str`` as a ``PathList`` and expect an exact round trip."""
    tree = StringParser(tc).create_parse_tree(in_str, "PathList")
    unparsed = AstUnparser(tc).to_string(tree)
    assert unparsed.total_string == in_str
Ejemplo n.º 20
0
def test_path_parse_extension(tc, in_str):
    """Parse ``in_str`` as a ``FileExtension`` and expect an exact round trip."""
    tree = StringParser(tc).create_parse_tree(in_str, "FileExtension")
    unparsed = AstUnparser(tc).to_string(tree)
    assert unparsed.total_string == in_str