def parse(self):
        """
        Reads the input file line by line and dispatches every top-level
        line to the matching parsing routine, until `read_line` returns
        an empty string (end of file). Sets `parsing_finished` when done.
        """
        print_DBG("Parsing file: " + self.in_file.name)
        current_line = None
        while current_line != "":
            current_line = self.read_line()
            lstripped = current_line.lstrip()
            line_type = pu.get_top_level_line_type(current_line, lstripped)

            if line_type is None:
                raise SyntaxError("Invalid top-level line",
                                  (self.in_file.name, 0, 0, lstripped))
            if line_type in (pu.LineType.empty, pu.LineType.comment):
                continue
            # Comments are only stripped now: the raw line was needed above
            # to compute the indentation.
            lstripped = pu.strip_comments(lstripped)
            if line_type == pu.LineType.include_file:
                # Drop the inclusion marker character and trailing whitespace.
                self.parse_file(lstripped[1:].rstrip())
            elif line_type == pu.LineType.alias_declaration:
                self.parse_alias_definition(lstripped)
            elif line_type == pu.LineType.slot_declaration:
                self.parse_slot_definition(lstripped)
            else:
                # The only remaining top-level line type is an intent declaration.
                self.parse_intent_definition(lstripped)

        print_DBG("Parsing of file: " + self.in_file.name + " finished")
        self.parsing_finished = True
Beispiel #2
0
    def run_generation(self, adapter_str=None):
        """
        Runs the generation of all intents and writes them out to the output
        file(s), using the adapter named by `adapter_str` if one is provided
        (otherwise the adapter already configured on this instance is used).
        @pre: the parsing has been done.
        """
        adapter = (
            self.adapter if adapter_str is None
            else adapter_factory.create_adapter(adapter_str)
        )

        self.generator = Generator()
        synonyms = AST.get_or_create().get_entities_synonyms()

        # Start from a clean output directory — but ask first unless
        # overwriting was forced.
        if os.path.exists(self.output_dir_path):
            if not (self.force_overwriting or self._ask_confirmation()):
                print_DBG("Aborting generation. Exiting without any change.")
                return
            shutil.rmtree(self.output_dir_path)

        def write_if_any(subdir_name, examples):
            # Only produce an output file when there is something to write.
            if examples:
                adapter.write(os.path.join(self.output_dir_path, subdir_name),
                              examples, synonyms)

        train_examples = list(self.generator.generate_train())
        write_if_any("train", train_examples)
        write_if_any("test", list(self.generator.generate_test(train_examples)))
        print_DBG("Generation over")
Beispiel #3
0
 def generate_train(self):
     """Yields training examples generated for every parsed intent."""
     print_DBG("Generating training examples...")
     for intent in self.parser.intent_definitions.values():
         for example in intent.generate(self.max_nb_single_intent_examples):
             yield example
Beispiel #4
0
    def run_generation(self, adapter_str=None):
        """
        Runs the generation of all intents and writes them out to the output
        file(s), using the adapter named by `adapter_str` if one is provided
        (otherwise the adapter already configured on this instance is used).
        @pre: the parsing has been done.
        """
        if adapter_str is not None:
            adapter = adapter_factory.create_adapter(adapter_str)
        else:
            adapter = self.adapter

        self.generator = Generator(self.parser)
        synonyms = self.generator.get_entities_synonyms()

        # Always start from a clean output directory.
        if os.path.exists(self.output_dir_path):
            shutil.rmtree(self.output_dir_path)

        def write_if_any(subdir_name, examples):
            # Only produce an output file when there is something to write.
            if examples:
                adapter.write(os.path.join(self.output_dir_path, subdir_name),
                              examples, synonyms)

        train_examples = list(self.generator.generate_train())
        write_if_any("train", train_examples)
        write_if_any("test", list(self.generator.generate_test(train_examples)))
        print_DBG("Generation over")
Beispiel #5
0
 def generate_train(self):
     """Yields training examples for every intent stored in the AST."""
     print_DBG("Generating training examples...")
     intents = self.ast[UnitType.intent]
     for intent_name in intents:
         for example in intents[intent_name].generate_train():
             yield example
Beispiel #6
0
 def _parse_file_inclusion(self, lexical_tokens):
     """
     Opens the file that is included by the tokenized line `lexical_tokens`.
     @pre: `lexical_tokens` contain a tokenized file inclusion line.
     """
     # The second token holds the path of the file to include.
     included_file_path = lexical_tokens[1].text
     self.open_new_file(included_file_path)
     current_file_name = self.input_file_manager.get_current_file_name()
     print_DBG("Parsing file: " + current_file_name)
Beispiel #7
0
    def parse_file(self, file_path):
        """
        Parses the template file(s) at `file_path`
        and translates them into an AST.

        Reads the file line by line; each line is lexed into tokens and
        dispatched on its first token: file inclusion, indented rule line,
        unit declaration, or (anything else) a syntax error.
        """
        self.open_new_file(file_path)
        print_DBG(
            "Parsing file: " + \
            self.input_file_manager.get_current_file_name()
        )

        while True:
            line = self.input_file_manager.read_line()
            if line is None:  # End of file
                break
            # Whether the unit currently being declared is a slot;
            # this flag is passed to the lexer.
            currently_parsing_slot = (
                self._current_unit_declaration is not None
                and self._current_unit_declaration.unit_type == UnitType.slot)
            lexical_tokens = self.lexer.lex(line, currently_parsing_slot)
            lexical_tokens = remove_comment_tokens(lexical_tokens)

            # Empty or comment-only lines produce no tokens: skip them.
            if len(lexical_tokens) == 0:
                continue

            if lexical_tokens[0].type == TerminalType.file_inclusion_marker:
                # Switching to another file resets all per-file parsing state.
                self._parse_file_inclusion(lexical_tokens)
                self._declaration_line_allowed = True
                self._last_indentation = None
                self._current_unit_declaration = None
                self._current_variation_name = None
            elif lexical_tokens[0].type == TerminalType.indentation:
                # An indented line is a rule belonging to the current declaration.
                self._parse_rule_line(lexical_tokens)
                self._declaration_line_allowed = True
                self._last_indentation = lexical_tokens[0].text
            elif (
                lexical_tokens[0].type in \
                (TerminalType.alias_decl_start,
                 TerminalType.slot_decl_start,
                 TerminalType.intent_decl_start)
            ):
                # A new alias/slot/intent declaration starts on this line.
                self._parse_unit_declaration_line(lexical_tokens)
                self._declaration_line_allowed = False
                self._last_indentation = None
            else:
                self.input_file_manager.syntax_error(
                    "Couldn't parse this line: a line can be either " + \
                    "an empty line, a comment line, a file inclusion line, " + \
                    "a unit declaration or a rule."
                )
Beispiel #8
0
    def generate_test(self, training_examples=None):
        """
        Yields testing examples for every parsed intent, but only if at
        least one intent explicitly asked for testing examples.
        """
        definitions = self.parser.intent_definitions
        # Generate a test set only if some intent requested one.
        should_generate_test_set = any(
            definitions[name].nb_testing_examples_asked is not None
            for name in definitions
        )
        if not should_generate_test_set:
            return

        print_DBG("Generating testing examples...")
        for name in definitions:
            examples = definitions[name].generate(
                self.max_nb_single_intent_examples, training_examples)
            for example in examples:
                yield example
Beispiel #9
0
    def generate_test(self, training_examples=None):
        """
        Yields testing examples for every intent stored in the AST, but
        only if at least one intent asked for testing examples.
        """
        intent_definitions = self.ast[UnitType.intent]
        # NOTE(review): if `get_nb_testing_examples_asked` is a method rather
        # than a property, this `is not None` check is always True — confirm.
        should_generate_test_set = any(
            intent_definitions[name].get_nb_testing_examples_asked is not None
            for name in intent_definitions
        )
        if not should_generate_test_set:
            return

        print_DBG("Generating testing examples...")
        for name in intent_definitions:
            examples = intent_definitions[name].generate_test(training_examples)
            for example in examples:
                yield example
Beispiel #10
0
 def parse(self):
     """
     Parses the master file and subsequent files and
     transforms the information parsed into a dictionary of
     declaration names -> rules.
     """
     print_DBG("Parsing master file: " +
               self.tokenizer.get_file_information()[0])
     for token_line in self.tokenizer.next_tokenized_line():
         first_token = token_line[0]
         if first_token.isspace():
             # An indented line holds a rule for the declaration above it.
             self._parse_rule(token_line)
             self._expecting_rule = False  # Not expecting but still allowed
             self.stats["#rules"] += 1
             continue
         if first_token == pu.INCLUDE_FILE_SYM:
             # Inclusion line: switch to parsing the included file.
             self.tokenizer.open_file(token_line[1])
             print_DBG("Parsing file: " +
                       self.tokenizer.get_file_information()[0])
             self.stats["#files"] += 1
         else:
             # Any other unindented line starts a new declaration.
             self._parse_declaration_initiator(token_line)
             self._expecting_rule = True
             self.stats["#declarations"] += 1
         self._expected_indentation = None
     self.tokenizer.close_files()
     print_DBG("Parsing finished!")
def main():
    # pylint: disable=bad-continuation
    argument_parser = argparse.ArgumentParser(
        description="Chatette v" + __version__ + " -- " +
        "Generates NLU datasets from template files",
        epilog="SimGus -- 2018 -- Released under MIT license",
        prog="Chatette",
        add_help=True)

    argument_parser.add_argument("input",
                                 type=str,
                                 help="Path to master template file")

    argument_parser.add_argument("-o",
                                 "--out",
                                 dest="output",
                                 required=False,
                                 type=str,
                                 default=None,
                                 help="Output directory path")

    argument_parser.add_argument("-s",
                                 "--seed",
                                 dest="seed",
                                 required=False,
                                 type=str,
                                 default=None,
                                 help="Seed for the random generator " +
                                 "(any string without spaces will work)")

    argument_parser.add_argument("-l",
                                 "--local",
                                 dest="local",
                                 required=False,
                                 action="store_true",
                                 default=False,
                                 help="Change the base directory for output " +
                                 "files from the current working directory " +
                                 "to the directory containing the template " +
                                 "file")

    argument_parser.add_argument("-a",
                                 "--adapter",
                                 dest="adapter",
                                 required=False,
                                 type=str,
                                 default="rasa",
                                 help="Write adapter. Possible values: " +
                                 "['rasa', 'jsonl']")

    argument_parser.add_argument("-v",
                                 "--version",
                                 action="version",
                                 version="%(prog)s v" + __version__,
                                 help="Print the version number of the module")

    if len(sys.argv[1:]) == 0:
        argument_parser.print_help()
        argument_parser.exit()

    args = argument_parser.parse_args()

    template_file_path = args.input
    if args.local:
        dir_path = os.path.dirname(template_file_path)
    else:
        dir_path = os.getcwd()

    if args.output is None:
        dir_path = os.path.join(dir_path, "output")
    else:
        dir_path = os.path.join(dir_path, args.output)

    # Initialize the random number generator
    if args.seed is not None:
        random_seed(args.seed)

    with io.open(template_file_path, 'r') as in_file:
        parser = Parser(in_file)
        parser.parse()
        # parser.print_DBG()

    if args.adapter == 'rasa':
        # pylint: disable=redefined-variable-type
        adapter = RasaAdapter()
    elif args.adapter == 'jsonl':
        # pylint: disable=redefined-variable-type
        adapter = JsonListAdapter()
    else:
        raise ValueError("Unknown adapter was selected")

    generator = Generator(parser)
    synonyms = generator.get_entities_synonyms()

    train_examples = list(generator.generate_train())
    if train_examples:
        adapter.write(os.path.join(dir_path, "train"), train_examples,
                      synonyms)

    test_examples = list(generator.generate_test(train_examples))
    if test_examples:
        adapter.write(os.path.join(dir_path, "test"), test_examples, synonyms)

    print_DBG("Generation over")