Example #1
0
 def from_json(cls, json_data: Dict[str, List]) -> 'ParseGraph':
     """Constructs a ParseGraph from a JSON-serializable data structure produced by
     ParseGraph.to_json().

     :param json_data: Mapping with 'roots', 'tokens', 'links', and 'phrases'
         entries, as produced by ParseGraph.to_json().
     :return: The reconstructed ParseGraph instance.
     """
     from pyramids.grammar import GrammarParser
     root = json_data['roots'][-1]
     tokens = [
         Token(token.get('index', index), token['spelling'], token['span'],
               GrammarParser.parse_category(token['category']))
         for index, token in enumerate(json_data['tokens'])
     ]
     # links[source] maps each sink index to the set of labels on that edge.
     links = [{} for _ in tokens]
     for link in json_data['links']:
         source = link['source']
         sink = link['sink']
         label = LinkLabel.get(link['label'])
         # Bug fix: the previous code replaced the source's entire sink dict
         # whenever a new sink appeared, silently discarding any earlier
         # sinks recorded for the same source. setdefault accumulates them.
         links[source].setdefault(sink, set()).add(label)
     phrases = [[] for _ in tokens]
     for index, phrase_stack in enumerate(json_data['phrases']):
         for phrase in phrase_stack:
             category = GrammarParser.parse_category(phrase['category'])
             phrase_links = [(link['source'], link['sink'])
                             for link in phrase['links']]
             phrases[index].append((category, phrase_links))
     return cls(root, tokens, links, phrases)
Example #2
0
 def from_json(cls, json_data: Dict[str, List]) -> 'BuildGraph':
     """Constructs a BuildGraph from a JSON-serializable data structure produced by
     BuildGraph.to_json().

     :param json_data: Mapping with 'tokens', 'links', and 'phrases' entries.
         Token 'category' and 'span' entries are optional.
     :return: The reconstructed BuildGraph instance.
     """
     from pyramids.grammar import GrammarParser
     # Use cls() rather than hard-coding BuildGraph() so subclasses
     # constructed via this classmethod get the right type.
     result = cls()
     for token in json_data['tokens']:
         spelling = token['spelling']
         category = token.get('category')
         span = token.get('span')
         if category is not None:
             category = GrammarParser.parse_category(category)
         if span is not None:
             span = tuple(span)
         result.append_token(spelling, category, span)
     for link in json_data['links']:
         source = link['source']
         sink = link['sink']
         label = LinkLabel.get(link['label'])
         result.add_link(source, label, sink)
     for index, phrase_stack in enumerate(json_data['phrases']):
         for phrase in phrase_stack:
             category = phrase.get('category')
             if category is not None:
                 category = GrammarParser.parse_category(category)
                 result.set_phrase_category(index, category)
     return result
Example #3
0
 def _validate_output(output_val: str, target: str) -> bool:
     """Report whether *output_val* matches *target*: the output's category
     must be contained in the target's category, and their structure parts
     (everything from the first ':' onward) must be equal."""
     if ':' not in output_val:
         return False
     target_colon = target.index(':')
     output_colon = output_val.index(':')
     target_category = GrammarParser.parse_category(target[:target_colon])
     output_category = GrammarParser.parse_category(
         output_val[:output_colon])
     if output_category not in target_category:
         return False
     # Both structure strings retain their leading ':', so direct equality
     # compares them consistently.
     return target[target_colon:] == output_val[output_colon:]
Example #4
0
 def do_remove(self, line: str) -> None:
     """Removes a word from a given category's word list file.

     The first whitespace-delimited field of *line* is the category
     definition; the remaining fields are the words to remove. Every .ctg
     word-set file whose name parses to the same category is updated, and
     files left empty are deleted. The model is reloaded whenever any word
     was actually removed, so the in-memory model stays in sync with disk.
     """
     if not line:
         print("No category specified.")
         return
     category_definition = line.split()[0]
     words_to_remove = set(line[len(category_definition):].strip().split())
     if not words_to_remove:
         print("No words specified.")
         return
     category = GrammarParser.parse_category(category_definition)
     # Prefer the loaded model's config; otherwise load it from disk.
     config_info = (self._model.config_info
                    if self._model and self._model.config_info else
                    self._model_loader.load_model_config())
     found = set()
     for folder_path in config_info.word_sets_folders:
         for filename in os.listdir(folder_path):
             if not filename.lower().endswith('.ctg'):
                 continue
             # The file name (minus the .ctg extension) encodes the category.
             file_category = GrammarParser.parse_category(filename[:-4])
             if file_category != category:
                 continue
             file_path = os.path.join(folder_path, filename)
             with open(file_path) as words_file:
                 words = set(words_file.read().split())
             for w in sorted(words_to_remove):
                 if w in words:
                     print("Removing " + repr(w) + " from " + file_path +
                           ".")
                     words.remove(w)
                     found.add(w)
                 else:
                     print(repr(w) + " not found in " + file_path + ".")
             if words:
                 with open(file_path, 'w') as words_file:
                     words_file.write('\n'.join(sorted(words)))
             else:
                 print("Deleting empty word list file " + file_path + ".")
                 os.remove(file_path)
     missing = words_to_remove - found
     if missing:
         print("No file(s) found containing the following words: " +
               ' '.join(repr(word) for word in sorted(missing)) + ".")
     # Bug fix: previously an early return here skipped the reload even when
     # some words HAD been removed, leaving the model out of sync with the
     # modified (or deleted) word-set files. Reload if anything changed.
     if found:
         self.do_reload()
Example #5
0
 def do_add(self, line: str) -> None:
     """Adds a word to a given category's word list file.

     The first whitespace-delimited field of *line* is the category
     definition; the remaining fields are the words to add. Every existing
     .ctg file whose name parses to the same category is updated; if none
     exists, a new file is created in the first configured word-sets folder.
     The model is reloaded afterwards.
     """
     if not line:
         print("No category specified.")
         return
     category_definition = line.split()[0]
     words_to_add = sorted(
         set(line[len(category_definition):].strip().split()))
     if not words_to_add:
         print("No words specified.")
         return
     # Parse the category only after input validation, mirroring do_remove,
     # so a missing word list is reported before any category syntax error.
     category = GrammarParser.parse_category(category_definition)
     # Prefer the loaded model's config; otherwise load it from disk.
     config_info = (self._model.config_info
                    if self._model and self._model.config_info else
                    self._model_loader.load_model_config())
     found = False
     for folder_path in config_info.word_sets_folders:
         for filename in os.listdir(folder_path):
             if not filename.lower().endswith('.ctg'):
                 continue
             # The file name (minus the .ctg extension) encodes the category.
             file_category = GrammarParser.parse_category(filename[:-4])
             if file_category != category:
                 continue
             file_path = os.path.join(folder_path, filename)
             with open(file_path) as word_set_file:
                 words = set(word_set_file.read().split())
             for w in words_to_add:
                 if w in words:
                     print(repr(w) + " was already in " + file_path + ".")
                 else:
                     print("Adding " + repr(w) + " to " + file_path + ".")
                     words.add(w)
             with open(file_path, 'w') as word_set_file:
                 word_set_file.write('\n'.join(sorted(words)))
             found = True
     if not found:
         # No existing file matched: create one in the first folder only.
         for folder_path in config_info.word_sets_folders:
             file_path = os.path.join(folder_path, str(category) + '.ctg')
             print("Creating " + file_path + ".")
             with open(file_path, 'w') as word_set_file:
                 word_set_file.write('\n'.join(sorted(words_to_add)))
             break
         else:
             print("No word sets folder identified. Cannot add words.")
             return
     self.do_reload()
Example #6
0
 def do_as(self, line: str) -> None:
     """Parse an input string as a particular category and print the
     highest-scoring parse for it."""
     if not line:
         print("No category specified.")
         return
     definition = line.split()[0]
     restriction = GrammarParser.parse_category(definition)
     remainder = line[len(definition):].strip()
     self._handle_parse(remainder,
                        restriction_category=restriction,
                        emergency=self._emergency_mode)
Example #7
0
 def from_word_set(cls, file_path: str, verbose: bool = False) -> 'SetRule':
     """Load a word set and return it as a set rule."""
     from pyramids.grammar import GrammarSyntaxError, GrammarParser
     # The file's base name (without extension) encodes the category.
     stem = os.path.splitext(os.path.basename(file_path))[0]
     try:
         category = GrammarParser.parse_category(stem)
     except GrammarSyntaxError as error:
         raise IOError("Badly named word set file: " + file_path) from error
     if verbose:
         print("Loading category", str(category), "from", file_path, "...")
     word_set = WordSetUtils.load_word_set(file_path)
     return SetRule(category, word_set, _word_set_path=file_path)
Example #8
0
    def _training_attempt_iterator(
            self, text: Input,
            target: Target) -> Iterator[Tuple[Attempt, FeedbackReceiver]]:
        """Yield (benchmark parser output, scoring function) pairs for *text*.

        First parses with a category restriction taken from *target* (the
        part before the first ':'), yielding each available parse; then
        re-parses without the restriction and yields those parses as well.
        Updates the benchmark counters and cumulative time on *self* as a
        side effect of each parse pass.
        """
        print(text)  # NOTE(review): looks like leftover debug output — confirm

        # Restrict it to the correct category and token_start_index from there. This gives the
        # parser a leg up when it's far from the correct response.
        split_index = target.index(':')
        target_category = GrammarParser.parse_category(target[:split_index])
        # Time the restricted parse pass and fold the results into the
        # benchmark counters.
        start_time = time.time()
        end_time = start_time + self._timeout_interval
        emergency_disambig, parse_timed_out, disambig_timed_out = \
            self._do_parse(text, end_time, restriction_category=target_category)
        end_time = time.time()
        self._benchmark_emergency_disambiguations += int(emergency_disambig)
        self._benchmark_parse_timeouts += int(parse_timed_out)
        self._benchmark_disambiguation_timeouts += int(disambig_timed_out)
        self._benchmark_time += end_time - start_time

        # We shouldn't keep going if there are no parses of the correct category. This most likely
        # indicates a change in the grammar, not a problem with the model.
        assert self.parses_available
        # Walk every parse produced under the restriction, advancing the
        # shared parse cursor on self.
        while self._parse_index <= self.max_parse_index:
            # (benchmark target, scoring function)
            yield self._get_benchmark_parser_output(), self._scoring_function
            self._parse_index += 1

        # Now try it without any help,
        start_time = time.time()
        end_time = start_time + self._timeout_interval
        emergency_disambig, parse_timed_out, disambig_timed_out = self._do_parse(
            text, end_time)
        end_time = time.time()
        self._benchmark_emergency_disambiguations += int(emergency_disambig)
        self._benchmark_parse_timeouts += int(parse_timed_out)
        self._benchmark_disambiguation_timeouts += int(disambig_timed_out)
        self._benchmark_time += end_time - start_time
        # Unlike the restricted pass, a lack of parses here is tolerated:
        # only yield when the unrestricted parse produced something.
        if self.parses_available:
            while self._parse_index <= self.max_parse_index:
                # (benchmark target, scoring function)
                yield self._get_benchmark_parser_output(
                ), self._scoring_function
                self._parse_index += 1
Example #9
0
 def do_compare(line: str) -> None:
     """Compare two categories to determine if either contains the other."""
     definitions = [definition for definition in line.split() if definition]
     if not definitions:
         print("Nothing to compare.")
         return
     if len(definitions) == 1:
         print("Nothing to compare with.")
         return
     parsed = set()
     for definition in definitions:
         # Report syntax errors relative to the definition's position in the
         # original command line (1-based).
         offset = line.find(definition) + 1
         parsed.add(GrammarParser.parse_category(definition, offset=offset))
     ordered = sorted(parsed, key=str)
     # Check containment in both directions for every distinct pair.
     for first in ordered:
         for second in ordered:
             if first is second:
                 continue
             verdict = (" contains " if second in first
                        else " does not contain ")
             print(str(first) + verdict + str(second))