def from_json(cls, json_data: Dict[str, List]) -> 'ParseGraph':
    """Construct a ParseGraph from a JSON-serializable data structure produced by
    ParseGraph.to_json().

    :param json_data: Dict with 'roots', 'tokens', 'links', and 'phrases' entries, as
        emitted by ParseGraph.to_json().
    :return: A new instance of the class (normally ParseGraph).
    """
    from pyramids.grammar import GrammarParser
    # The last root listed is treated as *the* root of the reconstructed graph.
    root = json_data['roots'][-1]
    tokens = [
        Token(token.get('index', index), token['spelling'], token['span'],
              GrammarParser.parse_category(token['category']))
        for index, token in enumerate(json_data['tokens'])
    ]
    # links[source] maps sink index -> set of link labels.
    links = [{} for _ in tokens]
    for link in json_data['links']:
        source = link['source']
        sink = link['sink']
        label = LinkLabel.get(link['label'])
        if sink in links[source]:
            links[source][sink].add(label)
        else:
            # BUG FIX: previously this assigned `links[source] = {sink: {label}}`,
            # which clobbered every sink already recorded for this source token.
            # Only the new sink entry should be added.
            links[source][sink] = {label}
    # phrases[i] is the phrase stack for token i: a list of (category, link-pairs).
    phrases = [[] for _ in tokens]
    for index, phrase_stack in enumerate(json_data['phrases']):
        for phrase in phrase_stack:
            category = GrammarParser.parse_category(phrase['category'])
            phrase_links = [(link['source'], link['sink']) for link in phrase['links']]
            phrases[index].append((category, phrase_links))
    return cls(root, tokens, links, phrases)
def from_json(cls, json_data: Dict[str, List]) -> 'BuildGraph':
    """Construct a BuildGraph from a JSON-serializable data structure produced by
    ParseGraph.to_json().

    :param json_data: Dict with 'tokens', 'links', and 'phrases' entries.
    :return: A new instance of the class (normally BuildGraph).
    """
    from pyramids.grammar import GrammarParser
    # BUG FIX: use cls() rather than hard-coding BuildGraph(), so subclasses that
    # call this alternate constructor get an instance of the subclass.
    result = cls()
    for token in json_data['tokens']:
        spelling = token['spelling']
        # Category and span are optional for a build graph token.
        category = token.get('category')
        span = token.get('span')
        if category is not None:
            category = GrammarParser.parse_category(category)
        if span is not None:
            span = tuple(span)
        result.append_token(spelling, category, span)
    for link in json_data['links']:
        source = link['source']
        sink = link['sink']
        label = LinkLabel.get(link['label'])
        result.add_link(source, label, sink)
    for index, phrase_stack in enumerate(json_data['phrases']):
        for phrase in phrase_stack:
            category = phrase.get('category')
            if category is not None:
                category = GrammarParser.parse_category(category)
                result.set_phrase_category(index, category)
    return result
def _validate_output(output_val: str, target: str) -> bool:
    """Check whether a parser output matches the training target.

    Both strings have the form 'category:structure'. The output matches when its
    category is contained in the target category and the structure parts (including
    the leading ':') are identical.
    """
    if ':' not in output_val:
        return False
    target_pos = target.index(':')
    target_category = GrammarParser.parse_category(target[:target_pos])
    target_structure = target[target_pos:]
    output_pos = output_val.index(':')
    output_category = GrammarParser.parse_category(output_val[:output_pos])
    output_structure = output_val[output_pos:]
    if output_category not in target_category:
        return False
    return target_structure == output_structure
def do_remove(self, line: str) -> None:
    """Removes a word from a given category's word list file.

    `line` is '<category> <word> [<word> ...]'. Every .ctg file whose filename
    parses to the same category is updated; files left empty are deleted.
    Finishes by reloading the model via do_reload().
    """
    if not line:
        print("No category specified.")
        return
    # First whitespace-delimited token is the category; the rest are the words.
    category_definition = line.split()[0]
    words_to_remove = set(line[len(category_definition):].strip().split())
    if not words_to_remove:
        print("No words specified.")
        return
    category = GrammarParser.parse_category(category_definition)
    # Prefer the loaded model's config; fall back to loading config from disk.
    config_info = (self._model.config_info
                   if self._model and self._model.config_info
                   else self._model_loader.load_model_config())
    found = set()  # words actually removed from at least one file
    for folder_path in config_info.word_sets_folders:
        for filename in os.listdir(folder_path):
            # Word set files are named '<category>.ctg'.
            if not filename.lower().endswith('.ctg'):
                continue
            file_category = GrammarParser.parse_category(filename[:-4])
            if file_category != category:
                continue
            file_path = os.path.join(folder_path, filename)
            with open(file_path) as words_file:
                words = set(words_file.read().split())
            for w in sorted(words_to_remove):
                if w in words:
                    print("Removing " + repr(w) + " from " + file_path + ".")
                    words.remove(w)
                    found.add(w)
                else:
                    print(repr(w) + " not found in " + file_path + ".")
            if words:
                # Rewrite the file with the surviving words, sorted one per line.
                with open(file_path, 'w') as words_file:
                    words_file.write('\n'.join(sorted(words)))
            else:
                # Nothing left in this word set; remove the file entirely.
                print("Deleting empty word list file " + file_path + ".")
                os.remove(file_path)
    if words_to_remove - found:
        print("No file(s) found containing the following words: " + ' '.join(
            repr(word) for word in sorted(words_to_remove - found)) + ".")
    # NOTE(review): declared -> None but propagates do_reload()'s return value.
    return self.do_reload()
def do_add(self, line: str) -> None:
    """Adds a word to a given category's word list file.

    `line` is '<category> <word> [<word> ...]'. Words are appended to every
    existing .ctg file matching the category; if none exists, a new file is
    created in the first word-sets folder. Finishes by reloading the model.
    """
    if not line:
        print("No category specified.")
        return
    # First whitespace-delimited token is the category; the rest are the words.
    category_definition = line.split()[0]
    category = GrammarParser.parse_category(category_definition)
    words_to_add = sorted(
        set(line[len(category_definition):].strip().split()))
    if not words_to_add:
        print("No words specified.")
        return
    # Prefer the loaded model's config; fall back to loading config from disk.
    config_info = (self._model.config_info
                   if self._model and self._model.config_info
                   else self._model_loader.load_model_config())
    found = False  # True once at least one matching word set file was updated
    for folder_path in config_info.word_sets_folders:
        for filename in os.listdir(folder_path):
            # Word set files are named '<category>.ctg'.
            if not filename.lower().endswith('.ctg'):
                continue
            file_category = GrammarParser.parse_category(filename[:-4])
            if file_category != category:
                continue
            file_path = os.path.join(folder_path, filename)
            with open(file_path) as word_set_file:
                words = set(word_set_file.read().split())
            for w in words_to_add:
                if w in words:
                    print(repr(w) + " was already in " + file_path + ".")
                else:
                    print("Adding " + repr(w) + " to " + file_path + ".")
                    words.add(w)
            # Rewrite the file with the merged word list, sorted one per line.
            with open(file_path, 'w') as word_set_file:
                word_set_file.write('\n'.join(sorted(words)))
            found = True
    if not found:
        # No existing file for this category: create one in the first folder.
        # The for/else fires only when there are no word-sets folders at all.
        for folder_path in config_info.word_sets_folders:
            file_path = os.path.join(folder_path, str(category) + '.ctg')
            print("Creating " + file_path + ".")
            with open(file_path, 'w') as word_set_file:
                word_set_file.write('\n'.join(sorted(words_to_add)))
            break
        else:
            print("No word sets folder identified. Cannot add words.")
    # NOTE(review): declared -> None but propagates do_reload()'s return value.
    return self.do_reload()
def do_as(self, line: str) -> None:
    """Parse an input string as a particular category and print the highest-scoring
    parse for it."""
    if not line:
        print("No category specified.")
        return
    # The first token names the category; everything after it is the text to parse.
    category_token = line.split()[0]
    restriction = GrammarParser.parse_category(category_token)
    remainder = line[len(category_token):].strip()
    self._handle_parse(remainder,
                       restriction_category=restriction,
                       emergency=self._emergency_mode)
def from_word_set(cls, file_path: str, verbose: bool = False) -> 'SetRule':
    """Load a word set and return it as a set rule.

    :param file_path: Path to a word set file; the base filename (minus extension)
        must parse as a grammar category.
    :param verbose: When True, print a progress message while loading.
    :return: A new set rule instance built from the file's words.
    :raises IOError: If the filename does not parse as a valid category.
    """
    from pyramids.grammar import GrammarSyntaxError, GrammarParser
    # The category is encoded in the filename, e.g. 'noun(plural).ctg'.
    filename = os.path.basename(file_path)  # (folder part was unused)
    category_definition = os.path.splitext(filename)[0]
    try:
        category = GrammarParser.parse_category(category_definition)
    except GrammarSyntaxError as error:
        raise IOError("Badly named word set file: " + file_path) from error
    if verbose:
        print("Loading category", str(category), "from", file_path, "...")
    # BUG FIX: construct via cls() rather than hard-coding SetRule, so subclasses
    # using this alternate constructor get an instance of the subclass.
    return cls(category, WordSetUtils.load_word_set(file_path),
               _word_set_path=file_path)
def _training_attempt_iterator( self, text: Input, target: Target) -> Iterator[Tuple[Attempt, FeedbackReceiver]]: print(text) # Restrict it to the correct category and token_start_index from there. This gives the # parser a leg up when it's far from the correct response. split_index = target.index(':') target_category = GrammarParser.parse_category(target[:split_index]) start_time = time.time() end_time = start_time + self._timeout_interval emergency_disambig, parse_timed_out, disambig_timed_out = \ self._do_parse(text, end_time, restriction_category=target_category) end_time = time.time() self._benchmark_emergency_disambiguations += int(emergency_disambig) self._benchmark_parse_timeouts += int(parse_timed_out) self._benchmark_disambiguation_timeouts += int(disambig_timed_out) self._benchmark_time += end_time - start_time # We shouldn't keep going if there are no parses of the correct category. This most likely # indicates a change in the grammar, not a problem with the model. assert self.parses_available while self._parse_index <= self.max_parse_index: # (benchmark target, scoring function) yield self._get_benchmark_parser_output(), self._scoring_function self._parse_index += 1 # Now try it without any help, start_time = time.time() end_time = start_time + self._timeout_interval emergency_disambig, parse_timed_out, disambig_timed_out = self._do_parse( text, end_time) end_time = time.time() self._benchmark_emergency_disambiguations += int(emergency_disambig) self._benchmark_parse_timeouts += int(parse_timed_out) self._benchmark_disambiguation_timeouts += int(disambig_timed_out) self._benchmark_time += end_time - start_time if self.parses_available: while self._parse_index <= self.max_parse_index: # (benchmark target, scoring function) yield self._get_benchmark_parser_output( ), self._scoring_function self._parse_index += 1
def do_compare(line: str) -> None:
    """Compare two categories to determine if either contains the other."""
    definitions = [definition for definition in line.split() if definition]
    if not definitions:
        print("Nothing to compare.")
        return
    if len(definitions) == 1:
        print("Nothing to compare with.")
        return
    unique_categories = set()
    for definition in definitions:
        # Offset points at the definition's position in the original line (1-based)
        # so parse errors can be reported against the user's input.
        parse_offset = line.find(definition) + 1
        unique_categories.add(GrammarParser.parse_category(definition, offset=parse_offset))
    ordered = sorted(unique_categories, key=str)
    # Report containment for every ordered pair of distinct categories.
    for first in ordered:
        for second in ordered:
            if first is second:
                continue
            relation = " contains " if second in first else " does not contain "
            print(str(first) + relation + str(second))