def parse_tagged_sents(self, sentences, verbose=False):
    """
    Use MaltParser to parse multiple POS tagged sentences. Takes multiple
    sentences where each sentence is a list of (word, tag) tuples.
    The sentences must have already been tokenized and tagged.

    This is a generator function: no work is done (and no exception is
    raised) until the returned iterator is first advanced.  The temporary
    CoNLL input/output files are removed before the first graph is
    yielded, and also when writing the input or running MaltParser fails.

    :param sentences: Input sentences to parse
    :type sentences: list(list(tuple(str, str)))
    :param verbose: Forwarded unchanged to ``self._execute``.
    :type verbose: bool
    :return: iter(iter(``DependencyGraph``)) the dependency graph
        representation of each sentence
    :raise Exception: If the parser has not been trained, or if MaltParser
        exits with a non-zero exit code.
    """
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    input_name = None
    output_name = None
    try:
        # Convert list of sentences to CONLL format.  Leaving the ``with``
        # block closes (and therefore flushes) the file before MaltParser
        # reads it.
        with tempfile.NamedTemporaryFile(prefix='malt_input.conll.',
                                         dir=self.working_dir, mode='w',
                                         delete=False) as input_file:
            input_name = input_file.name
            for line in taggedsents_to_conll(sentences):
                input_file.write(text_type(line))

        # Only the name of the output file is needed; close our handle
        # right away so the external process is the sole writer.
        with tempfile.NamedTemporaryFile(prefix='malt_output.conll.',
                                         dir=self.working_dir, mode='w',
                                         delete=False) as output_file:
            output_name = output_file.name

        # Generate command to run maltparser.
        cmd = self.generate_malt_command(input_name, output_name,
                                         mode="parse")

        # This is a maltparser quirk, it needs to be run
        # where the model file is. otherwise it goes into an awkward
        # missing .jars or strange -w working_dir problem.
        previous_dir = os.getcwd()  # Remembers the current path.
        try:
            try:
                # Change to modelfile path.
                os.chdir(os.path.split(self.model)[0])
            except Exception:
                # Deliberately best-effort (e.g. the model path has no
                # directory part): run from the current directory.  Unlike
                # a bare ``except``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                pass
            ret = self._execute(cmd, verbose)  # Run command.
        finally:
            # Change back even if running the command raised.
            os.chdir(previous_dir)

        # Compare by value: ``is not 0`` tests object identity.
        if ret != 0:
            raise Exception("MaltParser parsing (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))

        # Read everything up front so the temporary files can be removed
        # before control is handed back to the caller.
        with open(output_name) as infile:
            parsed_output = infile.read()
    finally:
        for temp_name in (input_name, output_name):
            if temp_name is None:
                continue
            try:
                os.remove(temp_name)
            except OSError:
                # Cleanup is best-effort; never mask the original error.
                pass

    # Must return iter(iter(Tree))
    for tree_str in parsed_output.split('\n\n'):
        tree_str = self.pretrained_model_sanity_checks(tree_str)
        yield iter([DependencyGraph(tree_str)])
def parse_tagged_sents(self, sentences, verbose=False):
    """
    Use MaltParser to parse multiple POS tagged sentences. Takes multiple
    sentences where each sentence is a list of (word, tag) tuples.
    The sentences must have already been tokenized and tagged.

    This is a generator function: no work is done (and no exception is
    raised) until the returned iterator is first advanced.  The temporary
    CoNLL input/output files are removed before the first graph is
    yielded, and also when writing the input or running MaltParser fails.

    :param sentences: Input sentences to parse
    :type sentences: list(list(tuple(str, str)))
    :param verbose: Forwarded unchanged to ``self._execute``.
    :type verbose: bool
    :return: iter(iter(``DependencyGraph``)) the dependency graph
        representation of each sentence
    :raise Exception: If the parser has not been trained, or if MaltParser
        exits with a non-zero exit code.
    """
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    input_name = None
    output_name = None
    try:
        # Convert list of sentences to CONLL format.  Leaving the ``with``
        # block closes (and therefore flushes) the file before MaltParser
        # reads it.
        with tempfile.NamedTemporaryFile(prefix='malt_input.conll.',
                                         dir=self.working_dir, mode='w',
                                         delete=False) as input_file:
            input_name = input_file.name
            for line in taggedsents_to_conll(sentences):
                input_file.write(text_type(line))

        # Only the name of the output file is needed; close our handle
        # right away so the external process is the sole writer.
        with tempfile.NamedTemporaryFile(prefix='malt_output.conll.',
                                         dir=self.working_dir, mode='w',
                                         delete=False) as output_file:
            output_name = output_file.name

        # Generate command to run maltparser.
        cmd = self.generate_malt_command(input_name, output_name,
                                         mode="parse")

        # This is a maltparser quirk, it needs to be run
        # where the model file is. otherwise it goes into an awkward
        # missing .jars or strange -w working_dir problem.
        previous_dir = os.getcwd()  # Remembers the current path.
        try:
            try:
                # Change to modelfile path.
                os.chdir(os.path.split(self.model)[0])
            except Exception:
                # Deliberately best-effort (e.g. the model path has no
                # directory part): run from the current directory.  Unlike
                # a bare ``except``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                pass
            ret = self._execute(cmd, verbose)  # Run command.
        finally:
            # Change back even if running the command raised.
            os.chdir(previous_dir)

        # Compare by value: ``is not 0`` tests object identity.
        if ret != 0:
            raise Exception("MaltParser parsing (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))

        # Read everything up front so the temporary files can be removed
        # before control is handed back to the caller.
        with open(output_name) as infile:
            parsed_output = infile.read()
    finally:
        for temp_name in (input_name, output_name):
            if temp_name is None:
                continue
            try:
                os.remove(temp_name)
            except OSError:
                # Cleanup is best-effort; never mask the original error.
                pass

    # Must return iter(iter(Tree))
    for tree_str in parsed_output.split('\n\n'):
        tree_str = self.pretrained_model_sanity_checks(tree_str)
        yield iter([DependencyGraph(tree_str)])