def load_all_depgraphs(tfes):
    """
    Load the stored question and answer dependency graphs for each entry.

    Entries are numbered from 1 in iteration order.  For entry ``n`` the
    two file names are ``depgraph_folder`` followed by
    ``depgraph_filename_format % (n, "question")`` and
    ``depgraph_filename_format % (n, "answer")``.  The two loaded results
    are concatenated with ``+`` and stored on the entry as ``depgraphs``.

    :param tfes: iterable of objects that accept a ``depgraphs`` attribute
    :return: None; each entry is modified in place
    """
    # The counter has to advance with every entry; it was previously left
    # at 1, so every entry received the graphs of the first file pair.
    for current, tfe in enumerate(tfes, start=1):
        filename_question = depgraph_folder + (
            depgraph_filename_format % (current, "question"))
        filename_answer = depgraph_folder + (
            depgraph_filename_format % (current, "answer"))
        graphs_question = DependencyGraph.load(filename_question)
        graphs_answer = DependencyGraph.load(filename_answer)
        tfe.depgraphs = graphs_question + graphs_answer
def tagged_parse_sents(self, sentences, verbose=False):
    """
    Use MaltParser to parse multiple sentences in a single run.

    Each sentence is a list of (word, tag) tuples and must already be
    tokenized and tagged.  The sentences are written as tab-separated,
    ten-column rows to a temporary input file in ``self.working_dir``,
    MaltParser is run in ``parse`` mode, and the graphs are loaded from a
    temporary output file.  Both temporary files are removed before this
    method returns or raises.

    :param sentences: Input sentences to parse
    :type sentences: list(list(tuple(str, str)))
    :param verbose: passed through to ``self._execute``
    :return: iter(iter(``DependencyGraph``)) one single-element iterator
        per graph loaded from the parser output
    :raise Exception: if the parser is not configured, not trained, or
        the MaltParser run returns a non-zero exit code
    """
    if not self._malt_bin:
        raise Exception(
            "MaltParser location is not configured. Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    row_template = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'
    input_file = tempfile.NamedTemporaryFile(
        prefix='malt_input.conll', dir=self.working_dir, delete=False)
    output_file = tempfile.NamedTemporaryFile(
        prefix='malt_output.conll', dir=self.working_dir, delete=False)

    try:
        for sentence in sentences:
            for position, (word, tag) in enumerate(sentence, start=1):
                row = row_template % (
                    position, word, '_', tag, tag, '_', '0', 'a', '_', '_')
                input_file.write(row.encode("utf8"))
            # Blank lines terminate the sentence.
            input_file.write(b'\n\n')
        # Close the handle before the external process is started.
        input_file.close()

        cmd = ['java'] + self.additional_java_args + [
            '-jar', self._malt_bin,
            '-w', self.working_dir,
            '-c', self.mco,
            '-i', input_file.name,
            '-o', output_file.name,
            '-m', 'parse',
        ]

        exit_code = self._execute(cmd, verbose)
        if exit_code != 0:
            raise Exception(
                "MaltParser parsing (%s) failed with exit code %d"
                % (' '.join(cmd), exit_code))

        # Load while the output file still exists; the ``finally`` clause
        # below deletes it.
        graphs = DependencyGraph.load(output_file.name)
        # Must return iter(iter(Tree))
        return (iter([graph]) for graph in graphs)
    finally:
        input_file.close()
        os.remove(input_file.name)
        output_file.close()
        os.remove(output_file.name)
def tagged_parse(self, sentence, verbose=False):
    """
    Use MaltParser to parse a sentence.

    Takes a sentence as a list of (word, tag) tuples; the sentence must
    have already been tokenized and tagged.  The sentence is written as
    tab-separated, ten-column rows to ``malt_input.conll`` in the system
    temporary directory, the parser is run through ``self._execute``, and
    the result is loaded from ``malt_output.conll`` in the same directory.

    :param sentence: Input sentence to parse
    :type sentence: list(tuple(str, str))
    :param verbose: passed through to ``self._execute``
    :return: ``DependencyGraph`` the dependency graph representation of
        the sentence
    :raise Exception: if the parser is not configured or not trained
    """
    if not self._malt_bin:
        raise Exception(
            "MaltParser location is not configured. Call config_malt() first."
        )
    if not self._trained:
        raise Exception(
            "Parser has not been trained. Call train() first.")

    # NOTE(review): both file names are fixed inside the shared temporary
    # directory, so concurrent calls overwrite each other's files.
    input_file = os.path.join(tempfile.gettempdir(), 'malt_input.conll')
    output_file = os.path.join(tempfile.gettempdir(), 'malt_output.conll')

    # The context manager closes the file before the parser is started,
    # including when writing a row fails.
    with open(input_file, 'w') as f:
        for (i, (word, tag)) in enumerate(sentence, start=1):
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (i, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
        # A blank line terminates the sentence.
        f.write('\n')

    # Each flag and its value share one list element; presumably
    # ``_execute`` joins the list into a single command line -- confirm.
    cmd = [
        'java',
        '-jar %s' % self._malt_bin,
        '-w %s' % tempfile.gettempdir(),
        '-c %s' % self.mco,
        '-i %s' % input_file,
        '-o %s' % output_file,
        '-m parse'
    ]

    self._execute(cmd, 'parse', verbose)

    return DependencyGraph.load(output_file)
def tagged_parse_sents(self, sentences, verbose=False):
    """
    Parse a batch of tokenized, tagged sentences with MaltParser.

    The batch is serialised to a temporary input file under
    ``self.working_dir`` (one tab-separated, ten-column row per token),
    MaltParser is invoked in ``parse`` mode, and the dependency graphs are
    read back from a temporary output file.  Both temporary files are
    deleted on every exit path.

    :param sentences: Input sentences to parse, each a list of
        (word, tag) tuples
    :type sentences: list(list(tuple(str, str)))
    :param verbose: handed to ``self._execute`` as given
    :return: iter(iter(``DependencyGraph``)) the dependency graph
        representation of each sentence
    :raise Exception: when ``config_malt()`` or ``train()`` has not been
        called, or when MaltParser returns a non-zero exit code
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured. Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    conll_in = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                           dir=self.working_dir,
                                           delete=False)
    conll_out = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                            dir=self.working_dir,
                                            delete=False)

    try:
        for tagged_sentence in sentences:
            for index, (word, tag) in enumerate(tagged_sentence, start=1):
                fields = (index, word, '_', tag, tag, '_', '0', 'a', '_', '_')
                line = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % fields
                conll_in.write(line.encode("utf8"))
            # Sentence separator.
            conll_in.write(b'\n\n')
        # The input must be closed before MaltParser is launched.
        conll_in.close()

        java_prefix = ['java'] + self.additional_java_args
        cmd = java_prefix + ['-jar', self._malt_bin,
                             '-w', self.working_dir,
                             '-c', self.mco,
                             '-i', conll_in.name,
                             '-o', conll_out.name,
                             '-m', 'parse']

        status = self._execute(cmd, verbose)
        if status != 0:
            failure = "MaltParser parsing (%s) failed with exit code %d"
            raise Exception(failure % (' '.join(cmd), status))

        # The output is loaded here, before the cleanup below deletes it.
        loaded = DependencyGraph.load(conll_out.name)
        # Must return iter(iter(Tree))
        return (iter([dep_graph]) for dep_graph in loaded)
    finally:
        conll_in.close()
        os.remove(conll_in.name)
        conll_out.close()
        os.remove(conll_out.name)
def tagged_parse(self, sentence, verbose=False):
    """
    Use MaltParser to parse a sentence.

    Takes a sentence as a list of (word, tag) tuples; the sentence must
    have already been tokenized and tagged.  The sentence is written as
    tab-separated, ten-column rows to a temporary input file in
    ``self.working_dir``, MaltParser is run in ``parse`` mode, and the
    result is loaded from a temporary output file.  Both temporary files
    are removed before this method returns or raises.

    :param sentence: Input sentence to parse
    :type sentence: list(tuple(str, str))
    :param verbose: passed through to ``self._execute``
    :return: ``DependencyGraph`` the dependency graph representation of
        the sentence
    :raise Exception: if the parser is not configured, not trained, or
        the MaltParser run returns a non-zero exit code
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured. Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    input_file = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                             dir=self.working_dir,
                                             delete=False)
    output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                              dir=self.working_dir,
                                              delete=False)

    try:
        for (i, (word, tag)) in enumerate(sentence, start=1):
            input_str = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
                (i, word, '_', tag, tag, '_', '0', 'a', '_', '_')
            # NamedTemporaryFile opens in binary mode by default, so the
            # row must be encoded; writing text here raises TypeError on
            # Python 3.
            input_file.write(input_str.encode("utf8"))
        # A blank line terminates the sentence.
        input_file.write(b'\n')
        # Close the handle before the external process is started.
        input_file.close()

        cmd = ['java', '-jar', self._malt_bin, '-w', self.working_dir,
               '-c', self.mco, '-i', input_file.name,
               '-o', output_file.name, '-m', 'parse']

        ret = self._execute(cmd, verbose)
        if ret != 0:
            raise Exception("MaltParser parsing (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))

        # Loaded before the ``finally`` clause deletes the output file.
        return DependencyGraph.load(output_file.name)
    finally:
        input_file.close()
        os.remove(input_file.name)
        output_file.close()
        os.remove(output_file.name)
def tagged_parse(self, sentence, verbose=False):
    """
    Use MaltParser to parse a sentence.

    Takes a sentence as a list of (word, tag) tuples; the sentence must
    have already been tokenized and tagged.  The rows are written to
    ``malt_input.conll`` in the system temporary directory, the parser is
    run through ``self._execute``, and the graph is loaded from
    ``malt_output.conll`` in that same directory.

    :param sentence: Input sentence to parse
    :type sentence: L{list} of (word, tag) L{tuple}s.
    :param verbose: passed through to ``self._execute``
    :return: C{DependencyGraph} the dependency graph representation of
        the sentence
    :raise Exception: if the parser is not configured or not trained
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured. Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    temp_dir = tempfile.gettempdir()
    # NOTE(review): fixed names in a shared directory -- simultaneous
    # calls will clobber each other's input and output.
    input_file = os.path.join(temp_dir, 'malt_input.conll')
    output_file = os.path.join(temp_dir, 'malt_output.conll')

    # ``with`` guarantees the input is flushed and closed before the
    # parser reads it, and also on a failed write.
    with open(input_file, 'w') as f:
        for (i, (word, tag)) in enumerate(sentence, start=1):
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (i, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
        # A blank line terminates the sentence.
        f.write('\n')

    # Flag and value are kept together in one element, as before;
    # presumably ``_execute`` joins them into a command line -- confirm.
    cmd = ['java',
           '-jar %s' % self._malt_bin,
           '-w %s' % temp_dir,
           '-c %s' % self.mco,
           '-i %s' % input_file,
           '-o %s' % output_file,
           '-m parse']

    self._execute(cmd, 'parse', verbose)

    return DependencyGraph.load(output_file)