Ejemplo n.º 1
0
def load_all_depgraphs(tfes):
    """Attach the question and answer dependency graphs to every item.

    Items are numbered from 1 in iteration order. For item number ``n`` the
    graphs are loaded from the two files whose names are built by filling
    ``depgraph_filename_format`` with ``(n, "question")`` and
    ``(n, "answer")`` and prefixing ``depgraph_folder``. The two loaded
    results are joined with ``+`` and stored on the item as ``depgraphs``.

    :param tfes: iterable of objects that accept a ``depgraphs`` attribute
    """
    # Each item has its own numbered pair of files, so the number must
    # advance with the iteration instead of staying at 1.
    for number, tfe in enumerate(tfes, start=1):
        question_path = depgraph_folder + (
            depgraph_filename_format % (number, "question"))
        answer_path = depgraph_folder + (
            depgraph_filename_format % (number, "answer"))
        question_graphs = DependencyGraph.load(question_path)
        answer_graphs = DependencyGraph.load(answer_path)
        tfe.depgraphs = question_graphs + answer_graphs
Ejemplo n.º 2
0
    def tagged_parse_sents(self, sentences, verbose=False):
        """
        Parse several pre-tagged sentences with MaltParser in a single run.

        Each token is written as one tab-separated row (numbered from 1
        within its sentence) to a temporary input file, and two newline
        characters are written after every sentence. MaltParser is then run
        in ``parse`` mode and its output file is loaded as dependency
        graphs. Both temporary files are removed before this method exits,
        whether it returns or raises.

        :param sentences: Input sentences to parse; they must already be
            tokenized and tagged
        :type sentences: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) one single-element iterator
            per dependency graph loaded from the parser output
        :raise Exception: if the MaltParser location is not configured, the
            parser has not been trained, or the parser run returns a
            non-zero exit code
        """
        if not self._malt_bin:
            raise Exception(
                "MaltParser location is not configured.  Call config_malt() first.")
        if not self._trained:
            raise Exception("Parser has not been trained.  Call train() first.")

        row_template = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'

        # delete=False: the files have to outlive close() so that the
        # external process can use them; they are removed in ``finally``.
        conll_in = tempfile.NamedTemporaryFile(
            prefix='malt_input.conll', dir=self.working_dir, delete=False)
        conll_out = tempfile.NamedTemporaryFile(
            prefix='malt_output.conll', dir=self.working_dir, delete=False)

        try:
            for tagged_sentence in sentences:
                for position, (token, pos_tag) in enumerate(tagged_sentence, 1):
                    row = row_template % (position, token, '_', pos_tag,
                                          pos_tag, '_', '0', 'a', '_', '_')
                    conll_in.write(row.encode("utf8"))
                conll_in.write(b'\n\n')
            # Flush everything to disk before the parser reads the file.
            conll_in.close()

            cmd = ['java'] + self.additional_java_args + [
                '-jar', self._malt_bin,
                '-w', self.working_dir,
                '-c', self.mco,
                '-i', conll_in.name,
                '-o', conll_out.name,
                '-m', 'parse',
            ]

            exit_code = self._execute(cmd, verbose)
            if exit_code != 0:
                raise Exception(
                    "MaltParser parsing (%s) failed with exit code %d"
                    % (' '.join(cmd), exit_code))

            # The output is loaded here, before ``finally`` deletes the file;
            # only the wrapping of each graph in its own iterator is lazy.
            graphs = DependencyGraph.load(conll_out.name)
            return (iter([graph]) for graph in graphs)
        finally:
            for handle in (conll_in, conll_out):
                handle.close()
                os.remove(handle.name)
Ejemplo n.º 3
0
    def tagged_parse(self, sentence, verbose=False):
        """
        Use MaltParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        The tokens are written, one tab-separated row per token numbered
        from 1, to ``malt_input.conll`` in the system temporary directory;
        the parser is run and ``malt_output.conll`` from the same directory
        is loaded as the result.

        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
        :param verbose: passed through unchanged to ``self._execute``
        :return: ``DependencyGraph`` the dependency graph representation of the sentence
        :raise Exception: if the MaltParser location is not configured or
            the parser has not been trained
        """

        if not self._malt_bin:
            raise Exception(
                "MaltParser location is not configured.  Call config_malt() first."
            )
        if not self._trained:
            raise Exception(
                "Parser has not been trained.  Call train() first.")

        # NOTE(review): both file names are fixed inside the shared temp
        # directory, so concurrent calls would overwrite each other's files.
        temp_dir = tempfile.gettempdir()
        input_file = os.path.join(temp_dir, 'malt_input.conll')
        output_file = os.path.join(temp_dir, 'malt_output.conll')

        # The context manager closes (and flushes) the input file before the
        # parser is started, including when a malformed token raises.
        with open(input_file, 'w') as f:
            for (i, (word, tag)) in enumerate(sentence, start=1):
                f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                        (i, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
            f.write('\n')

        # Each option and its value are deliberately kept in one list item,
        # exactly as ``self._execute`` has always received them.
        cmd = [
            'java',
            '-jar %s' % self._malt_bin,
            '-w %s' % temp_dir,
            '-c %s' % self.mco,
            '-i %s' % input_file,
            '-o %s' % output_file, '-m parse'
        ]

        # NOTE(review): the result of _execute is not inspected here; confirm
        # whether a failed run should be reported before loading the output.
        self._execute(cmd, 'parse', verbose)

        return DependencyGraph.load(output_file)
Ejemplo n.º 4
0
    def tagged_parse_sents(self, sentences, verbose=False):
        """
        Run MaltParser over a batch of tokenized, tagged sentences.

        The sentences are serialised to a temporary input file: one
        tab-separated row per (word, tag) pair, rows numbered from 1 inside
        each sentence, and two newline characters after each sentence. The
        parser is run in ``parse`` mode and the graphs found in its output
        file are returned. The temporary input and output files are always
        deleted before the method exits.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
                 representation of each sentence, each wrapped in its own
                 single-element iterator
        :raise Exception: when the MaltParser location is not configured,
                 the parser is untrained, or the run returns a non-zero
                 exit code
        """
        if not self._malt_bin:
            raise Exception("MaltParser location is not configured.  Call config_malt() first.")
        if not self._trained:
            raise Exception("Parser has not been trained.  Call train() first.")

        work_dir = self.working_dir
        source = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                             dir=work_dir, delete=False)
        target = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                             dir=work_dir, delete=False)
        line_format = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'

        try:
            for sent in sentences:
                # One encoded row per token, streamed straight into the file.
                source.writelines(
                    (line_format % (idx, word, '_', tag, tag,
                                    '_', '0', 'a', '_', '_')).encode("utf8")
                    for idx, (word, tag) in enumerate(sent, start=1))
                source.write(b'\n\n')
            # Close before launching the parser so all rows are on disk.
            source.close()

            java_prefix = ['java'] + self.additional_java_args
            cmd = java_prefix + ['-jar', self._malt_bin,
                                 '-w', self.working_dir,
                                 '-c', self.mco,
                                 '-i', source.name,
                                 '-o', target.name,
                                 '-m', 'parse']

            status = self._execute(cmd, verbose)
            if status != 0:
                failure = ("MaltParser parsing (%s) failed with exit code %d"
                           % (' '.join(cmd), status))
                raise Exception(failure)

            # Load eagerly: the output file is removed in ``finally`` below.
            loaded = DependencyGraph.load(target.name)
            return (iter([dep_graph]) for dep_graph in loaded)
        finally:
            source.close()
            os.remove(source.name)
            target.close()
            os.remove(target.name)
Ejemplo n.º 5
0
    def tagged_parse(self, sentence, verbose=False):
        """
        Use MaltParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        The tokens are written as UTF-8 encoded, tab-separated rows (numbered
        from 1) to a temporary input file in ``self.working_dir``; the parser
        is run in ``parse`` mode and its temporary output file is loaded.
        Both temporary files are removed before the method exits, whether it
        returns or raises.

        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
        :param verbose: passed through unchanged to ``self._execute``
        :return: ``DependencyGraph`` the dependency graph representation of the sentence
        :raise Exception: if the MaltParser location is not configured, the
            parser has not been trained, or the parser run returns a
            non-zero exit code
        """

        if not self._malt_bin:
            raise Exception("MaltParser location is not configured.  Call config_malt() first.")
        if not self._trained:
            raise Exception("Parser has not been trained.  Call train() first.")

        # delete=False: the files must survive close() so the external
        # process can use them; they are removed in ``finally``.
        input_file = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                                 dir=self.working_dir,
                                                 delete=False)
        output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                                  dir=self.working_dir,
                                                  delete=False)

        try:
            for (i, (word, tag)) in enumerate(sentence, start=1):
                input_str = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
                    (i, word, '_', tag, tag, '_', '0', 'a', '_', '_')
                # NamedTemporaryFile opens in binary mode by default, so the
                # text row has to be encoded before it can be written.
                input_file.write(input_str.encode("utf8"))
            input_file.write(b'\n')
            # Flush everything to disk before the parser reads the file.
            input_file.close()

            cmd = ['java', '-jar', self._malt_bin, '-w', self.working_dir,
                   '-c', self.mco, '-i', input_file.name,
                   '-o', output_file.name, '-m', 'parse']

            ret = self._execute(cmd, verbose)
            if ret != 0:
                raise Exception("MaltParser parsing (%s) failed with exit "
                                "code %d" % (' '.join(cmd), ret))

            return DependencyGraph.load(output_file.name)
        finally:
            input_file.close()
            os.remove(input_file.name)
            output_file.close()
            os.remove(output_file.name)
Ejemplo n.º 6
0
    def tagged_parse(self, sentence, verbose=False):
        """
        Use MaltParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        The tokens are written, one tab-separated row per token numbered
        from 1, to C{malt_input.conll} in the system temporary directory;
        the parser is run and C{malt_output.conll} from the same directory
        is loaded as the result.

        :param sentence: Input sentence to parse
        :type sentence: L{list} of (word, tag) L{tuple}s.
        :param verbose: passed through unchanged to C{self._execute}
        :return: C{DependencyGraph} the dependency graph representation of the sentence
        :raise Exception: if the MaltParser location is not configured or
            the parser has not been trained
        """

        if not self._malt_bin:
            raise Exception("MaltParser location is not configured.  Call config_malt() first.")
        if not self._trained:
            raise Exception("Parser has not been trained.  Call train() first.")

        # NOTE(review): the file names are fixed inside the shared temp
        # directory, so concurrent calls would overwrite each other's files.
        scratch_dir = tempfile.gettempdir()
        input_file = os.path.join(scratch_dir, 'malt_input.conll')
        output_file = os.path.join(scratch_dir, 'malt_output.conll')

        # Leaving the ``with`` block closes and flushes the input file before
        # the parser is started, even if a malformed token raises.
        with open(input_file, 'w') as f:
            for (i, (word, tag)) in enumerate(sentence):
                f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                        (i + 1, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
            f.write('\n')

        # Every option stays fused with its value in a single list item,
        # which is the form C{self._execute} has always been given.
        cmd = ['java', '-jar %s' % self._malt_bin, '-w %s' % scratch_dir,
               '-c %s' % self.mco, '-i %s' % input_file, '-o %s' % output_file, '-m parse']

        # NOTE(review): the result of _execute is not checked; confirm
        # whether a failed run should be reported before loading the output.
        self._execute(cmd, 'parse', verbose)

        return DependencyGraph.load(output_file)