예제 #1
0
 def run(self):
     """
     Rebuild the module-level ``rrp`` parser.

     Creates a new ``RerankingParser``, rebinds the global ``rrp`` to
     it, then loads the parser model and the reranker features/weights
     files from paths under ``../../lib/bllip`` relative to this file's
     directory. A message is printed before and after loading.
     """
     global rrp
     # Single-argument print(...) emits the same text under the Python 2
     # print statement and the Python 3 print function.
     print("Reloading")
     # Every model path is resolved relative to this file's directory.
     here = os.path.dirname(__file__)
     rrp = RerankingParser()
     rrp.load_parser_model(os.path.join(here, '../../lib/bllip/DATA/EN'))
     rrp.load_reranker_model(
         os.path.join(here, '../../lib/bllip/models/ec50spfinal/features.gz'),
         os.path.join(here, '../../lib/bllip/models/ec50spfinal/cvlm-l1c10P1-weights.gz'))
     print("Done loading model")
예제 #2
0
파일: bllip.py 프로젝트: prz3m/kind2anki
class BllipParser(ParserI):
    """
    Parser interface backed by a BLLIP ``RerankingParser``.

    The wrapped parser lives in ``self.rrp``. Build an instance either
    with the ``BllipParser.from_unified_model_dir`` class method or by
    handing the individual model paths to the constructor.
    """

    def __init__(
        self,
        parser_model=None,
        reranker_features=None,
        reranker_weights=None,
        parser_options=None,
        reranker_options=None,
    ):
        """
        Create the underlying ``RerankingParser`` and load its models.

        The parser model is always loaded. The reranker model is loaded
        only when both ``reranker_features`` and ``reranker_weights``
        are truthy. Most callers will prefer the
        ``from_unified_model_dir()`` class method.

        :param parser_model: Path to the parser model directory
        :type parser_model: str

        :param reranker_features: Path to the reranker model's features
            file
        :type reranker_features: str

        :param reranker_weights: Path to the reranker model's weights
            file
        :type reranker_weights: str

        :param parser_options: optional dictionary passed as keyword
            arguments to ``RerankingParser.load_parser_model()``
        :type parser_options: dict(str)

        :param reranker_options: optional dictionary passed as keyword
            arguments to ``RerankingParser.load_reranker_model()``
        :type reranker_options: dict(str)
        """
        _ensure_bllip_import_or_error()

        # A missing (or empty) options mapping means "no extra keywords".
        if not parser_options:
            parser_options = {}
        if not reranker_options:
            reranker_options = {}

        self.rrp = RerankingParser()
        self.rrp.load_parser_model(parser_model, **parser_options)

        # Without both reranker files there is nothing more to load.
        if not (reranker_features and reranker_weights):
            return
        self.rrp.load_reranker_model(
            features_filename=reranker_features,
            weights_filename=reranker_weights,
            **reranker_options
        )

    def parse(self, sentence):
        """
        Parse an untagged sentence with BLLIP.

        The words are checked with ``_ensure_ascii`` and handed to the
        wrapped parser without tags. This is a generator, so no work
        happens until the result is iterated.

        :return: An iterator over parse trees, in the order of BLLIP's
            n-best list (documented upstream as most to least likely).

        :param sentence: The sentence to be parsed
        :type sentence: list(str)
        :rtype: iter(Tree)
        """
        _ensure_ascii(sentence)
        for candidate in self.rrp.parse(sentence):
            yield _scored_parse_to_nltk_tree(candidate)

    def tagged_parse(self, word_and_tag_pairs):
        """
        Parse a sentence that has already been tokenized and tagged.

        Each (word, tag) pair contributes its word to the token list. A
        tag that is not ``None`` is recorded against the token's index
        and passed to BLLIP as a tag constraint; a ``None`` tag leaves
        that token unconstrained. This is a generator, so no work
        happens until the result is iterated.

        :return: An iterator over parse trees, in the order of BLLIP's
            n-best list (documented upstream as most to least likely).

        :param word_and_tag_pairs: Input sentence as (word, tag) pairs
        :type word_and_tag_pairs: list(tuple(str, str))
        :rtype: iter(Tree)
        """
        tokens = []
        constraints = {}
        for position, (token, tag) in enumerate(word_and_tag_pairs):
            tokens.append(token)
            if tag is None:
                continue
            constraints[position] = tag

        _ensure_ascii(tokens)
        for candidate in self.rrp.parse_tagged(tokens, constraints):
            yield _scored_parse_to_nltk_tree(candidate)

    @classmethod
    def from_unified_model_dir(
        cls, model_dir, parser_options=None, reranker_options=None
    ):
        """
        Build a ``BllipParser`` from a unified parsing model directory.

        ``get_unified_model_parameters(model_dir)`` supplies the parser
        model directory, the reranker features file and the reranker
        weights file; these are passed to the constructor together with
        the option dictionaries.

        :return: A ``BllipParser`` object using the parser and reranker
            models in the model directory.

        :param model_dir: Path to the unified model directory.
        :type model_dir: str
        :param parser_options: optional dictionary of parser options,
            forwarded unchanged to the constructor.
        :type parser_options: dict(str)
        :param reranker_options: optional dictionary of reranker
            options, forwarded unchanged to the constructor.
        :type reranker_options: dict(str)
        :rtype: BllipParser
        """
        model_paths = get_unified_model_parameters(model_dir)
        parser_dir, features_path, weights_path = model_paths
        return cls(
            parser_dir,
            features_path,
            weights_path,
            parser_options,
            reranker_options,
        )
예제 #3
0
        break
    if good:
      new_nbest.append(t)
  return new_nbest


if __name__ == '__main__':
  # Expected arguments: vocab.gz gold.gz [nbest.gz]
  if len(sys.argv) not in (3, 4):
    print('usage: python traversal.py vocab.gz gold.gz [nbest.gz]')
    sys.exit(0)

  words = read_vocab(sys.argv[1])
  if len(sys.argv) == 4:
    # An n-best file was given: pair each gold tree with its n-best
    # list, drop duplicate candidates and print the evaluation counts
    # and the sequence form of every remaining candidate.
    rrp = RerankingParser()
    parser = 'wsj/WSJ-PTB3/parser'
    rrp.load_parser_model(parser)
    for gold, nbest in zip(open_file(sys.argv[2]),
                           generate_nbest(open_file(sys.argv[3]))):
      for tree in nbest:
        tree['seq'] = ptb(tree['ptb'], words)
      nbest = remove_duplicates(nbest)
      gold = Tree(gold)
      print(len(nbest))
      for t in nbest:
        scores = Tree(t['ptb']).evaluate(gold)
        print(scores['gold'], scores['test'], scores['matched'])
        print(t['seq'])
  else:
    # Gold file only: convert each line after dropping its last
    # character.
    for line in open_file(sys.argv[2]):
      print(ptb(line[:-1], words))
예제 #4
0
파일: bllip.py 프로젝트: weissercn/nltk
class BllipParser(ParserI):
    """
    Parser interface backed by a BLLIP ``RerankingParser``.

    The wrapped parser lives in ``self.rrp``. Build an instance either
    with the ``BllipParser.from_unified_model_dir`` class method or by
    handing the individual model paths to the constructor.
    """

    def __init__(
        self,
        parser_model=None,
        reranker_features=None,
        reranker_weights=None,
        parser_options=None,
        reranker_options=None,
    ):
        """
        Create the underlying ``RerankingParser`` and load its models.

        The parser model is always loaded. The reranker model is loaded
        only when both ``reranker_features`` and ``reranker_weights``
        are truthy. Most callers will prefer the
        ``from_unified_model_dir()`` class method.

        :param parser_model: Path to the parser model directory
        :type parser_model: str

        :param reranker_features: Path to the reranker model's features
            file
        :type reranker_features: str

        :param reranker_weights: Path to the reranker model's weights
            file
        :type reranker_weights: str

        :param parser_options: optional dictionary passed as keyword
            arguments to ``RerankingParser.load_parser_model()``
        :type parser_options: dict(str)

        :param reranker_options: optional dictionary passed as keyword
            arguments to ``RerankingParser.load_reranker_model()``
        :type reranker_options: dict(str)
        """
        _ensure_bllip_import_or_error()

        # A missing (or empty) options mapping means "no extra keywords".
        if not parser_options:
            parser_options = {}
        if not reranker_options:
            reranker_options = {}

        self.rrp = RerankingParser()
        self.rrp.load_parser_model(parser_model, **parser_options)

        # Without both reranker files there is nothing more to load.
        if not (reranker_features and reranker_weights):
            return
        self.rrp.load_reranker_model(
            features_filename=reranker_features,
            weights_filename=reranker_weights,
            **reranker_options
        )

    def parse(self, sentence):
        """
        Parse an untagged sentence with BLLIP.

        The words are checked with ``_ensure_ascii`` and handed to the
        wrapped parser without tags. This is a generator, so no work
        happens until the result is iterated.

        :return: An iterator over parse trees, in the order of BLLIP's
            n-best list (documented upstream as most to least likely).

        :param sentence: The sentence to be parsed
        :type sentence: list(str)
        :rtype: iter(Tree)
        """
        _ensure_ascii(sentence)
        for candidate in self.rrp.parse(sentence):
            yield _scored_parse_to_nltk_tree(candidate)

    def tagged_parse(self, word_and_tag_pairs):
        """
        Parse a sentence that has already been tokenized and tagged.

        Each (word, tag) pair contributes its word to the token list. A
        tag that is not ``None`` is recorded against the token's index
        and passed to BLLIP as a tag constraint; a ``None`` tag leaves
        that token unconstrained. This is a generator, so no work
        happens until the result is iterated.

        :return: An iterator over parse trees, in the order of BLLIP's
            n-best list (documented upstream as most to least likely).

        :param word_and_tag_pairs: Input sentence as (word, tag) pairs
        :type word_and_tag_pairs: list(tuple(str, str))
        :rtype: iter(Tree)
        """
        tokens = []
        constraints = {}
        for position, (token, tag) in enumerate(word_and_tag_pairs):
            tokens.append(token)
            if tag is None:
                continue
            constraints[position] = tag

        _ensure_ascii(tokens)
        for candidate in self.rrp.parse_tagged(tokens, constraints):
            yield _scored_parse_to_nltk_tree(candidate)

    @classmethod
    def from_unified_model_dir(cls,
                               model_dir,
                               parser_options=None,
                               reranker_options=None):
        """
        Build a ``BllipParser`` from a unified parsing model directory.

        ``get_unified_model_parameters(model_dir)`` supplies the parser
        model directory, the reranker features file and the reranker
        weights file; these are passed to the constructor together with
        the option dictionaries.

        :return: A ``BllipParser`` object using the parser and reranker
            models in the model directory.

        :param model_dir: Path to the unified model directory.
        :type model_dir: str
        :param parser_options: optional dictionary of parser options,
            forwarded unchanged to the constructor.
        :type parser_options: dict(str)
        :param reranker_options: optional dictionary of reranker
            options, forwarded unchanged to the constructor.
        :type reranker_options: dict(str)
        :rtype: BllipParser
        """
        model_paths = get_unified_model_parameters(model_dir)
        parser_dir, features_path, weights_path = model_paths
        return cls(
            parser_dir,
            features_path,
            weights_path,
            parser_options,
            reranker_options,
        )
예제 #5
0
파일: parse.py 프로젝트: cdg720/paraparsing
	sys.exit(0)

# Pick the first-stage parser model from the third command-line argument.
# Each print(...) call takes one pre-built string so the output is the
# same under the Python 2 print statement and the Python 3 print function.
if sys.argv[3] == 'bllip':
	# bllip
	parser = '/pro/dpg/dc65/models/WSJ+QB'
	print('basic: ' + parser)
elif sys.argv[3] == 'self':
	# self-trained
	parser = '/pro/dpg/dc65/models/WSJ+Gigaword'
	print('self-trained: ' + parser)
else:
	print('parser options: bllip, self')
	sys.exit(0)

# The parser model depends on the choice above; the reranker files are
# always taken from the WSJ model directory.
rrp = RerankingParser()
rrp.load_parser_model(parser + '/parser')
print('reranker: /pro/dpg/dc65/models/WSJ/')
rrp.load_reranker_model('/pro/dpg/dc65/models/WSJ/reranker/features.gz', '/pro/dpg/dc65/models/WSJ/reranker/weights.gz')

mode = int(sys.argv[2]) # 0: gold, 1: 1best, 2: nbest
f = open('tmp/trees', 'w')
# The scores file is opened only in nbest mode (mode 2).
if mode == 2:
	g = open('tmp/scores', 'w')
with open(sys.argv[1], 'rb') as csvfile:
	reader = csv.reader(csvfile, delimiter=',', quotechar='"')
	iter = 0
	for row in reader:
		iter += 1
		if iter % 3 == 1:
			if mode == 0:
				f.write(row[2] + '\n')
예제 #6
0
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.  You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
# License for the specific language governing permissions and limitations
# under the License.

if __name__ == "__main__":
    # needs to be run from the root of the repository, because the
    # parser model path below is relative

    from bllipparser import RerankingParser, Tree

    # NOTE(review): presumably the model must be loaded (terms only)
    # before Tree objects can be built or evaluated -- confirm against
    # the bllipparser documentation.
    rrp = RerankingParser()
    rrp.load_parser_model("first-stage/DATA/EN", terms_only=True)

    tree1 = Tree("""(S1 (INTJ (UH Oh) (JJ sure) (. !)))""")

    tree2 = Tree("""(S1 (FRAG (INTJ (UH Oh) (INTJ (JJ sure))) (. !)))""")

    # Evaluate each tree against the other. print(...) with a single
    # argument behaves the same on Python 2 and Python 3.
    print(tree1.evaluate(tree2))
    print(tree2.evaluate(tree1))
예제 #7
0
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.  You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
# License for the specific language governing permissions and limitations
# under the License.

if __name__ == "__main__":
    # needs to be run from the root of the repository, because the
    # parser model path below is relative

    from bllipparser import RerankingParser, Tree

    # NOTE(review): presumably the model must be loaded (terms only)
    # before Tree objects can be built or evaluated -- confirm against
    # the bllipparser documentation.
    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN', terms_only=True)

    tree1 = Tree('''(S1 (INTJ (UH Oh) (JJ sure) (. !)))''')

    tree2 = Tree('''(S1 (FRAG (INTJ (UH Oh) (INTJ (JJ sure))) (. !)))''')

    # Evaluate each tree against the other. print(...) with a single
    # argument behaves the same on Python 2 and Python 3.
    print(tree1.evaluate(tree2))
    print(tree2.evaluate(tree1))
예제 #8
0
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
# License for the specific language governing permissions and limitations
# under the License.

if __name__ == "__main__":
    # needs to be run from the root of the repository for the parser
    # model path below to work

    from bllipparser import RerankingParser, Tree

    # NOTE(review): presumably the model must be loaded (heads only)
    # before head and dependency information is available on trees --
    # confirm against the bllipparser documentation.
    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN', heads_only=True)

    tree1 = Tree('''(S1 (SQ (VBZ Swears) (NP (PRP she)) (VP (VBD
    recognized) (NP (PRP$ his) (NN voice)) (, ,) (SBAR (IN that) (S
    (NP (NNP Tim)) (VP (VBD fired)))) (, ,) ('' ') (S (S (NP (PRP It))
    (VP (VBZ 's) (NP (PRP$ my) (NN money)))) (CC and) (S (NP (PRP I))
    (VP (VBP want) (S (NP (PRP it)) (VP (POS '))))))) (. !)))''')

    # Each print(...) call takes one pre-formatted string so the output
    # is the same on Python 2 and Python 3.
    head = tree1.head()
    print('head word of sentence: %s' % (head.token,))
    print('head tree of sentence: %s' % (head,))
    print('')

    # print all syntactic dependencies
    for governor, dependent in tree1.dependencies():
        print('dependency: %s -> %s' % (governor.token, dependent.token))
        for span, allowed_labels in constraints.items():
            if mapping.get(span) not in allowed_labels:
                return False
        return True

    nbest_list = rrp.parse(sentence)
    for item in nbest_list:
        if consistent(item.ptb_parse, constraints):
            return item.ptb_parse
    else:
        return None


if __name__ == "__main__":
    # this needs to be run from the root of the repository since it has
    # a relative path to the parsing model

    from bllipparser import RerankingParser
    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN')

    text = 'British left waffles on Falklands .'

    # the constraint means: there must be a VP from [1,5)
    # (i.e., left ... Falklands)
    # this encourages the parser to pick "left" as the main verb
    constraints = {(1, 5): ['VP']}
    # print(...) with a single argument behaves the same on Python 2
    # and Python 3.
    print(parse_constrained(rrp, text.split(), constraints))
    # if we parse without constraints, we get that the main verb is "waffles"
    print(parse_constrained(rrp, text.split(), {}))
예제 #10
0
            mapping[subtree.span()] = subtree.label
        for span, allowed_labels in constraints.items():
            if mapping.get(span) not in allowed_labels:
                return False
        return True

    nbest_list = rrp.parse(sentence)
    for item in nbest_list:
        if consistent(item.ptb_parse, constraints):
            return item.ptb_parse
    else:
        return None

if __name__ == "__main__":
    # this needs to be run from the root of the repository since it has
    # a relative path to the parsing model

    from bllipparser import RerankingParser
    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN')

    text = 'British left waffles on Falklands .'

    # the constraint means: there must be a VP from [1,5)
    # (i.e., left ... Falklands)
    # this encourages the parser to pick "left" as the main verb
    constraints = {(1, 5): ['VP']}
    # print(...) with a single argument behaves the same on Python 2
    # and Python 3.
    print(parse_constrained(rrp, text.split(), constraints))
    # if we parse without constraints, we get that the main verb is "waffles"
    print(parse_constrained(rrp, text.split(), {}))