def run(self):
    """Rebuild the module-level ``rrp`` parser and load its models.

    A fresh ``RerankingParser`` is bound to the global name ``rrp``; the
    first-stage parser model and then the reranker model (features and
    weights) are loaded into it.  Progress messages are printed before and
    after loading.
    """
    global rrp

    # All model paths are resolved relative to this source file, not the
    # current working directory.
    here = os.path.dirname(__file__)

    # Single-argument print(...) behaves identically on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Reloading")

    rrp = RerankingParser()
    rrp.load_parser_model(os.path.join(here, '../../lib/bllip/DATA/EN'))
    rrp.load_reranker_model(
        os.path.join(here, '../../lib/bllip/models/ec50spfinal/features.gz'),
        os.path.join(
            here, '../../lib/bllip/models/ec50spfinal/cvlm-l1c10P1-weights.gz'
        ),
    )

    print("Done loading model")
class BllipParser(ParserI):
    """
    ``ParserI`` implementation backed by the BLLIP reranking parser.

    Build an instance either directly through the constructor or, more
    conveniently, through ``BllipParser.from_unified_model_dir``.
    """

    def __init__(
        self,
        parser_model=None,
        reranker_features=None,
        reranker_weights=None,
        parser_options=None,
        reranker_options=None,
    ):
        """
        Create the underlying ``RerankingParser`` and load its models.

        Most callers should prefer ``from_unified_model_dir()``.

        :param parser_model: Path to the parser model directory
        :type parser_model: str
        :param reranker_features: Path to the reranker model's features file
        :type reranker_features: str
        :param reranker_weights: Path to the reranker model's weights file
        :type reranker_weights: str
        :param parser_options: optional dictionary of extra keyword
            arguments forwarded to ``RerankingParser.load_parser_model()``
        :type parser_options: dict(str)
        :param reranker_options: optional dictionary of extra keyword
            arguments forwarded to ``RerankingParser.load_reranker_model()``
        :type reranker_options: dict(str)
        """
        _ensure_bllip_import_or_error()

        # A missing (or empty) options mapping means "no extra keywords".
        parser_kwargs = parser_options or {}
        reranker_kwargs = reranker_options or {}

        self.rrp = RerankingParser()
        self.rrp.load_parser_model(parser_model, **parser_kwargs)

        # The reranker is only loaded when both of its files were supplied.
        if not (reranker_features and reranker_weights):
            return
        self.rrp.load_reranker_model(
            features_filename=reranker_features,
            weights_filename=reranker_weights,
            **reranker_kwargs
        )

    def parse(self, sentence):
        """
        Parse an untagged sentence with BLLIP.

        The sentence is given as a list of words; tagging is left to this
        BLLIP Parser instance's tagger.

        :return: An iterator that generates parse trees for the sentence
            from most likely to least likely.

        :param sentence: The sentence to be parsed
        :type sentence: list(str)
        :rtype: iter(Tree)
        """
        _ensure_ascii(sentence)
        for candidate in self.rrp.parse(sentence):
            yield _scored_parse_to_nltk_tree(candidate)

    def tagged_parse(self, word_and_tag_pairs):
        """
        Parse a sentence that has already been tokenized and tagged.

        BLLIP will attempt to use the tags provided but may use others if
        it can't come up with a complete parse subject to those
        constraints.  A tag of ``None`` leaves that token's tag
        unconstrained.

        :return: An iterator that generates parse trees for the sentence
            from most likely to least likely.

        :param word_and_tag_pairs: Input sentence to parse as (word, tag)
            pairs
        :type word_and_tag_pairs: list(tuple(str, str))
        :rtype: iter(Tree)
        """
        # Materialize once so the pairs can be walked twice even when the
        # caller hands in a one-shot iterable.
        pairs = list(word_and_tag_pairs)
        tokens = [word for word, _ in pairs]
        # Only positions with an explicit tag end up constrained.
        forced_tags = {
            position: tag
            for position, (_, tag) in enumerate(pairs)
            if tag is not None
        }

        _ensure_ascii(tokens)
        for candidate in self.rrp.parse_tagged(tokens, forced_tags):
            yield _scored_parse_to_nltk_tree(candidate)

    @classmethod
    def from_unified_model_dir(
        cls, model_dir, parser_options=None, reranker_options=None
    ):
        """
        Build a ``BllipParser`` from a unified parsing model directory.

        Unified parsing model directories are a standardized way of
        storing BLLIP parser and reranker models together on disk.  See
        ``bllipparser.RerankingParser.get_unified_model_parameters()``
        for more information about unified model directories.

        :return: A ``BllipParser`` object using the parser and reranker
            models in the model directory.

        :param model_dir: Path to the unified model directory.
        :type model_dir: str
        :param parser_options: optional dictionary of parser options,
            passed through to the constructor unchanged
        :type parser_options: dict(str)
        :param reranker_options: optional dictionary of reranker options,
            passed through to the constructor unchanged
        :type reranker_options: dict(str)
        :rtype: BllipParser
        """
        parser_dir, features_path, weights_path = get_unified_model_parameters(
            model_dir
        )
        return cls(
            parser_dir,
            features_path,
            weights_path,
            parser_options,
            reranker_options,
        )
break if good: new_nbest.append(t) return new_nbest if __name__ == '__main__': if len(sys.argv) != 3 and len(sys.argv) != 4: print('usage: python traversal.py vocab.gz gold.gz [nbest.gz]') sys.exit(0) words = read_vocab(sys.argv[1]) if len(sys.argv) == 3: for line in open_file(sys.argv[2]): print(ptb(line[:-1], words)) else: rrp = RerankingParser() parser = 'wsj/WSJ-PTB3/parser' rrp.load_parser_model(parser) for gold, nbest in zip(open_file(sys.argv[2]), generate_nbest(open_file(sys.argv[3]))): for tree in nbest: tree['seq'] = ptb(tree['ptb'], words) nbest = remove_duplicates(nbest) gold = Tree(gold) print(len(nbest)) for t in nbest: scores = Tree(t['ptb']).evaluate(gold) print(scores['gold'], scores['test'], scores['matched']) print(t['seq'])
class BllipParser(ParserI):
    """
    Interface for parsing with BLLIP Parser. BllipParser objects can be
    constructed with the ``BllipParser.from_unified_model_dir`` class
    method or manually using the ``BllipParser`` constructor.
    """

    def __init__(
        self,
        parser_model=None,
        reranker_features=None,
        reranker_weights=None,
        parser_options=None,
        reranker_options=None,
    ):
        """
        Load a BLLIP Parser model from scratch. You'll typically want to
        use the ``from_unified_model_dir()`` class method to construct
        this object.

        The reranker is loaded only when both ``reranker_features`` and
        ``reranker_weights`` are given.  If exactly one of them is given,
        no reranker is loaded and a warning is issued, since that is
        almost certainly a configuration mistake.

        :param parser_model: Path to parser model directory
        :type parser_model: str
        :param reranker_features: Path to the reranker model's features file
        :type reranker_features: str
        :param reranker_weights: Path to the reranker model's weights file
        :type reranker_weights: str
        :param parser_options: optional dictionary of parser options,
            forwarded as keyword arguments to
            ``bllipparser.RerankingParser.RerankingParser.load_parser_model()``
        :type parser_options: dict(str)
        :param reranker_options: optional dictionary of reranker options,
            forwarded as keyword arguments to
            ``bllipparser.RerankingParser.RerankingParser.load_reranker_model()``
        :type reranker_options: dict(str)
        """
        _ensure_bllip_import_or_error()

        parser_options = parser_options or {}
        reranker_options = reranker_options or {}

        self.rrp = RerankingParser()
        self.rrp.load_parser_model(parser_model, **parser_options)
        if reranker_features and reranker_weights:
            self.rrp.load_reranker_model(features_filename=reranker_features, weights_filename=reranker_weights, **reranker_options)
        elif reranker_features or reranker_weights:
            # Previously a half-specified reranker was dropped silently;
            # keep the parser-only behaviour but make it visible.
            import warnings

            warnings.warn(
                "BllipParser: both reranker_features and reranker_weights "
                "are required to load a reranker; only one was given, so "
                "no reranker model was loaded.",
                stacklevel=2,
            )

    def parse(self, sentence):
        """
        Use BLLIP Parser to parse a sentence. Takes a sentence as a list
        of words; it will be automatically tagged with this BLLIP Parser
        instance's tagger.

        :return: An iterator that generates parse trees for the sentence
            from most likely to least likely.

        :param sentence: The sentence to be parsed
        :type sentence: list(str)
        :rtype: iter(Tree)
        """
        _ensure_ascii(sentence)
        nbest_list = self.rrp.parse(sentence)
        for scored_parse in nbest_list:
            yield _scored_parse_to_nltk_tree(scored_parse)

    def tagged_parse(self, word_and_tag_pairs):
        """
        Use BLLIP to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized
        and tagged. BLLIP will attempt to use the tags provided but may
        use others if it can't come up with a complete parse subject to
        those constraints. You may also specify a tag as ``None`` to leave
        a token's tag unconstrained.

        :return: An iterator that generates parse trees for the sentence
            from most likely to least likely.

        :param word_and_tag_pairs: Input sentence to parse as (word, tag)
            pairs
        :type word_and_tag_pairs: list(tuple(str, str))
        :rtype: iter(Tree)
        """
        words = []
        # Maps token index -> required tag; untagged tokens are left out.
        tag_map = {}
        for i, (word, tag) in enumerate(word_and_tag_pairs):
            words.append(word)
            if tag is not None:
                tag_map[i] = tag

        _ensure_ascii(words)
        nbest_list = self.rrp.parse_tagged(words, tag_map)
        for scored_parse in nbest_list:
            yield _scored_parse_to_nltk_tree(scored_parse)

    @classmethod
    def from_unified_model_dir(cls, model_dir, parser_options=None, reranker_options=None):
        """
        Create a ``BllipParser`` object from a unified parsing model
        directory. Unified parsing model directories are a standardized
        way of storing BLLIP parser and reranker models together on disk.
        See ``bllipparser.RerankingParser.get_unified_model_parameters()``
        for more information about unified model directories.

        :return: A ``BllipParser`` object using the parser and reranker
            models in the model directory.

        :param model_dir: Path to the unified model directory.
        :type model_dir: str
        :param parser_options: optional dictionary of parser options,
            passed through to the constructor
        :type parser_options: dict(str)
        :param reranker_options: optional dictionary of reranker options,
            passed through to the constructor
        :type reranker_options: dict(str)
        :rtype: BllipParser
        """
        (
            parser_model_dir,
            reranker_features_filename,
            reranker_weights_filename,
        ) = get_unified_model_parameters(model_dir)
        return cls(
            parser_model_dir,
            reranker_features_filename,
            reranker_weights_filename,
            parser_options,
            reranker_options,
        )
sys.exit(0) if sys.argv[3] == 'bllip': # bllip parser = '/pro/dpg/dc65/models/WSJ+QB' print 'basic:', parser elif sys.argv[3] == 'self': # self-trained parser = '/pro/dpg/dc65/models/WSJ+Gigaword' print 'self-trained:', parser else: print 'parser options: bllip, self' sys.exit(0) rrp = RerankingParser() rrp.load_parser_model(parser + '/parser') print 'reranker: /pro/dpg/dc65/models/WSJ/' rrp.load_reranker_model('/pro/dpg/dc65/models/WSJ/reranker/features.gz', '/pro/dpg/dc65/models/WSJ/reranker/weights.gz') mode = int(sys.argv[2]) # 0: gold, 1: 1best, 2: nbest f = open('tmp/trees', 'w') if mode == 2: g = open('tmp/scores', 'w') with open(sys.argv[1], 'rb') as csvfile: reader = csv.reader(csvfile, delimiter=',', quotechar='"') iter = 0 for row in reader: iter += 1 if iter % 3 == 1: if mode == 0: f.write(row[2] + '\n')
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

if __name__ == "__main__":
    # Demo: score two bracketings of the same sentence against each other.
    # Needs to be run from the root of the repository, because the parser
    # model path below is relative.
    from bllipparser import RerankingParser, Tree

    rrp = RerankingParser()
    # NOTE(review): presumably only the terminal tables are needed for
    # Tree.evaluate(), hence terms_only=True -- confirm.
    rrp.load_parser_model("first-stage/DATA/EN", terms_only=True)

    tree1 = Tree("""(S1 (INTJ (UH Oh) (JJ sure) (. !)))""")
    tree2 = Tree("""(S1 (FRAG (INTJ (UH Oh) (INTJ (JJ sure))) (. !)))""")

    # Evaluate in both directions (each tree takes a turn as the argument).
    # Single-argument print(...) gives the same output on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(tree1.evaluate(tree2))
    print(tree2.evaluate(tree1))
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

if __name__ == "__main__":
    # Compares two alternative parses of one sentence with Tree.evaluate().
    # Needs to be run from the root of the repository so that the relative
    # model path resolves.
    from bllipparser import RerankingParser, Tree

    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN', terms_only=True)

    tree1 = Tree('''(S1 (INTJ (UH Oh) (JJ sure) (. !)))''')
    tree2 = Tree('''(S1 (FRAG (INTJ (UH Oh) (INTJ (JJ sure))) (. !)))''')

    # print(...) with one argument is valid, and prints the same text, on
    # both Python 2 and Python 3 (the print statement is Python 2 only).
    print(tree1.evaluate(tree2))
    print(tree2.evaluate(tree1))
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

if __name__ == "__main__":
    # needs to be run from the root of the repository for the parser
    # model path below to work
    from bllipparser import RerankingParser, Tree

    rrp = RerankingParser()
    rrp.load_parser_model('first-stage/DATA/EN', heads_only=True)

    tree1 = Tree('''(S1 (SQ (VBZ Swears) (NP (PRP she)) (VP (VBD recognized) (NP (PRP$ his) (NN voice)) (, ,) (SBAR (IN that) (S (NP (NNP Tim)) (VP (VBD fired)))) (, ,) ('' ') (S (S (NP (PRP It)) (VP (VBZ 's) (NP (PRP$ my) (NN money)))) (CC and) (S (NP (PRP I)) (VP (VBP want) (S (NP (PRP it)) (VP (POS '))))))) (. !)))''')

    head = tree1.head()
    # Each message is pre-formatted into a single string so that print(...)
    # emits the same text on Python 2 and Python 3: a multi-argument
    # print(a, b) would print a tuple on Python 2, and the bare print
    # statement is a SyntaxError on Python 3.
    print('head word of sentence: %s' % (head.token,))
    print('head tree of sentence: %s' % (head,))
    print('')

    # print all syntactic dependencies
    for governor, dependent in tree1.dependencies():
        print('dependency: %s -> %s' % (governor.token, dependent.token))
for span, allowed_labels in constraints.items(): if mapping.get(span) not in allowed_labels: return False return True nbest_list = rrp.parse(sentence) for item in nbest_list: if consistent(item.ptb_parse, constraints): return item.ptb_parse else: return None if __name__ == "__main__": # this needs to be run from the root of the repository since it has # a relative path to the parsing model from bllipparser import RerankingParser rrp = RerankingParser() rrp.load_parser_model('first-stage/DATA/EN') # the constraint means: there must be a VP from [1,5) # (i.e., left ... Falklands) # this encourages the parser to pick "left" as the main verb constraints = {(1, 5): ['VP']} print parse_constrained(rrp, 'British left waffles on Falklands .'.split(), constraints) # if we parse without constraints, we get that the main verb is "waffles" print parse_constrained(rrp, 'British left waffles on Falklands .'.split(), {})
mapping[subtree.span()] = subtree.label for span, allowed_labels in constraints.items(): if mapping.get(span) not in allowed_labels: return False return True nbest_list = rrp.parse(sentence) for item in nbest_list: if consistent(item.ptb_parse, constraints): return item.ptb_parse else: return None if __name__ == "__main__": # this needs to be run from the root of the repository since it has # a relative path to the parsing model from bllipparser import RerankingParser rrp = RerankingParser() rrp.load_parser_model('first-stage/DATA/EN') # the constraint means: there must be a VP from [1,5) # (i.e., left ... Falklands) # this encourages the parser to pick "left" as the main verb constraints = {(1, 5): ['VP']} print parse_constrained(rrp, 'British left waffles on Falklands .'.split(), constraints) # if we parse without constraints, we get that the main verb is "waffles" print parse_constrained(rrp, 'British left waffles on Falklands .'.split(), {})