Exemple #1
0
    def __init__(
        self,
        parser_model=None,
        reranker_features=None,
        reranker_weights=None,
        parser_options=None,
        reranker_options=None,
    ):
        """
        Build a BLLIP parser by loading its model files directly. Most
        callers will find the ``from_unified_model_dir()`` class method
        a more convenient way to construct this object.

        The reranker is loaded only when both ``reranker_features`` and
        ``reranker_weights`` are given; if either one is missing, only
        the parser model is loaded.

        :param parser_model: Path to the parser model directory
        :type parser_model: str

        :param reranker_features: Path to the reranker model's features
            file
        :type reranker_features: str

        :param reranker_weights: Path to the reranker model's weights
            file
        :type reranker_weights: str

        :param parser_options: optional dictionary of parser options,
            passed as keyword arguments to ``load_parser_model()``; see
            ``bllipparser.RerankingParser.RerankingParser.load_parser_options()``
            for more information.
        :type parser_options: dict(str)

        :param reranker_options: optional dictionary of reranker
            options, passed as keyword arguments to
            ``load_reranker_model()``; see
            ``bllipparser.RerankingParser.RerankingParser.load_reranker_model()``
            for more information.
        :type reranker_options: dict(str)
        """
        # Helper defined elsewhere in this module; judging by its name it
        # checks that the bllipparser package is importable.
        _ensure_bllip_import_or_error()

        # A missing (or empty) options mapping means "no extra keywords".
        parser_kwargs = parser_options if parser_options else {}
        reranker_kwargs = reranker_options if reranker_options else {}

        # Expose the parser on the instance before any model is loaded.
        rrp = RerankingParser()
        self.rrp = rrp
        rrp.load_parser_model(parser_model, **parser_kwargs)

        # The reranker needs both of its files; without them we are done.
        if not reranker_features or not reranker_weights:
            return

        rrp.load_reranker_model(
            features_filename=reranker_features,
            weights_filename=reranker_weights,
            **reranker_kwargs
        )
Exemple #2
0
        break
    if good:
      new_nbest.append(t)
  return new_nbest


if __name__ == '__main__':
  # Command line: vocab.gz gold.gz [nbest.gz]
  #   * two file arguments: print ptb(...) for every line of the gold file
  #   * three file arguments: score each n-best candidate against gold
  argc = len(sys.argv)
  if argc not in (3, 4):
    print('usage: python traversal.py vocab.gz gold.gz [nbest.gz]')
    sys.exit(0)

  words = read_vocab(sys.argv[1])
  if argc == 4:
    # rrp is not referenced again below; loading the model is presumably
    # needed for its side effects before Tree objects can be built
    # -- TODO confirm against the bllipparser documentation.
    rrp = RerankingParser()
    parser = 'wsj/WSJ-PTB3/parser'
    rrp.load_parser_model(parser)

    gold_lines = open_file(sys.argv[2])
    nbest_lists = generate_nbest(open_file(sys.argv[3]))
    for gold_line, candidates in zip(gold_lines, nbest_lists):
      # Attach the ptb(...) result to every candidate under 'seq'
      # before the duplicates are dropped.
      for candidate in candidates:
        candidate['seq'] = ptb(candidate['ptb'], words)
      unique = remove_duplicates(candidates)
      gold_tree = Tree(gold_line)

      # Output per gold line: the candidate count, then for each
      # candidate one line of evaluation counts and one line with 'seq'.
      print(len(unique))
      for candidate in unique:
        result = Tree(candidate['ptb']).evaluate(gold_tree)
        print(result['gold'], result['test'], result['matched'])
        print(candidate['seq'])
  else:
    # Gold-only mode: drop the final character of each line (normally
    # the newline) and print the ptb(...) result.
    for raw in open_file(sys.argv[2]):
      print(ptb(raw[:-1], words))