Beispiel #1
0
 def load_parsing_model(self, model_dir, language='En',
                        case_insensitive=False, nbest=50, small_corpus=True,
                        overparsing=21, debug=0, smoothPos=0):
     """Load the parser model stored in model_dir, then apply options.

     The default options are expected to be adequate in general. The
     parser cannot load more than one model within a process, so a
     second load attempt is rejected.

     Args:
         model_dir: path of the model directory; it must exist.
         language, case_insensitive, nbest, small_corpus, overparsing,
             debug, smoothPos: passed positionally, in this order, to
             parser.setOptions() once the model has been loaded.

     Raises:
         ValueError: when the loaded flag is already set, or when
             model_dir does not exist.
     """
     if self._parser_model_loaded:
         already_loaded = 'Parser is already loaded and can only be loaded once.'
         raise ValueError(already_loaded)

     model_dir_found = os.path.exists(model_dir)
     if not model_dir_found:
         missing_dir = 'Parser model directory %r does not exist.' % model_dir
         raise ValueError(missing_dir)

     # NOTE(review): the flag is written through self and before
     # loadModel() runs, so it stays set even if loadModel() raises --
     # confirm that this is intended.
     self._parser_model_loaded = True
     parser.loadModel(model_dir)
     self.parser_model_dir = model_dir

     option_values = (language, case_insensitive, nbest, small_corpus,
                      overparsing, debug, smoothPos)
     parser.setOptions(*option_values)
    def set_parser_options(self, language='En', case_insensitive=False,
                           nbest=50, small_corpus=True, overparsing=21,
                           debug=0, smooth_pos=0):
        """Update the parser's options and return them as a dictionary.

        load_parser_model() invokes this automatically, so a direct call
        is only needed to change the options afterwards.

        Args:
            language: language code string; currently one of 'En'
                (English), 'Ch' (Chinese), or 'Ar' (Arabic).
            case_insensitive: when true, the parser ignores
                capitalization.
            nbest: upper bound on the size of the n-best list.
            small_corpus: when true, turns on additional smoothing
                (designed for training from small corpora, though
                helpful in many other situations).
            overparsing: how much more time the parser spends on a
                sentence relative to the time it needed to find the
                first complete parse; trades speed against accuracy.
            debug: non-negative integer; values above 0 make the parser
                print debug messages.
            smooth_pos: when above 0, used as the probability of seeing
                a known word in a part-of-speech never seen in training.

        Returns:
            The dictionary of new options, which is also stored on
            self.parser_options.

        Raises:
            RuntimeError: if RerankingParser._parser_model_loaded is
                not set.
        """
        model_ready = RerankingParser._parser_model_loaded
        if not model_ready:
            raise RuntimeError(
                'Parser must already be loaded (call load_parser_model() first)')

        parser.setOptions(language, case_insensitive, nbest, small_corpus,
                          overparsing, debug, smooth_pos)

        # Record what was just applied so callers can inspect it later.
        self.parser_options = dict(
            language=language,
            case_insensitive=case_insensitive,
            nbest=nbest,
            small_corpus=small_corpus,
            overparsing=overparsing,
            debug=debug,
            smooth_pos=smooth_pos,
        )
        return self.parser_options
    def set_parser_options(self,
                           language='En',
                           case_insensitive=False,
                           nbest=50,
                           small_corpus=True,
                           overparsing=21,
                           debug=0,
                           smooth_pos=0):
        """Apply a new set of parser options and report them back.

        This is run automatically by load_parser_model(); call it
        directly only to update the options later on. The new options
        are returned as a dictionary.

        Option summary:

        * language -- string naming the language: En (English), Ch
          (Chinese), or Ar (Arabic).
        * case_insensitive -- have the parser ignore capitalization.
        * nbest -- maximum size of the n-best list.
        * small_corpus -- True enables additional smoothing (meant
          originally for training from small corpora, but helpful in
          many situations).
        * overparsing -- how much more time to spend on a sentence
          relative to the time taken to find the first possible
          complete parse (a speed/accuracy tradeoff).
        * debug -- non-negative integer; anything above 0 makes the
          parser print debug messages.
        * smooth_pos -- a value above 0 is assigned as the probability
          of seeing a known word in a new part-of-speech (one never
          seen in training).

        Raises RuntimeError when RerankingParser._parser_model_loaded
        is not set.
        """
        if not RerankingParser._parser_model_loaded:
            not_loaded = ('Parser must already be loaded '
                          '(call load_parser_model() first)')
            raise RuntimeError(not_loaded)

        option_names = ('language', 'case_insensitive', 'nbest',
                        'small_corpus', 'overparsing', 'debug', 'smooth_pos')
        option_values = (language, case_insensitive, nbest,
                         small_corpus, overparsing, debug, smooth_pos)

        parser.setOptions(*option_values)
        # Pair each option name with the value that was just applied.
        self.parser_options = dict(zip(option_names, option_values))
        return self.parser_options
Beispiel #4
0
 def load_parsing_model(self, model_dir, language='En',
                        case_insensitive=False, nbest=50,
                        small_corpus=True, overparsing=21,
                        debug=0, smoothPos=0):
     """Read the parsing model found in model_dir and set the options.

     The default option values should generally be enough. Only one
     model can be loaded per process, so calling this when the loaded
     flag is already set is an error.

     The option arguments (language, case_insensitive, nbest,
     small_corpus, overparsing, debug, smoothPos) are handed to
     parser.setOptions() positionally after the model is loaded.

     Raises ValueError if the loaded flag is already set or if
     model_dir does not exist.
     """
     if self._parser_model_loaded:
         raise ValueError('Parser is already loaded '
                          'and can only be loaded once.')

     if os.path.exists(model_dir):
         # NOTE(review): the flag is assigned on self ahead of the
         # loadModel() call and is not reset if that call fails --
         # confirm this ordering is deliberate.
         self._parser_model_loaded = True
         parser.loadModel(model_dir)
         self.parser_model_dir = model_dir
         parser.setOptions(language, case_insensitive, nbest,
                           small_corpus, overparsing, debug, smoothPos)
     else:
         problem = 'Parser model directory %r does not exist.' % model_dir
         raise ValueError(problem)