def load_parsing_model(self, model_dir, language='En',
                       case_insensitive=False, nbest=50, small_corpus=True,
                       overparsing=21, debug=0, smoothPos=0):
    """Load the parser model found in model_dir and apply parsing options.

    The defaults are normally adequate. The underlying parser does not
    support loading more than one model within the same process.

    Args:
        model_dir: path of the directory holding the parser model.
        language, case_insensitive, nbest, small_corpus, overparsing,
            debug, smoothPos: forwarded positionally, in this order, to
            parser.setOptions() once the model has been loaded.

    Raises:
        ValueError: if this object already reports a loaded parser model,
            or if model_dir does not exist on disk.
    """
    already_loaded = self._parser_model_loaded
    if already_loaded:
        raise ValueError('Parser is already loaded and can only be loaded once.')

    directory_missing = not os.path.exists(model_dir)
    if directory_missing:
        raise ValueError('Parser model directory %r does not exist.' % model_dir)

    # The flag is raised before the model is read, so it remains set even
    # if loading the model raises.
    self._parser_model_loaded = True
    parser.loadModel(model_dir)
    self.parser_model_dir = model_dir

    option_values = (language, case_insensitive, nbest, small_corpus,
                     overparsing, debug, smoothPos)
    parser.setOptions(*option_values)
def set_parser_options(self, language='En', case_insensitive=False,
                       nbest=50, small_corpus=True, overparsing=21,
                       debug=0, smooth_pos=0):
    """Set options for the parser and return them as a dictionary.

    A parser model must have been loaded before this is called. Call this
    method whenever the parsing options need to be updated after loading.

    Args:
        language: string describing the language. Currently one of
            'En' (English), 'Ch' (Chinese), or 'Ar' (Arabic).
        case_insensitive: make the parser ignore capitalization.
        nbest: maximum size of the n-best list.
        small_corpus: True enables additional smoothing (originally
            intended for training from small corpora, but helpful in
            many situations).
        overparsing: how much more time the parser will spend on a
            sentence relative to the time it took to find the first
            possible complete parse. This affects the speed/accuracy
            tradeoff.
        debug: non-negative integer; values above 0 make the parser
            print debug messages.
        smooth_pos: when above 0, the parser assigns this value as the
            probability of seeing a known word in a new part-of-speech
            (one never seen in training).

    Returns:
        The dictionary of the new options, which is also stored as
        self.parser_options.

    Raises:
        RuntimeError: if no parser model has been loaded yet.
    """
    # Read the flag through the instance: the loader records a successful
    # load on ``self``, and instance attribute lookup also falls back to a
    # class-level flag, so both ways of recording the load are honoured.
    # NOTE(review): the message cites load_parser_model(); confirm that is
    # the loader's current name before changing the text.
    if not self._parser_model_loaded:
        raise RuntimeError('Parser must already be loaded (call '
                           'load_parser_model() first)')

    parser.setOptions(language, case_insensitive, nbest, small_corpus,
                      overparsing, debug, smooth_pos)

    # Remember what was applied so callers can inspect the active options.
    self.parser_options = {
        'language': language,
        'case_insensitive': case_insensitive,
        'nbest': nbest,
        'small_corpus': small_corpus,
        'overparsing': overparsing,
        'debug': debug,
        'smooth_pos': smooth_pos,
    }
    return self.parser_options
def load_parsing_model(self, model_dir, language='En',
                       case_insensitive=False, nbest=50,
                       small_corpus=True, overparsing=21, debug=0,
                       smoothPos=0):
    """Read the parsing model from model_dir, then configure the parser.

    The default option values should be fine in most cases. Only one
    model can be loaded per process by the underlying parser.

    Args:
        model_dir: directory containing the parser model.
        language, case_insensitive, nbest, small_corpus, overparsing,
            debug, smoothPos: option values handed to parser.setOptions()
            in this order after the model is loaded.

    Raises:
        ValueError: when a parser model is already marked as loaded on
            this object, or when model_dir does not exist.
    """
    if self._parser_model_loaded:
        problem = 'Parser is already loaded and can only be loaded once.'
        raise ValueError(problem)
    if not os.path.exists(model_dir):
        problem = 'Parser model directory %r does not exist.' % model_dir
        raise ValueError(problem)

    # Mark as loaded first; the mark is not rolled back if loading fails.
    self._parser_model_loaded = True
    parser.loadModel(model_dir)
    self.parser_model_dir = model_dir

    settings = [language, case_insensitive, nbest, small_corpus,
                overparsing, debug, smoothPos]
    parser.setOptions(*settings)