def __init__(self, path_to_jar=None, path_to_models_jar=None, path_to_ejml_jar=None,
             model_path='edu/stanford/nlp/models/parser/lexparser/englishPCFG.ser.gz',
             encoding='utf8', verbose=False, java_options='-mx3G'):
    """Locate the Stanford parser jar, the newest model jar, and the newest
    EJML jar, and record the settings used for later Java invocations.

    :param path_to_jar: path to the parser jar; searched for via the
        ``STANFORD_PARSER`` environment variable when None.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to the ``STANFORD_MODELS`` environment variable.
    :param path_to_ejml_jar: path (or search hint) for the EJML
        linear-algebra jar; falls back to ``STANFORD_EJML``.
    :param model_path: resource path of the serialized grammar inside the
        models jar.
    :param encoding: character encoding used when exchanging data with the
        Java process.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options, e.g. the ``-mx3G`` heap limit.
    """
    self._stanford_jar = find_jar(
        self._JAR, path_to_jar,
        env_vars=('STANFORD_PARSER',), searchpath=(),
        url=_stanford_url, verbose=verbose)

    # Select the lexicographically greatest matching filename, which for
    # Stanford's versioned jar names is the most recent release.
    # BUGFIX: the previous ``key=lambda name: re.match(pattern, name)``
    # made max() compare ``re.Match`` objects, which raises TypeError in
    # Python 3 as soon as more than one candidate jar is found.
    self._model_jar = max(
        find_jar_iter(
            self._MODEL_JAR_PATTERN, path_to_models_jar,
            env_vars=('STANFORD_MODELS',), searchpath=(),
            url=_stanford_url, verbose=verbose, is_regex=True))

    # Same selection rule (and same fix) for the EJML jar.
    self._ejml_jar = max(
        find_jar_iter(
            self._EJML_JAR_PATTERN, path_to_ejml_jar,
            env_vars=('STANFORD_EJML',), searchpath=(),
            url=_stanford_url, verbose=verbose, is_regex=True))

    self.model_path = model_path
    self._encoding = encoding
    self.java_options = java_options
def __init__(self, path_to_jar=None, path_to_models_jar=None,
             model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
             encoding='utf8', verbose=False, java_options='-mx1000m',
             corenlp_options=''):
    """Locate the newest Stanford code and model jars and build the
    two-jar classpath used to launch the parser.

    :param path_to_jar: path (or search hint) for the code jar; falls back
        to the ``STANFORD_PARSER`` / ``STANFORD_CORENLP`` environment vars.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to ``STANFORD_MODELS`` / ``STANFORD_CORENLP``.
    :param model_path: resource path of the serialized grammar inside the
        models jar.
    :param encoding: character encoding used with the Java process.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options, e.g. the ``-mx1000m`` heap limit.
    :param corenlp_options: extra command-line options passed through to
        the CoreNLP tool.
    """
    # Find the most recent code and model jar.  The lexicographically
    # greatest filename is the newest versioned release.
    # BUGFIX: the previous ``key=lambda name: re.match(pattern, name)``
    # made max() compare ``re.Match`` objects, a TypeError in Python 3
    # whenever more than one candidate jar matches.
    stanford_jar = max(
        find_jar_iter(
            self._JAR, path_to_jar,
            env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'),
            searchpath=(), url=_stanford_url,
            verbose=verbose, is_regex=True))

    model_jar = max(
        find_jar_iter(
            self._MODEL_JAR_PATTERN, path_to_models_jar,
            env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'),
            searchpath=(), url=_stanford_url,
            verbose=verbose, is_regex=True))

    self._classpath = (stanford_jar, model_jar)

    self.model_path = model_path
    self._encoding = encoding
    self.corenlp_options = corenlp_options
    self.java_options = java_options
def __init__(self, path_to_jar=None, path_to_models_jar=None, verbose=False,
             java_options=None, corenlp_options=None, port=None):
    """Locate the CoreNLP code and model jars, choose a server port, and
    record the options used to start the server.

    :param path_to_jar: path (or search hint) for the CoreNLP code jar;
        falls back to the ``CORENLP`` environment variable.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to ``CORENLP_MODELS``.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options; defaults to ``['-mx2g']``.
    :param corenlp_options: server command-line options; defaults to
        preloading the standard annotator pipeline.
    :param port: explicit server port; when None, 9000 is tried first and
        a free port is chosen if 9000 is unavailable.
    """
    if corenlp_options is None:
        corenlp_options = [
            '-preload',
            'tokenize,ssplit,pos,lemma,parse,depparse',
        ]

    jars = list(find_jar_iter(
        self._JAR, path_to_jar,
        env_vars=('CORENLP', ), searchpath=(),
        url=_stanford_url, verbose=verbose, is_regex=True,
    ))

    # Most recent code jar = lexicographically greatest filename.
    # BUGFIX: the previous ``key=lambda name: re.match(self._JAR, name)``
    # made max() compare ``re.Match`` objects, a TypeError in Python 3
    # whenever more than one candidate jar is found.
    stanford_jar = max(jars)

    if port is None:
        try:
            port = try_port(9000)
        except socket.error:
            # 9000 is taken: grab any free port and forward it to the
            # server via the options list.
            # NOTE(review): original flattened source is ambiguous about
            # whether this append sat inside the except branch; preserved
            # here per the conventional reading — confirm against history.
            port = try_port()
            corenlp_options.append(str(port))
    else:
        # Fail fast if the requested port is unavailable.
        try_port(port)

    self.url = 'http://localhost:{}'.format(port)

    # Most recent model jar, same selection rule and same fix as above.
    model_jar = max(find_jar_iter(
        self._MODEL_JAR_PATTERN, path_to_models_jar,
        env_vars=('CORENLP_MODELS', ), searchpath=(),
        url=_stanford_url, verbose=verbose, is_regex=True,
    ))

    self.verbose = verbose
    self._classpath = stanford_jar, model_jar

    self.corenlp_options = corenlp_options
    self.java_options = java_options or ['-mx2g']
def __init__(self, path_to_jar=None, path_to_models_jar=None, verbose=False,
             java_options=None, corenlp_options=None, port=None):
    """Locate the CoreNLP code and model jars, choose a server port, and
    record the options used to start the server.

    :param path_to_jar: path (or search hint) for the CoreNLP code jar;
        falls back to the ``CORENLP`` environment variable.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to ``CORENLP_MODELS``.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options; defaults to ``['-mx2g']``.
    :param corenlp_options: server command-line options; defaults to
        preloading the standard annotator pipeline.
    :param port: explicit server port; when None, 9000 is tried first and
        a free port is chosen if 9000 is unavailable.
    """
    if corenlp_options is None:
        corenlp_options = [
            '-preload',
            'tokenize,ssplit,pos,lemma,parse,depparse',
        ]

    jars = list(find_jar_iter(
        self._JAR, path_to_jar,
        env_vars=('CORENLP', ), searchpath=(),
        url=_stanford_url, verbose=verbose, is_regex=True,
    ))

    # Most recent code jar = lexicographically greatest filename.
    # BUGFIX: the previous ``key=lambda name: re.match(self._JAR, name)``
    # made max() compare ``re.Match`` objects, a TypeError in Python 3
    # whenever more than one candidate jar is found.
    stanford_jar = max(jars)

    if port is None:
        try:
            port = try_port(9000)
        except socket.error:
            # 9000 is taken: grab any free port and forward it to the
            # server via the options list.
            # NOTE(review): original flattened source is ambiguous about
            # whether this append sat inside the except branch; preserved
            # here per the conventional reading — confirm against history.
            port = try_port()
            corenlp_options.append(str(port))
    else:
        # Fail fast if the requested port is unavailable.
        try_port(port)

    self.url = 'http://localhost:{}'.format(port)

    # Most recent model jar, same selection rule and same fix as above.
    model_jar = max(find_jar_iter(
        self._MODEL_JAR_PATTERN, path_to_models_jar,
        env_vars=('CORENLP_MODELS', ), searchpath=(),
        url=_stanford_url, verbose=verbose, is_regex=True,
    ))

    self.verbose = verbose
    self._classpath = stanford_jar, model_jar

    self.corenlp_options = corenlp_options
    self.java_options = java_options or ['-mx2g']
def __init__(self, path_to_jar=None, path_to_models_jar=None, verbose=False,
             java_options=None, corenlp_options=None, port=4466):
    """Locate the CoreNLP code and model jars, pick a free server port,
    and record the options used to start the server.

    :param path_to_jar: path (or search hint) for the CoreNLP code jar;
        falls back to the ``CORENLP`` environment variable.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to ``CORENLP_MODELS``.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options; defaults to ``['-mx2g']``.
    :param corenlp_options: server command-line options; defaults to
        ``['-preload']``.
    :param port: requested server port.
        NOTE(review): this argument is currently ignored — a free port is
        always selected by ``try_port()`` below.  Preserved as-is; confirm
        whether honoring the argument is intended.
    """
    # BUGFIX: the None-guard below had been commented out, so the default
    # ``corenlp_options=None`` crashed with AttributeError at the
    # ``.extend()`` call further down.  Restore the guard.
    if corenlp_options is None:
        corenlp_options = ['-preload']

    jars = list(find_jar_iter(
        self._JAR, path_to_jar,
        env_vars=('CORENLP', ), searchpath=(),
        url=_stanford_url, verbose=verbose, is_regex=True,
    ))

    # Most recent code jar = lexicographically greatest filename.
    # BUGFIX: the previous ``key=lambda name: re.match(self._JAR, name)``
    # made max() compare ``re.Match`` objects, a TypeError in Python 3
    # whenever more than one candidate jar is found.
    stanford_jar = max(jars)

    # Choose a free port and tell the server about it.
    port = try_port()
    corenlp_options.extend(['-port', str(port)])

    self.host = 'localhost'
    self.port = port

    # Most recent model jar, same selection rule and same fix as above.
    model_jar = max(find_jar_iter(
        self._MODEL_JAR_PATTERN, path_to_models_jar,
        env_vars=('CORENLP_MODELS', ), searchpath=(),
        url=_stanford_url, verbose=verbose, is_regex=True,
    ))

    self.verbose = verbose
    self._classpath = stanford_jar, model_jar

    self.corenlp_options = corenlp_options
    self.java_options = java_options or ['-mx2g']
def __init__(self, path_to_jar=None, path_to_models_jar=None,
             model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
             encoding='utf8', verbose=False, java_options='-mx4g',
             corenlp_options=''):
    """Locate the Stanford code and model jars and build a classpath that
    also includes every jar shipped alongside the code jar (needed for
    the logging jars).

    :param path_to_jar: path (or search hint) for the code jar; falls back
        to the ``STANFORD_PARSER`` / ``STANFORD_CORENLP`` environment vars.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to ``STANFORD_MODELS`` / ``STANFORD_CORENLP``.
    :param model_path: resource path of the serialized grammar inside the
        models jar.
    :param encoding: character encoding used with the Java process.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options, e.g. the ``-mx4g`` heap limit.
    :param corenlp_options: extra command-line options passed through to
        the CoreNLP tool.
    """
    # Find the most recent code and model jar, ranking candidates by
    # their containing directory (as the original did).
    # CLEANUP: the lambdas previously named their argument ``model_path``,
    # shadowing this method's ``model_path`` parameter; ``os.path.dirname``
    # itself is the same key with no shadowing.
    stanford_jar = max(
        find_jar_iter(
            self._JAR, path_to_jar,
            env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'),
            searchpath=(), url=_stanford_url,
            verbose=verbose, is_regex=True,
        ),
        key=os.path.dirname,
    )

    model_jar = max(
        find_jar_iter(
            self._MODEL_JAR_PATTERN, path_to_models_jar,
            env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'),
            searchpath=(), url=_stanford_url,
            verbose=verbose, is_regex=True,
        ),
        key=os.path.dirname,
    )

    # self._classpath = (stanford_jar, model_jar)

    # Adding logging jar files to classpath: the model jar plus every jar
    # found under the code jar's directory.
    stanford_dir = os.path.split(stanford_jar)[0]
    self._classpath = tuple([model_jar] + find_jars_within_path(stanford_dir))

    self.model_path = model_path
    self._encoding = encoding
    self.corenlp_options = corenlp_options
    self.java_options = java_options
def __init__(self, path_to_jar=None, path_to_models_jar=None,
             model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
             encoding="UTF-8", verbose=False, java_options="-mx1000m"):
    """Locate the Stanford parser jar and the newest model jar, and record
    the settings used for later Java invocations.

    :param path_to_jar: path to the parser jar; searched for via the
        ``STANFORD_PARSER`` environment variable when None.
    :param path_to_models_jar: path (or search hint) for the models jar;
        falls back to the ``STANFORD_MODELS`` environment variable.
    :param model_path: resource path of the serialized grammar inside the
        models jar.
    :param encoding: character encoding used with the Java process.
    :param verbose: if True, the jar search prints diagnostics.
    :param java_options: JVM options, e.g. the ``-mx1000m`` heap limit.
    """
    self._stanford_jar = find_jar(
        self._JAR, path_to_jar,
        env_vars=("STANFORD_PARSER",), searchpath=(),
        url=_stanford_url, verbose=verbose
    )

    # Most recent model jar = lexicographically greatest filename, which
    # for Stanford's versioned jar names is the newest release.
    # BUGFIX: the previous ``key=lambda name: re.match(pattern, name)``
    # made max() compare ``re.Match`` objects, a TypeError in Python 3
    # whenever more than one candidate jar is found.
    self._model_jar = max(
        find_jar_iter(
            self._MODEL_JAR_PATTERN, path_to_models_jar,
            env_vars=("STANFORD_MODELS",), searchpath=(),
            url=_stanford_url, verbose=verbose, is_regex=True,
        )
    )

    self.model_path = model_path
    self._encoding = encoding
    self.java_options = java_options