Beispiel #1
0
    def __init__(self, path_to_jar=None, path_to_models_jar=None,
                 path_to_ejml_jar=None,
                 model_path='edu/stanford/nlp/models/parser/lexparser/englishPCFG.ser.gz',
                 encoding='utf8', verbose=False, java_options='-mx3G'):
        """Locate the parser, model and EJML jars and record the settings.

        :param path_to_jar: forwarded to ``find_jar`` when looking up
            ``self._JAR``; ``STANFORD_PARSER`` is the environment variable
            named for that search.
        :param path_to_models_jar: forwarded to ``find_jar_iter`` when looking
            up ``self._MODEL_JAR_PATTERN`` (environment variable
            ``STANFORD_MODELS``).
        :param path_to_ejml_jar: forwarded to ``find_jar_iter`` when looking
            up ``self._EJML_JAR_PATTERN`` (environment variable
            ``STANFORD_EJML``).
        :param model_path: stored unchanged on ``self.model_path``.
        :param encoding: stored unchanged on ``self._encoding``.
        :param verbose: forwarded to every jar lookup.
        :param java_options: stored unchanged on ``self.java_options``.
        :raises ValueError: from ``max`` if a jar search yields no candidate
            at all; anything raised by the lookup helpers propagates as is.
        """
        import os  # local import: only needed by the version sort key

        def jar_version(jar_path):
            # Rank candidates by the integer runs in the file name, e.g.
            # 'foo-3.10.0-models.jar' -> (3, 10, 0), so 3.10 outranks 3.9.
            # The previous key compared re.match() results, which cannot be
            # ordered on Python 3 (TypeError as soon as two jars are found).
            # Assumes the version is encoded as digits in the jar file name.
            return tuple(
                int(number)
                for number in re.findall(r'\d+', os.path.basename(jar_path))
            )

        self._stanford_jar = find_jar(
            self._JAR, path_to_jar,
            env_vars=('STANFORD_PARSER',),
            searchpath=(), url=_stanford_url,
            verbose=verbose)

        # find the most recent model
        self._model_jar = max(
            find_jar_iter(
                self._MODEL_JAR_PATTERN, path_to_models_jar,
                env_vars=('STANFORD_MODELS',),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True),
            key=jar_version)

        # find the most recent ejml
        self._ejml_jar = max(
            find_jar_iter(
                self._EJML_JAR_PATTERN, path_to_ejml_jar,
                env_vars=('STANFORD_EJML',),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True),
            key=jar_version)

        self.model_path = model_path
        self._encoding = encoding
        self.java_options = java_options
Beispiel #2
0
    def __init__(self, path_to_jar=None, path_to_models_jar=None,
                 model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
                 encoding='utf8', verbose=False,
                 java_options='-mx1000m', corenlp_options=''):
        """Locate the code and model jars and record the parser settings.

        :param path_to_jar: forwarded to ``find_jar_iter`` when searching for
            ``self._JAR`` (environment variables ``STANFORD_PARSER`` and
            ``STANFORD_CORENLP``).
        :param path_to_models_jar: forwarded to ``find_jar_iter`` when
            searching for ``self._MODEL_JAR_PATTERN`` (environment variables
            ``STANFORD_MODELS`` and ``STANFORD_CORENLP``).
        :param model_path: stored unchanged on ``self.model_path``.
        :param encoding: stored unchanged on ``self._encoding``.
        :param verbose: forwarded to both jar searches.
        :param java_options: stored unchanged on ``self.java_options``.
        :param corenlp_options: stored unchanged on ``self.corenlp_options``.
        :raises ValueError: from ``max`` if a search yields no candidate;
            anything raised by ``find_jar_iter`` propagates as is.
        """
        import os  # local import: only needed by the version sort key

        def jar_version(jar_path):
            # Order candidates by the numbers in their file name
            # ('x-3.10.0.jar' -> (3, 10, 0)).  Comparing re.match() results,
            # as before, raises TypeError on Python 3 once a search returns
            # more than one jar.  Assumes versions appear as digits in the
            # jar file name.
            digits = re.findall(r'\d+', os.path.basename(jar_path))
            return tuple(int(d) for d in digits)

        # find the most recent code and model jar
        stanford_jar = max(
            find_jar_iter(
                self._JAR, path_to_jar,
                env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True
            ),
            key=jar_version
        )

        model_jar = max(
            find_jar_iter(
                self._MODEL_JAR_PATTERN, path_to_models_jar,
                env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True
            ),
            key=jar_version
        )

        self._classpath = (stanford_jar, model_jar)

        self.model_path = model_path
        self._encoding = encoding
        self.corenlp_options = corenlp_options
        self.java_options = java_options
Beispiel #3
0
    def __init__(self, path_to_jar=None, path_to_models_jar=None,
                 model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
                 encoding='utf8', verbose=False,
                 java_options='-mx1000m', corenlp_options=''):
        """Find the newest code jar and model jar, then store the options.

        :param path_to_jar: passed to ``find_jar_iter`` for the ``self._JAR``
            search; ``STANFORD_PARSER`` and ``STANFORD_CORENLP`` are the
            environment variables named for it.
        :param path_to_models_jar: passed to ``find_jar_iter`` for the
            ``self._MODEL_JAR_PATTERN`` search; ``STANFORD_MODELS`` and
            ``STANFORD_CORENLP`` are the environment variables named for it.
        :param model_path: kept on ``self.model_path``.
        :param encoding: kept on ``self._encoding``.
        :param verbose: passed to both searches.
        :param java_options: kept on ``self.java_options``.
        :param corenlp_options: kept on ``self.corenlp_options``.
        :raises ValueError: from ``max`` when a search produces nothing;
            errors raised inside ``find_jar_iter`` are not caught here.
        """
        import os  # local import: only the sort key below needs it

        def newest(candidates):
            # Choose by the integer components of the file name so that
            # e.g. 3.10.0 beats 3.9.2.  The earlier key handed re.match()
            # results to max(); those are unorderable on Python 3 and made
            # the constructor crash whenever two jars were available.
            # Assumes versions appear as digits in the jar file name.
            return max(
                candidates,
                key=lambda jar_path: tuple(
                    int(part)
                    for part in re.findall(r'\d+', os.path.basename(jar_path))
                ),
            )

        # find the most recent code and model jar
        stanford_jar = newest(
            find_jar_iter(
                self._JAR, path_to_jar,
                env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True
            )
        )

        model_jar = newest(
            find_jar_iter(
                self._MODEL_JAR_PATTERN, path_to_models_jar,
                env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True
            )
        )

        self._classpath = (stanford_jar, model_jar)

        self.model_path = model_path
        self._encoding = encoding
        self.corenlp_options = corenlp_options
        self.java_options = java_options
Beispiel #4
0
    def __init__(
        self,
        path_to_jar=None,
        path_to_models_jar=None,
        verbose=False,
        java_options=None,
        corenlp_options=None,
        port=None,
    ):
        """Locate the CoreNLP jars, choose a port and record server settings.

        :param path_to_jar: forwarded to ``find_jar_iter`` when searching for
            ``self._JAR`` (environment variable ``CORENLP``).
        :param path_to_models_jar: forwarded to ``find_jar_iter`` when
            searching for ``self._MODEL_JAR_PATTERN`` (environment variable
            ``CORENLP_MODELS``).
        :param verbose: forwarded to both searches and stored on
            ``self.verbose``.
        :param java_options: stored on ``self.java_options``; ``['-mx2g']``
            is used when this is ``None`` or otherwise falsy.
        :param corenlp_options: list of server options.  Defaults to
            ``['-preload', 'tokenize,ssplit,pos,lemma,parse,depparse']``.
            The list is copied, so the caller's object is not modified.
        :param port: port used for ``self.url``.  When ``None``, port 9000 is
            tried first and ``try_port()`` picks another one if that raises
            ``socket.error``.
        :raises ValueError: from ``max`` if a jar search yields nothing;
            errors from ``find_jar_iter`` and ``try_port`` propagate.
        """
        import os  # local import: only needed by the version sort key

        def jar_version(jar_path):
            # Integer runs of the file name, e.g. 'x-3.10.0.jar' -> (3, 10, 0).
            # The old key compared re.match() results, which raises TypeError
            # on Python 3 whenever more than one jar is found.
            # Assumes versions appear as digits in the jar file name.
            return tuple(
                int(number)
                for number in re.findall(r'\d+', os.path.basename(jar_path))
            )

        if corenlp_options is None:
            corenlp_options = [
                '-preload',
                'tokenize,ssplit,pos,lemma,parse,depparse',
            ]
        else:
            # Work on a copy: port options may be appended below.
            corenlp_options = list(corenlp_options)

        # find the most recent code and model jar
        stanford_jar = max(
            find_jar_iter(
                self._JAR,
                path_to_jar,
                env_vars=('CORENLP', ),
                searchpath=(),
                url=_stanford_url,
                verbose=verbose,
                is_regex=True,
            ),
            key=jar_version,
        )

        if port is None:
            try:
                # No option is added in this case; presumably 9000 is the
                # server's own default port -- TODO confirm.
                port = try_port(9000)
            except socket.error:
                port = try_port()
                # A bare number (as appended before) is not a port option;
                # the value has to follow the '-port' flag.
                corenlp_options.extend(['-port', str(port)])
        else:
            try_port(port)
            # Previously an explicit port only changed self.url and was
            # never handed to the server options.
            corenlp_options.extend(['-port', str(port)])

        self.url = 'http://localhost:{}'.format(port)

        model_jar = max(
            find_jar_iter(
                self._MODEL_JAR_PATTERN,
                path_to_models_jar,
                env_vars=('CORENLP_MODELS', ),
                searchpath=(),
                url=_stanford_url,
                verbose=verbose,
                is_regex=True,
            ),
            key=jar_version,
        )

        self.verbose = verbose

        self._classpath = stanford_jar, model_jar

        self.corenlp_options = corenlp_options
        self.java_options = java_options or ['-mx2g']
Beispiel #5
0
    def __init__(self, path_to_jar=None, path_to_models_jar=None,
                 path_to_ejml_jar=None,
                 model_path='edu/stanford/nlp/models/parser/lexparser/englishPCFG.ser.gz',
                 encoding='utf8', verbose=False, java_options='-mx3G'):
        """Resolve the parser, model and EJML jars and keep the settings.

        :param path_to_jar: given to ``find_jar`` for ``self._JAR``
            (environment variable ``STANFORD_PARSER``).
        :param path_to_models_jar: given to ``find_jar_iter`` for
            ``self._MODEL_JAR_PATTERN`` (environment variable
            ``STANFORD_MODELS``).
        :param path_to_ejml_jar: given to ``find_jar_iter`` for
            ``self._EJML_JAR_PATTERN`` (environment variable
            ``STANFORD_EJML``).
        :param model_path: kept on ``self.model_path``.
        :param encoding: kept on ``self._encoding``.
        :param verbose: given to all three lookups.
        :param java_options: kept on ``self.java_options``.
        :raises ValueError: from ``max`` when a pattern search finds nothing;
            errors raised by the lookup helpers are not caught here.
        """
        import os  # local import: only the sort key below needs it

        def newest(candidates):
            # Pick the candidate with the highest numeric components in its
            # file name ('x-0.23.jar' -> (0, 23)).  max() used to be keyed on
            # re.match() results, which Python 3 refuses to compare, so the
            # constructor failed as soon as two matching jars existed.
            # Assumes versions appear as digits in the jar file name.
            return max(
                candidates,
                key=lambda jar_path: tuple(
                    int(part)
                    for part in re.findall(r'\d+', os.path.basename(jar_path))
                ),
            )

        self._stanford_jar = find_jar(
            self._JAR, path_to_jar,
            env_vars=('STANFORD_PARSER',),
            searchpath=(), url=_stanford_url,
            verbose=verbose)

        # find the most recent model
        self._model_jar = newest(
            find_jar_iter(
                self._MODEL_JAR_PATTERN, path_to_models_jar,
                env_vars=('STANFORD_MODELS',),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True))

        # find the most recent ejml
        self._ejml_jar = newest(
            find_jar_iter(
                self._EJML_JAR_PATTERN, path_to_ejml_jar,
                env_vars=('STANFORD_EJML',),
                searchpath=(), url=_stanford_url,
                verbose=verbose, is_regex=True))

        self.model_path = model_path
        self._encoding = encoding
        self.java_options = java_options
Beispiel #6
0
    def __init__(
        self, path_to_jar=None, path_to_models_jar=None, verbose=False,
        java_options=None, corenlp_options=None, port=None,
    ):
        """Find the CoreNLP jars, settle on a port and store server options.

        :param path_to_jar: passed to ``find_jar_iter`` for the ``self._JAR``
            search (environment variable ``CORENLP``).
        :param path_to_models_jar: passed to ``find_jar_iter`` for the
            ``self._MODEL_JAR_PATTERN`` search (environment variable
            ``CORENLP_MODELS``).
        :param verbose: passed to both searches and kept on ``self.verbose``.
        :param java_options: kept on ``self.java_options``; falls back to
            ``['-mx2g']`` when ``None`` or otherwise falsy.
        :param corenlp_options: list of server options, defaulting to
            ``['-preload', 'tokenize,ssplit,pos,lemma,parse,depparse']``.
            A copy is stored; the caller's list is left untouched.
        :param port: port used to build ``self.url``.  With ``None``, 9000 is
            tried first and ``try_port()`` chooses one if that raises
            ``socket.error``.
        :raises ValueError: from ``max`` when a jar search finds nothing;
            errors from ``find_jar_iter`` and ``try_port`` are not caught
            (other than the ``socket.error`` fallback described above).
        """
        import os  # local import: only the sort key below needs it

        def newest(candidates):
            # Highest numeric file-name components win ('x-3.10.0.jar' ->
            # (3, 10, 0)).  The former key returned re.match() objects, which
            # are unorderable on Python 3 and crash max() once there is more
            # than one candidate.  Assumes versions appear as digits in the
            # jar file name.
            return max(
                candidates,
                key=lambda jar_path: tuple(
                    int(part)
                    for part in re.findall(r'\d+', os.path.basename(jar_path))
                ),
            )

        if corenlp_options is None:
            corenlp_options = [
                '-preload', 'tokenize,ssplit,pos,lemma,parse,depparse',
            ]
        else:
            # Copy so that adding the port options never alters the
            # caller's list.
            corenlp_options = list(corenlp_options)

        # find the most recent code and model jar
        stanford_jar = newest(find_jar_iter(
            self._JAR,
            path_to_jar,
            env_vars=('CORENLP', ),
            searchpath=(),
            url=_stanford_url,
            verbose=verbose,
            is_regex=True,
        ))

        if port is None:
            try:
                # Nothing is added to the options here; presumably 9000 is
                # what the server listens on by default -- TODO confirm.
                port = try_port(9000)
            except socket.error:
                port = try_port()
                # The port value must follow the '-port' flag; a lone number
                # (the previous behaviour) is not a port option.
                corenlp_options.extend(['-port', str(port)])
        else:
            try_port(port)
            # An explicitly requested port used to affect self.url only.
            corenlp_options.extend(['-port', str(port)])

        self.url = 'http://localhost:{}'.format(port)

        model_jar = newest(find_jar_iter(
            self._MODEL_JAR_PATTERN,
            path_to_models_jar,
            env_vars=('CORENLP_MODELS', ),
            searchpath=(),
            url=_stanford_url,
            verbose=verbose,
            is_regex=True,
        ))

        self.verbose = verbose

        self._classpath = stanford_jar, model_jar

        self.corenlp_options = corenlp_options
        self.java_options = java_options or ['-mx2g']
Beispiel #7
0
    def __init__(
        self,
        path_to_jar=None,
        path_to_models_jar=None,
        verbose=False,
        java_options=None,
        corenlp_options=None,
        port=4466,
    ):
        """Locate the CoreNLP jars, reserve a port and record server settings.

        :param path_to_jar: forwarded to ``find_jar_iter`` when searching for
            ``self._JAR`` (environment variable ``CORENLP``).
        :param path_to_models_jar: forwarded to ``find_jar_iter`` when
            searching for ``self._MODEL_JAR_PATTERN`` (environment variable
            ``CORENLP_MODELS``).
        :param verbose: forwarded to both searches and stored on
            ``self.verbose``.
        :param java_options: stored on ``self.java_options``; ``['-mx2g']``
            is used when this is ``None`` or otherwise falsy.
        :param corenlp_options: list of server options; ``None`` means no
            extra options.  The list is copied before ``-port <n>`` is
            appended, so the caller's object is not modified.
        :param port: accepted for interface compatibility but not used; the
            port always comes from ``try_port()``.
        :raises ValueError: from ``max`` if a jar search yields nothing;
            errors from ``find_jar_iter`` and ``try_port`` propagate.
        """
        import os  # local import: only needed by the version sort key

        def jar_version(jar_path):
            # Integer runs of the file name, e.g. 'x-3.10.0.jar' -> (3, 10, 0).
            # The old key compared re.match() results, which raises TypeError
            # on Python 3 whenever more than one jar is found.
            # Assumes versions appear as digits in the jar file name.
            return tuple(
                int(number)
                for number in re.findall(r'\d+', os.path.basename(jar_path))
            )

        # The None default used to reach .extend() below and raise
        # AttributeError, because its handling was disabled inside a string
        # literal.
        if corenlp_options is None:
            corenlp_options = []
        else:
            corenlp_options = list(corenlp_options)

        # find the most recent code and model jar
        stanford_jar = max(
            find_jar_iter(
                self._JAR,
                path_to_jar,
                env_vars=('CORENLP', ),
                searchpath=(),
                url=_stanford_url,
                verbose=verbose,
                is_regex=True,
            ),
            key=jar_version,
        )

        # NOTE(review): the ``port`` argument is overwritten here, exactly as
        # in the original code -- confirm whether it was meant to be honoured.
        port = try_port()
        corenlp_options.extend(['-port', str(port)])

        self.host = 'localhost'
        self.port = port

        model_jar = max(
            find_jar_iter(
                self._MODEL_JAR_PATTERN,
                path_to_models_jar,
                env_vars=('CORENLP_MODELS', ),
                searchpath=(),
                url=_stanford_url,
                verbose=verbose,
                is_regex=True,
            ),
            key=jar_version,
        )

        self.verbose = verbose

        self._classpath = stanford_jar, model_jar
        self.corenlp_options = corenlp_options
        self.java_options = java_options or ['-mx2g']
Beispiel #8
0
    def __init__(
        self,
        path_to_jar=None,
        path_to_models_jar=None,
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
        encoding='utf8',
        verbose=False,
        java_options='-mx4g',
        corenlp_options='',
    ):
        """Locate the code and model jars, build the classpath, store options.

        :param path_to_jar: forwarded to ``find_jar_iter`` when searching for
            ``self._JAR`` (environment variables ``STANFORD_PARSER`` and
            ``STANFORD_CORENLP``).
        :param path_to_models_jar: forwarded to ``find_jar_iter`` when
            searching for ``self._MODEL_JAR_PATTERN`` (environment variables
            ``STANFORD_MODELS`` and ``STANFORD_CORENLP``).
        :param model_path: stored unchanged on ``self.model_path``.
        :param encoding: stored unchanged on ``self._encoding``.
        :param verbose: forwarded to both searches.
        :param java_options: stored unchanged on ``self.java_options``.
        :param corenlp_options: stored unchanged on ``self.corenlp_options``.
        """
        # Keyword arguments common to both jar searches.
        shared_search_args = dict(
            searchpath=(),
            url=_stanford_url,
            verbose=verbose,
            is_regex=True,
        )

        # Among the candidates, keep the one whose directory path sorts last.
        code_candidates = find_jar_iter(
            self._JAR,
            path_to_jar,
            env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'),
            **shared_search_args
        )
        stanford_jar = max(code_candidates, key=os.path.dirname)

        model_candidates = find_jar_iter(
            self._MODEL_JAR_PATTERN,
            path_to_models_jar,
            env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'),
            **shared_search_args
        )
        model_jar = max(model_candidates, key=os.path.dirname)

        # The classpath is the model jar followed by whatever
        # find_jars_within_path reports for the code jar's directory (per the
        # original comment, this is what brings in the logging jars).
        code_dir = os.path.dirname(stanford_jar)
        classpath_entries = [model_jar] + find_jars_within_path(code_dir)
        self._classpath = tuple(classpath_entries)

        self.model_path = model_path
        self._encoding = encoding
        self.corenlp_options = corenlp_options
        self.java_options = java_options
Beispiel #9
0
    def __init__(
        self,
        path_to_jar=None,
        path_to_models_jar=None,
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
        encoding='utf8',
        verbose=False,
        java_options='-mx4g',
        corenlp_options='',
    ):
        """Resolve the code and model jars, assemble the classpath, keep options.

        :param path_to_jar: passed to ``find_jar_iter`` for the ``self._JAR``
            search (environment variables ``STANFORD_PARSER`` and
            ``STANFORD_CORENLP``).
        :param path_to_models_jar: passed to ``find_jar_iter`` for the
            ``self._MODEL_JAR_PATTERN`` search (environment variables
            ``STANFORD_MODELS`` and ``STANFORD_CORENLP``).
        :param model_path: kept on ``self.model_path``.
        :param encoding: kept on ``self._encoding``.
        :param verbose: passed to both searches.
        :param java_options: kept on ``self.java_options``.
        :param corenlp_options: kept on ``self.corenlp_options``.
        """
        def pick_jar(pattern, explicit_path, env_names):
            # Run one regex jar search and keep the hit whose directory
            # path compares greatest.
            hits = find_jar_iter(
                pattern,
                explicit_path,
                env_vars=env_names,
                searchpath=(),
                url=_stanford_url,
                verbose=verbose,
                is_regex=True,
            )
            return max(hits, key=os.path.dirname)

        stanford_jar = pick_jar(
            self._JAR, path_to_jar, ('STANFORD_PARSER', 'STANFORD_CORENLP')
        )
        model_jar = pick_jar(
            self._MODEL_JAR_PATTERN,
            path_to_models_jar,
            ('STANFORD_MODELS', 'STANFORD_CORENLP'),
        )

        # Model jar first, then every jar find_jars_within_path returns for
        # the directory holding the code jar (the original comment says this
        # adds the logging jars).
        stanford_dir = os.path.dirname(stanford_jar)
        sibling_jars = find_jars_within_path(stanford_dir)
        self._classpath = tuple([model_jar] + sibling_jars)

        self.model_path = model_path
        self._encoding = encoding
        self.corenlp_options = corenlp_options
        self.java_options = java_options
Beispiel #10
0
    def __init__(
        self,
        path_to_jar=None,
        path_to_models_jar=None,
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
        encoding="UTF-8",
        verbose=False,
        java_options="-mx1000m",
    ):
        """Locate the parser jar and the newest model jar, then store settings.

        :param path_to_jar: forwarded to ``find_jar`` when looking up
            ``self._JAR`` (environment variable ``STANFORD_PARSER``).
        :param path_to_models_jar: forwarded to ``find_jar_iter`` when
            searching for ``self._MODEL_JAR_PATTERN`` (environment variable
            ``STANFORD_MODELS``).
        :param model_path: stored unchanged on ``self.model_path``.
        :param encoding: stored unchanged on ``self._encoding``.
        :param verbose: forwarded to both lookups.
        :param java_options: stored unchanged on ``self.java_options``.
        :raises ValueError: from ``max`` if the model search yields nothing;
            anything raised by the lookup helpers propagates as is.
        """
        import os  # local import: only needed by the version sort key

        def jar_version(jar_path):
            # Rank by the integer runs in the file name, e.g.
            # "x-3.10.0-models.jar" -> (3, 10, 0).  The previous key compared
            # re.match() results, which Python 3 cannot order, so max()
            # raised TypeError whenever two model jars were present.
            # Assumes versions appear as digits in the jar file name.
            return tuple(
                int(number)
                for number in re.findall(r"\d+", os.path.basename(jar_path))
            )

        self._stanford_jar = find_jar(
            self._JAR, path_to_jar, env_vars=("STANFORD_PARSER",), searchpath=(), url=_stanford_url, verbose=verbose
        )

        # find the most recent model
        self._model_jar = max(
            find_jar_iter(
                self._MODEL_JAR_PATTERN,
                path_to_models_jar,
                env_vars=("STANFORD_MODELS",),
                searchpath=(),
                url=_stanford_url,
                verbose=verbose,
                is_regex=True,
            ),
            key=jar_version,
        )

        self.model_path = model_path
        self._encoding = encoding
        self.java_options = java_options