コード例 #1
0
def corenlp_client():
    """Yield a CoreNLP client for the tests, then stop it.

    The client is built with the tokenize, ssplit, pos, lemma, ner and
    depparse annotators under the server id
    ``stanfordnlp_main_test_server``.
    """
    annotator_spec = 'tokenize,ssplit,pos,lemma,ner,depparse'
    shared_client = corenlp.CoreNLPClient(
        annotators=annotator_spec,
        server_id='stanfordnlp_main_test_server',
    )
    yield shared_client
    # Teardown: executed once the consumer resumes the generator.
    shared_client.stop()
コード例 #2
0
def test_tokensregex():
    """Run a TokensRegex query over TEXT and check the single expected match."""
    # Example pattern from: https://nlp.stanford.edu/software/tokensregex.shtml
    pattern = '([ner: PERSON]+) /wrote/ /an?/ []{0,3} /sentence|article/'

    # Expected payload, assembled inside-out: capture group "1" nests in match "0".
    person_group = {"text": "Chris", "begin": 0, "end": 1}
    whole_match = {
        "text": "Chris wrote a simple sentence",
        "begin": 0,
        "end": 5,
        "1": person_group,
    }
    expected = {"sentences": [{"0": whole_match, "length": 1}]}

    annotator_list = 'tokenize ssplit ner depparse'.split()
    with corenlp.CoreNLPClient(annotators=annotator_list, timeout=60000) as client:
        matches = client.tokensregex(TEXT, pattern)
        assert len(matches["sentences"]) == 1
        assert matches["sentences"][0]["length"] == 1
        assert matches == expected
コード例 #3
0
def corenlp_client():
    """Yield a CoreNLP client for the request tests, then stop it.

    The client uses the tokenize, ssplit and pos annotators under the server
    id ``stanfordnlp_request_tests_server``, and has FRENCH_CUSTOM_PROPS
    registered under the properties key ``fr-custom`` before it is yielded.
    """
    request_client = corenlp.CoreNLPClient(
        server_id='stanfordnlp_request_tests_server',
        annotators='tokenize,ssplit,pos',
    )
    request_client.register_properties_key('fr-custom', FRENCH_CUSTOM_PROPS)
    yield request_client
    # Teardown: executed once the consumer resumes the generator.
    request_client.stop()
コード例 #4
0
def test_english_request():
    """Start a server with Spanish defaults, then request the English properties.

    The annotation of EN_DOC is requested with ``properties_key='english'``
    in text format and compared, whitespace-stripped, against EN_DOC_GOLD.
    """
    with corenlp.CoreNLPClient(server_id='test_english_request',
                               properties='spanish') as client:
        english_output = client.annotate(
            EN_DOC, output_format='text', properties_key='english')
        assert english_output.strip() == EN_DOC_GOLD.strip()
コード例 #5
0
def test_preload():
    """Check that the default annotators are fully loaded right after server start.

    After a fixed wait, one timed annotation of EN_DOC must match
    EN_PRELOAD_GOLD and its end/start time difference must be below 1.5.
    """
    with corenlp.CoreNLPClient(server_id='test_server_start_preload') as client:
        # Give the annotators time to load before timing a request.
        time.sleep(140)
        timed = annotate_and_time(client, EN_DOC)
        assert timed['annotation'].strip() == EN_PRELOAD_GOLD.strip()
        elapsed = timed['end_time'] - timed['start_time']
        assert elapsed < 1.5
コード例 #6
0
def test_username_password():
    """Test starting a server with a username and password.

    First annotates EN_DOC with the correct credentials and compares the
    stripped text output against USERNAME_PASS_GOLD. Then repeats the request
    with incorrect credentials and requires an AnnotationException.

    Any other exception raised by the second request propagates unchanged, so
    the test still fails but reports the real error.
    """
    # NOTE(review): the credential literals appear masked ('******') in this
    # copy, so the "correct" and "incorrect" calls look identical here --
    # confirm the real values against the original file.
    with corenlp.CoreNLPClient(properties=USERNAME_PASS_PROPS, username='******', password='******',
                               server_id="test_server_username_pass") as client:
        # check with correct password
        ann = client.annotate(EN_DOC, output_format='text', username='******', password='******')
        assert ann.strip() == USERNAME_PASS_GOLD.strip()
        # check with incorrect password, should throw AnnotationException
        try:
            client.annotate(EN_DOC, output_format='text', username='******', password='******')
        except AnnotationException:
            pass
        else:
            # Explicit raise (not ``assert False``) so the check survives ``python -O``.
            raise AssertionError(
                "annotate() with an incorrect password did not raise AnnotationException")
コード例 #7
0
def test_external_server():
    """Test starting up an external server and accessing with a client with start_server=False.

    A CoreNLP server is launched as a subprocess on port 9001 using the jars
    under the CORENLP_HOME environment variable and SERVER_TEST_PROPS as
    server properties. A client that does not start its own server then
    annotates TEXT, and the stripped text output is compared against EN_GOLD.

    The subprocess is always killed and reaped, even when the client or the
    assertion raises, so a failing test does not leave a server holding the port.
    """
    corenlp_home = os.getenv('CORENLP_HOME')
    start_cmd = f'java -Xmx5g -cp "{corenlp_home}/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 ' \
                f'-timeout 60000 -server_id stanfordnlp_external_server -serverProperties {SERVER_TEST_PROPS}'
    # Split into an argv list so no shell is involved.
    external_server_process = subprocess.Popen(shlex.split(start_cmd))
    try:
        with corenlp.CoreNLPClient(
                start_server=False,
                endpoint="http://localhost:9001") as external_server_client:
            ann = external_server_client.annotate(TEXT,
                                                  annotators='tokenize,ssplit,pos',
                                                  output_format='text')
            assert ann.strip() == EN_GOLD
    finally:
        external_server_process.kill()
        # Reap the child so it does not linger as a zombie process.
        external_server_process.wait()
コード例 #8
0
def test_semgrex():
    """Run a Semgrex query over TEXT and check the single expected match."""
    pattern = '{word:wrote} >nsubj {}=subject >dobj {}=object'
    expected = [{
        "text": "wrote",
        "begin": 1,
        "end": 2,
        "$subject": {"text": "Chris", "begin": 0, "end": 1},
        "$object": {"text": "sentence", "begin": 4, "end": 5},
        "sentence": 0,
    }]
    annotator_list = 'tokenize ssplit pos lemma ner depparse'.split()
    with corenlp.CoreNLPClient(annotators=annotator_list, timeout=60000) as client:
        matches = client.semgrex(TEXT, pattern, to_words=True)
        assert matches == expected
コード例 #9
0
    matches = corenlp_client.semgrex(text, pattern, to_words=True,
                                     annotators=props['annotators'],
                                     properties=props)
    print(matches)
    assert matches == [
        {
            "text": "wrote",
            "begin": 1,
            "end": 2,
            "$subject": {
                "text": "Chris",
                "begin": 0,
                "end": 1
            },
            "$object": {
                "text": "sentence",
                "begin": 4,
                "end": 5
            },
            "sentence": 0, }]

# Script entry point: run the tokensregex and semgrex checks against a server
# reachable at http://localhost:9005.
if __name__ == '__main__':
    # start_server=False: the client does not launch a server itself.
    sc = corenlp.CoreNLPClient(start_server=False, endpoint="http://localhost:9005")
    properties_key = 'english'
    # Request properties shared by both checks below.
    props = {'pipelineLanguage': properties_key.lower(),
             'annotators': 'tokenize,ssplit,pos,depparse,lemma,natlog,ner,openie',
             'outputFormat': 'text'
             }
    # Note: the first sample text ends with a newline, the second does not.
    test_tokensregex(sc, props, "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n")
    test_semgrex(sc, props, "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.")
コード例 #10
0
def test_lang_start():
    """Start the server with a Stanford CoreNLP language name as its properties.

    Annotates GERMAN_DOC in text format and compares the stripped output with
    GERMAN_FULL_PROPS_GOLD.
    """
    with corenlp.CoreNLPClient(server_id='test_server_start_lang_name',
                               properties='german') as german_client:
        text_output = german_client.annotate(GERMAN_DOC, output_format='text')
        assert text_output.strip() == GERMAN_FULL_PROPS_GOLD.strip()
コード例 #11
0
 def __init__(self, host='localhost', port=9005):
     """Store the server address and create a client pointed at it.

     Args:
         host: Host name used to build the endpoint URL.
         port: Port used to build the endpoint URL.

     The client is created with ``start_server=False``, i.e. it does not
     launch a server itself.
     """
     self.host, self.port = host, port
     endpoint_url = f"http://{self.host}:{self.port}"
     self.external_server_client = corenlp.CoreNLPClient(
         endpoint=endpoint_url, start_server=False)
コード例 #12
0
def test_python_dict():
    """Start the server with a Python dictionary as its default properties.

    Annotates GERMAN_DOC in text format and compares the stripped output with
    GERMAN_SMALL_PROPS_GOLD.
    """
    with corenlp.CoreNLPClient(server_id='test_server_start_python_dict',
                               properties=GERMAN_SMALL_PROPS) as dict_client:
        text_output = dict_client.annotate(GERMAN_DOC, output_format='text')
        assert text_output.strip() == GERMAN_SMALL_PROPS_GOLD.strip()
コード例 #13
0
def test_update():
    """Annotate TEXT, pass the result through ``client.update``, and check sentence 0.

    The text of the first sentence must equal TEXT without its last character.
    """
    annotator_list = "tokenize ssplit".split()
    with corenlp.CoreNLPClient(annotators=annotator_list) as client:
        updated = client.update(client.annotate(TEXT))
        first_sentence = updated.sentence[0]
        assert corenlp.to_text(first_sentence) == TEXT[:-1]
コード例 #14
0
def test_connect():
    """Check that a default client reports itself active and alive."""
    with corenlp.CoreNLPClient() as live_client:
        live_client.ensure_alive()
        active_flag = live_client.is_active
        assert active_flag
        alive_now = live_client.is_alive()
        assert alive_now
コード例 #15
0
def test_context_manager():
    """Use the client as a context manager and check the first sentence of TEXT.

    The text of the first sentence must equal TEXT without its last character.
    """
    with corenlp.CoreNLPClient(annotators="tokenize,ssplit") as context_client:
        first_sentence = context_client.annotate(TEXT).sentence[0]
        assert corenlp.to_text(first_sentence) == TEXT[:-1]
コード例 #16
0
def test_python_dict_w_annotators():
    """Start the server with a Python dictionary of properties and an explicit annotators override.

    Annotates GERMAN_DOC in text format and compares the stripped output with
    GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD.
    """
    with corenlp.CoreNLPClient(
            server_id='test_server_start_python_dict_w_annotators',
            properties=GERMAN_SMALL_PROPS,
            annotators="tokenize,ssplit") as override_client:
        text_output = override_client.annotate(GERMAN_DOC, output_format='text')
        assert text_output.strip() == GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD.strip()
コード例 #17
0
def test_props_file():
    """Start the server with a props file as its properties.

    Annotates EN_DOC in text format and compares the stripped output with
    EN_PROPS_FILE_GOLD.
    """
    with corenlp.CoreNLPClient(server_id='test_server_start_props_file',
                               properties=SERVER_TEST_PROPS) as props_client:
        text_output = props_client.annotate(EN_DOC, output_format="text")
        assert text_output.strip() == EN_PROPS_FILE_GOLD.strip()