Ejemplo n.º 1
0
def test_props_file():
    """ Launch the server from a properties file and check the text annotation of EN_DOC """
    startup_options = {
        'properties': SERVER_TEST_PROPS,
        'server_id': 'test_server_start_props_file',
    }
    with corenlp.CoreNLPClient(**startup_options) as client:
        text_output = client.annotate(EN_DOC, output_format="text")
        # Surrounding whitespace is stripped from both sides before comparing
        actual, expected = text_output.strip(), EN_PROPS_FILE_GOLD.strip()
        assert actual == expected
Ejemplo n.º 2
0
def test_lang_start():
    """ Launch the server with the CoreNLP language name 'german' as its properties """
    german_server = corenlp.CoreNLPClient(
        properties='german',
        server_id='test_server_start_lang_name',
    )
    with german_server as client:
        text_output = client.annotate(GERMAN_DOC, output_format='text')
        # Only leading/trailing whitespace is ignored in this comparison
        assert text_output.strip() == GERMAN_FULL_PROPS_GOLD.strip()
Ejemplo n.º 3
0
def test_english_request():
    """
    Start a server with Spanish defaults, then request 'english' properties
    for a single annotation and compare the text output with EN_DOC_GOLD.
    """
    spanish_server = corenlp.CoreNLPClient(
        properties='spanish',
        server_id='test_spanish_english_request',
    )
    with spanish_server as client:
        overridden_output = client.annotate(EN_DOC,
                                            properties='english',
                                            output_format='text')
        compare_ignoring_whitespace(overridden_output, EN_DOC_GOLD)

    # Control run: a server started directly in English mode must produce
    # the same gold output, confirming the gold matches the English defaults
    english_server = corenlp.CoreNLPClient(
        properties='english',
        server_id='test_english_request',
    )
    with english_server as client:
        default_output = client.annotate(EN_DOC, output_format='text')
        compare_ignoring_whitespace(default_output, EN_DOC_GOLD)
Ejemplo n.º 4
0
def test_python_dict():
    """ Launch the server with GERMAN_SMALL_PROPS passed as the default properties """
    dict_server = corenlp.CoreNLPClient(
        properties=GERMAN_SMALL_PROPS,
        server_id='test_server_start_python_dict',
    )
    with dict_server as client:
        text_output = client.annotate(GERMAN_DOC, output_format='text')
        actual = text_output.strip()
        expected = GERMAN_SMALL_PROPS_GOLD.strip()
        assert actual == expected
Ejemplo n.º 5
0
def test_default_annotators():
    """
    Annotate through a second client created with start_server=False and its
    own, shorter annotator list while a first client is running a server.

    The second client's annotators should be used instead of the server's
    default annotators. The returned annotation is not compared against any
    expected value; the test passes as long as no exception is raised.
    """
    server_annotators = ['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'depparse']
    request_annotators = ['tokenize', 'ssplit', 'pos']

    with corenlp.CoreNLPClient(server_id='test_default_annotators',
                               output_format='text',
                               annotators=server_annotators) as server_client:
        # The inner client does not launch a server of its own
        with corenlp.CoreNLPClient(start_server=False,
                                   output_format='text',
                                   annotators=request_annotators) as client2:
            annotation = client2.annotate(EN_DOC)
Ejemplo n.º 6
0
def corenlp_client():
    """ Yield the client the tests run against, then stop it once the generator is resumed """
    settings = {
        'annotators': 'tokenize,ssplit,pos,lemma,ner,depparse',
        'server_id': 'stanza_main_test_server',
    }
    shared_client = corenlp.CoreNLPClient(**settings)
    yield shared_client
    # Teardown: executed after the consumer hands control back
    shared_client.stop()
Ejemplo n.º 7
0
def test_lang_start():
    """ Launch the server from the CoreNLP language name 'german' and compare its text output with the gold """
    german_server = corenlp.CoreNLPClient(
        properties='german',
        server_id='test_server_start_lang_name',
    )
    with german_server as client:
        german_output = client.annotate(GERMAN_DOC, output_format='text')
        compare_ignoring_whitespace(german_output, GERMAN_FULL_PROPS_GOLD)
Ejemplo n.º 8
0
def corenlp_client():
    """ Build the test client, register FRENCH_CUSTOM_PROPS under 'fr-custom', yield it, then stop it """
    request_client = corenlp.CoreNLPClient(
        annotators='tokenize,ssplit,pos',
        server_id='stanza_request_tests_server',
    )
    # Make the custom French properties addressable by key in later requests
    request_client.register_properties_key('fr-custom', FRENCH_CUSTOM_PROPS)
    yield request_client
    # Teardown: executed after the consumer hands control back
    request_client.stop()
Ejemplo n.º 9
0
def test_python_dict_w_annotators():
    """ Launch the server with a Python dictionary of default properties plus an explicit annotators override """
    startup_options = dict(
        properties=GERMAN_SMALL_PROPS,
        annotators="tokenize,ssplit",
        server_id='test_server_start_python_dict_w_annotators',
    )
    with corenlp.CoreNLPClient(**startup_options) as client:
        text_output = client.annotate(GERMAN_DOC, output_format='text')
        expected = GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD.strip()
        assert text_output.strip() == expected
Ejemplo n.º 10
0
def test_unknown_request():
    """ With a server started on Spanish defaults, requesting the properties_key 'UNBAN_MOX_OPAL' is expected to raise ValueError """
    spanish_server = corenlp.CoreNLPClient(properties='spanish',
                                           server_id='test_english_request')
    # Equivalent to nesting the two context managers: the client is entered
    # first, and pytest.raises wraps only the annotate call below
    with spanish_server as client, pytest.raises(ValueError):
        unused_result = client.annotate(EN_DOC,
                                        properties_key='UNBAN_MOX_OPAL',
                                        output_format='text')
Ejemplo n.º 11
0
def test_english_request():
    """ Start a server with Spanish defaults and request the 'english' properties_key for one annotation """
    spanish_server = corenlp.CoreNLPClient(properties='spanish',
                                           server_id='test_english_request')
    with spanish_server as client:
        request_options = {
            'properties_key': 'english',
            'output_format': 'text',
        }
        english_output = client.annotate(EN_DOC, **request_options)
        compare_ignoring_whitespace(english_output, EN_DOC_GOLD)
Ejemplo n.º 12
0
def test_preload():
    """ Check that the default annotators are already loaded when a request arrives some time after server start """
    preload_server = corenlp.CoreNLPClient(server_id='test_server_start_preload')
    with preload_server as client:
        # Give the annotators time to load before issuing the timed request
        time.sleep(140)
        timing = annotate_and_time(client, EN_DOC)
        compare_ignoring_whitespace(timing['annotation'], EN_PRELOAD_GOLD)
        # The request itself must be fast if nothing had to be loaded lazily
        elapsed = timing['end_time'] - timing['start_time']
        assert elapsed < 3
Ejemplo n.º 13
0
def test_external_server_try_start():
    """
    Test starting up a server with a client with start_server=StartServer.TRY_START

    The client targets http://localhost:9001 and requests the
    tokenize/ssplit/pos annotators in text format. The annotation, with
    surrounding whitespace stripped, must equal EN_GOLD.
    """
    with corenlp.CoreNLPClient(
            start_server=corenlp.StartServer.TRY_START,
            annotators='tokenize,ssplit,pos',
            endpoint="http://localhost:9001") as external_server_client:
        ann = external_server_client.annotate(TEXT,
                                              annotators='tokenize,ssplit,pos',
                                              output_format='text')
    # Checked after the client context has exited; `ann` is still bound here
    assert ann.strip() == EN_GOLD
Ejemplo n.º 14
0
def test_external_server():
    """
    Test starting up an external server and accessing with a client with start_server=False

    A CoreNLP server is launched as a separate process on port 9001, a client
    that does not start its own server annotates TEXT through it, and the
    stripped text output must equal EN_GOLD. The external process is always
    killed and reaped, even when the request or the assertion fails.
    """
    corenlp_home = os.getenv('CORENLP_HOME')
    start_cmd = f'java -Xmx5g -cp "{corenlp_home}/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 ' \
                f'-timeout 60000 -server_id stanza_external_server -serverProperties {SERVER_TEST_PROPS}'
    # Popen is given an argument list (no shell), so split the command line first
    external_server_process = subprocess.Popen(shlex.split(start_cmd))
    try:
        with corenlp.CoreNLPClient(start_server=False, endpoint="http://localhost:9001") as external_server_client:
            ann = external_server_client.annotate(TEXT, annotators='tokenize,ssplit,pos', output_format='text')
            assert ann.strip() == EN_GOLD
    finally:
        # Without this, a failing test would leave the server running on
        # port 9001 and interfere with later tests that use the same port
        external_server_process.kill()
        # Reap the child so it does not linger as a zombie process
        external_server_process.wait(5)
Ejemplo n.º 15
0
def test_codepoints():
    """ Ask the tokenizer for codepoint offsets and compare each token of the first sentence with the expected offsets """
    tokenizer_props = {'tokenize.codepoint': 'true'}
    with corenlp.CoreNLPClient(annotators=['tokenize', 'ssplit'],
                               properties=tokenizer_props) as client:
        ann = client.annotate(codepoint_doc)
        expected_pairs = zip(expected_codepoints, expected_characters)
        for index, (codepoint_span, char_span) in enumerate(expected_pairs):
            tok = ann.sentence[0].token[index]
            # Codepoint-based offsets
            assert tok.codepointOffsetBegin == codepoint_span[0]
            assert tok.codepointOffsetEnd == codepoint_span[1]
            # Character-based offsets
            assert tok.beginChar == char_span[0]
            assert tok.endChar == char_span[1]
Ejemplo n.º 16
0
def test_external_server_force_start():
    """
    Test starting up an external server and accessing with a client with start_server=StartServer.FORCE_START

    A CoreNLP server is launched as a separate process on port 9001; a client
    created with FORCE_START against the same endpoint is then expected to
    raise corenlp.PermanentlyFailedException. The external process is always
    terminated and reaped, even when the expectation is not met.
    """
    corenlp_home = os.getenv('CORENLP_HOME')
    start_cmd = f'java -Xmx5g -cp "{corenlp_home}/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 ' \
                f'-timeout 60000 -server_id stanza_external_server -serverProperties {SERVER_TEST_PROPS}'
    # Popen is given an argument list (no shell), so split the command line first
    external_server_process = subprocess.Popen(shlex.split(start_cmd))
    try:
        time.sleep(5)  # wait and make sure the external CoreNLP server is up and running
        with pytest.raises(corenlp.PermanentlyFailedException):
            with corenlp.CoreNLPClient(start_server=corenlp.StartServer.FORCE_START, endpoint="http://localhost:9001") as external_server_client:
                # Result is irrelevant: the block is expected to raise
                external_server_client.annotate(TEXT, annotators='tokenize,ssplit,pos', output_format='text')
    finally:
        # Without this, a failing expectation would leave the server running
        # on port 9001 and interfere with later tests that use the same port
        external_server_process.terminate()
        external_server_process.wait(5)
Ejemplo n.º 17
0
def test_codepoint_text():
    """ Slice the input text with the codepoint offsets of each sentence's first and last token and check the result """

    text = 'Unban mox opal 🐱.  This is a second sentence.'
    expected_sentences = ['Unban mox opal 🐱.', 'This is a second sentence.']

    with corenlp.CoreNLPClient(annotators=["tokenize", "ssplit"],
                               properties={'tokenize.codepoint': 'true'}) as client:
        ann = client.annotate(text)

        for sentence_index, expected in enumerate(expected_sentences):
            tokens = ann.sentence[sentence_index].token
            # The sentence spans from the start of its first token
            # to the end of its last token
            begin = tokens[0].codepointOffsetBegin
            end = tokens[-1].codepointOffsetEnd
            assert text[begin:end] == expected
Ejemplo n.º 18
0
def test_username_password():
    """
    Test starting a server with a username and password

    A request with the correct credentials must produce USERNAME_PASS_GOLD
    (compared after stripping surrounding whitespace); a request with wrong
    credentials must raise AnnotationException. Any other exception
    propagates unchanged so the real failure is visible in the test report.
    """
    # Imported locally so this block does not rely on the module's import list
    import pytest

    # NOTE(review): every credential literal below is the same '******'
    # string, which looks like masking applied to this copy of the code.
    # Confirm the real correct/incorrect passwords differ.
    with corenlp.CoreNLPClient(
            properties=USERNAME_PASS_PROPS,
            username='******',
            password='******',
            server_id="test_server_username_pass") as client:
        # check with correct password
        ann = client.annotate(EN_DOC,
                              output_format='text',
                              username='******',
                              password='******')
        assert ann.strip() == USERNAME_PASS_GOLD.strip()
        # check with incorrect password, should throw AnnotationException
        with pytest.raises(AnnotationException):
            client.annotate(EN_DOC,
                            output_format='text',
                            username='******',
                            password='******')
Ejemplo n.º 19
0
def corenlp_client():
    """ Yield a tokenize/ssplit/pos client for the tests and stop it once the generator is resumed """
    settings = {
        'annotators': 'tokenize,ssplit,pos',
        'server_id': 'stanza_request_tests_server',
    }
    request_client = corenlp.CoreNLPClient(**settings)
    yield request_client
    # Teardown: executed after the consumer hands control back
    request_client.stop()
Ejemplo n.º 20
0
def test_context_manager():
    """ Use the client as a context manager against http://localhost:9001 and check the text of the first sentence """
    endpoint_client = corenlp.CoreNLPClient(
        annotators="tokenize,ssplit",
        endpoint="http://localhost:9001",
    )
    with endpoint_client as context_client:
        ann = context_client.annotate(TEXT)
        first_sentence = corenlp.to_text(ann.sentence[0])
        # Expected value is TEXT without its final character
        assert first_sentence == TEXT[:-1]
Ejemplo n.º 21
0
def test_no_duplicate_servers():
    """Starting a second server on the same port is expected to fail with PermanentlyFailedException"""
    # Equivalent to nesting: pytest.raises is the outer context, so it also
    # catches an exception raised while the client is created or entered
    with pytest.raises(corenlp.PermanentlyFailedException), \
            corenlp.CoreNLPClient(annotators="tokenize,ssplit") as duplicate_server:
        # Only reached if the duplicate server started; RuntimeError is not
        # the expected exception type, so the test fails
        raise RuntimeError("This should have failed")
Ejemplo n.º 22
0
def test_context_manager():
    """ Use the client as a context manager and check the text of the first annotated sentence """
    tokenizing_client = corenlp.CoreNLPClient(annotators="tokenize,ssplit")
    with tokenizing_client as context_client:
        ann = context_client.annotate(TEXT)
        first_sentence = corenlp.to_text(ann.sentence[0])
        # Expected value is TEXT without its final character
        assert first_sentence == TEXT[:-1]