def test_props_file():
    """Test starting the server with a props file.

    A client is created with ``SERVER_TEST_PROPS`` as its properties, ``EN_DOC``
    is annotated in text format, and the result must equal
    ``EN_PROPS_FILE_GOLD`` once both sides are stripped of surrounding
    whitespace.
    """
    client_options = {
        'properties': SERVER_TEST_PROPS,
        'server_id': 'test_server_start_props_file',
    }
    with corenlp.CoreNLPClient(**client_options) as client:
        annotated = client.annotate(EN_DOC, output_format="text")
        assert annotated.strip() == EN_PROPS_FILE_GOLD.strip()
def test_lang_start():
    """Test starting the server with a Stanford CoreNLP language name.

    The client is given ``properties='german'``; the text-format annotation of
    ``GERMAN_DOC`` must equal ``GERMAN_FULL_PROPS_GOLD`` after stripping
    surrounding whitespace from both.
    """
    german_client = corenlp.CoreNLPClient(
        properties='german',
        server_id='test_server_start_lang_name',
    )
    with german_client as client:
        output = client.annotate(GERMAN_DOC, output_format='text')
        assert output.strip() == GERMAN_FULL_PROPS_GOLD.strip()
def test_english_request():
    """Test requesting English properties from a server started in Spanish.

    Two servers are started one after the other:

    1. Spanish defaults, with the request overriding ``properties='english'``.
    2. English defaults, with no per-request override.

    Both annotations of ``EN_DOC`` are compared against ``EN_DOC_GOLD`` with
    ``compare_ignoring_whitespace``.
    """
    scenarios = (
        # (server properties, server id, extra keyword arguments for annotate)
        ('spanish', 'test_spanish_english_request', {'properties': 'english'}),
        # Rerun the test with a server created in English mode to verify
        # that the expected output is what the defaults actually give us
        ('english', 'test_english_request', {}),
    )
    for server_properties, server_id, request_overrides in scenarios:
        with corenlp.CoreNLPClient(properties=server_properties,
                                   server_id=server_id) as client:
            ann = client.annotate(EN_DOC, output_format='text',
                                  **request_overrides)
            compare_ignoring_whitespace(ann, EN_DOC_GOLD)
def test_python_dict():
    """Test starting the server with a Python dictionary as default properties.

    ``GERMAN_SMALL_PROPS`` is passed as the client's properties; the
    text-format annotation of ``GERMAN_DOC`` must equal
    ``GERMAN_SMALL_PROPS_GOLD`` after stripping surrounding whitespace.
    """
    with corenlp.CoreNLPClient(
        server_id='test_server_start_python_dict',
        properties=GERMAN_SMALL_PROPS,
    ) as client:
        result = client.annotate(GERMAN_DOC, output_format='text')
        assert result.strip() == GERMAN_SMALL_PROPS_GOLD.strip()
def test_default_annotators():
    """Test a client created with start_server=False and its own annotators.

    The annotators should be used instead of the server's default annotators.
    An outer client is created with a six-annotator list; a second client with
    ``start_server=False`` and a three-annotator list then annotates
    ``EN_DOC``.

    NOTE(review): the annotation result is not compared against any expected
    output here, so this only checks that the request completes without
    raising.
    """
    server_annotators = ['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'depparse']
    client_annotators = ['tokenize', 'ssplit', 'pos']

    with corenlp.CoreNLPClient(server_id='test_default_annotators',
                               output_format='text',
                               annotators=server_annotators) as client, \
            corenlp.CoreNLPClient(start_server=False,
                                  output_format='text',
                                  annotators=client_annotators) as client2:
        ann = client2.annotate(EN_DOC)
def corenlp_client():
    """Yield the client the tests run against, then stop it.

    The client is created with the tokenize, ssplit, pos, lemma, ner and
    depparse annotators under the server id ``stanza_main_test_server``.
    Everything after the ``yield`` is teardown: the client is stopped once the
    generator is resumed.
    """
    shared_client = corenlp.CoreNLPClient(
        server_id='stanza_main_test_server',
        annotators='tokenize,ssplit,pos,lemma,ner,depparse',
    )
    yield shared_client
    shared_client.stop()
def test_lang_start():
    """Test starting the server with a Stanford CoreNLP language name.

    The client is given ``properties='german'``; the text-format annotation of
    ``GERMAN_DOC`` is checked against ``GERMAN_FULL_PROPS_GOLD`` with
    ``compare_ignoring_whitespace``.
    """
    client_options = dict(properties='german',
                          server_id='test_server_start_lang_name')
    with corenlp.CoreNLPClient(**client_options) as client:
        german_output = client.annotate(GERMAN_DOC, output_format='text')
        compare_ignoring_whitespace(german_output, GERMAN_FULL_PROPS_GOLD)
def corenlp_client():
    """Yield the client the request tests run against, then stop it.

    The client uses the tokenize, ssplit and pos annotators under the server id
    ``stanza_request_tests_server``. Before it is handed out,
    ``FRENCH_CUSTOM_PROPS`` is registered under the properties key
    ``fr-custom``. The client is stopped once the generator is resumed after
    the ``yield``.
    """
    request_client = corenlp.CoreNLPClient(
        server_id='stanza_request_tests_server',
        annotators='tokenize,ssplit,pos',
    )
    request_client.register_properties_key('fr-custom', FRENCH_CUSTOM_PROPS)
    yield request_client
    request_client.stop()
def test_python_dict_w_annotators():
    """Test a Python dictionary of default properties with annotators overridden.

    The client receives ``GERMAN_SMALL_PROPS`` together with an explicit
    ``annotators="tokenize,ssplit"`` argument; the text-format annotation of
    ``GERMAN_DOC`` must equal ``GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD`` after
    stripping surrounding whitespace.
    """
    client_options = {
        'properties': GERMAN_SMALL_PROPS,
        'annotators': "tokenize,ssplit",
        'server_id': 'test_server_start_python_dict_w_annotators',
    }
    with corenlp.CoreNLPClient(**client_options) as client:
        produced = client.annotate(GERMAN_DOC, output_format='text')
        assert produced.strip() == GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD.strip()
def test_unknown_request():
    """Test requesting an unknown properties key from a Spanish-default server.

    The server is started with Spanish defaults and ``EN_DOC`` is annotated
    with ``properties_key='UNBAN_MOX_OPAL'``; the call is expected to raise
    ``ValueError``.
    """
    # Use a server id of this test's own rather than reusing
    # 'test_english_request', so the server is attributable to this test.
    with corenlp.CoreNLPClient(properties='spanish',
                               server_id='test_unknown_request') as client:
        with pytest.raises(ValueError):
            # The return value is irrelevant; only the raised error matters.
            client.annotate(EN_DOC, properties_key='UNBAN_MOX_OPAL',
                            output_format='text')
def test_english_request():
    """Test requesting English properties from a server started in Spanish.

    The server is started with Spanish defaults and ``EN_DOC`` is annotated
    with ``properties_key='english'``; the text output is compared against
    ``EN_DOC_GOLD`` with ``compare_ignoring_whitespace``.
    """
    spanish_client = corenlp.CoreNLPClient(properties='spanish',
                                           server_id='test_english_request')
    with spanish_client as client:
        english_output = client.annotate(
            EN_DOC,
            properties_key='english',
            output_format='text',
        )
        compare_ignoring_whitespace(english_output, EN_DOC_GOLD)
def test_preload():
    """Test that the default annotators load fully immediately upon server start.

    After the client is created the test sleeps for 140 (presumably seconds of
    load time), then times one annotation of ``EN_DOC`` via
    ``annotate_and_time``. The annotation must match ``EN_PRELOAD_GOLD``
    ignoring whitespace, and ``end_time - start_time`` must be below 3.
    """
    with corenlp.CoreNLPClient(server_id='test_server_start_preload') as client:
        # wait for annotators to load
        time.sleep(140)
        timed = annotate_and_time(client, EN_DOC)
        compare_ignoring_whitespace(timed['annotation'], EN_PRELOAD_GOLD)
        elapsed = timed['end_time'] - timed['start_time']
        assert elapsed < 3
def test_external_server_try_start():
    """Test starting up a server with a client using StartServer.TRY_START.

    The client is created with ``start_server=corenlp.StartServer.TRY_START``
    against ``http://localhost:9001`` and annotates ``TEXT`` with the tokenize,
    ssplit and pos annotators. The stripped text output must equal ``EN_GOLD``.
    """
    # The CORENLP_HOME lookup was dropped: its value was never used here.
    with corenlp.CoreNLPClient(start_server=corenlp.StartServer.TRY_START,
                               annotators='tokenize,ssplit,pos',
                               endpoint="http://localhost:9001") as external_server_client:
        ann = external_server_client.annotate(TEXT,
                                              annotators='tokenize,ssplit,pos',
                                              output_format='text')
        assert ann.strip() == EN_GOLD
def test_external_server():
    """Test accessing an externally started server with start_server=False.

    A CoreNLP server is launched as a subprocess on port 9001 from the jars
    under ``$CORENLP_HOME``, using ``SERVER_TEST_PROPS`` as its server
    properties. A client with ``start_server=False`` then annotates ``TEXT``
    and the stripped text output must equal ``EN_GOLD``.

    The subprocess is always killed and reaped, even when the client or the
    assertion fails, so a failure cannot leave a server bound to port 9001.
    """
    corenlp_home = os.getenv('CORENLP_HOME')
    start_cmd = (
        f'java -Xmx5g -cp "{corenlp_home}/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 '
        f'-timeout 60000 -server_id stanza_external_server -serverProperties {SERVER_TEST_PROPS}'
    )
    # shlex.split keeps the quoted classpath as a single argument.
    external_server_process = subprocess.Popen(shlex.split(start_cmd))
    try:
        with corenlp.CoreNLPClient(start_server=False,
                                   endpoint="http://localhost:9001") as external_server_client:
            ann = external_server_client.annotate(TEXT,
                                                  annotators='tokenize,ssplit,pos',
                                                  output_format='text')
            assert ann.strip() == EN_GOLD
    finally:
        # Cleanup must not depend on the assertions above passing.
        external_server_process.kill()
        # Reap the child so it does not linger as a zombie process.
        external_server_process.wait(5)
def test_codepoints():
    """Test asking for codepoints from the English tokenizer.

    ``codepoint_doc`` is annotated with tokenize and ssplit and the property
    ``tokenize.codepoint`` set to ``'true'``. For each expected entry, the
    token at the same index in the first sentence must match the expected
    codepoint offsets (``expected_codepoints``) and character offsets
    (``expected_characters``), each given as a (begin, end) pair.
    """
    with corenlp.CoreNLPClient(annotators=['tokenize', 'ssplit'],
                               properties={'tokenize.codepoint': 'true'}) as client:
        ann = client.annotate(codepoint_doc)

        expected_spans = zip(expected_codepoints, expected_characters)
        for index, expected in enumerate(expected_spans):
            codepoint_span, char_span = expected
            token = ann.sentence[0].token[index]
            assert token.codepointOffsetBegin == codepoint_span[0]
            assert token.codepointOffsetEnd == codepoint_span[1]
            assert token.beginChar == char_span[0]
            assert token.endChar == char_span[1]
def test_external_server_force_start():
    """Test StartServer.FORCE_START against an already running external server.

    A CoreNLP server is launched as a subprocess on port 9001. A client created
    with ``start_server=corenlp.StartServer.FORCE_START`` for the same endpoint
    is then expected to raise ``corenlp.PermanentlyFailedException``.

    The subprocess is always terminated and reaped, even when the expected
    exception is not raised, so a failure cannot leave a server bound to
    port 9001.
    """
    corenlp_home = os.getenv('CORENLP_HOME')
    start_cmd = (
        f'java -Xmx5g -cp "{corenlp_home}/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 '
        f'-timeout 60000 -server_id stanza_external_server -serverProperties {SERVER_TEST_PROPS}'
    )
    # shlex.split keeps the quoted classpath as a single argument.
    external_server_process = subprocess.Popen(shlex.split(start_cmd))
    try:
        # wait and make sure the external CoreNLP server is up and running
        time.sleep(5)
        with pytest.raises(corenlp.PermanentlyFailedException):
            with corenlp.CoreNLPClient(start_server=corenlp.StartServer.FORCE_START,
                                       endpoint="http://localhost:9001") as external_server_client:
                external_server_client.annotate(TEXT,
                                                annotators='tokenize,ssplit,pos',
                                                output_format='text')
    finally:
        # Cleanup must not depend on the expectation above holding.
        external_server_process.terminate()
        external_server_process.wait(5)
def test_codepoint_text():
    """Test extracting the correct sentence text using codepoints.

    A two-sentence text containing an emoji is annotated with tokenize and
    ssplit and ``tokenize.codepoint`` set to ``'true'``. For each sentence, the
    slice of the original text from the first token's
    ``codepointOffsetBegin`` to the last token's ``codepointOffsetEnd`` must
    reproduce that sentence.
    """
    text = 'Unban mox opal 🐱. This is a second sentence.'
    expected_sentences = (
        'Unban mox opal 🐱.',
        'This is a second sentence.',
    )
    with corenlp.CoreNLPClient(annotators=["tokenize", "ssplit"],
                               properties={'tokenize.codepoint': 'true'}) as client:
        ann = client.annotate(text)

        for index, expected in enumerate(expected_sentences):
            tokens = ann.sentence[index].token
            span_start = tokens[0].codepointOffsetBegin
            span_end = tokens[-1].codepointOffsetEnd
            assert text[span_start:span_end] == expected
def test_username_password():
    """Test starting a server with a username and password.

    The server is started with ``USERNAME_PASS_PROPS`` and credentials. A
    request with the correct credentials must produce ``USERNAME_PASS_GOLD``
    (compared after stripping whitespace); a request with an incorrect
    password must raise ``AnnotationException``.

    NOTE(review): every credential literal below reads '******', so the
    "incorrect password" request is identical to the correct one as written
    here. Presumably the values were redacted - confirm the real ones differ.
    """
    with corenlp.CoreNLPClient(properties=USERNAME_PASS_PROPS,
                               username='******',
                               password='******',
                               server_id="test_server_username_pass") as client:
        # check with correct password
        ann = client.annotate(EN_DOC, output_format='text',
                              username='******', password='******')
        assert ann.strip() == USERNAME_PASS_GOLD.strip()

        # check with incorrect password, should throw AnnotationException.
        # pytest.raises fails the test if nothing is raised and lets any other
        # exception propagate with its own traceback instead of hiding it
        # behind a bare ``assert False``.
        with pytest.raises(AnnotationException):
            client.annotate(EN_DOC, output_format='text',
                            username='******', password='******')
def corenlp_client():
    """Yield the client the request tests run against, then stop it.

    The client uses the tokenize, ssplit and pos annotators under the server id
    ``stanza_request_tests_server``. It is stopped once the generator is
    resumed after the ``yield``.
    """
    request_client = corenlp.CoreNLPClient(
        server_id='stanza_request_tests_server',
        annotators='tokenize,ssplit,pos',
    )
    yield request_client
    request_client.stop()
def test_context_manager():
    """Test using the client as a context manager on an explicit endpoint.

    ``TEXT`` is annotated with tokenize and ssplit through a client bound to
    ``http://localhost:9001``. ``corenlp.to_text`` of the first sentence must
    equal ``TEXT`` without its final character.
    """
    client_options = {
        "annotators": "tokenize,ssplit",
        "endpoint": "http://localhost:9001",
    }
    with corenlp.CoreNLPClient(**client_options) as context_client:
        first_sentence = context_client.annotate(TEXT).sentence[0]
        assert corenlp.to_text(first_sentence) == TEXT[:-1]
def test_no_duplicate_servers():
    """We expect a second server on the same port to fail.

    Entering the client context is expected to raise
    ``corenlp.PermanentlyFailedException``. If it is entered successfully, the
    body raises ``RuntimeError``, which ``pytest.raises`` does not accept, so
    the test fails.

    NOTE(review): presumably another server is already running on the default
    port when this test runs - confirm against the test setup.
    """
    expected_failure = corenlp.PermanentlyFailedException
    with pytest.raises(expected_failure):
        with corenlp.CoreNLPClient(annotators="tokenize,ssplit"):
            raise RuntimeError("This should have failed")
def test_context_manager():
    """Test using the client as a context manager.

    ``TEXT`` is annotated with tokenize and ssplit. ``corenlp.to_text`` of the
    first sentence must equal ``TEXT`` without its final character.
    """
    with corenlp.CoreNLPClient(annotators="tokenize,ssplit") as context_client:
        annotation = context_client.annotate(TEXT)
        first_sentence_text = corenlp.to_text(annotation.sentence[0])
        assert first_sentence_text == TEXT[:-1]