def corenlp_client():
    """Yield a CoreNLP client for the tests and stop it afterwards.

    The client is created with the tokenize, ssplit, pos, lemma, ner and
    depparse annotators under the ``stanfordnlp_main_test_server`` server id.
    """
    annotator_spec = 'tokenize,ssplit,pos,lemma,ner,depparse'
    nlp_client = corenlp.CoreNLPClient(
        annotators=annotator_spec,
        server_id='stanfordnlp_main_test_server',
    )
    yield nlp_client
    # Teardown: executed when the generator is resumed after the yield.
    nlp_client.stop()
def test_tokensregex():
    """Run a TokensRegex pattern over TEXT and compare against the expected match.

    Exactly one sentence entry with one match is expected, and the whole
    response must equal the structure spelled out below.
    """
    # Example pattern from: https://nlp.stanford.edu/software/tokensregex.shtml
    pattern = '([ner: PERSON]+) /wrote/ /an?/ []{0,3} /sentence|article/'
    expected = {
        "sentences": [
            {
                "0": {
                    "text": "Chris wrote a simple sentence",
                    "begin": 0,
                    "end": 5,
                    "1": {
                        "text": "Chris",
                        "begin": 0,
                        "end": 1,
                    },
                },
                "length": 1,
            },
        ]
    }
    annotator_names = 'tokenize ssplit ner depparse'.split()
    with corenlp.CoreNLPClient(annotators=annotator_names, timeout=60000) as client:
        matches = client.tokensregex(TEXT, pattern)
        assert len(matches["sentences"]) == 1
        assert matches["sentences"][0]["length"] == 1
        assert matches == expected
def corenlp_client():
    """Yield a CoreNLP client for the request tests and stop it afterwards.

    The client uses the tokenize, ssplit and pos annotators under the
    ``stanfordnlp_request_tests_server`` server id, and has
    FRENCH_CUSTOM_PROPS registered under the ``fr-custom`` properties key
    before it is handed out.
    """
    request_client = corenlp.CoreNLPClient(
        annotators='tokenize,ssplit,pos',
        server_id='stanfordnlp_request_tests_server',
    )
    request_client.register_properties_key('fr-custom', FRENCH_CUSTOM_PROPS)
    yield request_client
    # Teardown: executed when the generator is resumed after the yield.
    request_client.stop()
def test_english_request():
    """Start a server with Spanish defaults, then request the English properties.

    EN_DOC is annotated with ``properties_key='english'`` and text output;
    the result must equal EN_DOC_GOLD once both are stripped of surrounding
    whitespace.
    """
    with corenlp.CoreNLPClient(properties='spanish',
                               server_id='test_english_request') as spanish_client:
        annotated = spanish_client.annotate(
            EN_DOC,
            properties_key='english',
            output_format='text',
        )
        actual = annotated.strip()
        expected = EN_DOC_GOLD.strip()
        assert actual == expected
def test_preload():
    """Check that the default annotators are fully loaded right after server start.

    After sleeping to give the annotators time to load, EN_DOC is annotated
    via ``annotate_and_time``; the annotation must match EN_PRELOAD_GOLD
    (whitespace-stripped) and ``end_time - start_time`` must be below 1.5.
    """
    with corenlp.CoreNLPClient(server_id='test_server_start_preload') as preloaded_client:
        # wait for annotators to load
        time.sleep(140)
        timing = annotate_and_time(preloaded_client, EN_DOC)
        assert timing['annotation'].strip() == EN_PRELOAD_GOLD.strip()
        elapsed = timing['end_time'] - timing['start_time']
        assert elapsed < 1.5
def test_username_password():
    """Test starting a server with a username and password.

    A request with the correct credentials must produce USERNAME_PASS_GOLD
    (compared after stripping surrounding whitespace). A request with the
    incorrect password must raise AnnotationException; any other exception
    propagates unchanged so the real failure is visible in the test report.
    """
    with corenlp.CoreNLPClient(properties=USERNAME_PASS_PROPS, username='******', password='******',
                               server_id="test_server_username_pass") as client:
        # check with correct password
        ann = client.annotate(EN_DOC, output_format='text', username='******', password='******')
        assert ann.strip() == USERNAME_PASS_GOLD.strip()
        # check with incorrect password, should throw AnnotationException
        try:
            client.annotate(EN_DOC, output_format='text', username='******', password='******')
        except AnnotationException:
            # Expected outcome: the server rejected the request.
            pass
        else:
            # Explicit raise (not ``assert False``) so the check survives
            # ``python -O`` and is not swallowed by a broad except clause.
            raise AssertionError(
                "annotate() with an incorrect password did not raise AnnotationException")
def test_external_server():
    """Test starting up an external server and accessing it with start_server=False.

    A CoreNLP server process is launched on port 9001 from the jars under
    the CORENLP_HOME environment variable, a client is attached to it
    without starting its own server, and the annotation of TEXT is compared
    against EN_GOLD. The server process is always killed and reaped, even
    when the request or the assertion fails, so a failing run does not leave
    a stray Java process holding the port.
    """
    corenlp_home = os.getenv('CORENLP_HOME')
    start_cmd = f'java -Xmx5g -cp "{corenlp_home}/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 ' \
                f'-timeout 60000 -server_id stanfordnlp_external_server -serverProperties {SERVER_TEST_PROPS}'
    # Pass an argument list (no shell) to Popen.
    external_server_process = subprocess.Popen(shlex.split(start_cmd))
    try:
        with corenlp.CoreNLPClient(start_server=False,
                                   endpoint="http://localhost:9001") as external_server_client:
            ann = external_server_client.annotate(TEXT, annotators='tokenize,ssplit,pos', output_format='text')
            assert ann.strip() == EN_GOLD
    finally:
        # Tear the server down regardless of the test outcome, then reap it
        # so no zombie process is left behind.
        external_server_process.kill()
        external_server_process.wait()
def test_semgrex():
    """Run a Semgrex pattern over TEXT and compare against the expected match.

    The pattern looks for the word "wrote" with an nsubj and a dobj
    dependent, named ``subject`` and ``object``; the response (requested
    with ``to_words=True``) must equal the single-entry list below.
    """
    pattern = '{word:wrote} >nsubj {}=subject >dobj {}=object'
    expected = [
        {
            "text": "wrote",
            "begin": 1,
            "end": 2,
            "$subject": {
                "text": "Chris",
                "begin": 0,
                "end": 1,
            },
            "$object": {
                "text": "sentence",
                "begin": 4,
                "end": 5,
            },
            "sentence": 0,
        },
    ]
    annotator_names = 'tokenize ssplit pos lemma ner depparse'.split()
    with corenlp.CoreNLPClient(annotators=annotator_names, timeout=60000) as client:
        matches = client.semgrex(TEXT, pattern, to_words=True)
        assert matches == expected
matches = corenlp_client.semgrex(text, pattern, to_words=True, annotators=props['annotators'], properties=props) print(matches) assert matches == [ { "text": "wrote", "begin": 1, "end": 2, "$subject": { "text": "Chris", "begin": 0, "end": 1 }, "$object": { "text": "sentence", "begin": 4, "end": 5 }, "sentence": 0, }] if __name__ == '__main__': sc = corenlp.CoreNLPClient(start_server=False, endpoint="http://localhost:9005") properties_key = 'english' props = {'pipelineLanguage': properties_key.lower(), 'annotators': 'tokenize,ssplit,pos,depparse,lemma,natlog,ner,openie', 'outputFormat': 'text' } test_tokensregex(sc, props, "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n") test_semgrex(sc, props, "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.")
def test_lang_start():
    """Start the server from a Stanford CoreNLP language name.

    With ``properties='german'``, the text-format annotation of GERMAN_DOC
    must equal GERMAN_FULL_PROPS_GOLD after stripping surrounding whitespace.
    """
    with corenlp.CoreNLPClient(properties='german',
                               server_id='test_server_start_lang_name') as german_client:
        annotated = german_client.annotate(GERMAN_DOC, output_format='text')
        actual = annotated.strip()
        expected = GERMAN_FULL_PROPS_GOLD.strip()
        assert actual == expected
def __init__(self, host='localhost', port=9005):
    """Record the host and port and build a client for that endpoint.

    The client is created with ``start_server=False`` and an endpoint of
    ``http://<host>:<port>``, and stored as ``external_server_client``.
    """
    self.host, self.port = host, port
    server_url = f"http://{self.host}:{self.port}"
    self.external_server_client = corenlp.CoreNLPClient(
        start_server=False,
        endpoint=server_url,
    )
def test_python_dict():
    """Start the server with a Python dictionary as its default properties.

    With GERMAN_SMALL_PROPS as properties, the text-format annotation of
    GERMAN_DOC must equal GERMAN_SMALL_PROPS_GOLD after stripping
    surrounding whitespace.
    """
    with corenlp.CoreNLPClient(properties=GERMAN_SMALL_PROPS,
                               server_id='test_server_start_python_dict') as dict_client:
        annotated = dict_client.annotate(GERMAN_DOC, output_format='text')
        actual = annotated.strip()
        expected = GERMAN_SMALL_PROPS_GOLD.strip()
        assert actual == expected
def test_update():
    """Annotate TEXT, pass the annotation through ``client.update``, and check sentence 0.

    The text of the first sentence of the updated annotation must equal
    TEXT without its final character.
    """
    annotator_names = "tokenize ssplit".split()
    with corenlp.CoreNLPClient(annotators=annotator_names) as client:
        initial = client.annotate(TEXT)
        updated = client.update(initial)
        assert corenlp.to_text(updated.sentence[0]) == TEXT[:-1]
def test_connect():
    """Check that a default client reports itself as active and alive.

    ``ensure_alive()`` is called first; afterwards ``is_active`` must be
    truthy and ``is_alive()`` must return a truthy value.
    """
    with corenlp.CoreNLPClient() as live_client:
        live_client.ensure_alive()
        assert live_client.is_active
        assert live_client.is_alive()
def test_context_manager():
    """Use the client as a context manager and check the first sentence text.

    The text of sentence 0 in the annotation of TEXT must equal TEXT
    without its final character.
    """
    with corenlp.CoreNLPClient(annotators="tokenize,ssplit") as context_client:
        annotation = context_client.annotate(TEXT)
        assert corenlp.to_text(annotation.sentence[0]) == TEXT[:-1]
def test_python_dict_w_annotators():
    """Start the server with a Python dictionary of properties and override the annotators.

    With GERMAN_SMALL_PROPS as properties and ``annotators="tokenize,ssplit"``,
    the text-format annotation of GERMAN_DOC must equal
    GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD after stripping surrounding
    whitespace.
    """
    with corenlp.CoreNLPClient(
            properties=GERMAN_SMALL_PROPS,
            annotators="tokenize,ssplit",
            server_id='test_server_start_python_dict_w_annotators') as override_client:
        annotated = override_client.annotate(GERMAN_DOC, output_format='text')
        actual = annotated.strip()
        expected = GERMAN_SMALL_PROPS_W_ANNOTATORS_GOLD.strip()
        assert actual == expected
def test_props_file():
    """Start the server from a properties file.

    With SERVER_TEST_PROPS as properties, the text-format annotation of
    EN_DOC must equal EN_PROPS_FILE_GOLD after stripping surrounding
    whitespace.
    """
    with corenlp.CoreNLPClient(properties=SERVER_TEST_PROPS,
                               server_id='test_server_start_props_file') as props_client:
        annotated = props_client.annotate(EN_DOC, output_format="text")
        actual = annotated.strip()
        expected = EN_PROPS_FILE_GOLD.strip()
        assert actual == expected