Example 1
    def test_online_coref(self):
        # Test text, sents, tokens input
        # utterances = [
        #     {'speaker_id': 1, 'text': 'I read an article today. It is about US politics.'},
        #     {'speaker_id': 2, 'tokens': [['What', 'does', 'it', 'say', 'about', 'US', 'politics', '?']]},  # Tokens
        #     {'speaker_id': 1, 'text': 'It talks about the US presidential election.'},
        #     {'speaker_id': 2, 'text': ['I am interested to hear.', 'Can you elaborate more?']},  # Sents
        #     {'speaker_id': 1, 'text': 'Sure! The presidential election is indeed interesting.'}
        # ]

        utterances = [{
            'speaker_id': 1,
            'tokens': [['I', 'read', 'an', 'article', 'today', '.']]
        }, {
            'speaker_id': 2,
            'tokens': [['Can', 'you', 'tell', 'me', 'what', 'it', 'is', 'about', '?']]
        }]

        context = None
        tokens_to_date = []
        for uttr in utterances:
            if 'tokens' in uttr:
                input_doc = Input(
                    tokens=uttr['tokens'],
                    speaker_ids=uttr['speaker_id'],
                    coref_context=self.convert_output_to_context(context),
                    models=['ocr'])
            else:
                input_doc = Input(
                    text=uttr['text'],
                    speaker_ids=uttr['speaker_id'],
                    coref_context=self.convert_output_to_context(context),
                    models=['ocr'])
            output_doc = en_services.online_coref.predict_sequentially(
                input_doc, check_sanitization=True)
            print(json.dumps(output_doc))
            context = output_doc['ocr']  # carried into the next turn's coref_context

            # Print cluster text
            tokens_to_date += flatten(input_doc.tokens)
            for cluster in output_doc['ocr']['clusters']:
                for i in range(len(cluster)):
                    m1, m2 = tuple(cluster[i])
                    cluster[i] = (m1, m2, ' '.join(tokens_to_date[m1:m2]))
            print(f'cluster text: {output_doc["ocr"]["clusters"]}')
            print()
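The mention spans in output_doc['ocr']['clusters'] index into the tokens accumulated over the whole dialogue, which is why the test keeps tokens_to_date across turns. A minimal sketch of that bookkeeping, assuming flatten simply concatenates sentence token lists (the span values below are hypothetical):

def flatten(sentences):
    # Concatenate a list of sentences into one running token list.
    return [token for sentence in sentences for token in sentence]

tokens_to_date = flatten([['I', 'read', 'an', 'article', 'today', '.']])
m1, m2 = 2, 4  # hypothetical end-exclusive mention span
print(' '.join(tokens_to_date[m1:m2]))  # -> an article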
Example 2
def main():
    if len(sys.argv) == 1:
        sys.argv.append('--help')

    arg_parser = argparse.ArgumentParser(
        description='ELIT-{}'.format(__version__))
    task_parser = arg_parser.add_subparsers(dest='task',
                                            help='which task to perform')
    parse_parser = task_parser.add_parser(
        name='parse', help='parse documents interactively from stdin')
    server_parser = task_parser.add_parser(
        name='serve',
        help='start an HTTP server',
        description='An HTTP server for ELIT')
    server_parser.add_argument('--port', type=int, default=8000)
    server_parser.add_argument('--workers',
                               type=int,
                               default=1,
                               help='number of workers')

    args = arg_parser.parse_args()

    if args.task == 'parse':
        from elit.server.en_parser import service_parser
        for line in sys.stdin:
            line = line.strip()
            doc = service_parser.parse([Input(text=line)])[0]
            print(doc)
    elif args.task == 'serve':
        from elit.server import server
        server.run(port=args.port, workers=args.workers)
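In parse mode the CLI reads one document per stdin line; in serve mode it starts the HTTP server. A hypothetical smoke test that drives main() without a shell (the entry-point name in argv is an assumption; only the subcommands above are confirmed by this snippet):

import io
import sys

# Feed one line on stdin and select the 'parse' subcommand,
# as `echo ... | elit parse` would.
sys.stdin = io.StringIO('Emory NLP is a research lab in Atlanta, GA.\n')
sys.argv = ['elit', 'parse']
main()  # prints one parsed Document for the single input line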
Example 3
    def test_sents_input(self):
        text = [
            "Emory NLP is a research lab in Atlanta, GA.",
            "It is founded by Jinho D. Choi in 2014.",
            "Dr. Choi is a professor at Emory University."
        ]
        doc = en_services.parser.parse([Input(text=text)])[0]
        print(doc)
Example 4
    def test_doc_coref_sents(self):
        text = [
            "Emory NLP is a research lab in Atlanta, GA.",
            "It is founded by Jinho D. Choi in 2014.",
            "Dr. Choi is a professor at Emory University."
        ]
        input_doc = Input(text=text, models=['dcr'])
        print(en_services.doc_coref.predict(input_doc))
Example 5
def main():
    text = [
        "Emory NLP is a research lab in Atlanta, GA. "
        "It is founded by Jinho D. Choi in 2014. Dr. Choi is a professor at Emory University."
    ]
    input_doc = Input(text=text)
    input_doc.models = ['lem']
    docs = en_services.parser.parse([input_doc])
    for doc in docs:
        print(doc)

    # See elit.client for coreference examples
    text = 'Pfizer said last week it may need the U.S. government to help it secure some components needed to ' \
           'make the vaccine. While the company halved its 2020 production target due to manufacturing issues, ' \
           'it said last week its manufacturing is running smoothly now. The government also has the option to ' \
           'acquire up to an additional 400 million doses of the vaccine.'
    input_doc = Input(text=text, models=['dcr'])
    doc = service_doc_coref.predict(input_doc)
    print(doc)
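Model selection appears in two styles across these examples: passing models= to the Input constructor (as with 'dcr' above) and assigning the models attribute after construction. A minimal sketch assuming the two are interchangeable for the lemmatizer-only case:

# Assumed equivalent to the attribute-assignment style above.
input_doc = Input(text='Dr. Choi is a professor at Emory University.',
                  models=['lem'])
print(en_services.parser.parse([input_doc])[0])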
Example 6
    def test_doc_coref_tokens(self):
        tokens = [
            ["Emory", "NLP", "is", "a", "research", "lab", "in", "Atlanta", ",", "GA", "."],
            ["It", "is", "founded", "by", "Jinho", "D.", "Choi", "in", "2014", ".",
             "Dr.", "Choi", "is", "a", "professor", "at", "Emory", "University", "."]
        ]
        input_doc = Input(tokens=tokens, models=['dcr'])
        print(en_services.doc_coref.predict(input_doc))
Example 7
    def test_tokens_input(self):
        tokens = [
            ["Emory", "NLP", "is", "a", "research", "lab", "in", "Atlanta", ",", "GA", "."],
            ["It", "is", "founded", "by", "Jinho", "D.", "Choi", "in", "2014", "."],
            ["Dr.", "Choi", "is", "a", "professor", "at", "Emory", "University", "."]
        ]
        doc = en_services.parser.parse([Input(tokens=tokens)])[0]
        print(doc)
Example 8
    def test_doc_coref_concurrent(self):
        batch_size = 32
        inputs = [Input(text=self.get_sample_text(), models=['dcr'])] * batch_size

        start_time = time.time()
        docs = en_services.doc_coref.predict(inputs)
        end_time = time.time()
        print(f'Concurrent doc coref elapsed time for {batch_size} small documents: '
              f'{end_time - start_time:.2f}s')

        assert len(docs) == len(inputs)
        print(docs[0])
        print(docs[-1])
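predict accepts a list of Input objects as well as a single one (compare Examples 4 and 6); given a list it returns one Document per input, which the assertion checks. An average cost per document under the timing above, reusing the test's names (the arithmetic is the only addition):

elapsed = end_time - start_time
print(f'{elapsed / batch_size * 1000:.1f} ms per document on average')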
Example 9
    def test_tokens_input(self):
        tokens = [
            "yes i do what 's your job".split(),
        ]
        doc = en_services.parser.parse([Input(tokens=tokens)])[0]
        print(doc)
Example 10
async def parse(text: str):
    input_doc = Input(text=text)
    output: Document = await runner.process_input(input_doc)
    if not isinstance(output, Document):
        raise HandlingError("Internal Server Error", code=500)
    return output.to_dict()
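Since parse is a coroutine, calling it outside the serving framework requires an event loop. A hypothetical direct driver, assuming runner is already initialized in this module:

import asyncio

# Bypass the HTTP layer and call the handler directly; the layout of the
# returned dict is not shown in these examples, so print it whole.
result = asyncio.run(parse('Emory NLP is a research lab in Atlanta, GA.'))
print(result)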