def process_document(self, document: Document, params: Dict[str, Any]):
    """Label negated terms in every sentence that contains negation triggers.

    Reads the ``sentences``, ``dependencies`` and ``upos_tags`` label
    indices, the ``negation_triggers`` labels, and the terms index named by
    ``params['terms_index']`` (``'umls_terms'`` when the key is absent).
    One label is added to the ``negated`` index for each span reported by
    ``self.negex.check_sentence``.

    Args:
        document: The document whose label indices are read and written.
        params: Processing parameters; only ``'terms_index'`` is consulted.
    """
    index_name = params.get('terms_index', 'umls_terms')
    negated_labeler = document.get_labeler('negated')
    all_terms = document.get_label_index(index_name)
    all_triggers = document.labels['negation_triggers']
    dependencies = document.get_label_index('dependencies')
    pos_tags = document.get_label_index('upos_tags')

    with negated_labeler:
        for sentence in document.get_label_index('sentences'):
            terms_in_sentence = all_terms.inside(sentence)
            triggers_in_sentence = all_triggers.inside(sentence)
            # Sentences without any trigger are never passed to negex.
            if len(triggers_in_sentence) == 0:
                continue
            # The second element of the result is not used here.
            negated_spans, _ = self.negex.check_sentence(
                terms_in_sentence,
                triggers_in_sentence,
                dependencies,
                pos_tags,
            )
            for begin, end in negated_spans:
                negated_labeler(begin, end)
def process_document(self, document: Document, params: Dict[str, Any]):
    """Add a ``negation_triggers`` label for each trigger found in a sentence.

    For every label in the ``sentences`` index,
    ``self.negex.detect_negex_triggers`` is called on the sentence text. It
    yields ``(start, end, tags)`` triples whose offsets are shifted by the
    sentence's start index before being labeled.

    Args:
        document: The document whose sentences are scanned and labeled.
        params: Processing parameters; not used by this method.
    """
    trigger_labeler = document.get_labeler('negation_triggers')
    with trigger_labeler:
        for sentence in document.get_label_index('sentences'):
            found = self.negex.detect_negex_triggers(sentence.text)
            # Trigger offsets are shifted by the sentence start to get the
            # positions that are labeled.
            offset = sentence.start_index
            for rel_begin, rel_end, tags in found:
                trigger_labeler(offset + rel_begin, offset + rel_end,
                                tags=tags)
def process_document(self, document: Document, params: Dict[str, Any]):
    """Label negated terms and negation triggers sentence by sentence.

    Terms come from the index named by ``params['terms_index']``
    (``'umls_terms'`` when the key is absent). For each sentence, the spans
    of the terms inside it are shifted by the sentence's start index and
    passed with the sentence text to ``self.negex.check_sentence``. The
    returned negation and trigger spans are shifted back and added to the
    ``negated`` and ``negation_trigger`` indices respectively.

    Args:
        document: The document whose label indices are read and written.
        params: Processing parameters; only ``'terms_index'`` is consulted.
    """
    index_name = params.get('terms_index', 'umls_terms')
    negated_labeler = document.get_labeler('negated')
    trigger_labeler = document.get_labeler('negation_trigger')
    all_terms = document.get_label_index(index_name)

    with negated_labeler, trigger_labeler:
        for sentence in document.get_label_index('sentences'):
            offset = sentence.start_index
            # Shift term spans by the sentence start before calling negex.
            relative_terms = [
                (term.start_index - offset, term.end_index - offset)
                for term in all_terms.inside(sentence)
            ]
            negated_spans, trigger_spans = self.negex.check_sentence(
                sentence.text, relative_terms)
            # Shift the returned spans back by the same offset.
            for begin, end in negated_spans:
                negated_labeler(offset + begin, offset + end)
            for begin, end in trigger_spans:
                trigger_labeler(offset + begin, offset + end)
def test_labeler_distinct(mocker):
    """Check that a distinct labeler uploads its labels with the distinct adapter.

    Adds three labels through a ``distinct=True`` labeler, then verifies both
    the ``add_labels`` call on the mocked events client and the contents of
    the document's resulting label index.
    """
    events_client = mocker.Mock(EventsClient)
    event = Event(event_id='1', client=events_client)
    document = Document(
        document_name='plaintext',
        text='The quick brown fox jumped over the lazy dog.',
        event=event,
    )

    # (start, end, x) for every label added through the labeler.
    spans = ((0, 10, 1), (11, 15, 2), (16, 20, 3))
    with document.get_labeler('index', distinct=True) as add_generic_label:
        for begin, end, x in spans:
            add_generic_label(begin, end, x=x)

    expected = [
        GenericLabel(begin, end, document=document, x=x)
        for begin, end, x in spans
    ]
    events_client.add_labels.assert_called_with(
        event_id='1',
        document_name='plaintext',
        index_name='index',
        labels=expected,
        adapter=DistinctGenericLabelAdapter,
    )
    assert document.get_label_index('index') == expected
def test_sentences_unknown_character(bi_lstm_model):
    """Check sentence detection on text that begins with a bullet character.

    The single expected sentence spans offsets 2 to 44, which excludes the
    leading bullet and the space after it.
    """
    text = '• Sentence which contains unknown character.'
    document = Document('plaintext', text=text)
    bi_lstm_model.process_document(document, {})
    sentences = document.get_label_index('sentences')
    assert sentences == [GenericLabel(2, 44)]
# Copyright 2019 Regents of the University of Minnesota.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hello world tutorial pipeline."""
import sys

if __name__ == '__main__':
    from mtap import Document, Event, EventsClient, Pipeline, RemoteProcessor

    # Usage: the first command-line argument is the events service address,
    # the second is the address of the 'hello' processor.
    with EventsClient(address=sys.argv[1]) as client:
        with Pipeline(
            RemoteProcessor(processor_id='hello', address=sys.argv[2])
        ) as pipeline:
            with Event(event_id='1', client=client) as event:
                document = Document(document_name='name', text='YOUR NAME')
                event.add_document(document)
                pipeline.run(document)
                # Print the response field of every label in the 'hello'
                # index after the pipeline has run.
                for label in document.get_label_index('hello'):
                    print(label.response)