import torch
from mtap import Document, processor, run_processor
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import labels, label_property

from biomedicus.dependencies.stanza_parser import stanza_deps_and_upos_tags


@processor('biomedicus-selective-dependencies',
           human_name="BioMedICUS Stanza Selective Dependency Parser",
           entry_point=__name__,
           description="Calls out to the Stanford Stanza framework for dependency "
                       "parsing on an appropriate subset of sentences.",
           inputs=[
               labels(name='sentences', reference='biomedicus-sentences/sentences'),
               labels(name='umls_terms',
                      reference='biomedicus-concepts/umls_terms',
                      name_from_parameter='terms_index'),
               labels("negation_triggers", reference='biomedicus-negex-triggers')
           ],
           outputs=[
               labels(
                   name='dependencies',
                   description="The dependent words.",
                   properties=[
                       label_property('deprel',
                                      description="The dependency relation",
                                      data_type='str'),
                       label_property(
@processor(
    name='biomedicus-deepen',
    human_name='DEEPEN Negation Detector',
    description='Detects which UMLS terms are negated.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description='The label index containing terms that should be '
                        'checked for negation'
        )
    ],
    inputs=[
        labels(name='sentences', reference='biomedicus-sentences/sentences'),
        labels(name='dependencies',
               reference='biomedicus-selective-dependencies/dependencies'),
        labels(name='umls_terms', reference='biomedicus-concepts/umls_terms',
               name_from_parameter='terms_index')
    ],
    outputs=[
        labels("negated", description="Spans of negated terms."),
        labels("negation_trigger",
               description="Spans of phrases that trigger negation.")
    ])
class DeepenProcessor(mtap.processing.DocumentProcessor):
    def __init__(self):
        self.negex = DeepenTagger()
    for match in _split.finditer(text):
        split_timer.stop()
        start = match.start()
        local_text = text[prev:start]
        for ss, se in predict_segment(model, input_mapper, local_text, device):
            yield prev + ss, prev + se
        prev = match.end()
        split_timer.start()


@processor('biomedicus-sentences',
           human_name="Sentence Detector",
           description="Labels sentences given document text.",
           entry_point=__name__,
           outputs=[labels('sentences')])
class SentenceProcessor(DocumentProcessor):
    def __init__(self, input_mapper: InputMapping, model: BiLSTM, device):
        self.input_mapper = input_mapper
        self.model = model
        self.device = device

    def process_document(self, document: Document, params: Dict[str, Any]):
        with document.get_labeler('sentences', distinct=True) as add_sentence:
            for start, end in predict_text(self.model, self.input_mapper,
                                           document.text, self.device):
                add_sentence(start, end)


def bi_lstm_hparams_parser():
    parser = ArgumentParser(add_help=False)
@processor(
    name='biomedicus-negex-triggers',
    human_name='Negex Triggers Tagger',
    description='Labels phrases which are negation triggers.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description='The label index containing terms that should be '
                        'checked for negation'
        )
    ],
    inputs=[
        labels(name='sentences', reference='biomedicus-sentences/sentences'),
        labels(name='umls_terms', reference='biomedicus-concepts/umls_terms',
               name_from_parameter='terms_index')
    ],
    outputs=[
        labels("negation_trigger",
               description="Spans of phrases that trigger negation.",
               properties=[
                   label_property(
                       "tags",
                       data_type='List[str]',
                       description='The tags that apply to the trigger, '
                                   'for example: POST PREN')
               ])
    ])
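Downstream code can read the declared tags property straight off each label. A minimal consumer sketch; the helper below is hypothetical and not part of the snippet above:

# Hypothetical consumer of the 'negation_trigger' index declared above.
def prenegation_triggers(document):
    # Declared properties are exposed as attributes on each GenericLabel.
    return [t for t in document.labels['negation_trigger'] if 'PREN' in t.tags]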
from typing import Dict, Any

import mtap
from mtap.processing.descriptions import labels


@mtap.processor('mtap-python-references-example',
                human_name='Python References Examples',
                description='Shows use of referential fields on labels.',
                outputs=[
                    labels('referenced'),
                    labels('map_references'),
                    labels('list_references'),
                    labels('references')
                ])
class ReferencesExampleProcessor(mtap.DocumentProcessor):
    def process_document(self, document: mtap.Document,
                         params: Dict[str, Any]):
        referenced = [
            mtap.GenericLabel(0, 1),
            mtap.GenericLabel(1, 2),
            mtap.GenericLabel(2, 3),
            mtap.GenericLabel(3, 4)
        ]
        # references can be a map of strings to labels
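The example is cut off at the comment above. A sketch of what a map-valued reference might look like, assuming GenericLabel accepts a dict of labels as a field value; the exact call is an assumption, only the index names come from the descriptor:

        # Sketch only: the original continuation is truncated above.
        map_reference = mtap.GenericLabel(0, 4,
                                          references={'a': referenced[0],
                                                      'b': referenced[1]})
        document.add_labels('map_references', [map_reference])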
from typing import Dict, Any

from mtap import Document
from mtap.api.v1 import processing_pb2
from mtap.processing import DocumentProcessor, _runners
from mtap.processing.descriptions import parameter, labels, label_property, processor
from mtap.processing._service import _ProcessorServicer


@processor('mtap-test-processor',
           description='Processor desc.',
           parameters=[
               parameter('a_param', required=True, data_type='bool',
                         description="desc.")
           ],
           inputs=[
               labels('input_index',
                      properties=[label_property('bar', data_type='bool')])
           ],
           outputs=[
               labels('output_index',
                      description='desc.',
                      properties=[
                          label_property('foo',
                                         data_type='str',
                                         nullable=True,
                                         description='A label property.')
                      ])
           ])
class ExampleTestProcessor(DocumentProcessor):
    def process_document(self, document: Document, params: Dict[str, Any]):
        pass
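Because process_document takes a plain Document plus a params dict, a processor like this can be exercised in-process without standing up a server. A minimal sketch, assuming a local (client-less) Event can be constructed:

import mtap

event = mtap.Event()
document = event.create_document('plaintext', 'Some text.')
ExampleTestProcessor().process_document(document, {'a_param': True})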
                       stanza_sentence.tokens[0].start_char)
        token_end = (sentence.start_index + token.end_char
                     - stanza_sentence.tokens[0].start_char)
        sentence_upos_tags.append(
            GenericLabel(token_begin, token_end, tag=word.upos))
    return sentence_deps, sentence_upos_tags


@processor(
    'biomedicus-dependencies',
    human_name="BioMedICUS Stanza Dependency Parser",
    entry_point=__name__,
    description="Calls out to the Stanford Stanza framework for dependency parsing.",
    inputs=[
        labels(name='sentences', reference='biomedicus-sentences/sentences')
    ],
    outputs=[
        labels(name='dependencies',
               description="The dependent words.",
               properties=[
                   label_property('deprel',
                                  description="The dependency relation",
                                  data_type='str'),
                   label_property(
                       'head',
                       description="The head of this label or null if it is the root.",
                       nullable=True,
                       data_type='ref:dependencies'),
                   label_property(
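Because head is declared as a nullable ref:dependencies property, each label references another label in the same index, so a consumer can walk any dependency up to the root. A sketch; the helper name is hypothetical:

# Follow 'head' references upward; 'head' is null at the root.
def root_of(dep):
    while dep.head is not None:
        dep = dep.head
    return dep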
_pattern = re.compile(r'^[\s]*(.*?)[\s]*$', re.MULTILINE)


def get_sentences(text: str) -> List[Location]:
    for match in _pattern.finditer(text):
        yield match.start(1), match.end(1)


@processor(
    name='biomedicus-sentences-one-per-line',
    human_name='One per Line Sentences',
    description='Labels sentences where each line in the input document is a sentence.',
    entry_point=__name__,
    outputs=[labels(name='sentences')])
class OnePerLineSentencesProcessor(mtap.processing.DocumentProcessor):
    def process_document(self, document: Document, params: Dict[str, Any]):
        with document.get_labeler('sentences') as sentence_labeler:
            for start, end in get_sentences(document.text):
                sentence_labeler(start, end)


def main(args=None):
    proc = OnePerLineSentencesProcessor()
    mtap.run_processor(proc, args=args)


if __name__ == '__main__':
    main()
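For reference, the pattern trims surrounding whitespace from each line, so the yielded spans cover only the trimmed text. A quick check (hypothetical usage; offsets computed for this input):

text = "  first line  \nsecond line"
print(list(get_sentences(text)))  # [(2, 12), (15, 26)]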
from typing import Dict, Any, Optional

import mtap
from mtap.processing.descriptions import parameter, labels, label_property


@mtap.processor('mtap-example-processor-python',
                human_name="Python Example Processor",
                description="Counts the number of times the letters a and b occur in a document.",
                parameters=[
                    parameter('do_work', required=True, data_type='bool',
                              description="Whether the processor should do anything.")
                ],
                outputs=[
                    labels('mtap.examples.letter_counts',
                           properties=[label_property('letter', data_type='str'),
                                       label_property('count', data_type='int')])
                ])
class ExampleProcessor(mtap.DocumentProcessor):
    """Does some labeling of the counts of the letters 'a' and 'b' in a document.
    """

    def process_document(self,
                         document: mtap.Document,
                         params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if params['do_work']:
            with self.started_stopwatch('fetch_time'):
                text = document.text
            a_count = text.count('a')
            b_count = text.count('b')
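The example is truncated here. One plausible continuation labels each count using the letter and count properties declared in the descriptor above; the exact code is an assumption:

            # Sketch: the original continuation is cut off above.
            with document.get_labeler('mtap.examples.letter_counts') as label_count:
                label_count(0, len(document.text), letter='a', count=a_count)
                label_count(0, len(document.text), letter='b', count=b_count)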
import logging
from typing import Dict, Any

import mtap
import requests
from mtap import DocumentProcessor, processor_parser, run_processor
from mtap.processing.descriptions import labels, processor

logger = logging.getLogger("biomedicus.sentences.bi_lstm_torchserve")


@processor('biomedicus-sentences',
           human_name="Sentence Detector",
           description="Labels sentences given document text.",
           entry_point=__name__,
           outputs=[
               labels('sentences')
           ])
class SentencesProcessor(DocumentProcessor):
    def __init__(self, torchserve_address):
        self.torchserve_address = torchserve_address
        logger.info('Using "%s" as endpoint', torchserve_address)

    def process_document(self, document: 'mtap.Document', params: Dict[str, Any]):
        payload = {'text': str(document.text)}
        r = requests.post(self.torchserve_address, data=payload)
        result = r.json()
        with document.get_labeler('sentences', distinct=True) as create_sentence:
            for begin, end in result:
                create_sentence(begin, end)
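The module imports processor_parser and run_processor, but its entry point is not shown. A minimal sketch of how the processor might be hosted; the --torchserve-address flag is an assumption:

from argparse import ArgumentParser

def main(args=None):
    parser = ArgumentParser()
    parser.add_argument('--torchserve-address', required=True,
                        help='Endpoint of the TorchServe sentence model.')
    ns, remaining = parser.parse_known_args(args)
    # Pass the unconsumed arguments (host, port, etc.) through to MTAP.
    run_processor(SentencesProcessor(ns.torchserve_address), args=remaining)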