Example #1
    def __init__(self,
                 conf_path: Union[str, Path],
                 output_directory: Union[str, Path],
                 *,
                 events_address: Optional[str] = None,
                 events_client: Optional[EventsClient] = None,
                 serializer: Optional[str] = None,
                 include_label_text: bool = False):
        if events_address in ('None', 'none', 'null', ''):
            events_address = None
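        # Reuse a caller-supplied events client without taking ownership;
        # otherwise create one that this object is responsible for closing.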
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        else:
            self.close_client = True
            self.events_client = EventsClient(address=events_address)

        self.pipeline = Pipeline.from_yaml_file(conf_path)

        if serializer == 'None':
            serializer = None
        if serializer is not None:
            serialization_proc = SerializationProcessor(
                get_serializer(serializer),
                output_directory,
                include_label_text=include_label_text)
            ser_comp = LocalProcessor(serialization_proc,
                                      component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)
Example #2
class DefaultPipeline:
    """The biomedicus default pipeline for processing clinical documents.

    Attributes:
        events_client (mtap.EventsClient): An MTAP events client used by the pipeline.
        pipeline (mtap.Pipeline): An MTAP pipeline to use to process documents.

    """
    def __init__(self, conf: PipelineConf, *, events_client: Optional[EventsClient] = None):
        conf.populate_addresses()
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

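        # Ordered (component identifier, address) pairs for the default processors.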
        pipeline = [
            (conf.sentences_id, conf.sentences_address),
            (conf.section_headers_id, conf.section_headers_address),
            (conf.tagger_id, conf.tagger_address),
            (conf.acronyms_id, conf.acronyms_address),
            (conf.concepts_id, conf.concepts_address),
            (conf.negation_id, conf.negation_address),
            (conf.selective_dependencies_id, conf.selective_dependencies_address),
            (conf.deepen_id, conf.deepen_address)
        ]
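        # When service discovery is enabled the addresses are omitted so each
        # processor can be resolved by its identifier; otherwise connect directly.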
        if conf.use_discovery:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline]
            )
        else:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier, address=addr) for identifier, addr in pipeline]
            )
        if conf.serializer is not None:
            serialization_proc = SerializationProcessor(get_serializer(conf.serializer),
                                                        conf.output_directory,
                                                        include_label_text=conf.include_label_text)
            ser_comp = LocalProcessor(serialization_proc, component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)

    def process_text(self, text: str, *, event_id: Optional[str] = None) -> ProcessingResult:
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            f = self.pipeline.run(document)
        return f

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.pipeline.close()
        if self.close_client:
            self.events_client.close()
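
Usage (a minimal sketch, assuming `PipelineConf` is default-constructible and its populated addresses point at running services; `DefaultPipeline` is a context manager, so the pipeline and any events client it created are closed on exit):

conf = PipelineConf()
with DefaultPipeline(conf) as pipeline:
    # Runs the new document through every remote processor configured above.
    result = pipeline.process_text('Patient denies chest pain.')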
Example #3
class DefaultPipeline:
    def __init__(self,
                 conf: DefaultPipelineConf,
                 *,
                 events_client: Optional[EventsClient] = None):
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        pipeline = [(conf.sentences_id, conf.sentences_address),
                    (conf.tagger_id, conf.tagger_address),
                    (conf.acronyms_id, conf.acronyms_address),
                    (conf.concepts_id, conf.concepts_address),
                    (conf.negation_id, conf.negation_address)]
        if conf.use_discovery:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline],
                n_threads=conf.threads)
        else:
            self.pipeline = Pipeline(*[
                RemoteProcessor(identifier, address=addr)
                for identifier, addr in pipeline
            ],
                                     n_threads=conf.threads)
        if conf.serializer is not None:
            serialization_proc = SerializationProcessor(
                get_serializer(conf.serializer),
                conf.output_directory,
                include_label_text=conf.include_label_text)
            ser_comp = LocalProcessor(serialization_proc,
                                      component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)

    def process_text(self,
                     text: str,
                     *,
                     event_id: Optional[str] = None) -> ProcessingResult:
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            f = self.pipeline.run(document)
        return f

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.pipeline.close()
        if self.close_client:
            self.events_client.close()
Example #4
class DefaultPipeline:
    """The biomedicus default pipeline for processing clinical documents.

    Attributes:
        events_client (mtap.EventsClient): An MTAP events client used by the pipeline.
        pipeline (mtap.Pipeline): An MTAP pipeline to use to process documents.

    """
    def __init__(self,
                 conf_path: Union[str, Path],
                 output_directory: Union[str, Path],
                 *,
                 events_address: Optional[str] = None,
                 events_client: Optional[EventsClient] = None,
                 serializer: Optional[str] = None,
                 include_label_text: bool = False):
        if events_address in ('None', 'none', 'null', ''):
            events_address = None
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        else:
            self.close_client = True
            self.events_client = EventsClient(address=events_address)

        self.pipeline = Pipeline.from_yaml_file(conf_path)

        if serializer == 'None':
            serializer = None
        if serializer is not None:
            serialization_proc = SerializationProcessor(
                get_serializer(serializer),
                output_directory,
                include_label_text=include_label_text)
            ser_comp = LocalProcessor(serialization_proc,
                                      component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)

    def process_text(self,
                     text: str,
                     *,
                     event_id: Optional[str] = None) -> ProcessingResult:
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            f = self.pipeline.run(document)
        return f

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.pipeline.close()
        if self.close_client:
            self.events_client.close()
Example #5
class DefaultPipeline:
    """The biomedicus default pipeline for processing clinical documents.

    Attributes:
        events_client (mtap.EventsClient): An MTAP events client used by the pipeline.
        pipeline (mtap.Pipeline): An MTAP pipeline to use to process documents.

    """
    def __init__(self,
                 conf: PipelineConf,
                 *,
                 events_client: Optional[EventsClient] = None):
        conf.populate_addresses()
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        pipeline = [(conf.sentences_id, conf.sentences_address),
                    (conf.tagger_id, conf.tagger_address)]
        if conf.use_discovery:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline])
        else:
            self.pipeline = Pipeline(*[
                RemoteProcessor(identifier, address=addr)
                for identifier, addr in pipeline
            ])

    def process_text(self,
                     text: str,
                     *,
                     event_id: Optional[str] = None) -> ProcessingResult:
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            f = self.pipeline.run(document)
        return f

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.pipeline.close()
        if self.close_client:
            self.events_client.close()
Example #6
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument('input',
                        metavar='INPUT_FILE',
                        help='The input GENIA XML file.')
    parser.add_argument('--events',
                        metavar='EVENTS',
                        default=None,
                        help='The address of the events service.')
    parser.add_argument('--tnt-trainer',
                        metavar='TRAINER',
                        default=None,
                        help='The address of the TnT trainer.')
    args = parser.parse_args(args)
    etree = ElementTree.parse(args.input)
    root = etree.getroot()
    with EventsClient(args.events) as client, Pipeline(
            RemoteProcessor('biomedicus-tnt-trainer',
                            address=args.tnt_trainer)) as pipeline:
        for article in root.findall('article'):
            article_id = list(article.find('articleinfo'))[0].text
            with Event(article_id, client) as event:
                db = DocumentBuilder()
                for sentence in article.find('title').findall(
                        'sentence') + article.find('abstract').findall(
                            'sentence'):
                    db.add_sentence(sentence)
                d = db.build_doc(event)
                pipeline.run(d)
Example #7
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument("input_directory", metavar="INPUT_DIR")
    parser.add_argument("output_directory", metavar="OUTPUT_DIR")
    parser.add_argument("--events")
    parser.add_argument("--tagger")
    parser.add_argument("--sentences")
    parser.add_argument("--acronyms")
    parser.add_argument("--norms")
    parser.add_argument("--concepts")
    args = parser.parse_args(args)

    input_dir = Path(args.input_directory)
    with EventsClient(address=args.events) as client, Pipeline(
            RemoteProcessor('biomedicus-sentences', address=args.sentences),
            RemoteProcessor('biomedicus-tnt-tagger', address=args.tagger),
            RemoteProcessor('biomedicus-acronyms', address=args.acronyms),
            RemoteProcessor('biomedicus-concepts', address=args.concepts),
            LocalProcessor(SerializationProcessor(
                JsonSerializer, output_dir=args.output_directory),
                           component_id='serialize',
                           client=client)) as pipeline:
        for path in input_dir.glob("**/*.txt"):
            print("READING FILE:", str(path))
            with path.open('r') as f:
                contents = f.read()
            with Event(event_id=path.stem, client=client) as event:
                document = event.create_document("plaintext", text=contents)
                pipeline.run(document)

        pipeline.print_times()
Example #8
def test_dependencies(events_service, dependencies_service, test_results):
    test_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'dependencies'
    uas = Accuracy('UAS', equivalence_test=uas_equal)
    las = Accuracy('LAS', equivalence_test=las_equal)
    with EventsClient(address=events_service) as client, \
            Pipeline(
                RemoteProcessor(processor_id='biomedicus-dependencies',
                                address=dependencies_service),
                LocalProcessor(Metrics(uas, las, tested='dependencies', target='gold_dependencies'),
                               component_id='accuracy', client=client)
            ) as pipeline:
        for test_file in test_dir.glob('**/*.pickle'):
            with PickleSerializer.file_to_event(test_file,
                                                client=client) as event:
                document = event.documents['plaintext']
                results = pipeline.run(document)
                accuracy_dict = results.component_result(
                    'accuracy').result_dict
                print('Results for document: UAS: {}. LAS: {}.'.format(
                    accuracy_dict['UAS'], accuracy_dict['LAS']))

    print('UAS:', uas.value)
    print('LAS:', las.value)
    timing_info = pipeline.processor_timer_stats(
        'biomedicus-dependencies').timing_info
    test_results['biomedicus-dependencies'] = {
        'UAS': uas.value,
        'LAS': las.value,
        'Corpus': "MiPACQ converted to UD from PTB test set",
        'Remote Call Duration': str(timing_info['remote_call'].mean),
        'Process Method Duration': str(timing_info['process_method'].mean)
    }
Example #9
def test_tnt_performance(events_service, pos_tags_service, test_results):
    input_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'pos_tags'
    accuracy = Accuracy()
    with EventsClient(address=events_service) as client, Pipeline(
            RemoteProcessor(processor_id='biomedicus-tnt-tagger',
                            address=pos_tags_service,
                            params={'token_index': 'gold_tags'}),
            LocalProcessor(Metrics(accuracy,
                                   tested='pos_tags',
                                   target='gold_tags'),
                           component_id='metrics'),
            events_client=client) as pipeline:
        for test_file in input_dir.glob('**/*.pickle'):
            event = PickleSerializer.file_to_event(test_file, client=client)
            with event:
                document = event.documents['gold']
                results = pipeline.run(document)
                print(
                    'Accuracy for event - ', event.event_id, ':',
                    results.component_result(
                        'metrics').result_dict['accuracy'])

        print('Accuracy:', accuracy.value)
        pipeline.print_times()
        timing_info = pipeline.processor_timer_stats(
            'biomedicus-tnt-tagger').timing_info
        test_results['TnT Pos Tagger'] = {
            'Accuracy': accuracy.value,
            'Remote Call Duration': str(timing_info['remote_call'].mean),
            'Process Method Duration': str(timing_info['process_method'].mean)
        }
        assert accuracy.value > 0.9
Example #10
def test_concepts_performance(events_service, concepts_service, test_results):
    input_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'concepts'
    recall = Accuracy(name='recall', mode='any', fields=['cui'])
    precision = Accuracy(name='precision', mode='any', fields=['cui'])
    with EventsClient(address=events_service) as client, \
            Pipeline(
                RemoteProcessor(processor_id='biomedicus-concepts', address=concepts_service),
                LocalProcessor(Metrics(recall, tested='umls_concepts', target='gold_concepts'),
                               component_id='metrics'),
                LocalProcessor(Metrics(precision, tested='gold_concepts', target='umls_concepts'),
                               component_id='metrics_reverse'),
                events_client=client
            ) as pipeline:
        for test_file in input_dir.glob('**/*.pickle'):
            with PickleSerializer.file_to_event(test_file, client=client) as event:
                document = event.documents['plaintext']
                pipeline.run(document)

    print('Precision:', precision.value)
    print('Recall:', recall.value)
    timing_info = pipeline.processor_timer_stats('biomedicus-concepts').timing_info
    test_results['Concepts'] = {
        'Precision': precision.value,
        'Recall': recall.value,
        'Remote Call Duration': str(timing_info['remote_call'].mean),
        'Process Method Duration': str(timing_info['process_method'].mean)
    }
    assert recall.value > 0.6
Example #11
def test_java_references(python_events, java_references_processor):
    with EventsClient(address=python_events) as client, Pipeline(
        RemoteProcessor('mtap-java-reference-labels-example-processor',
                        address=java_references_processor)
    ) as pipeline:
        with Event(event_id='1', client=client) as event:
            document = event.create_document('plaintext', 'abcd')
            pipeline.run(document)
            references = document.labels['references']
            assert references[0].a == GenericLabel(0, 1)
            assert references[0].b == GenericLabel(1, 2)
            assert references[1].a == GenericLabel(2, 3)
            assert references[1].b == GenericLabel(3, 4)

            map_references = document.labels['map_references']
            assert map_references[0].ref == {
                'a': GenericLabel(0, 1),
                'b': GenericLabel(1, 2),
                'c': GenericLabel(2, 3),
                'd': GenericLabel(3, 4)
            }

            list_references = document.labels['list_references']
            assert list_references[0].ref == [GenericLabel(0, 1), GenericLabel(1, 2)]
            assert list_references[1].ref == [GenericLabel(2, 3), GenericLabel(3, 4)]
Example #12
def test_modification_detector_performance(events_service, modification_detector_service,
                                           test_results):
    input_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'negation' / 'i2b2_2010'
    confusion = metrics.FirstTokenConfusion()
    metrics_processor = metrics.Metrics(confusion, tested='negated', target='i2b2concepts',
                                        target_filter=is_negated)
    with EventsClient(address=events_service) as client, Pipeline(
            RemoteProcessor('biomedicus-negation', address=modification_detector_service,
                            params={'terms_index': 'i2b2concepts'}),
            LocalProcessor(metrics_processor, component_id='metrics', client=client)
    ) as pipeline:
        for test_file in input_dir.glob('**/*.pickle'):
            with PickleSerializer.file_to_event(test_file, client=client) as event:
                document = event.documents['plaintext']
                results = pipeline.run(document)
                print('F1 for event - "{}": {:0.3f} - elapsed: {}'.format(
                    event.event_id,
                    results.component_result('metrics').result_dict['first_token_confusion']['f1'],
                    results.component_result('biomedicus-negation').timing_info['process_method']
                ))

        print('Overall Precision:', confusion.precision)
        print('Overall Recall:', confusion.recall)
        print('Overall F1:', confusion.f1)
        pipeline.print_times()
        timing_info = pipeline.processor_timer_stats('biomedicus-negation').timing_info
        test_results['biomedicus-modification'] = {
            'Gold Standard': "2010 i2b2-VA",
            'Precision': confusion.precision,
            'Recall': confusion.recall,
            'F1': confusion.f1,
            'Per-Document Mean Remote Call Duration': str(timing_info['remote_call'].mean),
            'Per-Document Mean Process Method Duration': str(timing_info['process_method'].mean)
        }
Example #13
def test_sentence_performance(events_service, sentences_service, test_results):
    input_dir = Path(os.environ['BIOMEDICUS_TEST_DATA']) / 'sentences'

    confusion = metrics.FirstTokenConfusion()
    with EventsClient(address=events_service) as client, Pipeline(
            RemoteProcessor(processor_id='biomedicus-sentences',
                            address=sentences_service),
            LocalProcessor(metrics.Metrics(confusion,
                                           tested='sentences',
                                           target='Sentence'),
                           component_id='metrics',
                           client=client)) as pipeline:
        for test_file in input_dir.glob('**/*.json'):
            with JsonSerializer.file_to_event(test_file,
                                              client=client) as event:
                document = event.documents['plaintext']
                results = pipeline.run(document)
                print('F1 for event - "{}": {:0.3f} - elapsed: {}'.format(
                    event.event_id,
                    results[1].results['first_token_confusion']['f1'],
                    results[0].timing_info['process_method']))

        print('Overall Precision:', confusion.precision)
        print('Overall Recall:', confusion.recall)
        print('Overall F1:', confusion.f1)
        pipeline.print_times()
        timing_info = pipeline.processor_timer_stats()[0].timing_info
        test_results['Sentences'] = {
            'Precision': confusion.precision,
            'Recall': confusion.recall,
            'F1': confusion.f1,
            'Remote Call Duration': str(timing_info['remote_call'].mean),
            'Process Method Duration': str(timing_info['process_method'].mean)
        }
        assert confusion.f1 > 0.85
Example #14
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument('--events-service')
    parser.add_argument('--sentences-service')
    parser.add_argument('--dependencies-service')
    parser.add_argument('input_file')
    conf = parser.parse_args(args)

    with EventsClient(address=conf.events_service) as client, \
            Pipeline(
                RemoteProcessor('biomedicus-sentences', address=conf.sentences_service),
                RemoteProcessor('biomedicus-dependencies', address=conf.dependencies_service)
            ) as pipeline:
        with open(conf.input_file, 'r') as in_f:
            txt = in_f.read()
        with Event(event_id=Path(conf.input_file).name,
                   client=client) as event:
            document = event.create_document('plaintext', txt)
            pipeline.run(document)
            for sentence in document.labels['sentences']:
                print(sentence.text)
                print('\n')
                for dependency in document.labels['dependencies'].inside(
                        sentence):
                    print((dependency.text, dependency.deprel,
                           dependency.head.text
                           if dependency.head is not None else 'ROOT'))
                print('\n')
Example #15
def test_acronyms_performance(events_service, acronyms_service, test_results):
    input_dir = Path(os.environ['BIOMEDICUS_PHI_TEST_DATA']) / 'acronyms'
    top_score_accuracy = Accuracy(name='top_score_accuracy',
                                  fields=['expansion'])
    any_accuracy = Accuracy(name='any_accuracy',
                            mode='any',
                            fields=['expansion'])
    detection_recall = Accuracy(name='detection_recall',
                                mode='location',
                                fields=['expansion'])
    detection_precision = Accuracy(name='detection_precision',
                                   mode='location',
                                   fields=['expansion'])
    with EventsClient(address=events_service) as client, Pipeline(
            RemoteProcessor(processor_id='biomedicus-acronyms',
                            address=acronyms_service),
            LocalProcessor(Metrics(top_score_accuracy,
                                   detection_recall,
                                   tested='acronyms',
                                   target='gold_acronyms'),
                           component_id='top_score_metrics',
                           client=client),
            LocalProcessor(Metrics(detection_precision,
                                   tested='gold_acronyms',
                                   target='acronyms'),
                           component_id='top_score_reverse',
                           client=client),
            LocalProcessor(Metrics(any_accuracy,
                                   tested='all_acronym_senses',
                                   target='gold_acronyms'),
                           component_id='all_senses_metrics',
                           client=client)) as pipeline:
        for test_file in input_dir.glob('**/*.json'):
            with JsonSerializer.file_to_event(test_file,
                                              client=client) as event:
                document = event.documents['plaintext']
                pipeline.run(document)

        print('Top Sense Accuracy:', top_score_accuracy.value)
        print('Any Sense Accuracy:', any_accuracy.value)
        print('Detection Recall:', detection_recall.value)
        print('Detection Precision:', detection_precision.value)
        pipeline.print_times()
        timing_info = pipeline.processor_timer_stats(
            'biomedicus-acronyms').timing_info
        test_results['acronyms'] = {
            'Top sense accuracy': top_score_accuracy.value,
            'Any sense accuracy': any_accuracy.value,
            'Detection Recall': detection_recall.value,
            'Detection Precision': detection_precision.value,
            'Remote Call Duration': str(timing_info['remote_call'].mean),
            'Process Method Duration': str(timing_info['process_method'].mean)
        }
        assert top_score_accuracy.value > 0.4
        assert any_accuracy.value > 0.4
        assert detection_recall.value > 0.65
Example #16
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument("input_directory", metavar="INPUT_DIR")
    parser.add_argument("concepts_csv", metavar="PATH_TO_CONCEPTS_CSV")
    parser.add_argument("output_directory", metavar="OUTPUT_DIR")
    parser.add_argument("--sentences")
    parser.add_argument("--tagger")
    parser.add_argument("--acronyms")
    parser.add_argument("--events")

    ns = parser.parse_args(args)

    print('Reading concepts csv...')
    concepts = {}
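    # Column layout assumed by the positional indexing below:
    # [0]=end, [1]=start, [5]=cui, [6]=document identifier.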
    with open(ns.concepts_csv, 'r') as f:
        for line in f.readlines():
            splits = line.split(',')
            end = splits[0]
            start = splits[1]
            cui = splits[5]
            identifier = splits[6]
            try:
                v = concepts[identifier]
            except KeyError:
                v = []
                concepts[identifier] = v
            v.append((start, end, cui))

    print('Reading mipacq source files...')
    with EventsClient(address=ns.events) as client, \
            Pipeline(
                RemoteProcessor('biomedicus-sentences', address=ns.sentences),
                RemoteProcessor('biomedicus-tnt-tagger', address=ns.tagger),
                RemoteProcessor('biomedicus-acronyms', address=ns.acronyms),
                LocalProcessor(SerializationProcessor(PickleSerializer,
                                                      output_dir=ns.output_directory),
                               component_id='serialize',
                               client=client)
            ) as pipeline:
        for path in Path(ns.input_directory).glob('**/*.source'):
            identifier = path.stem.split('-')[0]
            try:
                doc_concepts = concepts[identifier]
            except KeyError:
                continue
            with Event(event_id=identifier, client=client) as event:
                with path.open('r') as f:
                    text = f.read()
                document = event.create_document('plaintext', text)
                with document.get_labeler('gold_concepts') as label_concept:
                    for start, end, cui in doc_concepts:
                        label_concept(start, end, cui=cui)
                pipeline.run(document)
Example #17
    def __init__(self,
                 conf: PipelineConf,
                 *,
                 events_client: Optional[EventsClient] = None):
        conf.populate_addresses()
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        pipeline = [(conf.sentences_id, conf.sentences_address),
                    (conf.section_headers_id, conf.section_headers_address),
                    (conf.tagger_id, conf.tagger_address),
                    (conf.acronyms_id, conf.acronyms_address),
                    (conf.concepts_id, conf.concepts_address),
                    (conf.negation_id, conf.negation_address),
                    (conf.selective_dependencies_id,
                     conf.selective_dependencies_address),
                    (conf.deepen_id, conf.deepen_address)]
        if conf.use_discovery:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline])
        else:
            self.pipeline = Pipeline(*[
                RemoteProcessor(identifier, address=addr)
                for identifier, addr in pipeline
            ])
        if conf.serializer is not None:
            serialization_proc = SerializationProcessor(
                get_serializer(conf.serializer),
                conf.output_directory,
                include_label_text=conf.include_label_text)
            ser_comp = LocalProcessor(serialization_proc,
                                      component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)
Example #18
    def __init__(self,
                 conf: PipelineConf,
                 *,
                 events_client: Optional[EventsClient] = None):
        conf.populate_addresses()
        if events_client is not None:
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        pipeline = [(conf.sentences_id, conf.sentences_address),
                    (conf.tagger_id, conf.tagger_address)]
        if conf.use_discovery:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline])
        else:
            self.pipeline = Pipeline(*[
                RemoteProcessor(identifier, address=addr)
                for identifier, addr in pipeline
            ])
Example #19
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument('input',
                        metavar='INPUT_FOLDER',
                        help='A folder containing PTB formatted documents.')
    parser.add_argument('--glob', metavar='GLOB', default='*.mrg')
    parser.add_argument('--source-name',
                        metavar='DOCUMENT_NAME',
                        default='source',
                        help='What document to dump the PTB text into.')
    parser.add_argument(
        '--target-name',
        metavar='DOCUMENT_NAME',
        default='plaintext',
        help='What document to dump the plaintext and annotations into.')
    parser.add_argument('--events',
                        metavar='EVENTS',
                        default=None,
                        help='The address of the events service.')
    parser.add_argument('--ptb-reader',
                        metavar='READER',
                        default=None,
                        help='The address of the PTB Reader.')
    parser.add_argument('--tnt-trainer',
                        metavar='TRAINER',
                        default=None,
                        help='The address of the TnT trainer.')
    args = parser.parse_args(args)
    with EventsClient(address=args.events) as client, Pipeline(
            RemoteProcessor('ptb-reader',
                            address=args.ptb_reader,
                            params={
                                'source_document_name': args.source_name,
                                'target_document_name': args.target_name
                            }),
            RemoteProcessor('biomedicus-tnt-trainer',
                            address=args.tnt_trainer,
                            params={'document_name':
                                    args.target_name})) as pipeline:
        for f in Path(args.input).rglob(args.glob):
            print('Reading:', f)
            with f.open('r') as r:
                text = r.read()
            with Event(event_id=f.name, client=client) as event:
                d = Document(args.source_name, text=text)
                event.add_document(d)
                pipeline.run(event)
Example #20
def test_normalization(events_service, normalization_processor):
    with EventsClient(address=events_service) as client, \
            Pipeline(RemoteProcessor(processor_id='biomedicus_normalizer',
                                     address=normalization_processor)) as pipeline, \
            PickleSerializer.file_to_event(Path(__file__).parent / '97_95.pickle',
                                         client=client) as event:
        document = event.documents['plaintext']
        pipeline.run(document)
        for norm_form in document.get_label_index('norm_forms'):
            if norm_form.text == "according":
                assert norm_form.norm == "accord"
            if norm_form.text == "expressing":
                assert norm_form.norm == "express"
            if norm_form.text == "receiving":
                assert norm_form.norm == "receive"
            if norm_form.text == "days":
                assert norm_form.norm == "day"
Example #21
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument('--events-service', default='localhost:10100')
    parser.add_argument('--sentences-service', default='localhost:10102')
    conf = parser.parse_args(args)
    with Pipeline(
            RemoteProcessor(
                'biomedicus-sentences',
                address=conf.sentences_service)) as pipeline, EventsClient(
                    address=conf.events_service) as events_client:
        text = sys.stdin.read()
        with Event(client=events_client) as event:
            doc = event.create_document('plaintext', text)
            result = pipeline.run(doc)
            for sentence in doc.get_label_index('sentences'):
                print('S: "', sentence.text, '"')
            for k, v in result[0].timing_info.items():
                print('{}: {}'.format(k, v))
Example #22
def test_disc_pipeline(disc_python_events, disc_python_processor, disc_java_processor):
    with EventsClient(address=disc_python_events) as client, mtap.Pipeline(
            RemoteProcessor('mtap-example-processor-python', address='localhost:50501',
                            params={'do_work': True}),
            RemoteProcessor('mtap-example-processor-java', address='localhost:50502',
                            params={'do_work': True})
    ) as pipeline:
        with Event(event_id='1', client=client) as event:
            event.metadata['a'] = 'b'
            document = event.create_document('plaintext', PHASERS)
            pipeline.run(document)
            letter_counts = document.get_label_index('mtap.examples.letter_counts')
            a_counts = letter_counts[0]
            assert a_counts.count == 23
            b_counts = letter_counts[1]
            assert b_counts.count == 6
            pipeline.print_times()
            thes = document.get_label_index("mtap.examples.word_occurrences")
            assert thes[0].start_index == 121
            assert thes[0].end_index == 124
Example #23
def main(args=None):
    parser = ArgumentParser(
        description=
        'Converts files from the i2b2/VA 2010 format to serialized MTAP events '
        'containing the ')
    parser.add_argument(
        'input_directory',
        type=Path,
        help=
        'An input directory containing a "txt" folder containing text files '
        'and an "ast" folder containing the assertions in the i2b2/VA '
        'pipe-delimited format.')
    parser.add_argument(
        'output_directory',
        type=Path,
        help='An output directory to write the serialized mtap events to.')
    parser.add_argument('--target-document', default='plaintext')
    parser.add_argument('--serializer',
                        default='pickle',
                        choices=standard_serializers.keys(),
                        help='The serializer to use.')
    parser.add_argument('--events', help="Address of the events client.")
    parser.add_argument('--tagger', help="Address of the pos tagger to use.")

    conf = parser.parse_args(args)

    serializer = standard_serializers[conf.serializer]

    with EventsClient(address=conf.events) as client, Pipeline(
            LocalProcessor(OnePerLineSentencesProcessor(),
                           component_id='sentences',
                           client=client),
            RemoteProcessor('biomedicus-tnt-tagger', address=conf.tagger),
            LocalProcessor(SerializationProcessor(
                serializer, output_dir=conf.output_directory),
                           component_id='serializer',
                           client=client)) as pipeline:
        pipeline.run_multithread(
            events(conf.input_directory, conf.target_document, client=client))
        pipeline.print_times()
Example #24
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument('input',
                        metavar='INPUT_DIR',
                        help='A folder containing PTB formatted documents.')
    parser.add_argument('output',
                        metavar='OUTPUT_DIR',
                        help='A folder to write the json files to.')
    parser.add_argument('--glob', metavar='GLOB', default='*.mrg')
    parser.add_argument('--events',
                        metavar='EVENTS',
                        default=None,
                        help='The address of the events service.')
    parser.add_argument('--ptb-reader',
                        metavar='READER',
                        default=None,
                        help='The address of the PTB Reader.')
    args = parser.parse_args(args)
    with EventsClient(address=args.events) as client, Pipeline(
            RemoteProcessor('ptb-reader',
                            address=args.ptb_reader,
                            params={
                                'source_document_name': 'source',
                                'target_document_name': 'gold',
                                'pos_tags_index': 'gold_tags'
                            }),
            LocalProcessor(SerializationProcessor(JsonSerializer,
                                                  output_dir=args.output),
                           component_id='serializer',
                           client=client)) as pipeline:
        for f in Path(args.input).rglob(args.glob):
            print('Reading:', f)
            with f.open('r') as r:
                text = r.read()
            with Event(event_id=f.name, client=client) as event:
                d = Document('source', text=text)
                event.add_document(d)
                pipeline.run(event)
Example #25
def main(args=None):
    parser = ArgumentParser()
    parser.add_argument("input_directory", metavar="INPUT_DIR")
    parser.add_argument("output_directory", metavar="OUTPUT_DIR")
    parser.add_argument("--events")
    parser.add_argument("--rtf")
    parser.add_argument("--tagger")
    parser.add_argument("--acronyms")
    parser.add_argument("--sentences")
    args = parser.parse_args(args)

    input_dir = Path(args.input_directory)
    with EventsClient(address=args.events) as client, Pipeline(
            RemoteProcessor('rtf-processor',
                            address=args.rtf,
                            params={
                                'binary_data_name': 'rtf',
                                'output_document_name': 'plaintext'
                            }),
            RemoteProcessor('sentences',
                            address=args.sentences,
                            params={'document_name': 'plaintext'}),
            RemoteProcessor('tnt-tagger',
                            address=args.tagger,
                            params={'document_name': 'plaintext'}),
            RemoteProcessor('acronyms', address=args.acronyms),
            LocalProcessor(SerializationProcessor(
                JsonSerializer, output_dir=args.output_directory),
                           component_id='serialize',
                           client=client)) as pipeline:
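        # Each event stores the raw RTF bytes under the 'rtf' binary name; the
        # rtf-processor converts them into the 'plaintext' document that the
        # downstream processors consume.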
        for path in input_dir.glob("**/*.rtf"):
            with path.open('rb') as f:
                contents = f.read()
            with Event(event_id=path.stem, client=client) as event:
                event.binaries['rtf'] = contents
                pipeline.run(event)

        pipeline.print_times()
Example #26
def call():
    client = EventsClient(address='a', _pool=object(), _channel=events_channel)
    result = client.get_label_index_info(event_id='1', document_name='plaintext')
    return result
Example #27
def call():
    client = EventsClient(address='a', _pool=object(), _channel=events_channel)
    result = client.get_all_binary_data_names(event_id='1')
    return result
Example #28
class Pipeline(MutableSequence['processing.ComponentDescriptor']):
    """An object which can be used to build and run a pipeline of remote and local processors.

    Pipelines are a :obj:`~typing.MutableSequence` containing
    one or more :obj:`~mtap.processing.pipeline.ComponentDescriptor`,
    so a pipeline can be modified after creation using this functionality.

    Args:
        *components (ComponentDescriptor):
            A list of component descriptors created using :class:`RemoteProcessor` or
            :class:`LocalProcessor`.

    Keyword Args:
        name (~typing.Optional[str]): An optional name for the pipeline, defaults to 'pipeline'.
        config (~typing.Optional[Config]): An optional config override.

    Examples:
        Remote pipeline with name discovery:

        >>> with mtap.Events() as events, mtap.Pipeline(
        >>>         RemoteProcessor('processor-1-id'),
        >>>         RemoteProcessor('processor-2-id'),
        >>>         RemoteProcessor('processor-3-id')
        >>>     ) as pipeline:
        >>>     for txt in txts:
        >>>         with events.open_event() as event:
        >>>             document = event.add_document('plaintext', txt)
        >>>             results = pipeline.run(document)

        Remote pipeline using addresses:

        >>> with mtap.Events(address='localhost:50051') as events, mtap.Pipeline(
        >>>         RemoteProcessor('processor-1-name', address='localhost:50052'),
        >>>         RemoteProcessor('processor-2-id', address='localhost:50053'),
        >>>         RemoteProcessor('processor-3-id', address='localhost:50054')
        >>>     ) as pipeline:
        >>>     for txt in txts:
        >>>         event = events.open_event()
        >>>         document = event.add_document('plaintext', txt)
        >>>         results = pipeline.run(document)

        Modifying a pipeline:

        >>> pipeline = Pipeline(RemoteProcessor('foo', address='localhost:50000'),
        >>>                     RemoteProcessor('bar', address='localhost:50000'))
        >>> pipeline
        Pipeline(RemoteProcessor(processor_id='foo', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='bar', address='localhost:50000', component_id=None, params=None))
        >>> pipeline.append(RemoteProcessor('baz', address='localhost:50001'))
        >>> pipeline
        Pipeline(RemoteProcessor(processor_id='foo', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='bar', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='baz', address='localhost:50001', component_id=None, params=None))
        >>> del pipeline[1]
        >>> pipeline
        Pipeline(RemoteProcessor(processor_id='foo', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='baz', address='localhost:50001', component_id=None, params=None))
        >>> pipeline[1] = RemoteProcessor(processor_id='bar', address='localhost:50003')
        >>> pipeline
        Pipeline(RemoteProcessor(processor_id='foo', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='bar', address='localhost:50003', component_id=None, params=None))
        >>> pipeline += list(pipeline)  # Putting in a new list to prevent an infinite recursion
        >>> pipeline
        Pipeline(RemoteProcessor(processor_id='foo', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='bar', address='localhost:50003', component_id=None, params=None),
                 RemoteProcessor(processor_id='foo', address='localhost:50000', component_id=None, params=None),
                 RemoteProcessor(processor_id='bar', address='localhost:50003', component_id=None, params=None))

    Attributes:
        name (str): The pipeline's name.
    """
    __slots__ = [
        '_component_ids', 'name', '_component_descriptors', 'events_address',
        'mp_config', '_created_events_client', '_events_client', 'times_map',
        '__components'
    ]

    def __init__(self,
                 *components: 'processing.ComponentDescriptor',
                 name: Optional[str] = None,
                 events_address: Optional[str] = None,
                 events_client: Optional[EventsClient] = None,
                 mp_config: Optional[MpConfig] = None):
        self._component_ids = {}
        self.name = name or 'pipeline'
        self._component_descriptors = list(components)
        self.events_address = events_address
        self._created_events_client = False
        self._events_client = None
        if events_client is not None:
            self.events_client = events_client
        self.mp_config = mp_config or MpConfig()
        self.times_map = {}

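    # Custom pickling support, presumably so the pipeline can be shipped to
    # worker processes by run_multithread's multiprocessing machinery.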
    def __reduce__(self):
        return _create_pipeline, (self.name, self.events_address,
                                  self._events_client, self.mp_config) + tuple(
                                      self._component_descriptors)

    @staticmethod
    def from_yaml_file(conf_path: Union[pathlib.Path, str]) -> 'Pipeline':
        """Creates a pipeline from a yaml pipeline configuration file.

        Args:
            conf_path (str or pathlib.Path): The path to the configuration file.

        Returns:
            Pipeline object from the configuration.

        """
        conf_path = pathlib.Path(conf_path)
        from yaml import load
        try:
            from yaml import CLoader as Loader
        except ImportError:
            from yaml import Loader
        with conf_path.open('rb') as f:
            conf = load(f, Loader=Loader)
        return Pipeline.load_configuration(conf)

    @staticmethod
    def load_configuration(conf: Dict) -> 'Pipeline':
        """Creates a pipeline from a pipeline configuration dictionary.

        Args:
            conf (Dict): The pipeline configuration dictionary.

        Returns:
            Pipeline created from the configuration.

        """
        name = conf.get('name', None)
        events_address = conf.get('events_address', None) or conf.get(
            'events_addresses', None)
        components = []
        conf_components = conf.get('components', [])
        for conf_component in conf_components:
            components.append(
                RemoteProcessor(processor_id=conf_component['processor_id'],
                                address=conf_component['address'],
                                component_id=conf_component.get(
                                    'component_id', None),
                                params=dict(conf_component.get('params', {}))))
        mp_config = MpConfig.from_configuration(conf.get('mp_config', {}))
        return Pipeline(*components,
                        name=name,
                        events_address=events_address,
                        mp_config=mp_config)

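    # Lazily create an events client from `events_address` on first access,
    # remembering that this pipeline created it (rather than the caller).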
    @property
    def events_client(self) -> EventsClient:
        if self._events_client is not None:
            return self._events_client
        self._created_events_client = True
        self._events_client = EventsClient(address=self.events_address)
        return self._events_client

    @events_client.setter
    def events_client(self, value: EventsClient):
        self._events_client = value

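    # Pipeline components are instantiated lazily from their descriptors on
    # first access and torn down via the deleter below.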
    @property
    def _components(self) -> 'List[processing.ProcessingComponent]':
        try:
            return self.__components
        except AttributeError:
            self.__components = [
                desc.create_pipeline_component(self._component_ids,
                                               lambda: self.events_client)
                for desc in self._component_descriptors
            ]
            return self.__components

    @_components.deleter
    def _components(self):
        for component in self.__components:
            component.close()
        del self.__components

    def run_multithread(self,
                        source: Union[Iterable[Union['mtap.Document',
                                                     'mtap.Event']],
                                      'processing.ProcessingSource'],
                        *,
                        params: Optional[Dict[str, Any]] = None,
                        show_progress: Optional[bool] = None,
                        total: Optional[int] = None,
                        close_events: Optional[bool] = None,
                        max_failures: Optional[int] = None,
                        workers: Optional[int] = None,
                        read_ahead: Optional[int] = None,
                        mp_context=None):
        """Runs this pipeline on a source which provides multiple documents / events.

        Concurrency is per-event, with each event being provided a thread which runs it through the
        pipeline.

        Args:
            source (~typing.Union[~typing.Iterable[~typing.Union[Event, Document]], ProcessingSource]):
                A generator of events or documents to process. This should be an
                :obj:`~typing.Iterable` of either :obj:`Event` or :obj:`Document` objects or a
                :obj:`~mtap.processing.ProcessingSource`.
            params (~typing.Optional[dict[str, ~typing.Any]]):
                Json object containing params specific to processing this event, the existing params
                dictionary defined in :func:`~PipelineBuilder.add_processor` will be updated with
                the contents of this dict.
            show_progress (~typing.Optional[bool]):
                Whether to print a progress bar using tqdm.
            total (~typing.Optional[int]):
                An optional argument indicating the total number of events / documents that will be
                provided by the iterable, for the progress bar.
            close_events (~typing.Optional[bool]):
                Whether the pipeline should close events after they have been fully processed
                through all components.
            max_failures (~typing.Optional[int]):
                The number of acceptable failures. Once this amount is exceeded processing will
                halt. Note that, because of the nature of concurrency, processing may continue for a
                short amount of time before termination.
            workers (~typing.Optional[int]):
                The number of threads to process documents on.
            read_ahead (~typing.Optional[int]):
                The number of source documents to read ahead into memory before processing.
            mp_context (multiprocessing context, optional):
                An optional override for the multiprocessing context.

        Examples:
            >>> docs = list(Path('abc/').glob('*.txt'))
            >>> def document_source():
            >>>     for path in docs:
            >>>         with path.open('r') as f:
            >>>             txt = f.read()
            >>>         with Event(event_id=path.name, client=client) as event:
            >>>             doc = event.create_document('plaintext', txt)
            >>>             yield doc
            >>>
            >>> pipeline.run_multithread(document_source(), total=len(docs))

        """
        show_progress = show_progress if show_progress is not None else self.mp_config.show_progress
        close_events = close_events if close_events is not None else self.mp_config.close_events
        max_failures = max_failures if max_failures is not None else self.mp_config.max_failures
        workers = workers if workers is not None else self.mp_config.workers
        mp_context = (multiprocessing.get_context(
            self.mp_config.mp_start_method)
                      if mp_context is None else mp_context)
        read_ahead = read_ahead if read_ahead is not None else self.mp_config.read_ahead
        with _PipelineMultiRunner(self, source, params, show_progress, total,
                                  close_events, max_failures, workers,
                                  read_ahead, mp_context) as runner:
            runner.run()

    def run(
        self,
        target: Union['mtap.Event', 'mtap.Document'],
        *,
        params: Optional[Dict[str,
                              Any]] = None) -> 'processing.PipelineResult':
        """Processes the event/document using all of the processors in the pipeline.

        Args:
            target (~typing.Union[Event, Document]): Either an event or a document to process.
            params (dict[str, ~typing.Any]):
                Json object containing params specific to processing this event, the existing params
                dictionary defined in :func:`~PipelineBuilder.add_processor` will be updated with
                the contents of this dict.

        Returns:
            PipelineResult: The results of all the processors in the pipeline.

        Examples:
            >>> e = mtap.Event()
            >>> document = mtap.Document('plaintext', text="...", event=e)
            >>> with Pipeline(...) as pipeline:
            >>>     pipeline.run(document)
            >>>     # is equivalent to pipeline.run(document.event, params={'document_name': document.document_name})

            The 'document_name' param is used to indicate to :obj:`~mtap.DocumentProcessor`
            which document on the event to process.
        """
        event, params = _event_and_params(target, params)
        event_id = event.event_id

        result = self._run_by_event_id(event_id,
                                       event.event_service_instance_id, params)
        self._add_result_times(result)

        for component_result in result.component_results:
            try:
                event.add_created_indices(component_result.created_indices)
            except AttributeError:
                pass
        return result

    def _run_by_event_id(self, event_id, event_service_instance_id, params):
        start = datetime.now()
        # Call each component in order, collecting its raw result tuple.
        results = [
            component.call_process(event_id, event_service_instance_id, params)
            for component in self._components
        ]
        total = datetime.now() - start
        results = [
            _base.ProcessingResult(identifier=component.component_id,
                                   result_dict=result[0],
                                   timing_info=result[1],
                                   created_indices=result[2])
            for component, result in zip(self._components, results)
        ]
        logger.debug('Finished processing event_id: %s', event_id)
        return _base.PipelineResult(results, total)

    def _add_result_times(self, result):
        # Record each component's times under "<component_id>:<key>" and the
        # whole-pipeline elapsed time under "<pipeline name>total"; these
        # prefixes are stripped again in processor_timer_stats and
        # pipeline_timer_stats.
        times = {}
        for component_id, _, component_times, _ in result.component_results:
            times.update({
                component_id + ':' + k: v
                for k, v in component_times.items()
            })
        times[self.name + 'total'] = result.elapsed_time
        _timing.add_times(self.times_map, times)

    @overload
    def processor_timer_stats(self) -> 'List[processing.AggregateTimingInfo]':
        """Returns the timing information for all processors.

        Returns:
            List[AggregateTimingInfo]:
                A list of timing info objects, one for each processor, in the same order
                that the processors were added to the pipeline.
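
        Examples:
            A sketch, assuming the pipeline has already processed at least one
            event:

            >>> for timing_info in pipeline.processor_timer_stats():
            ...     timing_info.print_times()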
        """
        ...

    @overload
    def processor_timer_stats(
            self, identifier: str) -> 'processing.AggregateTimingInfo':
        """Returns the timing info for one processor.

        Args:
            identifier (str): The pipeline component_id of the processor to return
                timing info for.

        Returns:
            AggregateTimingInfo: The timing info for the specified processor.

        """
        ...

    def processor_timer_stats(self, identifier=None):
        if identifier is not None:
            # Gather stats for keys prefixed with "<identifier>:", then strip
            # the prefix from the returned timer names.
            aggregates = _timing.create_timer_stats(self.times_map,
                                                    identifier + ':')
            aggregates = {
                k[(len(identifier) + 1):]: v
                for k, v in aggregates.items()
            }
            return _base.AggregateTimingInfo(identifier=identifier,
                                             timing_info=aggregates)
        timing_infos = []
        for component in self._components:
            component_id = component.component_id
            aggregates = _timing.create_timer_stats(self.times_map,
                                                    component_id + ':')
            aggregates = {
                k[(len(component_id) + 1):]: v
                for k, v in aggregates.items()
            }
            timing_infos.append(
                _base.AggregateTimingInfo(identifier=component_id,
                                          timing_info=aggregates))

        return timing_infos

    def pipeline_timer_stats(self) -> 'processing.AggregateTimingInfo':
        """The aggregated statistics for the global runtime of the pipeline.

        Returns:
            AggregateTimingInfo: The timing stats for the global runtime of the pipeline.
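
        Examples:
            A sketch, assuming the pipeline has already processed at least one
            event:

            >>> pipeline.pipeline_timer_stats().print_times()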

        """
        pipeline_id = self.name
        aggregates = _timing.create_timer_stats(self.times_map, pipeline_id)
        aggregates = {k[len(pipeline_id):]: v for k, v in aggregates.items()}
        return _base.AggregateTimingInfo(identifier=self.name,
                                         timing_info=aggregates)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Closes any open connections to remote processors.
        """
        for component in self._components:
            try:
                component.close()
            except AttributeError:
                pass
        if self._created_events_client:
            self._events_client.close()

    def as_processor(self) -> 'processing.EventProcessor':
        """Returns the pipeline as a processor.

        Returns:
            EventProcessor: An event processor that can be added to other pipelines or hosted.
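
        Examples:
            A sketch of nesting one pipeline inside another; the component id,
            address, and events client here are illustrative:

            >>> inner = Pipeline(RemoteProcessor('sentences', address='localhost:10001'))
            >>> outer = Pipeline(
            ...     LocalProcessor(inner.as_processor(), component_id='inner',
            ...                    client=events_client)
            ... )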
        """
        return _PipelineProcessor(self._components)

    def print_times(self):
        """Prints all of the times collected during this pipeline using :func:`print`.
        """
        self.pipeline_timer_stats().print_times()
        for pipeline_timer in self.processor_timer_stats():
            pipeline_timer.print_times()

    def __getitem__(self, item):
        return self._component_descriptors[item]

    def __setitem__(self, key, value):
        self._clear_components()
        self._component_descriptors[key] = value

    def __delitem__(self, key):
        self._clear_components()
        del self._component_descriptors[key]

    def __len__(self):
        return len(self._component_descriptors)

    def _clear_components(self):
        # Drop the cached, instantiated components so they are re-created from
        # the updated component descriptors on next use.
        try:
            del self._components
        except AttributeError:
            pass

    def insert(self, index, o) -> None:
        self._clear_components()
        self._component_descriptors.insert(index, o)

    def __repr__(self):
        return "Pipeline(" + ', '.join(
            [repr(component)
             for component in self._component_descriptors]) + ')'
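
The container methods above (__getitem__, __setitem__, __delitem__, __len__, and
insert) let a Pipeline be edited like a mutable sequence of component
descriptors. A minimal sketch, with illustrative component names and addresses:

pipeline = Pipeline(
    RemoteProcessor('sentences', address='localhost:10001'),
    RemoteProcessor('tagger', address='localhost:10002'),
)
print(len(pipeline))  # 2
first = pipeline[0]   # descriptor for the 'sentences' component
del pipeline[1]       # remove the tagger descriptor...
pipeline.insert(1, RemoteProcessor('tagger', address='localhost:10002'))  # ...and re-add it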
Example #29
def events_client(self) -> EventsClient:
    # Lazily create an events client from the configured address the first
    # time it is requested, recording that this pipeline owns it (so that
    # close() will also close the client).
    if self._events_client is not None:
        return self._events_client
    self._created_events_client = True
    self._events_client = EventsClient(address=self.events_address)
    return self._events_client
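
This fragment lazily creates an events client on first access and records that
the pipeline owns it. Assuming it is exposed as a property on the pipeline, it
would pair with the close() logic shown in the Pipeline class above roughly as
follows (a sketch; the address is illustrative):

pipeline.events_address = 'localhost:10100'
client = pipeline.events_client           # created lazily on first access
assert pipeline.events_client is client   # the same client is reused
pipeline.close()                          # closed here, since the pipeline created it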
Example #30
# Copyright 2019 Regents of the University of Minnesota.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hello world tutorial pipeline."""
import sys

if __name__ == '__main__':
    from mtap import Document, Event, EventsClient, Pipeline, RemoteProcessor

    with EventsClient(address=sys.argv[1]) as client, \
            Pipeline(
                RemoteProcessor(processor_id='hello', address=sys.argv[2])
            ) as pipeline:
        with Event(event_id='1', client=client) as event:
            document = Document(document_name='name', text='YOUR NAME')
            event.add_document(document)
            pipeline.run(document)
            index = document.get_label_index('hello')
            for label in index:
                print(label.response)
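
Assuming an events service and the hello example processor are already running,
the script is launched with their two addresses as command-line arguments; a
hypothetical invocation (script name and ports are illustrative):

python hello.py localhost:10100 localhost:10101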