Esempio n. 1
0
class DefaultPipeline:
    """The biomedicus default pipeline for processing clinical documents.

    Instances can be used as context managers: leaving the ``with`` block closes
    the pipeline and, when this object created the events client itself, the
    events client too.

    Attributes:
        events_client (mtap.EventsClient): An MTAP events client used by the pipeline.
        pipeline (mtap.Pipeline): An MTAP pipeline to use to process documents.
        close_client (bool): ``True`` when the events client was created here from
            ``conf.events_address`` (and so is closed on exit), ``False`` when the
            caller supplied it.

    """
    def __init__(self, conf: PipelineConf, *, events_client: EventsClient = None):
        """Build the events client (if needed) and the processing pipeline.

        Args:
            conf: Pipeline configuration. This constructor reads its
                ``events_address``, the ``*_id`` / ``*_address`` pair of every
                processor, ``use_discovery``, ``serializer``,
                ``output_directory`` and ``include_label_text``.
            events_client: An existing events client to use. When given it takes
                precedence over ``conf.events_address`` and is not closed by
                this object.

        Raises:
            ValueError: If neither ``events_client`` nor ``conf.events_address``
                is provided.
        """
        # NOTE(review): presumably fills in the processor addresses on conf;
        # confirm against PipelineConf.
        conf.populate_addresses()
        if events_client is not None:
            # Caller owns the client, so it must not be closed in __exit__.
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        # (identifier, address) pairs, in the order the processors are added.
        pipeline = [
            (conf.sentences_id, conf.sentences_address),
            (conf.section_headers_id, conf.section_headers_address),
            (conf.tagger_id, conf.tagger_address),
            (conf.acronyms_id, conf.acronyms_address),
            (conf.concepts_id, conf.concepts_address),
            (conf.negation_id, conf.negation_address),
            (conf.selective_dependencies_id, conf.selective_dependencies_address),
            (conf.deepen_id, conf.deepen_address)
        ]
        if conf.use_discovery:
            # With discovery only the identifiers are passed; the configured
            # addresses are ignored.
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline]
            )
        else:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier, address=addr) for identifier, addr in pipeline]
            )
        if conf.serializer is not None:
            # Optional final step: a local serialization processor appended
            # after the remote processors.
            serialization_proc = SerializationProcessor(get_serializer(conf.serializer),
                                                        conf.output_directory,
                                                        include_label_text=conf.include_label_text)
            ser_comp = LocalProcessor(serialization_proc, component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)

    def process_text(self, text: str, *, event_id: str = None) -> ProcessingResult:
        """Run the pipeline on a piece of text.

        A new event is created with ``event_id`` and this object's events
        client, the text is stored on it as a document named ``'plaintext'``,
        and the pipeline is run on that document. The event is used as a
        context manager, so it is exited before the result is returned.

        Args:
            text: The text to process.
            event_id: Identifier passed to the created event; ``None`` by default.

        Returns:
            Whatever ``self.pipeline.run`` returns for the document.
        """
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            return self.pipeline.run(document)

    def __enter__(self):
        """Return this pipeline so it can be bound by a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the pipeline and, if owned by this object, the events client."""
        try:
            self.pipeline.close()
        finally:
            # Release the owned client even when closing the pipeline fails;
            # otherwise the client created in __init__ would leak.
            if self.close_client:
                self.events_client.close()
Esempio n. 2
0
class DefaultPipeline:
    """A pipeline of remote processors with an optional serialization step.

    Instances can be used as context managers: leaving the ``with`` block closes
    the pipeline and, when this object created the events client itself, the
    events client too.

    Attributes:
        events_client (EventsClient): The events client used by the pipeline.
        pipeline (Pipeline): The pipeline used to process documents.
        close_client (bool): ``True`` when the events client was created here from
            ``conf.events_address`` (and so is closed on exit), ``False`` when the
            caller supplied it.

    """
    def __init__(self,
                 conf: DefaultPipelineConf,
                 *,
                 events_client: EventsClient = None):
        """Build the events client (if needed) and the processing pipeline.

        Args:
            conf: Pipeline configuration. This constructor reads its
                ``events_address``, the ``*_id`` / ``*_address`` pair of every
                processor, ``use_discovery``, ``threads``, ``serializer``,
                ``output_directory`` and ``include_label_text``.
            events_client: An existing events client to use. When given it takes
                precedence over ``conf.events_address`` and is not closed by
                this object.

        Raises:
            ValueError: If neither ``events_client`` nor ``conf.events_address``
                is provided.
        """
        if events_client is not None:
            # Caller owns the client, so it must not be closed in __exit__.
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        # (identifier, address) pairs, in the order the processors are added.
        pipeline = [(conf.sentences_id, conf.sentences_address),
                    (conf.tagger_id, conf.tagger_address),
                    (conf.acronyms_id, conf.acronyms_address),
                    (conf.concepts_id, conf.concepts_address),
                    (conf.negation_id, conf.negation_address)]
        if conf.use_discovery:
            # With discovery only the identifiers are passed; the configured
            # addresses are ignored.
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline],
                n_threads=conf.threads)
        else:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier, address=addr)
                  for identifier, addr in pipeline],
                n_threads=conf.threads)
        if conf.serializer is not None:
            # Optional final step: a local serialization processor appended
            # after the remote processors.
            serialization_proc = SerializationProcessor(
                get_serializer(conf.serializer),
                conf.output_directory,
                include_label_text=conf.include_label_text)
            ser_comp = LocalProcessor(serialization_proc,
                                      component_id='serializer',
                                      client=self.events_client)
            self.pipeline.append(ser_comp)

    def process_text(self,
                     text: str,
                     *,
                     event_id: str = None) -> ProcessingResult:
        """Run the pipeline on a piece of text.

        A new event is created with ``event_id`` and this object's events
        client, the text is stored on it as a document named ``'plaintext'``,
        and the pipeline is run on that document. The event is used as a
        context manager, so it is exited before the result is returned.

        Args:
            text: The text to process.
            event_id: Identifier passed to the created event; ``None`` by default.

        Returns:
            Whatever ``self.pipeline.run`` returns for the document.
        """
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            return self.pipeline.run(document)

    def __enter__(self):
        """Return this pipeline so it can be bound by a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the pipeline and, if owned by this object, the events client."""
        try:
            self.pipeline.close()
        finally:
            # Release the owned client even when closing the pipeline fails;
            # otherwise the client created in __init__ would leak.
            if self.close_client:
                self.events_client.close()
Esempio n. 3
0
class DefaultPipeline:
    """The biomedicus default pipeline for processing clinical documents.

    Instances can be used as context managers: leaving the ``with`` block closes
    the pipeline and, when this object created the events client itself, the
    events client too.

    Attributes:
        events_client (mtap.EventsClient): An MTAP events client used by the pipeline.
        pipeline (mtap.Pipeline): An MTAP pipeline to use to process documents.
        close_client (bool): ``True`` when the events client was created here from
            ``conf.events_address`` (and so is closed on exit), ``False`` when the
            caller supplied it.

    """
    def __init__(self,
                 conf: PipelineConf,
                 *,
                 events_client: EventsClient = None):
        """Build the events client (if needed) and the processing pipeline.

        Args:
            conf: Pipeline configuration. This constructor reads its
                ``events_address``, ``sentences_id`` / ``sentences_address``,
                ``tagger_id`` / ``tagger_address`` and ``use_discovery``.
            events_client: An existing events client to use. When given it takes
                precedence over ``conf.events_address`` and is not closed by
                this object.

        Raises:
            ValueError: If neither ``events_client`` nor ``conf.events_address``
                is provided.
        """
        # NOTE(review): presumably fills in the processor addresses on conf;
        # confirm against PipelineConf.
        conf.populate_addresses()
        if events_client is not None:
            # Caller owns the client, so it must not be closed in __exit__.
            self.close_client = False
            self.events_client = events_client
        elif conf.events_address is not None:
            self.close_client = True
            self.events_client = EventsClient(address=conf.events_address)
        else:
            raise ValueError("Events client or address not specified.")

        # (identifier, address) pairs, in the order the processors are added.
        pipeline = [(conf.sentences_id, conf.sentences_address),
                    (conf.tagger_id, conf.tagger_address)]
        if conf.use_discovery:
            # With discovery only the identifiers are passed; the configured
            # addresses are ignored.
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier) for identifier, _ in pipeline])
        else:
            self.pipeline = Pipeline(
                *[RemoteProcessor(identifier, address=addr)
                  for identifier, addr in pipeline])

    def process_text(self,
                     text: str,
                     *,
                     event_id: str = None) -> ProcessingResult:
        """Run the pipeline on a piece of text.

        A new event is created with ``event_id`` and this object's events
        client, the text is stored on it as a document named ``'plaintext'``,
        and the pipeline is run on that document. The event is used as a
        context manager, so it is exited before the result is returned.

        Args:
            text: The text to process.
            event_id: Identifier passed to the created event; ``None`` by default.

        Returns:
            Whatever ``self.pipeline.run`` returns for the document.
        """
        with Event(event_id=event_id, client=self.events_client) as event:
            document = event.create_document('plaintext', text=text)
            return self.pipeline.run(document)

    def __enter__(self):
        """Return this pipeline so it can be bound by a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the pipeline and, if owned by this object, the events client."""
        try:
            self.pipeline.close()
        finally:
            # Release the owned client even when closing the pipeline fails;
            # otherwise the client created in __init__ would leak.
            if self.close_client:
                self.events_client.close()