Ejemplo n.º 1
0
    def load(self):
        """Read the reports CSV and build a sentence-split BioC collection.

        The file at ``self.reports_path`` is read without a header row and
        its column is labelled ``REPORTS``. Every report is passed through
        ``self.clean`` and wrapped in a BioC document whose id is the
        report's enumeration index rendered as a string. When
        ``self.extract_impression`` is truthy the document is additionally
        run through the section splitter and
        ``self.extract_impression_from_passages``. Finally the document is
        handed to ``self.splitter.split_doc`` and added to the collection.

        On success ``self.reports`` holds the report texts as read from the
        CSV (not the cleaned versions) and ``self.collection`` holds the
        populated ``bioc.BioCCollection``. Neither attribute is assigned if
        processing fails part-way through.

        Raises:
            AssertionError: if a processed document does not contain
                exactly one passage (only when assertions are enabled).
        """
        bioc_collection = bioc.BioCCollection()

        frame = pd.read_csv(self.reports_path, header=None, names=[REPORTS])
        raw_reports = frame[REPORTS].tolist()

        for index, raw_text in enumerate(raw_reports):
            doc = text2bioc.text2document(str(index), self.clean(raw_text))

            if self.extract_impression:
                doc = section_split.split_document(doc)
                # Return value is ignored, so this presumably trims the
                # document in place -- TODO confirm against the helper.
                self.extract_impression_from_passages(doc)

            sentences = self.splitter.split_doc(doc)

            # Downstream stages rely on exactly one passage per document.
            assert len(sentences.passages) == 1, (
                'Each document must have a single passage, '
                'the Impression section.'
            )

            bioc_collection.add_document(sentences)

        self.reports = raw_reports
        self.collection = bioc_collection
Ejemplo n.º 2
0
def get_bioc_collection(df):
    """Build a BioC collection from the "Report Impression" column of *df*.

    Each entry of ``df["Report Impression"]`` becomes one BioC document
    whose id is the entry's enumeration index rendered as a string. Every
    document is passed through a ``NegBioSSplitter`` before being added.

    Returns:
        The populated ``bioc.BioCCollection``.
    """
    result = bioc.BioCCollection()
    sentence_splitter = NegBioSSplitter()
    for index, impression in enumerate(df["Report Impression"]):
        raw_doc = text2bioc.text2document(str(index), impression)
        result.add_document(sentence_splitter.split_doc(raw_doc))
    return result
Ejemplo n.º 3
0
 def load(self):
     """Convert ``self.report`` into a sentence-split BioC collection.

     Each item yielded by ``self.report`` is passed through ``self.clean``,
     wrapped in a BioC document whose id is the item's enumeration index
     rendered as a string, and handed to ``self.splitter.split_doc``. The
     resulting collection is stored on ``self.collection`` once every item
     has been processed.

     Raises:
         AssertionError: if a processed document does not contain exactly
             one passage (only when assertions are enabled).
     """
     bioc_collection = bioc.BioCCollection()
     for index, text in enumerate(self.report):
         doc = text2bioc.text2document(str(index), self.clean(text))
         sentences = self.splitter.split_doc(doc)
         # Downstream stages rely on exactly one passage per document.
         assert len(sentences.passages) == 1, (
             'Each document must have a single passage, '
             'the Impression section.'
         )
         bioc_collection.add_document(sentences)
     self.collection = bioc_collection
Ejemplo n.º 4
0
    def prep_collection(self):
        """Turn ``self.reports`` into a sentence-split BioC collection.

        Every report is passed through ``self.clean`` and wrapped in a BioC
        document whose id is the report's enumeration index rendered as a
        string. When ``self.extract_impression`` is truthy the document is
        additionally run through the section splitter and
        ``self.extract_impression_from_passages``. The document is then
        handed to ``self.splitter.split_doc`` and added to the collection,
        which is stored on ``self.collection`` after all reports have been
        processed.

        Raises:
            AssertionError: if a processed document does not contain
                exactly one passage (only when assertions are enabled).
        """
        bioc_collection = bioc.BioCCollection()

        for index, raw_text in enumerate(self.reports):
            doc = text2bioc.text2document(str(index), self.clean(raw_text))

            if self.extract_impression:
                doc = section_split.split_document(doc)
                # Return value is ignored, so this presumably trims the
                # document in place -- TODO confirm against the helper.
                self.extract_impression_from_passages(doc)

            sentences = self.splitter.split_doc(doc)

            # Downstream stages rely on exactly one passage per document.
            assert len(sentences.passages) == 1, (
                'Each document must have a single passage, '
                'the Impression section.'
            )

            bioc_collection.add_document(sentences)

        self.collection = bioc_collection