Ejemplo n.º 1
0
    def get_data_df(self):
        """Build a table with one row per (predicate, agent, patient) triple.

        The predicates are the tokens listed in ``self.doc._.lex_matches``.
        For each one, a row is emitted for every combination of its
        ``tok._.agents`` and ``tok._.patients``; when either collection is
        empty (or falsy) a single ``None`` placeholder is used so the
        predicate still yields at least one row.

        Columns:
            i, sent_i, t: ``self.first_i`` plus, respectively, the token
                index, the start of the token's sentence, and the index of
                ``tok._.subsent_root``.
            neg: value of ``tok._.negated``.
            lemma, text: the token's lemma and surface text.
            R_agent, R_patient: text of the agent/patient root, or ``None``.
            L_<name>: 1.0 when ``<name>`` (looked up through ``doc.vocab``)
                is among ``tok._.lex`` for the row's token, 0 otherwise.

        Returns:
            A ``pandas.DataFrame`` sorted by ``i``. When there are no
            predicates an empty frame with the fixed (non ``L_``) columns is
            returned.
        """
        doc = self.doc
        i_0 = self.first_i

        predicates = doc._.lex_matches
        n = len(predicates)

        t = Timer()
        t.start()

        # Fixed columns, in the order they appear in every row.
        base_cols = ['i', 'sent_i', 't', 'neg', 'lemma', 'text',
                     'R_agent', 'R_patient']

        data = []
        for tok in predicates:
            # Fields that depend only on the token are built once per token
            # instead of once per (agent, patient) combination.
            tok_fields = {
                'i': i_0 + tok.i,
                'sent_i': i_0 + tok.sent.start,
                't': i_0 + tok._.subsent_root.i,
                'neg': tok._.negated,
                'lemma': tok.lemma_,
                'text': tok.text,
            }
            lex_flags = {('L_' + doc.vocab[cat].text): 1.0
                         for cat in tok._.lex}

            for agent in (tok._.agents or [None]):
                for patient in (tok._.patients or [None]):
                    data.append({
                        **tok_fields,
                        'R_agent': agent.root.text if agent else None,
                        'R_patient': patient.root.text if patient else None,
                        **lex_flags,
                    })

        if data:
            table = pd.DataFrame(data).sort_values('i')
        else:
            # With no rows the frame would have no 'i' column and
            # sort_values('i') would raise KeyError.
            table = pd.DataFrame(columns=base_cols)

        # Rows whose token lacks a given category get NaN for that L_ column;
        # normalise those to 0.
        predicate_cols = [c for c in table.columns if c.startswith('L_')]
        if predicate_cols:
            table[predicate_cols] = table[predicate_cols].fillna(0)

        t.stop()
        logger.debug('%d predicates (%d distinct) [%s]', len(table.index), n,
                     t)

        return table
Ejemplo n.º 2
0
 def get_entities_df(self):
     """Return a DataFrame built from the entity classifier's output.

     A classifier is obtained from ``proc_ent.entity_classifier`` using the
     document's vocab, applied to ``self.doc``, and its result is wrapped in
     a ``pandas.DataFrame``. The elapsed time and the number of rows are
     logged at debug level.
     """
     timer = Timer()
     timer.start()

     doc = self.doc
     classify = proc_ent.entity_classifier(doc.vocab)
     entities = pd.DataFrame(classify(doc))

     timer.stop()
     logger.debug('%d entities [%s]', len(entities.index), timer)
     return entities