def marking_false_negatives(current_false_negatives, modifiers, targets):
    fn_report_results = []
    print('Marking up False Negatives')
    for anno_doc in current_false_negatives.values():
        report_context = markup_context_document(anno_doc.text, modifiers, targets)
        # package this up into a class that the RadNLP utilities can use
        results = classrslts(context_document=report_context,
                             exam_type="Chest X-Ray",
                             report_text=anno_doc.text,
                             classification_result='N/A')
        fn_report_results.append(results)
    return fn_report_results
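
A minimal call sketch for this helper, assuming current_false_negatives is built from an error analysis such as DocumentClassifier.eval in Example #3 below (a dict mapping document names to annotated documents with a .text attribute) and that modifiers and targets are pyConTextNLP item collections loaded elsewhere; gold_docs and fn_docs are illustrative names:

# fn_docs would come from DocumentClassifier.eval(gold_docs) in Example #3 (illustrative)
current_false_negatives = {name: gold_docs[name] for name in fn_docs}
fn_results = marking_false_negatives(current_false_negatives, modifiers, targets)
print('Marked up {0} false-negative reports'.format(len(fn_results)))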
Example #2
    def classify_doc(self, doc, doc_name='t_m_p.txt'):
        self.last_doc_name = doc_name
        if self.modifiers is None or self.targets is None:
            print('DocumentClassifier\'s "modifiers" and/or "targets" has not been set yet.\n' +
                  'Use setModifiersTargets(modifiers, targets) or '
                  'setModifiersTargetsFromFiles(modifiers_file, targets_file) to set them up.')
            return None
        # mark up the document with pyConText targets and modifiers
        context_doc = markup_context_document(doc, self.modifiers, self.targets)
        if doc_name is not None and self.save_markups and len(context_doc.getDocumentGraph().nodes()) > 0:
            self.saved_markups_map[doc_name] = context_doc
        markups = get_document_markups(context_doc)
        annotations, relations, doc_txt = convertMarkups2DF(markups)
        # infer feature-level conclusions, then roll them up to a document-level conclusion
        matched_conclusion_types = self.feature_inferencer.process(annotations, relations)
        doc_conclusion = self.document_inferencer.process(matched_conclusion_types)
        return doc_conclusion
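
A minimal usage sketch, assuming the full DocumentClassifier from Example #3 below; the lexicon and rule file paths are hypothetical placeholders:

# hypothetical files; any of .csv/.tsv/.txt/.yml works for targets and modifiers
classifier = DocumentClassifier(targets='targets.tsv',
                                modifiers='modifiers.tsv',
                                feature_inference_rule='feature_inference_rules.csv',
                                document_inference_rule='document_inference_rules.csv',
                                expected_values=['positive'])
conclusion = classifier.classify_doc('Impression: no evidence of pneumonia.',
                                     doc_name='report_001.txt')
print(conclusion)  # document-level conclusion string from the document inferencer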
Example #3
class DocumentClassifier(object):
    def __init__(self, targets=None, modifiers=None, feature_inference_rule=None, document_inference_rule=None,
                 expected_values=None, save_markups=True):
        self.document_inferencer = DocumentInferencer(document_inference_rule)
        self.feature_inferencer = FeatureInferencer(feature_inference_rule)
        self.conclusions = []
        self.modifiers = modifiers
        self.targets = targets
        self.save_markups = save_markups
        # normalize expected values for case-insensitive comparison in predict();
        # guard against the default of None
        self.expected_values = [] if expected_values is None else [value.lower() for value in expected_values]
        self.saved_markups_map = dict()
        self.last_doc_name = ''

        if modifiers is not None and targets is not None:
            if isinstance(modifiers, str) and isinstance(targets, str):
                if modifiers.endswith(('.csv', '.tsv', '.txt', '.yml')) \
                        and targets.endswith(('.csv', '.tsv', '.txt', '.yml')):
                    self.setModifiersTargetsFromFiles(modifiers, targets)
            else:
                self.setModifiersTargets(modifiers, targets)

    def setModifiersTargets(self, modifiers, targets):
        self.modifiers = modifiers
        self.targets = targets

    def setModifiersTargetsFromFiles(self, modifiers_file, targets_file):
        self.targets = get_item_data(targets_file)
        self.modifiers = get_item_data(modifiers_file)

    def reset_saved_predictions(self):
        self.saved_markups_map = {}
        self.save_markups = True
        # clear expected values too; predict() will return 0 until they are set again
        self.expected_values = []

    def predict(self, doc, doc_name='t_m_p.txt'):
        self.last_doc_name = doc_name
        doc_conclusion = self.classify_doc(doc, doc_name)
        if doc_conclusion in self.expected_values:
            return 1
        return 0

    def eval(self, gold_docs):
        import sklearn.metrics
        import pandas as pd
        fn_docs = []
        fp_docs = []
        prediction_metrics = []
        gold_labels = [x.positive_label for x in gold_docs.values()]
        pred_labels = []
        print('Start to evaluate against reference standards...')
        for doc_name, gold_doc in gold_docs.items():
            gold_label = gold_doc.positive_label
            pred_label = self.predict(gold_doc.text, doc_name)
            pred_labels.append(pred_label)
            # differentiate false positive and false negative errors
            if gold_label == 0 and pred_label == 1:
                fp_docs.append(doc_name)
            elif gold_label == 1 and pred_label == 0:
                fn_docs.append(doc_name)

        precision = sklearn.metrics.precision_score(gold_labels, pred_labels)
        recall = sklearn.metrics.recall_score(gold_labels, pred_labels)
        f1 = sklearn.metrics.f1_score(gold_labels, pred_labels)
        # Let's use Pandas to make a confusion matrix for us
        confusion_matrix_df = pd.crosstab(pd.Series(gold_labels, name='Actual'),
                                          pd.Series(pred_labels, name='Predicted'))
        prediction_metrics.append('Precision : {0:.3f}'.format(precision))
        prediction_metrics.append('Recall :    {0:.3f}'.format(recall))
        prediction_metrics.append('F1:         {0:.3f}'.format(f1))

        return fn_docs, fp_docs, '\n'.join(prediction_metrics), confusion_matrix_df

    def predict_against(self, doc, expected_values, doc_name='t_m_p.txt'):
        doc_conclusion = self.classify_doc(doc, doc_name)
        if doc_conclusion in expected_values:
            return 1
        return 0

    def classify_doc(self, doc, doc_name='t_m_p.txt'):
        self.last_doc_name = doc_name
        if self.modifiers is None or self.targets is None:
            print('DocumentClassifier\'s "modifiers" and/or "targets" has not been set yet.\n' +
                  'Use setModifiersTargets(modifiers, targets) or '
                  'setModifiersTargetsFromFiles(modifiers_file, targets_file) to set them up.')
            return None
        context_doc = markup_context_document(doc, self.modifiers, self.targets)
        if doc_name is not None and self.save_markups and len(context_doc.getDocumentGraph().nodes()) > 0:
            self.saved_markups_map[doc_name] = context_doc
        markups = get_document_markups(context_doc)
        annotations, relations, doc_txt = convertMarkups2DF(markups)
        matched_conclusion_types = self.feature_inferencer.process(annotations, relations)
        doc_conclusion = self.document_inferencer.process(matched_conclusion_types)
        return doc_conclusion
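
For completeness, an end-to-end evaluation sketch; gold_docs is assumed to be a dict mapping document names to objects exposing .text and .positive_label (which is what eval expects), and the file paths are again hypothetical:

classifier = DocumentClassifier(targets='targets.tsv',
                                modifiers='modifiers.tsv',
                                feature_inference_rule='feature_inference_rules.csv',
                                document_inference_rule='document_inference_rules.csv',
                                expected_values=['positive'])
fn_docs, fp_docs, metrics, confusion_matrix_df = classifier.eval(gold_docs)
print(metrics)              # precision, recall, and F1, one per line
print(confusion_matrix_df)  # pandas crosstab of actual vs. predicted labels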