コード例 #1
0
    def filter_articles(self,
                        ris_string,
                        ensemble_type="svm_cnn",
                        threshold_type='sensitive',
                        auto_use_ptyp=True,
                        remove_non_rcts=True):
        """Annotate RIS records with RCT predictions and optionally filter them.

        The RIS text is parsed with ``ris.loads`` and scored with
        ``self.predict_ris``.  Every retained record is extended with the
        prediction fields, stored under the two-letter tags listed in
        ``pred_key_map`` below, and the result is serialised with
        ``ris.dumps``.

        Args:
            ris_string: RIS-formatted text handed to ``ris.loads``.
            ensemble_type: Forwarded unchanged to ``self.predict_ris``.
            threshold_type: Forwarded unchanged to ``self.predict_ris``.
            auto_use_ptyp: Forwarded unchanged to ``self.predict_ris``.
            remove_non_rcts: Boolean flag.  When true (the default), only
                records whose prediction has a truthy ``is_rct`` are kept;
                when false, every record is kept.

        Returns:
            The value returned by ``ris.dumps`` for the retained records.

        Raises:
            KeyError: If a prediction row contains a key that has no entry
                in ``pred_key_map``.
        """
        print('Parsing RIS data')
        ris_data = ris.loads(ris_string)

        preds = self.predict_ris(ris_data,
                                 ensemble_type=ensemble_type,
                                 threshold_type=threshold_type,
                                 auto_use_ptyp=auto_use_ptyp)

        # Prediction field name -> RIS tag under which it is stored.
        pred_key_map = {
            "score": "ZS",
            "model": "ZM",
            "threshold_type": "ZT",
            "threshold_value": "ZC",
            "is_rct": "ZR",
            "ptyp_rct": "ZP"
        }

        out = []
        # zip() pairs each parsed record with its prediction by position.
        for ris_row, pred_row in zip(ris_data, preds):
            if not remove_non_rcts or pred_row['is_rct']:
                # The record is updated in place, then collected for output.
                ris_row.update(
                    {pred_key_map[k]: v
                     for k, v in pred_row.items()})
                out.append(ris_row)
        return ris.dumps(out)
コード例 #2
0
def test_calibration():
    """Print observed vs. expected RCT classifier performance.

    Loads ``rct/pubmed_test.txt`` and ``rct/pubmed_expected.json`` from
    ``robotreviewer.DATA_ROOT``.  For every combination of ensemble
    ("svm", "cnn", "svm_cnn"), threshold mode ("balanced", "precise",
    "sensitive") and ``auto_use_ptyp`` setting, it runs
    ``RCTRobot.predict_ris`` on the parsed records and prints sensitivity and
    specificity for both the stored expected classifications and the fresh
    predictions.  Rows whose ``hedges_is_rct`` equals ``'1'`` are treated as
    the positive class.

    Nothing is asserted or returned; the output is informational only.
    """
    print("Testing RobotSearch...")
    target_classes = ["svm", "cnn", "svm_cnn"]
    target_modes = ["balanced", "precise", "sensitive"]

    robot = RCTRobot()

    print("Loading test PubMed file")
    pubmed_path = os.path.join(robotreviewer.DATA_ROOT, 'rct/pubmed_test.txt')
    with open(pubmed_path, 'r') as pubmed_file:
        ris_string = pubmed_file.read()

    print('Parsing RIS data')
    ris_data = ris.loads(ris_string)

    print("Loading expected results (from validation paper)")
    expected_path = os.path.join(robotreviewer.DATA_ROOT,
                                 'rct/pubmed_expected.json')
    with open(expected_path, 'r') as expected_file:
        expected_results = json.load(expected_file)

    for target_class in target_classes:
        for target_mode in target_modes:
            for use_ptyp in [True, False]:

                # Expected results are keyed "<model>_ptyp" when the
                # publication-type information is used.
                if use_ptyp:
                    expected_model_class = "{}_ptyp".format(target_class)
                else:
                    expected_model_class = target_class

                print("Testing {} model; use_ptyp={}; mode={}".format(
                    target_class, use_ptyp, target_mode))
                predictions = robot.predict_ris(ris_data,
                                                ensemble_type=target_class,
                                                threshold_type=target_mode,
                                                auto_use_ptyp=use_ptyp)

                input_pmids = [str(rec['PMID'][0]) for rec in ris_data]
                expected_rows = expected_results[expected_model_class][
                    target_mode]
                reference_pmids = [str(row['pmid']) for row in expected_rows]

                # Position-wise comparison of the two PMID sequences.
                n_matching = sum(
                    a == b for a, b in zip(input_pmids, reference_pmids))
                print("Number matching PMIDS: {}".format(n_matching))

                obs_score = np.array([p['score'] for p in predictions])
                obs_clf = np.array([p['is_rct'] for p in predictions])

                exp_score = np.array(
                    [float(row['score']) for row in expected_rows])
                exp_clf = np.array([row['is_rct'] for row in expected_rows])

                print("Totals assessed: {} obs, {} exp".format(
                    len(obs_score), len(exp_score)))

                # Agreement count and disagreement indices are computed but
                # not reported anywhere in this function.
                agreement = np.equal(obs_clf, exp_clf)
                match_clf = np.sum(agreement)
                disag = np.where(np.logical_not(agreement))[0]

                # Reference labels: True where hedges_is_rct is '1'.
                hedges_y = np.array(
                    [row['hedges_is_rct'] == '1' for row in expected_rows])
                n_positive = np.sum(hedges_y)

                exp_sens = np.sum(exp_clf[hedges_y]) / n_positive
                exp_negative_calls = np.invert(exp_clf)
                hedges_n = np.invert(hedges_y)
                n_negative = np.sum(hedges_n)
                exp_spec = np.sum(exp_negative_calls[hedges_n]) / n_negative

                obs_sens = np.sum(obs_clf[hedges_y]) / n_positive
                obs_negative_calls = np.invert(obs_clf)
                obs_spec = np.sum(obs_negative_calls[hedges_n]) / n_negative

                print("Expected: sens {} spec {}".format(exp_sens, exp_spec))

                print("Observed: sens {} spec {}".format(obs_sens, obs_spec))