# Driver fragment: takes the model location and the input/output directories
# from the command line, runs the classifier on every matching input file and
# passes each result to `save_challenge_predictions`.
# NOTE(review): the enclosing header (presumably an `if __name__ == '__main__':`
# guard) is outside this view — confirm against the full file.
model_input = sys.argv[1]
    input_directory = sys.argv[2]
    output_directory = sys.argv[3]

    # Find files: keep regular, non-hidden entries whose lower-cased name ends
    # with 'mat' (note: no leading dot is required by this check).
    input_files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('mat'):
            input_files.append(f)

    # Create the output directory when it is missing. `os.mkdir` creates a
    # single level only, so the parent directory must already exist.
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    # Load model.
    print('Loading 12ECG model...')
    model = load_12ECG_model(model_input)

    # Iterate over files.
    print('Extracting 12ECG features...')
    num_files = len(input_files)

    for i, f in enumerate(input_files):
        # Progress counter is 1-based.
        print('    {}/{}...'.format(i+1, num_files))
        tmp_input_file = os.path.join(input_directory,f)
        data,header_data = load_challenge_data(tmp_input_file)
        # The classifier returns the label, the score and the class list it used.
        current_label, current_score,classes = run_12ECG_classifier(data,header_data, model)
        # Save results.
        save_challenge_predictions(output_directory,f,current_score,current_label,classes)


    print('Done.')
Esempio n. 2
0
    # Driver fragment: classifies every matching file of `input_directory` and
    # passes each result to `save_challenge_predictions`.
    # NOTE(review): `input_directory` and `output_directory` are bound outside
    # this view, as is the enclosing header — confirm against the full file.

    # Find files: keep regular, non-hidden entries whose lower-cased name ends
    # with 'mat' (note: no leading dot is required by this check).
    input_files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(
                os.path.join(input_directory, f)
        ) and not f.lower().startswith('.') and f.lower().endswith('mat'):
            input_files.append(f)

    # Create the output directory when it is missing. `os.mkdir` creates a
    # single level only, so the parent directory must already exist.
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    # The class list is obtained once, before classification, and the same
    # list is handed to both the classifier and the writer below.
    classes = get_classes(input_directory, input_files)

    # Load model.
    print('Loading 12ECG model...')
    model = load_12ECG_model()

    # Iterate over files.
    print('Extracting 12ECG features...')
    num_files = len(input_files)

    for i, f in enumerate(input_files):
        # Progress counter is 1-based.
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        current_label, current_score = run_12ECG_classifier(
            data, header_data, classes, model)
        # Save results.
        save_challenge_predictions(output_directory, f, current_score,
                                   current_label, classes)
    print('Done.')
Esempio n. 3
0
def _save_confusion_heatmap(matrix, classes, out_path):
    """Draw `matrix` as an annotated heatmap labelled by `classes` and save it to `out_path`."""
    frame = pd.DataFrame(matrix, index=list(classes), columns=list(classes))
    plt.figure(figsize=(9, 9))
    axes = sn.heatmap(frame, annot=True)
    figure = axes.get_figure()
    # `savefig` does not create missing folders, so make sure the target exists.
    folder = os.path.dirname(out_path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    figure.savefig(out_path, dpi=400)
    # Release the figure; otherwise every call leaves it open in pyplot.
    plt.close(figure)


def confusion_matrixes(input_directory):
    """Classify every input file and plot label/score confusion matrices.

    Each matching file of `input_directory` is run through the classifier.
    The row selected by ``argmax`` of the third classifier output accumulates
    the predicted label vector (first matrix) and the score vector (second
    matrix). The label matrix is converted to row percentages and saved as
    ``./cmatrix/labels/labels.png``; the raw score sums are saved as
    ``./cmatrix/scores/scores.png``. The fraction of files whose argmax
    prediction matches the argmax of the third output is printed as accuracy.

    Parameters
    ----------
    input_directory : str
        Directory that is scanned for regular, non-hidden files whose
        lower-cased name ends with 'mat'.

    Returns
    -------
    None
    """
    # Find files.
    input_files = [
        f for f in os.listdir(input_directory)
        if os.path.isfile(os.path.join(input_directory, f))
        and not f.lower().startswith('.')
        and f.lower().endswith('mat')
    ]

    # NOTE(review): `output_directory` is neither a parameter nor a local; it
    # presumably resolves to a module-level name — confirm, otherwise this
    # raises NameError. Kept because callers may rely on the directory.
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    classes = get_classes(input_directory, input_files)
    classes = np.array(classes)
    print(classes)
    # Load model.
    print('Loading 12ECG model...')
    model = load_12ECG_model()

    # Iterate over files.
    print('Extracting 12ECG features...')
    num_files = len(input_files)
    # Size the matrices from the class list instead of a hard-coded 9.
    num_classes = len(classes)
    confusion_matrix_labels = np.zeros((num_classes, num_classes))
    confusion_matrix_scores = np.zeros((num_classes, num_classes))
    bad = 0
    good = 0
    for i, f in enumerate(input_files):
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        # NOTE(review): `real_out` is presumably the ground-truth vector —
        # verify against `run_12ECG_classifier`.
        current_label, current_score, real_out = run_12ECG_classifier(
            data, header_data, classes, model)
        true_class = np.argmax(real_out)
        confusion_matrix_labels[true_class] += current_label
        confusion_matrix_scores[true_class] += current_score
        if true_class == np.argmax(current_label):
            good += 1
        else:
            bad += 1

    # Convert label counts to row percentages. Rows that received nothing
    # stay at 0 instead of turning into NaN through a 0/0 division.
    row_totals = confusion_matrix_labels.sum(axis=1, keepdims=True)
    confusion_matrix_labels = np.divide(
        confusion_matrix_labels,
        row_totals,
        out=np.zeros_like(confusion_matrix_labels),
        where=row_totals != 0,
    ) * 100

    _save_confusion_heatmap(confusion_matrix_labels, classes,
                            './cmatrix/labels/labels.png')
    _save_confusion_heatmap(confusion_matrix_scores, classes,
                            './cmatrix/scores/scores.png')

    # With no input files the accuracy is undefined; report NaN rather than
    # raising ZeroDivisionError.
    total = good + bad
    accuracy = good / total if total else float('nan')
    print("Accuracy: %0.4f" % accuracy)
    print('Done.')
Esempio n. 4
0
def eval_all(tranches:Optional[str]=None) -> pd.DataFrame:
    """Evaluate the loaded models record by record and print the challenge metrics.

    Every record of the evaluation dataset is classified on its own; the
    scalar scores, binary predictions and reference labels are collected into
    one table and the metrics returned by `evaluate_12ECG_score` are printed.

    Parameters:
    -----------
    tranches: str, optional,
        tranches for making the evaluation,
        can be one of "AB", "E", "F", or None (None defaults to "ABEF")

    Returns:
    --------
    df_eval_res: DataFrame,
        indexed by record, one score column per class, plus the columns
        'binary_predictions' and 'truth_labels' holding lists of class names
    """
    models = load_12ECG_model()
    data_reader = CR(TrainCfg.db_dir)
    eval_config = deepcopy(TrainCfg)
    if tranches:
        eval_config.tranches_for_training = tranches
    dataset = CINC2020(config=eval_config, training=False)

    print("start collecting results...")
    time.sleep(3)

    classes = ModelCfg.full_classes
    truth_labels, truth_array = [], []
    binary_predictions, scalar_predictions = [], []
    # dataset.records = dataset.records[:10]  # for fast debug
    with tqdm(dataset.records, total=len(dataset.records)) as progress:
        for rec in progress:
            data, header_data = load_challenge_data(
                data_reader.get_data_filepath(rec)
            )
            current_label, current_score, _ = run_12ECG_classifier(
                data, header_data, models, verbose=0
            )
            binary_predictions.append(current_label)
            scalar_predictions.append(current_score)

            # one-hot encode the reference labels in the order of `classes`
            rec_labels = data_reader.get_labels(rec, fmt='a')
            one_hot = [0] * len(classes)
            for label in rec_labels:
                one_hot[classes.index(label)] = 1
            truth_labels.append(rec_labels)
            truth_array.append(one_hot)

    # gather results into a DataFrame
    print("gathering results into a `DataFrame`...")
    df_eval_res = pd.DataFrame(scalar_predictions)
    df_eval_res.columns = classes
    df_eval_res['binary_predictions'] = ''
    df_eval_res['truth_labels'] = ''
    class_names = np.array(classes)
    for idx in df_eval_res.index:
        # names of the classes whose binary prediction equals 1
        positive = np.where(binary_predictions[idx]==1)[0]
        df_eval_res.at[idx, 'binary_predictions'] = class_names[positive].tolist()
        df_eval_res.at[idx, 'truth_labels'] = truth_labels[idx]
    df_eval_res.index = dataset.records

    metrics = evaluate_12ECG_score(
        classes=class_names.tolist(),
        truth=np.array(truth_array),
        scalar_pred=np.array(scalar_predictions),
        binary_pred=np.array(binary_predictions),
    )
    auroc, auprc, accuracy, f_measure, f_beta_measure, g_beta_measure, challenge_metric = metrics
    msg = f"""
        results on tranches {tranches or 'all'}:
        ------------------------------
        auroc:              {auroc}
        auprc:              {auprc}
        accuracy:           {accuracy}
        f_measure:          {f_measure}
        f_beta_measure:     {f_beta_measure}
        g_beta_measure:     {g_beta_measure}
        challenge_metric:   {challenge_metric}
        ----------------------------------------
    """
    print(msg)  # in case no logger

    return df_eval_res
Esempio n. 5
0
def eval_all_parallel(tranches:Optional[str]=None) -> pd.DataFrame:
    """Batched evaluation of the loaded models over the evaluation dataset.

    Records are fed through the deep-learning models in batches. The scores
    of the individual models are merged by an element-wise maximum, and the
    binary conclusions are combined with the output of the special detectors,
    which run in a process pool. The metrics returned by
    `evaluate_12ECG_score` are printed.

    NOTE:
    since signal preprocessing in `special_detectors` already uses `multiprocessing`,
    it would raise
    ``AssertionError: daemonic processes are not allowed to have children``

    Parameters:
    -----------
    tranches: str, optional,
        tranches for making the evaluation; when falsy the tranches already
        configured in `TrainCfg` are used

    Returns:
    --------
    df_eval_res: DataFrame,
        indexed by record, one score column per class, plus the columns
        'binary_predictions' and 'truth_labels' holding lists of class names
    """
    batch_size = 16

    loaded_models = load_12ECG_model()
    dr = CR(TrainCfg.db_dir)
    ds_config = deepcopy(TrainCfg)
    if tranches:
        ds_config.tranches_for_training = tranches
    ds = CINC2020(config=ds_config, training=False)
    data_loader = DataLoader(
        dataset=ds,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=collate_fn,
        drop_last=False,
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # empty (0, n_classes) accumulators that grow by one block per batch
    n_full_classes = len(ModelCfg.full_classes)
    truth_array = np.zeros((0, n_full_classes))
    binary_predictions = np.zeros((0, n_full_classes))
    scalar_predictions = np.zeros((0, n_full_classes))

    # column of the normal-sinus-rhythm class; does not depend on the batch,
    # so it is looked up once
    if "NSR" in ModelCfg.dl_classes:
        dl_nsr_cid = ModelCfg.dl_classes.index("NSR")
    elif "426783006" in ModelCfg.dl_classes:
        dl_nsr_cid = ModelCfg.dl_classes.index("426783006")
    else:
        dl_nsr_cid = None

    print("start collecting results...")
    time.sleep(3)

    with tqdm(total=len(ds)) as pbar:
        for signals, labels in data_loader:
            signals = signals.to(device=device, dtype=_DTYPE)
            labels = extend_predictions(
                labels.numpy(),
                ds.all_classes,
                ModelCfg.full_classes,
            )
            truth_array = np.concatenate((truth_array, labels))

            dl_scores = []
            for subset, model in loaded_models.items():
                model.eval()
                subset_scores, _ = model.inference(signals)
                if subset in ModelCfg.tranche_classes:
                    subset_scores = extend_predictions(
                        subset_scores,
                        ModelCfg.tranche_classes[subset],
                        ModelCfg.dl_classes,
                    )
                # The batch dimension is kept: dropping it (`[0]`) would keep
                # only the first record of every batch.
                # NOTE(review): assumes `inference` returns an array of shape
                # (batch, n_classes) — confirm.
                dl_scores.append(subset_scores)

            # TODO: make a classifier using the scores from the 4 different dl models
            dl_scores = np.max(np.array(dl_scores), axis=0)
            dl_conclusions = (dl_scores >= ModelCfg.bin_pred_thr).astype(int)

            # treat exceptional cases, record by record (the fallback rules
            # are defined on a single record's maximum probability)
            for row_idx, row_scores in enumerate(dl_scores):
                max_prob = row_scores.max()
                if max_prob < ModelCfg.bin_pred_nsr_thr and dl_nsr_cid is not None:
                    dl_conclusions[row_idx, dl_nsr_cid] = 1
                elif dl_conclusions[row_idx].sum() == 0:
                    # nothing passed the threshold: accept classes close to
                    # the maximum that still exceed the NSR threshold
                    look_again = (row_scores + ModelCfg.bin_pred_look_again_tol) >= max_prob
                    look_again = look_again & (row_scores >= ModelCfg.bin_pred_nsr_thr)
                    dl_conclusions[row_idx] = look_again.astype(int)

            dl_scores = extend_predictions(
                dl_scores,
                ModelCfg.dl_classes,
                ModelCfg.full_classes,
            )
            dl_conclusions = extend_predictions(
                dl_conclusions,
                ModelCfg.dl_classes,
                ModelCfg.full_classes,
            )

            # NOTE(review): a new pool is created for every batch; consider
            # creating it once outside the loop if start-up cost matters.
            with mp.Pool(processes=batch_size) as pool:
                sd_conclusion = pool.starmap(
                    func=_run_special_detector_once,
                    iterable=[(s,) for s in signals.tolist()],
                )
            sd_conclusion = np.array(sd_conclusion)

            # scores: element-wise larger of the two sources
            step_scores = np.where(dl_scores>=sd_conclusion, dl_scores, sd_conclusion)
            # conclusions: 1 only where both sources are non-zero
            # NOTE(review): this is a logical AND while the scores use a
            # maximum — confirm that AND (not OR) is intended.
            step_conclusions = ((dl_conclusions * sd_conclusion) != 0).astype(int)

            binary_predictions = np.concatenate((binary_predictions, step_conclusions))
            scalar_predictions = np.concatenate((scalar_predictions, step_scores))
            pbar.update(signals.shape[0])

    truth_labels = [dr.get_labels(rec, fmt='a') for rec in ds.records]

    # gather results into a DataFrame
    print("gathering results into a `DataFrame`...")
    df_eval_res = pd.DataFrame(scalar_predictions)
    df_eval_res.columns = ModelCfg.full_classes
    df_eval_res['binary_predictions'] = ''
    df_eval_res['truth_labels'] = ''
    class_names = np.array(ModelCfg.full_classes)
    for idx in df_eval_res.index:
        # names of the classes whose binary prediction equals 1
        positive = np.where(binary_predictions[idx]==1)[0]
        df_eval_res.at[idx, 'binary_predictions'] = class_names[positive].tolist()
        df_eval_res.at[idx, 'truth_labels'] = truth_labels[idx]
    df_eval_res.index = ds.records

    auroc, auprc, accuracy, f_measure, f_beta_measure, g_beta_measure, challenge_metric = \
        evaluate_12ECG_score(
            classes=ModelCfg.full_classes,
            truth=np.array(truth_array),
            scalar_pred=np.array(scalar_predictions),
            binary_pred=np.array(binary_predictions),
        )
    msg = f"""
        results on tranches {tranches or 'all'}:
        ------------------------------
        auroc:              {auroc}
        auprc:              {auprc}
        accuracy:           {accuracy}
        f_measure:          {f_measure}
        f_beta_measure:     {f_beta_measure}
        g_beta_measure:     {g_beta_measure}
        challenge_metric:   {challenge_metric}
        ----------------------------------------
    """
    print(msg)  # in case no logger

    return df_eval_res
Esempio n. 6
0
 def __init__(self):
     """Store the diagnosis class names as an array and load the 12-lead ECG model."""
     class_names = [
         "AF", "I-AVB", "LBBB", "Normal", "PAC", "PVC", "RBBB", "STD", "STE",
     ]
     self.classes = np.array(class_names)
     self.model = load_12ECG_model()