Example #1
def datarecord2(input_directory, target_lead, buf_size=100, segnum=24, seg_length=750, full_seg=True, stt=0):
    input_files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('mat'):
            input_files.append(f)


    classes = get_classes(input_directory, input_files)
    num_files = len(input_files)
    datalabel = []
    # both buffers start with a dummy all-zero first row, which is sliced off ([1:]) when flushing / returning
    SEG_buf = np.zeros([1, seg_length, segnum], dtype=np.float32)
    SEGs = np.zeros([1, seg_length, segnum], dtype=np.float32)
    for i, f in enumerate(input_files):
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        datalabel.append(getdata_class(header_data))
        datalead = data[target_lead, :]
        # SEGt = np.float32(utils.sig_process(data, target_length=target_len))
        SEGt = utils.Stack_Segs_generate2(datalead, seg_num=segnum, seg_length=seg_length, full_seg=full_seg, stt=stt)
        del data, datalead
        SEG_buf = np.concatenate((SEG_buf, SEGt))
        del SEGt
        # flush the accumulated segments into SEGs once the buffer holds at least
        # buf_size rows, so the large output array is only extended occasionally
        if SEG_buf.shape[0] >= buf_size:
            SEGs = np.concatenate((SEGs, SEG_buf[1:]))
            del SEG_buf
            SEG_buf = np.zeros([1, seg_length, segnum], dtype=np.float32)
    if SEG_buf.shape[0] > 1:
        SEGs = np.concatenate((SEGs, SEG_buf[1:]))
    del SEG_buf
    datalabel = np.array(datalabel)
    return SEGs[1:], datalabel, len(classes)
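A minimal usage sketch (not part of the original example): the directory path and lead index below are placeholders, and the helper functions used by datarecord2 are assumed to be importable.

# hypothetical directory of challenge .mat/.hea records; target_lead picks one of the 12 leads
segs, labels, n_classes = datarecord2('./Training_WFDB', target_lead=1)
print(segs.shape)               # (N, 750, 24) float32 stacks of seg_length x segnum segments
print(labels.shape, n_classes)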
Example #2
def datarecord(input_directory, downsample, buf_size=100, leadnum=12):
    input_files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(
                os.path.join(input_directory, f)
        ) and not f.lower().startswith('.') and f.lower().endswith('mat'):
            input_files.append(f)

    classes = get_classes(input_directory, input_files)
    num_files = len(input_files)
    datalabel = []
    target_len = int(72000 / downsample)  # fixed target signal length after downsampling
    SEG_buf = np.zeros([1, target_len, leadnum], dtype=np.float32)
    SEGs = np.zeros([1, target_len, leadnum], dtype=np.float32)
    for i, f in enumerate(input_files):
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        datalabel.append(getdata_class(header_data))
        SEGt = np.float32(utils.sig_process(data, target_length=target_len))
        del data
        SEG_buf = np.concatenate((SEG_buf, SEGt))
        del SEGt
        if SEG_buf.shape[0] >= buf_size:
            SEGs = np.concatenate((SEGs, SEG_buf[1:]))
            del SEG_buf
            SEG_buf = np.zeros([1, target_len, leadnum], dtype=np.float32)
    if SEG_buf.shape[0] > 1:
        SEGs = np.concatenate((SEGs, SEG_buf[1:]))
    del SEG_buf
    datalabel = np.array(datalabel)
    return SEGs[1:], datalabel, len(classes)
Example #3
def datarecord(input_directory):
    input_files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('mat'):
            input_files.append(f)

    num_files = len(input_files)
    datalabel = []
    for i, f in enumerate(input_files):
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        datalabel.append(getdata_class(header_data) - 1)  # shift class indices to be 0-based
    datalabel = np.array(datalabel)
    return np.array(input_files), datalabel
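A brief usage sketch for the variant directly above (illustrative only; the directory is a placeholder). Note that it returns the file names plus 0-based labels rather than the signals themselves.

files, labels = datarecord('./Training_WFDB')
print(files[0], labels[0])    # first record's file name and its 0-based class index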
def datafeatrecord(input_directory,
                   records,
                   downsample,
                   buf_size=100,
                   leadnum=12,
                   featurenum=25):
    # input_files = []
    # for f in os.listdir(input_directory):
    #     if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('mat'):
    #         input_files.append(f)

    classes = get_classes(input_directory, records)
    num_files = len(records)
    datalabel = np.zeros([1, 9])  # one-hot labels; the dummy first row is dropped at return
    # label0temp=[]
    target_len = int(72000 / downsample)
    SEG_buf = np.zeros([1, target_len, leadnum + 1], dtype=np.float32)
    SEGs = np.zeros([1, target_len, leadnum + 1], dtype=np.float32)
    # feat_buf=np.zeros([1,1,target_len], dtype=np.float32)
    # handcrafted per-record features are zero-padded to target_len and appended as an extra channel
    featurezero = np.zeros([target_len, 1])
    for i, f in enumerate(records):
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        labelonhot, label0 = getdata_class(header_data)
        datalabel = np.concatenate((datalabel, labelonhot), axis=0)
        # label0temp.append(label0)
        features = np.asarray(get_12ECG_features(data, header_data))
        featurezero[0:featurenum, 0] = features[0:featurenum]
        # feats_reshape = features.reshape(1, -1)
        feats_reshape = featurezero.reshape(
            [1, featurezero.shape[0], featurezero.shape[1]])
        # feat_buf=np.concatenate((feat_buf,feats_reshape))

        SEGt = np.float32(utils.sig_process(data, target_length=target_len))
        SEGt = np.concatenate((SEGt, feats_reshape), axis=2)
        del data
        SEG_buf = np.concatenate((SEG_buf, SEGt))
        del SEGt
        if SEG_buf.shape[0] >= buf_size:
            SEGs = np.concatenate((SEGs, SEG_buf[1:]))
            del SEG_buf
            SEG_buf = np.zeros([1, target_len, leadnum + 1], dtype=np.float32)
    if SEG_buf.shape[0] > 1:
        SEGs = np.concatenate((SEGs, SEG_buf[1:]))
    del SEG_buf
    # label0temp = np.array(label0temp)
    return SEGs[1:], datalabel[1:]
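A usage sketch for datafeatrecord (the directory and record names are placeholders, and the shapes assume the helpers behave as the code above expects).

# in practice the record list comes from listing the data directory
segs, onehot = datafeatrecord('./Training_WFDB', records=['A0001.mat', 'A0002.mat'], downsample=2)
print(segs.shape)    # (N, 36000, 13): 12 resampled leads plus 1 zero-padded feature channel
print(onehot.shape)  # (N, 9) one-hot labels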
def datarecord(input_directory):
    input_files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(
                os.path.join(input_directory, f)
        ) and not f.lower().startswith('.') and f.lower().endswith('mat'):
            input_files.append(f)

    num_files = len(input_files)
    datalabel = []
    classnamemultemp = []
    for i, f in enumerate(input_files):
        print('    {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        labelonhot, label0 = getdata_class(header_data)
        datalabel.append(label0)
        # classnamemultemp.append(classnamemul)
    datalabel = np.array(datalabel)
    # text_save('classname.txt',classnamemultemp)

    return np.array(input_files), datalabel
Example #6
def eval_all(tranches: Optional[str] = None) -> pd.DataFrame:
    """ finished, checked,

    evaluate the trained 12-lead ECG classifier on the CINC2020 dataset,
    optionally restricted to the given tranches, and collect the per-record
    predictions into a DataFrame

    Parameters:
    -----------
    tranches: str, optional,
        tranches for making the evaluation,
        can be one of "AB", "E", "F", or None (None defaults to "ABEF")

    Returns:
    --------
    df_eval_res: pd.DataFrame,
        one row per record (indexed by record name), holding the per-class
        scalar predictions, the binary predictions, and the truth labels
    """
    models = load_12ECG_model()
    dr = CR(TrainCfg.db_dir)
    ds_config = deepcopy(TrainCfg)
    if tranches:
        ds_config.tranches_for_training = tranches
    ds = CINC2020(config=ds_config, training=False)

    print("start collecting results...")
    time.sleep(3)

    truth_labels, truth_array = [], []
    binary_predictions, scalar_predictions = [], []
    classes = ModelCfg.full_classes
    # ds.records = ds.records[:10]  # for fast debug
    with tqdm(ds.records, total=len(ds.records)) as t:
        for rec in t:
            data_fp = dr.get_data_filepath(rec)
            data, header_data = load_challenge_data(data_fp)
            current_label, current_score, _ = \
                run_12ECG_classifier(data, header_data, models, verbose=0)
            binary_predictions.append(current_label)
            scalar_predictions.append(current_score)
            tl = dr.get_labels(rec, fmt='a')
            ta = list(repeat(0, len(classes)))
            for c in tl:
                ta[classes.index(c)] = 1
            truth_labels.append(tl)
            truth_array.append(ta)
    
    # gather results into a DataFrame
    print("gathering results into a `DataFrame`...")
    df_eval_res = pd.DataFrame(scalar_predictions)
    df_eval_res.columns = classes
    df_eval_res['binary_predictions'] = ''
    df_eval_res['truth_labels'] = ''
    classes = np.array(classes)
    for idx, row in df_eval_res.iterrows():
        df_eval_res.at[idx, 'binary_predictions'] = \
            classes[np.where(binary_predictions[idx] == 1)[0]].tolist()
        df_eval_res.at[idx, 'truth_labels'] = truth_labels[idx]
    df_eval_res.index = ds.records

    classes = classes.tolist()
    auroc, auprc, accuracy, f_measure, f_beta_measure, g_beta_measure, challenge_metric = \
        evaluate_12ECG_score(
            classes=classes,
            truth=np.array(truth_array),
            scalar_pred=np.array(scalar_predictions),
            binary_pred=np.array(binary_predictions),
        )
    msg = f"""
        results on tranches {tranches or 'all'}:
        ------------------------------
        auroc:              {auroc}
        auprc:              {auprc}
        accuracy:           {accuracy}
        f_measure:          {f_measure}
        f_beta_measure:     {f_beta_measure}
        g_beta_measure:     {g_beta_measure}
        challenge_metric:   {challenge_metric}
        ----------------------------------------
    """
    print(msg)  # in case no logger

    return df_eval_res
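A hedged usage sketch (not from the source); it assumes TrainCfg.db_dir already points at the local CINC2020 data and that trained model files are available. The tranche string and output file name are only illustrations.

df_res = eval_all(tranches="AB")        # evaluate on tranches "AB" only
df_res.to_csv("eval_results_AB.csv")    # keep the per-record predictions for inspection
print(df_res[['binary_predictions', 'truth_labels']].head())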
Example #7
def predict(self, file_path):
    data, header_data = load_challenge_data(file_path)
    current_label, current_score, leads, fs = run_12ECG_classifier(data, header_data, self.classes, self.model)

    return current_label, current_score, leads, self.classes, fs
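A usage sketch for the method above. The wrapper class name ECGClassifier and the file path are purely hypothetical, since the example shows only the predict method.

clf = ECGClassifier()   # hypothetical wrapper exposing .classes, .model and predict()
label, score, leads, classes, fs = clf.predict('./Training_WFDB/A0001.mat')
print(label)            # binary prediction per class
print(score)            # scalar score per class
print(fs)               # sampling frequency returned by the classifier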
Example #8
def data_files_load(input_directory, input_files='', mapping_labels=False, reference_label=False, meta_data=True):
    """Load data files as a list, along with the labels, from the given data directory"""
    # INPUT:
    # input_directory - path of the input data
    # input_files - list of files (if empty, the directory listing is used)
    # mapping_labels - True (map labels, e.g. 'AF', to string numbers)
    # reference_label - True (load labels from 'REFERENCE.csv' in the given 'input_directory')
    # meta_data - True (return a meta_data list)

    # OUTPUT:
    # list_data - list comprising the data (list_data[0][1,:] --> to access the first sample's 2nd-channel ECG data)
    # list_label - list comprising the labels (list_label[0] --> to access the label of the first sample)
    # list_fname - list of file names
    # list_meta - list of meta data (age, sex, sampling frequency, per-lead gains) when meta_data is True,
    #             otherwise the raw header data

    # Example (see also the usage sketch after this function)
    # list_data, list_label, list_fname, list_meta = data_files_load(pth_data)

    if len(input_files) == 0:
        print('Getting files list from the data directory - OK')
        input_files = data_files_list(input_directory)

    # converting the list of files to absolute paths
    string_pth = input_directory + os.path.sep
    input_files_pth = [string_pth + s for s in input_files]

    list_data = []
    list_label = []
    list_fname = []
    list_meta = []

    if reference_label:  # Load labels from the given reference file 'REFERENCE.csv'
        print('Getting labels from the REFERENCE file - OK')
        df_reference = pd.read_csv(os.path.join(input_directory, 'REFERENCE.csv'), sep=',')
    else:
        print('Getting labels from HEADER - OK')


    for ii in range(len(input_files_pth)):

        tmp_input_file = input_files_pth[ii]
        print('Loading --> ',tmp_input_file)
        data, header_data = load_challenge_data(tmp_input_file)


        if meta_data or not reference_label:
            tmp_hea = header_data[0].split(' ')
            # print(tmp_hea)
            # ['A0001', '12', '500', '7500', '16-Mar-2020', '19:07:01\n']
            ptID = tmp_hea[0]  # 'A0001'
            num_leads = int(tmp_hea[1])  # '12'
            sample_Fs = int(tmp_hea[2])  # '500'
            gain_lead = np.zeros(num_leads)  # filled with per-lead gains below (e.g. 1000)

            for ll in range(num_leads):
                tmp_hea = header_data[ll+1].split(' ')
                gain_lead[ll] = int(tmp_hea[2].split('/')[0])


            for iline in header_data:
                if iline.startswith('#Age'):
                    tmp_age = iline.split(': ')[1].strip()
                    age = int(tmp_age if tmp_age != 'NaN' else 57)  # 57 used as a default when age is missing
                elif iline.startswith('#Sex'):
                    tmp_sex = iline.split(': ')[1]
                    if tmp_sex.strip() == 'Female':
                        sex = 1
                    else:
                        sex = 0
                elif iline.startswith('#Dx'):
                    label = iline.split(': ')[1].split(',')[0]  # first diagnosis in the '#Dx' line

            tmp_meta = np.hstack([age,sex,sample_Fs,gain_lead])

        if reference_label:  # Load labels from the given reference file 'REFERENCE.csv'
            tmp_idx = df_reference[df_reference['Recording'] == input_files[ii][:-4]].index[0]
            label = df_reference.loc[tmp_idx, ['First_label', 'Second_label', 'Third_label']].tolist()

        else:
            # the label was already extracted from the '#Dx' header line above
            #print('Label: ',label)

            if mapping_labels:
                mapping = {'Normal': '1', 'AF': '2', 'I-AVB': '3', 'LBBB': '4', 'RBBB': '5', 'PAC': '6', 'PVC': '7', 'STD': '8', 'STE': '9'}
                label = label.strip()  # remove the trailing newline once before mapping
                for key, value in mapping.items():
                    label = label.replace(key, value)

        list_data.append(data)
        list_label.append(label)
        list_fname.append(input_files[ii])
        if meta_data:
            list_meta.append(tmp_meta)
        else:
            list_meta.append(header_data)

    return list_data, list_label, list_fname, list_meta
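A short usage sketch (illustrative, not from the original; the directory path is a placeholder).

pth_data = './Training_WFDB'   # hypothetical directory containing .mat/.hea challenge records
list_data, list_label, list_fname, list_meta = data_files_load(pth_data, mapping_labels=True)
print(list_fname[0], list_label[0])   # first record's file name and its (mapped) label
print(list_data[0].shape)             # (num_leads, num_samples) ECG array
print(list_meta[0][:3])               # age, sex, sampling frequency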