def datarecord2(input_directory,target_lead,buf_size=100,segnum=24,seg_length=750, full_seg=True, stt=0):
    """Load every ``mat`` record of a directory as stacked segments of one lead.

    Parameters
    ----------
    input_directory:
        Folder that is scanned (non-recursively) for regular files whose
        lower-cased name ends with ``mat`` and does not start with ``.``.
    target_lead:
        Index applied to the first axis of each loaded data array
        (``data[target_lead, :]``) to pick the lead that is segmented.
    buf_size:
        Unused. Kept only so existing callers that pass it keep working;
        the intermediate buffer it used to size no longer exists.
    segnum, seg_length, full_seg, stt:
        Forwarded to ``utils.Stack_Segs_generate2`` as ``seg_num``,
        ``seg_length``, ``full_seg`` and ``stt``. ``seg_length`` and
        ``segnum`` also fix the last two dimensions of the result.

    Returns
    -------
    tuple
        ``(segments, labels, n_classes)`` where ``segments`` is the
        concatenation along axis 0 of everything returned by
        ``utils.Stack_Segs_generate2`` (shape ``(N, seg_length, segnum)``,
        ``N == 0`` when no record is found), ``labels`` is
        ``np.array`` of ``getdata_class(header_data)`` per record and
        ``n_classes`` is ``len(get_classes(...))``.
    """
    input_files = [
        f for f in os.listdir(input_directory)
        if os.path.isfile(os.path.join(input_directory, f))
        and not f.lower().startswith('.')
        and f.lower().endswith('mat')
    ]
    classes = get_classes(input_directory, input_files)
    num_files = len(input_files)
    datalabel = []
    # A zero-row float32 seed keeps the (0, seg_length, segnum) float32 result
    # for an empty directory and takes part in dtype promotion exactly like
    # the float32 placeholder arrays of the previous implementation did.
    chunks = [np.zeros([0, seg_length, segnum], dtype=np.float32)]
    for i, f in enumerate(input_files):
        print(' {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        datalabel.append(getdata_class(header_data))
        chunks.append(
            utils.Stack_Segs_generate2(
                data[target_lead, :],
                seg_num=segnum,
                seg_length=seg_length,
                full_seg=full_seg,
                stt=stt,
            )
        )
        # Drop the full recording before the next one is loaded.
        del data
    # One concatenation at the end instead of re-copying the growing result
    # every time a buffer fills up.
    return np.concatenate(chunks), np.array(datalabel), len(classes)
def datarecord(input_directory, downsample, buf_size=100, leadnum=12):
    """Load and pre-process every ``mat`` record found in a directory.

    Parameters
    ----------
    input_directory:
        Folder that is scanned (non-recursively) for regular files whose
        lower-cased name ends with ``mat`` and does not start with ``.``.
    downsample:
        Divisor used to derive the per-record target length,
        ``int(72000 / downsample)``, which is passed to
        ``utils.sig_process`` as ``target_length``.
    buf_size:
        Unused. Kept only so existing callers that pass it keep working;
        the intermediate buffer it used to size no longer exists.
    leadnum:
        Size of the last dimension of the result; the arrays returned by
        ``utils.sig_process`` must match it to be concatenated.

    Returns
    -------
    tuple
        ``(signals, labels, n_classes)`` where ``signals`` is a float32
        array of shape ``(N, target_len, leadnum)`` (``N == 0`` when no
        record is found), ``labels`` is ``np.array`` of
        ``getdata_class(header_data)`` per record and ``n_classes`` is
        ``len(get_classes(...))``.
    """
    input_files = [
        f for f in os.listdir(input_directory)
        if os.path.isfile(os.path.join(input_directory, f))
        and not f.lower().startswith('.')
        and f.lower().endswith('mat')
    ]
    classes = get_classes(input_directory, input_files)
    num_files = len(input_files)
    datalabel = []
    target_len = int(72000 / downsample)
    # Zero-row seed: fixes the trailing dimensions and keeps the
    # (0, target_len, leadnum) float32 result for an empty directory.
    chunks = [np.zeros([0, target_len, leadnum], dtype=np.float32)]
    for i, f in enumerate(input_files):
        print(' {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)
        datalabel.append(getdata_class(header_data))
        chunks.append(
            np.float32(utils.sig_process(data, target_length=target_len))
        )
        # Drop the raw recording before the next one is loaded.
        del data
    # One concatenation at the end instead of re-copying the growing result
    # every time a buffer fills up.
    return np.concatenate(chunks), np.array(datalabel), len(classes)
def datarecord(input_directory):
    """List the ``mat`` records of a directory together with their labels.

    A directory entry counts as a record when it is a regular file whose
    lower-cased name does not start with ``.`` and ends with ``mat``.

    Returns
    -------
    tuple
        ``(file_names, labels)``: ``file_names`` is ``np.array`` of the
        selected entry names (not full paths) and ``labels`` is
        ``np.array`` of ``getdata_class(header_data) - 1`` for each of
        them, in the same order.
    """

    def _is_record(name):
        lowered = name.lower()
        return (
            os.path.isfile(os.path.join(input_directory, name))
            and not lowered.startswith('.')
            and lowered.endswith('mat')
        )

    record_names = [name for name in os.listdir(input_directory) if _is_record(name)]
    total = len(record_names)

    labels = []
    for position, name in enumerate(record_names, start=1):
        print(' {}/{}...'.format(position, total))
        # Only the header is needed here; the signal itself is discarded.
        _, header_data = load_challenge_data(os.path.join(input_directory, name))
        labels.append(getdata_class(header_data) - 1)

    return np.array(record_names), np.array(labels)
def datafeatrecord(input_directory, records, downsample, buf_size=100, leadnum=12, featurenum=25):
    """Load the given records and append a hand-crafted feature channel.

    For every record the processed signal returned by ``utils.sig_process``
    is extended along its last axis by one extra channel. That channel holds
    the first ``featurenum`` values of ``get_12ECG_features(data,
    header_data)`` in its first ``featurenum`` positions and zeros elsewhere.

    Parameters
    ----------
    input_directory:
        Folder that contains the record files.
    records:
        Iterable (with ``len``) of record file names, joined onto
        ``input_directory``.
    downsample:
        Divisor used to derive the per-record target length,
        ``int(72000 / downsample)``.
    buf_size:
        Unused. Kept only so existing callers that pass it keep working;
        the intermediate buffer it used to size no longer exists.
    leadnum:
        Number of signal channels expected from ``utils.sig_process``; the
        result has ``leadnum + 1`` channels.
    featurenum:
        How many leading feature values are copied into the extra channel.

    Returns
    -------
    tuple
        ``(signals, labels)``: ``signals`` has shape
        ``(N, target_len, leadnum + 1)`` and ``labels`` stacks the first
        value returned by ``getdata_class`` for each record along axis 0
        (9 columns; shape ``(0, 9)`` when ``records`` is empty).
    """
    # The return value is not used in this function. The call is kept because
    # get_classes is defined outside this block.
    # NOTE(review): drop it if get_classes is confirmed to be side-effect free.
    get_classes(input_directory, records)
    num_files = len(records)
    target_len = int(72000 / downsample)

    # Zero-row seeds fix the trailing dimensions, keep the empty-input result
    # shapes, and take part in dtype promotion like the former placeholders.
    label_rows = [np.zeros([0, 9])]
    chunks = [np.zeros([0, target_len, leadnum + 1], dtype=np.float32)]

    # Reused scratch column: only the first `featurenum` rows are ever
    # written, so the remaining rows stay zero for every record.
    featurezero = np.zeros([target_len, 1])

    for i, f in enumerate(records):
        print(' {}/{}...'.format(i + 1, num_files))
        tmp_input_file = os.path.join(input_directory, f)
        data, header_data = load_challenge_data(tmp_input_file)

        labelonhot, _ = getdata_class(header_data)
        label_rows.append(labelonhot)

        features = np.asarray(get_12ECG_features(data, header_data))
        featurezero[0:featurenum, 0] = features[0:featurenum]
        feats_reshape = featurezero.reshape(
            [1, featurezero.shape[0], featurezero.shape[1]])

        SEGt = np.float32(utils.sig_process(data, target_length=target_len))
        # np.concatenate copies, so reusing `featurezero` next round is safe.
        chunks.append(np.concatenate((SEGt, feats_reshape), axis=2))
        del data

    # Single concatenations at the end instead of re-copying the growing
    # arrays inside the loop.
    return np.concatenate(chunks), np.concatenate(label_rows, axis=0)
def datarecord(input_directory):
    """Collect the ``mat`` record names of a directory and their labels.

    Entries are kept when they are regular files whose lower-cased name
    does not start with ``.`` and ends with ``mat``.

    Returns
    -------
    tuple
        ``(file_names, labels)``: ``file_names`` is ``np.array`` of the kept
        entry names and ``labels`` is ``np.array`` of the second value
        returned by ``getdata_class(header_data)`` for each record, in the
        same order.
    """
    input_files = []
    for entry in os.listdir(input_directory):
        if not os.path.isfile(os.path.join(input_directory, entry)):
            continue
        entry_lower = entry.lower()
        if entry_lower.startswith('.'):
            continue
        if entry_lower.endswith('mat'):
            input_files.append(entry)

    num_files = len(input_files)
    collected = []
    for count, entry in enumerate(input_files, start=1):
        print(' {}/{}...'.format(count, num_files))
        record_path = os.path.join(input_directory, entry)
        _, header_data = load_challenge_data(record_path)
        # getdata_class yields a pair; only its second element is kept.
        _, label0 = getdata_class(header_data)
        collected.append(label0)

    return np.array(input_files), np.array(collected)
def eval_all(tranches:Optional[str]=None) -> pd.DataFrame:
    """Run the loaded models over the evaluation records and score them.

    Parameters:
    -----------
    tranches: str, optional,
        tranches used for the evaluation; when given it overrides
        `tranches_for_training` on a deep copy of `TrainCfg`.
        When falsy the copied configuration is used unchanged
        (the previous docstring lists "AB", "E", "F" as valid values and
        states that None stands for "ABEF")

    Returns:
    --------
    df_eval_res: DataFrame,
        indexed by record, one column of scalar predictions per class plus
        the columns 'binary_predictions' and 'truth_labels'
        (both holding lists of class names).
        The aggregate metrics are printed, not returned.
    """
    models = load_12ECG_model()
    reader = CR(TrainCfg.db_dir)

    ds_config = deepcopy(TrainCfg)
    if tranches:
        ds_config.tranches_for_training = tranches
    ds = CINC2020(config=ds_config, training=False)

    print("start collecting results...")
    time.sleep(3)

    truth_labels = []
    truth_array = []
    binary_predictions = []
    scalar_predictions = []
    classes = ModelCfg.full_classes

    # ds.records = ds.records[:10]  # for fast debug
    with tqdm(ds.records, total=len(ds.records)) as progress:
        for rec in progress:
            data, header_data = load_challenge_data(reader.get_data_filepath(rec))
            current_label, current_score, _ = run_12ECG_classifier(
                data, header_data, models, verbose=0,
            )
            binary_predictions.append(current_label)
            scalar_predictions.append(current_score)

            # multi-hot encoding of the ground truth over `classes`
            rec_labels = reader.get_labels(rec, fmt='a')
            rec_onehot = [0] * len(classes)
            for lb in rec_labels:
                rec_onehot[classes.index(lb)] = 1
            truth_labels.append(rec_labels)
            truth_array.append(rec_onehot)

    # gather results into a DataFrame
    print("gathering results into a `DataFrame`...")
    df_eval_res = pd.DataFrame(scalar_predictions)
    df_eval_res.columns = classes
    df_eval_res['binary_predictions'] = ''
    df_eval_res['truth_labels'] = ''
    classes = np.array(classes)
    for idx in df_eval_res.index:
        positive = np.where(binary_predictions[idx]==1)[0]
        df_eval_res.at[idx, 'binary_predictions'] = classes[positive].tolist()
        df_eval_res.at[idx, 'truth_labels'] = truth_labels[idx]
    # positional index was needed above; switch to record names only now
    df_eval_res.index = ds.records

    classes = classes.tolist()
    metrics = evaluate_12ECG_score(
        classes=classes,
        truth=np.array(truth_array),
        scalar_pred=np.array(scalar_predictions),
        binary_pred=np.array(binary_predictions),
    )
    auroc, auprc, accuracy, f_measure, f_beta_measure, g_beta_measure, challenge_metric = metrics

    msg = f""" results on tranches {tranches or 'all'}: ------------------------------ auroc: {auroc} auprc: {auprc} accuracy: 
{accuracy} f_measure: {f_measure} f_beta_measure: {f_beta_measure} g_beta_measure: {g_beta_measure} challenge_metric: {challenge_metric} ---------------------------------------- """
    print(msg)  # in case no logger

    return df_eval_res
def predict(self,file_path):
    """Load one recording from ``file_path`` and classify it.

    The recording and its header are read with ``load_challenge_data`` and
    passed, together with ``self.classes`` and ``self.model``, to
    ``run_12ECG_classifier``.

    Returns
    -------
    tuple
        ``(current_label, current_score, leads, self.classes, fs)`` - the
        four values produced by ``run_12ECG_classifier`` with
        ``self.classes`` inserted before the last one.
    """
    recording, header = load_challenge_data(file_path)
    outcome = run_12ECG_classifier(recording, header, self.classes, self.model)
    # The classifier must yield exactly four values.
    current_label, current_score, leads, fs = outcome
    return current_label, current_score, leads, self.classes, fs
def _parse_challenge_header(header_data):
    """Pull age, sex, sampling rate, per-lead gain and first Dx out of a header."""
    # First line looks like:
    # ['A0001', '12', '500', '7500', '16-Mar-2020', '19:07:01\n']
    first_line = header_data[0].split(' ')
    num_leads = int(first_line[1])
    sample_fs = int(first_line[2])

    # One line per lead follows; the gain is the part before '/' in field 2.
    gain_lead = np.zeros(num_leads)
    for lead_idx in range(num_leads):
        lead_fields = header_data[lead_idx + 1].split(' ')
        gain_lead[lead_idx] = int(lead_fields[2].split('/')[0])

    # Start from per-file defaults so nothing carries over from a previous
    # record: 57 is the value already substituted for a 'NaN' age and 0 is
    # the value already used for every sex other than 'Female'.
    age, sex, label = 57, 0, None
    for line in header_data:
        if line.startswith('#Age'):
            raw_age = line.split(': ')[1].strip()
            age = int(raw_age if raw_age != 'NaN' else 57)
        elif line.startswith('#Sex'):
            sex = 1 if line.split(': ')[1].strip() == 'Female' else 0
        elif line.startswith('#Dx'):
            # Only the first of possibly several comma-separated codes.
            label = line.split(': ')[1].split(',')[0]
    return age, sex, sample_fs, gain_lead, label


def data_files_load(input_directory,input_files = '',mapping_labels = False, reference_label = False, meta_data = True):
    """Load data files as lists along with their labels from a data directory.

    Parameters
    ----------
    input_directory:
        Path of the input data.
    input_files:
        List of file names inside ``input_directory``. When empty, the list
        is obtained from ``data_files_list(input_directory)``.
    mapping_labels:
        When true, replace the label names 'Normal', 'AF', 'I-AVB', 'LBBB',
        'RBBB', 'PAC', 'PVC', 'STD', 'STE' by the strings '1'..'9' and strip
        surrounding whitespace. This uses ``str.replace``, so it only works
        for header-derived (string) labels.
    reference_label:
        When true, labels come from 'REFERENCE.csv' (columns 'Recording',
        'First_label', 'Second_label', 'Third_label') instead of the header.
        The file is looked up in ``input_directory`` first and, if absent
        there, in the current working directory as before.
    meta_data:
        When true, ``list_meta`` holds one array
        ``[age, sex, sampling rate, gain of each lead...]`` per record;
        otherwise it holds the raw header lines.

    Returns
    -------
    tuple
        ``(list_data, list_label, list_fname, list_meta)``, all in the order
        of ``input_files``. ``list_data[0][1, :]`` is the second channel of
        the first record; ``list_label[0]`` is the label of the first record.

    Raises
    ------
    ValueError
        If labels are taken from the header and a header has no '#Dx' line.

    Example
    -------
    list_data, list_label, list_fname, list_meta = data_files_load(pth_data)
    """
    if len(input_files) == 0:
        print('Getting files list from the data directoy - OK')
        input_files = data_files_list(input_directory)

    # prefix turning every file name into a path below input_directory
    string_pth = input_directory + os.path.sep

    list_data = []
    list_label = []
    list_fname = []
    list_meta = []

    if reference_label:
        # Load labels from the given reference file 'REFERENCE.csv'
        print('Getting labels from the REFERENCE file - OK')
        reference_path = os.path.join(input_directory, 'REFERENCE.csv')
        if not os.path.isfile(reference_path):
            # Previous behaviour: file relative to the working directory.
            reference_path = 'REFERENCE.csv'
        df_reference = pd.read_csv(reference_path, sep=',')
    else:
        print('Getting labels from HEADER - OK')

    mapping = {'Normal': '1', 'AF': '2', 'I-AVB': '3', 'LBBB':'4', 'RBBB':'5', 'PAC':'6', 'PVC' :'7', 'STD':'8', 'STE': '9'}

    for fname in input_files:
        tmp_input_file = string_pth + fname
        print('Loading --> ',tmp_input_file)
        data, header_data = load_challenge_data(tmp_input_file)

        label = None
        if meta_data or not reference_label:
            age, sex, sample_fs, gain_lead, label = _parse_challenge_header(header_data)
            tmp_meta = np.hstack([age, sex, sample_fs, gain_lead])

        if reference_label:
            # Recording id is the file name without its 4-character extension.
            tmp_idx = df_reference[df_reference['Recording'] == fname[:-4]].index[0]
            label = df_reference.loc[tmp_idx,['First_label','Second_label','Third_label']].tolist()
        elif label is None:
            # Refuse to silently reuse the previous record's label.
            raise ValueError(
                "no '#Dx' line found in the header of " + str(tmp_input_file)
            )

        if mapping_labels:
            for key, value in mapping.items():
                label = label.replace(key, value)
            # strip() instead of dropping the last character: the label only
            # ends in a newline when the Dx line holds a single code, so a
            # blind [:-1] would erase the mapped digit otherwise.
            label = label.strip()

        list_data.append(data)
        list_label.append(label)
        list_fname.append(fname)
        if meta_data:
            list_meta.append(tmp_meta)
        else:
            list_meta.append(header_data)

    return list_data,list_label,list_fname,list_meta