def read_data():
    """Load ``long.csv`` and return one stratified train/test split.

    Every record is passed through ``truncate_long(ts, 3000)``; labels are
    converted with ``ReadData.Label2OneHot``.  Only the first fold of a
    shuffled 5-fold ``StratifiedKFold`` is used.

    Returns:
        Tuple ``(train_data, train_label, test_data, test_label)``.  The data
        arrays are float32 with a new axis inserted at position 2; the label
        arrays are float32 arrays built from the one-hot labels.
    """
    _pids, signals, labels = ReadData.ReadData('../../data1/long.csv')

    # The original author also experimented with 9000/6000-sample truncation
    # and with sample_long(ts, 10) down-sampling; only the 3000 variant is live.
    truncated = [truncate_long(ts, 3000) for ts in signals]
    onehot = ReadData.Label2OneHot(labels)

    all_feature = np.array(truncated, dtype=np.float32)
    all_label = np.array(onehot, dtype=np.float32)

    # Stratify on the raw labels and keep only the first of the five folds.
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    train_index, test_index = next(iter(splitter.split(all_feature, labels)))

    def _with_extra_axis(rows):
        # Fresh float32 copy with an extra axis inserted at position 2.
        return np.expand_dims(np.array(rows, dtype=np.float32), axis=2)

    train_data = _with_extra_axis(all_feature[train_index])
    test_data = _with_extra_axis(all_feature[test_index])
    return train_data, all_label[train_index], test_data, all_label[test_index]
def slide_and_cut(tmp_data, tmp_label, tmp_pid):
    """Cut every record into 6000-sample windows with a class-dependent stride.

    Class 'N' is cut with a stride of 500 samples.  The strides for 'O', 'A'
    and '~' are scaled by that class's total sample count relative to 'N'
    (with an extra 0.85 factor for '~'), so classes with less data are cut
    with a smaller stride and therefore yield more windows per record.

    Args:
        tmp_data: sequence of signals, one per record.
        tmp_label: sequence of labels parallel to ``tmp_data``; each must be
            one of 'N', 'O', 'A', '~'.
        tmp_pid: sequence of record identifiers parallel to ``tmp_data``.

    Returns:
        Tuple ``(out_data, out_label, out_pid)``: the windows as a float32
        array with a new axis inserted at position 2, the float32 array built
        from ``ReadData.Label2OneHot`` of the per-window labels, and the
        per-window identifiers as a bytes array.

    Raises:
        KeyError: if a label is not one of the four known classes.
    """
    window_size = 6000
    base_stride = 500

    # Total number of samples per class (not number of records).
    totals = {'N': 0, 'O': 0, 'A': 0, '~': 0}
    for ts, label in zip(tmp_data, tmp_label):
        totals[label] += len(ts)

    def _scaled_stride(numerator, label):
        # With no 'N' samples or no samples of this class the ratio is
        # undefined (the original divided by zero here); fall back to the
        # base stride.  Clamp to 1 because range() rejects a step of 0.
        if totals['N'] == 0 or totals[label] == 0:
            return base_stride
        return max(1, int(numerator // (totals['N'] / totals[label])))

    stride = {
        'N': base_stride,
        'O': _scaled_stride(base_stride, 'O'),
        'A': _scaled_stride(base_stride, 'A'),
        '~': _scaled_stride(0.85 * base_stride, '~'),
    }

    out_pid = []
    out_data = []
    out_label = []
    for ts, label, pid in zip(tmp_data, tmp_label, tmp_pid):
        # NOTE(review): the range end excludes the start position
        # len(ts) - window_size, so a record of exactly window_size samples
        # yields no window -- confirm this is intended.
        for start in range(0, len(ts) - window_size, stride[label]):
            out_pid.append(pid)
            out_data.append(ts[start:start + window_size])
            out_label.append(label)

    out_label = ReadData.Label2OneHot(out_label)
    out_data = np.expand_dims(np.array(out_data, dtype=np.float32), axis=2)
    out_label = np.array(out_label, dtype=np.float32)
    # np.string_ was removed in NumPy 2.0; np.bytes_ is the same type.
    out_pid = np.array(out_pid, dtype=np.bytes_)
    return out_data, out_label, out_pid
def slide_and_cut(tmp_data, tmp_label, tmp_pid):
    """Slide a 6000-sample window over long records to get more samples.

    Class 'N' is cut with a stride of 500 samples.  The strides for 'O', 'A'
    and '~' are scaled by that class's total sample count relative to 'N'
    (with an extra 0.85 factor for '~'), so classes with less data yield more
    windows per record.  The resulting windows are shuffled with
    ``np.random.permutation`` before being returned.

    Counts noted by the original author:
    Counter({'N': 5050, 'O': 2456, 'A': 738, '~': 284})

    Args:
        tmp_data: sequence of signals, one per record.
        tmp_label: sequence of labels parallel to ``tmp_data``; each must be
            one of 'N', 'O', 'A', '~'.
        tmp_pid: sequence of record identifiers parallel to ``tmp_data``.

    Returns:
        Tuple ``(out_data, out_label, out_pid)``, all shuffled with the same
        permutation: the windows as a float32 array with a new axis inserted
        at position 2, the float32 array built from
        ``ReadData.Label2OneHot`` of the per-window labels, and the
        per-window identifiers as a bytes array.

    Raises:
        KeyError: if a label is not one of the four known classes.
    """
    window_size = 6000
    base_stride = 500

    # Total number of samples per class (not number of records).
    totals = {'N': 0, 'O': 0, 'A': 0, '~': 0}
    for ts, label in zip(tmp_data, tmp_label):
        totals[label] += len(ts)

    def _scaled_stride(numerator, label):
        # With no 'N' samples or no samples of this class the ratio is
        # undefined (the original divided by zero here); fall back to the
        # base stride.  Clamp to 1 because range() rejects a step of 0.
        if totals['N'] == 0 or totals[label] == 0:
            return base_stride
        return max(1, int(numerator // (totals['N'] / totals[label])))

    stride = {
        'N': base_stride,
        'O': _scaled_stride(base_stride, 'O'),
        'A': _scaled_stride(base_stride, 'A'),
        '~': _scaled_stride(0.85 * base_stride, '~'),
    }
    print(stride)

    out_pid = []
    out_data = []
    out_label = []
    for i, (ts, label, pid) in enumerate(zip(tmp_data, tmp_label, tmp_pid)):
        if i % 1000 == 0:
            print(i)  # progress indicator
        # NOTE(review): the range end excludes the start position
        # len(ts) - window_size, so a record of exactly window_size samples
        # yields no window -- confirm this is intended.
        for start in range(0, len(ts) - window_size, stride[label]):
            out_pid.append(pid)
            out_data.append(ts[start:start + window_size])
            out_label.append(label)

    print(Counter(out_label))
    n_windows = len(out_label)

    out_label = ReadData.Label2OneHot(out_label)
    out_data = np.expand_dims(np.array(out_data, dtype=np.float32), axis=2)
    out_label = np.array(out_label, dtype=np.float32)
    # np.string_ was removed in NumPy 2.0; np.bytes_ is the same type.
    out_pid = np.array(out_pid, dtype=np.bytes_)

    # One shared permutation keeps data, labels and pids aligned.
    idx_shuffle = np.random.permutation(n_windows)
    out_data = out_data[idx_shuffle]
    out_label = out_label[idx_shuffle]
    out_pid = out_pid[idx_shuffle]

    return out_data, out_label, out_pid
def read_data():
    """Load the resampled center-wave features and split them once.

    Features come from ``ReadData.read_centerwave`` and labels from the third
    value returned by ``ReadData.ReadData`` for ``QRSinfo.csv``.  A shuffled
    5-fold ``StratifiedKFold`` is iterated to the end and the indices of the
    last fold are the ones used for the returned split.

    Returns:
        Tuple ``(all_feature, all_label_num, train_data, train_label,
        test_data, test_label)`` where the label arrays are built from
        ``ReadData.Label2OneHot``.
    """
    waves = ReadData.read_centerwave('../../data1/centerwave_resampled.csv')
    _pid, _qrs, raw_labels = ReadData.ReadData('../../data1/QRSinfo.csv')

    all_feature = np.array(waves)
    print(all_feature.shape)
    label_names = np.array(raw_labels)
    all_label_num = np.array(ReadData.Label2OneHot(raw_labels))

    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    print('all feature shape: {0}'.format(all_feature.shape))

    # Exhaust the splitter; the loop variables keep the final fold's indices.
    for train_index, test_index in splitter.split(all_feature, label_names):
        pass

    train_data, train_label = all_feature[train_index], all_label_num[train_index]
    test_data, test_label = all_feature[test_index], all_label_num[test_index]

    print('read data done')
    return (all_feature, all_label_num,
            train_data, train_label,
            test_data, test_label)
def read_data():
    """Load ``centerwave.csv`` and return the 9000-length features and labels.

    Returns:
        Tuple ``(mat1, label_onehot)``: the ``truncate_long(ts, 9000)`` results
        as an array with a new axis inserted at position 2, and the array
        built from ``ReadData.Label2OneHot`` of the labels.
    """
    _pids, signals, labels = ReadData.ReadData(
        '../../data1/centerwave.csv'
    )

    lengths = (9000, 6000, 3000)
    truncated = [[truncate_long(ts, n) for ts in signals] for n in lengths]

    # NOTE(review): the 6000/3000 variants and all sample_long(ts, 10)
    # results are never used below.  They are still computed here, in the
    # original order, because the helpers are not visible from this block;
    # drop them once the helpers are confirmed to be side-effect free.
    _downsampled = [[sample_long(ts, 10) for ts in mat] for mat in truncated]

    onehot = ReadData.Label2OneHot(labels)

    features = np.expand_dims(np.array(truncated[0]), axis=2)
    return features, np.array(onehot)
def read_seq():
    """Split ``long.csv`` records into 1000-sample segments and split once.

    Each record is chopped into consecutive, non-overlapping segments of 1000
    samples (a trailing remainder is discarded); every segment inherits the
    label of its record.  The segment matrix is passed through
    ``normalize(..., axis=0)`` and the first fold of a shuffled 5-fold
    ``StratifiedKFold`` provides the train/test split.

    Returns:
        Tuple ``(train_data, train_label, test_data, test_label)``.  The data
        arrays are float32 with a new axis inserted at position 2; the label
        arrays are float32 arrays built from ``ReadData.Label2OneHot``.
    """
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')

    seq_len = 1000
    segments = []
    segment_labels = []
    for _pid, ts, label in zip(long_pid, long_data, long_label):
        n_segments = len(ts) // seq_len
        segments.extend(
            ts[k * seq_len:(k + 1) * seq_len] for k in range(n_segments)
        )
        segment_labels.extend([label] * n_segments)

    all_feature = normalize(np.array(segments, dtype=np.float32), axis=0)
    all_label = np.array(ReadData.Label2OneHot(segment_labels), dtype=np.float32)

    # Stratify on the per-segment raw labels; keep only the first fold.
    splitter = StratifiedKFold(n_splits=5, shuffle=True)
    train_index, test_index = next(
        iter(splitter.split(all_feature, segment_labels))
    )

    def _with_extra_axis(rows):
        # Fresh float32 copy with an extra axis inserted at position 2.
        return np.expand_dims(np.array(rows, dtype=np.float32), axis=2)

    train_data = _with_extra_axis(all_feature[train_index])
    test_data = _with_extra_axis(all_feature[test_index])
    return train_data, all_label[train_index], test_data, all_label[test_index]
#-----------------------------------------------test-------------------------------------------- ''' train_data, train_label, val_data, val_label, test_data, test_label, test_pid = read_data_from_pkl( ) long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv') new_test = [] new_pid = [] new_label = [] for j in range(len(long_pid)): for i in range(len(test_pid)): if long_pid[j] == test_pid[i]: new_test.append(long_data[j]) new_pid.append(long_pid[j]) new_label.append(long_label[j]) out_label = ReadData.Label2OneHot(new_label) out_label = np.array(out_label, dtype=np.float32) new_test = np.array(new_test) new_pid = np.array(new_pid) test_data, test_label, test_pid = slide_and_cut(new_test, np.array(test_label), new_pid) pid_map = {} pid_set = set(new_pid) pids = list(pid_set) for i in range(len(pids)): cur_pid = str(pids[i], 'utf-8') pid_map[cur_pid] = i
if pred_1 != np.argmax(labels): if pred_2 == np.argmax(labels): y_pre = y_sec_pre return y_pre if __name__ == '__main__': short_pid, short_data, short_label = ReadData.ReadData( '../../data1/short.csv') long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv') QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv') print('=' * 60) print('pred begin') out_label = ReadData.Label2OneHot(long_label) out_label = np.array(out_label, dtype=np.float32) #res_pre = pred_resnet(long_data, long_label, long_pid) #print(len(res_pre)) #MyEval.F1Score3_num(res_pre, out_label) num_data = len(long_data) pre = [[0. for j in range(4)] for i in range(num_data)] #for i in range(num_data): res = pred_one_sample(short_data[0:40], long_data[0:1], QRS_data[0:1], long_pid[0:1], short_pid[0:40]) print(res) labels = {'N': 0, 'A': 1, 'O': 2, '~': 3} pre = [0., 0., 0., 0.] pre[labels[res]] = 1