def TrainingModels(target_label, model_file_name, training_list):
    '''Train a RandomWalker on the given QT records and save the model.

    Inputs:
        target_label: label passed to RandomWalker as its target_label.
        model_file_name: path handed to walker.save_model(); the file
            'random_pattern.json' in the same directory is passed to
            RandomWalker as random_pattern_file_name.
        training_list: names of the QT records to collect features from.
    '''
    qt = QTloader()
    random_pattern_file_name = os.path.join(
        os.path.dirname(model_file_name), 'random_pattern.json')
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10),
                          random_pattern_file_name=random_pattern_file_name)

    start_time = time.time()
    for record_name in training_list:
        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        # Only the first lead ('sig') is used for training.
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print('random forest start training(%s)...' % target_label)
    walker.training()
    # The reported time covers feature collection as well as fitting.
    print('trianing used %.3f seconds' % (time.time() - start_time))

    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
def Test1():
    '''Compare DPI QRS detections with the expert R labels of every QT record.

    For each record with at least one expert 'R' label, the positions
    returned by GetFN(expert R positions, detected QRS positions) are plotted
    (yellow squares) together with the detections (red circles).
    '''
    qt = QTloader()
    record_names = qt.getreclist()
    for rec_ind, record_name in enumerate(record_names):
        print('Processing record[%d] %s ...' % (rec_ind, record_name))
        raw_sig = qt.load(record_name)['sig']
        expert_labels = qt.getExpert(record_name)
        expert_R_positions = [
            item[0] for item in expert_labels if item[1] == 'R'
        ]
        # Nothing to compare against without expert R labels.
        if not expert_R_positions:
            continue

        detector = DPI()
        qrs_list = detector.QRS_Detection(raw_sig)

        # Find FN
        fn_positions = GetFN(expert_R_positions, qrs_list)
        if len(fn_positions) > 0:
            plt.plot(raw_sig)
            plt.plot(qrs_list, [raw_sig[x] for x in qrs_list],
                     'ro', markersize=12)
            plt.plot(fn_positions, [raw_sig[x] for x in fn_positions],
                     'ys', markersize=14)
            plt.show()
def TestQT(record_name, save_result_folder, model_folder,
           random_pattern_file_name):
    '''Run TestSignal on both leads of one QT record and dump the results.

    The tested range spans the expert annotations padded by 100 samples on
    each side; the lower bound is added to every returned position before
    the results are written to '<save_result_folder>/<record_name>.json'.
    '''
    fs = 250.0
    qt = QTloader()
    sig = qt.load(record_name)
    expert_annotations = qt.getExpert(record_name)
    pos_list, _ = zip(*expert_annotations)
    test_range = [np.min(pos_list) - 100, np.max(pos_list) + 100]
    offset = test_range[0]

    result_mat = []
    # (banner printed, key of the lead in the record, result-name suffix)
    leads = (('Lead1', 'sig', None), ('Lead2', 'sig2', '_sig2'))
    for banner, sig_key, suffix in leads:
        print(banner)
        results = TestSignal(sig[sig_key], fs, test_range, model_folder,
                             random_pattern_file_name)
        # Shift the positions by the lower bound of the tested range.
        for ind, item in enumerate(results):
            results[ind] = [item[0] + offset, item[1]]
        if suffix is None:
            result_name = record_name
        else:
            result_name = record_name + suffix
        result_mat.append((result_name, results))

    result_file_name = os.path.join(save_result_folder,
                                    '%s.json' % record_name)
    with open(result_file_name, 'w') as fout:
        json.dump(result_mat, fout, indent=4)
        print('Results saved as %s.' % result_file_name)
def TEST_ExpertQRS():
    '''Plot record sel103 with SWT detail coefficients in two subplots.

    Top: the loaded signal and cDlist[-4] computed by SWT_NoPredictQRS.
    Bottom: the signal reloaded through swt.QTdb, cropped with
    crop_data_for_swt, and the matching 'db6' detail coefficients from pywt.
    '''
    record_name = 'sel103'
    loader = QTloader()
    signal = loader.load(record_name)['sig']
    expert_marks = loader.getExpert(record_name)

    swt = SWT_NoPredictQRS(signal, expert_marks)
    swt.swt()
    detail = swt.cDlist[-4]

    plt.figure(1)
    # plot Non QRS ECG & SWT
    plt.subplot(211)
    plt.plot(signal)
    plt.plot(detail)
    plt.grid(True)

    # plot Original ECG
    original = swt.QTdb.load(record_name)['sig']
    original = swt.crop_data_for_swt(original)
    coefficients = pywt.swt(original, 'db6', 9)
    _, detail_levels = zip(*coefficients)
    plt.subplot(212)
    plt.plot(original)
    plt.plot(detail_levels[-4])
    plt.grid(True)
    plt.show()
def TrainingModels(target_label, model_file_name, training_list,
                   CP_folder='/home/alex/code/Python/EcgCharacterPointMarks'):
    '''Train a RandomWalker on expert plus manual characteristic-point marks.

    CP: Characteristic points

    Inputs:
        target_label: label to train for; also the sub-folder of CP_folder
            that is searched for manual marks.
        model_file_name: path handed to walker.save_model(); the file
            'random_pattern.json' in the same directory is passed to
            RandomWalker as random_pattern_file_name.
        training_list: names of the QT records to collect features from.
        CP_folder: root folder of the manual mark files, read from
            '<CP_folder>/<target_label>/<record>_poslist.json'. Defaults to
            the location that used to be hard-coded.

    A record is skipped when it has no expert mark with target_label, or
    when its manual mark file exists but holds an empty 'poslist'.
    '''
    qt = QTloader()
    random_pattern_file_name = os.path.join(
        os.path.dirname(model_file_name), 'random_pattern.json')
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10),
                          random_pattern_file_name=random_pattern_file_name)

    start_time = time.time()
    for record_name in training_list:
        CP_file_name = os.path.join(CP_folder, target_label,
                                    '%s_poslist.json' % record_name)

        # Add expert marks
        expert_marks = qt.getExpert(record_name)
        CP_marks = [x for x in expert_marks if x[1] == target_label]
        if not CP_marks:
            continue

        # Add manual labels if possible
        if os.path.exists(CP_file_name):
            with open(CP_file_name, 'r') as fin:
                CP_info = json.load(fin)
            poslist = CP_info['poslist']
            if len(poslist) == 0:
                continue
            CP_marks.extend((pos, target_label) for pos in poslist)

        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], CP_marks)

    print('random forest start training(%s)...' % target_label)
    walker.training()
    # The reported time covers feature collection as well as fitting.
    print('trianing used %.3f seconds' % (time.time() - start_time))

    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
def ContinueAddQtTrainingSamples(walker, target_label):
    '''Add QT training samples.

    Inputs:
        walker: object whose collect_training_data(signal, marks) is called
            once for every QT record that has expert marks of target_label.
        target_label: label used to filter the expert marks.
    '''
    qt = QTloader()
    for record_name in qt.getreclist():
        # Add expert marks
        expert_marks = qt.getExpert(record_name)
        CP_marks = [x for x in expert_marks if x[1] == target_label]
        if not CP_marks:
            continue

        print('Collecting features from QT record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], CP_marks)
def TrainQtRecords(self, record_list):
    '''API for QTdb: collect features from each record, then train.

    Every record's first lead and expert labels are passed to
    self.AddNewTrainingSignal(); self.training() runs once at the end.
    '''
    loader = QTloader()

    # Extracting feature from each record.
    for progress, record_name in enumerate(record_list, 1):
        raw_signal = loader.load(record_name)['sig']
        expert_labels = loader.getExpert(record_name)
        self.AddNewTrainingSignal(raw_signal, expert_labels)

        # Logging
        log.info('Extracted features from %s' % record_name)
        print('%s (%d/%d)' % ('.' * progress, progress, len(record_list)))

    # Training with feature pool
    self.training()
def TrainingModels(target_label, model_file_name, training_list,
                   Tonset_folder='/home/alex/code/Python/Tonset/results'):
    '''Train a RandomWalker on Tonset marks and save the model.

    Inputs:
        target_label: label passed to RandomWalker and shown in the log.
            NOTE(review): the training marks are always filtered/labelled
            as 'Tonset' regardless of this value - confirm callers only
            pass 'Tonset'.
        model_file_name: path handed to walker.save_model(); the file
            'random_pattern.json' in the same directory is passed to
            RandomWalker as random_pattern_file_name.
        training_list: names of the QT records to collect features from.
        Tonset_folder: folder holding '<record>_poslist.json' files with
            manual marks. Defaults to the location that used to be
            hard-coded.

    Manual marks replace the expert marks when the file exists; records
    that end up without any mark are skipped.
    '''
    qt = QTloader()
    random_pattern_file_name = os.path.join(
        os.path.dirname(model_file_name), 'random_pattern.json')
    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10),
                          random_pattern_file_name=random_pattern_file_name)

    start_time = time.time()
    for record_name in training_list:
        Tonset_file_name = os.path.join(Tonset_folder,
                                        '%s_poslist.json' % record_name)
        if os.path.exists(Tonset_file_name):
            with open(Tonset_file_name, 'r') as fin:
                Tonset_info = json.load(fin)
            poslist = Tonset_info['poslist']
            if len(poslist) == 0:
                continue
            Tonset_marks = [(pos, 'Tonset') for pos in poslist]
        else:
            expert_marks = qt.getExpert(record_name)
            Tonset_marks = [x for x in expert_marks if x[1] == 'Tonset']
            if len(Tonset_marks) == 0:
                continue

        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], Tonset_marks)

    print('random forest start training(%s)...' % target_label)
    walker.training()
    # The reported time covers feature collection as well as fitting.
    print('trianing used %.3f seconds' % (time.time() - start_time))

    start_time = time.time()
    walker.save_model(model_file_name)
    print('Serializing model time cost %f' % (time.time() - start_time))
def TestQtRecords(self, save_folder, reclist=None):
    '''API for QTdb: testing given record_list.

    Inputs:
        save_folder: folder receiving one 'result_<record_name>' JSON file
            per record.
        reclist: names of the QT records to test; None (the default) means
            no records.

    Each file holds [[record_name, marks], [record_name + '_sig2', marks]]
    where marks pairs every position returned by self.testing() with
    self.target_label.
    '''
    if reclist is None:
        reclist = []

    QTdb = QTloader()
    print('Testing:')
    for testing_count, record_name in enumerate(reclist, 1):
        # Logging
        log.info('Testing record %s' % record_name)
        print('%s (%d/%d)' % ('.' * testing_count, testing_count,
                              len(reclist)))

        sig_struct = QTdb.load(record_name)
        expert_labels = QTdb.getExpert(record_name)

        lead_result_list = []
        # Test lead1 ('sig') and lead2 ('sig2') the same way.
        for sig_key, result_name in (('sig', record_name),
                                     ('sig2', record_name + '_sig2')):
            predict_position_list = self.testing(sig_struct[sig_key],
                                                 expert_labels)
            # A real list (not a lazy zip) so json.dump can serialize it.
            test_result = [(pos, self.target_label)
                           for pos in predict_position_list]
            lead_result_list.append([result_name, test_result])

        # Save result.
        result_path = os.path.join(save_folder,
                                   'result_{}'.format(record_name))
        with open(result_path, 'w') as fout:
            json.dump(lead_result_list, fout, indent=4)
def Test1(target_label='P', num_training=25):
    '''Test case 1: random walk.

    Trains a RandomWalker on num_training randomly sampled QT records, then
    for every remaining record runs 20 walks from increasing seed positions,
    plotting each walk and dropping into pdb after it.
    '''
    qt = QTloader()
    record_list = qt.getreclist()
    training_list = random.sample(record_list, num_training)
    testing_list = list(set(record_list) - set(training_list))

    walker = RandomWalker(target_label=target_label,
                          random_forest_config=dict(max_depth=10))

    start_time = time.time()
    for record_name in training_list:
        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print('random forest start training...')
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    for record_name in testing_list:
        sig = qt.load(record_name)
        raw_sig = sig['sig']
        seed_position = random.randint(100, len(raw_sig) - 100)

        plt.figure(1)
        plt.clf()
        plt.plot(sig['sig'], label=record_name)
        plt.title(target_label)

        for _ in range(20):
            seed_position += random.randint(1, 200)
            print('testing...(position: %d)' % seed_position)
            start_time = time.time()
            results = walker.testing_walk(sig['sig'], seed_position,
                                          iterations=100, stepsize=10)
            print('testing finished in %.3f seconds.' %
                  (time.time() - start_time))

            pos_list, _ = zip(*results)
            # The prediction is the mean of the second half of the walk.
            half = len(pos_list) // 2
            predict_pos = np.mean(pos_list[half:])

            # Draw the walk path as a slowly descending line that starts at
            # the signal amplitude of the first visited position.
            amp_list = []
            bias = raw_sig[pos_list[0]]
            for _pos in pos_list:
                amp_list.append(bias)
                bias -= 0.01

            plt.plot(predict_pos, raw_sig[int(predict_pos)], 'ro',
                     markersize=14, label='predict position')
            plt.plot(pos_list, amp_list, 'r', label='walk path',
                     markersize=3, linewidth=8, alpha=0.3)
            plt.xlim(min(pos_list) - 100, max(pos_list) + 100)
            plt.grid(True)
            plt.legend()
            plt.show(block=False)
            pdb.set_trace()
class HogFeatureExtractor(object):
    '''Hog 1D feature extractor with a gradient-boosting regressor on top.

    For every 'R' annotation a fixed-length ECG segment is cut out, described
    by Hog features at several difference steps, and used to regress the
    position of target_label relative to that R position.
    '''

    def __init__(self, target_label='P'):
        '''Hog 1D feature extractor.

        Inputs:
            target_label: label to detect. eg. 'T[(onset)|(offset)]{0,1}', 'P'
        '''
        self.qt = QTloader()
        # Feature length
        self.fixed_window_length = 250

        # Training Samples.
        self.signal_segments = []
        self.training_vector = []
        self.target_biases = []

        self.target_label = target_label
        self.hog = HogClass(segment_len=20)

        # ML models
        self.gbdt = None

    def GetDiffFeature(self, signal_segment, diff_step=4):
        '''Get Difference feature.

        Flattens everything self.hog.ComputeHog() returns for the segment
        into a single 1-D numpy array.
        '''
        hog_arr = self.hog.ComputeHog(signal_segment,
                                      diff_step=diff_step,
                                      debug_plot=False)
        current_feature_vector = np.array([])
        for hog_vec in hog_arr:
            current_feature_vector = np.append(current_feature_vector,
                                               hog_vec)
        return current_feature_vector

    def _BuildFeatureVector(self, signal_segment):
        '''Concatenate the difference features for diff_step 1, 4 and 8.'''
        feature_vector = np.array([])
        for diff_step in (1, 4, 8):
            feature_vector = np.append(
                feature_vector,
                self.GetDiffFeature(signal_segment, diff_step=diff_step))
        return feature_vector

    def GetTrainingSamples(self, sig_in, expert_labels):
        '''Form Hog1D feature.

        Appends one segment, target bias and feature vector to the training
        pools for every 'R' label that has a target within the window.
        Note: expert_labels is sorted in place by position.
        '''
        # Make sure the x indexes are in ascending order.
        expert_labels.sort(key=lambda x: x[0])
        for expert_index, (pos, label) in enumerate(expert_labels):
            if label != 'R':
                continue

            # Cut out the ECG segment anchored at the current R peak.
            signal_segment, target_bias = self.CutSegment(
                sig_in,
                expert_labels,
                expert_index,
                fixed_window_length=self.fixed_window_length)

            # Skip invalid values
            if target_bias is None:
                continue

            self.signal_segments.append(signal_segment)
            self.target_biases.append(target_bias)
            self.training_vector.append(
                self._BuildFeatureVector(signal_segment))

    def Train(self, reclist):
        '''Training with Qt data.

        Inputs:
            reclist: names of the QT records whose first lead is used.

        Raises:
            Exception: when a collected feature value is not a float.
        '''
        for rec_name in reclist:
            sig_struct = self.qt.load(rec_name)
            raw_signal = sig_struct['sig']
            # Expert samples from Qt database
            expert_labels = self.qt.getExpert(rec_name)
            # Collect training vectors
            self.GetTrainingSamples(raw_signal, expert_labels)

        # Sanity check: every feature value must be a float.
        for vec in self.training_vector:
            for val in vec:
                if not isinstance(val, float):
                    raise Exception('val = {}'.format(val))

        # Training GBDT models. The loss argument is left at its default
        # (least squares): the explicit name 'ls' is rejected by current
        # scikit-learn releases while the default is the same loss.
        self.gbdt = GradientBoostingRegressor(n_estimators=100,
                                              learning_rate=0.1,
                                              max_depth=1,
                                              random_state=0).fit(
                                                  self.training_vector,
                                                  self.target_biases)

    def LoadModel(self, model_object):
        '''Load Model object.'''
        self.gbdt = model_object

    def Testing(self, sig_in, expert_labels):
        '''Testing given ECG.

        Returns:
            A list with one entry per 'R' label: the value returned by
            self.gbdt.predict() for that R peak plus the R position.
        '''
        detected_positions = list()
        for expert_index, (pos, label) in enumerate(expert_labels):
            if label != 'R':
                continue

            # Cut out the ECG segment anchored at the current R peak.
            signal_segment, _ = self.CutSegment(
                sig_in,
                expert_labels,
                expert_index,
                fixed_window_length=self.fixed_window_length)

            # predict() expects a 2-D (samples, features) input.
            feature = self._BuildFeatureVector(signal_segment).reshape(1, -1)
            predict_pos = self.gbdt.predict(feature)

            # Append the global position
            detected_positions.append(predict_pos + pos)
        return detected_positions

    def TestingQt(self, record_name):
        '''Predict and plot the target for the first 7 R peaks of a record.'''
        sig_struct = self.qt.load(record_name)
        sig_in = sig_struct['sig']
        expert_labels = self.qt.getExpert(record_name)

        # debug: stop after this many R peaks.
        debug_count = 7
        for expert_index, (pos, label) in enumerate(expert_labels):
            if label != 'R':
                continue

            debug_count -= 1
            if debug_count < 0:
                break

            # Cut out the ECG segment anchored at the current R peak.
            signal_segment, _ = self.CutSegment(
                sig_in,
                expert_labels,
                expert_index,
                fixed_window_length=self.fixed_window_length)

            # predict() expects a 2-D (samples, features) input.
            feature = self._BuildFeatureVector(signal_segment).reshape(1, -1)
            predict_pos = self.gbdt.predict(feature)
            print('Predict position: %s' % (predict_pos,))

            # Display results. T segments start at the R peak (index 0);
            # all other segments end at it (index window - 1).
            if 'T' in self.target_label:
                local_pos = int(predict_pos[0])
            else:
                local_pos = int(predict_pos[0] + self.fixed_window_length - 1)
            plt.plot(signal_segment)
            plt.plot(local_pos,
                     signal_segment[local_pos],
                     marker='o',
                     markersize=12)
            plt.grid(True)
            plt.title(record_name)
            plt.show()

    def CutSegment_T(self,
                     sig_in,
                     expert_labels,
                     expert_index,
                     fixed_window_length=250 * 1):
        '''Get equal length signal_segments starts at expert_index.

        Inputs:
            sig_in: Input ECG signal.
            expert_labels: Annotation list of form [(pos, label), ...]
            expert_index: The index of the element in expert_labels that
                has label 'R'.
            fixed_window_length : return signal's length
        Returns:
            signal_segment: Cropped signal segment, zero padded at the end
                when the signal is too short.
            target_bias: (May be None)The bias respect to the expert_index's
                position.
        '''
        current_R_pos = expert_labels[expert_index][0]
        ecg_segment = np.zeros(fixed_window_length)
        right_bound = min(current_R_pos + fixed_window_length - 1,
                          len(sig_in) - 1)
        len_ecg_data = abs(current_R_pos - right_bound) + 1
        ecg_segment[:len_ecg_data] = np.array(
            sig_in[current_R_pos:current_R_pos + len_ecg_data])

        next_R_pos = None
        next_T_pos = None
        ind = expert_index + 1
        while ind < len(expert_labels):
            cur_pos, cur_label = expert_labels[ind]
            ind += 1
            # Stop at the second following R label ...
            if cur_label == 'R':
                if next_R_pos is not None:
                    break
                next_R_pos = cur_pos
            # ... or at the second following target label ...
            if cur_label == self.target_label:
                if next_T_pos is not None:
                    break
                next_T_pos = cur_pos
            # ... or once the labels leave the window.
            if abs(current_R_pos - cur_pos) >= fixed_window_length:
                break

        if next_T_pos is None:
            return ecg_segment, None
        if abs(current_R_pos - next_T_pos) >= fixed_window_length:
            return ecg_segment, None
        # Bias respect to current_R_pos
        return ecg_segment, next_T_pos - current_R_pos

    def CutSegment(self,
                   sig_in,
                   expert_labels,
                   expert_index,
                   fixed_window_length=250 * 1):
        '''Get equal length signal_segments starts or ends at expert_index.

        When target_label contains 'T' the work is delegated to
        CutSegment_T (segment starts at the R position); otherwise the
        segment ends at the R position and the closest preceding
        target_label is searched.

        Inputs:
            sig_in: Input ECG signal.
            expert_labels: Annotation list of form [(pos, label), ...]
            expert_index: The index of the element in expert_labels that
                has label 'R'.
            fixed_window_length : return signal's length
        Returns:
            signal_segment: Cropped signal segment, zero padded at the front
                when the R position is too close to the signal start.
            target_bias: (May be None)The bias respect to the expert_index's
                position.
        '''
        # Search T wave
        if 'T' in self.target_label:
            return self.CutSegment_T(sig_in,
                                     expert_labels,
                                     expert_index,
                                     fixed_window_length=fixed_window_length)

        current_R_pos = expert_labels[expert_index][0]
        ecg_segment = np.zeros(fixed_window_length)
        left_bound = max(0, current_R_pos - fixed_window_length + 1)
        len_ecg_data = current_R_pos - left_bound + 1
        ecg_segment[fixed_window_length - len_ecg_data:] = np.array(
            sig_in[left_bound:current_R_pos + 1])

        # Walk backwards and stop at the first target label found.
        previous_P_pos = None
        ind = expert_index - 1
        while ind >= 0:
            cur_pos, cur_label = expert_labels[ind]
            if cur_label == self.target_label:
                previous_P_pos = cur_pos
                break
            ind -= 1

        if previous_P_pos is None:
            return ecg_segment, None
        if current_R_pos - previous_P_pos >= fixed_window_length:
            return ecg_segment, None
        # Bias respect to current_R_pos
        return ecg_segment, previous_P_pos - current_R_pos
def RoundTest(target_label, result_folder, num_training = 75):
    '''Train on a fixed core of records plus random extras, test the rest.

    The records in must_train_list are always used for training; when
    num_training exceeds their number the remainder is sampled randomly
    from the other QT records. Every record left over is tested on both
    leads and the results are written to '<result_folder>/<record>.json'.
    '''
    qt = QTloader()
    all_records = qt.getreclist()

    must_train_list = [
        "sel35", "sel36", "sel31", "sel38", "sel39", "sel820", "sel51",
        "sele0104", "sele0107", "sel223", "sele0607", "sel102", "sele0409",
        "sel41", "sel40", "sel43", "sel42", "sel45", "sel48", "sele0133",
        "sele0116", "sel14172", "sele0111", "sel213", "sel14157", "sel301"
    ]
    extra_count = num_training - len(must_train_list)
    candidates = list(set(all_records) - set(must_train_list))

    training_list = must_train_list
    if extra_count > 0:
        training_list += random.sample(candidates, extra_count)
    testing_list = list(set(candidates) - set(training_list))

    walker = RandomWalker(target_label = target_label,
                          random_forest_config = dict(max_depth = 10))

    start_time = time.time()
    for record_name in training_list:
        print('Collecting features from record %s.' % record_name)
        sig = qt.load(record_name)
        walker.collect_training_data(sig['sig'], qt.getExpert(record_name))
    print('random forest start training...')
    walker.training()
    print('trianing used %.3f seconds' % (time.time() - start_time))

    for record_name in testing_list:
        print('testing record %s...' % record_name)
        sig = qt.load(record_name)
        record_result = [(record_name, testing(walker, sig['sig']))]
        record_result.append(
            (record_name + '_sig2', testing(walker, sig['sig2'])))

        # Write to json
        result_path = os.path.join(result_folder, '%s.json' % record_name)
        with open(result_path, 'w') as fout:
            json.dump(record_result, fout, indent = 4)