def find_threshold(self, train, val, seed=None):
    """Pick the candidate threshold that scores best on the validation data.

    A linear SVC is fitted on ``train``. Each value of
    ``np.arange(0.2, 0.7, 0.05)`` is then assigned to ``self.threshold`` and
    the two-layer prediction produced with it is scored on ``val``.

    Args:
        train: Collection accepted by ``pc.join_list_df``; every joined row
            is read as feature values followed by the label in the last
            position.
        val: Validation collection in the same layout as ``train``.
        seed: Optional seed for Python's ``random`` module. Any value other
            than ``None`` (including ``0``) is applied.

    Returns:
        The candidate with the highest ``accuracy_score`` on the validation
        rows. A candidate that gets 30% or fewer of the validation rows
        right is scored 0.0, so the first candidate is returned when none
        clears that bar.

    Side effects:
        ``self.threshold`` is overwritten during the search and is left at
        the last candidate tried, not at the returned value.
    """
    # ``if seed:`` would silently skip a legitimate seed of 0.
    if seed is not None:
        random.seed(seed)
    candidates = np.arange(0.2, 0.7, 0.05)

    # Train
    training_set = pc.join_list_df(train)
    X_train = [row[:-1] for row in training_set]
    y_train = [row[-1] for row in training_set]
    svc = SVC(
        kernel='linear',
        random_state=SEED,
        probability=True,
        class_weight='balanced' if self.class_weight else None,
    )
    svc.fit(X_train, y_train)

    val_set = pc.join_list_df(val)
    X_val = [row[:-1] for row in val_set]
    y_val = [row[-1] for row in val_set]
    # Minimum number of correct predictions a candidate must exceed.
    border_amount = len(y_val) * 0.3

    scores = []
    for candidate in candidates:
        self.threshold = candidate
        first_layer = self.first_layer_classifier(X_val, svc)
        prediction = self.second_layer_classifier(X_val, first_layer, svc)
        correct = np.sum(confusion_matrix(y_val, prediction).diagonal()) * 1.0
        # Compare the count of correct predictions with the count-based
        # border directly. The previous form divided by len(val) (the number
        # of items in ``val``, not the number of rows), which made the bar
        # depend on how the validation data was partitioned.
        if correct > border_amount:
            scores.append(accuracy_score(y_val, prediction))
        else:
            scores.append(0.0)
    return candidates[np.argmax(scores)]
def train(self, training_set_list):
    """Derive ``p1``/``p2`` when unset and compute ``self.threshold``.

    Args:
        training_set_list: Sequence whose first element exposes ``.columns``
            and which ``pc.join_list_df`` can join into a single table.

    Side effects:
        May set ``self.p1`` and ``self.p2``; sets ``self.threshold``.
    """
    header = training_set_list[0].columns
    frame = pd.DataFrame(pc.join_list_df(training_set_list))
    frame.columns = header

    # A value of -1 or lower means the phases were not chosen yet: take the
    # last two entries returned by get_sorted_phases, each shifted by one.
    if self.p1 <= -1:
        phases = get_sorted_phases(frame)
        self.p1 = phases[-2] + 1
        self.p2 = phases[-1] + 1

    # NOTE(review): the source formatting was collapsed; the threshold search
    # is assumed to run unconditionally rather than inside the branch above
    # -- confirm against the original layout.
    self.threshold = find_threshold(training_set_list, self.p1, self.p2)
def find_threshold(self, train, val, seed=None):
    """Pick the candidate threshold that scores best on the validation data.

    A linear SVC is fitted on ``train``. Each value of
    ``np.arange(0.0, 1.0, 0.1)`` is assigned to ``self.threshold``; the
    first-layer output is computed with it, and every entry the first layer
    marked ``-1`` is replaced by the corresponding entry of
    ``pc.second_layer_combination_test``.

    Args:
        train: Collection accepted by ``pc.join_list_df``; every joined row
            is read as feature values followed by the label in the last
            position.
        val: Validation collection in the same layout as ``train``.
        seed: Optional seed for Python's ``random`` module. Any value other
            than ``None`` (including ``0``) is applied.

    Returns:
        The candidate with the highest ``accuracy_score`` on the validation
        rows. A candidate that gets 30% or fewer of the validation rows
        right is scored 0.0, so the first candidate is returned when none
        clears that bar.

    Side effects:
        Stores the second value returned by ``pc.combination_before_after``
        in ``self.coeff``. ``self.threshold`` is overwritten during the
        search and is left at the last candidate tried, not at the returned
        value.
    """
    # ``if seed:`` would silently skip a legitimate seed of 0.
    if seed is not None:
        random.seed(seed)
    candidates = np.arange(0.0, 1.0, 0.1)

    # Train
    training_set = pc.join_list_df(train)
    X_train = [row[:-1] for row in training_set]
    y_train = [row[-1] for row in training_set]
    # Computed unconditionally, as before, even when weighting is disabled.
    class_weight = get_proportion(y_train)
    svc = SVC(
        kernel='linear',
        random_state=SEED,
        probability=True,
        class_weight=class_weight if self.class_weight else None,
    )
    svc.fit(X_train, y_train)

    val_set = pc.join_list_df(val)
    X_val = [row[:-1] for row in val_set]
    y_val = [row[-1] for row in val_set]
    # Minimum number of correct predictions a candidate must exceed.
    border_amount = len(y_val) * 0.3

    _, coeff = pc.combination_before_after(X_val, y_val, svc)
    self.coeff = coeff

    scores = []
    for candidate in candidates:
        self.threshold = candidate
        first_layer = self.first_layer_classifier(X_val, svc)
        # NOTE(review): this call does not receive the candidate; if the
        # helper is deterministic it could be hoisted out of the loop --
        # confirm before moving it.
        second_layer = pc.second_layer_combination_test(X_val, coeff, svc)
        # -1 in the first layer means "defer to the second layer".
        prediction = [
            second_layer[i] if element == -1 else element
            for i, element in enumerate(first_layer)
        ]
        correct = np.sum(confusion_matrix(y_val, prediction).diagonal()) * 1.0
        # Compare the count of correct predictions with the count-based
        # border directly. The previous form divided by len(val) (the number
        # of items in ``val``, not the number of rows), which made the bar
        # depend on how the validation data was partitioned.
        if correct > border_amount:
            scores.append(accuracy_score(y_val, prediction))
        else:
            scores.append(0.0)
    return candidates[np.argmax(scores)]
def train(self, training_set_list):
    """Select a threshold on a held-out split, then fit the SVM on all data.

    Args:
        training_set_list: Collection accepted by ``pc.split_df_list`` and
            ``pc.join_list_df``; every joined row is read as feature values
            followed by the label in the last position.

    Side effects:
        Sets ``self.t`` to the value returned by ``self.find_threshold`` and
        ``self.svm`` to the fitted classifier.
    """
    # find threshold to split using train and val set
    # (0.2 is presumably the validation fraction -- confirm in split_df_list)
    train, val = pc.split_df_list(training_set_list, 0.2, SEED)
    # NOTE(review): the result is stored in ``self.t``; verify that the
    # prediction path reads ``self.t`` and not an attribute that
    # find_threshold mutates while searching.
    self.t = self.find_threshold(train, val, SEED)

    # train svm with all the data
    training_set = pc.join_list_df(training_set_list)
    X_train = [row[:-1] for row in training_set]
    y_train = [row[-1] for row in training_set]
    svc = SVC(
        kernel='linear',
        random_state=SEED,
        probability=True,
        class_weight='balanced' if self.class_weight else None,
    )
    svc.fit(X_train, y_train)
    self.svm = svc
def train(self, training_set_list):
    """Fit a linear SVC on the joined training data and keep it in ``self.svm``.

    Args:
        training_set_list: Collection accepted by ``pc.join_list_df``; every
            joined row is read as feature values followed by the label in
            the last position.

    Side effects:
        Sets ``self.svm`` to the fitted classifier.
    """
    rows = pc.join_list_df(training_set_list)
    features = [row[:-1] for row in rows]
    labels = [row[-1] for row in rows]

    # The weights are always computed; they are only handed to the SVC when
    # self.class_weight is truthy.
    weights = get_proportion(labels)
    model = SVC(
        kernel='linear',
        random_state=SEED,
        probability=True,
        class_weight=weights if self.class_weight else None,
    )
    model.fit(features, labels)
    self.svm = model
def train(self, training_set_list):
    """Set ``self.biggest_phase`` from the joined training data.

    Args:
        training_set_list: Sequence whose first element exposes ``.columns``
            and which ``pc.join_list_df`` can join into a single table.

    Side effects:
        Sets ``self.biggest_phase`` to the last entry returned by
        ``get_sorted_phases`` plus one.
    """
    header = training_set_list[0].columns
    frame = pd.DataFrame(pc.join_list_df(training_set_list))
    frame.columns = header

    phases = get_sorted_phases(frame)
    self.biggest_phase = phases[-1] + 1