def load_a_model(self):
    """Load the model file picked in the UI into ``self.cv_model``.

    The file is loaded only when the model-name label no longer shows the
    "No Directory selected" placeholder *and* the "load model" checkbox is
    ticked. In every other case a warning is emitted through the Qt
    error-message handler and ``self.cv_model`` is left untouched.

    Returns:
        None on both the success and the warning path.
    """
    nothing_chosen = (
        self.display_model_name_label.text() == "No Directory selected"
    )
    # `or` short-circuits, so the checkbox is only queried once a file
    # has actually been chosen.
    if nothing_chosen or not self.load_model_cb.checkState():
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't select the checkbox or a valid file")
        return

    # The first element of open_result is used as the path -- presumably the
    # (path, filter) pair of a Qt file dialog; verify against open_model_btn.
    # NOTE(review): `load` looks like a pickle/joblib-style loader -- confirm
    # that only trusted model files can reach this call.
    self.cv_model = load(self.open_model_btn.open_result[0])
def print_result(self):
    """Report the cross-validated model in the text browser and plot ROC.

    Takes the best estimator out of ``self.cv_model``, stores it as
    ``self.model``, derives ``self.class_name`` from its ``classes_`` and
    then writes the best hyper-parameters, the per-feature importances and
    the best CV score to ``self.textBrowser``. ROC curves are drawn when the
    ROC checkbox is ticked.

    Returns:
        True when the estimator's feature count does not match the training
        data (a warning has been shown and nothing was printed); None
        otherwise.
    """
    search = self.cv_model
    estimator = search.best_estimator_
    self.model = estimator

    self.class_name = ["class " + str(label) for label in estimator.classes_]
    print(self.class_name)

    # Refuse to report on data the estimator was not fitted for.
    if estimator.n_features_ != self.data["train_x"].shape[1]:
        QErrorMessage.qtHandler()
        qErrnoWarning("the data shape dosen't match the model")
        return True

    print_tb_header(self.textBrowser, self.run_index)

    for param_name, param_value in search.best_params_.items():
        print_to_tb(self.textBrowser, param_name, param_value)

    # Look the column index and importances up once instead of once per row.
    column_names = self.data["train_x"].columns
    importances = estimator.feature_importances_
    for position in range(estimator.n_features_):
        print_to_tb(
            self.textBrowser,
            "the " + str(position + 1) + " feature: ",
            column_names[position],
            importances[position],
        )

    print_to_tb(self.textBrowser, "best score during CV", search.best_score_)

    if self.multiclass:
        if self.plot_roc_cb.checkState():
            # The boolean argument is False for this first call and True for
            # the extra call made when test data is available.
            self.plot_ROC_multiclass(False)
            if self.have_test:
                self.plot_ROC_multiclass(True)
    elif self.plot_roc_cb.checkState():
        self.plot_ROC()
def check_data_public(data):
    """Inspect which target labels are present in ``data``.

    Args:
        data: Mapping that holds at least the keys ``"test_y"`` and
            ``"train_y"``; a value of ``None`` means "not available".

    Returns:
        A ``(no_y_flag, have_test)`` tuple. ``no_y_flag`` is True when the
        training labels are missing (a warning is shown in that case);
        ``have_test`` is True when test labels are present.
    """
    have_test = data["test_y"] is not None
    no_y_flag = data["train_y"] is None

    if no_y_flag:
        # Missing training labels are reported, not raised; the caller
        # decides what to do with the returned flag.
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't have a training target label")

    return no_y_flag, have_test
def train_a_model(self):
    """Fit the Naive Bayes variant selected by the radio buttons.

    In "auto" mode a ``mixed_Naive_Bayes`` is built from the three variable
    lists and fitted on the training frame itself. In the single-family
    modes (gaussian / multinomial / bernoulli) the matching classifier is
    fitted on the training data converted with ``to_numpy()``, but only when
    the corresponding variable list is non-empty.

    On success the fitted model is stored in ``self.model`` and the data
    shapes are written to the log.

    Returns:
        False when a model was trained, True when no model could be selected
        (a warning has been shown).
    """
    features = self.data["train_x"]
    targets = self.data["train_y"]

    def flat_targets():
        # Reshape the labels to a 1-D vector of length n_rows. Evaluated
        # lazily so nothing is converted when no model is selected.
        values = targets.to_numpy()
        return values.reshape(values.shape[0], )

    fitted = None
    print("computing")

    if self.auto_distribute_rb.isChecked():
        classifier = mixed_Naive_Bayes(self.multinomial_var_list,
                                       self.bernoulli_var_list,
                                       self.gaussian_var_list)
        fitted = classifier.fit(features, flat_targets())
    elif (self.gaussian_distribute_rb.isChecked()
          and len(self.gaussian_var_list) > 0):
        classifier = My_Gaussian_Naive_Bayes()
        fitted = classifier.fit(features.to_numpy(), flat_targets())
    elif (self.multinomial_distribute_rb.isChecked()
          and len(self.multinomial_var_list) > 0):
        classifier = MultinomialNB()
        fitted = classifier.fit(features.to_numpy(), flat_targets())
    elif (self.bernoulli_distribute_rb.isChecked()
          and len(self.bernoulli_var_list) > 0):
        classifier = BernoulliNB()
        fitted = classifier.fit(features.to_numpy(), flat_targets())

    # Truthiness (not an `is None` test) is kept on purpose: it is what the
    # callers have been getting so far.
    if not fitted:
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't select a model when training")
        return True

    self.model = fitted
    print_log_header("Naive Bayes Classifier")
    print_to_log("train x's shape is {0}".format(self.data["train_x"].shape))
    print_to_log("train y's shape is {0}".format(self.data["train_y"].shape))
    if self.have_test:
        print_to_log("test x's shape is {0}".format(
            self.data["test_x"].shape))
        print_to_log("test y's shape is {0}".format(
            self.data["test_y"].shape))

    print("training model finished")
    return False
def transform_accrod_to_type(self):
    """Classify the training columns by value type and filter the data.

    Every column of ``self.data["train_x"]`` is assigned to one list:

    * exactly two distinct values        -> ``self.bernoulli_var_list``
    * more than two, dtype int_/int64    -> ``self.multinomial_var_list``
    * more than two, any other dtype     -> ``self.gaussian_var_list``

    Columns with fewer than two distinct values are put in no list.

    When one of the single-family radio buttons (gaussian, multinomial,
    bernoulli -- checked in that order) is selected, ``self.data["train_x"]``
    and, if ``self.have_test`` is set, ``self.data["test_x"]`` are reduced
    to the columns of that family. A warning is shown when this drops
    columns. With none of the three selected the data is left unchanged.

    The training labels are not needed here and are no longer looked up, so
    the method also works on data without a ``"train_y"`` entry.

    Returns:
        None. Results are stored on ``self``.
    """
    train_x = self.data["train_x"]
    self.multinomial_var_list = []
    self.bernoulli_var_list = []
    self.gaussian_var_list = []

    for col in train_x.columns:
        # Count the distinct values once per column.
        distinct = len(train_x[col].unique())
        if distinct == 2:
            self.bernoulli_var_list.append(col)
        elif distinct > 2:
            data_type = train_x[col].dtype
            if data_type == np.int_ or data_type == np.int64:
                self.multinomial_var_list.append(col)
            else:
                self.gaussian_var_list.append(col)
    print(self.multinomial_var_list)

    # (radio button, columns kept) pairs in the priority order in which the
    # buttons are examined; only the first checked one is applied.
    single_family_modes = (
        (self.gaussian_distribute_rb, self.gaussian_var_list),
        (self.multinomial_distribute_rb, self.multinomial_var_list),
        (self.bernoulli_distribute_rb, self.bernoulli_var_list),
    )
    for radio_button, kept_columns in single_family_modes:
        if not radio_button.isChecked():
            continue

        subset = train_x[kept_columns]
        if subset.shape[1] != train_x.shape[1]:
            # Tell the user that some columns did not fit the chosen family.
            QErrorMessage.qtHandler()
            qErrnoWarning(
                "some data may be droped due to data type dosen't fit,keeped feature has length of {0},you may choose auto mode"
                .format(len(kept_columns)))
        self.data["train_x"] = subset
        if self.have_test:
            # Keep the test features aligned with the training features.
            self.data["test_x"] = self.data["test_x"][kept_columns]
        break
def check_data_model_compatible_public(data, model, check_string, cv=False):
    """Check that a fitted model expects as many features as the data has.

    Args:
        data: Mapping with a ``"train_x"`` entry; ``None`` there means no
            training data is loaded. Otherwise the entry must expose
            ``shape`` with the feature count at index 1.
        model: Fitted estimator, or ``None`` when nothing has been
            fitted/loaded. With ``cv=True`` this is the search object and
            its ``best_estimator_`` is inspected instead.
        check_string: Name of the model attribute that carries the feature
            count: ``"n_features_"``, ``"coef_"`` (uses ``coef_.shape[1]``)
            or ``"shape_fit_"`` (uses ``shape_fit_[1]``).
        cv: Whether ``model`` is a search wrapper that must be unwrapped.

    Returns:
        True when a problem was found (a warning has been shown), False when
        model and data are compatible.
    """
    if model is None:
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't have a fitted model")
        return True
    if data["train_x"] is None:
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't have training data")
        return True
    if cv:
        model = model.best_estimator_

    # One reader per supported attribute. Keeping them in a table guarantees
    # that the value compared and the value reported are the same one (the
    # shape_fit_ message used to be built from coef_ by mistake).
    feature_count_readers = {
        "n_features_": lambda estimator: estimator.n_features_,
        "coef_": lambda estimator: estimator.coef_.shape[1],
        "shape_fit_": lambda estimator: estimator.shape_fit_[1],
    }
    read_feature_count = feature_count_readers.get(check_string)
    if read_feature_count is None:
        QErrorMessage.qtHandler()
        qErrnoWarning("the model don't have a counting feature method")
        return True

    try:
        expected = read_feature_count(model)
        actual = data["train_x"].shape[1]
    except Exception:
        # Still best-effort: any failure to read the counts is reported as a
        # wrong model. Unlike a bare `except`, KeyboardInterrupt/SystemExit
        # pass through, and errors in the reporting itself are not masked.
        QErrorMessage.qtHandler()
        qErrnoWarning(
            "counting feature method didn't fit, {0}, probably you load a wrong model"
            .format(check_string))
        return True

    if expected != actual:
        QErrorMessage.qtHandler()
        qErrnoWarning(
            "the data didn't fit the model, {0}, the model assumed {1} features, while input {2} features"
            .format(check_string, expected, actual))
        return True
    return False
def make_search_para_list(self):
    """Build one parameter grid per kernel ticked in the UI.

    The value ranges are read from the spin boxes of the C, gamma and degree
    groups. A range is only produced when its group is enabled; a range that
    is not available is left out of the grids instead of being referenced
    while unbound, so a selected kernel whose group is disabled no longer
    raises ``UnboundLocalError``.

    The number of degree values is clamped to the number of integers in
    ``[degree_start, degree_end]``. When that leaves nothing (including
    ``degree_end < degree_start``, which used to reach ``np.linspace`` with
    a negative count) the poly grid carries no ``"degree"`` entry.

    Returns:
        A list of dicts, each with a ``'kernel'`` entry plus the available
        ``'C'`` / ``'gamma'`` / ``'degree'`` ranges -- or True when no
        kernel is selected (a warning has been shown).
    """
    linear_flag = self.linear_kernel_cb.checkState()
    poly_flag = self.poly_kernel_cb.checkState()
    rbf_flag = self.rbf_kernel_cb.checkState()
    sigmoid_flag = self.sigmoid_kernel_cb.checkState()
    if not (linear_flag or poly_flag or rbf_flag or sigmoid_flag):
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't choose a kernel")
        return True

    # None means "no range available for this parameter".
    c_list = None
    gamma_list = None
    degree_list = None

    if self.c_group.isEnabled():
        c_list = np.logspace(start=self.c_start_sp.value(),
                             stop=self.c_end_sp.value(),
                             num=self.c_num_sp.value(),
                             base=10)
    if self.gamma_group.isEnabled():
        gamma_list = np.logspace(start=self.gamma_start_sp.value(),
                                 stop=self.gamma_end_sp.value(),
                                 num=self.gamma_num_sp.value(),
                                 base=10)
    if self.degree_group.isEnabled():
        degree_start = self.degree_start_sp.value()
        degree_end = self.degree_end_sp.value()
        # Never ask for more points than there are integers in the interval.
        degree_num = min(self.degree_num_sp.value(),
                         degree_end - degree_start + 1)
        if degree_num > 0:
            degree_list = np.linspace(start=degree_start,
                                      stop=degree_end,
                                      num=degree_num)
    # NOTE(review): the coef spin boxes are not read here. The range that
    # used to be computed from them was never put into any grid -- confirm
    # whether the poly/sigmoid grids were meant to carry a coef0 entry.

    def build_grid(kernel, **ranges):
        # Assemble one grid, skipping parameters without a range.
        grid = {'kernel': [kernel]}
        for name, values in ranges.items():
            if values is not None:
                grid[name] = values
        return grid

    grid_list = []
    if linear_flag:
        grid_list.append(build_grid('linear', C=c_list))
    if poly_flag:
        grid_list.append(
            build_grid('poly', gamma=gamma_list, C=c_list,
                       degree=degree_list))
    if rbf_flag:
        grid_list.append(build_grid('rbf', gamma=gamma_list, C=c_list))
    if sigmoid_flag:
        grid_list.append(build_grid('sigmoid', gamma=gamma_list, C=c_list))
    return grid_list