예제 #1
0
 def load_a_model(self):
     """Load a previously saved model into ``self.cv_model``.

     The model is only loaded when the file-name label no longer shows the
     ``"No Directory selected"`` placeholder and the "load model" checkbox
     is ticked; otherwise a Qt warning is raised and nothing is loaded.
     """
     file_was_chosen = (
         self.display_model_name_label.text() != "No Directory selected")
     if not (file_was_chosen and self.load_model_cb.checkState()):
         QErrorMessage.qtHandler()
         qErrnoWarning("you didn't select the checkbox or a valid file")
         return

     # NOTE(review): `load` is presumably a pickle/joblib-style loader;
     # if so it should only be pointed at trusted files -- confirm.
     self.cv_model = load(self.open_model_btn.open_result[0])
예제 #2
0
    def print_result(self):
        """Report the cross-validation result held in ``self.cv_model``.

        Stores the best estimator in ``self.model`` and the derived class
        labels in ``self.class_name``, then writes the best parameters, the
        per-feature importances and the best CV score to the text browser.
        Depending on the ROC checkbox, the matching ROC plot method is called.

        Returns:
            ``True`` when the estimator's feature count does not match the
            training data (a Qt warning is shown); otherwise ``None``.
        """
        search = self.cv_model
        estimator = search.best_estimator_
        self.model = estimator

        self.class_name = ["class " + str(label) for label in estimator.classes_]
        print(self.class_name)

        # Refuse to report on a model that was fitted on a different width.
        if estimator.n_features_ != self.data["train_x"].shape[1]:
            QErrorMessage.qtHandler()
            qErrnoWarning("the data shape dosen't match the model")
            return True

        print_tb_header(self.textBrowser, self.run_index)
        for param_name, param_value in search.best_params_.items():
            print_to_tb(self.textBrowser, param_name, param_value)

        for position in range(1, estimator.n_features_ + 1):
            print_to_tb(
                self.textBrowser,
                "the {0} feature: ".format(position),
                self.data["train_x"].columns[position - 1],
                estimator.feature_importances_[position - 1],
            )

        print_to_tb(self.textBrowser, "best score during CV", search.best_score_)

        if self.multiclass:
            if self.plot_roc_cb.checkState():
                self.plot_ROC_multiclass(False)
                if self.have_test:
                    self.plot_ROC_multiclass(True)
        elif self.plot_roc_cb.checkState():
            self.plot_ROC()
예제 #3
0
def check_data_public(data):
    """Check which target labels are present in ``data``.

    Args:
        data: mapping that provides the ``"test_y"`` and ``"train_y"`` keys.

    Returns:
        A ``(no_y_flag, have_test)`` tuple: ``no_y_flag`` is ``True`` when
        ``data["train_y"]`` is ``None`` (a Qt warning is shown in that case),
        and ``have_test`` is ``True`` when ``data["test_y"]`` is not ``None``.
    """
    have_test = data["test_y"] is not None
    no_y_flag = data["train_y"] is None
    if no_y_flag:
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't have a training target label")
    return no_y_flag, have_test
예제 #4
0
    def train_a_model(self):
        """Fit the Naive Bayes variant selected in the UI on the training data.

        The "auto" radio button fits ``mixed_Naive_Bayes`` on the raw
        ``train_x``. Otherwise the Gaussian, multinomial or Bernoulli model is
        fitted on ``train_x.to_numpy()``, but only when the matching radio
        button is checked and its variable list is non-empty. On success the
        fitted model is stored in ``self.model`` and the data shapes are
        written to the log.

        Returns:
            ``False`` when a model was fitted, ``True`` when no model could be
            selected (a Qt warning is shown in that case).
        """
        train_x = self.data["train_x"]
        train_y = self.data["train_y"]

        def flat_labels():
            # Reshape the labels to a 1-D array with one entry per row.
            # Evaluated lazily so the labels are only touched when a model
            # is actually going to be fitted.
            labels = train_y.to_numpy()
            return labels.reshape(labels.shape[0], )

        model = None
        print("computing")

        if self.auto_distribute_rb.isChecked():
            mix_nb = mixed_Naive_Bayes(self.multinomial_var_list,
                                       self.bernoulli_var_list,
                                       self.gaussian_var_list)
            model = mix_nb.fit(train_x, flat_labels())
        elif (self.gaussian_distribute_rb.isChecked()
              and len(self.gaussian_var_list) > 0):
            gnb = My_Gaussian_Naive_Bayes()
            model = gnb.fit(train_x.to_numpy(), flat_labels())
        elif (self.multinomial_distribute_rb.isChecked()
              and len(self.multinomial_var_list) > 0):
            mnb = MultinomialNB()
            model = mnb.fit(train_x.to_numpy(), flat_labels())
        elif (self.bernoulli_distribute_rb.isChecked()
              and len(self.bernoulli_var_list) > 0):
            bnb = BernoulliNB()
            model = bnb.fit(train_x.to_numpy(), flat_labels())

        # Compare against None explicitly: a fitted estimator whose class
        # defines __len__/__bool__ could otherwise be mistaken for "no model".
        if model is None:
            QErrorMessage.qtHandler()
            qErrnoWarning("you didn't select a model when training")
            return True

        self.model = model
        print_log_header("Naive Bayes Classifier")
        print_to_log("train x's shape is {0}".format(
            self.data["train_x"].shape))
        print_to_log("train y's shape is {0}".format(
            self.data["train_y"].shape))
        if self.have_test:
            print_to_log("test x's shape is {0}".format(
                self.data["test_x"].shape))
            print_to_log("test y's shape is {0}".format(
                self.data["test_y"].shape))

        print("training model finished")
        return False
예제 #5
0
    def transform_accrod_to_type(self):
        """Sort the training columns by suitable Naive Bayes distribution.

        Each column of ``self.data["train_x"]`` is assigned to one of three
        lists stored on ``self``:

        * ``bernoulli_var_list``   -- exactly two distinct values;
        * ``multinomial_var_list`` -- more than two distinct values and an
          integer dtype (any width, signed or unsigned);
        * ``gaussian_var_list``    -- more than two distinct values otherwise.

        Columns with fewer than two distinct values are not put in any list.

        When the Gaussian, multinomial or Bernoulli radio button is checked,
        ``self.data["train_x"]`` (and ``self.data["test_x"]`` when
        ``self.have_test`` is set) is reduced to the columns of the matching
        list. A Qt warning is shown if that drops any column. If none of the
        three is checked the data is left unchanged.
        """
        # Local import keeps this method self-contained; pandas is already
        # required because the data is held in DataFrames.
        from pandas.api.types import is_integer_dtype

        train_x = self.data["train_x"]

        self.multinomial_var_list = []
        self.bernoulli_var_list = []
        self.gaussian_var_list = []

        for col in train_x.columns:
            distinct_count = len(train_x[col].unique())
            if distinct_count == 2:
                self.bernoulli_var_list.append(col)
            elif distinct_count > 2:
                # Accept every integer dtype, not only the platform default
                # and int64, so e.g. int32 columns are not treated as
                # continuous.
                if is_integer_dtype(train_x[col].dtype):
                    self.multinomial_var_list.append(col)
                else:
                    self.gaussian_var_list.append(col)

        print(self.multinomial_var_list)

        # The first checked radio button (in this order) decides which
        # column subset is kept.
        selections = (
            (self.gaussian_distribute_rb, self.gaussian_var_list),
            (self.multinomial_distribute_rb, self.multinomial_var_list),
            (self.bernoulli_distribute_rb, self.bernoulli_var_list),
        )
        for radio_button, kept_columns in selections:
            if not radio_button.isChecked():
                continue

            reduced_x = train_x[kept_columns]
            if reduced_x.shape[1] != train_x.shape[1]:
                QErrorMessage.qtHandler()
                qErrnoWarning(
                    "some data may  be droped due to data type dosen't fit,keeped feature has length of {0},you may choose auto mode"
                    .format(len(kept_columns)))
            self.data["train_x"] = reduced_x
            if self.have_test:
                self.data["test_x"] = self.data["test_x"][kept_columns]
            break
예제 #6
0
def check_data_model_compatible_public(data,model,check_string,cv=False):
    """Check that a fitted model expects as many features as the training data has.

    Args:
        data: mapping whose ``"train_x"`` entry exposes ``.shape``.
        model: fitted estimator, or a search object when ``cv`` is true.
        check_string: name of the attribute used to read the model's feature
            count: ``"n_features_"``, ``"coef_"`` (uses ``coef_.shape[1]``)
            or ``"shape_fit_"`` (uses ``shape_fit_[1]``).
        cv: when true, ``model.best_estimator_`` is checked instead of
            ``model`` itself.

    Returns:
        ``True`` when a problem was found (a Qt warning describing it is
        shown), ``False`` when the feature counts agree.
    """
    if model is None:
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't have a fitted model")
        return True
    if data["train_x"] is None:
        QErrorMessage.qtHandler()
        qErrnoWarning("you didn't have training data")
        return True

    if cv:
        model = model.best_estimator_

    # One reader per supported way of asking a model for its feature count.
    feature_count_readers = {
        "n_features_": lambda fitted: fitted.n_features_,
        "coef_": lambda fitted: fitted.coef_.shape[1],
        "shape_fit_": lambda fitted: fitted.shape_fit_[1],
    }
    read_feature_count = feature_count_readers.get(check_string)
    if read_feature_count is None:
        QErrorMessage.qtHandler()
        qErrnoWarning("the model don't have a counting feature method")
        return True

    # Only the two lookups sit inside the try, so the warning calls below
    # cannot be swallowed by the handler.
    try:
        model_features = read_feature_count(model)
        data_features = data["train_x"].shape[1]
    except (AttributeError, IndexError, TypeError):
        QErrorMessage.qtHandler()
        qErrnoWarning(
            "counting feature method didn't fit, {0}, probably you load a wrong model"
            .format(check_string))
        return True

    if model_features != data_features:
        # The message reports the count that was actually compared; the
        # shape_fit_ check no longer reads coef_ to build it.
        QErrorMessage.qtHandler()
        qErrnoWarning(
            "the data didn't fit the model, {0}, the model assumed {1} features, while input {2} features"
            .format(check_string, model_features, data_features))
        return True

    return False
    def make_search_para_list(self):
        """Build the SVM parameter search grid from the current UI state.

        One grid dict is produced for each ticked kernel checkbox, in the
        order linear, poly, rbf, sigmoid. ``C`` and ``gamma`` ranges are
        log-spaced (base 10) between the spin-box exponents; the polynomial
        ``degree`` range is linearly spaced. A range is only added to a grid
        when its widget group is enabled, so a disabled group leaves that
        parameter out of the grid.

        Returns:
            A list of grid dicts, or ``True`` (after showing a Qt warning)
            when no kernel checkbox is ticked.
        """
        linear_flag = self.linear_kernel_cb.checkState()
        poly_flag = self.poly_kernel_cb.checkState()
        rbf_flag = self.rbf_kernel_cb.checkState()
        sigmoid_flag = self.sigmoid_kernel_cb.checkState()
        if not (linear_flag or poly_flag or rbf_flag or sigmoid_flag):
            QErrorMessage.qtHandler()
            qErrnoWarning("you didn't choose a kernel")
            return True

        # Start every range as "not configured" so a disabled group cannot
        # leave a name unbound when the grids are assembled below.
        c_list = None
        gamma_list = None
        degree_list = None

        if self.c_group.isEnabled():
            c_list = np.logspace(start=self.c_start_sp.value(),
                                 stop=self.c_end_sp.value(),
                                 num=self.c_num_sp.value(),
                                 base=10)
        if self.gamma_group.isEnabled():
            gamma_list = np.logspace(start=self.gamma_start_sp.value(),
                                     stop=self.gamma_end_sp.value(),
                                     num=self.gamma_num_sp.value(),
                                     base=10)
        if self.degree_group.isEnabled():
            degree_start = self.degree_start_sp.value()
            degree_end = self.degree_end_sp.value()
            # Never ask for more points than the start..end span allows.
            degree_num = min(self.degree_num_sp.value(),
                             degree_end - degree_start + 1)
            # A zero or negative count (e.g. end < start) means "no degree
            # range"; np.linspace rejects a negative num.
            if degree_num > 0:
                # NOTE(review): linspace yields floats, possibly fractional
                # when num is smaller than the span -- confirm the estimator
                # accepts these as `degree`.
                degree_list = np.linspace(start=degree_start,
                                          stop=degree_end,
                                          num=degree_num)

        if self.coef_group.isEnabled():
            # NOTE(review): this range is computed but not added to any grid
            # below; presumably it was meant for the kernels' `coef0`
            # parameter -- confirm before wiring it in.
            coef_list = np.logspace(start=self.coef_start_sp.value(),
                                    stop=self.coef_end_sp.value(),
                                    num=self.coef_num_sp.value(),
                                    base=10)

        def build_grid(kernel, **ranges):
            # Keep only the ranges that were actually configured.
            grid = {'kernel': [kernel]}
            grid.update((name, values) for name, values in ranges.items()
                        if values is not None)
            return grid

        grid_list = []
        if linear_flag:
            grid_list.append(build_grid('linear', C=c_list))
        if poly_flag:
            grid_list.append(
                build_grid('poly', gamma=gamma_list, C=c_list,
                           degree=degree_list))
        if rbf_flag:
            grid_list.append(build_grid('rbf', gamma=gamma_list, C=c_list))
        if sigmoid_flag:
            grid_list.append(build_grid('sigmoid', gamma=gamma_list, C=c_list))

        return grid_list