def model_build(alg_name, params):
    """Instantiate an (unfitted) classifier for the requested algorithm.

    Parameters
    ----------
    alg_name : str
        One of 'SVM', 'KNN', 'Random Forest', 'LightGBM', 'XGBoost',
        'Naive Bayes'.
    params : dict
        Hyper-parameters for the chosen algorithm; the required keys
        depend on ``alg_name`` (e.g. 'kernel'/'C' for SVM, 'K' for KNN,
        'distribution' for Naive Bayes).

    Returns
    -------
    estimator or None
        The configured (unfitted) estimator, or ``None`` when
        ``alg_name`` — or a sub-option such as the SVM kernel or the
        Naive Bayes distribution — is not recognised.
    """
    alg = None
    if alg_name == 'SVM':
        if params['kernel'] == 'linear':
            alg = SVC(C=params['C'], probability=True, kernel='linear')
        elif params['kernel'] == 'rbf':
            alg = SVC(C=params['C'], gamma=params['gammas'],
                      probability=True, kernel=params['kernel'])
        elif params['kernel'] == 'poly':
            alg = SVC(C=params['C'], degree=params['degree'],
                      probability=True, kernel=params['kernel'])
    elif alg_name == 'KNN':
        alg = KNeighborsClassifier(n_neighbors=params['K'],
                                   weights=params['weights'],
                                   leaf_size=params['leaf_size'])
    elif alg_name == 'Random Forest':
        # Fixed random_state keeps Random Forest runs reproducible.
        alg = RandomForestClassifier(n_estimators=params['n_estimators'],
                                     criterion=params['criterion'],
                                     max_features=params['max_features'],
                                     random_state=1234)
    elif alg_name == 'LightGBM':
        alg = lgb.LGBMClassifier(learning_rate=params['learning_rate'],
                                 num_leaves=params['num_leaves'],
                                 n_estimators=params['n_estimators'],
                                 objective=params['objective'])
    elif alg_name == 'XGBoost':
        # BUG FIX: the XGBoost sklearn wrapper's parameter is `eval_metric`
        # (singular); the original passed `eval_metrics=`, an unknown
        # keyword that XGBoost silently forwards to the booster, so the
        # requested metric was never applied. The params-dict key
        # 'eval_metrics' is kept unchanged for caller compatibility.
        alg = XGBClassifier(objective=params['objective'],
                            eval_metric=params['eval_metrics'],
                            learning_rate=params['learning_rate'],
                            max_depth=params['max_depth'])
    elif alg_name == 'Naive Bayes':
        if params['distribution'] == 'Multinomial Naive Bayes':
            alg = naive_bayes.MultinomialNB(alpha=params['alpha'],
                                            fit_prior=params['fit_prior'])
        elif params['distribution'] == 'Gaussian Naive Bayes':
            alg = naive_bayes.GaussianNB()
        elif params['distribution'] == 'Complement Naive Bayes':
            alg = naive_bayes.ComplementNB(alpha=params['alpha'],
                                           fit_prior=params['fit_prior'],
                                           norm=params['norm'])
        elif params['distribution'] == 'Bernoulli Naive Bayes':
            alg = naive_bayes.BernoulliNB(alpha=params['alpha'],
                                          fit_prior=params['fit_prior'],
                                          binarize=params['binarize'])
        elif params['distribution'] == 'Categorical Naive Bayes':
            alg = naive_bayes.CategoricalNB(alpha=params['alpha'],
                                            fit_prior=params['fit_prior'])
    return alg
X[:] = X[:] + abs(np.min(X[:])) # print('After all values made negative, X = \n', X, '\n') # split dataset into training and testing partitions train_split = 0.7 train_all, test_all = split_partition(X, y, train_split) # formally classify colours (not using Naive-Bayes) Train = Partition(train_all, n_classes) Test = Partition(test_all, n_classes) # create Naive Bayes classifier model train_all_X = train_all[:, :n_classes] train_all_y = train_all[:, n_classes] time_start = time.time() nb_model = skl_nb.CategoricalNB().fit(train_all_X, train_all_y) time_total = time.time() - time_start # make predictions test_all_X = test_all[:, :n_classes] test_all_y = test_all[:, n_classes] y_pred = nb_model.predict(test_all_X) #%% merge NB-predicted y (classifications) and X (co-ordinates) y_pred = np.reshape(y_pred, (y_pred.shape[0], 1)) Xy_pred = np.concatenate((test_all_X, y_pred), axis=1) # create separate arrays for NB-classified co-ordinates X_green_pred, X_blue_pred = split_colour(Xy_pred, n_classes) # Performance Metrics:
def convert_to_numeric_values(df):
    """Return a copy of *df* with the categorical loan columns encoded as ints.

    history: bad/fair/excellent -> 0/1/2; income: low/high -> 0/1;
    term: 3/10 -> 0/1.  The input frame is left unmodified.
    """
    encoding = {
        "history": {"bad": 0, "fair": 1, "excellent": 2},
        "income": {"low": 0, "high": 1},
        "term": {3: 0, 10: 1},
    }
    return df.copy().replace(encoding)


loan_df = pd.read_csv("data/loans.csv")
numeric_loan_df = convert_to_numeric_values(loan_df)
print(numeric_loan_df)

# Every column except the last is a feature; 'risk' is the target.
feature_names = loan_df.columns.values[:-1]
X = numeric_loan_df[feature_names]
y = numeric_loan_df["risk"]

naive_bayes_model = sk_naive_bayes.CategoricalNB()
naive_bayes_model.fit(X, y)

# Per-row probability (and log-probability) of the positive class (index 1),
# appended to the original frame for inspection.
X_probabilities = naive_bayes_model.predict_proba(X)[:, 1]
X_probabilities_log = naive_bayes_model.predict_log_proba(X)[:, 1]
loan_df["probability"] = X_probabilities
loan_df["log_probability"] = X_probabilities_log
print(loan_df)
# Encode the binary target ('insurance') as 0/1 integers.
labelEnc = preprocessing.LabelEncoder()
yTrain = labelEnc.fit_transform(subData['insurance'])
# Original class labels corresponding to the encoded values 0 and 1,
# used below to label the printed counts/probabilities.
yLabel = labelEnc.inverse_transform([0, 1])

# Enumerate the observed categories of each predictor so the ordinal
# encoder is given an explicit, stable category ordering.
uGroup_Size = np.unique(subData['group_size'])
uHomeowner = np.unique(subData['homeowner'])
uMarried_Couple = np.unique(subData['married_couple'])
featureCategory = [uGroup_Size, uHomeowner, uMarried_Couple]
print(featureCategory)

featureEnc = preprocessing.OrdinalEncoder(categories=featureCategory)
xTrain = featureEnc.fit_transform(
    subData[['group_size', 'homeowner', 'married_couple']])

# Categorical Naive Bayes with a near-zero smoothing constant so the
# fitted probabilities are (almost) the raw empirical frequencies.
_objNB = naive_bayes.CategoricalNB(alpha=1.0e-10)
thisModel = _objNB.fit(xTrain, yTrain)

print('Number of samples encountered for each class during fitting')
print(yLabel)
print(_objNB.class_count_)
print('\n')

print('Probability of each class:')
print(yLabel)
# class_log_prior_ is in log space; exponentiate for readable priors.
print(np.exp(_objNB.class_log_prior_))
print('\n')

#%%
print('---QUESTION 1B---')
# NOTE(review): this call continues beyond the visible chunk.
RowWithColumn(rowVar=subData['insurance'],
# Load the training and validation splits: two feature columns (0, 1)
# followed by the class label in column 2, space-separated, no header.
data_train = pd.read_csv("regl_data/juegos_entrenamiento.txt", sep=' ',
                         header=None)
data_test = pd.read_csv("regl_data/juegos_validacion.txt", sep=' ',
                        header=None)
X = data_train.iloc[:, :2]
y = data_train[[2]]
X_test = data_test.iloc[:, :2]
y_test = data_test[[2]]

# Train the categorical naive Bayes classifier.
cat = nb.CategoricalNB()
estimator = cat.fit(X, np.ravel(y))
print("params: ", estimator.get_params())

# Predict on the *validation* split and count misclassifications.
pred_train = estimator.predict(X_test)
suma = (np.ravel(y_test) != pred_train).sum()
# BUG FIX: `suma` counts errors on the validation set, so the accuracy
# denominator must be the validation-set size — the original divided by
# X.shape[0] (the training-set size), giving a wrong percentage whenever
# the two splits differ in length.
prec = (100 - (suma / X_test.shape[0]) * 100)
#new_data = cat.trans
print(suma, prec)

# np.random.seed(0)
# W = np.random.uniform(0, 1, size=(X.shape[1], 1))
# def sigmoid(z):
#     # NOTE(review): original commented-out version read (1 / 1 + np.exp(-z)),
#     # which evaluates to 1 + exp(-z); corrected parenthesization below.
#     return 1 / (1 + np.exp(-z))