def KNN(X, y):
    """Fit a 5-nearest-neighbours classifier on oversampled data and evaluate it.

    The data is split 75/25 with a fixed seed, the training portion is passed
    through ``GSMOTE.OverSample``, and the classifier's integer-cast
    predictions for the untouched test portion are handed to ``evaluate``
    under the label ``"KNN"``.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Target labels aligned with ``X``.

    Returns:
        None. The result of ``evaluate`` is not propagated.
    """
    # Hold out a quarter of the samples; random_state pins the shuffle.
    X_part, X_test, y_part, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0
    )

    # Only the training portion is oversampled; the test split stays as drawn.
    X_balanced, y_balanced = GSMOTE.OverSample(X_part, y_part)

    # Minkowski metric with p=2 is the Euclidean distance.
    knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    knn.fit(X_balanced, y_balanced)

    # Predictions are cast to int before being scored.
    evaluate("KNN", y_test, knn.predict(X_test).astype(int))
def decision_tree(X, y):
    """Fit a decision-tree regressor on oversampled data and evaluate it as a binary classifier.

    The data is split 80/20 with a fixed seed, the training portion is passed
    through ``GSMOTE.OverSample``, and a ``DecisionTreeRegressor`` is fitted
    on the result. Its continuous test-set outputs are thresholded at 0.5
    into 0/1 labels, which are handed to ``evaluate`` under the label
    ``"Decision Tree"``.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Target labels aligned with ``X``.

    Returns:
        None. The result of ``evaluate`` is not propagated.
    """
    split = train_test_split(X, y, test_size=0.2, random_state=0)
    X_part, X_test, y_part, y_test = split

    # Only the training portion is oversampled; the test split stays as drawn.
    X_balanced, y_balanced = GSMOTE.OverSample(X_part, y_part)

    tree = DecisionTreeRegressor()
    tree.fit(X_balanced, y_balanced)

    # A regressor is used here, so outputs are numeric scores rather than labels;
    # anything strictly above 0.5 is mapped to class 1, everything else to 0.
    scores = tree.predict(X_test)
    evaluate("Decision Tree", y_test, np.where(scores > 0.5, 1, 0))
def gradient_boosting(X, y):
    """Fit a gradient-boosting classifier on oversampled data and evaluate it.

    The data is split 80/20 with a fixed seed, the training portion is passed
    through ``GSMOTE.OverSample``, and a ``GradientBoostingClassifier``
    (100 estimators, learning rate 0.01, depth 3) is fitted on the result.
    Test-set predictions are cast to int, thresholded at 0.5 into 0/1 labels,
    and handed to ``evaluate`` under the label ``"Gradient Boosting"``.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Target labels aligned with ``X``.

    Returns:
        None. The result of ``evaluate`` is not propagated.
    """
    split = train_test_split(X, y, test_size=0.2, random_state=0)
    X_part, X_test, y_part, y_test = split

    # Only the training portion is oversampled; the test split stays as drawn.
    X_balanced, y_balanced = GSMOTE.OverSample(X_part, y_part)

    booster = GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=0.01,
        max_depth=3,
    )
    booster.fit(X_balanced, y_balanced)

    # Cast the predicted labels to int first, then collapse them to 0/1:
    # every value strictly above 0.5 becomes 1, the rest become 0.
    predicted_int = booster.predict(X_test).astype(int)
    binary_labels = np.where(predicted_int > 0.5, 1, 0)
    evaluate("Gradient Boosting", y_test, binary_labels)
def linear_training(X, y):
    """Fit a linear regression on oversampled data and evaluate it as a binary classifier.

    The data is split 80/20 with a fixed seed. The training portion is passed
    to ``vs`` before and after going through ``GSMOTE.OverSample`` (titles
    ``"Original data"`` and ``"Oversampled "``). A ``LinearRegression`` is
    fitted on the oversampled data, its test-set outputs are thresholded at
    0.5 into 0/1 labels, and those labels are handed to ``evaluate`` under
    the label ``"Linear Regression"``.

    Args:
        X: Feature matrix accepted by ``train_test_split``.
        y: Target labels aligned with ``X``.

    Returns:
        None. The result of ``evaluate`` is not propagated.
    """
    split = train_test_split(X, y, test_size=0.2, random_state=0)
    X_part, X_test, y_part, y_test = split

    # Show the training data as drawn, before any resampling.
    vs(X_part, y_part, "Original data")

    # Only the training portion is oversampled; the test split stays as drawn.
    X_balanced, y_balanced = GSMOTE.OverSample(X_part, y_part)

    # Show the same training data after oversampling.
    vs(X_balanced, y_balanced, "Oversampled ")

    model = LinearRegression()
    model.fit(X_balanced, y_balanced)

    # The regression output is continuous; values strictly above 0.5 are
    # mapped to class 1, everything else to 0.
    scores = model.predict(X_test)
    evaluate("Linear Regression", y_test, np.where(scores > 0.5, 1, 0))
import gsmote.comparison_testing.preprocessing as pp # Partition the dataset from sklearn.model_selection import train_test_split date_file = "../../data/KDD.csv" X, y = pp.pre_process(date_file) X, X_t, y, y_test = train_test_split(X, y, test_size=0.2, random_state=0) # Instantiate a PCA object for the sake of easy visualisation pca = PCA(n_components=2) # Fit and transform x to visualise inside a 2D feature space X_vis = pca.fit_transform(X) X_resampled, y_resampled = gs.OverSample(X, y) X_res_vis = pca.transform(X_resampled) # Two subplots, unpack the axes array immediately f, (ax1, ax2) = plt.subplots(1, 2) c0 = ax1.scatter(X_vis[y == '0', 0], X_vis[y == '0', 1], label="Class #0", alpha=0.5, marker='.') c1 = ax1.scatter(X_vis[y == '1', 0], X_vis[y == '1', 1], label="Class #1", alpha=0.5, marker='.')
date_file = "../../data/adultmini.csv".replace('\\', '/') # date_file = "content/pygsom/data/ecoli.csv".replace('\\', '/') X, y = pp.preProcess(date_file) X_t, X_test, y_t, y_test = train_test_split(X, y, test_size=0.2, random_state=0) # Visualize original data vs(X_t, y_t, "Original data") # oversample print("Oversampling inprogress...") X_train, y_train = GSMOTE.OverSample(X_t, y_t) # visualize oversampled data print("Oversampling completed") print("Plotting oversampled data...") vs(X_train, y_train, "Oversampled ") print("Plotting completed") def linear_training(): # Fitting Simple Linear Regression to the Training set regressor = LinearRegression() regressor.fit(X_train, y_train) # Predicting the Test set results y_predict = regressor.predict(X_test)
return class_counter2.most_common(1)[0][0]  # NOTE(review): tail of a definition that begins before this view; left untouched


# Script entry point: oversample the data set, train a GSOM on all but the last
# ten oversampled rows, and print the predictions for those last ten rows.
if __name__ == '__main__':
    # Seed NumPy's global random generator
    np.random.seed(1)
    # The raw CSV is read here only to report its shape; the data actually used
    # below comes from pp.preProcess on the same file.
    df = pd.read_csv(data_filename)
    print(df.shape)
    # Earlier experiment on the raw frame, kept disabled:
    # data_training = df.iloc[:, 1:17]
    # gsom = GSOM(.83, 16, max_radius=4)
    # gsom.fit(data_training.to_numpy(), 100, 50)
    # x= (data_training.to_numpy())
    # gsom.predict(df,"Name","label")
    X, y = pp.preProcess(data_filename)
    X_f, y_f = GSMOTE.OverSample(X, y)
    y_f = y_f.astype(int)
    # Build a two-column table in which both columns carry the same class
    # label, so the label serves as both the "Name" and the "label" column.
    y1 = np.copy(y_f)
    y = np.column_stack([y1, y_f])
    labels = ["Name", "label"]
    # Prepend the header row; stacking strings onto the integer array makes
    # NumPy coerce the whole array to a string dtype.
    y = np.vstack((labels, y))
    # Row 0 supplies the column names, the remaining rows the data.
    frame = pd.DataFrame(y[1:, :], columns=y[0, :])
    # Second argument is the number of feature columns in the oversampled data.
    # NOTE(review): meaning of .83 and of the 100 / 50 arguments to fit() is
    # defined by the GSOM class, which is not visible here - confirm there.
    gsom1 = GSOM(.83, X_f.shape[1], max_radius=4)
    # Train and label on everything except the last ten rows ...
    gsom1.fit(X_f[:-10, :], 100, 50)
    gsom1.labelling_gsom(X_f[:-10, :], frame.iloc[:-10, :], "Name", "label")
    gsom1.finalize_gsom_label()
    # ... and predict for the held-back last ten rows.
    y_pred = gsom1.predict_values(X_f[-10:, :], frame.iloc[-10:, :])
    print(y_pred)
    print("complete")