Example 1
def KNN(X, y):
    """Train a 5-nearest-neighbours classifier on GSMOTE-oversampled data
    and report its performance via evaluate()."""
    # Hold out a quarter of the samples for testing, fixed seed.
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=1 / 4,
                                                        random_state=0)
    # Rebalance only the training split; the test split stays untouched.
    balanced_X, balanced_y = GSMOTE.OverSample(train_X, train_y)

    # k-NN with Minkowski distance, p=2 (i.e. Euclidean).
    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    model.fit(balanced_X, balanced_y)

    # Predict held-out labels and cast them to int before scoring.
    predictions = model.predict(test_X).astype(int)

    evaluate("KNN", test_y, predictions)
Example 2
def decision_tree(X, y):
    """Fit a decision-tree regressor on GSMOTE-oversampled data and
    evaluate its 0.5-thresholded predictions as class labels."""
    # 80/20 train/test split with a fixed seed for reproducibility.
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # Rebalance the training portion only.
    balanced_X, balanced_y = GSMOTE.OverSample(train_X, train_y)

    # Regression tree; its continuous output is binarised below.
    model = DecisionTreeRegressor()
    model.fit(balanced_X, balanced_y)

    # Threshold the regression output at 0.5 to obtain 0/1 labels.
    raw_scores = model.predict(test_X)
    labels = np.where(raw_scores > 0.5, 1, 0)

    evaluate("Decision Tree", test_y, labels)
Example 3
def gradient_boosting(X, y):
    """Train a gradient-boosting classifier on GSMOTE-oversampled data
    and report its performance via evaluate()."""
    # 80/20 train/test split with a fixed seed.
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # Rebalance only the training split.
    balanced_X, balanced_y = GSMOTE.OverSample(train_X, train_y)

    # 100 shallow trees with a conservative learning rate.
    model = GradientBoostingClassifier(n_estimators=100,
                                       learning_rate=0.01,
                                       max_depth=3)
    model.fit(balanced_X, balanced_y)

    # Predictions may come back as strings; cast to int, then map to 0/1
    # with the same threshold used by the sibling functions.
    raw_labels = model.predict(test_X)
    labels = np.where(raw_labels.astype(int) > 0.5, 1, 0)

    evaluate("Gradient Boosting", test_y, labels)
Example 4
def linear_training(X, y):
    """Visualise the data before/after GSMOTE oversampling, fit a linear
    regression, and evaluate its 0.5-thresholded predictions."""
    # 80/20 train/test split with a fixed seed.
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # Plot the raw training data before balancing.
    vs(train_X, train_y, "Original data")
    # Rebalance classes with GSMOTE.
    balanced_X, balanced_y = GSMOTE.OverSample(train_X, train_y)
    # Plot again to inspect the synthetic samples.
    vs(balanced_X, balanced_y, "Oversampled ")

    # Ordinary least squares on the balanced training data.
    model = LinearRegression()
    model.fit(balanced_X, balanced_y)

    # Binarise the continuous predictions at 0.5.
    raw_scores = model.predict(test_X)
    labels = np.where(raw_scores > 0.5, 1, 0)

    evaluate("Linear Regression", test_y, labels)
Example 5
import gsmote.comparison_testing.preprocessing as pp

# Partition the dataset
from sklearn.model_selection import train_test_split

# Path to the raw dataset; pre_process returns features X and labels y.
date_file = "../../data/KDD.csv"
X, y = pp.pre_process(date_file)

# Hold out 20% of the samples with a fixed seed.
# NOTE(review): the unpacking order looks unusual — X/y keep the larger
# (training) part while X_t/y_test receive the held-out part; verify
# that downstream code relies on exactly this assignment.
X, X_t, y, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Oversample the retained split, then project the synthetic points with
# the PCA already fitted on the original data (so both plots share axes).
X_resampled, y_resampled = gs.OverSample(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

# Scatter the original samples per class; labels are compared as the
# strings '0'/'1', so y is presumably string-encoded — confirm upstream.
c0 = ax1.scatter(X_vis[y == '0', 0],
                 X_vis[y == '0', 1],
                 label="Class #0",
                 alpha=0.5,
                 marker='.')
c1 = ax1.scatter(X_vis[y == '1', 0],
                 X_vis[y == '1', 1],
                 label="Class #1",
                 alpha=0.5,
                 marker='.')
Example 6
# Input CSV; normalise any Windows-style path separators.
date_file = "../../data/adultmini.csv".replace('\\', '/')
# date_file = "content/pygsom/data/ecoli.csv".replace('\\', '/')

X, y = pp.preProcess(date_file)

# 80/20 train/test split with a fixed seed for reproducibility.
X_t, X_test, y_t, y_test = train_test_split(X,
                                            y,
                                            test_size=0.2,
                                            random_state=0)

# Visualize original data
vs(X_t, y_t, "Original data")

# oversample
print("Oversampling inprogress...")
X_train, y_train = GSMOTE.OverSample(X_t, y_t)
# visualize oversampled data
print("Oversampling completed")
print("Plotting oversampled data...")
vs(X_train, y_train, "Oversampled ")
print("Plotting completed")


def linear_training():
    """Fit a linear regression on the module-level oversampled training
    data and predict on the held-out test set.

    Reads X_train, y_train and X_test from module scope. The snippet
    visible here ends right after prediction; evaluation presumably
    follows but is not shown.
    """
    # Fitting Simple Linear Regression to the Training set
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    # Predicting the Test set results
    y_predict = regressor.predict(X_test)
Example 7
        return class_counter2.most_common(1)[0][0]


if __name__ == '__main__':
    # Fixed seed so the GSMOTE/GSOM run is reproducible.
    np.random.seed(1)
    df = pd.read_csv(data_filename)
    print(df.shape)

    # data_training = df.iloc[:, 1:17]
    # gsom = GSOM(.83, 16, max_radius=4)
    # gsom.fit(data_training.to_numpy(), 100, 50)
    # x= (data_training.to_numpy())
    # gsom.predict(df,"Name","label")

    # Preprocess the whole dataset and balance it with GSMOTE.
    X, y = pp.preProcess(data_filename)
    X_f, y_f = GSMOTE.OverSample(X, y)
    y_f = y_f.astype(int)
    # Build a two-column frame with header row ["Name", "label"]; both
    # columns carry the same label values here — presumably "Name" acts
    # as a per-row identifier for the GSOM API; confirm against
    # labelling_gsom's contract.
    y1 = np.copy(y_f)
    y = np.column_stack([y1, y_f])
    labels = ["Name", "label"]
    y = np.vstack((labels, y))
    frame = pd.DataFrame(y[1:, :], columns=y[0, :])
    # Map width matches the feature count of the oversampled data.
    gsom1 = GSOM(.83, X_f.shape[1], max_radius=4)

    # Train on all but the last 10 rows, then attach labels to the map.
    gsom1.fit(X_f[:-10, :], 100, 50)
    gsom1.labelling_gsom(X_f[:-10, :], frame.iloc[:-10, :], "Name", "label")
    gsom1.finalize_gsom_label()

    # Predict on the 10 held-out rows and print the result.
    y_pred = gsom1.predict_values(X_f[-10:, :], frame.iloc[-10:, :])
    print(y_pred)
    print("complete")