Beispiel #1
0
              class_counter2.most_common(1)[0][0], "Actual", afaf)
        return class_counter2.most_common(1)[0][0]


if __name__ == '__main__':
    # Seed NumPy's global RNG before any other work is done.
    np.random.seed(1)

    # Load the raw CSV only to report its dimensions.
    df = pd.read_csv(data_filename)
    print(df.shape)

    # Pre-process the same file, then oversample it with GSMOTE.
    X, y = pp.preProcess(data_filename)
    X_f, y_f = GSMOTE.OverSample(X, y)
    y_f = y_f.astype(int)
    y1 = np.copy(y_f)

    # The integer targets fill both the "Name" and the "label" column.  The
    # header row is stacked on top of the data and split off again when the
    # frame is built.
    # NOTE(review): stacking string headers onto an integer array presumably
    # turns every value into a string -- confirm that downstream code expects
    # string labels.
    labels = ["Name", "label"]
    y = np.vstack((labels, np.column_stack([y1, y_f])))
    frame = pd.DataFrame(y[1:, :], columns=y[0, :])

    gsom1 = GSOM(.83, X_f.shape[1], max_radius=4)

    # Everything except the final 10 rows is used for fitting and labelling.
    train_X = X_f[:-10, :]
    gsom1.fit(train_X, 100, 50)
    gsom1.labelling_gsom(train_X, frame.iloc[:-10, :], "Name", "label")
    gsom1.finalize_gsom_label()

    # The final 10 rows are held out and only used for prediction.
    holdout_X = X_f[-10:, :]
    holdout_frame = frame.iloc[-10:, :]
    y_pred = gsom1.predict_values(holdout_X, holdout_frame)
    print(y_pred)
Beispiel #2
0
    def predict(self, X):
        """Return whatever the wrapped GSOM's ``predict_values`` yields for ``X``."""
        predictions = self.gsom.predict_values(X)
        return predictions

    # def score(self, X, y=None):
    #     # counts number of values bigger than mean
    #     return(sum(self.predict(X)))
    #


from sklearn.model_selection import GridSearchCV, train_test_split

# Dataset used for the search; any backslashes in the path are turned into
# forward slashes.
date_file = "../../data/adultmini.csv".replace('\\', '/')
# date_file = "content/pygsom/data/adult.csv".replace('\\','/')

X, y = pp.preProcess(date_file)

# Candidate values for every hyper-parameter; the grid search evaluates each
# combination of them.
parameters = [dict(
    smooth_iteration=[12, 25],
    training_iteration=[25, 50],
    spreading_factor=[0.83, 0.53, 0.7],
    FD=[0.1, 0.05, 0.2],
    learning_rate=[0.3, 0.4, 0.5],
    smooth_learning_factor=[0.8, 0.6, 0.7],
)]
gs = GridSearchCV(MeanClassifier(), parameters)
gs.fit(X, y)

# Report the combination the search selected as best.
params = gs.best_params_
print(params)
Beispiel #3
0
    def parse_input_zoo_data(filename, header='infer'):
        """Build the training input and a held-out test split from ``filename``.

        The file is passed through ``pp.preProcess``, split with
        ``train_test_split`` (``test_size=0.2``, ``random_state=0``), and only
        the training part is resampled with ``GeometricSMOTE``
        (``random_state=1``).

        :param filename: path handed to ``pp.preProcess``.
        :param header: not used by the current implementation.
        :return: ``(input_database, labels, classes, X_test, y_test)``, where
            ``input_database`` is ``{0: <resampled training features>}`` and
            ``labels`` / ``classes`` are two separate lists, each produced by
            ``tolist()`` on the resampled training targets.
        """
        oversampler = GeometricSMOTE(random_state=1)

        features, targets = pp.preProcess(filename)
        split = train_test_split(features, targets, test_size=0.2, random_state=0)
        train_features, X_test, train_targets, y_test = split

        # The test split is left untouched; only training data is oversampled.
        resampled_features, resampled_targets = oversampler.fit_resample(
            train_features, train_targets)

        input_database = {0: resampled_features}
        classes = resampled_targets.tolist()
        labels = resampled_targets.tolist()
        return input_database, labels, classes, X_test, y_test