class_counter2.most_common(1)[0][0], "Actual", afaf) return class_counter2.most_common(1)[0][0] if __name__ == '__main__': np.random.seed(1) df = pd.read_csv(data_filename) print(df.shape) # data_training = df.iloc[:, 1:17] # gsom = GSOM(.83, 16, max_radius=4) # gsom.fit(data_training.to_numpy(), 100, 50) # x= (data_training.to_numpy()) # gsom.predict(df,"Name","label") X, y = pp.preProcess(data_filename) X_f, y_f = GSMOTE.OverSample(X, y) y_f = y_f.astype(int) y1 = np.copy(y_f) y = np.column_stack([y1, y_f]) labels = ["Name", "label"] y = np.vstack((labels, y)) frame = pd.DataFrame(y[1:, :], columns=y[0, :]) gsom1 = GSOM(.83, X_f.shape[1], max_radius=4) gsom1.fit(X_f[:-10, :], 100, 50) gsom1.labelling_gsom(X_f[:-10, :], frame.iloc[:-10, :], "Name", "label") gsom1.finalize_gsom_label() y_pred = gsom1.predict_values(X_f[-10:, :], frame.iloc[-10:, :]) print(y_pred)
def predict(self, X):
    """Return predictions for ``X`` from the object stored in ``self.gsom``.

    The call is forwarded to ``self.gsom.predict_values`` and its result is
    returned unchanged.
    """
    predictions = self.gsom.predict_values(X)
    return predictions


# NOTE(review): confirm this import is meant to be active at this point;
# GridSearchCV is required by the search below, train_test_split is not
# used in this section.
from sklearn.model_selection import GridSearchCV, train_test_split

# Input CSV for the search; replace() turns any backslashes into forward
# slashes.
date_file = "../../data/adultmini.csv".replace('\\', '/')
X, y = pp.preProcess(date_file)

# Hyper-parameter combinations to try.
parameters = [
    {
        'smooth_iteration': [12, 25],
        'training_iteration': [25, 50],
        'spreading_factor': [0.83, 0.53, 0.7],
        'FD': [0.1, 0.05, 0.2],
        'learning_rate': [0.3, 0.4, 0.5],
        'smooth_learning_factor': [0.8, 0.6, 0.7],
    }
]

# Run the search over the grid and report the best combination found.
gs = GridSearchCV(MeanClassifier(), parameters)
gs.fit(X, y)
params = gs.best_params_
print(params)
def parse_input_zoo_data(filename, header='infer'):
    """Load a dataset, hold out a test split and oversample the training part.

    The file is read through ``pp.preProcess``. ``train_test_split`` is then
    called with ``test_size=0.2`` and ``random_state=0``, and only the
    training portion is resampled with ``GeometricSMOTE(random_state=1)``.

    Args:
        filename: Path handed to ``pp.preProcess``.
        header: Accepted for interface compatibility; the current code does
            not use it.

    Returns:
        A 5-tuple ``(input_database, labels, classes, X_test, y_test)``.
        ``input_database`` is ``{0: <resampled training features>}``;
        ``labels`` and ``classes`` are two separate lists, both built from
        the resampled training targets; ``X_test`` and ``y_test`` are the
        held-out split exactly as returned by ``train_test_split``.
    """
    oversampler = GeometricSMOTE(random_state=1)

    features, targets = pp.preProcess(filename)
    train_features, X_test, train_targets, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=0
    )

    # Only the training split is oversampled; the held-out split is returned
    # untouched.
    resampled_features, resampled_targets = oversampler.fit_resample(
        train_features, train_targets
    )

    # Two independent lists are built on purpose, matching the original
    # return contract (labels and classes are distinct objects).
    classes = resampled_targets.tolist()
    labels = resampled_targets.tolist()

    return {0: resampled_features}, labels, classes, X_test, y_test