def test_XOR(self):
    """Fit an RBF network on the four XOR points and check it reproduces XOR."""
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    targets = np.array([[0], [1], [1], [0]])

    # The training samples themselves are passed in as the centers.
    network = RBF(centers=inputs)
    network.fit(inputs, targets)

    # Threshold the raw outputs at 0.5 and compare element-wise with the targets.
    predicted_labels = network.predict(inputs) > 0.5
    self.assertTrue(np.all(predicted_labels == targets))
def test_sin(self):
    """Fit a noisy 1-D sinusoid and check the training RMSE stays near the noise level.

    The targets are ``0.5*sin(4*pi*x) + 0.5`` plus Gaussian noise with
    standard deviation ``noise``; the test passes when the RMSE of the
    predictions on the training inputs is below ``noise + epsilon``.
    """
    n = 10000
    noise = 0.3
    epsilon = 0.005

    # Draw the inputs first, then the noise, so the order of random draws
    # matches the original test.
    X = np.random.rand(n).reshape(-1, 1)
    T = (0.5 * np.sin(4 * np.pi * X) + 0.5
         + np.random.normal(size=n, scale=noise).reshape(-1, 1))

    rbf = RBF(n_centers=20, activation='gaussian', sigma=0.05)
    rbf.fit(X, T)
    error = RMSE(rbf.predict(X), T)

    # Uncomment to inspect the fit visually:
    # Xp = np.linspace(0, 1, 1000).reshape(-1, 1)
    # Tp = rbf.predict(Xp)
    # plt.scatter(X, T)
    # plt.plot(Xp, Tp, c='y')
    # plt.show()

    # assertLess (as used by test_reg) reports both values when it fails,
    # unlike assertTrue on a pre-computed boolean.
    self.assertLess(error, noise + epsilon)
def test_reg(self):
    """Fit a regularised RBF on a few noisy samples and compare with the true curve.

    The network is trained on 80 noisy samples of a sinusoid and evaluated on
    a dense grid over [0, 1]; the RMSE against the noise-free curve must be
    below ``noise + epsilon``.
    """
    def true_curve(x):
        # Sinusoid with values in [0, 1].
        return 0.5*np.sin(4*np.pi*x) + 0.5

    sample_count = 80
    noise = 0.05
    epsilon = 0.01

    # Training data: noisy observations of the curve (inputs drawn before noise).
    train_x = np.random.rand(sample_count).reshape(-1, 1)
    gaussian_noise = np.random.normal(size=sample_count, scale=noise).reshape(-1, 1)
    train_t = true_curve(train_x) + gaussian_noise

    network = RBF(n_centers=20, activation='gaussian', sigma=0.05, lambdaReg=20.)
    network.fit(train_x, train_t)

    # Evaluate on an evenly spaced grid.
    grid_x = np.linspace(0, 1, 1000).reshape(-1, 1)
    grid_y = network.predict(grid_x)

    # Uncomment to inspect the fit visually:
    # plt.scatter(train_x, train_t)            # training data
    # plt.plot(grid_x, true_curve(grid_x))     # true curve
    # plt.plot(grid_x, grid_y)                 # learned curve
    # plt.show()

    true_error = RMSE(grid_y, true_curve(grid_x))
    self.assertLess(true_error, noise + epsilon)
def test_sin_redundancy(self):
    """Fit a noisy sinusoid when the input carries an extra, irrelevant column.

    The targets depend only on the first input column; the second column is
    independent uniform noise. The test passes when the training RMSE is
    below ``noise + epsilon``.
    """
    n = 1000
    noise = 0.05
    epsilon = 0.01

    # Random draws happen in the same order as before: X1, X2, then the noise.
    X1 = np.random.rand(n).reshape(-1, 1)
    X2 = np.random.rand(n).reshape(-1, 1)  # redundant dimension
    X = np.concatenate([X1, X2], axis=1)
    T = (0.5 * np.sin(4 * np.pi * X1) + 0.5
         + np.random.normal(size=n, scale=noise).reshape(-1, 1))

    # Train, then predict on the training inputs.
    rbf = RBF(n_centers=150, activation='gaussian', sigma=0.3, lambdaReg=1e-6)
    rbf.fit(X, T)
    error = RMSE(rbf.predict(X), T)

    # Uncomment to inspect the fit visually:
    # Xp1 = np.linspace(0, 1, 1000).reshape(-1, 1)
    # Xp2 = np.random.rand(1000).reshape(-1, 1)  # random 2nd co-ordinate
    # Xp = np.concatenate([Xp1, Xp2], axis=1)
    # Tp = rbf.predict(Xp)
    # plt.scatter(X1, T)
    # plt.plot(Xp1.reshape(-1, 1), Tp, c='y')
    # plt.show()

    # assertLess (as used by test_reg) reports both values when it fails,
    # unlike assertTrue on a pre-computed boolean.
    self.assertLess(error, noise + epsilon)
import numpy as np
import matplotlib.pyplot as plt

from rbf import RBF

# Build the test data: 100 evenly spaced points on [0, 10] and their sine.
x = np.linspace(0, 10, num=100)
y = np.sin(x)

# Fit the RBF network and predict on the same inputs.
model = RBF(hidden_shape=10, sigma=1.0)
model.fit(x, y)
y_pred = model.predict(x)

# Plot the reference curve (blue) against the network output (red).
plt.plot(x, y, 'b-', label='тест')
plt.plot(x, y_pred, 'r-', label='RBF')
plt.legend(loc='upper right')
plt.title('Интерполяция при использовании RBF-сети')
plt.show()
# --- projetando base com eigenfaces #treino_eig, validacao_eig, teste_eig = projeta_eigenfaces(treino, validacao, teste, 0.1) # --- # --- projetando lda treino_lda, validacao_lda, teste_lda = projeta_lda(treino, validacao, teste, label_treino) # --- normalizando base de dados parser.print("Normalizando base de dados...") treino_norm, teste_norm, validacao_norm = parser.normaliza(treino_lda, teste_lda, validacao_lda) parser.print("Normalização da base de dados concluida! (treino, teste e validacao)") # --- Treina rede parser.print("Iniciando treinamento da RBF...") rbf.fit(treino_norm, validacao_norm, parser.binariza2(label_treino), parser.binariza2(label_validacao)) parser.print("Treinamento finalizado!") # --- Calculando taxa de acerto no conjunto de teste parser.print("Calculando taxa de acerto no conjunto de teste") taxa_acerto = rbf.calcula_taxa_acerto(teste_norm, parser.binariza2(label_teste)) parser.print("Taxa de acerto do fold " + str(i) + ": " + str(taxa_acerto)) taxa_acerto_folds.append(taxa_acerto) # --- parser.print("Taxa de acerto do Experimento: " + str(taxa_acerto_folds)) parser.print("Media da taxa de acerto: " + str(np.mean(taxa_acerto_folds))) parser.print("Desvio Padrão: " + str(np.std(taxa_acerto_folds)))
def main():
    """Run the k-medoids RBF experiment, then build an FFNN, for each selected database.

    For every database name returned by ``select_db`` this function:

    1. opens ``../output/kmedoids/<database>_output.txt`` for writing,
       creating the directory first if it does not exist;
    2. runs ``kcluster`` in ``'k-medoids'`` mode and uses the data rows at the
       returned medoid indices as RBF centers;
    3. fits and tests an ``RBF`` network, echoing its weights to stdout and
       to the output file;
    4. constructs an ``FFNN`` whose layer sizes and learning rate depend on
       the dataset type (``'classification'`` or ``'regression'``).
    """
    import os  # local import: only needed to create the output directory

    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:
        # Output for this database is written to the file named below. The
        # directory is created on demand so a fresh checkout does not fail
        # on a missing "../output/kmedoids/" folder.
        filename = "../output/kmedoids/" + database + "_output.txt"
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        db = prepare_db(database, pm)
        dataset_type = db.get_dataset_type()
        is_classification = dataset_type == 'classification'

        # NOTE(review): k_nn is never used below; the construction is kept in
        # case knn(...) has side effects — confirm and remove if it does not.
        k_nn = knn(5, dataset_type, db.get_classifier_col(),
                   db.get_classifier_attr_cols())

        # Regression datasets have no class list and a single output.
        classes = db.get_class_list() if is_classification else []
        class_count = len(classes) if is_classification else 1

        # NOTE(review): X comes from shuffle_all while y is read from
        # db.get_data() afterwards; if shuffle_all returns a shuffled copy
        # rather than shuffling in place, X and y rows may not line up —
        # confirm against process_data.shuffle_all.
        X = process_data.shuffle_all(db.get_data(), 1)
        y = np.array(db.get_data())[:, db.get_classifier_col()]

        # The context manager guarantees the output file is flushed and
        # closed even if clustering, fitting or testing raises.
        with open(filename, "w+") as output_file:
            # RUN K-MEDOIDS ------------------------------------------------
            print("RUNNING K-MEDOIDS")
            kc = kcluster(10, 10, db.get_data(),
                          db.get_classifier_attr_cols(), 'k-medoids')
            indices = kc.get_medoids()
            centers = [db.get_data()[i] for i in indices]

            rbf = RBF(len(centers), class_count, output_file, 25)
            rbf.fit(X, centers, y, dataset_type, classes)

            # NOTE(review): these are labelled "INITIAL" but are read after
            # fit(), and the "FINAL WEIGHTS" written just below are the same
            # values — confirm which weights were meant to be logged where.
            print("INITIAL WEIGHTS: ", rbf.weights)
            output_file.write("INITIAL WEIGHTS: \n")
            output_file.write(str(rbf.weights) + "\n")
            print("CENTERS: ", centers)
            output_file.write("FINAL WEIGHTS: \n")
            output_file.write(str(rbf.weights) + "\n")
            output_file.write("FINAL TESTS: \n")
            rbf.test(X, dataset_type, y, centers, classes)
            print("FINALS WEIGHTS:")
            print(rbf.weights)
        # ------------------------------------------------------------------

        # NOTE(review): `db_name` (passed to FFNN below) is not defined in
        # this function, so it must come from module scope; possibly
        # `database` was intended — confirm.

        # BEGIN classification FFNN
        if is_classification:
            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers has arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.
            layer_sizes = [len(db.get_attr()), 10, len(db.get_class_list())]

            # This number is arbitrary.
            # NOTICE: Tune this per dataset
            learning_rate = .5

            ffnn = FFNN(layer_sizes, dataset_type, db_name,
                        db.get_data(), learning_rate)

        # BEGIN regression FFNN
        elif dataset_type == 'regression':
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers has arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 5, 5, 1]

            learning_rate = .0001

            ffnn = FFNN(layer_sizes, dataset_type, db_name,
                        db.get_data(), learning_rate)

        else:
            print('Database type invalid. Type = ' + dataset_type)