def propagation(a1, theta1, theta2):
    # Forward pass that keeps every activation (needed later by backpropagation).
    a1 = Data_Management.add_column_left_of_matrix(a1)  # bias column for the input layer
    a2 = g(np.dot(a1, np.transpose(theta1)))
    a2 = Data_Management.add_column_left_of_matrix(a2)  # bias column for the hidden layer
    a3 = g(np.dot(a2, np.transpose(theta2)))
    return a1, a2, a3
def propagation(X, theta1, theta2):
    # Forward pass that only returns the output layer (X already carries its bias column).
    hiddenLayer = g(np.dot(X, np.transpose(theta1)))
    hiddenLayer = Data_Management.add_column_left_of_matrix(hiddenLayer)
    outputLayer = g(np.dot(hiddenLayer, np.transpose(theta2)))
    return outputLayer
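# g() and derivada_de_G() are called throughout but defined elsewhere in the
# repo; given how they are used (activations in (0, 1), paired together in
# backpropagation), they are presumably the sigmoid and its derivative.
# A minimal sketch under that assumption, not the repo's actual code:
import numpy as np

def g(z):
    # Element-wise sigmoid activation.
    return 1.0 / (1.0 + np.exp(-z))

def derivada_de_G(z):
    # Sigmoid derivative: g(z) * (1 - g(z)).
    s = g(z)
    return s * (1 - s)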
def draw_decision_boundary(theta, X, Y, orX, mu, sigma):
    plt.figure()
    x0_min, x0_max = np.min(orX), np.max(orX)
    arrayX = np.arange(x0_min, x0_max, 0.05)
    arrayX = np.reshape(arrayX, (np.shape(arrayX)[0], 1))
    arrayXaux = Normalization.normalize2(generate_polynom_data(arrayX, 8), mu, sigma)
    arrayXaux = Data_Management.add_column_left_of_matrix(arrayXaux)
    theta = np.reshape(theta, (np.shape(theta)[0], 1))
    arrayY = h(arrayXaux, theta.T)
    plt.plot(arrayX, arrayY)
    plt.scatter(orX, Y, 20, marker='$F$', color="red")
    plt.show()
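# h() is also defined elsewhere in the repo; from its call sites (a
# bias-augmented design matrix dotted with theta) it is presumably the linear
# hypothesis. A minimal sketch under that assumption:
def h(X, theta):
    # X: (m, n+1) with bias column, theta: (1, n+1) -> (m, 1) predictions.
    return np.dot(X, np.transpose(theta))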
def backdrop(params_rn, num_entradas, num_ocultas, num_etiquetas, X, y, reg):
    """Return the cost and gradient of a two-layer neural network."""
    theta1 = np.reshape(params_rn[:num_ocultas * (num_entradas + 1)],
                        (num_ocultas, (num_entradas + 1)))
    theta2 = np.reshape(params_rn[num_ocultas * (num_entradas + 1):],
                        (num_etiquetas, (num_ocultas + 1)))

    # -------------------- STEP 1: forward propagation ----------------------
    a1, a2, a3 = propagation(X, theta1, theta2)
    m = np.shape(X)[0]
    delta_3 = a3 - y  # (5000, 10)

    # -------------------- STEP 2: hidden-layer error ------------------------
    delta_matrix_1 = np.zeros(np.shape(theta1))
    delta_matrix_2 = np.zeros(np.shape(theta2))
    aux1 = np.dot(delta_3, theta2)  # (5000, 26)
    aux2 = Data_Management.add_column_left_of_matrix(
        derivada_de_G(np.dot(a1, np.transpose(theta1))))
    delta_2 = aux1 * aux2  # (5000, 26)
    delta_2 = np.delete(delta_2, [0], axis=1)  # drop the bias error -> (5000, 25)

    # -------------------- STEP 4: accumulate gradients ----------------------
    delta_matrix_1 = delta_matrix_1 + np.transpose(np.dot(np.transpose(a1), delta_2))  # (25, 401)
    delta_matrix_2 = delta_matrix_2 + np.transpose(np.dot(np.transpose(a2), delta_3))  # (10, 26)

    # -------------------- STEP 6: average and regularize --------------------
    delta_matrix_1 = (1 / m) * delta_matrix_1
    delta_matrix_1[:, 1:] = delta_matrix_1[:, 1:] + (reg / m) * theta1[:, 1:]
    delta_matrix_2 = (1 / m) * delta_matrix_2
    delta_matrix_2[:, 1:] = delta_matrix_2[:, 1:] + (reg / m) * theta2[:, 1:]

    cost = J(X, y, a3, num_etiquetas, theta1, theta2)
    gradient = np.concatenate((np.ravel(delta_matrix_1), np.ravel(delta_matrix_2)))
    return cost, gradient
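# A quick way to validate the analytic gradient that backdrop returns is
# numerical gradient checking. This is a sketch, not part of the repo; it
# assumes backdrop, X, y (one-hot) and the layer sizes are already in scope.
def numerical_gradient_check(params, num_entradas, num_ocultas, num_etiquetas,
                             X, y, reg, eps=1e-4):
    _, analytic = backdrop(params, num_entradas, num_ocultas, num_etiquetas, X, y, reg)
    for i in np.random.choice(params.size, 10, replace=False):  # spot-check 10 entries
        bumped = params.copy()
        bumped[i] += eps
        cost_plus, _ = backdrop(bumped, num_entradas, num_ocultas, num_etiquetas, X, y, reg)
        bumped[i] -= 2 * eps
        cost_minus, _ = backdrop(bumped, num_entradas, num_ocultas, num_etiquetas, X, y, reg)
        numeric = (cost_plus - cost_minus) / (2 * eps)
        print(i, analytic[i], numeric)  # the two values should agree to ~4 decimals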
def divideRandomGroups(X, y):
    X, y = Data_Management.shuffle_in_unison_scary(X, y)
    # ----------------------------------------------------------------------
    percent_train = 0.6
    percent_valid = 0.2
    percent_test = 0.2
    # ----------------------------------------------------------------------
    # TRAINING GROUP
    t = int(np.shape(X)[0] * percent_train)
    trainX = X[:t]
    trainY = y[:t]
    # ----------------------------------------------------------------------
    # VALIDATION GROUP
    v = int(np.shape(trainX)[0] + np.shape(X)[0] * percent_valid)
    validationX = X[np.shape(trainX)[0]:v]
    validationY = y[np.shape(trainY)[0]:v]
    # ----------------------------------------------------------------------
    # TESTING GROUP
    testingX = X[np.shape(trainX)[0] + np.shape(validationX)[0]:]
    testingY = y[np.shape(trainY)[0] + np.shape(validationY)[0]:]

    return trainX, trainY, validationX, validationY, testingX, testingY
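# Quick sanity check of the 60/20/20 split on synthetic data (illustration
# only; assumes Data_Management.shuffle_in_unison_scary is importable):
demoX = np.arange(20).reshape(10, 2)
demoY = np.arange(10).reshape(10, 1)
tX, tY, vX, vY, sX, sY = divideRandomGroups(demoX, demoY)
print(tX.shape, vX.shape, sX.shape)  # expected: (6, 2) (2, 2) (2, 2)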
            predicted_type = i
    return max_security, predicted_type


def polynomial_features(X, grado):
    poly = pf(grado)
    return poly.fit_transform(X)


# X, y = Data_Management.load_csv_types_features("pokemon.csv",
#     ['against_bug', 'against_dark', 'against_dragon', 'against_electric',
#      'against_fairy', 'against_fight', 'against_fire', 'against_flying',
#      'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal',
#      'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water'])
X, y = Data_Management.load_csv_types_features("pokemon.csv", [feature1, feature2])

# TODO: use type2 for the score as well (if my SVM predicts 1 and either of the
# two y values is 1, count it as truePositive++) and report the result with a
# single type; everything about training stays the same (it trains for one type
# only). Then the score is computed that way and type prediction stays the same.
# Two SVMs (primary type and secondary type) would also work, but it is messier.

X = polynomial_features(X, grado)
X, mu, sigma = Normalization.normalize_data_matrix(X[:, 1:])
X = Data_Management.add_column_left_of_matrix(X)

trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(X, y)

svms = []
for j in range(18):
    currentTrainY = (trainY == j) * 1
        labels[i] = int(round((labels[i] * sigma[0, 1]) + mu[0, 1], -1))
    ax.yaxis.set_ticklabels(labels)

    if show_allTypes:
        paint_pkmTypes(X, y)
    else:
        paint_pkmTypes(X, y, types)

    figure.legend()
    plt.show()


attr_names = ["percentage_male", "sp_attack"]
types_to_paint = ["fire"]

X, y = Data_Management.load_csv_types_features("pokemon.csv", attr_names)

# normalize
X, mu, sigma = Normalization.normalize_data_matrix(X)

num_entradas = np.shape(X)[1]
num_ocultas = 25
num_etiquetas = 18

true_score_max = float("-inf")
thetaTrueMin1 = None
thetaTrueMin2 = None

y_transformed = transform_y(y, num_etiquetas)

trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(
        sigma = possible_values[j]
        svm = SVC(kernel='rbf', C=C_value, gamma=(1 / (2 * sigma**2)))
        svm.fit(X, y.ravel())
        # Compute the score on the validation examples (higher score = better SVM).
        current_score = true_score(Xval, yval, svm)

        if current_score > max_score:
            max_score = current_score
            best_svm = svm
            selected_C = C_value
            selected_Sigma = sigma

    return best_svm, selected_C, selected_Sigma


X, y = Data_Management.load_csv_svm("pokemon.csv", [feature1, feature2])
X, mu, sigma = Normalization.normalize_data_matrix(X)
X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)

max_score = float("-inf")
best_svm = None

for i in range(NUM_TRIES):
    # THIS IS GIVING THE SAME RESULT, ALWAYS (MAYBE SELECT C AND SIGMA RANDOMLY)
    seed = np.random.seed()  # note: np.random.seed() returns None, so seed is never a usable value
    current_svm, C, s = eleccion_parametros_C_y_Sigma(trainX, trainY,
                                                      validationX, validationY,
                                                      mu, sigma)
    current_score = true_score(testingX, testingY, current_svm)
    draw_decisition_boundary(testingX, testingY, current_svm, current_score,
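# Regarding the all-caps note above: one way to stop every try from giving the
# same result is to sample C and sigma at random (log-uniformly) instead of
# sweeping the same fixed grid each time. A sketch; the helper name and the
# range are made up for illustration:
def random_C_and_sigma(low=0.01, high=30.0):
    log_low, log_high = np.log10(low), np.log10(high)
    C_value = 10 ** np.random.uniform(log_low, log_high)
    sigma_value = 10 ** np.random.uniform(log_low, log_high)
    return C_value, sigma_value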
from ML_UtilsModule import Data_Management

X, y = Data_Management.load_csv("pokemon.csv")  # not usable as-is
# print(X[:, 0])
    arrayY = h(arrayXaux, theta.T)
    plt.plot(arrayX, arrayY)
    plt.scatter(orX, Y, 20, marker='$F$', color="red")
    plt.show()


def draw_plot(X, Y):
    plt.plot(X, Y)


data = loadmat('ex5data1.mat')
X, y, Xval, yval, Xtest, ytest = (data['X'], data['y'], data['Xval'],
                                  data['yval'], data['Xtest'], data['ytest'])

XPoly = generate_polynom_data(X, 8)
XPoly, mu, sigma = normalize_matrix(XPoly)
mu = np.reshape(mu, (1, np.shape(mu)[0]))
sigma = np.reshape(sigma, (1, np.shape(sigma)[0]))
XPoly = Data_Management.add_column_left_of_matrix(XPoly)

XPolyVal = Normalization.normalize2(generate_polynom_data(Xval, 8), mu, sigma)
XPolyVal = Data_Management.add_column_left_of_matrix(XPolyVal)

XPolyTest = Normalization.normalize2(generate_polynom_data(Xtest, 8), mu, sigma)
XPolyTest = Data_Management.add_column_left_of_matrix(XPolyTest)

lambdaAux = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]

error_array = np.array([], dtype=float)
error_array_val = np.array([], dtype=float)
thetas = np.array([], dtype=float)

for l in range(len(lambdaAux)):
    theta = np.ones(XPoly.shape[1], dtype=float)
    theta_min = sciMin(fun=minimizar, x0=theta,
                       args=(XPoly, y, lambdaAux[l]),
            falsePositives += 1
        elif checker[i] == 0 and y[i] == 1:
            falseNegatives += 1

    if truePositives == 0:
        return 0

    recall = truePositives / (truePositives + falseNegatives)
    precision = truePositives / (truePositives + falsePositives)
    score = 2 * (precision * recall / (precision + recall))
    return score


graphic_attr_names = ["capture_rate", "base_egg_steps"]

X, y = Data_Management.load_csv_svm("pokemon.csv", graphic_attr_names)
X, mu, sigma = Normalization.normalize_data_matrix(X)
X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)

# -------------------------------------------------------------------------
allMaxPercent = []
allMaxElev = []
allMaxPoly = []
allMaxThetas = []

Xused = validationX
Yused = validationY
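# Tiny worked example of the F1 score computed above (illustration only):
# predictions = [1, 1, 0, 0] against truth = [1, 0, 1, 0]
# -> TP = 1, FP = 1, FN = 1
# -> precision = 1/2, recall = 1/2, F1 = 2 * (0.5 * 0.5) / (0.5 + 0.5) = 0.5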
    # Set the tick labels back to non-normalized values.
    figure.canvas.draw()
    labels = [item for item in plt.xticks()[0]]
    for i in range(len(labels)):
        labels[i] = int(round((labels[i] * sigma[0, 0]) + mu[0, 0], -1))
    ax.xaxis.set_ticklabels(labels)

    labels = [item for item in plt.yticks()[0]]
    for i in range(len(labels)):
        labels[i] = int(round((labels[i] * sigma[0, 1]) + mu[0, 1], -1))
    ax.yaxis.set_ticklabels(labels)

    plt.show()


X, y = Data_Management.load_csv_svm("pokemon.csv", ["base_egg_steps", "base_happiness"])
nX, ny, ntrainX, ntrainY, nvalidationX, nvalidationY, ntestingX, ntestingY = Data_Management.divide_legendary_groups(X, y)

# normalize
p, X = polinomial_features(X, 5)
X, mu, sigma = Normalization.normalize_data_matrix(X[:, 1:])
# X = Data_Management.add_column_left_of_matrix(X)

X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)

num_entradas = np.shape(X)[1]
num_ocultas = 25
num_etiquetas = 1

true_score_max = float("-inf")
def propagation(X, theta1, theta2):
    hiddenLayer = g(np.dot(X, np.transpose(theta1)))
    hiddenLayer = Data_Management.add_column_left_of_matrix(hiddenLayer)
    outputLayer = g(np.dot(hiddenLayer, np.transpose(theta2)))
    return outputLayer


def checkLearned(y, outputLayer):
    # Labels in ex3data1.mat are 1-indexed (digit 0 is stored as 10), hence the "+ 1".
    maxIndexV = np.argmax(outputLayer, axis=1) + 1
    checker = (y[:, 0] == maxIndexV)
    count = np.size(np.where(checker == True))
    fin = count / np.shape(y)[0] * 100
    return fin


weights = loadmat('ex3weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']

X, y = Data_Management.load_mat("ex3data1.mat")
X = Data_Management.add_column_left_of_matrix(X)  # add the column of 1s

outputLayer = propagation(X, theta1, theta2)
print("Neural network accuracy: " + str(checkLearned(y, outputLayer)) + " %")
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from ML_UtilsModule import Data_Management, Normalization
from boruta import BorutaPy

# Load X and y.
# NOTE: BorutaPy accepts numpy arrays only.
X, y = Data_Management.load_csv_types_features("pokemon.csv", [
    "hp", "attack", "defense", "sp_attack", "sp_defense", "speed",
    "height_m", "weight_kg", "percentage_male", "generation"
])
y = y.ravel()

# Define the random forest classifier, using all cores and
# sampling in proportion to the y labels.
rf = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)

# Define the Boruta feature selection method.
feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1)

# Find all relevant features.
feat_selector.fit(X, y)

# Boolean mask of the selected features.
feat_selector.support_

# Ranking of the features (1 = best).
feat_selector.ranking_

# Call transform() on X to filter it down to the selected features.
X_filtered = feat_selector.transform(X)
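# Optional follow-up: map the boolean support mask back to the feature names
# passed above (feature_names is defined here for illustration, not by boruta):
feature_names = np.array(["hp", "attack", "defense", "sp_attack", "sp_defense",
                          "speed", "height_m", "weight_kg", "percentage_male",
                          "generation"])
print(feature_names[feat_selector.support_])              # names of the selected features
print(feature_names[np.argsort(feat_selector.ranking_)])  # best-ranked first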
def checkLearned(X, y, clasificadores):
    result = checkNumber(X, clasificadores)
    maxIndexV = np.argmax(result, axis=1)
    checker = ((y[:, 0] % np.shape(clasificadores)[0]) == maxIndexV)
    count = np.size(np.where(checker == True))
    fin = count / np.shape(y)[0] * 100
    return fin


def checkNumber(X, clasificadores):
    result = np.zeros((np.shape(X)[0], np.shape(clasificadores)[0]))
    # result[:, k] = what classifier k thinks about each example
    # (e.g. result[:, 0] is each example's probability of being a 0).
    result[:] = g(np.dot(X, np.transpose(clasificadores[:])))
    return result


X, y = Data_Management.load_mat("ex3data1.mat")

clasificadores = oneVSAll(X, y, 10, 2)

print("Accuracy: " + str(checkLearned(X, y, clasificadores)) + " %")

Data_Management.draw_random_examples(X)
plt.show()
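# oneVSAll is defined elsewhere in the repo. As a reference for readers, this
# is a minimal self-contained sketch of the one-vs-all idea it implements
# (one regularized logistic classifier per label); it uses
# scipy.optimize.minimize rather than the repo's own J/gradient helpers:
from scipy.optimize import minimize

def one_vs_all_sketch(X, y, num_labels, reg):
    m, n = X.shape
    classifiers = np.zeros((num_labels, n))
    for label in range(num_labels):
        target = ((y[:, 0] % num_labels) == label) * 1  # binary labels for this class
        def cost(theta):
            hyp = g(np.dot(X, theta))
            eps = 1e-9  # avoid log(0)
            j = -(np.dot(target, np.log(hyp + eps)) +
                  np.dot(1 - target, np.log(1 - hyp + eps))) / m
            return j + reg * np.sum(theta[1:] ** 2) / (2 * m)
        classifiers[label] = minimize(cost, np.zeros(n), method="TNC").x
    return classifiers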
    # -------------------- STEP 6: average and regularize --------------------
    delta_matrix_1 = (1 / m) * delta_matrix_1
    delta_matrix_1[:, 1:] = delta_matrix_1[:, 1:] + (reg / m) * theta1[:, 1:]
    delta_matrix_2 = (1 / m) * delta_matrix_2
    delta_matrix_2[:, 1:] = delta_matrix_2[:, 1:] + (reg / m) * theta2[:, 1:]

    cost = J(X, y, a3, num_etiquetas, theta1, theta2)
    gradient = np.concatenate((np.ravel(delta_matrix_1), np.ravel(delta_matrix_2)))
    return cost, gradient


X, y = Data_Management.load_mat("ex4data1.mat")

# indexRand = np.random.randint(0, 5001, 100)
# displayData.displayData(X[indexRand[:]])
# plt.show()

weights = loadmat('ex4weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']

theta1 = pesos_aleat(np.shape(theta1)[1] - 1, np.shape(theta1)[0])
theta2 = pesos_aleat(np.shape(theta2)[1] - 1, np.shape(theta2)[0])

# a1, a2, a3 = propagation(X, theta1, theta2)

theta_vector = np.concatenate((np.ravel(theta1), np.ravel(theta2)))
        xx1.ravel(), xx2.ravel()].dot(theta))
    h = h.reshape(xx1.shape)

    # The fourth parameter is the z value whose boundary is to be drawn.
    plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b')
    plt.show()


def training_examples_test_with_theta(training_examples, Y, theta):
    test = g(np.dot(training_examples, np.transpose(theta)))
    test = np.around(test)
    test = np.reshape(test, (np.shape(test)[0], 1))
    mask = (Y == test)
    return (len(Y[mask]) / len(Y)) * 100


data = Data_Management.load_csv(sys.argv[1])
X, Y, m, n = Data_Management.get_parameters_from_training_examples(data)

draw_data(X, Y)

theta = np.zeros([1, n + 1], dtype=float)
print(theta.shape)
# theta = np.ravel(theta)
X = Data_Management.add_column_left_of_matrix(X)  # convention from linear regression

theta = tnc(func=J, x0=theta, fprime=gradient, args=(X, Y))[0]

pinta_frontera_recta(X, Y, theta)
print("Percentage of correct predictions: " +
      str(training_examples_test_with_theta(X, Y, theta)) + "%")
    pos = np.where(Y == 1)  # indices where Y == 1
    plt.scatter(X[pos, 0], X[pos, 1], marker='*', c='y')
    pos = np.where(Y == 0)  # indices where Y == 0
    plt.scatter(X[pos, 0], X[pos, 1], marker='$F$', c='r')


def draw_decision_boundary(theta, X, Y, poly):
    x0_min, x0_max = X[:, 0].min(), X[:, 0].max()
    x1_min, x1_max = X[:, 1].min(), X[:, 1].max()
    xx1, xx2 = np.meshgrid(np.linspace(x0_min, x0_max),
                           np.linspace(x1_min, x1_max))
    sigm = g(poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(theta))
    sigm = sigm.reshape(xx1.shape)
    plt.contour(xx1, xx2, sigm, [0.5], linewidths=1, colors='g')


def draw(theta, X, Y, poly):
    plt.figure()
    draw_data(X, Y)
    draw_decision_boundary(theta, X, Y, poly)
    plt.show()


data = Data_Management.load_csv(sys.argv[1])
X, Y, m, n = Data_Management.get_parameters_from_training_examples(data)

poly, X_poly = polinomial_features(X, int(sys.argv[2]))
theta = np.zeros([1, np.shape(X_poly)[1]], dtype=float)
theta = tnc(func=J, x0=theta, fprime=gradient, args=(X_poly, Y))[0]

draw(theta, X, Y, poly)
        plot_decision_boundary(X, y, model, self.epoch_count, self.count, cmap='RdBu')
        score, acc = model.evaluate(X, y, verbose=0)
        print("error " + str(score) + ", accuracy " + str(acc))
        self.count = self.count + 1
        self.epoch_count = self.epoch_count + 1
        return self.epoch_count


if __name__ == "__main__":
    X, y = datasets.make_moons(n_samples=1000, noise=0.1, random_state=0)  # overwritten by the next line
    X, y = Data_Management.load_csv_svm("pokemon.csv", ['weight_kg', 'height_m'])
    X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)
    y = y.ravel()
    trainY = trainY.ravel()

    # Create a directory where images will be saved.
    os.makedirs("images_new", exist_ok=True)

    # Define our model object.
    model = Sequential()

    # Add layers to our model.
    model.add(
        Dense(units=25,
              input_shape=(2, ),