def test_something(self):
    N = 5
    lr_prediction_error = []
    lra_prediction_error = []
    ds = DatasetUtils(path="../clase3/resources/dataset.csv")
    ds_train, ds_test = ds.split(80)
    for j in range(1, N):
        # Build polynomial features of increasing degree from the single input column
        x_train = ds_train[:, 1:2]
        x_test = ds_test[:, 1:2]
        for i in range(1, j):
            x_train = np.append(x_train, ds_train[:, 1:2]**(i + 1), axis=1)
            x_test = np.append(x_test, ds_test[:, 1:2]**(i + 1), axis=1)
        y_test = ds_test[:, 2:3]
        y_train = ds_train[:, 2:3]

        lr = LinearRegression()
        lr.fit(x_train, y_train)
        y_lr_predict = lr.predict(x_test)
        lr_prediction_error.append(MSE()(y_test, y_lr_predict))

        lra = LinearRegressionAffine()
        lra.fit(x_train, y_train)
        y_lra_predict = lra.predict(x_test)
        lra_prediction_error.append(MSE()(y_test, y_lra_predict))

    plot.figure("MSE")
    # plot.plot(range(1, N), lr_prediction_error)
    plot.plot(range(1, N), lra_prediction_error)
    plot.xlabel("Number of dimensions")
    plot.ylabel("MSE")
    plot.show()
def test_something(self): ds = DatasetUtils(path="resources/dataset.csv") x_train, x_test = ds.split(80) y_real = x_test[:, 2] lr = LinearRegression() lr.fit(x_train[:, 1], x_train[:, 2]) y_lr_predict = lr.predict(x_test[:, 1]) lra = LinearRegressionAffine() lra.fit(x_train[:, 1], x_train[:, 2]) y_lra_predict = lra.predict(x_test[:, 1]) plot.scatter(x_test[:, 1], y_real, c="blue") plot.scatter(x_test[:, 1], y_lr_predict, c="red") plot.scatter(x_test[:, 1], y_lra_predict, c="yellow") plot.show() metrics = [MSE(), MedianError(), MeanError()] print("Metric", "LinearRegression", "LinearRegressionAffine") for metric in metrics: print(metric.get_name(), metric(target=y_real, prediction=y_lr_predict), metric(target=y_real, prediction=y_lra_predict)) # Metric LinearRegression LinearRegressionAffine # MSE 3.64650918978914 3.4685120979301836 # MedianError 0.007340468339615436 -0.01885682436420999 # MeanError 0.021328249515553076 -0.0025382219110967837 self.assertTrue(True)
def test_ejercicio2y3(self):
    x = C.ClusterUtils().build_synthetic_cluster(self.CLUSTER_DIM,
                                                 self.CLUSTER_N_CENTROIDS,
                                                 self.SAMPLES)
    # Randomly replace roughly 10% of the entries with NaN before saving
    uniform = np.random.uniform(0, 1, (self.SAMPLES, self.CLUSTER_DIM))
    uniform = np.where(uniform < 0.1, np.NaN, 0)
    x = x + uniform
    print(x)
    DatasetUtils.save_dataset(x, self.SAVE_PATH)
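# ClusterUtils.build_synthetic_cluster belongs to the project's own library and is not
# shown in this section. A minimal sketch of one common way to generate such a synthetic
# cluster dataset, assuming Gaussian blobs around randomly placed centroids (signature
# and behavior here are assumptions, not the project's actual implementation):
import numpy as np


def build_synthetic_cluster(dim, n_centroids, samples, centroids_distance=10):
    """Illustrative synthetic clusters: Gaussian blobs around random centroids."""
    centroids = np.random.uniform(0, centroids_distance, (n_centroids, dim))
    assignments = np.random.randint(0, n_centroids, samples)
    return centroids[assignments] + np.random.normal(0, 1, (samples, dim))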
def test_neuralNetwork(self):
    dataset = DatasetUtils(path='./resources/train_data.csv')
    train, test = dataset.split(80)
    y_train = train[:, -1]
    x_train = train[:, :-1]
    y_test = test[:, -1][:, None]
    x_test = test[:, :-1]

    nn = NeuralNetwork(layers=[
        Layer(input_size=2, neurons=3, activation_function=SigmoidActivationFunction()),
        Layer(input_size=3, neurons=2, activation_function=SigmoidActivationFunction()),
        Layer(input_size=2, neurons=1, activation_function=SigmoidActivationFunction())
    ])
    train_error, validation_error = nn.fit(x_train,
                                           y_train,
                                           n_epochs=3000,
                                           learning_rate=0.005,
                                           batch_size=16)

    y_predict = nn.predict(x_test)
    metrics = [Recall(), Selectivity(), Precision()]
    for metric in metrics:
        print(metric.get_name() + ":", metric(y_test, y_predict))
    # Recall: 95.74468085106383
    # Selectivity: 98.83720930232558
    # Precision: 94.73684210526315

    plt.figure()
    plt.plot(train_error, c='red', label='train')
    plt.plot(validation_error, c='blue', label='validation')
    plt.legend()
    # The plot is saved at ./resources/trainning_error.png

    plt.figure()
    index_1 = (y_test == 1).reshape(len(y_test))
    index_0 = (y_test == 0).reshape(len(y_test))
    plt.scatter(x_test[index_1, 0], x_test[index_1, 1], c='red')
    plt.scatter(x_test[index_0, 0], x_test[index_0, 1], c='blue')
    PlotUtils().plotDecisionBoundry(x_test, nn, plt)
    plt.show()


if __name__ == '__main__':
    unittest.main()
def test_with_keras(self):
    # Comment this line out to use the GPU
    tf.config.experimental.set_visible_devices([], 'GPU')

    dataset = DatasetUtils(path='./resources/train_data.csv')
    train, test = dataset.split(80)
    y_train = train[:, -1]
    x_train = train[:, :-1]
    y_test = test[:, -1][:, None]
    x_test = test[:, :-1]

    model = Sequential()
    model.add(Dense(128, input_dim=x_train.shape[1]))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation="sigmoid"))

    metrics = [
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
        keras.metrics.BinaryAccuracy(name="Binary Accuracy")
    ]
    model.compile(optimizer=keras.optimizers.Adam(1e-4),
                  loss="binary_crossentropy",
                  metrics=metrics)
    history = model.fit(x_train,
                        y_train,
                        batch_size=16,
                        epochs=200,
                        verbose=1,
                        validation_data=(x_test, y_test))

    PlotUtils().plotHistory(history=history, metrics=metrics, plt=plt)

    plt.figure()
    index_1 = (y_test == 1).reshape(len(y_test))
    index_0 = (y_test == 0).reshape(len(y_test))
    plt.scatter(x_test[index_1, 0], x_test[index_1, 1], c='red')
    plt.scatter(x_test[index_0, 0], x_test[index_0, 1], c='blue')
    PlotUtils().plotDecisionBoundry(x_test, model, plt)
    plt.show()
def test_ejercicio4(self):
    ds = DatasetUtils(path="clase_8_dataset.csv")
    ds_train, ds_test = ds.split(80)
    x_train = ds_train[:, 0:1]
    y_train = ds_train[:, 1:2]
    x_test = ds_test[:, 0:1]
    y_test = ds_test[:, 1:2]

    order = 3
    lr = LinearRegressionAffine(order=order,
                                algorithm=MiniBatchGradientDescent(
                                    learning_rate=3e-14,
                                    n_epochs=30000,
                                    n_batches=30))
    lr.fit(x_train, y_train)

    plot.figure()
    print("[test_ejercicio4] error", lr.algorithm.error)
    plot.plot(lr.algorithm.error, c='green', label="Training error")
    plot.plot(lr.algorithm.validationError, label="Validation error")
    plot.ylim(top=1000, bottom=0)
    plot.legend()

    y_predict = lr.predict(x_test)
    print("[test_ejercicio4] MSE", MSE()(y_test, y_predict))

    plot.figure()
    plot.scatter(x_test, y_test, c='red', label="target")
    plot.scatter(x_test, y_predict, c='blue', label="prediction")
    # Evaluate the fitted polynomial over a dense grid to draw the model curve
    x_axis = np.linspace(-400, 400, 1000)
    x = x_axis[:, None]
    for i in range(1, order):
        x = np.append(x, x[:, 0:1]**(i + 1), axis=1)
    x = np.append(x, np.ones((x.shape[0], 1)), axis=1)
    model = (lr.model.T @ x.T).T
    plot.plot(x_axis, model, c='green', label="model")
    plot.legend()
    plot.show()
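# MiniBatchGradientDescent is the project's own optimizer and its implementation is not
# part of this section. A minimal sketch of a mini-batch gradient-descent update for
# least-squares regression, assuming a linear model y ≈ X w and an MSE loss (function
# name and signature below are illustrative, not the project's API):
import numpy as np


def minibatch_gradient_descent(x, y, learning_rate, n_epochs, n_batches):
    """Illustrative mini-batch gradient descent for least-squares regression."""
    n_samples, n_features = x.shape
    w = np.zeros((n_features, 1))
    batch_size = n_samples // n_batches
    for _ in range(n_epochs):
        idx = np.random.permutation(n_samples)
        for b in range(n_batches):
            batch = idx[b * batch_size:(b + 1) * batch_size]
            x_b, y_b = x[batch], y[batch]
            error = x_b @ w - y_b
            gradient = 2 * x_b.T @ error / len(batch)  # gradient of mean((Xw - y)^2)
            w -= learning_rate * gradient
    return w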
def test_ejercicios4a7(self):
    x = DatasetUtils.load_dataset(self.SAVE_PATH)
    # Replace the NaN entries with the column-wise mean
    x_average = np.nanmean(x, axis=0)
    x = np.where(np.isnan(x), x_average, x)
    # Column-wise statistics used for the different normalization schemes
    x_l2_norm = np.linalg.norm(x, axis=0)
    x_mean = x.mean(axis=0)
    x_std = x.std(axis=0)
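# The test above computes the statistics but does not apply them. A minimal sketch of how
# they would typically be used to normalize the columns, assuming the exercises ask for
# L2 scaling and z-score standardization (an assumption, not the project's code):
import numpy as np


def normalize_columns(x):
    """Illustrative column-wise normalizations built from the same statistics."""
    x_l2_norm = np.linalg.norm(x, axis=0)
    x_mean = x.mean(axis=0)
    x_std = x.std(axis=0)
    x_unit_norm = x / x_l2_norm            # each column scaled to unit L2 norm
    x_standardized = (x - x_mean) / x_std  # each column with zero mean, unit variance
    return x_unit_norm, x_standardized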
def fit(self,
        x: np.ndarray,
        y: np.ndarray,
        n_epochs: int = 40,
        learning_rate: float = 0.01,
        batch_size: int = 32):
    y_shaped = y
    if y.ndim == 1:
        y_shaped = y[:, None]
    # Hold out part of the training data as a validation set
    train, validation = DatasetUtils(np.concatenate((x, y_shaped), axis=1)).split(88)
    x_train = train[:, 0:x.shape[1]]
    y_train = train[:, x.shape[1]:]
    x_validation = validation[:, 0:x.shape[1]]
    y_validation = validation[:, x.shape[1]:]

    n_batches = len(y_train) // batch_size
    train_error = np.empty((n_epochs, n_batches, batch_size))
    validation_error = np.empty((n_epochs, n_batches, len(x_validation)))
    for epoch in range(n_epochs):
        for i in range(0, batch_size * n_batches, batch_size):
            x_batch = x_train[i:(i + batch_size)]
            y_batch = y_train[i:(i + batch_size)]

            # Training
            (z_s, a_s) = self.algorithm.forward(x_batch)
            y_predict = a_s[-1]
            batch_error = -2 * (y_batch - y_predict)
            train_error[epoch, int(i / batch_size), :] = batch_error[:, 0]

            # Validation
            (_, a_validation_s) = self.algorithm.forward(x_validation)
            batch_validation_error = -y_validation + a_validation_s[-1]
            validation_error[epoch, int(i / batch_size), :] = batch_validation_error[:, 0]

            (delta_w, delta_b) = self.algorithm.backwards(x_batch, z_s, batch_error)
            self.algorithm.update(delta_w, delta_b, learning_rate)

    # Average the per-sample errors over batches and samples for each epoch
    train_error = np.sum(np.sum(train_error, axis=2), axis=1) / (
        train_error.shape[2] * train_error.shape[1])
    validation_error = np.sum(np.sum(validation_error, axis=2), axis=1) / (
        validation_error.shape[2] * validation_error.shape[1])
    return train_error, validation_error
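# self.algorithm.forward / backwards / update belong to the project's own backpropagation
# implementation, which is not included in this section. A minimal sketch of the forward
# pass that the call above appears to assume: per-layer weights W and biases b with a
# sigmoid activation, returning both pre-activations and activations (names and shapes
# here are illustrative assumptions, not the project's actual API):
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def forward(x, weights, biases):
    """Return the pre-activations z_s and activations a_s of every layer."""
    z_s, a_s = [], []
    a = x
    for w, b in zip(weights, biases):
        z = a @ w + b   # linear step: (n_samples, neurons)
        a = sigmoid(z)  # non-linear step
        z_s.append(z)
        a_s.append(a)
    return z_s, a_s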
def test_something(self):
    N_SAMPLES = 150
    NOISE = 1
    N_ORDERS = 10
    EPOCHS = 50

    # Noisy samples of sin(x) over one period
    x = np.linspace(0, 2 * np.pi, N_SAMPLES)[:, None]
    sin = np.sin(x)
    dataset = np.append(x, sin + np.random.normal(0, NOISE, N_SAMPLES)[:, None], axis=1)

    x_test: np.ndarray
    y_predictions = np.zeros((round(N_SAMPLES * 0.2), EPOCHS, N_ORDERS))
    all_x_test = np.zeros((round(N_SAMPLES * 0.2), N_ORDERS))
    lr_models = np.empty((EPOCHS, N_ORDERS), dtype=LinearRegressionAffine)
    errors = np.zeros((EPOCHS, N_ORDERS)) * np.nan

    for i in range(0, N_ORDERS):
        train, test = DatasetUtils(dataset).split(80)
        x_train = train[:, 0]
        y_train = train[:, 1]
        x_test = test[:, 0]
        y_test = test[:, 1]
        all_x_test[:, i] = x_test
        for j in range(0, EPOCHS):
            # Fit a polynomial of degree i + 1 and record its test MSE
            lr = LinearRegressionAffine(order=i + 1)
            lr.fit(x_train, y_train)
            lr_models[j, i] = lr
            y_predict = lr.predict(x_test)
            y_predictions[:, j, i:(i + 1)] = y_predict
            errors[j, i] = MSE()(y_test[:, None], y_predict)

    # Column index of the best model; the corresponding polynomial degree is index + 1
    order = np.argmin(np.nanmean(errors, axis=0))
    print("The lowest MSE is obtained for the polynomial of degree", order + 1)

    target = all_x_test[:, order]
    targetAxisSorted = np.argsort(target)
    fig, axs = plot.subplots(2, 1)
    epochExample = 0
    axs[0].plot(y_predictions[targetAxisSorted, epochExample, order],
                label="best prediction, degree " + str(order + 1))
    axs[0].plot(np.sin(np.sort(target)), 'o', label="target")
    axs[0].legend()
    axs[1].plot(np.nanmean(errors, axis=0), label="MSE with k-folds")
    plot.xlabel("Polynomial order")
    axs[1].legend()
    plot.show()
def test_ejercicio9(self):
    x = C.ClusterUtils().build_synthetic_cluster(self.CLUSTER_DIM,
                                                 self.CLUSTER_N_CENTROIDS,
                                                 self.SAMPLES,
                                                 centroids_distance=50)
    # Append an exponentially distributed feature to the cluster data
    exp = RandomUtils.exp(500, self.SAMPLES).reshape(self.SAMPLES, 1)
    x = np.append(x, exp, axis=1)

    # Project onto the first two principal components and plot the result
    x_pca = DatasetUtils(x).pca(2)
    plot.scatter(x_pca[:, 0], x_pca[:, 1])
    plot.show()
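# DatasetUtils.pca is part of the project's own library and is not shown here. A minimal
# sketch of a PCA projection onto the first k principal components via SVD of the
# centered data (an illustrative implementation, not the project's actual one):
import numpy as np


def pca(x, k):
    """Project x onto its first k principal components."""
    x_centered = x - x.mean(axis=0)
    # Rows of vt are the principal directions, ordered by decreasing singular value
    _, _, vt = np.linalg.svd(x_centered, full_matrices=False)
    return x_centered @ vt[:k].T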
def test_ejercicio3(self):
    ds = DatasetUtils(path="clase_8_dataset.csv")
    ds_train, ds_test = ds.split(80)
    x_train = ds_train[:, 0:1]
    y_train = ds_train[:, 1:2]
    x_test = ds_test[:, 0:1]
    y_test = ds_test[:, 1:2]

    plot.figure()
    minError = float("inf")
    minErrorOrder = -1
    # The linear regression problem is solved analytically by minimizing the mean squared
    # error, so it is consistent to evaluate the models with that same error measure.
    error = MSE()
    models = []
    for i in range(1, 5):
        lr = LinearRegressionAffine(order=i, algorithm=LeastSquares())
        validation_mean_error, validation_min_error, lr = DatasetUtils.k_folds(
            x=x_train, y=y_train, k=5, ml_object=lr, error=error)
        models.append(lr)
        plot.scatter(i, validation_min_error, c='red', label='Model with the lowest error')
        plot.scatter(i, validation_mean_error, c='blue', label='Mean error')
        y_predict = lr.predict(x_test)
        plot.scatter(i, error(target=y_test, prediction=y_predict),
                     c='green', label='Prediction error')
        if minError > validation_min_error:
            minError = validation_min_error
            minErrorOrder = i

    print("[test_something] The model order with the lowest error was", minErrorOrder)
    # [test_something] The model order with the lowest error was 3

    bestModel = models[minErrorOrder - 1]
    print("[test_ejercicio3] Best model", bestModel.model)
    print("[test_ejercicio3] Best model MSE",
          error(target=y_test, prediction=bestModel.predict(x_test)))

    plot.figure()
    plot.scatter(x_test, y_test, c='red', label="target")
    plot.scatter(x_test, bestModel.predict(x_test), c='blue', label="prediction")
    # Evaluate the best polynomial over a dense grid to draw the model curve
    x_axis = np.linspace(-400, 400, 1000)
    x = x_axis[:, None]
    for i in range(1, minErrorOrder):
        x = np.append(x, x[:, 0:1]**(i + 1), axis=1)
    x = np.append(x, np.ones((x.shape[0], 1)), axis=1)
    model = (bestModel.model.T @ x.T).T
    plot.plot(x_axis, model, c='green', label="model")
    plot.legend()
    plot.show()
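# LeastSquares and LinearRegressionAffine are part of the project's own library. A
# minimal sketch of the closed-form fit such an affine polynomial least-squares model
# typically computes; the function name, signature, and weight layout below are
# illustrative assumptions consistent with how lr.model is used in the test above:
import numpy as np


def fit_affine_polynomial(x, y, order):
    """Closed-form least squares for y ≈ w1*x + w2*x^2 + ... + w_order*x^order + b."""
    # Build the design matrix [x, x^2, ..., x^order, 1]
    features = np.concatenate([x ** (i + 1) for i in range(order)], axis=1)
    features = np.append(features, np.ones((features.shape[0], 1)), axis=1)
    # Normal-equations solution via the pseudo-inverse
    return np.linalg.pinv(features) @ y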
def test_ejercicio7y8(self):
    x = DatasetUtils.load_dataset(self.SAVE_PATH)
    # Append an exponentially distributed column and plot its histogram
    exp = RandomUtils.exp(0.1, self.SAMPLES).reshape(self.SAMPLES, 1)
    exp = np.append(x, exp, axis=1)
    plot.hist(exp[:, -1], 50)
    plot.show()
def test_something(self): ds = DatasetUtils(path="clase_6_dataset.txt") ds_train, ds_test = ds.split(80) x_train = ds_train[:, 0:2] y_train = ds_train[:, 2:3] lc = LinearClassfication() lc.fit(x_train, y_train) lc_sgd = LinearClassfication(algorithm=StochasticGradientDescent( learning_rate=0.015, n_epochs=23000, prediction_function=SigmoidPrediction())) lc_sgd.fit(x_train, y_train) lc_mbgd = LinearClassfication(algorithm=MiniBatchGradientDescent( learning_rate=0.001, n_epochs=100000, n_batches=16, prediction_function=SigmoidPrediction())) lc_mbgd.fit(x_train, y_train) x_test = ds_test[:, 0:2] y_test = ds_test[:, 2:3] y_predict_gd = lc.predict(x_test) y_predict_sgd = lc_sgd.predict(x_test) y_predict_mbgd = lc_mbgd.predict(x_test) percentage = PercentageError() quantity = QuantityError() print("Aciertos GD %: ", percentage(y_test, y_predict_gd), "%") print("Error GD (cantidad de fallos GD): ", quantity(y_test, y_predict_gd)) print("Aciertos SGD %: ", percentage(y_test, y_predict_sgd), "%") print("Error SGD (cantidad de fallos GD): ", quantity(y_test, y_predict_sgd)) print("Aciertos MBGD %: ", percentage(y_test, y_predict_mbgd), "%") print("Error MBGD (cantidad de fallos GD): ", quantity(y_test, y_predict_mbgd)) plot.figure() ones_index = (y_test == 1)[:, 0] zeros_index = (y_test == 0)[:, 0] plot.plot(x_test[ones_index, 0], x_test[ones_index, 1], 'o') plot.plot(x_test[zeros_index, 0], x_test[zeros_index, 1], 'x') x = np.linspace(-0, 200, len(x_train)) y = -lc.model[1] / lc.model[0] * x - lc.model[2] / lc.model[ 0] + np.log(1) y_sgd = -lc_sgd.model[1] / lc_sgd.model[0] * x - lc_sgd.model[ 2] / lc_sgd.model[0] + np.log(1) y_mbgd = -lc_mbgd.model[1] / lc_mbgd.model[0] * x - lc_mbgd.model[ 2] / lc_mbgd.model[0] + np.log(1) plot.plot(x, y, c='green', label='Modelo gradiente descendiente') plot.plot(x, y_sgd, c='blue', label='Modelo gradiente descendiente estocastico') plot.plot(x, y_mbgd, c='red', label='Modelo mini batch gradiente descendiente') plot.legend() plot.figure() ones_index = (y_train == 1)[:, 0] zeros_index = (y_train == 0)[:, 0] plot.plot(x_train[ones_index, 0], x_train[ones_index, 1], 'o') plot.plot(x_train[zeros_index, 0], x_train[zeros_index, 1], 'x') x = np.linspace(-0, 200, len(x_train)) y = -lc.model[1] / lc.model[0] * x - lc.model[2] / lc.model[ 0] + np.log(1) y_sgd = -lc_sgd.model[1] / lc_sgd.model[0] * x - lc_sgd.model[ 2] / lc_sgd.model[0] + np.log(1) y_mbgd = -lc_mbgd.model[1] / lc_mbgd.model[0] * x - lc_mbgd.model[ 2] / lc_mbgd.model[0] + np.log(1) plot.plot(x, y, c='green', label='Modelo gradiente descendiente') plot.plot(x, y_sgd, c='blue', label='Modelo gradiente descendiente estocastico') plot.plot(x, y_mbgd, c='red', label='Modelo mini batch gradiente descendiente ') plot.legend() plot.figure() plot.plot(lc.algorithm.error, c='green', label="gd") plot.plot(lc_sgd.algorithm.error, c='blue', label="sgd") plot.plot(lc_mbgd.algorithm.error, c='red', label="mbgd") plot.legend() plot.show()