def test_something(self):
    N = 5
    lr_prediction_error = []
    lra_prediction_error = []
    ds = DatasetUtils(path="../clase3/resources/dataset.csv")
    ds_train, ds_test = ds.split(80)
    for j in range(1, N):
        # Build polynomial features of increasing degree from the single input column
        x_train = ds_train[:, 1:2]
        x_test = ds_test[:, 1:2]
        for i in range(1, j):
            x_train = np.append(x_train, ds_train[:, 1:2]**(i + 1), axis=1)
            x_test = np.append(x_test, ds_test[:, 1:2]**(i + 1), axis=1)
        y_test = ds_test[:, 2:3]
        y_train = ds_train[:, 2:3]

        lr = LinearRegression()
        lr.fit(x_train, y_train)
        y_lr_predict = lr.predict(x_test)
        lr_prediction_error.append(MSE()(y_test, y_lr_predict))

        lra = LinearRegressionAffine()
        lra.fit(x_train, y_train)
        y_lra_predict = lra.predict(x_test)
        lra_prediction_error.append(MSE()(y_test, y_lra_predict))

    plot.figure("MSE")
    # plot.plot(range(1, N), lr_prediction_error)
    plot.plot(range(1, N), lra_prediction_error)
    plot.xlabel("Number of dimensions")
    plot.ylabel("MSE")
    plot.show()
def test_something(self): ds = DatasetUtils(path="resources/dataset.csv") x_train, x_test = ds.split(80) y_real = x_test[:, 2] lr = LinearRegression() lr.fit(x_train[:, 1], x_train[:, 2]) y_lr_predict = lr.predict(x_test[:, 1]) lra = LinearRegressionAffine() lra.fit(x_train[:, 1], x_train[:, 2]) y_lra_predict = lra.predict(x_test[:, 1]) plot.scatter(x_test[:, 1], y_real, c="blue") plot.scatter(x_test[:, 1], y_lr_predict, c="red") plot.scatter(x_test[:, 1], y_lra_predict, c="yellow") plot.show() metrics = [MSE(), MedianError(), MeanError()] print("Metric", "LinearRegression", "LinearRegressionAffine") for metric in metrics: print(metric.get_name(), metric(target=y_real, prediction=y_lr_predict), metric(target=y_real, prediction=y_lra_predict)) # Metric LinearRegression LinearRegressionAffine # MSE 3.64650918978914 3.4685120979301836 # MedianError 0.007340468339615436 -0.01885682436420999 # MeanError 0.021328249515553076 -0.0025382219110967837 self.assertTrue(True)
def test_ejercicio2y3(self):
    x = C.ClusterUtils().build_synthetic_cluster(self.CLUSTER_DIM,
                                                 self.CLUSTER_N_CENTROIDS,
                                                 self.SAMPLES)
    # Randomly replace roughly 10% of the entries with NaN before saving
    uniform = np.random.uniform(0, 1, (self.SAMPLES, self.CLUSTER_DIM))
    uniform = np.where(uniform < 0.1, np.NaN, 0)
    x = x + uniform
    print(x)
    DatasetUtils.save_dataset(x, self.SAVE_PATH)
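# ClusterUtils.build_synthetic_cluster belongs to the project's own library and is not
# shown in this section. A minimal sketch of one common way to generate such a synthetic
# cluster dataset, assuming Gaussian blobs around randomly placed centroids (signature
# and behavior here are assumptions, not the project's actual implementation):
import numpy as np


def build_synthetic_cluster(dim, n_centroids, samples, centroids_distance=10):
    """Illustrative synthetic clusters: Gaussian blobs around random centroids."""
    centroids = np.random.uniform(0, centroids_distance, (n_centroids, dim))
    assignments = np.random.randint(0, n_centroids, samples)
    return centroids[assignments] + np.random.normal(0, 1, (samples, dim))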
def test_neuralNetwork(self):
    dataset = DatasetUtils(path='./resources/train_data.csv')
    train, test = dataset.split(80)
    y_train = train[:, -1]
    x_train = train[:, :-1]
    y_test = test[:, -1][:, None]
    x_test = test[:, :-1]

    nn = NeuralNetwork(layers=[
        Layer(input_size=2, neurons=3, activation_function=SigmoidActivationFunction()),
        Layer(input_size=3, neurons=2, activation_function=SigmoidActivationFunction()),
        Layer(input_size=2, neurons=1, activation_function=SigmoidActivationFunction())
    ])
    train_error, validation_error = nn.fit(x_train,
                                           y_train,
                                           n_epochs=3000,
                                           learning_rate=0.005,
                                           batch_size=16)

    y_predict = nn.predict(x_test)
    metrics = [Recall(), Selectivity(), Precision()]
    for metric in metrics:
        print(metric.get_name() + ":", metric(y_test, y_predict))
    # Recall: 95.74468085106383
    # Selectivity: 98.83720930232558
    # Precision: 94.73684210526315

    plt.figure()
    plt.plot(train_error, c='red', label='train')
    plt.plot(validation_error, c='blue', label='validation')
    plt.legend()
    # The plot is saved at ./resources/trainning_error.png

    plt.figure()
    index_1 = (y_test == 1).reshape(len(y_test))
    index_0 = (y_test == 0).reshape(len(y_test))
    plt.scatter(x_test[index_1, 0], x_test[index_1, 1], c='red')
    plt.scatter(x_test[index_0, 0], x_test[index_0, 1], c='blue')
    PlotUtils().plotDecisionBoundry(x_test, nn, plt)
    plt.show()


if __name__ == '__main__':
    unittest.main()
def test_with_keras(self):
    # Comment this line out to use the GPU
    tf.config.experimental.set_visible_devices([], 'GPU')

    dataset = DatasetUtils(path='./resources/train_data.csv')
    train, test = dataset.split(80)
    y_train = train[:, -1]
    x_train = train[:, :-1]
    y_test = test[:, -1][:, None]
    x_test = test[:, :-1]

    model = Sequential()
    model.add(Dense(128, input_dim=x_train.shape[1]))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation="sigmoid"))

    metrics = [
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
        keras.metrics.BinaryAccuracy(name="Binary Accuracy")
    ]
    model.compile(optimizer=keras.optimizers.Adam(1e-4),
                  loss="binary_crossentropy",
                  metrics=metrics)
    history = model.fit(x_train,
                        y_train,
                        batch_size=16,
                        epochs=200,
                        verbose=1,
                        validation_data=(x_test, y_test))

    PlotUtils().plotHistory(history=history, metrics=metrics, plt=plt)

    plt.figure()
    index_1 = (y_test == 1).reshape(len(y_test))
    index_0 = (y_test == 0).reshape(len(y_test))
    plt.scatter(x_test[index_1, 0], x_test[index_1, 1], c='red')
    plt.scatter(x_test[index_0, 0], x_test[index_0, 1], c='blue')
    PlotUtils().plotDecisionBoundry(x_test, model, plt)
    plt.show()
def test_ejercicio4(self):
    ds = DatasetUtils(path="clase_8_dataset.csv")
    ds_train, ds_test = ds.split(80)
    x_train = ds_train[:, 0:1]
    y_train = ds_train[:, 1:2]
    x_test = ds_test[:, 0:1]
    y_test = ds_test[:, 1:2]

    order = 3
    lr = LinearRegressionAffine(order=order,
                                algorithm=MiniBatchGradientDescent(
                                    learning_rate=3e-14,
                                    n_epochs=30000,
                                    n_batches=30))
    lr.fit(x_train, y_train)

    plot.figure()
    print("[test_ejercicio4] error", lr.algorithm.error)
    plot.plot(lr.algorithm.error, c='green', label="Training error")
    plot.plot(lr.algorithm.validationError, label="Validation error")
    plot.ylim(top=1000, bottom=0)
    plot.legend()

    y_predict = lr.predict(x_test)
    print("[test_ejercicio4] MSE", MSE()(y_test, y_predict))

    plot.figure()
    plot.scatter(x_test, y_test, c='red', label="target")
    plot.scatter(x_test, y_predict, c='blue', label="prediction")
    # Evaluate the fitted polynomial over a dense grid to draw the model curve
    x_axis = np.linspace(-400, 400, 1000)
    x = x_axis[:, None]
    for i in range(1, order):
        x = np.append(x, x[:, 0:1]**(i + 1), axis=1)
    x = np.append(x, np.ones((x.shape[0], 1)), axis=1)
    model = (lr.model.T @ x.T).T
    plot.plot(x_axis, model, c='green', label="model")
    plot.legend()
    plot.show()
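# MiniBatchGradientDescent is the project's own optimizer and its implementation is not
# part of this section. A minimal sketch of a mini-batch gradient-descent update for
# least-squares regression, assuming a linear model y ≈ X w and an MSE loss (function
# name and signature below are illustrative, not the project's API):
import numpy as np


def minibatch_gradient_descent(x, y, learning_rate, n_epochs, n_batches):
    """Illustrative mini-batch gradient descent for least-squares regression."""
    n_samples, n_features = x.shape
    w = np.zeros((n_features, 1))
    batch_size = n_samples // n_batches
    for _ in range(n_epochs):
        idx = np.random.permutation(n_samples)
        for b in range(n_batches):
            batch = idx[b * batch_size:(b + 1) * batch_size]
            x_b, y_b = x[batch], y[batch]
            error = x_b @ w - y_b
            gradient = 2 * x_b.T @ error / len(batch)  # gradient of mean((Xw - y)^2)
            w -= learning_rate * gradient
    return w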
def test_ejercicios4a7(self):
    x = DatasetUtils.load_dataset(self.SAVE_PATH)
    # Replace the NaN entries with the column-wise mean
    x_average = np.nanmean(x, axis=0)
    x = np.where(np.isnan(x), x_average, x)
    # Column-wise statistics used for the different normalization schemes
    x_l2_norm = np.linalg.norm(x, axis=0)
    x_mean = x.mean(axis=0)
    x_std = x.std(axis=0)
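# The test above computes the statistics but does not apply them. A minimal sketch of how
# they would typically be used to normalize the columns, assuming the exercises ask for
# L2 scaling and z-score standardization (an assumption, not the project's code):
import numpy as np


def normalize_columns(x):
    """Illustrative column-wise normalizations built from the same statistics."""
    x_l2_norm = np.linalg.norm(x, axis=0)
    x_mean = x.mean(axis=0)
    x_std = x.std(axis=0)
    x_unit_norm = x / x_l2_norm            # each column scaled to unit L2 norm
    x_standardized = (x - x_mean) / x_std  # each column with zero mean, unit variance
    return x_unit_norm, x_standardized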
def fit(self,
        x: np.ndarray,
        y: np.ndarray,
        n_epochs: int = 40,
        learning_rate: float = 0.01,
        batch_size: int = 32):
    y_shaped = y
    if y.ndim == 1:
        y_shaped = y[:, None]
    # Hold out part of the training data as a validation set
    train, validation = DatasetUtils(np.concatenate((x, y_shaped), axis=1)).split(88)
    x_train = train[:, 0:x.shape[1]]
    y_train = train[:, x.shape[1]:]
    x_validation = validation[:, 0:x.shape[1]]
    y_validation = validation[:, x.shape[1]:]

    n_batches = len(y_train) // batch_size
    train_error = np.empty((n_epochs, n_batches, batch_size))
    validation_error = np.empty((n_epochs, n_batches, len(x_validation)))
    for epoch in range(n_epochs):
        for i in range(0, batch_size * n_batches, batch_size):
            x_batch = x_train[i:(i + batch_size)]
            y_batch = y_train[i:(i + batch_size)]

            # Training
            (z_s, a_s) = self.algorithm.forward(x_batch)
            y_predict = a_s[-1]
            batch_error = -2 * (y_batch - y_predict)
            train_error[epoch, int(i / batch_size), :] = batch_error[:, 0]

            # Validation
            (_, a_validation_s) = self.algorithm.forward(x_validation)
            batch_validation_error = -y_validation + a_validation_s[-1]
            validation_error[epoch, int(i / batch_size), :] = batch_validation_error[:, 0]

            (delta_w, delta_b) = self.algorithm.backwards(x_batch, z_s, batch_error)
            self.algorithm.update(delta_w, delta_b, learning_rate)

    # Average the per-sample errors over batches and samples for each epoch
    train_error = np.sum(np.sum(train_error, axis=2), axis=1) / (
        train_error.shape[2] * train_error.shape[1])
    validation_error = np.sum(np.sum(validation_error, axis=2), axis=1) / (
        validation_error.shape[2] * validation_error.shape[1])
    return train_error, validation_error
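# self.algorithm.forward / backwards / update belong to the project's own backpropagation
# implementation, which is not included in this section. A minimal sketch of the forward
# pass that the call above appears to assume: per-layer weights W and biases b with a
# sigmoid activation, returning both pre-activations and activations (names and shapes
# here are illustrative assumptions, not the project's actual API):
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def forward(x, weights, biases):
    """Return the pre-activations z_s and activations a_s of every layer."""
    z_s, a_s = [], []
    a = x
    for w, b in zip(weights, biases):
        z = a @ w + b   # linear step: (n_samples, neurons)
        a = sigmoid(z)  # non-linear step
        z_s.append(z)
        a_s.append(a)
    return z_s, a_s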
def test_something(self):
    N_SAMPLES = 150
    NOISE = 1
    N_ORDERS = 10
    EPOCHS = 50

    # Noisy samples of sin(x) over one period
    x = np.linspace(0, 2 * np.pi, N_SAMPLES)[:, None]
    sin = np.sin(x)
    dataset = np.append(x, sin + np.random.normal(0, NOISE, N_SAMPLES)[:, None], axis=1)

    x_test: np.ndarray
    y_predictions = np.zeros((round(N_SAMPLES * 0.2), EPOCHS, N_ORDERS))
    all_x_test = np.zeros((round(N_SAMPLES * 0.2), N_ORDERS))
    lr_models = np.empty((EPOCHS, N_ORDERS), dtype=LinearRegressionAffine)
    errors = np.zeros((EPOCHS, N_ORDERS)) * np.nan

    for i in range(0, N_ORDERS):
        train, test = DatasetUtils(dataset).split(80)
        x_train = train[:, 0]
        y_train = train[:, 1]
        x_test = test[:, 0]
        y_test = test[:, 1]
        all_x_test[:, i] = x_test
        for j in range(0, EPOCHS):
            # Fit a polynomial of degree i + 1 and record its test MSE
            lr = LinearRegressionAffine(order=i + 1)
            lr.fit(x_train, y_train)
            lr_models[j, i] = lr
            y_predict = lr.predict(x_test)
            y_predictions[:, j, i:(i + 1)] = y_predict
            errors[j, i] = MSE()(y_test[:, None], y_predict)

    # Column index of the best model; the corresponding polynomial degree is index + 1
    order = np.argmin(np.nanmean(errors, axis=0))
    print("The lowest MSE is obtained for the polynomial of degree", order + 1)

    target = all_x_test[:, order]
    targetAxisSorted = np.argsort(target)
    fig, axs = plot.subplots(2, 1)
    epochExample = 0
    axs[0].plot(y_predictions[targetAxisSorted, epochExample, order],
                label="best prediction, degree " + str(order + 1))
    axs[0].plot(np.sin(np.sort(target)), 'o', label="target")
    axs[0].legend()
    axs[1].plot(np.nanmean(errors, axis=0), label="MSE with k-folds")
    plot.xlabel("Polynomial order")
    axs[1].legend()
    plot.show()
def test_ejercicio9(self):
    x = C.ClusterUtils().build_synthetic_cluster(self.CLUSTER_DIM,
                                                 self.CLUSTER_N_CENTROIDS,
                                                 self.SAMPLES,
                                                 centroids_distance=50)
    # Append an exponentially distributed feature to the cluster data
    exp = RandomUtils.exp(500, self.SAMPLES).reshape(self.SAMPLES, 1)
    x = np.append(x, exp, axis=1)

    # Project onto the first two principal components and plot the result
    x_pca = DatasetUtils(x).pca(2)
    plot.scatter(x_pca[:, 0], x_pca[:, 1])
    plot.show()
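# DatasetUtils.pca is part of the project's own library and is not shown here. A minimal
# sketch of a PCA projection onto the first k principal components via SVD of the
# centered data (an illustrative implementation, not the project's actual one):
import numpy as np


def pca(x, k):
    """Project x onto its first k principal components."""
    x_centered = x - x.mean(axis=0)
    # Rows of vt are the principal directions, ordered by decreasing singular value
    _, _, vt = np.linalg.svd(x_centered, full_matrices=False)
    return x_centered @ vt[:k].T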
def test_ejercicio3(self):
    ds = DatasetUtils(path="clase_8_dataset.csv")
    ds_train, ds_test = ds.split(80)
    x_train = ds_train[:, 0:1]
    y_train = ds_train[:, 1:2]
    x_test = ds_test[:, 0:1]
    y_test = ds_test[:, 1:2]

    plot.figure()
    minError = float("inf")
    minErrorOrder = -1
    # The linear regression problem is solved analytically by minimizing the mean squared
    # error, so it is consistent to evaluate the models with that same error measure.
    error = MSE()
    models = []
    for i in range(1, 5):
        lr = LinearRegressionAffine(order=i, algorithm=LeastSquares())
        validation_mean_error, validation_min_error, lr = DatasetUtils.k_folds(
            x=x_train, y=y_train, k=5, ml_object=lr, error=error)
        models.append(lr)
        plot.scatter(i, validation_min_error, c='red', label='Model with the lowest error')
        plot.scatter(i, validation_mean_error, c='blue', label='Mean error')
        y_predict = lr.predict(x_test)
        plot.scatter(i, error(target=y_test, prediction=y_predict),
                     c='green', label='Prediction error')
        if minError > validation_min_error:
            minError = validation_min_error
            minErrorOrder = i

    print("[test_something] The model order with the lowest error was", minErrorOrder)
    # [test_something] The model order with the lowest error was 3

    bestModel = models[minErrorOrder - 1]
    print("[test_ejercicio3] Best model", bestModel.model)
    print("[test_ejercicio3] Best model MSE",
          error(target=y_test, prediction=bestModel.predict(x_test)))

    plot.figure()
    plot.scatter(x_test, y_test, c='red', label="target")
    plot.scatter(x_test, bestModel.predict(x_test), c='blue', label="prediction")
    # Evaluate the best polynomial over a dense grid to draw the model curve
    x_axis = np.linspace(-400, 400, 1000)
    x = x_axis[:, None]
    for i in range(1, minErrorOrder):
        x = np.append(x, x[:, 0:1]**(i + 1), axis=1)
    x = np.append(x, np.ones((x.shape[0], 1)), axis=1)
    model = (bestModel.model.T @ x.T).T
    plot.plot(x_axis, model, c='green', label="model")
    plot.legend()
    plot.show()
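# LeastSquares and LinearRegressionAffine are part of the project's own library. A
# minimal sketch of the closed-form fit such an affine polynomial least-squares model
# typically computes; the function name, signature, and weight layout below are
# illustrative assumptions consistent with how lr.model is used in the test above:
import numpy as np


def fit_affine_polynomial(x, y, order):
    """Closed-form least squares for y ≈ w1*x + w2*x^2 + ... + w_order*x^order + b."""
    # Build the design matrix [x, x^2, ..., x^order, 1]
    features = np.concatenate([x ** (i + 1) for i in range(order)], axis=1)
    features = np.append(features, np.ones((features.shape[0], 1)), axis=1)
    # Normal-equations solution via the pseudo-inverse
    return np.linalg.pinv(features) @ y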
def test_ejercicio7y8(self):
    x = DatasetUtils.load_dataset(self.SAVE_PATH)
    # Append an exponentially distributed column and plot its histogram
    exp = RandomUtils.exp(0.1, self.SAMPLES).reshape(self.SAMPLES, 1)
    exp = np.append(x, exp, axis=1)
    plot.hist(exp[:, -1], 50)
    plot.show()
def test_something(self): ds = DatasetUtils(path="clase_6_dataset.txt") ds_train, ds_test = ds.split(80) x_train = ds_train[:, 0:2] y_train = ds_train[:, 2:3] lc = LinearClassfication() lc.fit(x_train, y_train) lc_sgd = LinearClassfication(algorithm=StochasticGradientDescent( learning_rate=0.015, n_epochs=23000, prediction_function=SigmoidPrediction())) lc_sgd.fit(x_train, y_train) lc_mbgd = LinearClassfication(algorithm=MiniBatchGradientDescent( learning_rate=0.001, n_epochs=100000, n_batches=16, prediction_function=SigmoidPrediction())) lc_mbgd.fit(x_train, y_train) x_test = ds_test[:, 0:2] y_test = ds_test[:, 2:3] y_predict_gd = lc.predict(x_test) y_predict_sgd = lc_sgd.predict(x_test) y_predict_mbgd = lc_mbgd.predict(x_test) percentage = PercentageError() quantity = QuantityError() print("Aciertos GD %: ", percentage(y_test, y_predict_gd), "%") print("Error GD (cantidad de fallos GD): ", quantity(y_test, y_predict_gd)) print("Aciertos SGD %: ", percentage(y_test, y_predict_sgd), "%") print("Error SGD (cantidad de fallos GD): ", quantity(y_test, y_predict_sgd)) print("Aciertos MBGD %: ", percentage(y_test, y_predict_mbgd), "%") print("Error MBGD (cantidad de fallos GD): ", quantity(y_test, y_predict_mbgd)) plot.figure() ones_index = (y_test == 1)[:, 0] zeros_index = (y_test == 0)[:, 0] plot.plot(x_test[ones_index, 0], x_test[ones_index, 1], 'o') plot.plot(x_test[zeros_index, 0], x_test[zeros_index, 1], 'x') x = np.linspace(-0, 200, len(x_train)) y = -lc.model[1] / lc.model[0] * x - lc.model[2] / lc.model[ 0] + np.log(1) y_sgd = -lc_sgd.model[1] / lc_sgd.model[0] * x - lc_sgd.model[ 2] / lc_sgd.model[0] + np.log(1) y_mbgd = -lc_mbgd.model[1] / lc_mbgd.model[0] * x - lc_mbgd.model[ 2] / lc_mbgd.model[0] + np.log(1) plot.plot(x, y, c='green', label='Modelo gradiente descendiente') plot.plot(x, y_sgd, c='blue', label='Modelo gradiente descendiente estocastico') plot.plot(x, y_mbgd, c='red', label='Modelo mini batch gradiente descendiente') plot.legend() plot.figure() ones_index = (y_train == 1)[:, 0] zeros_index = (y_train == 0)[:, 0] plot.plot(x_train[ones_index, 0], x_train[ones_index, 1], 'o') plot.plot(x_train[zeros_index, 0], x_train[zeros_index, 1], 'x') x = np.linspace(-0, 200, len(x_train)) y = -lc.model[1] / lc.model[0] * x - lc.model[2] / lc.model[ 0] + np.log(1) y_sgd = -lc_sgd.model[1] / lc_sgd.model[0] * x - lc_sgd.model[ 2] / lc_sgd.model[0] + np.log(1) y_mbgd = -lc_mbgd.model[1] / lc_mbgd.model[0] * x - lc_mbgd.model[ 2] / lc_mbgd.model[0] + np.log(1) plot.plot(x, y, c='green', label='Modelo gradiente descendiente') plot.plot(x, y_sgd, c='blue', label='Modelo gradiente descendiente estocastico') plot.plot(x, y_mbgd, c='red', label='Modelo mini batch gradiente descendiente ') plot.legend() plot.figure() plot.plot(lc.algorithm.error, c='green', label="gd") plot.plot(lc_sgd.algorithm.error, c='blue', label="sgd") plot.plot(lc_mbgd.algorithm.error, c='red', label="mbgd") plot.legend() plot.show()