예제 #1
0
class TestAutoEncoder(object):
    def __init__(self, network_params=None, dropout_in=0.0):
        logger.info("Testeo de AutoEncoder con datos de Iris")
        # Datos
        logger.info("Cargando datos...")
        dataset = LocalLabeledDataSet()
        path = os.path.abspath(os.path.join(os.path.realpath(__file__),
                                            os.path.pardir,
                                            os.pardir,
                                            'examples/datasets/iris.csv'))
        dataset.load_file(path)
        self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
        self.train = self.train.collect()
        self.valid = self.valid.collect()

        # Modelo
        if network_params is None:
            network_params = NetworkParameters(units_layers=[4, 8], activation=['ReLU', 'ReLU'],
                                               classification=False)
        self.model = AutoEncoder(network_params, dropout_in=dropout_in)

    def _fit(self, opt_params=None, stops=None, mini_batch=30, parallelism=2):
        if opt_params is None:
            opt_params = OptimizerParameters(algorithm='Adadelta')
        if stops is None:
            stops = [criterion['MaxIterations'](30),
                     criterion['AchieveTolerance'](0.95, key='hits')]
        logger.info("Entrenando modelo...")
        hits_valid = self.model.fit(self.train, self.valid, valid_iters=1, mini_batch=mini_batch,
                                    parallelism=parallelism, stops=stops,optimizer_params=opt_params,
                                    keep_best=True, reproducible=True)
        return hits_valid

    def _test_backprop(self, x, y):
        # Dado que esta funcion se llama en una transformación de RDD, no es rastreada por Python
        # por lo que es mejor hacerle unittest de forma que sea trazada en el coverage code.
        cost, (nabla_w, nabla_b) = self.model._backprop(x, y)
        assert type(cost) is float or isinstance(cost, np.float)
        assert type(nabla_w[0]) is LocalNeurons
        assert type(nabla_b[0]) is LocalNeurons

    def test_fitting(self, opt_params=None, stops=None, mini_batch=30, parallelism=1):
        logger.info("Testeando backpropagation en AutoEncoders...")
        x = self.train[0].features
        y = self.train[0].label
        self._test_backprop(x, y)

        hits_valid = self._fit(opt_params=opt_params, stops=stops, mini_batch=mini_batch, parallelism=parallelism)
        logger.info("Asegurando salidas correctas...")
        assert hits_valid > 0.8
        hits_test = self.model.evaluate(self.test, predictions=False, measure='R2')
        assert hits_test > 0.8
        logger.info("OK")
예제 #2
0
    def __init__(self, network_params=None, dropout_in=0.0):
        logger.info("Testeo de AutoEncoder con datos de Iris")
        # Datos
        logger.info("Cargando datos...")
        dataset = LocalLabeledDataSet()
        path = os.path.abspath(os.path.join(os.path.realpath(__file__),
                                            os.path.pardir,
                                            os.pardir,
                                            'examples/datasets/iris.csv'))
        dataset.load_file(path)
        self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
        self.train = self.train.collect()
        self.valid = self.valid.collect()

        # Modelo
        if network_params is None:
            network_params = NetworkParameters(units_layers=[4, 8], activation=['ReLU', 'ReLU'],
                                               classification=False)
        self.model = AutoEncoder(network_params, dropout_in=dropout_in)