Example 1
    def __init__(self, type_model=None, net_params=None, opt_params=None, n_layers=0, n_iter=10, seed=123):

        # Data
        logger.info("Loading data...")
        data = load_iris()
        dataset = LocalLabeledDataSet(data)
        self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
        self.train = self.train.collect()
        self.valid = self.valid.collect()
        self.test = self.test.collect()

        # Optimization configuration
        if opt_params is None:  # By default, Adadelta is used
            stops = [criterion['MaxIterations'](10),
                     criterion['AchieveTolerance'](0.95, key='hits')]
            options = {'step-rate': 1.0, 'decay': 0.995, 'momentum': 0.3, 'offset': 1e-8}
            opt_params = OptimizerParameters(algorithm='Adadelta', stops=stops,
                                             options=options, merge_criter='w_avg')
        self.opt_params = opt_params

        # Configuration of the model to optimize
        if type_model is None:
            type_model = NeuralNetwork
        self.type_model = type_model
        if net_params is None:
            # Note: the boolean values below appear to act as flags telling
            # the random search which parameters to sample (an interpretation
            # based on how RandomSearch consumes these NetworkParameters).
            net_params = NetworkParameters(units_layers=[4, 10, 3], activation=False, dropout_ratios=True,
                                           classification=True, strength_l1=True, strength_l2=True, seed=seed)
        self.net_params = net_params

        # Random Search configuration
        self.rnd_search = RandomSearch(self.net_params, n_layers, n_iter, net_domain=None, seed=seed)
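
The boolean-flagged NetworkParameters above only make sense together with RandomSearch, which samples concrete values for the flagged parameters. As a library-agnostic sketch of what such a driver does (the names below are illustrative, not learninspy's API), a random search draws one configuration per iteration from fixed domains, scores it, and keeps the best:

import random

def random_search(domains, evaluate, n_iter=10, seed=123):
    # Illustrative random hyperparameter search: sample, score, keep the best.
    rng = random.Random(seed)
    best_cfg, best_score = None, float('-inf')
    for _ in range(n_iter):
        cfg = {name: rng.choice(choices) for name, choices in domains.items()}
        score = evaluate(cfg)  # e.g. validation hits of a freshly trained net
        if score > best_score:
            best_cfg, best_score = cfg, score
    return best_cfg, best_score

A domain could be as simple as {'activation': ['ReLU', 'Tanh'], 'strength_l1': [1e-6, 1e-5, 1e-4]}, mirroring the parameters flagged with True above.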
Example 2
    def test_parallelism(self, mini_batch=10):
        logger.info("Testeando variantes del nivel de paralelismo...")

        # Data
        logger.info("Data used: Iris")
        data = load_iris()
        dataset = LocalLabeledDataSet(data)
        self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
        self.valid = self.valid.collect()

        # Optimization
        options = {'step-rate': 1.0, 'decay': 0.995, 'momentum': 0.3, 'offset': 1e-8}
        opt_params = OptimizerParameters(algorithm='Adadelta', options=options)
        stops = [criterion['MaxIterations'](10)]

        # Parallelism levels
        parallelism = [-1, 0, 2]

        for p in parallelism:
            logger.info("Seteando paralelismo en %i", p)
            hits_valid = self._fit(opt_params=opt_params, stops=stops, mini_batch=mini_batch, parallelism=p)
            logger.info("Asegurando salidas correctas...")
            assert hits_valid > 0.7

            hits_test, pred_test = self.model.evaluate(self.test, predictions=True, measure='R2')
            assert hits_test > 0.7

            logger.info("OK")
        return
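
With a positive parallelism level, training like this is typically data-parallel: each worker fits a replica of the network on its partition and the replicas are then merged. The merge_criter='w_avg' seen in Examples 1 and 5 suggests a weighted average of the replicas' weights; the helper below sketches that idea under that assumption and is not learninspy's implementation.

import numpy as np

def merge_w_avg(replica_weights, scores):
    # Average each layer's weight array across workers, weighting every
    # worker by its (non-negative) validation score, e.g. 'hits'.
    coef = np.asarray(scores, dtype=float)
    coef = coef / coef.sum()
    n_layers = len(replica_weights[0])
    return [sum(c * w[i] for c, w in zip(coef, replica_weights))
            for i in range(n_layers)]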
Example 3
def test_pca():
    logger.info("Testeando PCA...")
    data = load_iris()

    # Test core functionality
    features = [lp.features for lp in data]  # list comprehension; map() would be a one-shot iterator in Python 3
    pca = PCA(features, threshold_k=0.99)
    assert pca.k == 2
    transformed = pca.transform(k=3, standarize=True, whitening=True)
    assert len(transformed[0]) == 3

    # Test support for DataSets
    local_data = LocalLabeledDataSet(data)
    pca_loc = PCA(local_data.features)
    distributed_data = DistributedLabeledDataSet(data)
    pca_dist = PCA(distributed_data.features.collect())
    assert np.array_equiv(pca_loc.transform(k=3, data=local_data).features,
                          pca_dist.transform(k=3, data=distributed_data).features.collect())
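
The threshold_k=0.99 argument plausibly picks the smallest k whose leading components explain 99% of the variance, which is a common selection rule. A minimal NumPy sketch of that rule follows (an interpretation, not learninspy's code; whether it reproduces the k == 2 asserted above depends on the library's exact convention, e.g. whether features are standardized first):

import numpy as np

def k_for_variance_threshold(X, threshold=0.99):
    # Center the data; squared singular values are proportional to the
    # variance captured by each principal component.
    Xc = np.asarray(X, dtype=float)
    Xc = Xc - Xc.mean(axis=0)
    s = np.linalg.svd(Xc, full_matrices=False, compute_uv=False)
    explained = (s ** 2) / np.sum(s ** 2)
    # Smallest k whose cumulative explained variance reaches the threshold.
    return int(np.searchsorted(np.cumsum(explained), threshold)) + 1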
Example 4
def test_stdscaler():
    logger.info("Testeando StandardScaler...")
    data = load_iris()

    # Test core functionality
    features = [lp.features for lp in data]  # a list, so it can be iterated by both fit() and transform()
    stdsc = StandardScaler(mean=True, std=True)
    stdsc.fit(features)
    transformed = stdsc.transform(features)
    assert np.isclose(np.mean(transformed), 0, atol=1e-8)  # rtol is a no-op against a zero target; atol expresses the intent

    # Test support for DataSets
    local_data = LocalLabeledDataSet(data)
    stdsc.fit(local_data.features)
    local_transformed = stdsc.transform(local_data)
    distributed_data = DistributedLabeledDataSet(data)
    stdsc.fit(distributed_data)
    distrib_transformed = stdsc.transform(distributed_data)
    assert np.allclose(local_transformed.features, distrib_transformed.features.collect())
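
What StandardScaler(mean=True, std=True) computes is ordinary column-wise standardization. A self-contained NumPy equivalent, for reference (the ddof convention is a guess; the library may use the population or the sample standard deviation):

import numpy as np

def standard_scale(X, with_mean=True, with_std=True):
    # Column-wise standardization: zero mean and/or unit variance per feature.
    X = np.asarray(X, dtype=float)
    if with_mean:
        X = X - X.mean(axis=0)
    if with_std:
        X = X / X.std(axis=0)  # population std; sample std (ddof=1) is also common
    return X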
Example 5
    def __init__(self, opt_params=None):
        logger.info("Testeo de Optimizer con datos de Iris")
        # Datos
        logger.info("Cargando datos...")
        data = load_iris()
        dataset = LocalLabeledDataSet(data)
        self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
        self.train = self.train.collect()
        self.valid = self.valid.collect()
        self.test = self.test.collect()

        # Optimization configuration
        if opt_params is None:  # By default, Adadelta is used
            stops = [criterion['MaxIterations'](10),
                     criterion['AchieveTolerance'](0.95, key='hits')]
            options = {'step-rate': 1.0, 'decay': 0.99, 'momentum': 0.3, 'offset': 1e-8}
            opt_params = OptimizerParameters(algorithm='Adadelta', stops=stops,
                                             options=options, merge_criter='w_avg')
        self.opt_params = opt_params

        # Model configuration
        net_params = NetworkParameters(units_layers=[4, 10, 3], activation='ReLU', strength_l1=1e-5, strength_l2=3e-4,
                                       dropout_ratios=[0.2, 0.0], classification=True)
        self.model = NeuralNetwork(net_params)
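
The options dict maps onto the knobs of an Adadelta update in the style of the climin library, which learninspy's optimizers appear to follow: 'step-rate' scales the step, 'decay' drives the running averages, 'offset' guards the divisions, and 'momentum' reuses the previous step. A sketch of one such update, assuming that formulation:

import numpy as np

def adadelta_step(w, grad, state, step_rate=1.0, decay=0.99,
                  momentum=0.3, offset=1e-8):
    # One Adadelta update (Zeiler, 2012) with the extra step-rate and
    # momentum knobs exposed above; a sketch, not learninspy's code.
    # 'state' holds (running avg of squared gradients, running avg of
    # squared steps, previous step).
    gms, sms, prev_step = state
    gms = decay * gms + (1 - decay) * grad ** 2
    step = np.sqrt(sms + offset) / np.sqrt(gms + offset) * grad * step_rate
    sms = decay * sms + (1 - decay) * step ** 2
    w_new = w - step - momentum * prev_step
    return w_new, (gms, sms, step)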
Example 6
from learninspy.utils.fileio import get_logger

import os

logger = get_logger(name='learninspy-demo_iris')

# Worth demoing here:
# * Examine how results differ across consensus functions
# * Explore stop criteria
# ** Raising MaxIterations from 5 to 10 changes the total runtime a lot


# -- 1) Data loading

logger.info("Cargando datos de Iris ...")
dataset = load_iris()
dataset = LocalLabeledDataSet(dataset)
rows, cols = dataset.shape
logger.info("Dimension de datos: %i x %i", rows, cols)

train, valid, test = dataset.split_data([0.7, 0.1, 0.2])  # Split into train/valid/test sets

# -- 2) Parameter selection

# --- 2.a) Neural network parameters
net_params = NetworkParameters(units_layers=[4, 8, 3], dropout_ratios=[0.0, 0.0],
                               activation='ReLU', strength_l1=1e-5, strength_l2=3e-4,
                               classification=True, seed=123)

# --- 2.b) Optimization parameters
local_stops = [criterion['MaxIterations'](10),