def __init__(self, type_model=None, net_params=None, opt_params=None,
             n_layers=0, n_iter=10, seed=123):
    # Data
    logger.info("Loading data...")
    data = load_iris()
    dataset = LocalLabeledDataSet(data)
    self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
    self.train = self.train.collect()
    self.valid = self.valid.collect()
    self.test = self.test.collect()

    # Optimization configuration
    if opt_params is None:  # Adadelta is used by default
        stops = [criterion['MaxIterations'](10),
                 criterion['AchieveTolerance'](0.95, key='hits')]
        options = {'step-rate': 1.0, 'decay': 0.995, 'momentum': 0.3, 'offset': 1e-8}
        opt_params = OptimizerParameters(algorithm='Adadelta', stops=stops,
                                         options=options, merge_criter='w_avg')
    self.opt_params = opt_params

    # Configuration of the model to optimize
    if type_model is None:
        type_model = NeuralNetwork
    self.type_model = type_model
    if net_params is None:
        # The boolean values here appear to flag which hyperparameters
        # the random search should sample (True) versus leave fixed (False).
        net_params = NetworkParameters(units_layers=[4, 10, 3], activation=False,
                                       dropout_ratios=True, classification=True,
                                       strength_l1=True, strength_l2=True, seed=seed)
    self.net_params = net_params

    # Random Search configuration
    self.rnd_search = RandomSearch(self.net_params, n_layers, n_iter,
                                   net_domain=None, seed=seed)
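# For reference, a minimal self-contained sketch of the random-search pattern
# set up above (plain Python; the `train_and_score` callable and the parameter
# domain are hypothetical, and this is NOT the learninspy RandomSearch API):
import random

def random_search(param_space, n_iter, seed, train_and_score):
    """Sample n_iter configurations and keep the best-scoring one.

    param_space: dict mapping hyperparameter name -> list of candidate values.
    train_and_score: hypothetical callable that trains a model with the given
        configuration and returns a validation score (higher is better).
    """
    rng = random.Random(seed)
    best_score, best_config = float('-inf'), None
    for _ in range(n_iter):
        config = {name: rng.choice(values) for name, values in param_space.items()}
        score = train_and_score(config)
        if score > best_score:
            best_score, best_config = score, config
    return best_config, best_score

# Example domain, loosely mirroring the NetworkParameters flags above
space = {'dropout_ratios': [0.0, 0.2, 0.5],
         'strength_l1': [1e-6, 1e-5, 1e-4],
         'strength_l2': [1e-5, 3e-4, 1e-3]}
best, score = random_search(space, n_iter=10, seed=123,
                            train_and_score=lambda cfg: -cfg['strength_l2'])  # toy scorer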
def test_parallelism(self, mini_batch=10):
    logger.info("Testing variants of the parallelism level...")
    # Data
    logger.info("Dataset used: Iris")
    data = load_iris()
    dataset = LocalLabeledDataSet(data)
    self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
    self.valid = self.valid.collect()

    # Optimization
    options = {'step-rate': 1.0, 'decay': 0.995, 'momentum': 0.3, 'offset': 1e-8}
    opt_params = OptimizerParameters(algorithm='Adadelta', options=options)
    stops = [criterion['MaxIterations'](10)]

    # Parallelism levels
    parallelism = [-1, 0, 2]
    for p in parallelism:
        logger.info("Setting parallelism to %i", p)
        hits_valid = self._fit(opt_params=opt_params, stops=stops,
                               mini_batch=mini_batch, parallelism=p)
        logger.info("Asserting correct outputs...")
        assert hits_valid > 0.7
        hits_test, pred_test = self.model.evaluate(self.test, predictions=True,
                                                   measure='R2')
        assert hits_test > 0.7
        logger.info("OK")
    return
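# The merge_criter='w_avg' option used elsewhere in these tests suggests that
# replicas trained in parallel are merged by weighted averaging. A minimal
# NumPy sketch of that consensus step (an illustration of the idea only, not
# learninspy's internal implementation):
import numpy as np

def weighted_average_merge(weight_sets, scores):
    """Merge per-worker parameter arrays, weighting each replica by its score.

    weight_sets: list of lists of np.ndarray, one inner list per worker.
    scores: per-worker validation scores (e.g. hit rates) used as weights.
    """
    coeffs = np.asarray(scores, dtype=float)
    coeffs /= coeffs.sum()  # normalize so the merge is a convex combination
    n_arrays = len(weight_sets[0])
    return [sum(c * ws[i] for c, ws in zip(coeffs, weight_sets))
            for i in range(n_arrays)]

# Three workers, each holding two parameter matrices of a [4, 10, 3] network
workers = [[np.random.randn(4, 10), np.random.randn(10, 3)] for _ in range(3)]
merged = weighted_average_merge(workers, scores=[0.8, 0.9, 0.7])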
def test_pca():
    logger.info("Testing PCA...")
    data = load_iris()

    # Functionality tests
    features = map(lambda lp: lp.features, data)
    pca = PCA(features, threshold_k=0.99)
    assert pca.k == 2
    # NOTE: 'standarize' is the library's own spelling of this keyword
    transformed = pca.transform(k=3, standarize=True, whitening=True)
    assert len(transformed[0]) == 3

    # DataSet support tests
    local_data = LocalLabeledDataSet(data)
    pca_loc = PCA(local_data.features)
    distributed_data = DistributedLabeledDataSet(data)
    pca_dist = PCA(distributed_data.features.collect())
    assert np.array_equiv(pca_loc.transform(k=3, data=local_data).features,
                          pca_dist.transform(k=3, data=distributed_data).features.collect())
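# As a reference for what threshold_k does, a minimal NumPy sketch of PCA in
# which k is chosen as the smallest number of components whose cumulative
# explained-variance ratio reaches the threshold (an independent illustration,
# not learninspy's PCA class):
import numpy as np

def pca_fit(X, threshold_k=0.99):
    """Return principal axes, their variances, and the smallest adequate k."""
    Xc = X - X.mean(axis=0)                       # center the data
    cov = np.cov(Xc, rowvar=False)
    eigvals, eigvecs = np.linalg.eigh(cov)        # eigh gives ascending eigenvalues
    order = np.argsort(eigvals)[::-1]             # reorder by descending variance
    eigvals, eigvecs = eigvals[order], eigvecs[:, order]
    ratio = np.cumsum(eigvals) / eigvals.sum()    # cumulative explained variance
    k = int(np.searchsorted(ratio, threshold_k) + 1)
    return eigvecs, eigvals, k

def pca_transform(X, eigvecs, eigvals, k, whitening=False):
    Z = np.dot(X - X.mean(axis=0), eigvecs[:, :k])  # project onto top-k axes
    if whitening:                                   # rescale each axis to unit variance
        Z = Z / np.sqrt(eigvals[:k] + 1e-8)
    return Z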
def test_stdscaler():
    logger.info("Testing StandardScaler...")
    data = load_iris()

    # Functionality tests
    features = map(lambda lp: lp.features, data)
    stdsc = StandardScaler(mean=True, std=True)
    stdsc.fit(features)
    transformed = stdsc.transform(features)
    # atol rather than rtol: a relative tolerance is meaningless against a target of 0
    assert np.isclose(np.mean(transformed), 0, atol=1e-8)

    # DataSet support tests
    local_data = LocalLabeledDataSet(data)
    stdsc.fit(local_data.features)
    local_transformed = stdsc.transform(local_data)
    distributed_data = DistributedLabeledDataSet(data)
    stdsc.fit(distributed_data)
    distrib_transformed = stdsc.transform(distributed_data)
    assert np.allclose(local_transformed.features,
                       distrib_transformed.features.collect())
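# The scaler above should implement the usual z-score transform. A minimal
# NumPy equivalent for reference (not learninspy's StandardScaler):
import numpy as np

class SimpleStandardScaler(object):
    """z-score scaling: subtract the column mean, divide by the column std."""
    def __init__(self, mean=True, std=True):
        self.with_mean, self.with_std = mean, std

    def fit(self, X):
        X = np.asarray(X, dtype=float)
        self.mean_ = X.mean(axis=0) if self.with_mean else 0.0
        self.std_ = X.std(axis=0) if self.with_std else 1.0  # population std
        return self

    def transform(self, X):
        return (np.asarray(X, dtype=float) - self.mean_) / self.std_

# After fitting, the transformed columns have mean ~0 and std ~1
X = np.random.rand(150, 4)
Z = SimpleStandardScaler().fit(X).transform(X)
assert np.allclose(Z.mean(axis=0), 0, atol=1e-8)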
def __init__(self, opt_params=None):
    logger.info("Optimizer test with Iris data")
    # Data
    logger.info("Loading data...")
    data = load_iris()
    dataset = LocalLabeledDataSet(data)
    self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
    self.train = self.train.collect()
    self.valid = self.valid.collect()
    self.test = self.test.collect()

    # Optimization configuration
    if opt_params is None:  # Adadelta is used by default
        stops = [criterion['MaxIterations'](10),
                 criterion['AchieveTolerance'](0.95, key='hits')]
        options = {'step-rate': 1.0, 'decay': 0.99, 'momentum': 0.3, 'offset': 1e-8}
        opt_params = OptimizerParameters(algorithm='Adadelta', stops=stops,
                                         options=options, merge_criter='w_avg')
    self.opt_params = opt_params

    # Model configuration
    net_params = NetworkParameters(units_layers=[4, 10, 3], activation='ReLU',
                                   strength_l1=1e-5, strength_l2=3e-4,
                                   dropout_ratios=[0.2, 0.0], classification=True)
    self.model = NeuralNetwork(net_params)
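# The Adadelta options used above ('step-rate', 'decay', 'momentum', 'offset')
# match a conventional parameterization of the algorithm. A minimal NumPy
# sketch of one Adadelta parameter update under that parameterization (the
# placement of the momentum term is one common convention; this is an
# illustration, not learninspy's optimizer):
import numpy as np

class AdadeltaState(object):
    def __init__(self, shape, decay=0.99, step_rate=1.0, momentum=0.3, offset=1e-8):
        self.decay, self.step_rate = decay, step_rate
        self.momentum, self.offset = momentum, offset
        self.g2 = np.zeros(shape)    # running average of squared gradients
        self.s2 = np.zeros(shape)    # running average of squared steps
        self.step = np.zeros(shape)  # previous step, for the momentum term

    def update(self, grad):
        d, eps = self.decay, self.offset
        self.g2 = d * self.g2 + (1 - d) * grad ** 2
        # Scale the gradient by the ratio of RMS step size to RMS gradient
        delta = np.sqrt(self.s2 + eps) / np.sqrt(self.g2 + eps) * grad
        self.s2 = d * self.s2 + (1 - d) * delta ** 2
        self.step = self.momentum * self.step + self.step_rate * delta
        return self.step  # to be subtracted from the parameters

# One update for a 4x10 weight matrix, as in the [4, 10, 3] network above
state = AdadeltaState((4, 10))
w = np.random.randn(4, 10)
w -= state.update(np.random.randn(4, 10))  # hypothetical gradient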
from learninspy.utils.plots import plot_fitting
from learninspy.utils.fileio import get_logger

import os

logger = get_logger(name='learninspy-demo_iris')

# Points worth demonstrating here:
# * Examine how results differ across consensus (merge) functions
# * Explore stop criteria
#   (raising MaxIterations from 5 to 10 changes the total runtime considerably)

# -- 1) Load data
logger.info("Loading Iris data ...")
dataset = load_iris()
dataset = LocalLabeledDataSet(dataset)
rows, cols = dataset.shape
logger.info("Data dimensions: %i x %i", rows, cols)

train, valid, test = dataset.split_data([0.7, 0.1, 0.2])  # Split into subsets

# -- 2) Parameter selection
# --- 2.a) Neural network parameters
net_params = NetworkParameters(units_layers=[4, 8, 3], dropout_ratios=[0.0, 0.0],
                               activation='ReLU', strength_l1=1e-5, strength_l2=3e-4,
                               classification=True, seed=123)

# --- 2.b) Optimization parameters
local_stops = [criterion['MaxIterations'](10),
               criterion['AchieveTolerance'](0.95, key='hits')]
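# The excerpt cuts off at this point. Based on the constructors and calls used
# in the tests above, the demo would plausibly continue along these lines; the
# keyword names of model.fit(...) and the plot_fitting(...) call below are
# assumptions, not a confirmed learninspy signature:
options = {'step-rate': 1.0, 'decay': 0.995, 'momentum': 0.3, 'offset': 1e-8}
opt_params = OptimizerParameters(algorithm='Adadelta', stops=local_stops,
                                 options=options, merge_criter='w_avg')

# -- 3) Build and fit the model (fit() keywords assumed)
model = NeuralNetwork(net_params)
hits_valid = model.fit(train, valid, mini_batch=10, parallelism=2,
                       optimizer_params=opt_params)

# -- 4) Evaluate and plot
test = test.collect()  # the tests above collect datasets before evaluating
hits_test, pred_test = model.evaluate(test, predictions=True)
logger.info("Hits on the test set: %.4f", hits_test)
plot_fitting(model)  # usage assumed from the import at the top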