def __init__(self, type_model=None, net_params=None, opt_params=None, n_layers=0, n_iter=10, seed=123):
    """Set up a RandomSearch test over the Iris dataset.

    Builds train/valid/test partitions, a default Adadelta optimizer
    configuration, a default model configuration, and the RandomSearch
    instance that will sample hyper-parameters.
    """
    # Load Iris and collect every partition locally.
    logger.info("Cargando datos...")
    labeled = LocalLabeledDataSet(load_iris())
    partitions = labeled.split_data([.5, .3, .2])
    self.train, self.valid, self.test = [part.collect() for part in partitions]

    # Optimization: default to Adadelta with weighted-average merge,
    # stopping after 10 iterations or once 95% hits are reached.
    if opt_params is None:
        stop_criteria = [criterion['MaxIterations'](10),
                         criterion['AchieveTolerance'](0.95, key='hits')]
        adadelta_options = {'step-rate': 1.0, 'decay': 0.995,
                            'momentum': 0.3, 'offset': 1e-8}
        opt_params = OptimizerParameters(algorithm='Adadelta', stops=stop_criteria,
                                         options=adadelta_options, merge_criter='w_avg')
    self.opt_params = opt_params

    # Model to optimize. NOTE(review): the boolean values below presumably
    # mark which parameters the random search should sample -- confirm
    # against RandomSearch's domain handling.
    if type_model is None:
        type_model = NeuralNetwork
    self.type_model = type_model
    if net_params is None:
        net_params = NetworkParameters(units_layers=[4, 10, 3], activation=False,
                                       dropout_ratios=True, classification=True,
                                       strength_l1=True, strength_l2=True, seed=seed)
    self.net_params = net_params

    # Random search over the configured parameter space.
    self.rnd_search = RandomSearch(self.net_params, n_layers, n_iter,
                                   net_domain=None, seed=seed)
def test_parallelism(self, mini_batch=10):
    """Fit the model under several parallelism levels and check its scores.

    For each level in ``[-1, 0, 2]`` the model is fitted via ``self._fit``
    and both the validation score and the R2 test score are asserted to
    exceed 0.7.

    :param mini_batch: mini-batch size forwarded to ``self._fit``.
    """
    logger.info("Testeando variantes del nivel de paralelismo...")
    # Data: Iris, split 50/30/20; only the validation partition is collected.
    logger.info("Datos utilizados: Iris")
    data = load_iris()
    dataset = LocalLabeledDataSet(data)
    self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
    self.valid = self.valid.collect()
    # Optimization: Adadelta, capped at 10 iterations.
    options = {'step-rate': 1.0, 'decay': 0.995, 'momentum': 0.3, 'offset': 1e-8}
    opt_params = OptimizerParameters(algorithm='Adadelta', options=options)
    stops = [criterion['MaxIterations'](10)]
    # Parallelism levels to exercise. NOTE(review): semantics of -1 and 0
    # (e.g. auto vs. no parallelism) are defined by self._fit -- confirm.
    parallelism = [-1, 0, 2]
    for p in parallelism:
        logger.info("Seteando paralelismo en %i", p)
        hits_valid = self._fit(opt_params=opt_params, stops=stops,
                               mini_batch=mini_batch, parallelism=p)
        logger.info("Asegurando salidas correctas...")
        assert hits_valid > 0.7
        # Predictions are requested but only the score is checked, so bind
        # them to '_' instead of an unused local (was 'pred_test').
        hits_test, _ = self.model.evaluate(self.test, predictions=True,
                                           measure='R2')
        assert hits_test > 0.7
    logger.info("OK")
    return
def __init__(self, network_params=None, dropout_in=0.0):
    """Set up an AutoEncoder test over the Iris CSV dataset.

    :param network_params: NetworkParameters for the model; when None a
        default 4-8 ReLU non-classification configuration is used.
    :param dropout_in: input dropout ratio passed to the AutoEncoder.
    """
    logger.info("Testeo de AutoEncoder con datos de Iris")
    # Load examples/datasets/iris.csv relative to this file's parent
    # directory. The original mixed the 'os.path.pardir' and 'os.pardir'
    # aliases and joined '..' onto a *file* path; using dirname() keeps the
    # same resolved path while making the intent explicit.
    logger.info("Cargando datos...")
    dataset = LocalLabeledDataSet()
    path = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                        os.pardir, 'examples/datasets/iris.csv'))
    dataset.load_file(path)
    self.train, self.valid, self.test = dataset.split_data([.5, .3, .2])
    self.train = self.train.collect()
    self.valid = self.valid.collect()
    # Default model: a 4-8 ReLU autoencoder (non-classification output).
    if network_params is None:
        network_params = NetworkParameters(units_layers=[4, 8],
                                           activation=['ReLU', 'ReLU'],
                                           classification=False)
    self.model = AutoEncoder(network_params, dropout_in=dropout_in)
def __init__(self, network_params=None):
    """Set up a NeuralNetwork regression test over the CCPP dataset."""
    logger.info("Testeo de NeuralNetwork con datos de Combined Cycle Power Plant")
    # Load CCPP and split it; only the validation partition is collected.
    logger.info("Cargando datos...")
    ccpp = LocalLabeledDataSet(load_ccpp())
    self.train, self.valid, self.test = ccpp.split_data([.5, .3, .2])
    self.valid = self.valid.collect()
    # Default model: a 4-30-1 ReLU network configured for regression.
    if network_params is None:
        network_params = NetworkParameters(units_layers=[4, 30, 1],
                                           activation='ReLU',
                                           classification=False, seed=123)
    self.model = NeuralNetwork(network_params)
    # Hand-tuned regularization; dropout disabled on every layer.
    self.model.set_l1(5e-7)
    self.model.set_l2(3e-4)
    self.model.set_dropout_ratios([0.0, 0.0])
def __init__(self, network_params=None):
    """Set up a NeuralNetwork regression test over the CCPP dataset."""
    logger.info(
        "Testeo de NeuralNetwork con datos de Combined Cycle Power Plant")
    # Load the data and build the three partitions; collect only 'valid'.
    logger.info("Cargando datos...")
    data = load_ccpp()
    dataset = LocalLabeledDataSet(data)
    splits = dataset.split_data([.5, .3, .2])
    self.train, self.valid, self.test = splits
    self.valid = self.valid.collect()
    # Fall back to a default 4-30-1 ReLU regression network.
    if network_params is None:
        network_params = NetworkParameters(units_layers=[4, 30, 1],
                                           activation='ReLU',
                                           classification=False,
                                           seed=123)
    self.model = NeuralNetwork(network_params)
    # Manually chosen regularization strengths; no dropout anywhere.
    self.model.set_l1(5e-7)
    self.model.set_l2(3e-4)
    self.model.set_dropout_ratios([0.0, 0.0])
def test_parallelism(self, mini_batch=10):
    """Fit under each parallelism level and assert scores above 0.7."""
    logger.info("Testeando variantes del nivel de paralelismo...")
    # Data: Iris, split 50/30/20; only the validation set is collected.
    logger.info("Datos utilizados: Iris")
    data = load_iris()
    iris = LocalLabeledDataSet(data)
    self.train, self.valid, self.test = iris.split_data([.5, .3, .2])
    self.valid = self.valid.collect()
    # Optimization: Adadelta with a hard 10-iteration cap.
    adadelta_options = {'step-rate': 1.0, 'decay': 0.995,
                        'momentum': 0.3, 'offset': 1e-8}
    opt_params = OptimizerParameters(algorithm='Adadelta',
                                     options=adadelta_options)
    stops = [criterion['MaxIterations'](10)]
    # Exercise each parallelism level in turn.
    parallelism = [-1, 0, 2]
    for level in parallelism:
        logger.info("Seteando paralelismo en %i", level)
        hits_valid = self._fit(opt_params=opt_params, stops=stops,
                               mini_batch=mini_batch, parallelism=level)
        logger.info("Asegurando salidas correctas...")
        assert hits_valid > 0.7
        hits_test, pred_test = self.model.evaluate(self.test,
                                                   predictions=True,
                                                   measure='R2')
        assert hits_test > 0.7
    logger.info("OK")
    return
def __init__(self, opt_params=None):
    """Set up an Optimizer test over Iris with a fixed network config."""
    logger.info("Testeo de Optimizer con datos de Iris")
    # Load Iris and collect all three partitions locally.
    logger.info("Cargando datos...")
    iris = LocalLabeledDataSet(load_iris())
    partitions = iris.split_data([.5, .3, .2])
    self.train, self.valid, self.test = [p.collect() for p in partitions]
    # Optimization: default to Adadelta with weighted-average merge,
    # stopping after 10 iterations or once 95% hits are reached.
    if opt_params is None:
        stop_criteria = [criterion['MaxIterations'](10),
                         criterion['AchieveTolerance'](0.95, key='hits')]
        adadelta_options = {'step-rate': 1.0, 'decay': 0.99,
                            'momentum': 0.3, 'offset': 1e-8}
        opt_params = OptimizerParameters(algorithm='Adadelta',
                                         stops=stop_criteria,
                                         options=adadelta_options,
                                         merge_criter='w_avg')
    self.opt_params = opt_params
    # Fixed 4-10-3 ReLU classifier with L1/L2 regularization and
    # 20% dropout on the first layer only.
    net_params = NetworkParameters(units_layers=[4, 10, 3], activation='ReLU',
                                   strength_l1=1e-5, strength_l2=3e-4,
                                   dropout_ratios=[0.2, 0.0],
                                   classification=True)
    self.model = NeuralNetwork(net_params)
def test_pca():
    """Exercise PCA on Iris: threshold-driven k, transform options, and
    equivalence between local and distributed DataSet support."""
    logger.info("Testeando PCA...")
    data = load_iris()
    # Basic functionality. Build a list (not a 'map' object): under
    # Python 3 'map' is a one-shot iterator, so any second pass over the
    # features inside PCA would silently see no data.
    features = [lp.features for lp in data]
    pca = PCA(features, threshold_k=0.99)
    assert pca.k == 2
    transformed = pca.transform(k=3, standarize=True, whitening=True)
    assert len(transformed[0]) == 3
    # DataSet support: local and distributed transforms must agree.
    local_data = LocalLabeledDataSet(data)
    pca_loc = PCA(local_data.features)
    distributed_data = DistributedLabeledDataSet(data)
    pca_dist = PCA(distributed_data.features.collect())
    assert np.array_equiv(
        pca_loc.transform(k=3, data=local_data).features,
        pca_dist.transform(k=3, data=distributed_data).features.collect())
def test_stdscaler():
    """Exercise StandardScaler on raw features and on local/distributed
    DataSets, checking both normalization and result equivalence."""
    logger.info("Testeando StandardScaler...")
    data = load_iris()
    # Bug fix: 'map' returns a one-shot iterator in Python 3, so the
    # original object was exhausted by fit() and the subsequent
    # transform() saw no data. A list can be consumed by both calls.
    features = [lp.features for lp in data]
    stdsc = StandardScaler(mean=True, std=True)
    stdsc.fit(features)
    transformed = stdsc.transform(features)
    # Centered data should have (near-)zero mean.
    assert np.isclose(np.mean(transformed), 0, rtol=1e-8)
    # DataSet support: local and distributed results must agree.
    local_data = LocalLabeledDataSet(data)
    stdsc.fit(local_data.features)
    local_transformed = stdsc.transform(local_data)
    distributed_data = DistributedLabeledDataSet(data)
    stdsc.fit(distributed_data)
    distrib_transformed = stdsc.transform(distributed_data)
    assert np.allclose(local_transformed.features,
                       distrib_transformed.features.collect())
# Demo script: load and partition the Combined Cycle Power Plant dataset
# for a learninspy regression experiment (continues past this chunk).
from learninspy.core.optimization import OptimizerParameters
from learninspy.core.stops import criterion
from learninspy.utils.data import LocalLabeledDataSet, load_ccpp
from learninspy.utils.evaluation import RegressionMetrics
from learninspy.utils.plots import plot_fitting
from learninspy.utils.fileio import get_logger
import os

logger = get_logger(name='learninspy-demo_ccpp')

# -- 1.a) Data loading
logger.info("Cargando datos de Combined Cycle Power Plant ...")
dataset = load_ccpp()
dataset = LocalLabeledDataSet(dataset)
rows, cols = dataset.shape
logger.info("Dimension de datos: %i x %i", rows, cols)

train, valid, test = dataset.split_data([0.5, 0.3, 0.2])  # Split into train/valid/test partitions

# -- 1.b) Normalization (left disabled in this demo; the block below is a
# no-op string literal kept as-is)
"""
std = StandardScaler()
std.fit(train)
train = std.transform(train)
valid = std.transform(valid)
test = std.transform(test)
"""

# -- 2) Parameter selection