def _incremental_validation(dataset, settings_fn, val_fold, **params):
    """ Trains a model incrementally so a new layer is appended at the end
    of the network only if it decreases the generalization error and
    returns the stats for the best setting found.

    Args:
        dataset: dataset identifier passed to get_data_location.
        settings_fn: callable building the dataset settings object.
        val_fold: index of the fold held out for validation.
        **params: must contain 'max_layers'; may contain 'tune_folder'
            (persistent output root; when absent or None a temporary
            folder is used and removed at the end).

    Returns:
        dict with the stats of the best fit found, extended with
        'num_layers' and 'train_epochs' (per-layer epoch counts),
        'epoch' removed.
    """
    dataset_location = get_data_location(dataset, folded=True)
    n_folds = settings_fn(dataset_location).get_fold_num()
    folds_set = range(n_folds)

    # Treat a missing 'tune_folder' and an explicit None the same way:
    # both mean "use a throwaway temporary folder". The original code
    # checked `'tune_folder' not in params` here but
    # `params.get('tune_folder') is None` at cleanup time, so a params
    # dict containing {'tune_folder': None} crashed in os.path.join.
    tune_folder = params.get('tune_folder')
    if tune_folder is None:
        folder = tempfile.mkdtemp()
    else:
        folder = os.path.join(tune_folder, str(_get_millis_time()))

    prev_err, prev_folder = float('inf'), None
    epochs, best = [], None

    for layer in range(1, params.get('max_layers') + 1):

        logger.debug('[%d] Starting layerwise incremental training' % layer)

        current_folder = os.path.join(folder, 'layer_%d' % layer)
        model = DeepNetworkValidation(settings_fn,
                                      dataset_location,
                                      folder=current_folder)

        # Train only the newly appended layer, restoring all previously
        # trained layers from the best run so far.
        fitted = model.fit(
            train_folds=[x for x in folds_set if x != val_fold],
            val_folds=[val_fold],
            num_layers=layer,
            train_only=layer,
            restore_folder=prev_folder,
            restore_layers=[x for x in range(1, layer)],
            layerwise=False,
            **params)

        logger.debug('[{}] Training got: {}'.format(layer, fitted))

        if prev_err > fitted['val_error']:

            # Update previous fit
            prev_err = fitted['val_error']
            prev_folder = current_folder

            # Update best model info
            best = fitted
            best.update({'num_layers': layer})
            epochs.append(best['epoch'])

            logger.debug(
                '[%d] Training improved. Going for next layer...' % layer)

        else:

            # Layer did not improve, let's keep layer - 1 layers
            logger.debug('[%d] Training stagnated. Stopping...' % layer)
            break

    # At least one layer always improves over the initial inf error, so
    # 'best' is set here (assuming max_layers >= 1).
    del best['epoch']
    best.update({'train_epochs': epochs})

    if tune_folder is None:
        shutil.rmtree(folder)

    return best
def _incremental_training(dataset, settings_fn, train_folder, num_layers,
                          train_epochs, **params):
    """ Retrains the network layer by layer, giving each layer its own
    epoch budget from train_epochs and restoring every stage from the
    folder of the previous one. Only the final stage is written directly
    into train_folder; earlier stages go to 'layer_N' subfolders. """
    data_location = get_data_location(dataset, folded=True)

    restore_from = None
    for depth in range(1, num_layers + 1):
        epoch_budget = train_epochs[depth - 1]

        # Store in subfolders all trainings except from last one
        dst_folder = train_folder if depth == num_layers \
            else os.path.join(train_folder, 'layer_%d' % depth)

        logger.info('[%d] Training up to epoch %d in folder %s'
                    % (depth, epoch_budget, dst_folder))

        model = DeepNetworkTraining(settings_fn=settings_fn,
                                    data_location=data_location,
                                    folder=dst_folder)

        training_stats = model.fit(num_layers=depth,
                                   train_only=depth,
                                   max_epochs=epoch_budget,
                                   switch_epochs=None,
                                   restore_folder=restore_from,
                                   restore_layers=list(range(1, depth)),
                                   **params)

        restore_from = dst_folder

    return training_stats
def _cross_validate(dataset, settings_fn, **params):
    """ Returns the average metric over the folds for the given
    execution setting.

    Runs the incremental validation once per fold, averages the per-fold
    results and wraps them in a hyperopt-style result dict.
    """
    dataset_location = get_data_location(dataset, folded=True)
    n_folds = settings_fn(dataset_location).get_fold_num()
    folds_set = range(n_folds)
    results = []

    logger.debug('Starting evaluation on {} ...'.format(params))

    for val_fold in folds_set:
        best = _incremental_validation(dataset, settings_fn, val_fold,
                                       **params)
        results.append(best)

    avg_results = _average_results(results)

    # Fixed log message typo: original read "cross validaton"
    logger.info('Finished cross validation on: {} \n'.format(params))
    logger.info('Results: {} \n'.format(avg_results))

    return {
        'loss': avg_results['val_error'],
        'averaged': avg_results,
        'parameters': params,
        'all': results,
        'status': STATUS_OK
    }
def fine_tune_training(dataset, settings_fn, run_folder, fine_tune,
                       num_layers, **params):
    """ Runs an extra fine-tuning phase on a network already trained in
    run_folder, either a fixed number of epochs per layer
    (ExtraLayerwise) or over the whole network at once (ExtraEpoch). """
    model = DeepNetworkTraining(
        settings_fn=settings_fn,
        data_location=get_data_location(dataset, folded=True),
        folder=run_folder)

    last_epoch = params['train_epochs'][-1]
    all_layers = [x for x in range(1, num_layers + 1)]

    if isinstance(fine_tune, FineTuningType.ExtraLayerwise):
        logger.info(
            'Layerwise fine-tuning: training %d' % fine_tune.epochs_per_layer
            + ' epochs per layer using %s policy' % fine_tune.policy)

        # One switch point per layer transition
        switches = [last_epoch + fine_tune.epochs_per_layer * i
                    for i in range(1, num_layers)]

        return model.fit(
            num_layers=num_layers,
            max_epochs=last_epoch + fine_tune.epochs_per_layer * num_layers,
            switch_epochs=switches,
            switch_policy=fine_tune.policy,
            restore_folder=run_folder,
            restore_layers=all_layers,
            **params)

    if isinstance(fine_tune, FineTuningType.ExtraEpoch):
        logger.info('Traditional fine-tuning: training %d' % fine_tune.epochs
                    + ' extra epochs for the whole network')

        return model.fit(num_layers=num_layers,
                         max_epochs=last_epoch + fine_tune.epochs,
                         restore_folder=run_folder,
                         restore_layers=all_layers,
                         **params)

    raise ValueError('Unknown refining type {}'.format(fine_tune))
def _simple_evaluate(dataset, settings_fn, **params):
    """ Returns the metrics for a single early stopping run, using one
    randomly chosen fold for validation. """
    data_location = get_data_location(dataset, folded=True)
    fold_count = settings_fn(data_location).get_fold_num()

    # Pick the validation fold at random for this single run
    chosen_fold = np.random.randint(fold_count)

    best = _incremental_validation(dataset, settings_fn, chosen_fold,
                                   **params)

    logger.info('Finished evaluation on {}'.format(params))
    logger.info('Obtained results {}'.format(best))

    return {
        'loss': best['val_error'],
        'averaged': best,
        'parameters': params,
        'status': STATUS_OK
    }
'network_fn': cnn_kernel_example_layout_fn, } settings = datasets.FashionMnistSettings dataset = datasets.Datasets.FASHION_MNIST if mode == 0: logger.info('Running training ...') if os.path.isdir(folder): shutil.rmtree(folder) m = DeepNetworkTraining(folder=folder, settings_fn=settings, data_location=get_data_location(dataset, folded=True)) m.fit(switch_epochs=[20, 40, 60], **params) elif mode == 1: logger.info('Running training with early stop on validation ...') if os.path.isdir(folder): shutil.rmtree(folder) m = DeepNetworkValidation(folder=folder, settings_fn=settings, data_location=get_data_location(dataset, folded=True)) m.fit(train_folds=range(9), val_folds=[9], layerwise=True, **params)
from protodata.data_ops import TrainMode, DataMode
from widedeep.model.model_base import LinearModel, MLP
from widedeep.model.joint_model import JointRegressor
from widedeep.ops.losses import Optimizers, MeanSquared
import widedeep.utils as ut
import widedeep.ops.metrics as me

import tensorflow as tf

logger = ut.get_logger('data')

flags = tf.app.flags
FLAGS = flags.FLAGS

# Input data
flags.DEFINE_string("data_location",
                    get_data_location(Datasets.DIABETES),
                    "Where data is stored")

# Training parameters
flags.DEFINE_integer("batch_size", 32, "Batch size to use.")

flags.DEFINE_string("network", ut.NetworkModels.MLP,
                    "Network to use for MLP, if used")

flags.DEFINE_integer("summaries", 50, "Steps between summaries.")

flags.DEFINE_integer("checkpoints", 100, "Steps between model checkpoints.")

flags.DEFINE_integer("steps", 200, "Steps to train.")

flags.DEFINE_float("gpu_frac", 0.70, "Percentage of GPU memory to use.")
from protodata.datasets import Datasets
from protodata.data_ops import TrainMode, DataMode
from protodata.utils import get_data_location

import tensorflow as tf

logger = utils.get_logger('data')

flags = tf.app.flags
FLAGS = flags.FLAGS

# Input data
flags.DEFINE_string("data_location",
                    get_data_location(Datasets.DIABETES),
                    "Where data is stored")

# Training parameters
flags.DEFINE_integer("batch_size", 64, "Batch size to use.")

flags.DEFINE_integer("validate_steps", 100,
                     "Batch size to use for validation")
from protodata.image_ops import DataSpec

import tensorflow as tf
import scipy

import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

dataset = datasets.Datasets.CIFAR10
settings_fn = datasets.Cifar10Settings
folded = True

tf.app.flags.DEFINE_string('data_location',
                           get_data_location(dataset, folded=folded),
                           'Path where to build dataset')

# Fixed flag type: batch_size was declared with DEFINE_string despite
# having an integer default and being an instance count.
tf.app.flags.DEFINE_integer('batch_size', 5, 'Number of instances per batch')

tf.app.flags.DEFINE_integer(
    'memory_factor',
    1,
    'Factor related to the capacity of the queue' +
    ' (~GB). The higher this amount, the more mixed' +
    'the data but the slower the processing time')

tf.app.flags.DEFINE_bool('show_image', True,
                         'Used for datasets which include images')

tf.app.flags.DEFINE_integer('reader_threads', 1,
                            'Number of threads to read the instances.')
import widedeep.ops.metrics as metrics
import widedeep.utils as utils
from protodata.datasets.airbnb import AirbnbSettings
from protodata.datasets import Datasets
from protodata.data_ops import TrainMode, DataMode
from protodata.utils import get_data_location

import tensorflow as tf

logger = utils.get_logger('data')

flags = tf.app.flags
FLAGS = flags.FLAGS

# Input data
flags.DEFINE_string("data_location",
                    get_data_location(Datasets.AIRBNB_PRICE),
                    "Where data is stored")

# Training parameters
flags.DEFINE_integer("batch_size", 64, "Batch size to use.")

flags.DEFINE_integer("validate_steps", 100,
                     "Batch size to use for validation")

# Fixed help text: it was a copy-paste of the validate_steps description,
# but this flag controls how often validation runs, not a batch size.
flags.DEFINE_integer("validate_interval", 3000,
                     "Steps between validation runs")

flags.DEFINE_integer("summaries", 100, "Steps between summaries")

flags.DEFINE_integer("checkpoints", 5000000,
                     "Steps between model checkpoints")

flags.DEFINE_integer("steps", 200000, "Steps to train")
from protodata.quantize import Quantize
from widedeep.model.model_base import LinearModel, MLP
from widedeep.model.joint_model import JointClassifier
from widedeep.ops.losses import Optimizers, CrossEntropy
import widedeep.utils as utils
import widedeep.ops.metrics as metrics

import tensorflow as tf

logger = utils.get_logger('data')

flags = tf.app.flags
FLAGS = flags.FLAGS

# Input data
flags.DEFINE_string("data_location",
                    get_data_location(Datasets.BOSTON),
                    "Where data is stored")

# Training parameters
flags.DEFINE_integer("batch_size", 32, "Batch size to use.")

flags.DEFINE_string("network", utils.NetworkModels.MLP,
                    "Network to use for MLP, if used")

flags.DEFINE_integer("summaries", 50, "Steps between summaries.")

flags.DEFINE_integer("checkpoints", 100, "Steps between model checkpoints.")

flags.DEFINE_integer("steps", 5000, "Steps to train.")

flags.DEFINE_float("gpu_frac", 0.70, "Percentage of GPU memory to use.")
from protodata.serialization_ops import DataSerializer from protodata.datasets import AirbnbSerialize, Datasets from protodata.utils import get_tmp_data_location, get_data_location import tensorflow as tf import os # Note AIRBNB.AVAILABLE can be also generated in the notebooks DATASET = Datasets.AIRBNB_PRICE # Data paths tf.app.flags.DEFINE_string('raw_data_location', get_tmp_data_location(DATASET), 'Where raw data is located') tf.app.flags.DEFINE_string('data_location', get_data_location(DATASET), 'Where to store data') # Data parameters tf.app.flags.DEFINE_integer( 'nq', None, 'Number of quantiles for numeric normalization.' 'Set to None to use zscores') tf.app.flags.DEFINE_integer('subset', 3000, 'Number of instances to use. Set to None for all') tf.app.flags.DEFINE_float('train_ratio', 0.80, 'Ratio of training instances') tf.app.flags.DEFINE_float('val_ratio', 0.10, 'Ratio of validation instances') # Serialization parameters tf.app.flags.DEFINE_integer('train_shards', 64,
from protodata.datasets import Datasets
from protodata.data_ops import TrainMode, DataMode
from protodata.utils import get_data_location
from protodata.image_ops import get_image_specs

import tensorflow as tf

logger = utils.get_logger('data')

flags = tf.app.flags
FLAGS = flags.FLAGS

# Input data
flags.DEFINE_string("data_location",
                    get_data_location(Datasets.AIRBNB_PRICE),
                    "Where data is stored")

# Training parameters
flags.DEFINE_integer("batch_size", 64, "Batch size to use.")

flags.DEFINE_integer("validate_steps", 100,
                     "Batch size to use for validation")
from widedeep.ops.metrics import Accuracy, AccuracyRandom
from protodata.image_ops import get_image_specs
from protodata.data_ops import TrainMode, DataMode
from protodata.datasets import Datasets
from protodata.datasets.mnist import MnistSettings
from protodata.utils import get_data_location

import tensorflow as tf

logger = ut.get_logger('data')

flags = tf.app.flags
FLAGS = flags.FLAGS

# Input data
flags.DEFINE_string("data_location", get_data_location(Datasets.MNIST),
                    "Where data is stored")

# Training parameters
flags.DEFINE_integer("batch_size", 16, "Batch size to use.")

flags.DEFINE_integer("validate_steps", 32,
                     "Batch size to use for validation")

# Fixed help text: it was a copy-paste of the validate_steps description,
# but this flag controls how often validation runs, not a batch size.
flags.DEFINE_integer("validate_interval", 500,
                     "Steps between validation runs")

flags.DEFINE_string("network", ut.NetworkModels.MNIST, "Network to use")

flags.DEFINE_integer("summaries", 50, "Steps between summaries.")

flags.DEFINE_integer("checkpoints", 500, "Steps between model checkpoints.")
def _run_setting(dataset, settings_fn, best_params, folder=None, n_runs=10,
                 test_batch_size=1, fine_tune=None):
    """ Fits a model with the training set and evaluates it on the test
    for a given number of times. Then returns the summarized metrics on
    the test set.

    Args:
        dataset: dataset identifier passed to get_data_location.
        settings_fn: callable building the dataset settings object.
        best_params: hyperparameter dict; must contain 'batch_size',
            'num_layers' and 'train_epochs' for the incremental training.
        folder: output root; a temporary folder (removed at the end) is
            used when None.
        n_runs: number of independent train/test simulations.
        test_batch_size: batch size used at prediction time.
        fine_tune: optional FineTuningType value triggering an extra
            fine-tuning phase after the incremental training.

    Returns:
        list of per-run stats dicts (training metrics, time and test
        metrics).
    """
    if folder is None:
        out_folder = tempfile.mkdtemp()
    else:
        out_folder = folder

    total_stats = []
    for i in range(n_runs):

        # Train model for current simulation
        run_folder = os.path.join(out_folder, str(_get_millis_time()))
        logger.info('Running training [{}] in {}'.format(i, run_folder))

        before = time.time()

        # NOTE(review): assumes model.fit returns a 4-tuple
        # (_, loss, error, l2) — confirm against DeepNetworkTraining.fit
        _, fit_loss, fit_error, fit_l2 = _incremental_training(
            dataset, settings_fn, run_folder, **best_params)

        if fine_tune is not None:
            _, fit_loss, fit_error, fit_l2 = fine_tune_training(
                dataset, settings_fn, run_folder, fine_tune, **best_params)

        diff = time.time() - before

        # Fixed log message typo: original read "from on %s"
        logger.info('Running prediction [%s] from %s' % (i, run_folder))

        run_stats = {
            'train_loss': fit_loss,
            'train_error': fit_error,
            'train_l2': fit_l2,
            'time(s)': diff
        }

        # Evaluate test for current simulation
        model = DeepNetworkTraining(folder=run_folder,
                                    settings_fn=settings_fn,
                                    data_location=get_data_location(
                                        dataset, folded=True))

        # Prediction uses its own batch size, so drop the training one
        test_params = best_params.copy()
        del test_params['batch_size']
        test_stats = model.predict(batch_size=test_batch_size, **test_params)

        run_stats.update(test_stats)
        logger.info('Training [{}] got results {}'.format(i, run_stats))
        total_stats.append(run_stats)

    if folder is None:
        shutil.rmtree(out_folder)

    return total_stats