def test_non_ints(self):
    """Check that `IntInterval` rejects endpoints that are not ints.

    A float lower bound and a string upper bound must each raise
    `TypeError` with a message naming the offending argument.
    """
    bad_min_message = "min_value must be an int: -inf"
    with six.assertRaisesRegex(self, TypeError, bad_min_message):
        hp.IntInterval(float("-inf"), 0)

    bad_max_message = "max_value must be an int: 'eleven'"
    with six.assertRaisesRegex(self, TypeError, bad_max_message):
        hp.IntInterval(7, "eleven")
def evaluate(self):
    """Record test-set metrics and run the downstream classification test.

    When test data is enabled (``self._test``) and test labels exist, the
    fraction of predictions equal to ``self._test_labels`` is recorded,
    plus the normalized mutual information between the current and the
    previous test labels when the latter are available.

    When downstream labels exist, ``self._linear_classification_test`` is
    called with a dict mapping TensorBoard ``HParam`` objects to their
    configured values, or with ``None`` if the instance already has a
    ``_hparams_config`` attribute.
    """
    if self._test and self._test_labels is not None:
        predicted = self._pred_model.predict(self._test_ds,
                                             steps=self._test_steps)
        accuracy = np.mean(predicted == self._test_labels)
        self._record_scalars(epoch_end_test_accuracy=accuracy)

        if self._old_test_labels is not None:
            mutual_info = normalized_mutual_info_score(
                self._test_labels,
                self._old_test_labels,
                average_method="arithmetic")
            self._record_scalars(test_nmi=mutual_info)

    if self._downstream_labels is None:
        return

    hparams = None
    # Hyperparameters are only assembled while no hparams config is set.
    if not hasattr(self, "_hparams_config"):
        from tensorboard.plugins.hparams import api as hp

        hparams = {}
        hparams[hp.HParam("pca_dim", hp.IntInterval(0, 1000000))] = \
            self.config["pca_dim"]
        hparams[hp.HParam("k", hp.IntInterval(1, 1000000))] = \
            self.config["k"]
        hparams[hp.HParam("mult", hp.IntInterval(1, 1000000))] = \
            self.config["mult"]
        hparams[hp.HParam("sobel", hp.Discrete([True, False]))] = \
            self.input_config["sobel"]
        # One integer hyperparameter per entry of the "dense" config list.
        for index, width in enumerate(self.config["dense"]):
            dense_param = hp.HParam("dense_%s" % index,
                                    hp.IntInterval(1, 1000000))
            hparams[dense_param] = width

    self._linear_classification_test(hparams)
def define_hparams():
    """Create the module-level TensorBoard hyperparameters for the CNN search.

    Every ``HP_*`` object is published as a module global and collected in
    ``HPARAMS_LIST``.  The two GAN-related hyperparameters are created and
    appended only when ``all_var_dict['gan_ds_list']`` is not an empty list.
    """
    global HP_conv_count, HP_first_filter, HP_kernel_size
    global HP_strides, HP_maxpool, HP_learning_rate, HP_optimizer
    global HP_augment, HP_adversarial, HP_adv_step_size
    global HPARAMS_LIST
    global HP_generative_ratio, HP_dcgan_ratio

    discrete = hp.Discrete

    HP_conv_count = hp.HParam('conv_count', hp.IntInterval(3, 4))
    # Original note on larger filter counts: 64, 256, 512, 1024: OOM
    HP_first_filter = hp.HParam('first_filter', discrete([16, 32]))
    HP_kernel_size = hp.HParam('kernel_size', discrete([2, 3, 5]))
    HP_strides = hp.HParam('strides', discrete([1, 2, 3]))
    HP_maxpool = hp.HParam('maxpool', discrete([True, False]))
    HP_learning_rate = hp.HParam('learning_rate', discrete([-3]))
    # HP_augment is declared global but deliberately left unassigned here.
    HP_adversarial = hp.HParam('adversarial', discrete([False]))
    HP_adv_step_size = hp.HParam('adv_step_size',
                                 hp.RealInterval(1e-2, 2e-1))
    HP_optimizer = hp.HParam('optimizer', discrete(['adam']))

    HPARAMS_LIST = [
        HP_conv_count,
        HP_first_filter,
        HP_kernel_size,
        HP_strides,
        HP_maxpool,
        HP_learning_rate,
        HP_optimizer,
        HP_adversarial,
        HP_adv_step_size,
    ]

    if all_var_dict['gan_ds_list'] != []:
        HP_generative_ratio = hp.HParam('generative_ratio',
                                        hp.IntInterval(0, 5))
        HP_dcgan_ratio = hp.HParam('dcgan_ratio', hp.RealInterval(0., 1.))
        HPARAMS_LIST.extend([HP_generative_ratio, HP_dcgan_ratio])
def evaluate(self):
    """Record test-set diagnostics and run the downstream classification test.

    With test data enabled (``self._test``) this records: the value of
    ``self._compare_model_weights()``, the average cosine similarity of
    the online model's predictions on the test dataset, and the alignment
    and uniformity measures of the online model on that dataset.

    When downstream labels exist, ``self._linear_classification_test`` is
    called with a dict mapping TensorBoard ``HParam`` objects to their
    configured values, or with ``None`` if the instance already has a
    ``_hparams_config`` attribute.
    """
    if self._test:
        # compare the target and online networks
        weight_similarity = self._compare_model_weights()
        self._record_scalars(
            online_target_cosine_similarity=weight_similarity)

        projections = self._models["online"].predict(self._test_ds)
        avg_cosine_sim = _average_cosine_sim(projections)
        self._record_scalars(test_proj_avg_cosine_sim=avg_cosine_sim)

        # A per-batch test-loss loop (and a scalar-products image summary)
        # used to live here; it was disabled because the image took up a
        # lot of space without adding much.

        # alignment and uniformity measures on the out-of-sample data
        alignment, uniformity = _compute_alignment_and_uniformity(
            self._test_ds, self._models["online"])
        self._record_scalars(alignment=alignment,
                             uniformity=uniformity,
                             metric=True)

    if self._downstream_labels is None:
        return

    hparams = None
    # Hyperparameters are only assembled while no hparams config is set.
    if not hasattr(self, "_hparams_config"):
        from tensorboard.plugins.hparams import api as hp

        config = self.config
        hparams = {}
        hparams[hp.HParam("tau", hp.RealInterval(0., 1.))] = config["tau"]
        hparams[hp.HParam("num_hidden", hp.IntInterval(1, 1000000))] = \
            config["num_hidden"]
        hparams[hp.HParam("output_dim", hp.IntInterval(1, 1000000))] = \
            config["output_dim"]
        hparams[hp.HParam("lr", hp.RealInterval(0., 10000.))] = config["lr"]
        hparams[hp.HParam("lr_decay", hp.RealInterval(0., 10000.))] = \
            config["lr_decay"]
        hparams[hp.HParam("decay_type",
                          hp.Discrete(["cosine", "exponential"]))] = \
            config["decay_type"]
        hparams[hp.HParam("weight_decay", hp.RealInterval(0., 10000.))] = \
            config["weight_decay"]

        # Float-valued augmentation settings are logged as well.
        for key, value in self.augment_config.items():
            if isinstance(value, float):
                hparams[hp.HParam(key, hp.RealInterval(0., 10000.))] = value

    self._linear_classification_test(hparams)
def _create_hparams_config(searchspace):
    """Translate a search space into a list of TensorBoard ``HParam`` objects.

    ``searchspace.names()`` supplies a mapping of parameter name to type
    string and ``searchspace.get(name)`` supplies that parameter's values.
    ``"DOUBLE"`` becomes a ``RealInterval`` (bounds cast to float),
    ``"INTEGER"`` an ``IntInterval``, and ``"DISCRETE"``/``"CATEGORICAL"``
    a ``Discrete`` domain.  Any other type string is skipped.

    Returns:
        list of ``hp.HParam``, in the iteration order of ``names()``.
    """
    hparams = []
    for name, kind in searchspace.names().items():
        if kind in ("DISCRETE", "CATEGORICAL"):
            domain = hp.Discrete(searchspace.get(name))
        elif kind == "DOUBLE":
            lower = float(searchspace.get(name)[0])
            upper = float(searchspace.get(name)[1])
            domain = hp.RealInterval(lower, upper)
        elif kind == "INTEGER":
            lower = searchspace.get(name)[0]
            upper = searchspace.get(name)[1]
            domain = hp.IntInterval(lower, upper)
        else:
            # Unknown parameter types are not logged.
            continue
        hparams.append(hp.HParam(name, domain))
    return hparams
def define_hparams():
    """Create the module-level TensorBoard hyperparameters for this search.

    Every ``HP_*`` object is published as a module global and collected in
    ``HPARAMS_LIST``.  The two GAN-related hyperparameters are created and
    appended only when ``all_var_dict['gan_ds_list']`` is not an empty list.
    """
    global HP_model_body_from, HP_learning_rate, HP_adversarial
    global HP_optimizer, HP_adv_step_size
    global HPARAMS_LIST
    global HP_generative_ratio, HP_dcgan_ratio

    # Alternatives that are currently disabled: MobileNetV2, InceptionV3,
    # ResNet50V2, ResNet50, MobileNet, InceptionResNetV2, VGG16, Xception.
    model_bodies = ['DenseNet121']
    # Alternatives that are currently disabled: adagrad, Adadelta, SGD.
    optimizers = ['adam']

    HP_model_body_from = hp.HParam('model_body_from',
                                   hp.Discrete(model_bodies))
    HP_learning_rate = hp.HParam('learning_rate', hp.Discrete([-4, -3]))
    HP_adversarial = hp.HParam('adversarial', hp.Discrete([True]))
    HP_adv_step_size = hp.HParam('adv_step_size',
                                 hp.RealInterval(0.18, 0.19))
    HP_optimizer = hp.HParam('optimizer', hp.Discrete(optimizers))

    HPARAMS_LIST = [
        HP_model_body_from,
        HP_optimizer,
        HP_learning_rate,
        HP_adversarial,
        HP_adv_step_size,
    ]

    if all_var_dict['gan_ds_list'] != []:
        HP_generative_ratio = hp.HParam('generative_ratio',
                                        hp.IntInterval(0, 5))
        HP_dcgan_ratio = hp.HParam('dcgan_ratio', hp.RealInterval(0., 1.))
        HPARAMS_LIST.extend([HP_generative_ratio, HP_dcgan_ratio])
def get_tensorboard_hparams(rotate_scheme=('random', 'pca-xy'),
                            maybe_reflect_x=(False, True),
                            jitter=True,
                            scale=True,
                            rigid_transform=True,
                            perlin=True):
    """Build the TensorBoard ``HParam`` objects for cloud augmentation.

    Args:
        rotate_scheme: when a list or tuple, its entries form a discrete
            domain; any other value omits the hyperparameter.
        maybe_reflect_x: same convention as ``rotate_scheme``.
        jitter, scale, rigid_transform, perlin: truthy values include the
            corresponding interval hyperparameter(s).

    Returns:
        dict mapping a configuration key string to its ``HParam``.
    """
    from tensorboard.plugins.hparams import api as hp

    sequence_types = (list, tuple)
    result = {}

    if isinstance(rotate_scheme, sequence_types):
        result['augment_cloud.rotate_scheme'] = hp.HParam(
            'rotate_scheme', hp.Discrete(rotate_scheme))
    if isinstance(maybe_reflect_x, sequence_types):
        result['train/augment_cloud.maybe_reflect_x'] = hp.HParam(
            'maybe_reflect_x', hp.Discrete(maybe_reflect_x))

    # (enabled flag, config key, hparam name, lower bound, upper bound)
    real_valued = (
        (jitter, 'train/augment_cloud.jitter_stddev',
         'jitter_stddev', 1e-5, 1e-1),
        (scale, 'train/augment_cloud.scale_stddev',
         'scale', 1e-5, 2e-1),
        (rigid_transform, 'train/augment_cloud.rigid_transform_stddev',
         'rigid_transform', 1e-5, 1e-1),
    )
    for enabled, key, name, low, high in real_valued:
        if enabled:
            result[key] = hp.HParam(name, hp.RealInterval(low, high))

    if perlin:
        result['train/augment_cloud.perlin_grid_shape'] = hp.HParam(
            'perlin_grid_shape', hp.IntInterval(2, 5))
        result['train/augment_cloud.perlin_stddev'] = hp.HParam(
            'perlin_stddev', hp.RealInterval(1e-3, 0.5))

    return result
def __init__(self, logdir: str,
             hparams: Dict[str, Union[Tuple[float, float], List]],
             metrics: Dict[str, str]):
    """Declare hyperparameters and metrics for the HParams dashboard.

    Args:
        logdir: directory the summary file writer is created in.
        hparams: maps each hyperparameter name to either a
            ``(lower, upper)`` tuple (an interval) or a list of allowed
            values (a discrete domain).  A tuple containing a float is
            logged as a ``RealInterval`` with both bounds cast to float;
            a tuple of ints is logged as an ``IntInterval``.  Entries of
            any other shape are ignored.
        metrics: maps each metric tag to its display name.
    """
    self._hparams = []
    for name, param in hparams.items():
        if isinstance(param, tuple):
            lower, upper = param
            if isinstance(lower, float) or isinstance(upper, float):
                # RealInterval only accepts floats, so a mixed pair such
                # as (0, 1.0) is promoted instead of raising TypeError.
                domain = hp.RealInterval(min_value=float(lower),
                                         max_value=float(upper))
            elif isinstance(lower, int):
                domain = hp.IntInterval(min_value=lower, max_value=upper)
            else:
                continue
        elif isinstance(param, list):
            domain = hp.Discrete(param)
        else:
            continue
        self._hparams.append(hp.HParam(name, domain))

    self._metrics = metrics
    self._writer = tf.summary.create_file_writer(logdir=logdir)
    with self._writer.as_default():
        hp.hparams_config(
            hparams=self._hparams,
            metrics=[
                hp.Metric(tag, display_name=display)
                for tag, display in metrics.items()
            ],
        )
def evaluate(self):
    """Run the downstream linear-classification test, if labels exist.

    ``self._linear_classification_test`` receives a dict mapping
    TensorBoard ``HParam`` objects to their configured values, or ``None``
    if the instance already has a ``_hparams_config`` attribute.  Nothing
    happens when ``self._downstream_labels`` is ``None``.
    """
    if self._downstream_labels is None:
        return

    hparams = None
    # Hyperparameters are only assembled while no hparams config is set.
    if not hasattr(self, "_hparams_config"):
        from tensorboard.plugins.hparams import api as hp

        temperature = hp.HParam("temperature", hp.RealInterval(0., 10000.))
        num_hidden = hp.HParam("num_hidden", hp.IntInterval(1, 1000000))
        output_dim = hp.HParam("output_dim", hp.IntInterval(1, 1000000))
        hparams = {
            temperature: self.config["temperature"],
            num_hidden: self.config["num_hidden"],
            output_dim: self.config["output_dim"],
        }

    self._linear_classification_test(hparams)
def evaluate(self):
    """Record the buffer as an image and run the downstream test.

    ``self._buffer`` gets a new leading and a new trailing axis before
    being passed to ``self._record_images``.  When downstream labels
    exist, ``self._linear_classification_test`` receives a dict mapping
    TensorBoard ``HParam`` objects to their configured values, or ``None``
    if the instance already has a ``_hparams_config`` attribute.
    """
    with_leading_axis = tf.expand_dims(self._buffer, 0)
    buffer_image = tf.expand_dims(with_leading_axis, -1)
    self._record_images(buffer=buffer_image)

    if self._downstream_labels is None:
        return

    hparams = None
    # Hyperparameters are only assembled while no hparams config is set.
    if not hasattr(self, "_hparams_config"):
        from tensorboard.plugins.hparams import api as hp

        hparams = {}
        hparams[hp.HParam("tau", hp.RealInterval(0., 10000.))] = \
            self.config["tau"]
        hparams[hp.HParam("alpha", hp.RealInterval(0., 1.))] = \
            self.config["alpha"]
        hparams[hp.HParam("batches_in_buffer",
                          hp.IntInterval(1, 1000000))] = \
            self.config["batches_in_buffer"]
        hparams[hp.HParam("output_dim", hp.IntInterval(1, 1000000))] = \
            self.config["output_dim"]
        hparams[hp.HParam("sobel", hp.Discrete([True, False]))] = \
            self.input_config["sobel"]

    self._linear_classification_test(hparams)
def __init__(self,
             num_session_groups=10,
             HP_CONV_LAYERS=hp.HParam("conv_layers", hp.IntInterval(1, 3)),
             HP_CONV_KERNEL_SIZE=hp.HParam("conv_kernel_size", hp.Discrete([3, 5])),
             HP_DENSE_LAYERS=hp.HParam("dense_layers", hp.IntInterval(1, 3)),
             HP_DROPOUT=hp.HParam("dropout", hp.RealInterval(0.1, 0.4)),
             HP_OPTIMIZER=hp.HParam("optimizer", hp.Discrete(["adam", "adagrad"]))):
    """Store the hyperparameter definitions and the metrics to report.

    Args:
        num_session_groups: stored unchanged on the instance.
        HP_CONV_LAYERS, HP_CONV_KERNEL_SIZE, HP_DENSE_LAYERS, HP_DROPOUT,
        HP_OPTIMIZER: ``hp.HParam`` definitions; each is stored under the
            attribute of the same name and listed in ``self.HPARAMS``.
    """
    self.num_session_groups = num_session_groups

    self.HP_CONV_LAYERS = HP_CONV_LAYERS
    self.HP_CONV_KERNEL_SIZE = HP_CONV_KERNEL_SIZE
    self.HP_DENSE_LAYERS = HP_DENSE_LAYERS
    self.HP_DROPOUT = HP_DROPOUT
    self.HP_OPTIMIZER = HP_OPTIMIZER
    self.HPARAMS = [
        HP_CONV_LAYERS,
        HP_CONV_KERNEL_SIZE,
        HP_DENSE_LAYERS,
        HP_DROPOUT,
        HP_OPTIMIZER,
    ]

    # (tag, group, display name) for accuracy and loss on both splits.
    metric_specs = (
        ("epoch_accuracy", "train", "accuracy (train)"),
        ("epoch_loss", "train", "loss (train)"),
        ("epoch_accuracy", "validation", "accuracy (val.)"),
        ("epoch_loss", "validation", "loss (val.)"),
    )
    self.METRICS = [
        hp.Metric(tag, group=group, display_name=label)
        for tag, group, label in metric_specs
    ]
def log_hyperparameters():
    """Define what TensorBoard logs during hyperparameter tuning.

    Returns:
        logparams (list): ``hp.HParam`` definitions for ``latent_dim``,
            ``n_components`` and ``gram_weight``.
        metrics (list): ``hp.Metric`` definitions for the validation
            metrics to report.
    """
    latent_dim = hp.HParam(
        "latent_dim",
        hp.Discrete([2, 4, 6, 8, 12, 16]),
        display_name="latent_dim",
        description="encoding size dimensionality",
    )
    n_components = hp.HParam(
        "n_components",
        hp.IntInterval(min_value=1, max_value=25),
        display_name="n_components",
        description="latent component number",
    )
    gram_weight = hp.HParam(
        "gram_weight",
        hp.RealInterval(min_value=0.0, max_value=1.0),
        display_name="gram_weight",
        description="weight of the gram loss",
    )
    logparams = [latent_dim, n_components, gram_weight]

    # (metric tag, display name)
    metric_specs = (
        ("val_number_of_populated_clusters", "number of populated clusters"),
        ("val_reconstruction_loss", "reconstruction loss"),
        ("val_gram_loss", "gram loss"),
        ("val_vq_loss", "vq loss"),
        ("val_total_loss", "total loss"),
    )
    metrics = [
        hp.Metric(tag, display_name=label) for tag, label in metric_specs
    ]

    return logparams, metrics
def convert_hyperparams_to_hparams(
        hyperparams: hp_module.HyperParameters
) -> Dict[hparams_api.HParam, Any]:
    """Converts KerasTuner HyperParameters to TensorBoard HParams.

    Args:
        hyperparams: A KerasTuner HyperParameters instance

    Returns:
        A dict that maps TensorBoard HParams to current values.

    Raises:
        ValueError: if the space contains a hyperparameter type that is
            not Choice, Int, Float, Boolean or Fixed.
    """
    hparams = {}
    for hp in hyperparams.space:
        try:
            hparams_value = hyperparams.get(hp.name)
        except ValueError:
            # No current value can be read for this hyperparameter
            # (presumably an inactive conditional one), so it is skipped.
            continue

        if isinstance(hp, hp_module.Choice):
            hparams_domain = hparams_api.Discrete(hp.values)
        elif isinstance(hp, hp_module.Int):
            if hp.step is None or hp.step == 1:
                hparams_domain = hparams_api.IntInterval(
                    hp.min_value, hp.max_value)
            else:
                # Note: `hp.max_value` is inclusive, unlike the end index
                # of Python `range()`, which is exclusive
                values = list(range(hp.min_value, hp.max_value + 1, hp.step))
                hparams_domain = hparams_api.Discrete(values)
        elif isinstance(hp, hp_module.Float):
            if hp.step is None:
                hparams_domain = hparams_api.RealInterval(
                    hp.min_value, hp.max_value)
            else:
                # Note: `hp.max_value` is inclusive, which is also
                # the default for Numpy's linspace
                ratio = (hp.max_value - hp.min_value) / hp.step
                num_samples = int(ratio)
                # Float division can land a hair below a whole number
                # (0.3 / 0.1 == 2.9999999999999996); truncating that would
                # drop the inclusive maximum from the domain.
                if np.isclose(ratio, num_samples + 1, rtol=1e-9, atol=0.0):
                    num_samples += 1
                # Clamp so rounding in the product never exceeds the max.
                end_value = min(hp.min_value + (num_samples * hp.step),
                                hp.max_value)
                values = np.linspace(hp.min_value, end_value,
                                     num_samples + 1).tolist()
                hparams_domain = hparams_api.Discrete(values)
        elif isinstance(hp, hp_module.Boolean):
            hparams_domain = hparams_api.Discrete([True, False])
        elif isinstance(hp, hp_module.Fixed):
            hparams_domain = hparams_api.Discrete([hp.value])
        else:
            raise ValueError(
                "`HyperParameter` type not recognized: {}".format(hp))

        hparams_key = hparams_api.HParam(hp.name, hparams_domain)
        hparams[hparams_key] = hparams_value

    return hparams
def evaluate(self):
    """Record per-head P matrices and run the downstream classification test.

    With test data enabled (``self._test``), features of both elements of
    every test batch are computed with ``self.fcn`` and ``self.flatten``,
    concatenated along axis 0, and ``compute_p`` is recorded as an image
    for each head in ``self.heads`` and ``self.heads_oc``.

    When downstream labels exist, ``self._linear_classification_test``
    receives a dict mapping TensorBoard ``HParam`` objects to their
    configured values, or ``None`` if the instance already has a
    ``_hparams_config`` attribute.
    """
    if self._test:
        # compute features- in memory for now
        features_x, features_y = [], []
        for x, y in self._test_ds:
            features_x.append(self.flatten(self.fcn(x)))
            features_y.append(self.flatten(self.fcn(y)))
        features_x = tf.concat(features_x, 0)
        features_y = tf.concat(features_y, 0)

        # compute P for each head
        for index, head in enumerate(self.heads):
            self._record_images(
                **{"P_head_%s" % index: compute_p(features_x, features_y,
                                                  head)})
        for index, head in enumerate(self.heads_oc):
            self._record_images(
                **{"P_oc_head_%s" % index: compute_p(features_x, features_y,
                                                     head)})

    if self._downstream_labels is None:
        return

    hparams = None
    # Hyperparameters are only assembled while no hparams config is set.
    if not hasattr(self, "_hparams_config"):
        from tensorboard.plugins.hparams import api as hp

        k = hp.HParam(
            "k", hp.IntInterval(0, 1000000),
            description="output dimension for clustering head")
        h = hp.HParam(
            "h", hp.IntInterval(0, 1000000),
            description="number of clustering sub-heads")
        k_oc = hp.HParam(
            "k_oc", hp.IntInterval(0, 1000000),
            description="output dimension for overclustering head")
        entropy_weight = hp.HParam(
            "entropy_weight", hp.RealInterval(0., 1000000.),
            description="additional weight factor for entropy in loss function")
        r = hp.HParam(
            "r", hp.IntInterval(0, 1000000),
            description="number of times to repeat each image sequentially in the data pipeline")
        sobel = hp.HParam(
            "sobel", hp.Discrete([True, False]),
            description="whether Sobel filtering was applied to inputs")

        hparams = {
            k: self.config["k"],
            h: self.config["h"],
            k_oc: self.config["k_oc"],
            entropy_weight: self.config["entropy_weight"],
            r: self.config["r"],
            sobel: self.input_config["sobel"],
        }

    self._linear_classification_test(hparams)
def _initialize_model(self, writer):
    """Build a small two-layer Keras model plus its HParams callback.

    Sets ``self.hparams`` (one string-keyed and one ``HParam``-keyed
    entry), ``self.model`` (compiled with MSE loss) and ``self.callback``
    (an ``hp.KerasCallback`` bound to ``writer``).
    """
    dense_neurons = hp.HParam("dense_neurons", hp.IntInterval(4, 16))
    self.hparams = {"optimizer": "adam", dense_neurons: 8}

    hidden_layer = tf.keras.layers.Dense(self.hparams[dense_neurons],
                                         input_shape=(1,))
    output_layer = tf.keras.layers.Dense(1, activation="sigmoid")
    self.model = tf.keras.models.Sequential([hidden_layer, output_layer])
    self.model.compile(loss="mse", optimizer=self.hparams["optimizer"])

    self.callback = hp.KerasCallback(writer,
                                     self.hparams,
                                     group_name="psl27")
def kt_to_hparam(hp: kt.HyperParameter) -> hp_lib.HParam:
    """Convert a KerasTuner hyperparameter into a TensorBoard ``HParam``.

    Float and Int become real and integer intervals over
    ``[min_value, max_value]``; Boolean, Fixed and Choice become discrete
    domains.

    Raises:
        TypeError: if ``hp`` is none of the supported hyperparameter types.
    """
    kt_types = kt.engine.hyperparameters

    if isinstance(hp, kt_types.Float):
        domain = hp_lib.RealInterval(hp.min_value, hp.max_value)
    elif isinstance(hp, kt_types.Int):
        domain = hp_lib.IntInterval(hp.min_value, hp.max_value)
    elif isinstance(hp, kt_types.Boolean):
        domain = hp_lib.Discrete([False, True], dtype=bool)
    elif isinstance(hp, kt_types.Fixed):
        domain = hp_lib.Discrete([hp.value])
    elif isinstance(hp, kt_types.Choice):
        domain = hp_lib.Discrete(hp.values)
    else:
        # Message previously read "Unsupposed hyperparamter type".
        raise TypeError(f"Unsupported hyperparameter type {hp}")

    return hp_lib.HParam(hp.name, domain)
def __init__(self,
             *,
             input_size,
             n_classes,
             activations=("relu", ),
             losses=("categorical_crossentropy", )):
    """Store the problem dimensions and declare hyperparameters and metrics.

    Args:
        input_size: stored unchanged as ``self.input_size``.
        n_classes: stored unchanged as ``self.n_classes``.
        activations: allowed values of the ``layer_activation``
            hyperparameter.
        losses: allowed values of the ``loss_function`` hyperparameter.
    """
    self.input_size = input_size
    self.n_classes = n_classes
    self._model = None

    n_layers = hp.HParam("n_layers",
                         domain=hp.IntInterval(1, 100),
                         display_name="Depth")
    layer_size = hp.HParam("layer_size",
                           domain=hp.IntInterval(1, 100),
                           display_name="Width")
    layer_activation = hp.HParam("layer_activation",
                                 domain=hp.Discrete(activations),
                                 display_name="Activation Fct.")
    reg = hp.HParam("reg",
                    domain=hp.RealInterval(0., 1.),
                    display_name="Reg.")
    loss_function = hp.HParam("loss_function",
                              domain=hp.Discrete(losses),
                              display_name="Loss Fct.")

    self._hparam = {
        "n_layers": n_layers,
        "layer_size": layer_size,
        "layer_activation": layer_activation,
        "reg": reg,
        "loss_function": loss_function,
    }
    self._metrics = {
        "accuracy": hp.Metric("accuracy", display_name="Accuracy"),
    }
def _add_distributions(
        self, distributions: Dict[str, optuna.distributions.BaseDistribution]
) -> None:
    """Register a TensorBoard ``HParam`` for every Optuna distribution.

    Real-valued distributions become a ``RealInterval`` (bounds cast to
    float), integer distributions an ``IntInterval`` and categorical
    distributions a ``Discrete`` domain.  Results are stored in
    ``self._hp_params`` under the parameter name.

    Raises:
        NotImplementedError: for a distribution of any other type.
    """
    dists = optuna.distributions
    real_kinds = (
        dists.UniformDistribution,
        dists.LogUniformDistribution,
        dists.DiscreteUniformDistribution,
        dists.FloatDistribution,
    )
    int_kinds = (
        dists.IntUniformDistribution,
        dists.IntLogUniformDistribution,
        dists.IntDistribution,
    )
    categorical_kinds = (dists.CategoricalDistribution, )
    supported_kinds = real_kinds + int_kinds + categorical_kinds

    for name, dist in distributions.items():
        if isinstance(dist, real_kinds):
            domain = hp.RealInterval(float(dist.low), float(dist.high))
        elif isinstance(dist, int_kinds):
            domain = hp.IntInterval(dist.low, dist.high)
        elif isinstance(dist, categorical_kinds):
            domain = hp.Discrete(dist.choices)
        else:
            distribution_list = [kind.__name__ for kind in supported_kinds]
            raise NotImplementedError(
                "The distribution {} is not implemented. "
                "The parameter distribution should be one of the {}".format(
                    dist, distribution_list))
        self._hp_params[name] = hp.HParam(name, domain)
def _add_distributions(
        self, distributions: Dict[str, optuna.distributions.BaseDistribution]
) -> None:
    """Register a TensorBoard ``HParam`` for every Optuna distribution.

    Uniform, log-uniform and discrete-uniform distributions become a
    ``RealInterval`` (bounds cast to float, which ``RealInterval``
    requires), integer-uniform distributions an ``IntInterval`` and
    categorical distributions a ``Discrete`` domain.  Results are stored
    in ``self._hp_params`` under the parameter name.

    Raises:
        NotImplementedError: for a distribution of any other type.
    """
    dists = optuna.distributions
    # DiscreteUniformDistribution used to be passed to
    # ``hp.Discrete(low, high)``, which treats ``low`` as the list of
    # values and ``high`` as the dtype; it is a real-valued range.
    real_kinds = (
        dists.UniformDistribution,
        dists.LogUniformDistribution,
        dists.DiscreteUniformDistribution,
    )

    for param_name, param_distribution in distributions.items():
        if isinstance(param_distribution, real_kinds):
            domain = hp.RealInterval(float(param_distribution.low),
                                     float(param_distribution.high))
        elif isinstance(param_distribution, dists.IntUniformDistribution):
            domain = hp.IntInterval(param_distribution.low,
                                    param_distribution.high)
        elif isinstance(param_distribution, dists.CategoricalDistribution):
            domain = hp.Discrete(param_distribution.choices)
        else:
            distribution_list = [
                dists.UniformDistribution.__name__,
                dists.LogUniformDistribution.__name__,
                dists.DiscreteUniformDistribution.__name__,
                dists.IntUniformDistribution.__name__,
                dists.CategoricalDistribution.__name__,
            ]
            raise NotImplementedError(
                "The distribution {} is not implemented. "
                "The parameter distribution should be one of the {}".format(
                    param_distribution, distribution_list))
        self._hp_params[param_name] = hp.HParam(param_name, domain)
def convert_hyperparams_to_hparams(hyperparams):
    """Converts KerasTuner HyperParameters to TensorBoard HParams.

    Returns a dict mapping each TensorBoard ``HParam`` to the current
    value of the corresponding hyperparameter.  Hyperparameters whose
    value lookup raises ``ValueError`` are left out.  An unrecognized
    hyperparameter type raises ``ValueError``.
    """
    converted = {}
    for hp in hyperparams.space:
        try:
            current_value = hyperparams.get(hp.name)
        except ValueError:
            continue

        if isinstance(hp, hp_module.Choice):
            domain = hparams_api.Discrete(hp.values)
        elif isinstance(hp, hp_module.Int):
            if hp.step is None or hp.step == 1:
                domain = hparams_api.IntInterval(hp.min_value, hp.max_value)
            else:
                # `hp.max_value` is inclusive while `range()` excludes its
                # end index, hence the `+ 1`.
                stepped = range(hp.min_value, hp.max_value + 1, hp.step)
                domain = hparams_api.Discrete(list(stepped))
        elif isinstance(hp, hp_module.Float):
            if hp.step is None:
                domain = hparams_api.RealInterval(hp.min_value, hp.max_value)
            else:
                # `hp.max_value` is inclusive while `np.arange()` excludes
                # its end value, hence the small offset.
                stepped = np.arange(hp.min_value,
                                    hp.max_value + 1e-7,
                                    step=hp.step)
                domain = hparams_api.Discrete(stepped.tolist())
        elif isinstance(hp, hp_module.Boolean):
            domain = hparams_api.Discrete([True, False])
        elif isinstance(hp, hp_module.Fixed):
            domain = hparams_api.Discrete([hp.value])
        else:
            raise ValueError(
                "`HyperParameter` type not recognized: {}".format(hp))

        converted[hparams_api.HParam(hp.name, domain)] = current_value

    return converted
def hparam_from_config_space_item(self, c_item):
    """Build a TensorBoard ``HParam`` from one serialized config-space item.

    ``c_item`` is a mapping with a ``'class_name'`` entry and a
    ``'config'`` mapping that holds ``'name'`` and the type-specific
    fields.  ``'Int'`` and ``'Float'`` become intervals over
    ``min_value``/``max_value``; ``'Fixed'`` and ``'Choice'`` become
    discrete domains.

    Returns:
        The ``hp.HParam``, or ``None`` for any other class name.
    """
    name = c_item['config']['name']
    kind = c_item['class_name']
    cfg = c_item['config']

    # Domain construction is deferred so that only the matching entry runs.
    domain_builders = {
        'Int': lambda: hp.IntInterval(cfg['min_value'], cfg['max_value']),
        'Float': lambda: hp.RealInterval(cfg['min_value'], cfg['max_value']),
        'Fixed': lambda: hp.Discrete([cfg['value']]),
        'Choice': lambda: hp.Discrete(cfg['values']),
    }
    build_domain = domain_builders.get(kind)
    if build_domain is None:
        return None
    return hp.HParam(name=name, domain=build_domain())
def _configure_hparams(logdir, dicts,
                       metrics=["linear_classification_accuracy",
                                "alignment", "uniformity"]):
    """
    Set up the tensorboard hyperparameter interface

    :logdir: string; path to log directory
    :dicts: list of dictionaries containing hyperparameter values
    :metrics: list of strings; metric names
    """
    metric_objects = [hp.Metric(name) for name in metrics]

    def _domain_for(key, value):
        # Categorical parameters come first; bool is tested before int
        # because bool is a subclass of int.
        if key in SPECIAL_HPARAMS:
            return hp.Discrete(SPECIAL_HPARAMS[key])
        if isinstance(value, bool):
            return hp.Discrete([True, False])
        if isinstance(value, int):
            return hp.IntInterval(1, 1000000)
        if isinstance(value, float):
            return hp.RealInterval(0., 10000000.)
        return None

    params = {}
    for param_dict in dicts:
        for key, value in param_dict.items():
            domain = _domain_for(key, value)
            if domain is not None:
                params[hp.HParam(key, domain)] = value

    hp.hparams_config(hparams=list(params.keys()), metrics=metric_objects)

    # The run name is the last path component; a trailing separator leaves
    # it empty, in which case the parent path is split once more.
    base_dir, run_name = os.path.split(logdir)
    if not run_name:
        run_name = os.path.split(base_dir)[1]

    # record hyperparameters
    hp.hparams(params, trial_id=run_name)
def test_backward_endpoints(self):
    """Check that `IntInterval` rejects a lower bound above the upper bound."""
    expected_message = "123 > 45"
    with six.assertRaisesRegex(self, ValueError, expected_message):
        hp.IntInterval(123, 45)
def random_hparam_search(cfg, data, callbacks, log_dir):
    '''
    Conduct a random hyperparameter search over the ranges given for the
    hyperparameters in config.yml and log results in TensorBoard. Model is
    trained x times for y random combinations of hyperparameters.
    :param cfg: Project config
    :param data: Dict containing the partitioned datasets
    :param callbacks: List of callbacks for Keras model (excluding TensorBoard)
    :param log_dir: Base directory in which to store logs
    :return: None. Results are only written to TensorBoard; cfg is updated
             in place with the hyperparameter values of each run.
    '''

    # Define HParam objects for each hyperparameter we wish to tune.
    hp_ranges = cfg['HP_SEARCH']['RANGES']

    def _discrete(name):
        return hp.HParam(name, hp.Discrete(hp_ranges[name]))

    def _int_interval(name):
        return hp.HParam(name, hp.IntInterval(hp_ranges[name][0],
                                              hp_ranges[name][1]))

    def _real_interval(name):
        return hp.HParam(name, hp.RealInterval(hp_ranges[name][0],
                                               hp_ranges[name][1]))

    hparam_defs = [
        _discrete('KERNEL_SIZE'),
        _discrete('MAXPOOL_SIZE'),
        _discrete('INIT_FILTERS'),
        _int_interval('FILTER_EXP_BASE'),
        _discrete('NODES_DENSE0'),
        _int_interval('CONV_BLOCKS'),
        _discrete('DROPOUT'),
        _real_interval('LR'),
        _discrete('OPTIMIZER'),
        _discrete('L2_LAMBDA'),
        _discrete('BATCH_SIZE'),
        _discrete('IMB_STRATEGY'),
    ]

    # Define test set metrics that we wish to log to TensorBoard for each training run
    HP_METRICS = [hp.Metric(metric, display_name='Test ' + metric)
                  for metric in cfg['HP_SEARCH']['METRICS']]

    # Configure TensorBoard to log the results
    with tf.summary.create_file_writer(log_dir).as_default():
        hp.hparams_config(hparams=hparam_defs, metrics=HP_METRICS)

    # Complete a number of training runs at different hparam values and log the results.
    repeats_per_combo = cfg['HP_SEARCH']['REPEATS']   # Number of times to train the model per combination of hparams
    num_combos = cfg['HP_SEARCH']['COMBINATIONS']     # Number of random combinations of hparams to attempt
    num_sessions = num_combos * repeats_per_combo     # Total number of runs in this experiment
    model_type = 'DCNN_BINARY' if cfg['TRAIN']['CLASS_MODE'] == 'binary' else 'DCNN_MULTICLASS'
    trial_id = 0
    for _ in range(num_combos):
        rand = random.Random()
        # Sample into a separate dict so the list of definitions is never
        # rebound to a different type between iterations.
        sampled = {h: h.domain.sample_uniform(rand) for h in hparam_defs}
        hparams = {h.name: value for h, value in sampled.items()}  # To pass to model definition
        for _ in range(repeats_per_combo):
            trial_id += 1
            print("Running training session %d/%d" % (trial_id, num_sessions))
            print("Hparam values: ", hparams)
            trial_logdir = os.path.join(log_dir, str(trial_id))  # Need specific logdir for each trial
            callbacks_hp = callbacks + [TensorBoard(log_dir=trial_logdir, profile_batch=0, write_graph=False)]

            # Set values of hyperparameters for this run in config file.
            for name, value in hparams.items():
                if name in ['LR', 'L2_LAMBDA']:
                    value = 10 ** value  # These hyperparameters are sampled on the log scale.
                cfg['NN'][model_type][name] = value

            # Set some hyperparameters that are not specified in model definition.
            cfg['TRAIN']['BATCH_SIZE'] = hparams['BATCH_SIZE']
            cfg['TRAIN']['IMB_STRATEGY'] = hparams['IMB_STRATEGY']

            # Run a training session and log the performance metrics on the test set to HParams dashboard in TensorBoard
            with tf.summary.create_file_writer(trial_logdir).as_default():
                hp.hparams(sampled, trial_id=str(trial_id))
                model, test_metrics, test_generator = train_model(cfg, data, callbacks_hp, verbose=0)
                for metric in HP_METRICS:
                    # NOTE(review): `_tag` is a private attribute of hp.Metric.
                    if metric._tag in test_metrics:
                        tf.summary.scalar(metric._tag, test_metrics[metric._tag], step=1)  # Log test metric
    return
import tensorflow as tf from typing import Dict, Any from tensorboard.plugins.hparams import api as hp HP_EPOCHS = hp.HParam('epochs', hp.IntInterval(1, 50)) HP_BATCH_SIZE = hp.HParam('batch_size', hp.Discrete([64, 128, 256, 512, 1024, 2048])) HPARAMS = [HP_EPOCHS, HP_BATCH_SIZE] METRICS = [ hp.Metric('epoch_loss', group='train', display_name='epoch_loss (train)'), hp.Metric('epoch_loss', group='validation', display_name='epoch_loss (validation)'), hp.Metric('epoch_auc', group='train', display_name='epoch_auc (train)'), hp.Metric('epoch_auc', group='validation', display_name='epoch_auc (validation)'), ] def hparams_init( epochs: int, batch_size: int, log_dir: str, ) -> Dict[hp.HParam, Any]: # log directories with tf.summary.create_file_writer(log_dir).as_default(): hp.hparams_config(hparams=HPARAMS, metrics=METRICS) # hyperparameter setting
600, "Summaries will be written every n steps, where n is the value of " "this flag.", ) flags.DEFINE_integer( "num_epochs", 5, "Number of epochs per trial.", ) # We'll use MNIST for this example. DATASET = tf.keras.datasets.mnist INPUT_SHAPE = (28, 28) OUTPUT_CLASSES = 10 HP_CONV_LAYERS = hp.HParam("conv_layers", hp.IntInterval(1, 3)) HP_CONV_KERNEL_SIZE = hp.HParam("conv_kernel_size", hp.Discrete([3, 5])) HP_DENSE_LAYERS = hp.HParam("dense_layers", hp.IntInterval(1, 3)) HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.1, 0.4)) HP_OPTIMIZER = hp.HParam("optimizer", hp.Discrete(["adam", "adagrad"])) HPARAMS = [ HP_CONV_LAYERS, HP_CONV_KERNEL_SIZE, HP_DENSE_LAYERS, HP_DROPOUT, HP_OPTIMIZER, ] METRICS = [ hp.Metric(
def create_callbacks(model, training_model, prediction_model,
                     validation_generator, args):
    """ Creates the callbacks to use during training.

    Args
        model: The base model.
        training_model: The model that is used for training.
        prediction_model: The model that should be used for validation.
        validation_generator: The generator for creating validation data.
        args: parseargs args object.

    Returns:
        A list of callbacks used for training.
    """
    callbacks = []
    tensorboard_callback = None

    if args.tensorboard_dir:
        makedirs(args.tensorboard_dir)
        tensorboard_callback = keras.callbacks.TensorBoard(
            log_dir=args.tensorboard_dir,
            update_freq='epoch',
            histogram_freq=0,
            batch_size=args.batch_size,
            write_graph=False,
            write_grads=False,
            write_images=False,
            embeddings_freq=0,
            embeddings_layer_names=None,
            embeddings_metadata=None)

        # The HParams callback writes to the TensorBoard directory, so it
        # is only created when that directory is configured.
        # Each HP_* name now matches the hyperparameter it declares; the
        # min/max side objects were previously bound to each other's name,
        # which logged each value under the wrong hyperparameter.
        HP_MIN_SIDE = hp.HParam('image_min_side', hp.IntInterval(100, 800))
        HP_MAX_SIDE = hp.HParam('image_max_side', hp.IntInterval(1500, 1800))
        HP_BACKBONE = hp.HParam(
            'backbone',
            hp.Discrete([
                'resnet50', 'resnet152', 'mobilenet128_0.75',
                'mobilenet224_1.0', 'retinanet', 'densenet', 'vgg16',
                'vgg19', 'EfficientNetB7', 'EfficientNetB6', 'EfficientNet5'
            ]))
        HP_AUGMENTATION = hp.HParam('augmentation',
                                    hp.Discrete(['true', 'false']))
        HP_FREEZEBACKBONE = hp.HParam('backbone_frozen',
                                      hp.Discrete(['true', 'false']))
        HP_ANCHOROPTI = hp.HParam('anchors_optimized',
                                  hp.Discrete(['true', 'false']))
        HP_DATEINT = hp.HParam('date_asint',
                               hp.IntInterval(1, 30000000000000))
        HP_NORESIZE = hp.HParam('no_resize', hp.Discrete(['true', 'false']))
        HP_AUGMENTATION_FACTOR = hp.HParam('augmentation_factor',
                                           hp.RealInterval(0.0, 10.0))
        HP_VISUAL_AUG_FACTOR = hp.HParam('visual_aug_factor',
                                         hp.RealInterval(0.0, 10.0))
        HP_NUM_TRAIN_IMAGES = hp.HParam('num_train_imgs',
                                        hp.IntInterval(1, 10000))
        HP_PRETRAINED_ON = hp.HParam(
            'pretrained_on',
            hp.Discrete(['Wider Person', 'ImageNet', 'Coco']))
        HP_LR = hp.HParam('learning_rate', hp.RealInterval(1e-6, 1e-3))

        def _flag(enabled):
            # Boolean options are logged as the strings 'true' / 'false'.
            return 'true' if enabled else 'false'

        # Spelling must match the 'pretrained_on' domain declared above.
        if args.weights is not None:
            pretrained = 'Coco'
        elif args.snapshot is not None:
            pretrained = 'Wider Person'
        else:
            pretrained = 'ImageNet'

        hparams = {
            HP_MIN_SIDE: args.image_min_side,
            HP_MAX_SIDE: args.image_max_side,
            HP_BACKBONE: args.backbone,
            HP_AUGMENTATION: _flag(args.random_transform),
            HP_FREEZEBACKBONE: _flag(args.freeze_backbone),
            HP_ANCHOROPTI: _flag(args.config),
            HP_DATEINT: int(datetime.now().strftime("%m%d%H")),
            HP_VISUAL_AUG_FACTOR: args.visual_aug_factor,
            HP_AUGMENTATION_FACTOR: args.augmentation_factor,
            HP_NORESIZE: _flag(args.no_resize),
            HP_NUM_TRAIN_IMAGES: args.num_train_imgs,
            HP_PRETRAINED_ON: pretrained,
            HP_LR: args.lr,
        }
        callbacks.append(hp.KerasCallback(args.tensorboard_dir, hparams))

    if args.evaluation and validation_generator:
        evaluation = Evaluate(validation_generator,
                              tensorboard=tensorboard_callback,
                              weighted_average=args.weighted_average)
        evaluation = RedirectModel(evaluation, prediction_model)
        callbacks.append(evaluation)

    # save the model
    if args.snapshots:
        # ensure directory created first; otherwise h5py will error after epoch.
        makedirs(args.snapshot_path)
        # NOTE(review): the "%-M" / "%-S" directives are platform-specific.
        timestamp = datetime.now().strftime("%d-%m-%Y_%H%-M%-S")
        print('Time now and foldername in Tensorboard: ', timestamp)
        checkpoint = keras.callbacks.ModelCheckpoint(
            os.path.join(
                args.snapshot_path,
                '{daytime}_{backbone}_{{epoch:02d}}_{num_trainer}.h5'.format(
                    daytime=timestamp,
                    backbone=args.backbone,
                    num_trainer=args.num_trainer)),
            verbose=1,
            save_best_only=True,
            monitor="mAP",
            mode='max')
        checkpoint = RedirectModel(checkpoint, model)
        callbacks.append(checkpoint)

    if args.tensorboard_dir:
        callbacks.append(tensorboard_callback)

    return callbacks
def main():
    """Pretrain an ALBERT or BERT model with Horovod.

    Parses the model/data/training/logging argument dataclasses, builds the
    model, learning-rate schedule and optimizer, optionally restores weights,
    then iterates over the training dataset. Logging, checkpointing,
    validation and TensorBoard summaries are performed on rank 0 only; SQuAD
    evaluation is invoked on every rank at the configured steps and on the
    final step.

    Raises:
        ValueError: If ``model_args.model_type`` is neither "albert" nor "bert".
    """
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments,
                               TrainingArguments, LoggingArguments))
    model_args, data_args, train_args, log_args = parser.parse_args_into_dataclasses()

    tf.random.set_seed(train_args.seed)
    tf.autograph.set_verbosity(0)

    # Settings init
    def parse_bool(arg):
        # Boolean options arrive as the strings "true" / "false".
        return arg == "true"

    do_gradient_accumulation = train_args.gradient_accumulation_steps > 1
    do_xla = not parse_bool(train_args.skip_xla)
    do_eager = parse_bool(train_args.eager)
    skip_sop = parse_bool(train_args.skip_sop)
    skip_mlm = parse_bool(train_args.skip_mlm)
    pre_layer_norm = parse_bool(model_args.pre_layer_norm)
    # NOTE(review): fast_squad / dummy_eval are parsed here but the SQuAD call
    # below receives the raw log_args strings — confirm which form
    # get_squad_results_while_pretraining expects.
    fast_squad = parse_bool(log_args.fast_squad)
    dummy_eval = parse_bool(log_args.dummy_eval)
    squad_steps = get_squad_steps(log_args.extra_squad_steps)
    is_sagemaker = data_args.fsx_prefix.startswith("/opt/ml")
    disable_tqdm = is_sagemaker
    global max_grad_norm
    max_grad_norm = train_args.max_grad_norm

    # Horovod init
    hvd.init()
    gpus = tf.config.list_physical_devices("GPU")
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.set_visible_devices(gpus[hvd.local_rank()], "GPU")
    # XLA, AutoGraph
    tf.config.optimizer.set_jit(do_xla)
    tf.config.experimental_run_functions_eagerly(do_eager)

    if hvd.rank() == 0:
        # Run name should only be used on one process to avoid race conditions
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        platform = "sm" if is_sagemaker else "eks"
        if skip_sop:
            loss_str = "-skipsop"
        elif skip_mlm:
            loss_str = "-skipmlm"
        else:
            loss_str = ""

        metadata = (f"{model_args.model_type}"
                    f"-{model_args.model_size}"
                    f"-{model_args.load_from}"
                    f"-{hvd.size()}gpus"
                    f"-{train_args.batch_size}batch"
                    f"-{train_args.gradient_accumulation_steps}accum"
                    f"-{train_args.learning_rate}maxlr"
                    f"-{train_args.end_learning_rate}endlr"
                    f"-{train_args.learning_rate_decay_power}power"
                    f"-{train_args.max_grad_norm}maxgrad"
                    f"-{train_args.optimizer}opt"
                    f"-{train_args.total_steps}steps"
                    f"-{data_args.max_seq_length}seq"
                    f"-{data_args.max_predictions_per_seq}preds"
                    f"-{'preln' if pre_layer_norm else 'postln'}"
                    f"{loss_str}"
                    f"-{model_args.hidden_dropout_prob}dropout"
                    f"-{train_args.seed}seed")
        run_name = f"{current_time}-{platform}-{metadata}-{train_args.name if train_args.name else 'unnamed'}"

        # Logging should only happen on a single process
        # https://stackoverflow.com/questions/9321741/printing-to-screen-and-writing-to-a-file-at-the-same-time
        level = logging.INFO
        log_format = "%(asctime)-15s %(name)-12s: %(levelname)-8s %(message)s"
        handlers = [
            logging.FileHandler(
                f"{data_args.fsx_prefix}/logs/albert/{run_name}.log"),
            TqdmLoggingHandler(),
        ]
        logging.basicConfig(level=level, format=log_format, handlers=handlers)

        # Check that arguments passed in properly, only after registering the alert_func and logging
        assert not (skip_sop and skip_mlm), "Cannot use --skip_sop and --skip_mlm"

    wrap_global_functions(do_gradient_accumulation)

    if model_args.model_type == "albert":
        model_desc = f"albert-{model_args.model_size}-v2"
    elif model_args.model_type == "bert":
        model_desc = f"bert-{model_args.model_size}-uncased"
    else:
        # Fail with a clear message instead of a NameError on model_desc.
        raise ValueError(
            f"Unsupported model_type: {model_args.model_type!r}")

    config = AutoConfig.from_pretrained(model_desc)
    config.pre_layer_norm = pre_layer_norm
    config.hidden_dropout_prob = model_args.hidden_dropout_prob
    model = TFAutoModelForPreTraining.from_config(config)

    # Create optimizer and enable AMP loss scaling.
    schedule = LinearWarmupPolyDecaySchedule(
        max_learning_rate=train_args.learning_rate,
        end_learning_rate=train_args.end_learning_rate,
        warmup_steps=train_args.warmup_steps,
        total_steps=train_args.total_steps,
        power=train_args.learning_rate_decay_power,
    )
    if train_args.optimizer == "lamb":
        opt = LAMB(
            learning_rate=schedule,
            weight_decay_rate=0.01,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
        )
    elif train_args.optimizer == "adam":
        opt = AdamW(weight_decay=0.0, learning_rate=schedule)
    opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(
        opt, loss_scale="dynamic")
    gradient_accumulator = GradientAccumulator()

    loaded_opt_weights = None
    if model_args.load_from == "scratch":
        pass
    elif model_args.load_from.startswith("huggingface"):
        assert (model_args.model_type == "albert"
                ), "Only loading pretrained albert models is supported"
        huggingface_name = f"albert-{model_args.model_size}-v2"
        if model_args.load_from == "huggingface":
            albert = TFAlbertModel.from_pretrained(huggingface_name,
                                                   config=config)
            model.albert = albert
    else:
        model_ckpt, opt_ckpt = get_checkpoint_paths_from_prefix(
            model_args.checkpoint_path)

        model = TFAutoModelForPreTraining.from_config(config)
        if hvd.rank() == 0:
            model.load_weights(model_ckpt)
            # NOTE(review): allow_pickle=True unpickles arbitrary objects;
            # only load optimizer checkpoints from trusted locations.
            loaded_opt_weights = np.load(opt_ckpt, allow_pickle=True)
            # We do not set the weights yet, we have to do a first step to initialize the optimizer.

    # Train filenames are [1, 2047], Val filenames are [0]. Note the different subdirectories
    # Move to same folder structure and remove if/else
    if model_args.model_type == "albert":
        train_glob = f"{data_args.fsx_prefix}/albert_pretraining/tfrecords/train/max_seq_len_{data_args.max_seq_length}_max_predictions_per_seq_{data_args.max_predictions_per_seq}_masked_lm_prob_15/albert_*.tfrecord"
        validation_glob = f"{data_args.fsx_prefix}/albert_pretraining/tfrecords/validation/max_seq_len_{data_args.max_seq_length}_max_predictions_per_seq_{data_args.max_predictions_per_seq}_masked_lm_prob_15/albert_*.tfrecord"
    if model_args.model_type == "bert":
        train_glob = f"{data_args.fsx_prefix}/bert_pretraining/max_seq_len_{data_args.max_seq_length}_max_predictions_per_seq_{data_args.max_predictions_per_seq}_masked_lm_prob_15/training/*.tfrecord"
        validation_glob = f"{data_args.fsx_prefix}/bert_pretraining/max_seq_len_{data_args.max_seq_length}_max_predictions_per_seq_{data_args.max_predictions_per_seq}_masked_lm_prob_15/validation/*.tfrecord"

    train_filenames = glob.glob(train_glob)
    validation_filenames = glob.glob(validation_glob)

    train_dataset = get_mlm_dataset(
        filenames=train_filenames,
        max_seq_length=data_args.max_seq_length,
        max_predictions_per_seq=data_args.max_predictions_per_seq,
        batch_size=train_args.batch_size,
    )  # Of shape [batch_size, ...]
    # Batch of batches, helpful for gradient accumulation. Shape [grad_steps, batch_size, ...]
    train_dataset = train_dataset.batch(train_args.gradient_accumulation_steps)
    # One iteration with 10 dupes, 8 nodes seems to be 60-70k steps.
    train_dataset = train_dataset.prefetch(buffer_size=8)

    # Validation should only be done on one node, since Horovod doesn't allow allreduce on a subset of ranks
    if hvd.rank() == 0:
        validation_dataset = get_mlm_dataset(
            filenames=validation_filenames,
            max_seq_length=data_args.max_seq_length,
            max_predictions_per_seq=data_args.max_predictions_per_seq,
            batch_size=train_args.batch_size,
        )
        # validation_dataset = validation_dataset.batch(1)
        validation_dataset = validation_dataset.prefetch(buffer_size=8)

        # tqdm's first positional parameter is the iterable; the step count
        # must be passed as `total` for the bar to show progress towards it.
        pbar = tqdm.tqdm(total=train_args.total_steps, disable=disable_tqdm)
        summary_writer = None  # Only create a writer if we make it through a successful step
        logger.info(f"Starting training, job name {run_name}")

    i = 0
    start_time = time.perf_counter()
    for batch in train_dataset:
        learning_rate = schedule(step=tf.constant(i, dtype=tf.float32))
        loss_scale = opt.loss_scale()
        loss, mlm_loss, mlm_acc, sop_loss, sop_acc, grad_norm, weight_norm = train_step(
            model=model,
            opt=opt,
            gradient_accumulator=gradient_accumulator,
            batch=batch,
            gradient_accumulation_steps=train_args.gradient_accumulation_steps,
            skip_sop=skip_sop,
            skip_mlm=skip_mlm,
        )

        # Don't want to wrap broadcast_variables() in a tf.function, can lead to asynchronous errors
        if i == 0:
            if hvd.rank() == 0 and loaded_opt_weights is not None:
                opt.set_weights(loaded_opt_weights)
            hvd.broadcast_variables(model.variables, root_rank=0)
            hvd.broadcast_variables(opt.variables(), root_rank=0)
            # Resynchronize the step counter from the optimizer's first
            # weight, so a restored optimizer resumes at its saved step.
            i = opt.get_weights()[0] - 1

        is_final_step = i >= train_args.total_steps - 1
        do_squad = i in squad_steps or is_final_step
        # Squad requires all the ranks to train, but results are only returned on rank 0
        if do_squad:
            squad_results = get_squad_results_while_pretraining(
                model=model,
                model_size=model_args.model_size,
                fsx_prefix=data_args.fsx_prefix,
                step=i,
                fast=log_args.fast_squad,
                dummy_eval=log_args.dummy_eval,
            )
            if hvd.rank() == 0:
                squad_exact, squad_f1 = squad_results["exact"], squad_results[
                    "f1"]
                logger.info(
                    f"SQuAD step {i} -- F1: {squad_f1:.3f}, Exact: {squad_exact:.3f}"
                )
            # Re-wrap autograph so it doesn't get arg mismatches
            wrap_global_functions(do_gradient_accumulation)

        if hvd.rank() == 0:
            do_log = i % log_args.log_frequency == 0
            do_checkpoint = (
                (i > 0) and
                (i % log_args.checkpoint_frequency == 0)) or is_final_step
            do_validation = (
                (i > 0) and
                (i % log_args.validation_frequency == 0)) or is_final_step

            pbar.update(1)
            description = f"Loss: {loss:.3f}, MLM: {mlm_loss:.3f}, SOP: {sop_loss:.3f}, MLM_acc: {mlm_acc:.3f}, SOP_acc: {sop_acc:.3f}"
            pbar.set_description(description)
            if do_log:
                elapsed_time = time.perf_counter() - start_time
                if i == 0:
                    logger.info(f"First step: {elapsed_time:.3f} secs")
                else:
                    it_per_sec = log_args.log_frequency / elapsed_time
                    logger.info(
                        f"Train step {i} -- {description} -- It/s: {it_per_sec:.2f}"
                    )
                # Restart the timing window after every log line.
                start_time = time.perf_counter()

            if do_checkpoint:
                checkpoint_prefix = f"{data_args.fsx_prefix}/checkpoints/albert/{run_name}-step{i}"
                model_ckpt = f"{checkpoint_prefix}.ckpt"
                opt_ckpt = f"{checkpoint_prefix}-opt.npy"
                logger.info(
                    f"Saving model at {model_ckpt}, optimizer at {opt_ckpt}")
                model.save_weights(model_ckpt)
                # model.load_weights(model_ckpt)

                opt_weights = opt.get_weights()
                np.save(opt_ckpt, opt_weights)
                # opt.set_weights(opt_weights)

            if do_validation:
                val_loss, val_mlm_loss, val_mlm_acc, val_sop_loss, val_sop_acc = run_validation(
                    model=model,
                    validation_dataset=validation_dataset,
                    skip_sop=skip_sop,
                    skip_mlm=skip_mlm,
                )
                description = f"Loss: {val_loss:.3f}, MLM: {val_mlm_loss:.3f}, SOP: {val_sop_loss:.3f}, MLM_acc: {val_mlm_acc:.3f}, SOP_acc: {val_sop_acc:.3f}"
                logger.info(f"Validation step {i} -- {description}")

            # Create summary_writer after the first step
            if summary_writer is None:
                summary_writer = tf.summary.create_file_writer(
                    f"{data_args.fsx_prefix}/logs/albert/{run_name}")
                # The hparams experiment config and this run's values are
                # written once, when the writer is created.
                with summary_writer.as_default():
                    HP_MODEL_TYPE = hp.HParam("model_type",
                                              hp.Discrete(["albert", "bert"]))
                    HP_MODEL_SIZE = hp.HParam("model_size",
                                              hp.Discrete(["base", "large"]))
                    HP_LEARNING_RATE = hp.HParam("learning_rate",
                                                 hp.RealInterval(1e-5, 1e-1))
                    HP_BATCH_SIZE = hp.HParam("global_batch_size",
                                              hp.IntInterval(1, 64))
                    HP_PRE_LAYER_NORM = hp.HParam("pre_layer_norm",
                                                  hp.Discrete([True, False]))
                    HP_HIDDEN_DROPOUT = hp.HParam("hidden_dropout")
                    hparams = [
                        HP_MODEL_TYPE,
                        HP_MODEL_SIZE,
                        HP_BATCH_SIZE,
                        HP_LEARNING_RATE,
                        HP_PRE_LAYER_NORM,
                        HP_HIDDEN_DROPOUT,
                    ]

                    HP_F1 = hp.Metric("squad_f1")
                    HP_EXACT = hp.Metric("squad_exact")
                    HP_MLM = hp.Metric("val_mlm_acc")
                    HP_SOP = hp.Metric("val_sop_acc")
                    HP_TRAIN_LOSS = hp.Metric("train_loss")
                    HP_VAL_LOSS = hp.Metric("val_loss")
                    metrics = [
                        HP_TRAIN_LOSS, HP_VAL_LOSS, HP_F1, HP_EXACT, HP_MLM,
                        HP_SOP
                    ]

                    hp.hparams_config(
                        hparams=hparams,
                        metrics=metrics,
                    )
                    hp.hparams(
                        {
                            HP_MODEL_TYPE: model_args.model_type,
                            HP_MODEL_SIZE: model_args.model_size,
                            HP_LEARNING_RATE: train_args.learning_rate,
                            HP_BATCH_SIZE: train_args.batch_size * hvd.size(),
                            HP_PRE_LAYER_NORM: pre_layer_norm,
                            HP_HIDDEN_DROPOUT: model_args.hidden_dropout_prob,
                        },
                        trial_id=run_name,
                    )

            # Log to TensorBoard
            with summary_writer.as_default():
                tf.summary.scalar("weight_norm", weight_norm, step=i)
                tf.summary.scalar("loss_scale", loss_scale, step=i)
                tf.summary.scalar("learning_rate", learning_rate, step=i)
                tf.summary.scalar("train_loss", loss, step=i)
                tf.summary.scalar("train_mlm_loss", mlm_loss, step=i)
                tf.summary.scalar("train_mlm_acc", mlm_acc, step=i)
                tf.summary.scalar("train_sop_loss", sop_loss, step=i)
                tf.summary.scalar("train_sop_acc", sop_acc, step=i)
                tf.summary.scalar("grad_norm", grad_norm, step=i)
                if do_validation:
                    tf.summary.scalar("val_loss", val_loss, step=i)
                    tf.summary.scalar("val_mlm_loss", val_mlm_loss, step=i)
                    tf.summary.scalar("val_mlm_acc", val_mlm_acc, step=i)
                    tf.summary.scalar("val_sop_loss", val_sop_loss, step=i)
                    tf.summary.scalar("val_sop_acc", val_sop_acc, step=i)
                if do_squad:
                    tf.summary.scalar("squad_f1", squad_f1, step=i)
                    tf.summary.scalar("squad_exact", squad_exact, step=i)

        i += 1
        if is_final_step:
            break

    if hvd.rank() == 0:
        pbar.close()
        logger.info(f"Finished pretraining, job name {run_name}")
def test_summary_pb(self):
    """Check `hp.Experiment` attribute access and its `summary_pb()` output.

    Builds an experiment from a list of hparams and metrics, verifies that
    the constructor arguments are exposed unchanged, then parses the plugin
    data embedded in the summary and compares it to the expected
    `api_pb2.Experiment` proto.
    """
    hparams = [
        hp.HParam("learning_rate", hp.RealInterval(1e-2, 1e-1)),
        hp.HParam("dense_layers", hp.IntInterval(2, 7)),
        hp.HParam("optimizer", hp.Discrete(["adam", "sgd"])),
        hp.HParam("who_knows_what"),
        hp.HParam(
            "magic",
            hp.Discrete([False, True]),
            display_name="~*~ Magic ~*~",
            description="descriptive",
        ),
    ]
    metrics = [
        hp.Metric("samples_per_second"),
        hp.Metric(group="train", tag="batch_loss", display_name="loss (train)"),
        hp.Metric(
            group="validation",
            tag="epoch_accuracy",
            display_name="accuracy (val.)",
            description="Accuracy on the _validation_ dataset.",
            dataset_type=hp.Metric.VALIDATION,
        ),
    ]
    # The user passed here must be the one asserted on and the one in the
    # expected proto text below.
    experiment = hp.Experiment(
        hparams=hparams,
        metrics=metrics,
        user="zalgo",
        description="nothing to see here; move along",
        time_created_secs=1555624767,
    )

    self.assertEqual(experiment.hparams, hparams)
    self.assertEqual(experiment.metrics, metrics)
    self.assertEqual(experiment.user, "zalgo")
    self.assertEqual(experiment.description, "nothing to see here; move along")
    self.assertEqual(experiment.time_created_secs, 1555624767)

    expected_experiment_pb = api_pb2.Experiment()
    text_format.Merge(
        """
        description: "nothing to see here; move along"
        user: "zalgo"
        time_created_secs: 1555624767.0
        hparam_infos {
          name: "learning_rate"
          type: DATA_TYPE_FLOAT64
          domain_interval {
            min_value: 0.01
            max_value: 0.1
          }
        }
        hparam_infos {
          name: "dense_layers"
          type: DATA_TYPE_FLOAT64
          domain_interval {
            min_value: 2
            max_value: 7
          }
        }
        hparam_infos {
          name: "optimizer"
          type: DATA_TYPE_STRING
          domain_discrete {
            values {
              string_value: "adam"
            }
            values {
              string_value: "sgd"
            }
          }
        }
        hparam_infos {
          name: "who_knows_what"
        }
        hparam_infos {
          name: "magic"
          type: DATA_TYPE_BOOL
          display_name: "~*~ Magic ~*~"
          description: "descriptive"
          domain_discrete {
            values {
              bool_value: false
            }
            values {
              bool_value: true
            }
          }
        }
        metric_infos {
          name {
            tag: "samples_per_second"
          }
        }
        metric_infos {
          name {
            group: "train"
            tag: "batch_loss"
          }
          display_name: "loss (train)"
        }
        metric_infos {
          name {
            group: "validation"
            tag: "epoch_accuracy"
          }
          display_name: "accuracy (val.)"
          description: "Accuracy on the _validation_ dataset."
          dataset_type: DATASET_VALIDATION
        }
        """,
        expected_experiment_pb,
    )
    actual_summary_pb = experiment.summary_pb()
    plugin_content = actual_summary_pb.value[0].metadata.plugin_data.content
    self.assertEqual(
        metadata.parse_experiment_plugin_data(plugin_content),
        expected_experiment_pb,
    )
def test_convert_hyperparams_to_hparams():
    """Verify the keras-tuner -> TensorBoard hparams conversion per hyperparameter kind.

    Each case builds a fresh `HyperParameters` container, registers one (or
    two) hyperparameters, converts it and compares the result against the
    hand-written TensorBoard `HParam` mapping.
    """
    new_space = keras_tuner.engine.hyperparameters.HyperParameters
    convert = keras_tuner.engine.tuner_utils.convert_hyperparams_to_hparams

    def _serialized(mapping):
        # Serialize with a fixed start time so that two equal mappings yield
        # identical bytes.
        return hparams_api.hparams_pb(
            mapping, start_time_secs=0).SerializeToString()

    def _check_hparams_equal(hp1, hp2):
        assert _serialized(hp1) == _serialized(hp2)

    # Choice -> Discrete, defaulting to the first option.
    space = new_space()
    space.Choice("learning_rate", [1e-4, 1e-3, 1e-2])
    converted = convert(space)
    expected = {
        hparams_api.HParam(
            "learning_rate",
            hparams_api.Discrete([1e-4, 1e-3, 1e-2])): 1e-4,
    }
    _check_hparams_equal(converted, expected)

    # Int without step -> IntInterval.
    space = new_space()
    space.Int("units", min_value=2, max_value=16)
    converted = convert(space)
    expected = {
        hparams_api.HParam("units", hparams_api.IntInterval(2, 16)): 2,
    }
    _check_hparams_equal(converted, expected)

    # Int with step -> Discrete over the stepped values.
    space = new_space()
    space.Int("units", min_value=32, max_value=128, step=32)
    converted = convert(space)
    expected = {
        hparams_api.HParam(
            "units", hparams_api.Discrete([32, 64, 96, 128])): 32,
    }
    _check_hparams_equal(converted, expected)

    # Float with step -> Discrete over the stepped values.
    space = new_space()
    space.Float("learning_rate", min_value=0.5, max_value=1.25, step=0.25)
    converted = convert(space)
    expected = {
        hparams_api.HParam(
            "learning_rate",
            hparams_api.Discrete([0.5, 0.75, 1.0, 1.25])): 0.5,
    }
    _check_hparams_equal(converted, expected)

    # Float without step -> RealInterval.
    space = new_space()
    space.Float("learning_rate", min_value=1e-4, max_value=1e-1)
    converted = convert(space)
    expected = {
        hparams_api.HParam(
            "learning_rate", hparams_api.RealInterval(1e-4, 1e-1)): 1e-4,
    }
    _check_hparams_equal(converted, expected)

    # Two floats at once: only the sorted reprs of the values are compared.
    space = new_space()
    space.Float("theta", min_value=0.0, max_value=1.57)
    space.Float("r", min_value=0.0, max_value=1.0)
    converted = convert(space)
    expected = {
        hparams_api.HParam("theta", hparams_api.RealInterval(0.0, 1.57)): 0.0,
        hparams_api.HParam("r", hparams_api.RealInterval(0.0, 1.0)): 0.0,
    }
    actual_reprs = sorted(repr(value) for value in converted.values())
    expected_reprs = sorted(repr(value) for value in expected.values())
    assert actual_reprs == expected_reprs

    # Boolean -> Discrete([True, False]), default False.
    space = new_space()
    space.Boolean("has_beta")
    converted = convert(space)
    expected = {
        hparams_api.HParam(
            "has_beta", hparams_api.Discrete([True, False])): False,
    }
    _check_hparams_equal(converted, expected)

    # Fixed float -> single-element Discrete.
    space = new_space()
    space.Fixed("beta", 0.1)
    converted = convert(space)
    expected = {
        hparams_api.HParam("beta", hparams_api.Discrete([0.1])): 0.1,
    }
    _check_hparams_equal(converted, expected)

    # Fixed string -> single-element Discrete.
    space = new_space()
    space.Fixed("type", "WIDE_AND_DEEP")
    converted = convert(space)
    expected = {
        hparams_api.HParam(
            "type",
            hparams_api.Discrete(["WIDE_AND_DEEP"])): "WIDE_AND_DEEP",
    }
    _check_hparams_equal(converted, expected)

    # Fixed bool -> single-element Discrete.
    space = new_space()
    space.Fixed("condition", True)
    converted = convert(space)
    expected = {
        hparams_api.HParam("condition", hparams_api.Discrete([True])): True,
    }
    _check_hparams_equal(converted, expected)

    # Fixed int -> single-element Discrete.
    space = new_space()
    space.Fixed("num_layers", 2)
    converted = convert(space)
    expected = {
        hparams_api.HParam("num_layers", hparams_api.Discrete([2])): 2,
    }
    _check_hparams_equal(converted, expected)