def model_from_configs(model_config, loss_config, optimizer_config, metrics_config, weights_filename=None):
    """Creates a Keras model from the configurations typically stored in
    an InstanceState.

    Args:
        model_config (dict): Configuration dictionary representing the model.
        loss_config (dict, list, or str): Configuration representing the
            loss(es) for the model.
        optimizer_config (dict): Configuration representing the optimizer.
        metrics_config (str): JSON string representing the metrics.
        weights_filename (str, optional): Filename containing weights to load.

    Returns:
        Model: The Keras Model defined by the config objects.
    """
    # Rebuild the architecture, then compile with the deserialized
    # loss / optimizer / metrics.
    model = model_from_json(json.dumps(model_config))
    model.compile(
        loss=tf_utils.deserialize_loss(loss_config),
        optimizer=tf.keras.optimizers.deserialize(optimizer_config),  # nopep8
        metrics=json.loads(metrics_config))

    # Weights are optional: warn (don't fail) when the file is missing.
    if weights_filename:
        if tf.io.gfile.exists(weights_filename):
            model.load_weights(weights_filename)
        else:
            warning("No weights file: '%s'" % weights_filename)
    return model
def sort_by_metric(self, metric_name):
    """Returns a list of `ExecutionState`s sorted by a given metric.

    Args:
        metric_name (str): name of the metric to sort on.

    Returns:
        list: `ExecutionState`s ordered best-first according to the
        metric direction ('min' -> ascending, else descending). Empty
        list if the metric is unknown.
    """
    # FIXME: Refactor to avoid dup with InstanceState by adding an
    # Statescollection

    # Checking if the metric exists and get its direction.
    execution_state = self.get_last()
    # !don't use _objects -> use get() instead due to canonicalization
    metric = execution_state.metrics.get(metric_name)
    if not metric:
        warning('Metric %s not found' % metric_name)
        return []

    # Pair each execution with its best value. The previous version
    # used a dict keyed by value, which silently dropped executions
    # whose best values collided.
    pairs = []
    for execution_state in self._objects.values():
        value = execution_state.metrics.get(metric.name).get_best_value()
        pairs.append((value, execution_state))

    # 'min' metrics (e.g. loss): smaller is better -> ascending order.
    descending = metric.direction != 'min'
    pairs.sort(key=lambda pair: pair[0], reverse=descending)
    return [state for _, state in pairs]
def sort_by_metric(self, metric_name):
    """Returns a list of `InstanceState`s sorted by a given metric.

    Args:
        metric_name (str): name of the aggregated metric to sort on.

    Returns:
        list: `InstanceState`s ordered best-first according to the
        metric direction ('min' -> ascending, else descending). Empty
        list if the metric is unknown.
    """
    # checking if metric exist and getting its direction
    reference_state = self.get_last()
    # !don't use _objects -> use get() instead due to canonicalization
    metric = reference_state.agg_metrics.get(metric_name)
    if not metric:
        warning('Metric %s not found' % metric_name)
        return []

    # Pair each instance with its best value. The previous version
    # (a) keyed a dict by value, dropping instances whose best values
    # collided, and (b) reassigned `metric` inside the loop, so the
    # direction read afterwards came from an arbitrary instance.
    pairs = []
    for instance_state in self._objects.values():
        value = instance_state.agg_metrics.get(metric.name).get_best_value()
        pairs.append((value, instance_state))

    # 'min' metrics (e.g. loss): smaller is better -> ascending order.
    descending = metric.direction != 'min'
    pairs.sort(key=lambda pair: pair[0], reverse=descending)
    return [state for _, state in pairs]
def reload_model(tuner_state, instance_state, execution_state=None, compile=False):
    """Reload the model for the given instance and execution.

    Args:
        tuner_state: TunerState, the state of the tuner.
        instance_state: InstanceState, the instance to reload.
        execution_state: ExecutionState, the execution to reload.
        compile: bool, if true, compile the model before returning it.

    Returns:
        tf.keras.models.Model, the reloaded model.
    """
    model = instance_state.recreate_model()
    metrics = json.loads(instance_state.metrics_config)
    if compile:
        model.compile(loss=model.loss,
                      optimizer=model.optimizer,
                      metrics=metrics)

    # Without an execution there are no checkpointed weights to restore.
    if not execution_state:
        return model

    weights = get_weights_filename(tuner_state, instance_state,
                                   execution_state)
    if exists(weights):
        model.load_weights(weights)
    else:
        # Missing weights are not fatal: training just restarts.
        warning(
            "Weights file '%s' not found. Model training will start"
            " from the beginning" % weights)
    return model
def __init__(self, **kwargs):
    """Initialize host-level state: results/tmp/export dirs and TF version check.

    Args:
        **kwargs: forwarded to the base state; may override the
            registered settings (results_dir, tmp_dir, export_dir).
    """
    super(HostState, self).__init__(**kwargs)

    self.results_dir = self._register('results_dir', 'results/', True)
    self.tmp_dir = self._register('tmp_dir', 'tmp/')
    self.export_dir = self._register('export_dir', 'export/', True)

    # ensure the user don't shoot himself in the foot: tmp dir is wiped
    # below, so it must never alias the results dir.
    if self.results_dir == self.tmp_dir:
        fatal('Result dir and tmp dir must be different')

    # create directory if needed
    tf_utils.create_directory(self.results_dir)
    tf_utils.create_directory(self.tmp_dir, remove_existing=True)
    tf_utils.create_directory(self.export_dir)

    # init _HOST
    config._Host = Host()
    status = config._Host.get_status()
    tf_version = status['software']['tensorflow']
    if tf_version:
        # Only TF 1.x below 1.13 is rejected; 2.x and later pass.
        # Parse defensively: version strings may not have exactly three
        # dot-separated parts (the old unpacking crashed on e.g. '1.13').
        parts = tf_version.split('.')
        major = parts[0]
        minor = parts[1] if len(parts) > 1 else '0'
        if major == '1' and int(minor) < 13:
            # Message rebuilt with implicit concatenation; the old
            # line-continued string embedded a run of indentation spaces.
            # Removed a leftover debug print('ok') on the success path.
            fatal("Keras Tuner only work with TensorFlow version >= 1.13 "
                  "current version: %s - please upgrade" % tf_version)
    else:
        warning('Could not determine TensorFlow version.')
def sort_by_objective(self):
    "Returns a list of `InstanceState`s sorted by the objective."
    # The objective name is recorded on the most recent instance state.
    latest = self.get_last()
    if not latest:
        warning('No previous instance found')
        return []
    return self.sort_by_metric(latest.objective)
def tune(self, x, y, **kwargs):
    """Exhaustively train every instance in the search space.

    Stops early when the epoch budget runs out or no new instance can
    be generated.
    """
    # Determine the number of total models to search over.
    search_space_size = config._DISTRIBUTIONS.get_search_space_size()
    required_num_epochs = (search_space_size *
                           self.state.max_epochs *
                           self.state.num_executions)
    # Warn (but proceed) when the budget cannot cover the full grid.
    if required_num_epochs > self.state.remaining_budget:
        warning("GridSearch epoch budget of %d is not sufficient to explore \ the entire space. Recommended budget: %d" % (self.state.remaining_budget, required_num_epochs))

    while self.state.remaining_budget and search_space_size:
        instance = self.new_instance()
        # not instances left time to wrap-up
        if not instance:
            break
        # train n executions for the given model
        for _ in range(self.state.num_executions):
            instance.fit(x, y, self.state.max_epochs, **kwargs)
        search_space_size -= 1
def __init__(self, model_fn, objective, name, distributions, **kwargs):
    """ Tuner abstract class

    Args:
        model_fn (function): Function that return a Keras model
        name (str): name of the tuner
        objective (str): Which objective the tuner optimize for
        distributions (Distributions): distributions object

    Notes:
        All meta data and variables are stored into self.state
        defined in ../states/tunerstate.py
    """
    # hypertuner state init
    self.state = TunerState(name, objective, **kwargs)
    self.stats = self.state.stats  # shorthand access
    self.cloudservice = CloudService()

    # check model function
    if not model_fn:
        fatal("Model function can't be empty")
    try:
        # Probe the function once to validate its return value.
        mdl = model_fn()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate instead of being reported as an invalid model.
        traceback.print_exc()
        fatal("Invalid model function")

    if not isinstance(mdl, Model):
        t = "tensorflow.keras.models.Model"
        fatal("Invalid model function: Doesn't return a %s object" % t)

    # function is valid - recording it
    self.model_fn = model_fn

    # Initializing distributions
    hparams = config._DISTRIBUTIONS.get_hyperparameters_config()
    if len(hparams) == 0:
        warning("No hyperparameters used in model function. Are you sure?")

    # set global distribution object to the one requested by tuner
    # !MUST be after _eval_model_fn()
    config._DISTRIBUTIONS = distributions(hparams)

    # instances management
    self.max_fail_streak = 5  # how many failure before giving up
    self.instance_states = InstanceStatesCollection()

    # previous models
    print("Loading from %s" % self.state.host.results_dir)
    count = self.instance_states.load_from_dir(self.state.host.results_dir,
                                               self.state.project,
                                               self.state.architecture)
    self.stats.instance_states_previously_trained = count
    info("Tuner initialized")
def add(self, idx, obj):
    """Add object to the collection

    Args:
        idx (str): object index
        obj (Object): Object to add
    """
    # Flag silent overwrites -- callers should use update() for those.
    already_present = idx in self._objects
    if already_present:
        warning('overriding object %s - use update() instead?' % idx)
    self._objects[idx] = obj
    # Track the most recent insertion for get_last().
    self._last_insert_idx = idx
def __init__(self, name, objective, **kwargs):
    """Tuner-wide state: budgets, project info, sub-states and logging.

    Args:
        name (str): name of the tuner.
        objective (str): metric name the tuner optimizes for.
        **kwargs: user overrides for the registered settings below;
            also forwarded to HostState.

    # NOTE(review): the order of the _register calls matters for
    # reporting/serialization elsewhere -- presumably; do not reorder.
    """
    super(TunerState, self).__init__(**kwargs)
    self.name = name
    # wall-clock start, seconds since epoch
    self.start_time = int(time())

    # objective
    self.objective = objective
    if self.objective == 'loss':
        warning("Objective set to loss - usually not the best objective\ are you sure?")

    # budget (epochs)
    self.epoch_budget = self._register('epoch_budget', 100, True)
    self.max_epochs = self._register('max_epochs', 10, True)
    self.min_epochs = self._register('min_epochs', 3, True)
    # decremented as instances train
    self.remaining_budget = self.epoch_budget

    # user info
    # !don't use random identifiers -- it makes reloading impossible
    self.project = self._register('project', 'default')
    self.architecture = self._register('architecture', 'default')
    self.label_names = self._register('label_names', None)
    self.user_info = self._register('user_info', {})

    # execution
    self.num_executions = self._register('num_executions', 1, True)
    self.max_model_parameters = self._register('max_model_parameters', 25000000, True)

    # checkpointing
    self.checkpoint = self._register('checkpoint', True)
    if not self.checkpoint:
        warning("models will not be saved are you sure?")

    # debug
    self.dry_run = self._register('dry_run', False)
    self.debug = self._register('debug', False)
    self.display_model = self._register('display_model', False)

    # sub-states
    self.host = HostState(**kwargs)
    self.stats = TunerStatsState()
    self.agg_metrics = None  # set in Instance before 1st training

    # best instance tracking
    self.best_instance_config = None  # set in callback after 1st training

    # logfile (one per run, named by project/architecture/start time)
    log_name = "%s-%s-%d.log" % (self.project, self.architecture, self.start_time)
    self.log_file = os.path.join(self.host.results_dir, log_name)
    set_log(self.log_file)
    # estimated time remaining; -1 means not yet computed
    self.eta = -1
def enable(self, api_key, url=None):
    """enable cloud service by setting API key"""
    self.api_key = api_key
    if url:
        self.base_url = url

    # Validate the key before flipping the service on.
    if not self._check_access():
        warning("Invalid cloud API key")
        self.status = AUTH_ERROR
        self.is_enable = False
        return

    info("Cloud service enabled - Go to https://.. to track your "
         "tuning results in realtime.")
    self.status = OK
    self.is_enable = True
def get(self, idx):
    """Return the object associated with a given id

    Args:
        idx (str): Object id

    Returns:
        Object: object associated if found or None
    """
    # EAFP lookup: a miss is reported but not fatal.
    try:
        return self._objects[idx]
    except KeyError:
        warning("%s not found" % idx)
        return None
def send_to_backend(url, api_key, info_type, info):
    """Sends data to the cloud service.

    Args:
        url (str): backend endpoint to post to.
        api_key (str): cloud service API key (sent as X-AUTH header).
        info_type (str): type of information sent
        info (dict): the data to send

    Returns:
        int: OK on success, otherwise CONNECT_ERROR, AUTH_ERROR or
        UPLOAD_ERROR.
    """
    response = requests.post(
        url,
        headers={'X-AUTH': api_key},
        json={
            'type': info_type,
            'data': _normalize_data_to_send(info)
        })
    if response.ok:
        return OK

    # Failure path: classify the error from the response body.
    try:
        response_json = response.json()
    except json.decoder.JSONDecodeError:
        # Non-JSON body -> service itself is unreachable/broken.
        warning('Cloud service down -- data not uploaded: %s' %
                response.text)
        return CONNECT_ERROR

    # .get() instead of [] -- a JSON error body without a 'status'
    # field previously raised KeyError. Also removed an unreachable
    # `return ERROR` that followed the if/else (both branches return).
    if response_json.get('status') == 'Unauthorized':
        warning('Invalid backend API key.')
        return AUTH_ERROR
    warning('Warning! Cloud service upload failed: %s' % response.text)
    return UPLOAD_ERROR
def _record_hyperparameter(self, name, value, group): """ Record hyperparameter value Args: name (str): name of the hyperparameter value: value of the hyperparameter group (str): which logical group this parameters belongs to """ hparam = {"name": name, "value": value, "group": group} key = self._get_key(name, group) # new hyper-parameter - makes reporting unstable if key not in self._hyperparameters_config: self.dynamic_hyperparameters = True if self.fatal_on_dynamic_hyperparmeter: fatal('Parameter %s is dynamic - this is incompatible with\ tuning algorithm' % key) else: warning('Parameter %s is dynamic - this will make reporitng\ innacurate. Consider making hyperparameters\ non-conditional' % key) self._hyperparameters[key] = hparam
def __load_instance(self, instance_state):
    """Rebuild an Instance (model + state) from a stored InstanceState.

    Returns None in dry-run mode. Missing weight files downgrade to a
    warning and the model is rebuilt without weights.
    """
    if self.state.dry_run:
        return None

    # Determine the weights file (if any) to load, and rebuild the model.
    weights_file = None
    esc = instance_state.execution_states_collection
    if esc:
        latest_execution = esc.get_last()
        weights_file = get_weights_filename(self.state,
                                            instance_state,
                                            latest_execution)
        if not tf.io.gfile.exists(weights_file):
            warning("Could not open weights file: '%s'" % weights_file)
            weights_file = None

    model = instance_state.recreate_model(weights_filename=weights_file)
    return Instance(instance_state.idx,
                    model,
                    instance_state.hyper_parameters,
                    self.state,
                    self.cloudservice,
                    instance_state=instance_state)
def new_instance(self):
    """Return a never seen before model instance.

    Repeatedly samples self.model_fn() until it yields a valid,
    reasonably sized model whose id has not been trained yet.

    Returns:
        Instance: the new instance, or None when model_fn() returns
        nothing or too many consecutive failures / collisions /
        oversized models occur.
    """
    fail_streak = 0
    collision_streak = 0
    over_sized_streak = 0

    while 1:
        # clean-up TF graph from previously stored (defunct) graph
        tf_utils.clear_tf_session()
        self.stats.generated_instances += 1
        fail_streak += 1
        try:
            model = self.model_fn()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit propagate instead of counting as bad models.
            if self.state.debug:
                traceback.print_exc()

            self.stats.invalid_instances += 1
            warning("invalid model %s/%s" % (self.stats.invalid_instances,
                                             self.max_fail_streak))

            # NOTE(review): this compares the run-wide invalid count,
            # not the current streak (fail_streak is never read) --
            # behavior preserved as-is.
            if self.stats.invalid_instances >= self.max_fail_streak:
                warning("too many consecutive failed models - stopping")
                return None
            continue

        # stop if the model_fn() return nothing
        if not model:
            warning("No model returned from model function - stopping.")
            return None

        # computing instance unique idx
        idx = self.__compute_model_id(model)
        if self.instance_states.exist(idx):
            collision_streak += 1
            self.stats.collisions += 1
            warning("Collision for %s -- skipping" % (idx))
            if collision_streak >= self.max_fail_streak:
                return None
            continue

        # check size
        nump = tf_utils.compute_model_size(model)
        if nump > self.state.max_model_parameters:
            over_sized_streak += 1
            self.stats.over_sized_models += 1
            warning("Oversized model: %s parameters-- skipping" % (nump))
            if over_sized_streak >= self.max_fail_streak:
                warning("too many consecutive failed model - stopping")
                return None
            continue

        # creating instance
        hparams = config._DISTRIBUTIONS.get_hyperparameters()
        instance = Instance(idx, model, hparams, self.state,
                            self.cloudservice)
        break

    # recording instance
    self.instance_states.add(idx, instance.state)
    return instance
def get_objective(self):
    "Get metric objective"
    # Objective name is None/empty until explicitly set.
    name = self._objective_name
    if not name:
        warning("objective not set yet. returning None")
        return None
    return self._objects[name]