def multi_step_lr_schedule(
    global_step: tf_compat.Tensor,
    start_step: int,
    milestone_steps: List[int],
    init_lr: float,
    gamma: float,
    name: str = "multi_step_lr_schedule",
) -> tf_compat.Tensor:
    """
    Create a multi step learning rate schedule in the current graph.
    Multiplies init_lr by gamma after each milestone has passed.
    Ex: lr = init_lr * (gamma ** NUM_UPDATES)

    :param global_step: the global step used for training
    :param start_step: the step to start the schedule on
    :param milestone_steps: a list of steps to decrease the learning rate at,
        these are the number of steps that must pass after start_step to decrease lr
    :param init_lr: the learning rate to start the schedule with
    :param gamma: the decay weight to decrease init_lr by after each milestone passes
    :param name: the name scope to create the graph under
    :return: the calculated learning rate tensor
    """
    with tf_compat.name_scope(name):
        global_step = tf_compat.cast(global_step, tf_compat.int64)
        milestone_steps = tf_compat.constant(
            [mile + start_step for mile in milestone_steps],
            dtype=tf_compat.int64,
            name="milestone_steps",
        )
        start_step = tf_compat.constant(
            start_step, dtype=tf_compat.int64, name="start_step"
        )
        init_lr = tf_compat.constant(init_lr, dtype=tf_compat.float32, name="init_lr")
        gamma = tf_compat.constant(gamma, dtype=tf_compat.float32, name="gamma")
        before = tf_compat.less(global_step, start_step, name="before")

        def _calc_lr():
            # mask of milestones the global step has already passed
            passed = tf_compat.cast(
                tf_compat.greater_equal(global_step, milestone_steps), tf_compat.int64
            )
            updates = tf_compat.reduce_sum(passed)
            mult_g = tf_compat.pow(gamma, tf_compat.cast(updates, tf_compat.float32))

            return tf_compat.multiply(init_lr, mult_g)

        learning_rate = tf_compat.cond(
            before, lambda: init_lr, _calc_lr, name="learning_rate"
        )

    return learning_rate

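# Usage sketch (hypothetical, not part of the API above): build a multi-step
# LR tensor that decays by 10x at steps 1000 and 2000, assuming `tf_compat`
# aliases tensorflow.compat.v1 as elsewhere in this module.
#
#     global_step = tf_compat.train.get_or_create_global_step()
#     lr = multi_step_lr_schedule(
#         global_step,
#         start_step=0,
#         milestone_steps=[1000, 2000],
#         init_lr=0.1,
#         gamma=0.1,
#     )
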
def imagenet_normalizer(img):
    """
    Normalize an image using the mean and std of the ImageNet dataset.

    :param img: the input image to normalize
    :return: the normalized image
    """
    img = tf_compat_div(img, 255.0)
    means = tf_compat.constant(IMAGENET_RGB_MEANS, dtype=tf_compat.float32)
    stds = tf_compat.constant(IMAGENET_RGB_STDS, dtype=tf_compat.float32)
    img = tf_compat_div(tf_compat.subtract(img, means), stds)

    return img

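# Usage sketch (hypothetical): apply the normalizer inside a tf.data input
# pipeline after decoding images to float tensors; `dataset` and its
# (image, label) element structure are assumptions for illustration.
#
#     dataset = dataset.map(lambda img, label: (imagenet_normalizer(img), label))
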
def symmetric_pad2d(
    x_tens: tf_compat.Tensor,
    pad: Union[str, int, Tuple[int, int]],
    data_format: str,
) -> tf_compat.Tensor:
    """
    Create a symmetric pad op in the current graph and scope.
    To do this, pad must be an integer or tuple of integers.
    If pad is a string, this does nothing and pad should be passed into
    the pool or conv op instead.

    :param x_tens: the tensor to apply padding to
    :param pad: the padding to apply symmetrically. If it is a single integer,
        it is applied to both sides of the height and width dimensions. If it
        is a tuple, the first element is the padding for both sides of the
        height dimension and the second for both sides of the width dimension.
    :param data_format: either channels_last or channels_first
    :return: the padded tensor
    """
    if isinstance(pad, str):
        # default tensorflow_v1 padding; handled by the pool or conv op
        return x_tens

    y_pad = [pad, pad] if isinstance(pad, int) else [pad[0], pad[0]]
    x_pad = [pad, pad] if isinstance(pad, int) else [pad[1], pad[1]]
    pad_tensor = (
        [[0, 0], y_pad, x_pad, [0, 0]]
        if data_format == "channels_last"
        else [[0, 0], [0, 0], y_pad, x_pad]
    )
    pad_tensor = tf_compat.constant(pad_tensor)

    return tf_compat.pad(x_tens, pad_tensor)

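# Usage sketch (hypothetical): pad a channels_last batch by 1 pixel on each
# side of height and 2 on each side of width; the placeholder is only for
# illustration.
#
#     x = tf_compat.placeholder(tf_compat.float32, [None, 224, 224, 3])
#     padded = symmetric_pad2d(x, pad=(1, 2), data_format="channels_last")
#     # padded shape: [None, 226, 228, 3]
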
def create_ops(
    self,
    steps_per_epoch: int,
    global_step: Optional[tf_compat.Variable],
    graph: Optional[tf_compat.Graph],
) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
    """
    Create ops to set the learning rate at a given value if the global step
    reaches a given value.

    :param steps_per_epoch: the number of steps (batches) per training epoch
    :param global_step: the global step used while training
    :param graph: the graph to be modified
    :return: a tuple (empty list of ops, dict of learning rate and logging summaries)
    """
    mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
    name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

    with graph.as_default():
        with tf_compat.name_scope(name_scope):
            learning_rate = tf_compat.constant(
                self.learning_rate, tf_compat.float32, name="learning_rate"
            )
            _add_lr_extras(mod_extras, learning_rate, self.log_types)

    return mod_ops, mod_extras

def pruning_loss_sens_op_vars(
    graph: tf_compat.Graph = None,
    var_names: Union[List[str], Tuple[str]] = ("re:.*",),
    mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
) -> List[SparsePruningOpVars]:
    """
    Edit the graph to inject pruning ops and vars to allow for a ks loss
    sensitivity analysis.

    Note: this must be run outside of a session for it to take effect.

    :param graph: the graph to inject pruning ops and vars into,
        if not supplied uses get_default_graph()
    :param var_names: list of variable names or regex patterns of variables
        to get the op vars for. Defaults to matching all variables
    :param mask_type: String to define type of sparsity (options: ['unstructured',
        'channel', 'filter']), List to define block shape of a parameter's in and out
        channels, or a PruningMaskCreator object. default is 'unstructured'
    :return: the created pruning op vars to be used in approx_ks_loss_sensitivity
        and one_shot_ks_loss_sensitivity
    """
    if not graph:
        graph = tf_compat.get_default_graph()

    mask_creator = mask_type
    if not isinstance(mask_type, PruningMaskCreator):
        mask_creator = load_mask_creator(mask_type)

    ks_group = pruning_loss_sens_one_shot.__name__
    prunable_ops_and_inputs = get_ops_and_inputs_by_name_or_regex(var_names, graph)
    op_vars = []

    with graph.as_default():
        for prune_op, prune_op_input in prunable_ops_and_inputs:
            with tf_compat.name_scope(
                PruningScope.model(prune_op, ks_group, trailing_slash=True)
            ):
                sparsity = tf_compat.placeholder(
                    dtype=tf_compat.float32, name="sparsity_placeholder"
                )
                update = tf_compat.constant(True, tf_compat.bool)

            prune_op_var = create_op_pruning(
                prune_op,
                prune_op_input,
                sparsity,
                update,
                True,
                None,
                ks_group,
                mask_creator,
            )
            op_vars.append(SparsePruningOpVars(prune_op_var, sparsity))

    return op_vars

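# Usage sketch (hypothetical): inject the pruning ops into a fresh graph
# before any session is created; each returned SparsePruningOpVars carries the
# sparsity placeholder to feed during the sensitivity sweep.
#
#     with tf_compat.Graph().as_default() as graph:
#         # ... build the model ...
#         op_vars = pruning_loss_sens_op_vars(
#             graph, var_names=["re:.*"], mask_type="unstructured"
#         )
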
def _no_op():
    with tf_compat.name_scope(
        PruningScope.model(
            op,
            ks_group,
            additional=PruningScope.OPS_UPDATE,
            trailing_slash=True,
        )
    ):
        # return no op wrapped in group to match update type
        return tf_compat.group(
            tf_compat.constant(
                0.0, dtype=op_var_tens.dtype, name=PruningScope.OP_MASK_UPDATE_NO_OP
            )
        )

def creator(self):
    """
    :return: a created dataset that gives the file_path and label for each
        image under self.root
    """
    labels_strs = [
        fold.split(os.path.sep)[-1]
        for fold in glob.glob(os.path.join(self.root, "*"))
    ]
    labels_strs.sort()
    labels_dict = {
        lab: numpy.identity(len(labels_strs))[index].tolist()
        for index, lab in enumerate(labels_strs)
    }
    files_labels = [
        (file, labels_dict[file.split(os.path.sep)[-2]])
        for file in glob.glob(os.path.join(self.root, "*", "*"))
    ]
    random.Random(42).shuffle(files_labels)
    files, labels = zip(*files_labels)
    files = tf_compat.constant(files)
    labels = tf_compat.constant(labels)

    return tf_compat.data.Dataset.from_tensor_slices((files, labels))

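# Usage sketch (hypothetical, assuming `image_folder` is an instance of the
# containing dataset class with `root` pointing at a class-per-folder tree):
# consume the (file_path, one_hot_label) pairs through a one-shot iterator
# in graph mode.
#
#     raw = image_folder.creator()
#     iterator = tf_compat.data.make_one_shot_iterator(raw)
#     next_file, next_label = iterator.get_next()
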
def create_ops(
    self,
    steps_per_epoch: int,
    global_step: tf_compat.Variable,
    graph: tf_compat.Graph,
) -> Tuple[List[Union[tf_compat.Tensor, tf_compat.Operation]], Dict[str, Any]]:
    """
    Create switch case computing the learning rate at a given global step and
    extras created by individual LR modifiers.

    :param steps_per_epoch: the number of steps per training epoch
    :param global_step: the global step used while training
    :param graph: the graph to be modified
    :return: a tuple (list of empty ops, dict of named ops/tensors for learning
        rate and summaries as extras)
    """
    mod_ops, mod_extras = super().create_ops(graph, steps_per_epoch, global_step)
    name_scope = "{}/{}".format(NM_RECAL, self.__class__.__name__)

    with graph.as_default():
        with tf_compat.name_scope(name_scope):
            pred_fn_pairs = []
            global_step = tf_compat.cast(global_step, tf_compat.int64)

            for index, child in enumerate(self._lr_modifiers):
                with tf_compat.name_scope(str(index)):
                    _, child_extras = child.create_ops(
                        steps_per_epoch, global_step, graph
                    )
                    child_lr = child_extras[EXTRAS_KEY_LEARNING_RATE]
                    child_start_step, _ = child.start_end_steps(
                        steps_per_epoch, after_optim=False
                    )
                    child_select = tf_compat.greater_equal(
                        global_step,
                        tf_compat.constant(child_start_step, tf_compat.int64),
                        name="active",
                    )
                    # bind child_lr as a default arg so each lambda captures
                    # its own learning rate tensor rather than the loop var
                    pred_fn_pairs.append((child_select, lambda lr=child_lr: lr))

            learning_rate = tf_compat.case(pred_fn_pairs)
            _add_lr_extras(mod_extras, learning_rate, self.log_types)

    return mod_ops, mod_extras

def create_ks_schedule_ops(
    global_step: tf_compat.Variable,
    begin_step: int,
    end_step: int,
    update_step_freq: int,
    init_sparsity: float,
    final_sparsity: float,
    exponent: float,
    ks_group: str,
) -> Tuple[tf_compat.Tensor, tf_compat.Tensor]:
    """
    Create a gradual schedule for model pruning (kernel sparsity).
    Creates a sparsity tensor that goes from init_sparsity to final_sparsity,
    starting at begin_step and ending at end_step, using the global_step to
    map between them.
    Additionally creates an update_ready tensor that is True if an update
    to the sparsity tensor should be run, False otherwise.

    :param global_step: the global optimizer step for the training graph
    :param begin_step: the global step to begin pruning at
    :param end_step: the global step to end pruning at
    :param update_step_freq: the number of global steps between each weight update
    :param init_sparsity: the starting value for sparsity of a
        weight tensor to be enforced
    :param final_sparsity: the end value for sparsity of a weight tensor
        to be enforced
    :param exponent: the exponent to use for interpolating between init_sparsity
        and final_sparsity; higher values lead to larger sparsity steps at the
        beginning vs the end, ie linear (1) vs cubic (3)
    :param ks_group: the group identifier the scope should be created under
    :return: a tuple containing the signal for update_ready and the target sparsity
    """

    # create the scheduling ops first and the sparsity ops
    with tf_compat.name_scope(
        PruningScope.general(
            ks_group, additional=PruningScope.OPS_SCHEDULE, trailing_slash=True
        )
    ):
        sched_before = tf_compat.less(global_step, begin_step)
        sched_start = tf_compat.equal(global_step, begin_step)
        sched_end = tf_compat.equal(global_step, end_step)
        sched_active = tf_compat.logical_and(
            tf_compat.greater(global_step, begin_step),
            tf_compat.less(global_step, end_step),
        )
        sched_active_inclusive = tf_compat.logical_or(
            sched_active, tf_compat.logical_or(sched_start, sched_end)
        )
        sched_update = tf_compat.cond(
            tf_compat.less_equal(update_step_freq, 0),
            lambda: tf_compat.constant(True),
            lambda: tf_compat.equal(
                tf_compat.mod((global_step - begin_step), update_step_freq), 0
            ),
        )
        sched_update_ready = tf_compat.logical_or(
            tf_compat.logical_or(sched_start, sched_end), sched_update
        )

        percentage = tf_compat.minimum(
            1.0,
            tf_compat.maximum(
                0.0,
                tf_compat_div(
                    tf_compat.cast(global_step - begin_step, tf_compat.float32),
                    end_step - begin_step,
                ),
            ),
        )
        exp_percentage = 1 - tf_compat.pow(1 - percentage, exponent)
        calc_sparsity = (
            tf_compat.multiply(final_sparsity - init_sparsity, exp_percentage)
            + init_sparsity
        )

    # create the update ready tensor and sparsity tensor
    with tf_compat.name_scope(PruningScope.general(ks_group, trailing_slash=True)):
        update_ready = tf_compat.logical_and(
            sched_active_inclusive,
            sched_update_ready,
            name=PruningScope.OP_UPDATE_READY,
        )
        sparsity = tf_compat.case(
            [
                (sched_before, lambda: tf_compat.constant(0.0)),
                (sched_start, lambda: tf_compat.constant(init_sparsity)),
                (sched_active, lambda: calc_sparsity),
            ],
            default=lambda: tf_compat.constant(final_sparsity),
            name=PruningScope.OP_SPARSITY,
        )

    # add return state to collections
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_UPDATE_READY),
        update_ready,
    )
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_SPARSITY), sparsity
    )

    return update_ready, sparsity

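# Usage sketch (hypothetical): cubic sparsity ramp from 5% to 85% over the
# first 10k steps, re-evaluated every 100 steps; the "pruning" group name is
# an assumption for illustration.
#
#     update_ready, sparsity = create_ks_schedule_ops(
#         global_step=tf_compat.train.get_or_create_global_step(),
#         begin_step=0,
#         end_step=10000,
#         update_step_freq=100,
#         init_sparsity=0.05,
#         final_sparsity=0.85,
#         exponent=3.0,
#         ks_group="pruning",
#     )
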
def step_lr_schedule(
    global_step: tf_compat.Tensor,
    start_step: int,
    end_step: int,
    step_size: int,
    init_lr: float,
    gamma: float,
    name: str = "exponential_lr_schedule",
) -> tf_compat.Tensor:
    """
    Create an exponential learning rate schedule in the current graph.
    Multiplies init_lr by gamma after each step_size interval has passed.
    Ex: lr = init_lr * (gamma ** NUM_UPDATES)

    :param global_step: the global step used for training
    :param start_step: the step to start the exponential schedule on
    :param end_step: the step to end the exponential schedule on,
        can be set to -1 and in that event will continually update the LR
    :param step_size: the number of steps between each gamma update to the init_lr
    :param init_lr: the learning rate to start the schedule with
    :param gamma: the decay weight to decrease init_lr by after
        every step_size interval
    :param name: the name scope to create the graph under
    :return: the calculated learning rate tensor
    """
    with tf_compat.name_scope(name):
        global_step = tf_compat.cast(global_step, tf_compat.int64)
        max_updates = tf_compat.constant(
            (end_step - start_step) // step_size if end_step > 0 else -1,
            dtype=tf_compat.int64,
            name="max_updates",
        )
        start_step = tf_compat.constant(
            start_step, dtype=tf_compat.int64, name="start_step"
        )
        end_step = tf_compat.constant(end_step, dtype=tf_compat.int64, name="end_step")
        init_lr = tf_compat.constant(init_lr, dtype=tf_compat.float32, name="init_lr")
        step_size = tf_compat.constant(
            step_size, dtype=tf_compat.int64, name="step_size"
        )
        gamma = tf_compat.constant(gamma, dtype=tf_compat.float32, name="gamma")
        before = tf_compat.less(global_step, start_step, name="before")
        after = tf_compat.logical_and(
            tf_compat.greater_equal(global_step, end_step, name="after"),
            tf_compat.not_equal(end_step, tf_compat.constant(-1, tf_compat.int64)),
        )

        def _calc_lr():
            # number of completed step_size intervals since start_step,
            # capped at max_updates once the schedule has ended
            steps = tf_compat.subtract(global_step, start_step)
            updates = tf_compat.cond(
                after,
                lambda: max_updates,
                lambda: tf_compat.cast(
                    tf_compat.floor(tf_compat.divide(steps, step_size)),
                    tf_compat.int64,
                ),
            )
            mult_g = tf_compat.pow(gamma, tf_compat.cast(updates, tf_compat.float32))

            return tf_compat.multiply(init_lr, mult_g)

        learning_rate = tf_compat.cond(
            before, lambda: init_lr, _calc_lr, name="learning_rate"
        )

    return learning_rate

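# Usage sketch (hypothetical): decay the LR by 0.9 every 1000 steps with no
# end step (-1 keeps the schedule updating indefinitely).
#
#     lr = step_lr_schedule(
#         tf_compat.train.get_or_create_global_step(),
#         start_step=0,
#         end_step=-1,
#         step_size=1000,
#         init_lr=0.01,
#         gamma=0.9,
#     )
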