def old_to_new_architecture(old_architecture):
  """Convert architectures defined by block_types only.

  These architectures are more restricted -- they always have one input layer
  and one logits layer, and all blocks are only connected to the previous one
  or two blocks.

  Args:
    old_architecture: List of block_type ints.

  Returns:
    Architecture.
  """
  node_list = []
  for block_type in old_architecture:
    block_type_name = blocks_builder.BlockType(block_type).name
    if ("NASNET" in block_type_name) or ("AMOEBA" in block_type_name):
      input_indices = [-2, -1]
    else:
      input_indices = [-1]
    node_list.append(
        Node(
            block_type,
            input_indices=input_indices,
            combiner_type=CombinerType.CONCAT))
  return Architecture(node_list)
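
# Illustrative usage sketch (not part of the library). Assumes the
# module-level names used above (blocks_builder, Node, Architecture,
# CombinerType) are in scope; CONVOLUTION_3X3 is a BlockType member used
# elsewhere in this codebase, and the helper name below is hypothetical.
def _old_to_new_architecture_example():
  old = [
      blocks_builder.BlockType.CONVOLUTION_3X3,
      blocks_builder.BlockType.CONVOLUTION_3X3,
  ]
  # Each non-NASNET/AMOEBA block becomes a Node that reads only from the
  # previous block (input_indices=[-1]) and concatenates its inputs.
  return old_to_new_architecture(old)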
def get_suggestion(self, trials, hparams, my_trial_id=None, model_dir=None):
  """See base class SearchAlgorithm."""
  architectures, losses = self._load_trials(trials)
  # No feasible trials yet.
  if (len(architectures) <
      self._phoenix_spec.linear_model.trials_before_fit):
    return common.encode_architecture(
        hparams.initial_architecture,
        self._phoenix_spec.problem_type), None

  _, suggestion = self._suggest_by_padding(architectures, losses)

  # Decide whether to allow growth.
  # TODO(b/172564129): refactor common behavior with other search algorithms.
  allowed_depth = common.get_allowed_depth(
      len(architectures),
      depth_thresholds=(
          self._phoenix_spec.increase_complexity_minimum_trials),
      max_depth=self._phoenix_spec.maximum_depth)
  explore_mode = common.random(
      self._phoenix_spec.increase_complexity_probability)
  new_block = blocks.BlockType[hparams.new_block_type]

  if suggestion.size <= allowed_depth and explore_mode:
    # increase_structure_depth expects that the architecture contains a
    # flatten block, which may not be true for the linear model's output.
    suggestion = np.array(
        architecture_utils.fix_architecture_order(
            suggestion, self._phoenix_spec.problem_type))
    suggestion = architecture_utils.increase_structure_depth(
        suggestion, new_block, self._phoenix_spec.problem_type)
  elif _contains_row(architectures, suggestion):
    # The linear model suggested an architecture we've already tried
    # in a previous trial, so we mutate it.
    # TODO(b/172564129): more intelligent _contains_row check: should handle
    # when mutate_replace output has been tried, but not just a while loop,
    # since that could run forever if # of untried architectures is small.
    suggestion = common.mutate_replace(suggestion, new_block)
  else:
    # The linear model suggested a novel architecture; use it.
    pass

  suggestion = [blocks.BlockType(b) for b in suggestion]
  return np.array(
      architecture_utils.fix_architecture_order(
          suggestion, self._phoenix_spec.problem_type)), None
def fix_architecture_order(architecture, problem_type):
  """Fixes the architecture order of cnns.

  This function fixes the architecture for convolutional neural networks.
  Namely, if a dense block is before a convolutional block, then it switches
  the order. For the dnn and rnn case, the function doesn't do anything to
  the architecture, as all architectures are valid.

  Args:
    architecture: an iterable of integers or `blocks.BlockType`.
    problem_type: a `PhoenixSpec.ProblemType` enum.

  Returns:
    a list of `blocks.BlockType`.
  """
  # All architectures are valid in the dnn and rnn case.
  if problem_type != phoenix_spec_pb2.PhoenixSpec.CNN:
    return architecture

  output_architecture = []
  flattens = tuple(
      block for block in architecture
      if "FLATTEN" in blocks.BlockType(block).name)
  if not flattens:
    output_architecture = [blocks.BlockType.PLATE_REDUCTION_FLATTEN]
    logging.warning("initial_architecture does not have a flattening block.")
    logging.info("Adding a Flatten block to the architecture.")
  else:
    output_architecture = [flattens[0]]

  for block in architecture:
    if (block == blocks.BlockType.FLATTEN or
        block == blocks.BlockType.DOWNSAMPLE_FLATTEN or
        block == blocks.BlockType.PLATE_REDUCTION_FLATTEN):
      continue
    output_architecture = increase_structure_depth(
        np.array(output_architecture), block, problem_type)
    output_architecture = [i.item() for i in output_architecture]
  return [blocks.BlockType(i) for i in output_architecture]
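
# Illustrative usage sketch (not part of the library). Block names and module
# references mirror the ones used above; the helper name is hypothetical.
def _fix_architecture_order_example():
  # An all-convolutional CNN architecture with no flatten block.
  arch = [blocks.BlockType.CONVOLUTION_3X3, blocks.BlockType.CONVOLUTION_3X3]
  fixed = fix_architecture_order(arch, phoenix_spec_pb2.PhoenixSpec.CNN)
  # A PLATE_REDUCTION_FLATTEN block is added (with a warning), and
  # increase_structure_depth is expected to place each convolution ahead of
  # it, yielding a valid CNN ordering as a list of blocks.BlockType values.
  return fixed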
def get_suggestion(self, trials, hparams, my_trial_id=None, model_dir=None):
  """See the base class for details."""
  if self._phoenix_spec.beam_size < 1:
    raise ValueError("phoenix_spec.beam_size must be >= 1.")
  sorted_trials = self._metadata.get_best_k(
      trials, k=int(1e10), valid_only=True) or []
  num_completed_trials = len(sorted_trials)
  best_trials = sorted_trials[:self._phoenix_spec.beam_size]

  # No feasible trials yet.
  if not best_trials:
    return common.encode_architecture(
        hparams.initial_architecture,
        self._phoenix_spec.problem_type), None

  # Increase depth if possible.
  best_architecture, best_trial = (
      common.choose_random_trial_and_get_architecture(best_trials))
  allowed_depth = common.get_allowed_depth(
      num_completed_trials,
      depth_thresholds=(
          self._phoenix_spec.increase_complexity_minimum_trials),
      max_depth=self._max_depth)
  logging.info("Maximal depth allowed: %d", allowed_depth)
  explore_mode = common.random(
      self._phoenix_spec.increase_complexity_probability)
  new_block = blocks.BlockType[hparams.new_block_type]

  if best_architecture.size < allowed_depth and explore_mode:
    common.write_fork_edge(model_dir, my_trial_id, best_trial)
    return architecture_utils.increase_structure_depth(
        best_architecture, new_block,
        self._phoenix_spec.problem_type), best_trial

  # Otherwise enter evolutionary mode.
  logging.info("using evolution")
  output_architecture = common.mutate_replace(best_architecture, new_block)
  output_architecture = [blocks.BlockType(x) for x in output_architecture]
  common.write_fork_edge(model_dir, my_trial_id, best_trial)
  return np.array(
      architecture_utils.fix_architecture_order(
          output_architecture, self._phoenix_spec.problem_type)), best_trial
def _get_suggestion(architectures,
                    blocks_to_use,
                    losses,
                    grow=False,
                    remove_outliers=False,
                    pass_flatten=False):
  """Testing subroutine to handle boilerplate Trial construction, dirs, etc."""
  # TODO(b/172564129): Figure out how to use mock decorator for free functions.
  with mock.patch("model_search.architecture"
                  ".architecture_utils.get_architecture") as mock_get_arch:
    blocks_strs = [blocks.BlockType(b).name for b in blocks_to_use]
    spec = search_test_utils.create_spec(
        phoenix_spec_pb2.PhoenixSpec.CNN,
        blocks_to_use=blocks_strs,
    )
    spec.search_type = phoenix_spec_pb2.PhoenixSpec.LINEAR_MODEL
    spec.increase_complexity_probability = 1.0 if grow else 0.0
    spec.linear_model.remove_outliers = remove_outliers
    spec.linear_model.trials_before_fit = 1
    algorithm = linear_model.LinearModel(spec)
    mock_get_arch.side_effect = lambda idx: architectures[int(idx)]

    trials = []
    for i, loss in enumerate(losses):
      if isinstance(loss, (np.floating, np.integer)):
        loss = loss.item()
      trials.append(
          trial_module.Trial({
              "id": i,
              "model_dir": str(i),
              "status": "COMPLETED",
              "trial_infeasible": False,
              "final_measurement": {
                  "objective_value": loss
              }
          }))

    hparams = hp.HParams(new_block_type=NEW_BLOCK)
    # Second return val fork_trial is a nonsense concept for LinearModel.
    output_architecture, _ = algorithm.get_suggestion(trials, hparams)
    if not pass_flatten:
      output_architecture = np.array(
          [b for b in output_architecture if b not in blocks.FLATTEN_TYPES])
    return output_architecture
def test_flatten_output(self, grow):
  """Ensure we output suggestions with a flatten block correctly placed."""
  # Make trials s.t. the linear model will output all convolutions.
  architectures = [
      np.repeat(blocks.BlockType.EMPTY_BLOCK, 4),
      np.repeat(blocks.BlockType.CONVOLUTION_3X3, 4)
  ]
  losses = [0.1, 0.01]
  blocks_to_use = [blocks.BlockType.CONVOLUTION_3X3]
  # Make sure the model suggestion includes a flatten block,
  # despite raw model output being all convolutional.
  best = _get_suggestion(
      architectures, blocks_to_use, losses, grow=grow, pass_flatten=True)
  flattens = [b for b in best if "FLATTEN" in blocks.BlockType(b).name]
  nflat = len(flattens)
  self.assertGreater(nflat, 0)
def mutate_replace(architecture, new_block):
  """Replaces one random block with the chosen new block.

  Returns a copy; the input is not modified. The element to replace is chosen
  uniformly at random. Special care is taken not to replace FLATTEN blocks.

  Args:
    architecture: An np.ndarray of integers corresponding to the BlockType
      enum.
    new_block: Integer value of the desired BlockType to insert.

  Returns:
    An np.ndarray of the architecture containing the new block.
  """
  output_architecture = architecture.copy()
  while True:
    block_to_replace = np.random.randint(0, architecture.size)
    blocktype = blocks.BlockType(output_architecture[block_to_replace])
    if blocktype not in blocks.FLATTEN_TYPES:
      break
  output_architecture[block_to_replace] = new_block
  return output_architecture
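
# Illustrative usage sketch (not part of the library). Uses np and blocks as
# imported by the module above; the helper name is hypothetical.
def _mutate_replace_example():
  architecture = np.concatenate([
      np.repeat(blocks.BlockType.CONVOLUTION_3X3, 2),
      np.repeat(blocks.BlockType.PLATE_REDUCTION_FLATTEN, 1),
  ])
  mutated = mutate_replace(architecture, blocks.BlockType.EMPTY_BLOCK)
  # Exactly one of the two convolutions is replaced; the flatten block at the
  # end is never chosen, and the input array is left unmodified.
  return mutated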
def get_suggestion(self, trials, hparams, my_trial_id=None, model_dir=None):
  """See the base class for details."""
  del my_trial_id  # Unused.
  new_block = blocks.BlockType[hparams.new_block_type]
  if self._is_reduction_block(new_block):
    raise ValueError("ConstrainedDescent should not have reduction blocks in "
                     "its search space.")
  if self._phoenix_spec.beam_size < 1:
    raise ValueError("phoenix_spec.beam_size must be >= 1.")

  sorted_trials = self._metadata.get_best_k(
      trials, k=int(1e10), valid_only=True) or []
  num_completed_trials = len(sorted_trials)
  best_trials = sorted_trials[:self._phoenix_spec.beam_size]

  # No feasible trials yet, use initial architecture passed in from hparams.
  if not best_trials:
    best_architecture = common.encode_architecture(
        hparams.initial_architecture, self._phoenix_spec.problem_type)
    best_trial = None
  else:
    best_architecture, best_trial = (
        common.choose_random_trial_and_get_architecture(best_trials))

  # Get the architecture without reductions or replications which will be
  # grown or mutated.
  if self._phoenix_spec.replicate_cell:
    output_architecture = (
        best_architecture[:self._phoenix_spec.num_blocks_in_cell])
    grow_mode = False
  else:
    output_architecture = self._remove_reduction_blocks(best_architecture)
    grow_mode = common.random(
        self._phoenix_spec.increase_complexity_probability)

  # Grow, mutate, and/or replicate architecture then add reductions & flatten.
  allowed_depth = self._get_allowed_depth(num_completed_trials)
  logging.info("Maximum depth allowed: %d", allowed_depth)
  if output_architecture.size < allowed_depth and grow_mode:
    logging.info("Growing the architecture.")
    output_architecture = architecture_utils.increase_structure_depth(
        output_architecture, new_block, self._phoenix_spec.problem_type)
  else:
    logging.info("Mutating the architecture.")
    output_architecture = common.mutate_replace(output_architecture,
                                                new_block)

  if self._phoenix_spec.replicate_cell:
    replication_times = (
        allowed_depth // self._phoenix_spec.num_blocks_in_cell)
    output_architecture = np.concatenate(
        [output_architecture for _ in range(replication_times)])
    output_architecture = self._add_reduction_blocks(
        output_architecture, self._phoenix_spec.num_blocks_in_cell,
        self._phoenix_spec.reduction_block_type)

  output_architecture = [blocks.BlockType(x) for x in output_architecture]
  output_architecture = np.array(
      architecture_utils.fix_architecture_order(
          output_architecture, self._phoenix_spec.problem_type))
  return output_architecture, best_trial
def _is_reduction_block(self, block):
  name = blocks.BlockType(block).name
  return "REDUCTION" in name or "DOWNSAMPLE" in name or "POOL" in name
def create_tower_spec(phoenix_spec,
                      inputs,
                      architecture,
                      dimension,
                      is_frozen,
                      lengths=None,
                      allow_auxiliary_head=False):
  """Creates the logits for the tower.

  Args:
    phoenix_spec: The trial's `phoenix_spec_pb2.PhoenixSpec` proto.
    inputs: The list of `tf.Tensors` of the tower.
    architecture: The list of `blocks.BlockType` of the tower architecture.
    dimension: int - the output tensor last axis dimension.
    is_frozen: Whether the tower should be frozen.
    lengths: A tensor of shape [batch] holding the sequence length for a
      sequential problem (rnn).
    allow_auxiliary_head: Whether to allow creating an auxiliary head if
      possible. Only applicable for CNNs.

  Returns:
    A TowerSpec containing the main and auxiliary logits and the architecture
    of the underlying tower.
  """
  # Discard inputs[0] since this is the raw features.
  all_layer_tensors = inputs
  pre_logits = inputs[-1]
  logits_weight = 1.0
  aux_logits = None
  aux_logits_weight = None
  if (phoenix_spec.problem_type ==
      phoenix_spec_pb2.PhoenixSpec.RNN_ALL_ACTIVATIONS):
    logits = tf.compat.v1.layers.conv1d(
        inputs=pre_logits, filters=dimension, kernel_size=1)
  elif (phoenix_spec.problem_type ==
        phoenix_spec_pb2.PhoenixSpec.RNN_LAST_ACTIVATIONS):
    if lengths is not None:
      logits = utils.last_activations_in_sequence(
          tf.compat.v1.layers.conv1d(
              inputs=pre_logits, filters=dimension, kernel_size=1),
          lengths)
    else:
      logging.warning("Length is missing for rnn_last problem type.")
      logits = tf.compat.v1.layers.conv1d(
          inputs=pre_logits, filters=dimension, kernel_size=1)
  elif phoenix_spec.problem_type == phoenix_spec_pb2.PhoenixSpec.CNN:
    logits = tf.keras.layers.Dense(dimension, name="dense")(pre_logits)
    if allow_auxiliary_head and phoenix_spec.use_auxiliary_head:
      reductions = []
      flattens = []
      for i, block in enumerate(architecture):
        name = blocks.BlockType(block).name
        if "DOWNSAMPLE" in name or "REDUCTION" in name:
          reductions.append(i)
        # Some blocks reduce and flatten.
        if "FLATTEN" in name:
          flattens.append(i)
      if reductions:
        # Add the auxiliary head right before the reduction cell.
        idx = reductions[-1]
        aux_logits = _build_nas_aux_head(inputs[idx], dimension,
                                         phoenix_spec.cnn_data_format)
        if aux_logits is not None:
          aux_logits_weight = phoenix_spec.auxiliary_head_loss_weight
      if flattens and aux_logits is None:
        idx = flattens[-1]
        aux_logits = tf.keras.layers.Dense(
            dimension, name="aux_dense")(inputs[idx])
        aux_logits_weight = phoenix_spec.auxiliary_head_loss_weight
  elif phoenix_spec.problem_type == phoenix_spec_pb2.PhoenixSpec.DNN:
    logits = tf.keras.layers.Dense(dimension, name="dense")(pre_logits)
  else:
    raise ValueError("phoenix_spec.problem_type must be either DNN, CNN, "
                     "RNN_LAST_ACTIVATIONS, or RNN_ALL_ACTIVATIONS.")

  logits = tf.identity(logits, name="logits")
  if aux_logits is not None:
    aux_logits = tf.identity(aux_logits, name="aux_logits")

  # TODO(b/172564129): Remove from eval graph.
  if is_frozen:
    logits = tf.stop_gradient(logits)
    if aux_logits is not None:
      aux_logits = tf.stop_gradient(aux_logits)

  return TowerSpec(
      logits_spec=LogitsSpec(logits, logits_weight, aux_logits,
                             aux_logits_weight),
      architecture=[blocks.BlockType(block).name for block in architecture],
      layer_tensors=all_layer_tensors)
def construct_tower(phoenix_spec,
                    input_tensor,
                    tower_name,
                    architecture,
                    is_training,
                    lengths,
                    logits_dimension,
                    is_frozen,
                    dropout_rate=None,
                    allow_auxiliary_head=False):
  """Creates a tower given an architecture.

  Args:
    phoenix_spec: The trial's `phoenix_spec_pb2.PhoenixSpec` proto.
    input_tensor: An input `tf.Tensor` to build the network on top of.
    tower_name: a unique name for the tower (string).
    architecture: np.array of ints (`blocks.BlockType`) with the architecture
      of the neural network to build.
    is_training: a boolean indicating if we are in training.
    lengths: A `tf.Tensor` with the lengths (dimensions: [batch_size]) holding
      the length of each sequence for sequential problems. Keep as None for
      non-sequential problems.
    logits_dimension: The last axis dimension of the logits.
    is_frozen: Is the tower frozen - integer and not boolean.
    dropout_rate: a float indicating the rate of dropouts to apply between
      blocks. Applied only if the value is above zero.
    allow_auxiliary_head: Whether to allow importing the tower's auxiliary
      head, if the tower has one. Only applicable for CNNs.

  Returns:
    The `TowerSpec` of the built neural network.
  """
  blocks_builders = blocks.Blocks()
  output = [input_tensor]
  block_index = 1
  str_signature = ""
  with tf.compat.v1.variable_scope("Phoenix/{}".format(tower_name)):
    for block_type in architecture:
      str_signature += str(block_type)
      # TODO(b/172564129): Should block_index also be ignored when uniform
      # average transfer learning? How would we handle repeated blocks, e.g.,
      # two FC layers stacked on top of each other.
      scope = "{0}_{1}_{2}".format(
          str(block_index),
          blocks.BlockType(block_type).name, str_signature)
      scope = strip_scope(
          scope, phoenix_spec.transfer_learning_spec.transfer_learning_type,
          str_signature)
      with tf.compat.v1.variable_scope(scope):
        with (arg_scope(DATA_FORMAT_OPS,
                        data_format=phoenix_spec.cnn_data_format)):
          output = blocks_builders[block_type].build(
              input_tensors=output, is_training=is_training, lengths=lengths)
          if dropout_rate and dropout_rate > 0:
            output[-1] = tf.compat.v1.layers.dropout(
                output[-1], rate=dropout_rate, training=is_training)
          block_index += 1

    # Create the logits.
    scope = "last_dense_{}".format(str_signature)
    scope = strip_scope(
        scope, phoenix_spec.transfer_learning_spec.transfer_learning_type,
        str_signature)
    with tf.compat.v1.variable_scope(scope):
      tower_spec = create_tower_spec(phoenix_spec, output, architecture,
                                     logits_dimension, is_frozen, lengths,
                                     allow_auxiliary_head)

  set_architecture(architecture, tower_name)
  set_parameter(tower_name, DROPOUTS,
                (-1.0 if dropout_rate is None else dropout_rate), tf.float32)
  set_parameter(tower_name, IS_FROZEN, int(is_frozen))

  return tower_spec
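
# Illustrative usage sketch (not part of the library). The spec and features
# arguments, the tower name, and the helper name are assumptions; the block
# types and the construct_tower signature come from the code above. Reading
# the logits via tower_spec.logits_spec.logits assumes LogitsSpec exposes a
# `logits` field, as its constructor arguments suggest.
def _construct_tower_example(phoenix_spec, features):
  architecture = np.array([
      blocks.BlockType.CONVOLUTION_3X3,
      blocks.BlockType.PLATE_REDUCTION_FLATTEN,
  ])
  tower_spec = construct_tower(
      phoenix_spec=phoenix_spec,
      input_tensor=features,
      tower_name="example_tower",
      architecture=architecture,
      is_training=True,
      lengths=None,
      logits_dimension=10,
      is_frozen=False)
  return tower_spec.logits_spec.logits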
def block_name(self):
  return str(blocks_builder.BlockType(self.block_type).name)