def build_model_base(images, model_name, training, override_params=None):
  """A helper function to create a base model and return global_pool.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist
      in efficientnet_model.GlobalParams.

  Returns:
    features: global pool features.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  blocks_args, global_params = get_model_params(model_name, override_params)

  with tf.variable_scope(model_name):
    model = efficientnet_model.Model(blocks_args, global_params)
    features = model(images, training=training, features_only=True)

  features = tf.identity(features, 'global_pool')
  return features, model.endpoints
def build_model_base(images, model_name, training, override_params=None):
  """Creates a base feature network and returns the features before pooling.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist
      in efficientnet_model.GlobalParams.

  Returns:
    features: base features before pooling.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  # For backward compatibility.
  if override_params and override_params.get('drop_connect_rate', None):
    override_params['survival_prob'] = 1 - override_params['drop_connect_rate']

  blocks_args, global_params = get_model_params(model_name, override_params)

  with tf.variable_scope(model_name):
    model = efficientnet_model.Model(blocks_args, global_params)
    features = model(images, training=training, features_only=True)

  features = tf.identity(features, 'features')
  return features, model.endpoints
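# Usage sketch (not from the original file): extracting backbone features
# with build_model_base. The placeholder shape and model name below are
# illustrative assumptions, written for a TF1-style graph context.
def _example_extract_features():
  images = tf.placeholder(tf.float32, shape=(None, 224, 224, 3))
  features, endpoints = build_model_base(
      images, model_name='efficientnet-b0', training=False)
  # `features` is the feature map before pooling; `endpoints` maps layer
  # names to intermediate tensors, handy for detection/segmentation heads.
  return features, endpoints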
def build_model(images,
                model_name,
                training,
                override_params=None,
                model_dir=None,
                fine_tuning=False):
  """A helper function to create a model and return predicted logits.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist
      in efficientnet_model.GlobalParams.
    model_dir: string, optional model dir for saving configs.
    fine_tuning: boolean, whether the model is used for finetuning.

  Returns:
    logits: the logits tensor of classes.
    model: the model object; per-layer endpoints are available via
      model.endpoints.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  if not training or fine_tuning:
    if not override_params:
      override_params = {}
    override_params['batch_norm'] = utils.BatchNormalization
  blocks_args, global_params = get_model_params(model_name, override_params)

  if model_dir:
    param_file = os.path.join(model_dir, 'model_params.txt')
    if not tf.gfile.Exists(param_file):
      if not tf.gfile.Exists(model_dir):
        tf.gfile.MakeDirs(model_dir)
      with tf.gfile.GFile(param_file, 'w') as f:
        logging.info('writing to %s', param_file)
        f.write('model_name= %s\n\n' % model_name)
        f.write('global_params= %s\n\n' % str(global_params))
        f.write('blocks_args= %s\n\n' % str(blocks_args))

  with tf.variable_scope(model_name):
    model = efficientnet_model.Model(blocks_args, global_params)
    logits = model(images, training=training)

  logits = tf.identity(logits, 'logits')
  return logits, model
def build_model(images,
                model_name,
                training,
                override_params=None,
                model_dir=None,
                use_adv_bn=False,
                is_teacher=False):
  """A helper function to create a model and return predicted logits.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist
      in efficientnet_model.GlobalParams.
    model_dir: string, optional model dir for saving configs.
    use_adv_bn: boolean, whether to use adversarial batch normalization.
    is_teacher: boolean, whether the model is used as a teacher.

  Returns:
    logits: the logits tensor of classes.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  blocks_args, global_params = get_model_params(model_name, override_params)

  if model_dir:
    param_file = os.path.join(model_dir, 'model_params.txt')
    if not tf.gfile.Exists(param_file):
      if not tf.gfile.Exists(model_dir):
        tf.gfile.MakeDirs(model_dir)
      with tf.gfile.GFile(param_file, 'w') as f:
        tf.logging.info('writing to %s' % param_file)
        f.write('model_name= %s\n\n' % model_name)
        f.write('global_params= %s\n\n' % str(global_params))
        f.write('blocks_args= %s\n\n' % str(blocks_args))

  model = efficientnet_model.Model(blocks_args, global_params, use_adv_bn,
                                   is_teacher)
  logits = model(images, training=training)
  logits = tf.identity(logits, 'logits')
  return logits, model.endpoints
def build_model(images, model_name, training, override_params=None):
  """A helper function to create a model and return predicted logits.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist
      in efficientnet_model.GlobalParams.

  Returns:
    logits: the logits tensor of classes.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  if model_name.startswith('efficientnet'):
    width_coefficient, depth_coefficient, _ = efficientnet_params(model_name)
    blocks_args, global_params = efficientnet(width_coefficient,
                                              depth_coefficient)
  else:
    raise NotImplementedError('model name is not pre-defined: %s' % model_name)

  if override_params:
    # ValueError will be raised here if override_params has fields not
    # included in global_params.
    global_params = global_params._replace(**override_params)

  tf.logging.info('global_params= %s', global_params)
  tf.logging.info('blocks_args= %s', blocks_args)
  with tf.variable_scope(model_name):
    model = efficientnet_model.Model(blocks_args, global_params)
    logits = model(images, training=training)

  logits = tf.identity(logits, 'logits')
  return logits, model.endpoints
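# Illustrative sketch (hypothetical helper, not part of the original file):
# every key in override_params must name a GlobalParams field, otherwise the
# _replace call above raises ValueError. `num_classes` and `dropout_rate`
# are assumed to be valid fields here.
def _example_override_params(images):
  return build_model(
      images,
      'efficientnet-b0',
      training=True,
      override_params={'num_classes': 10, 'dropout_rate': 0.3})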
def create_model(model_name, input_shape=None):
  """Creates the given model and initializes its variables.

  Args:
    model_name: str, one of the DEFAULT_INPUT_SIZES.keys()
    input_shape: tuple or None, if None will be read from the dictionary.

  Returns:
    (model, input_shape) tuple.
  """
  if input_shape is None:
    input_size = DEFAULT_INPUT_SIZES[model_name]
    input_shape = (1, input_size, input_size, 3)
  blocks_args, global_params = efficientnet_builder.get_model_params(
      model_name, None)
  print('global_params= %s' % str(global_params))
  print('blocks_args= %s' % '\n'.join(map(str, blocks_args)))
  tf.reset_default_graph()
  with tf.variable_scope(model_name):
    model = efficientnet_model.Model(blocks_args, global_params)
    # This will initialize the variables.
    _ = model(tf.ones(input_shape))
  return model, input_shape
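# Usage sketch (hypothetical; 'efficientnet-b0' is assumed to be a key of
# DEFAULT_INPUT_SIZES): build the model at its default resolution, e.g. for
# inspecting parameters or counting FLOPS. Variables are already initialized
# by the tf.ones forward pass inside create_model.
def _example_create_model():
  model, input_shape = create_model('efficientnet-b0')
  print('input shape: %s' % (input_shape,))
  return model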
def build_model(images,
                model_name,
                training,
                override_params=None,
                model_dir=None,
                fine_tuning=False,
                features_only=False,
                pooled_features_only=False):
  """A helper function to create a model and return predicted logits.

  Args:
    images: input images tensor.
    model_name: string, the predefined model name.
    training: boolean, whether the model is constructed for training.
    override_params: A dictionary of params for overriding. Fields must exist
      in efficientnet_model.GlobalParams.
    model_dir: string, optional model dir for saving configs.
    fine_tuning: boolean, whether the model is used for finetuning.
    features_only: build the base feature network only (excluding final
      1x1 conv layer, global pooling, dropout and fc head).
    pooled_features_only: build the base network for features extraction
      (after 1x1 conv layer and global pooling, but before dropout and fc
      head).

  Returns:
    logits: the logits tensor of classes.
    endpoints: the endpoints for each layer.

  Raises:
    When model_name specified an undefined model, raises NotImplementedError.
    When override_params has invalid fields, raises ValueError.
  """
  assert isinstance(images, tf.Tensor)
  assert not (features_only and pooled_features_only)

  # For backward compatibility.
  if override_params and override_params.get('drop_connect_rate', None):
    override_params['survival_prob'] = 1 - override_params['drop_connect_rate']

  if not training or fine_tuning:
    if not override_params:
      override_params = {}
    override_params['batch_norm'] = utils.BatchNormalization
  blocks_args, global_params = get_model_params(model_name, override_params)

  if model_dir:
    param_file = os.path.join(model_dir, 'model_params.txt')
    if not tf.gfile.Exists(param_file):
      if not tf.gfile.Exists(model_dir):
        tf.gfile.MakeDirs(model_dir)
      with tf.gfile.GFile(param_file, 'w') as f:
        logging.info('writing to %s', param_file)
        f.write('model_name= %s\n\n' % model_name)
        f.write('global_params= %s\n\n' % str(global_params))
        f.write('blocks_args= %s\n\n' % str(blocks_args))

  with tf.variable_scope(model_name):
    model = efficientnet_model.Model(blocks_args, global_params)
    outputs = model(
        images,
        training=training,
        features_only=features_only,
        pooled_features_only=pooled_features_only)
  if features_only:
    outputs = tf.identity(outputs, 'features')
  elif pooled_features_only:
    outputs = tf.identity(outputs, 'pooled_features')
  else:
    outputs = tf.identity(outputs, 'logits')
  return outputs, model.endpoints
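# Sketch of the pooled_features_only path (hypothetical helper; the model
# name is an assumption): the output is the embedding after the final 1x1
# conv and global pooling but before dropout and the fc head, a common
# choice for transfer learning or image retrieval.
def _example_image_embeddings(images):
  pooled_features, _ = build_model(
      images, 'efficientnet-b0', training=False, pooled_features_only=True)
  return pooled_features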
def main(argv):
  del argv  # unused arg
  tf.io.gfile.makedirs(FLAGS.output_dir)
  logging.info('Saving checkpoints at %s', FLAGS.output_dir)
  tf.random.set_seed(FLAGS.seed)

  batch_size = FLAGS.per_core_batch_size * FLAGS.num_cores
  steps_per_epoch = APPROX_IMAGENET_TRAIN_IMAGES // batch_size
  steps_per_eval = IMAGENET_VALIDATION_IMAGES // batch_size

  if FLAGS.use_gpu:
    logging.info('Use GPU')
    strategy = tf.distribute.MirroredStrategy()
  else:
    logging.info('Use TPU at %s',
                 FLAGS.tpu if FLAGS.tpu is not None else 'local')
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

  width_coefficient, depth_coefficient, input_image_size, dropout_rate = (
      efficientnet_model.efficientnet_params(FLAGS.model_name))
  imagenet_train = utils.ImageNetInput(
      is_training=True,
      use_bfloat16=FLAGS.use_bfloat16,
      data_dir=FLAGS.data_dir,
      batch_size=FLAGS.per_core_batch_size,
      image_size=input_image_size,
      normalize_input=True,
      one_hot=True)
  imagenet_eval = utils.ImageNetInput(
      is_training=False,
      use_bfloat16=FLAGS.use_bfloat16,
      data_dir=FLAGS.data_dir,
      batch_size=batch_size,
      image_size=input_image_size,
      normalize_input=True,
      one_hot=True)
  train_dataset = strategy.experimental_distribute_datasets_from_function(
      imagenet_train.input_fn)
  test_datasets = {
      'clean': strategy.experimental_distribute_dataset(
          imagenet_eval.input_fn()),
  }
  train_iterator = iter(train_dataset)
  test_iterator = iter(test_datasets['clean'])

  if FLAGS.use_bfloat16:
    policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
    tf.keras.mixed_precision.experimental.set_policy(policy)

  summary_writer = tf.summary.create_file_writer(
      os.path.join(FLAGS.output_dir, 'summaries'))

  with strategy.scope():
    logging.info('Building %s model', FLAGS.model_name)
    model = efficientnet_model.Model(width_coefficient,
                                     depth_coefficient,
                                     dropout_rate)

    scaled_lr = FLAGS.base_learning_rate * (batch_size / 256.0)
    # Decay epoch is 2.4, warmup epoch is 5 according to the EfficientNet
    # paper.
    decay_steps = steps_per_epoch * 2.4
    warmup_step = steps_per_epoch * 5
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        scaled_lr, decay_steps, decay_rate=0.97, staircase=True)
    learning_rate = utils.WarmupDecaySchedule(lr_schedule, warmup_step)
    optimizer = tf.keras.optimizers.RMSprop(
        learning_rate, rho=0.9, momentum=0.9, epsilon=0.001)
    if FLAGS.moving_average_decay > 0:
      optimizer = utils.MovingAverage(
          optimizer, average_decay=FLAGS.moving_average_decay)
      optimizer.shadow_copy(model)

    metrics = {
        'train/negative_log_likelihood': tf.keras.metrics.Mean(),
        'train/accuracy': tf.keras.metrics.CategoricalAccuracy(),
        'train/ece': ed.metrics.ExpectedCalibrationError(
            num_bins=FLAGS.num_bins),
        'train/loss': tf.keras.metrics.Mean(),
        'test/negative_log_likelihood': tf.keras.metrics.Mean(),
        'test/accuracy': tf.keras.metrics.CategoricalAccuracy(),
        'test/ece': ed.metrics.ExpectedCalibrationError(
            num_bins=FLAGS.num_bins),
    }
    logging.info('Finished building %s model', FLAGS.model_name)

    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir)
    initial_epoch = 0
    if latest_checkpoint:
      # checkpoint.restore must be within a strategy.scope() so that optimizer
      # slot variables are mirrored.
      checkpoint.restore(latest_checkpoint)
      logging.info('Loaded checkpoint %s', latest_checkpoint)
      initial_epoch = optimizer.iterations.numpy() // steps_per_epoch

  def train_step(inputs):
    """Build `step_fn` for efficientnet learning."""
    images, labels = inputs
    num_replicas = tf.cast(strategy.num_replicas_in_sync, tf.float32)
    l2_coeff = tf.cast(FLAGS.l2, tf.float32)

    with tf.GradientTape() as tape:
      logits = model(images, training=True)
      logits = tf.cast(logits, tf.float32)
      negative_log_likelihood = tf.reduce_mean(
          tf.keras.losses.categorical_crossentropy(
              labels,
              logits,
              from_logits=True,
              label_smoothing=FLAGS.label_smoothing))

      def _is_batch_norm(v):
        """Decide whether a variable belongs to `batch_norm`."""
        keywords = ['batchnorm', 'batch_norm', 'bn']
        return any([k in v.name.lower() for k in keywords])

      l2_loss = tf.add_n([
          tf.nn.l2_loss(v)
          for v in model.trainable_weights
          if not _is_batch_norm(v)
      ])
      loss = negative_log_likelihood + l2_coeff * l2_loss
      scaled_loss = loss / num_replicas

    gradients = tape.gradient(scaled_loss, model.trainable_weights)
    # MovingAverage optimizer automatically updates avg when applying
    # gradients.
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

    sparse_labels = tf.cast(
        tf.math.argmax(labels, axis=-1, output_type=tf.int32), tf.float32)
    probs = tf.nn.softmax(logits)
    metrics['train/loss'].update_state(loss)
    metrics['train/negative_log_likelihood'].update_state(
        negative_log_likelihood)
    metrics['train/accuracy'].update_state(labels, logits)
    metrics['train/ece'].update_state(sparse_labels, probs)

    step_info = {
        'loss/negative_log_likelihood': negative_log_likelihood / num_replicas,
        'loss/total_loss': scaled_loss,
    }
    return step_info

  def eval_step(inputs):
    """A single step."""
    images, labels = inputs
    logits = model(images, training=False)
    logits = tf.cast(logits, tf.float32)
    negative_log_likelihood = tf.reduce_mean(
        tf.keras.losses.categorical_crossentropy(
            labels, logits, from_logits=True))
    sparse_labels = tf.cast(
        tf.math.argmax(labels, axis=-1, output_type=tf.int32), tf.float32)
    probs = tf.nn.softmax(logits)
    metrics['test/negative_log_likelihood'].update_state(
        negative_log_likelihood)
    metrics['test/accuracy'].update_state(labels, logits)
    metrics['test/ece'].update_state(sparse_labels, probs)

  @tf.function
  def epoch_fn(should_eval):
    """Build `epoch_fn` for training and potential eval."""
    for _ in tf.range(tf.cast(steps_per_epoch, tf.int32)):
      info = strategy.run(train_step, args=(next(train_iterator),))

      optim_step = optimizer.iterations
      if optim_step % tf.cast(100, optim_step.dtype) == 0:
        for k, v in info.items():
          v_reduce = strategy.reduce(tf.distribute.ReduceOp.SUM, v, None)
          tf.summary.scalar(k, v_reduce, optim_step)
        tf.summary.scalar('loss/lr', learning_rate(optim_step), optim_step)
        summary_writer.flush()

    if should_eval:
      if isinstance(optimizer, utils.MovingAverage):
        optimizer.swap_weights(strategy)
      for _ in tf.range(tf.cast(steps_per_eval, tf.int32)):
        strategy.run(eval_step, args=(next(test_iterator),))
      if isinstance(optimizer, utils.MovingAverage):
        optimizer.swap_weights(strategy)

  # Main training loop.
  start_time = time.time()
  with summary_writer.as_default():
    for epoch in range(initial_epoch, FLAGS.train_epochs):
      logging.info('Starting to run epoch: %s', epoch)
      should_eval = (epoch % FLAGS.evaluation_interval == 0)
      epoch_start_time = time.time()
      # Pass tf constant to avoid re-tracing.
      epoch_fn(tf.constant(should_eval))
      epoch_time = time.time() - epoch_start_time
      example_per_secs = (steps_per_epoch * batch_size) / epoch_time
      if not should_eval:
        tf.summary.scalar(
            'examples_per_secs', example_per_secs, optimizer.iterations)
        summary_writer.flush()

      current_step = (epoch + 1) * steps_per_epoch
      max_steps = steps_per_epoch * FLAGS.train_epochs
      time_elapsed = time.time() - start_time
      steps_per_sec = float(current_step) / time_elapsed
      eta_seconds = (max_steps - current_step) / steps_per_sec
      message = ('{:.1%} completion: epoch {:d}/{:d}. {:.1f} steps/s. '
                 'ETA: {:.0f} min. Time elapsed: {:.0f} min'.format(
                     current_step / max_steps,
                     epoch + 1,
                     FLAGS.train_epochs,
                     steps_per_sec,
                     eta_seconds / 60,
                     time_elapsed / 60))
      logging.info(message)

      logging.info('Train Loss: %.4f, Accuracy: %.2f%%',
                   metrics['train/loss'].result(),
                   metrics['train/accuracy'].result() * 100)
      if should_eval:
        logging.info('Test NLL: %.4f, Accuracy: %.2f%%',
                     metrics['test/negative_log_likelihood'].result(),
                     metrics['test/accuracy'].result() * 100)

      total_metrics = metrics.copy()
      total_results = {
          name: metric.result() for name, metric in total_metrics.items()
      }
      total_results.update({'lr': learning_rate(optimizer.iterations)})
      with summary_writer.as_default():
        for name, result in total_results.items():
          if should_eval or 'test' not in name:
            tf.summary.scalar(name, result, step=epoch + 1)

      for metric in metrics.values():
        metric.reset_states()

      if (FLAGS.checkpoint_interval > 0 and
          (epoch + 1) % FLAGS.checkpoint_interval == 0):
        checkpoint_name = checkpoint.save(
            os.path.join(FLAGS.output_dir, 'checkpoint'))
        logging.info('Saved checkpoint to %s', checkpoint_name)

  final_checkpoint_name = checkpoint.save(
      os.path.join(FLAGS.output_dir, 'checkpoint'))
  logging.info('Saved last checkpoint to %s', final_checkpoint_name)
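# Typical entry point for an absl-based script like this one (a sketch, not
# necessarily the original file's footer; assumes `from absl import app`
# alongside the FLAGS/logging imports used above).
if __name__ == '__main__':
  app.run(main)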