def set_env(use_amp, use_fast_math=False):
    """Set CUDA/TensorFlow environment variables for performance tuning."""
    os.environ['CUDA_CACHE_DISABLE'] = '0'
    os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_GPU_THREAD_COUNT'] = '1' if hvd is None else str(hvd.size())
    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
    os.environ['TF_ADJUST_HUE_FUSED'] = '1'
    os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
    os.environ['TF_DISABLE_NVTX_RANGES'] = '1'

    if use_amp:
        hvd_info_rank0("AMP is activated")
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE'] = '1'
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION_LOSS_SCALING'] = '1'

    if use_fast_math:
        hvd_info_rank0("use_fast_math is activated")
        os.environ['TF_ENABLE_CUBLAS_TENSOR_OP_MATH_FP32'] = '1'
        os.environ['TF_ENABLE_CUDNN_TENSOR_OP_MATH_FP32'] = '1'
        os.environ['TF_ENABLE_CUDNN_RNN_TENSOR_OP_MATH_FP32'] = '1'

def get_session_config(use_xla):
    """Build a tf.ConfigProto tuned for (multi-GPU, Horovod-aware) training."""
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.log_device_placement = False
    config.gpu_options.allow_growth = True
    if horovod_enabled():
        config.gpu_options.visible_device_list = str(hvd.local_rank())

    if use_xla:
        hvd_info_rank0("XLA is activated - Experimental Feature")
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    config.gpu_options.force_gpu_compatible = True  # Force pinned memory

    config.intra_op_parallelism_threads = 1  # Avoid pool of Eigen threads
    if FLAGS.deterministic:
        config.inter_op_parallelism_threads = 1
    elif horovod_enabled():
        config.inter_op_parallelism_threads = max(
            2, (multiprocessing.cpu_count() // hvd.size()) - 2)
    else:
        config.inter_op_parallelism_threads = 4

    return config

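# A minimal usage sketch (not part of the original module) showing how set_env
# and get_session_config are typically combined before building the graph.
# The flag values and the plain tf.Session usage are illustrative assumptions;
# tf, hvd and FLAGS are assumed to be set up at module level as elsewhere in
# this file.
def _example_session_setup():
    set_env(use_amp=True, use_fast_math=False)           # enable AMP graph rewrites
    session_config = get_session_config(use_xla=False)   # per-rank GPU/thread config
    with tf.Session(config=session_config) as sess:      # TF1-style session
        sess.run(tf.global_variables_initializer())
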
def efficientdet(features, model_name=None, config=None, **kwargs):
    """Build EfficientDet model."""
    if not config and not model_name:
        raise ValueError('please specify either model name or config')

    if not config:
        config = hparams_config.get_efficientdet_config(model_name)

    if kwargs:
        config.override(kwargs)

    hvd_info_rank0(config)

    # build backbone features.
    features = build_backbone(features, config)
    hvd_info_rank0('backbone params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # build feature network.
    fpn_feats = build_feature_network(features, config)
    hvd_info_rank0('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # build class and box predictions.
    class_outputs, box_outputs = build_class_and_box_outputs(fpn_feats, config)
    hvd_info_rank0('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    return class_outputs, box_outputs

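# A minimal usage sketch (assumption, not part of the original code): building
# EfficientDet outputs by model name for a batch of images. The model name and
# the 512x512 input size are illustrative; any name known to hparams_config
# can be used.
def _example_build_efficientdet():
    images = tf.placeholder(tf.float32, [None, 512, 512, 3], name='images')
    class_outputs, box_outputs = efficientdet(images, model_name='efficientdet-d0')
    # class_outputs / box_outputs are dicts keyed by pyramid level.
    return class_outputs, box_outputs
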
def build_bifpn_layer(feats, fpn_name, fpn_config, is_training, input_size,
                      fpn_num_filters, min_level, max_level, separable_conv,
                      apply_bn_for_resampling, conv_after_downsample,
                      use_native_resize_op, conv_bn_relu_pattern, pooling_type):
    """Builds a feature pyramid given previous feature pyramid and config."""
    config = fpn_config or get_fpn_config(fpn_name)

    num_output_connections = [0 for _ in feats]
    for i, fnode in enumerate(config.nodes):
        with tf.variable_scope('fnode{}'.format(i)):
            hvd_info_rank0(f'fnode {i} : {fnode}')
            new_node_width = int(fnode['width_ratio'] * input_size)
            nodes = []
            # Resample every input feature map to this node's resolution/channels.
            for idx, input_offset in enumerate(fnode['inputs_offsets']):
                input_node = feats[input_offset]
                num_output_connections[input_offset] += 1
                input_node = resample_feature_map(
                    input_node, '{}_{}_{}'.format(idx, input_offset, len(feats)),
                    new_node_width, fpn_num_filters,
                    apply_bn_for_resampling, is_training,
                    conv_after_downsample,
                    use_native_resize_op,
                    pooling_type)
                nodes.append(input_node)

            # Combine all nodes.
            dtype = nodes[0].dtype
            if config.weight_method == 'attn':
                edge_weights = [
                    tf.cast(tf.Variable(1.0, name='WSM'), dtype=dtype)
                    for _ in range(len(fnode['inputs_offsets']))
                ]
                normalized_weights = tf.nn.softmax(tf.stack(edge_weights))
                nodes = tf.stack(nodes, axis=-1)
                new_node = tf.reduce_sum(
                    tf.multiply(nodes, normalized_weights), -1)
            elif config.weight_method == 'fastattn':
                edge_weights = [
                    tf.nn.relu(tf.cast(tf.Variable(1.0, name='WSM'), dtype=dtype))
                    for _ in range(len(fnode['inputs_offsets']))
                ]
                weights_sum = tf.add_n(edge_weights)
                nodes = [
                    nodes[i] * edge_weights[i] / (weights_sum + 0.0001)
                    for i in range(len(nodes))
                ]
                new_node = tf.add_n(nodes)
            elif config.weight_method == 'sum':
                new_node = tf.add_n(nodes)
            else:
                raise ValueError('unknown weight_method {}'.format(
                    config.weight_method))

            with tf.variable_scope('op_after_combine{}'.format(len(feats))):
                if not conv_bn_relu_pattern:
                    new_node = utils.relu_fn(new_node)

                if separable_conv:
                    conv_op = functools.partial(
                        tf.layers.separable_conv2d, depth_multiplier=1)
                else:
                    conv_op = tf.layers.conv2d

                new_node = conv_op(
                    new_node,
                    filters=fpn_num_filters,
                    kernel_size=(3, 3),
                    padding='same',
                    use_bias=True if not conv_bn_relu_pattern else False,
                    name='conv')

                new_node = utils.batch_norm_relu(
                    new_node,
                    is_training_bn=is_training,
                    relu=False if not conv_bn_relu_pattern else True,
                    data_format='channels_last',
                    name='bn')

            feats.append(new_node)
            num_output_connections.append(0)

    # Pick, for each output level, the newest node whose width ratio matches
    # that level (F(l) is the level-to-width-ratio helper used by the FPN configs).
    output_feats = {}
    for l in range(min_level, max_level + 1):
        for i, fnode in enumerate(reversed(config.nodes)):
            if fnode['width_ratio'] == F(l):
                output_feats[l] = feats[-1 - i]
                break
    return output_feats

def build_feature_network(features, config):
    """Build FPN input features.

    Args:
      features: a dict from levels to backbone feature tensors.
      config: a dict-like config, including all parameters.

    Returns:
      A dict from levels to the feature maps processed after feature network.
    """
    feats = []
    if config.min_level not in features.keys():
        raise ValueError(
            'features.keys ({}) should include min_level ({})'.format(
                features.keys(), config.min_level))

    # Build additional input features that are not from backbone.
    for level in range(config.min_level, config.max_level + 1):
        if level in features.keys():
            feats.append(features[level])
        else:
            # Adds a coarser level by downsampling the last feature map.
            feats.append(
                resample_feature_map(
                    feats[-1],
                    name='p%d' % level,
                    target_width=feats[-1].shape[1] // 2,
                    target_num_channels=config.fpn_num_filters,
                    apply_bn=config.apply_bn_for_resampling,
                    is_training=config.is_training_bn,
                    conv_after_downsample=config.conv_after_downsample,
                    use_native_resize_op=config.use_native_resize_op,
                    pooling_type=config.pooling_type))

    _verify_feats_size(feats,
                       input_size=config.image_size,
                       min_level=config.min_level,
                       max_level=config.max_level)

    with tf.variable_scope('fpn_cells'):
        for rep in range(config.fpn_cell_repeats):
            with tf.variable_scope('cell_{}'.format(rep)):
                hvd_info_rank0('building cell {}'.format(rep))
                new_feats = build_bifpn_layer(
                    feats=feats,
                    fpn_name=config.fpn_name,
                    fpn_config=config.fpn_config,
                    input_size=config.image_size,
                    fpn_num_filters=config.fpn_num_filters,
                    min_level=config.min_level,
                    max_level=config.max_level,
                    separable_conv=config.separable_conv,
                    is_training=config.is_training_bn,
                    apply_bn_for_resampling=config.apply_bn_for_resampling,
                    conv_after_downsample=config.conv_after_downsample,
                    use_native_resize_op=config.use_native_resize_op,
                    conv_bn_relu_pattern=config.conv_bn_relu_pattern,
                    pooling_type=config.pooling_type)

                feats = [
                    new_feats[level]
                    for level in range(config.min_level, config.max_level + 1)
                ]

                _verify_feats_size(feats,
                                   input_size=config.image_size,
                                   min_level=config.min_level,
                                   max_level=config.max_level)

    return new_feats

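# A minimal sketch (assumption, illustrative only): the feature network keeps
# one map per pyramid level, and each level l has spatial width image_size / 2**l
# (the same width_ratio relation used by the BiFPN node matching above). For
# image_size=512 and levels 3..7 that gives widths 64, 32, 16, 8 and 4.
def _example_fpn_level_widths(image_size=512, min_level=3, max_level=7):
    return {level: image_size // (2 ** level)
            for level in range(min_level, max_level + 1)}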