def profile_full_pass(self, device, num_warmup, num_iter, batch_size):
    """Profile full pass execution with tensorflow."""
    opts = profilers.ProfilerOptions()
    opts.num_warmup = num_warmup
    opts.num_iter = num_iter
    opts.include_bias_and_activation = False
    from profilers.tensorflow_profiler import TensorFlowProfiler
    tf_profiler = TensorFlowProfiler(opts)

    # Collect every layer op in topological order, overriding the batch
    # size on the way when one was requested.
    layer_ops = []
    for spec in self.graph.topology_order:
        op = spec.layer_op
        if batch_size:
            op.batch_size = batch_size
        layer_ops.append(op)
    return tf_profiler.profile_full_pass(layer_ops)
def _profile_for_batch_size(layer_list,
                            direction,
                            device,
                            batch_size,
                            use_only_gemm,
                            ppp_comp,
                            ppp_comm,
                            cross_device_bandwidth=None):
    """Use the flops profiler to estimate execution time under the given spec.

    Args:
        layer_list: iterable of layer specs, each exposing `.layer_op`,
            `.device_id` and `.parents` (placement information).
        direction: pass direction forwarded to the profiler options.
        device: device spec handed to the flops profiler.
        batch_size: if truthy, overrides each layer op's batch size.
        use_only_gemm: if True, disable cuDNN heuristics in the profiler.
        ppp_comp: compute adjustment factor forwarded to the options.
        ppp_comm: communication adjustment factor forwarded to the options.
        cross_device_bandwidth: optional bandwidth used for cross-device
            communication estimates (passed through to the profiler).

    Returns:
        (times, params_in_bytes): per-layer estimated times in layer order,
        and the summed `weights_in_bytes` over all layers.
    """
    # Lazy %-style args: the message is only formatted if DEBUG is enabled.
    logger.debug('Profile for\n pass: %s\n device: %s\n batch size: %s',
                 direction, device.name, batch_size)

    # The options (and therefore the profiler) do not depend on the layer,
    # so build them once instead of re-instantiating them per iteration.
    options = profilers.ProfilerOptions()
    options.direction = direction
    options.gradient_wrt = None
    if use_only_gemm:
        options.use_cudnn_heuristics = False
    # FIXME: we don't include bias and activation for simplicity.
    options.include_bias_and_activation = False
    options.ppp_comp = ppp_comp
    options.ppp_comm = ppp_comm
    flops_profiler = profilers.FlopsProfiler(options, device)

    times = []
    params_in_bytes = 0
    # Estimate the time for each layer with the shared profiler.
    for layer_spec in layer_list:
        layer = layer_spec.layer_op
        if batch_size:
            layer.batch_size = batch_size
        layer_time = flops_profiler.profile(
            layer, layer_spec.device_id,
            [p.device_id for p in layer_spec.parents], cross_device_bandwidth)
        params_in_bytes += layer.weights_in_bytes
        times.append(layer_time)
    return times, params_in_bytes
def profile(netspec_files, device_name, num_warmup, num_iter,
            extract_conv_dir, direction, gradient_wrt, use_only_gemm,
            executor, ppp_comp, separator):
    """Profiling a neural network.

    For each netspec file, runs the requested executor ('tensorflow' and/or
    'cudnn') through a Profiler and prints a per-layer timing table.
    """

    def _print_tabular(cudnn_result, tensorflow_result):
        # Print one row per layer, merging the cuDNN and TensorFlow results
        # (either side may be a list of Nones when that executor didn't run).
        assert len(cudnn_result) == len(tensorflow_result)
        print(
            separator.join(
                ['layer', 'ours', 'cudnn', 'tensorflow', 'ours_alg',
                 'cu_alg']))
        sum_ours, sum_cu, sum_tf = 0, 0, 0
        for cudnn_prof, tf_prof in zip(cudnn_result, tensorflow_result):
            # Defaults so a row prints even when only one executor ran.
            (layer_name, ours_time, cudnn_time, tf_time, our_msg,
             cu_msg) = ['', 0, 0, 0, '', '']
            if cudnn_prof:
                layer_name, ours_time, cudnn_time, _, our_msg, cu_msg = (
                    cudnn_prof)
            # NOTE(review): when both results are present, the TF tuple
            # overwrites layer_name/ours_time/our_msg from the cuDNN tuple —
            # presumably they agree per layer; confirm.
            if tf_prof:
                layer_name, ours_time, tf_time, _, our_msg, _ = tf_prof
            # Strip the common algorithm-name prefix to keep columns narrow.
            our_msg = our_msg.replace('CUDNN_CONVOLUTION_', '')
            cu_msg = cu_msg.replace('CUDNN_CONVOLUTION_', '')
            # Skip the input/data layer — it has no compute to report.
            if layer_name == 'data':
                continue
            sum_ours += ours_time
            sum_cu += cudnn_time
            sum_tf += tf_time
            print(
                separator.join([
                    str(x) for x in (layer_name, ours_time, cudnn_time,
                                     tf_time, our_msg, cu_msg)
                ]))
        print(separator.join(['Sum', str(sum_ours), str(sum_cu),
                              str(sum_tf)]))

    # Maps netspec file -> (cudnn_result, tensorflow_result).
    all_results = dict()
    for netspec_file in netspec_files:
        profiler = Profiler(netspec_file, separator=separator)
        if extract_conv_dir:
            # Optionally dump the conv layers to disk before profiling.
            profiler.save_conv_layers(extract_conv_dir)
        # NOTE(review): 'profile' here is the enclosing function object, so
        # this condition is always truthy — it looks like a leftover boolean
        # flag (e.g. a former 'do_profile' parameter); confirm intent.
        if profile:
            options = profilers.ProfilerOptions()
            options.direction = direction
            options.gradient_wrt = gradient_wrt
            options.num_iter = num_iter
            options.num_warmup = num_warmup
            options.ppp_comp = ppp_comp
            tensorflow_result, cudnn_result = None, None
            if executor == 'tensorflow':
                # TF path measures real kernels; heuristics are cuDNN-only.
                options.use_cudnn_heuristics = False
                tensorflow_result = profiler.profile(device_name, options,
                                                     executor='tensorflow')
            if not use_only_gemm:
                options.use_cudnn_heuristics = True
            if executor == 'cudnn':
                cudnn_result = profiler.profile(device_name, options,
                                                executor='cudnn')
            # Pad the missing side with Nones so _print_tabular can zip them.
            if cudnn_result:
                tensorflow_result = [None] * len(cudnn_result)
            elif tensorflow_result:
                cudnn_result = [None] * len(tensorflow_result)
            all_results[netspec_file] = (cudnn_result, tensorflow_result)
    for net in all_results:
        print('Network: %s' % net)
        print('Direction: %s' % direction)
        if direction == 'backward':
            print('Gradient wrt: %s' % gradient_wrt)
        (cu, tf) = all_results[net]
        _print_tabular(cu, tf)
def _profile_for_apply_updates(params_in_bytes, device):
    """Estimate the cost of applying weight updates of the given size."""
    # Default options suffice here; only the parameter volume matters.
    options = profilers.ProfilerOptions()
    profiler = profilers.FlopsProfiler(options, device)
    return profiler.profile_apply_updates(params_in_bytes)