Example #1
0
    def profile_full_pass(self, device, num_warmup, num_iter, batch_size):
        """Profile full pass execution with tensorflow.

        Args:
            device: Target device (unused by this implementation).
            num_warmup: Number of warm-up iterations before timing.
            num_iter: Number of timed iterations.
            batch_size: If truthy, overrides every layer's batch size.

        Returns:
            The result of TensorFlowProfiler.profile_full_pass over the
            graph's layers in topological order.
        """
        from profilers.tensorflow_profiler import TensorFlowProfiler

        opts = profilers.ProfilerOptions()
        opts.num_warmup = num_warmup
        opts.num_iter = num_iter
        # Bias and activation are deliberately excluded from the measurement.
        opts.include_bias_and_activation = False
        tf_profiler = TensorFlowProfiler(opts)

        ordered_specs = self.graph.topology_order
        if batch_size:
            for spec in ordered_specs:
                spec.layer_op.batch_size = batch_size

        return tf_profiler.profile_full_pass(
            [spec.layer_op for spec in ordered_specs])
Example #2
0
def _profile_for_batch_size(layer_list,
                            direction,
                            device,
                            batch_size,
                            use_only_gemm,
                            ppp_comp,
                            ppp_comm,
                            cross_device_bandwidth=None):
    """Use the flops profiler to estimate execution time under the given spec.

    Args:
        layer_list: Layer specs; each exposes `layer_op`, `device_id` and
            `parents`.
        direction: Pass direction fed to the profiler options.
        device: Target device for `profilers.FlopsProfiler`.
        batch_size: If truthy, overrides each layer's batch size in place.
        use_only_gemm: If true, cuDNN heuristics are disabled.
        ppp_comp: Computation performance-per-peak factor.
        ppp_comm: Communication performance-per-peak factor.
        cross_device_bandwidth: Optional bandwidth spec for cross-device
            transfers.

    Returns:
        Tuple `(times, params_in_bytes)`: per-layer profiled times and the
        total weight size in bytes.
    """
    # Lazy %-args: the message is formatted only if debug logging is enabled.
    logger.debug('Profile for\n  pass: %s\n  device: %s\n  batch size: %s',
                 direction, device.name, batch_size)

    # The options are identical for every layer, so build them (and the
    # profiler) once instead of once per layer — answering the original
    # inline question "Why instantiate new profiler for every layer?".
    # NOTE(review): assumes FlopsProfiler.profile keeps no per-call state
    # in its options — confirm against the profilers package.
    options = profilers.ProfilerOptions()
    options.direction = direction
    options.gradient_wrt = None
    if use_only_gemm:
        options.use_cudnn_heuristics = False
    # FIXME: we don't include bias and activation for simplicity.
    options.include_bias_and_activation = False
    options.ppp_comp = ppp_comp
    options.ppp_comm = ppp_comm
    flops_profiler = profilers.FlopsProfiler(options, device)

    times = []
    params_in_bytes = 0

    # Estimate forward time for each layer.
    for layer_spec in layer_list:
        layer = layer_spec.layer_op
        if batch_size:
            layer.batch_size = batch_size

        layer_time = flops_profiler.profile(
            layer, layer_spec.device_id,
            [p.device_id for p in layer_spec.parents], cross_device_bandwidth)
        params_in_bytes += layer.weights_in_bytes
        times.append(layer_time)

    return times, params_in_bytes
Example #3
0
def profile(netspec_files, device_name, num_warmup, num_iter, extract_conv_dir,
            direction, gradient_wrt, use_only_gemm, executor, ppp_comp,
            separator):
    """Profiling a neural network.

    Profiles each network spec with the selected executor ('tensorflow' or
    'cudnn') and prints a per-layer timing table for every network.

    Args:
        netspec_files: Iterable of network-spec file paths.
        device_name: Device identifier passed to Profiler.profile.
        num_warmup: Warm-up iterations before timing.
        num_iter: Timed iterations.
        extract_conv_dir: If truthy, directory to save conv layers into.
        direction: 'forward' or 'backward'.
        gradient_wrt: Gradient target, printed for backward passes.
        use_only_gemm: If true, cuDNN heuristics remain disabled.
        executor: Backend to profile with: 'tensorflow' or 'cudnn'.
        ppp_comp: Computation performance-per-peak factor.
        separator: Column separator for the printed table.
    """

    def _print_tabular(cudnn_result, tensorflow_result):
        """Print one row per layer (skipping 'data') plus a final sum row."""
        assert len(cudnn_result) == len(tensorflow_result)

        print(
            separator.join(
                ['layer', 'ours', 'cudnn', 'tensorflow', 'ours_alg',
                 'cu_alg']))
        sum_ours, sum_cu, sum_tf = 0, 0, 0
        for cudnn_prof, tf_prof in zip(cudnn_result, tensorflow_result):
            (layer_name, ours_time, cudnn_time, tf_time, our_msg,
             cu_msg) = ['', 0, 0, 0, '', '']
            if cudnn_prof:
                layer_name, ours_time, cudnn_time, _, our_msg, cu_msg = (
                    cudnn_prof)
            if tf_prof:
                layer_name, ours_time, tf_time, _, our_msg, _ = tf_prof

            # Strip the verbose cuDNN algorithm prefix for readability.
            our_msg = our_msg.replace('CUDNN_CONVOLUTION_', '')
            cu_msg = cu_msg.replace('CUDNN_CONVOLUTION_', '')

            # The input layer carries no meaningful timing; skip it.
            if layer_name == 'data':
                continue

            sum_ours += ours_time
            sum_cu += cudnn_time
            sum_tf += tf_time

            print(
                separator.join([
                    str(x) for x in (layer_name, ours_time, cudnn_time,
                                     tf_time, our_msg, cu_msg)
                ]))
        print(separator.join(['Sum', str(sum_ours), str(sum_cu), str(sum_tf)]))

    all_results = dict()
    for netspec_file in netspec_files:
        profiler = Profiler(netspec_file, separator=separator)

        if extract_conv_dir:
            profiler.save_conv_layers(extract_conv_dir)

        # BUG FIX: the original wrapped the code below in `if profile:`,
        # which tests this function object itself and is therefore always
        # true. The guard is removed; runtime behavior is unchanged.
        options = profilers.ProfilerOptions()
        options.direction = direction
        options.gradient_wrt = gradient_wrt
        options.num_iter = num_iter
        options.num_warmup = num_warmup
        options.ppp_comp = ppp_comp

        tensorflow_result, cudnn_result = None, None
        if executor == 'tensorflow':
            options.use_cudnn_heuristics = False
            tensorflow_result = profiler.profile(device_name,
                                                 options,
                                                 executor='tensorflow')

        if not use_only_gemm:
            options.use_cudnn_heuristics = True

        if executor == 'cudnn':
            cudnn_result = profiler.profile(device_name,
                                            options,
                                            executor='cudnn')

        # Only one executor runs, so pad the other side with Nones to let
        # _print_tabular zip the two lists evenly.
        if cudnn_result:
            tensorflow_result = [None] * len(cudnn_result)
        elif tensorflow_result:
            cudnn_result = [None] * len(tensorflow_result)
        all_results[netspec_file] = (cudnn_result, tensorflow_result)

    for net in all_results:
        print('Network: %s' % net)
        print('Direction: %s' % direction)
        if direction == 'backward':
            print('Gradient wrt: %s' % gradient_wrt)
        (cu, tf) = all_results[net]
        _print_tabular(cu, tf)
Example #4
0
def _profile_for_apply_updates(params_in_bytes, device):
    """Estimate the time to apply weight updates of the given total size.

    Args:
        params_in_bytes: Total parameter size in bytes.
        device: Target device for the flops profiler.

    Returns:
        The flops profiler's estimate for the apply-updates step.
    """
    default_options = profilers.ProfilerOptions()
    profiler = profilers.FlopsProfiler(default_options, device)
    return profiler.profile_apply_updates(params_in_bytes)