Esempio n. 1
0
def main():
    """Profile a RectNet model with rasp and print three summary reports.

    Command-line flags:
        -d / --device   device ids string handed to ``get_device``
                        (default ``"all"``).
        -v / --verbose  forwarded to the rasp profile configuration.
        -t / --timing   additionally run a timing pass; its result replaces
                        the stats of the compute pass.

    The tape, full and per-node summaries are printed in that order, after
    which profiling is switched off for the model.
    """
    cli = argparse.ArgumentParser(description='profile torchvision models')
    cli.add_argument('-d', '--device', type=str, default="all",
                     help="device ids")
    cli.add_argument('-v', '--verbose', action='store_true',
                     help="verbose msg")
    cli.add_argument('-t', '--timing', action='store_true',
                     help="enable timing")
    opts = cli.parse_args()

    # get_device returns a pair; only its first element is used here.
    device, _ = get_device(opts.device)

    profile_cfg = {
        'batch_size': 1,
        'num_batches': 5,
        'warmup_batches': 5,
        'timing_max_depth': -1,
        'compute_max_depth': -1,
        'verbose': opts.verbose,
    }
    rasp.set_config({'profile': profile_cfg})

    # Columns requested from every summary report below.
    report_columns = [
        'name', 'type', 'in_shape', 'out_shape', 'params', 'lat', 'net_lat',
        'lat[%]', 'flops', 'mem_r', 'mem_w', 'mem_rw', 'dev_mem_alloc',
        'dev_mem_delta',
    ]

    # Positional constructor arguments of RectNet; the first one is also
    # used as the channel dimension of the random input.
    channels, net_w, net_l = 16, 4, 8
    net = RectNet(channels, net_w, net_l).to(device)

    batch, side = 128, 224
    sample = torch.randn(batch, channels, side, side).to(device)

    stats = rasp.profile_compute_once(net, inputs=sample)
    if opts.timing:
        stats = rasp.profile_timing_once(net, inputs=sample)

    tape_report, _ = rasp.summary_tape(stats, report_fields=report_columns)
    print(tape_report)
    full_report, _ = rasp.summary_all(stats, report_fields=report_columns)
    print(full_report)
    node_report, _ = rasp.summary_node(stats, report_fields=report_columns)
    print(node_report)

    rasp.profile_off(net)
Esempio n. 2
0
def main():
    """Profile every model listed in ``model_names`` and print a result table.

    Command-line flags:
        -d / --device   device ids string handed to ``get_device``
                        (default ``'all'``).
        -v / --verbose  forwarded to the rasp profile configuration; also
                        prints the per-model summary.
        -t / --timing   enable timing in ``rasp.stat``; latency and FLOPS
                        are reported as 0 when it is off.
        -i / --input    input shape written as a Python tuple literal
                        (default ``'(1, 3, 224, 224)'``).

    One pipe-separated row (model, params, FLOPs, latency, FLOPS) is printed
    per model, preceded by a header and a ``---`` separator row.
    """
    import ast  # local import: only needed to parse the --input literal

    parser = argparse.ArgumentParser(description='profile torchvision models')
    parser.add_argument('-d',
                        '--device',
                        type=str,
                        default='all',
                        help='device ids')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='verbose msg')
    parser.add_argument('-t',
                        '--timing',
                        action='store_true',
                        help='enable timing')
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        default='(1, 3, 224, 224)',
                        help='input shape')
    args = parser.parse_args()

    # get_device returns a pair; only its first element is used here.
    device, _ = get_device(args.device)

    rasp.set_config({
        'profile': {
            'batch_size': 1,
            'num_batches': 50,
            'warmup_batches': 5,
            'timing_max_depth': -1,
            'compute_max_depth': -1,
            'verbose': args.verbose,
        },
    })

    print('%s | %s | %s | %s | %s' %
          ('Model', 'Params', 'FLOPs', 'latency', 'FLOPS'))
    print('---|---|---|---|---')

    fields = [
        'name', 'type', 'in_shape', 'out_shape', 'params', 'lat', 'net_lat',
        'lat[%]', 'flops', 'FLOPS', 'mem_rw', 'dev_mem_alloc', 'dev_mem_delta'
    ]

    # SECURITY: this used to be eval(args.input), which executes arbitrary
    # code typed on the command line. literal_eval only accepts Python
    # literals, so the shape must now be a plain tuple/list of integers
    # (arithmetic expressions such as "112*2" are rejected).
    input_shape = tuple(ast.literal_eval(args.input))
    inputs = torch.randn(input_shape, device=device)

    for name in model_names:
        model = models.__dict__[name]().to(device=device)
        summary, df = rasp.stat(model,
                                inputs=inputs,
                                device=device,
                                report_fields=fields,
                                timing=args.timing,
                                print_stat=False,
                                returns='sum, data')
        if args.verbose:
            print(summary)
        # The last row of the returned table is read as the totals row.
        total_f = df.tail(1)
        rasp.profile_off(model)
        # NOTE(review): df is presumably a pandas DataFrame. .iloc[0] picks
        # the single remaining row by position; the previous `[0]` depended
        # on the index label / deprecated positional fallback.
        flops, params = total_f.flops.iloc[0], total_f.params.iloc[0]
        if args.timing:
            latency = total_f.lat.iloc[0]
            FLOPS = total_f.FLOPS.iloc[0]
        else:
            latency = 0
            FLOPS = 0
        print('%s | %s | %s | %s | %s' %
              (name, rasp.round_value(params), rasp.round_value(flops),
               rasp.round_value(latency), rasp.round_value(FLOPS)))
Esempio n. 3
0
def main():
    """Profile every operator registered in ``ops`` and print a result table.

    Command-line flags:
        -d / --device   device ids string handed to ``get_device``
                        (default ``"all"``).
        -v / --verbose  forwarded to the rasp profile configuration; also
                        prints the per-operator summary.
        -t / --timing   enable timing in ``rasp.stat``.
        -i / --input    input shape written as a Python tuple literal
                        (default ``'(8, 64, 224, 224)'``); its second entry
                        is passed to each operator constructor.

    One pipe-separated row (name, params, FLOPs, FLOPS) is printed per
    operator, preceded by a header and a ``---`` separator row.
    """
    import ast  # local import: only needed to parse the --input literal

    parser = argparse.ArgumentParser(description='profile torchvision models')
    parser.add_argument('-d',
                        '--device',
                        type=str,
                        default="all",
                        help="device ids")
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help="verbose msg")
    parser.add_argument('-t',
                        '--timing',
                        action='store_true',
                        help="enable timing")
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        default='(8, 64, 224, 224)',
                        help='input shape')
    args = parser.parse_args()

    # get_device returns a pair; only its first element is used here.
    device, _ = get_device(args.device)

    rasp.set_config({
        'profile': {
            'batch_size': 1,
            'num_batches': 100,
            'warmup_batches': 10,
            'timing_max_depth': -1,
            'compute_max_depth': -1,
            'verbose': args.verbose,
        },
    })

    print("%s | %s | %s | %s" % ("Model", "Params", "FLOPs", "FLOPS"))
    print("---|---|---|---")

    fields = [
        'name', 'type', 'in_shape', 'out_shape', 'params', 'lat', 'net_lat',
        'lat[%]', 'flops', 'FLOPS', 'mem_r', 'mem_w', 'mem_rw',
        'dev_mem_alloc', 'dev_mem_delta'
    ]

    # SECURITY: this used to be eval(args.input), which executes arbitrary
    # code typed on the command line. literal_eval only accepts Python
    # literals, so the shape must now be a plain tuple/list of integers
    # (arithmetic expressions such as "112*2" are rejected).
    input_shape = tuple(ast.literal_eval(args.input))
    chn_in = input_shape[1]
    inputs = torch.randn(input_shape, device=device)

    for name in ops:
        model = ops[name](chn_in, stride=1, affine=True)
        summary, df = rasp.stat(model,
                                inputs=inputs,
                                device=device,
                                report_fields=fields,
                                timing=args.timing,
                                print_stat=False,
                                returns='sum, data')
        if args.verbose:
            print(summary)
        rasp.profile_off(model)
        # The last row of the returned table is read as the totals row.
        total_f = df.tail(1)
        # NOTE(review): df is presumably a pandas DataFrame. .iloc[0] picks
        # the single remaining row by position; the previous `[0]` depended
        # on the index label / deprecated positional fallback.
        total_flops = total_f.flops.iloc[0]
        total_params = total_f.params.iloc[0]
        total_FLOPS = total_f.FLOPS.iloc[0]
        print("%s | %s | %s | %s" %
              (name, rasp.round_value(total_params),
               rasp.round_value(total_flops), rasp.round_value(total_FLOPS)))