Example #1
    def __init__(
        self,
        local_dir='~/.torch/latency_tools/',
        url='https://hanlab.mit.edu/files/proxylessNAS/LatencyTools/mobile_trim.yaml'
    ):
        if url.startswith('http'):
            fname = download_url(url, local_dir, overwrite=True)
        else:
            fname = url

        with open(fname, 'r') as fp:
            # safe_load avoids the missing-Loader error raised by bare
            # yaml.load() on current PyYAML versions.
            self.lut = yaml.safe_load(fp)
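For orientation, a hedged usage sketch: the enclosing class name LatencyEstimator is an assumption made for illustration and does not appear in the snippet above.

# Hypothetical usage, assuming the __init__ above sits in a class named
# LatencyEstimator; the attribute access mirrors the snippet (self.lut).
estimator = LatencyEstimator()   # fetches the YAML table on first use
print(type(estimator.lut))       # the parsed lookup table, typically a dict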
Example #2
def proxyless_base(net_config=None,
                   n_classes=None,
                   bn_param=None,
                   dropout_rate=None,
                   local_path='~/.torch/proxylessnas/'):
    assert net_config is not None, 'Please input a network config'
    if 'http' in net_config:
        net_config_path = download_url(net_config, local_path)
    else:
        net_config_path = net_config
    with open(net_config_path, 'r') as fp:
        net_config_json = json.load(fp)

    if n_classes is not None:
        net_config_json['classifier']['out_features'] = n_classes
    if dropout_rate is not None:
        net_config_json['classifier']['dropout_rate'] = dropout_rate

    net = ProxylessNASNets.build_from_config(net_config_json)
    if bn_param is not None:
        net.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    return net
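A hedged usage sketch of proxyless_base: the local config path below is a placeholder, and the bn_param tuple follows the (momentum, eps) order used by set_bn_param above.

# Hypothetical call; 'net.config' stands in for a local network-config JSON.
net = proxyless_base(
    net_config='net.config',   # no 'http' in the string -> used as a local path
    n_classes=1000,            # overrides classifier.out_features
    bn_param=(0.1, 1e-3),      # (momentum, eps)
    dropout_rate=0.1,          # overrides classifier.dropout_rate
)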
Example #3
def supporting_elastic_expand(train_func, run_manager, args,
                              validate_func_dict):
    dynamic_net = run_manager.net
    if isinstance(dynamic_net, nn.DataParallel):
        dynamic_net = dynamic_net.module

    # load stage info
    stage_info_path = os.path.join(run_manager.path, 'expand.stage')
    try:
        with open(stage_info_path) as fp:
            stage_info = json.load(fp)
    except Exception:
        stage_info = {'stage': 0}

    # load pretrained models
    validate_func_dict['expand_ratio_list'] = sorted(
        dynamic_net.expand_ratio_list)

    if args.phase == 1:
        model_path = download_url(
            'https://hanlab.mit.edu/files/OnceForAll/ofa_checkpoints/ofa_D234_E6_K357',
            model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())
        load_models(run_manager, dynamic_net, model_path=model_path)
    else:
        model_path = download_url(
            'https://hanlab.mit.edu/files/OnceForAll/ofa_checkpoints/ofa_D234_E46_K357',
            model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())
        load_models(run_manager, dynamic_net, model_path=model_path)
    dynamic_net.re_organize_middle_weights()
    run_manager.write_log(
        '%.3f\t%.3f\t%.3f\t%s' % validate(run_manager, **validate_func_dict),
        'valid')

    expand_stage_list = dynamic_net.expand_ratio_list.copy()
    expand_stage_list.sort(reverse=True)
    n_stages = len(expand_stage_list) - 1
    start_stage = n_stages - 1

    for current_stage in range(start_stage, n_stages):
        run_manager.write_log(
            '-' * 30 + 'Supporting Elastic Expand Ratio: %s -> %s' %
            (expand_stage_list[:current_stage + 1],
             expand_stage_list[:current_stage + 2]) + '-' * 30, 'valid')

        # add expand list constraints
        supported_expand = expand_stage_list[:current_stage + 2]
        if len(set(dynamic_net.ks_list)) == 1 and len(
                set(dynamic_net.depth_list)) == 1:
            validate_func_dict['expand_ratio_list'] = supported_expand
        else:
            validate_func_dict['expand_ratio_list'] = sorted(
                {min(supported_expand),
                 max(supported_expand)})
        dynamic_net.set_constraint(supported_expand,
                                   constraint_type='expand_ratio')

        # train
        train_func(
            run_manager, args, lambda _run_manager, epoch, is_test: validate(
                _run_manager, epoch, is_test, **validate_func_dict))

        # next stage & reset
        stage_info['stage'] += 1
        run_manager.start_epoch = 0
        run_manager.best_acc = 0.0
        dynamic_net.re_organize_middle_weights(
            expand_ratio_stage=stage_info['stage'])
        if isinstance(run_manager, DistributedRunManager):
            run_manager.broadcast()

        # save and validate
        run_manager.save_model(model_name='expand_stage%d.pth.tar' %
                               stage_info['stage'])
        with open(stage_info_path, 'w') as fp:
            json.dump(stage_info, fp, indent=4)
        validate_func_dict['expand_ratio_list'] = sorted(
            dynamic_net.expand_ratio_list)
        run_manager.write_log(
            '%.3f\t%.3f\t%.3f\t%s' %
            validate(run_manager, **validate_func_dict), 'valid')
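To make the stage schedule above concrete, here is a standalone sketch assuming the common OFA setting expand_ratio_list = [3, 4, 6] (an assumption; the real list comes from dynamic_net).

# Sketch of the slicing logic above with an assumed ratio list.
expand_stage_list = sorted([3, 4, 6], reverse=True)   # [6, 4, 3]
n_stages = len(expand_stage_list) - 1                 # 2
start_stage = n_stages - 1                            # 1
for current_stage in range(start_stage, n_stages):    # only stage 1 runs here
    print(expand_stage_list[:current_stage + 1], '->',
          expand_stage_list[:current_stage + 2])      # [6, 4] -> [6, 4, 3]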
Example #4
args.dy_conv_scaling_mode = 1
args.independent_distributed_sampling = False

args.kd_ratio = 1.0
args.kd_type = 'ce'

if __name__ == '__main__':
    os.makedirs(args.path, exist_ok=True)

    # Initialize Horovod
    hvd.init()
    # Pin GPU to be used to process local rank (one GPU per process)
    torch.cuda.set_device(hvd.local_rank())

    args.teacher_path = download_url(
        'https://hanlab.mit.edu/files/OnceForAll/ofa_checkpoints/ofa_D4_E6_K7',
        model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())

    num_gpus = hvd.size()

    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed_all(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    # image size
    args.image_size = [
        int(img_size) for img_size in args.image_size.split(',')
    ]
    if len(args.image_size) == 1:
        args.image_size = args.image_size[0]
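The image-size parsing at the end accepts either a single size or a comma-separated list. A standalone sketch of the same logic (the helper name is hypothetical):

# Hypothetical helper mirroring the parsing above.
def parse_image_size(spec):
    sizes = [int(s) for s in spec.split(',')]
    return sizes[0] if len(sizes) == 1 else sizes

print(parse_image_size('224'))               # 224
print(parse_image_size('128,160,192,224'))   # [128, 160, 192, 224]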
Example #5
args.dy_conv_scaling_mode = 1
args.independent_distributed_sampling = False

args.kd_ratio = 1.0
args.kd_type = 'ce'

if __name__ == '__main__':
    os.makedirs(args.path, exist_ok=True)

    # Initialize Horovod
    hvd.init()
    # Pin GPU to be used to process local rank (one GPU per process)
    torch.cuda.set_device(hvd.local_rank())

    args.teacher_path = download_url(
        'https://file.lzhu.me/projects/OnceForAll/ofa_checkpoints/ofa_D4_E6_K7',
        model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())

    num_gpus = hvd.size()

    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed_all(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    # image size
    args.image_size = [
        int(img_size) for img_size in args.image_size.split(',')
    ]
    if len(args.image_size) == 1:
        args.image_size = args.image_size[0]
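Note the per-rank model_dir in the download_url call: each Horovod process gets its own checkpoint directory, presumably so concurrent downloads do not collide. A small illustrative sketch of the pattern:

# Illustrative only: the '%d' is filled with hvd.rank() in the code above.
for rank in range(4):   # pretend hvd.size() == 4
    print('.torch/ofa_checkpoints/%d' % rank)
# .torch/ofa_checkpoints/0 ... .torch/ofa_checkpoints/3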
Example #6
args.dy_conv_scaling_mode = 1
args.independent_distributed_sampling = False

args.kd_ratio = 1.0
args.kd_type = 'ce'

if __name__ == '__main__':
    os.makedirs(args.path, exist_ok=True)

    # Initialize Horovod
    hvd.init()
    # Pin GPU to be used to process local rank (one GPU per process)
    torch.cuda.set_device(hvd.local_rank())

    args.teacher_path = download_url(
        '/NAS_REMOTE/shaozl/Fine-grained/once-for-all-master/.torch/ofa_checkpoints/ofa_ws_D4_E6_K7',
        model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())

    num_gpus = hvd.size()

    torch.manual_seed(args.manual_seed)
    torch.cuda.manual_seed_all(args.manual_seed)
    np.random.seed(args.manual_seed)
    random.seed(args.manual_seed)

    # image size
    args.image_size = [
        int(img_size) for img_size in args.image_size.split(',')
    ]
    if len(args.image_size) == 1:
        args.image_size = args.image_size[0]
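The kd_ratio / kd_type = 'ce' settings above configure knowledge distillation from the downloaded teacher. A minimal sketch, not the repository's implementation, of how a cross-entropy distillation term is commonly mixed with the hard-label loss:

# Hedged sketch: soft-target cross-entropy plus hard-label cross-entropy,
# weighted by kd_ratio (illustrative, not OFA's exact loss code).
import torch
import torch.nn.functional as F

def kd_ce_loss(student_logits, teacher_logits, labels, kd_ratio=1.0):
    soft_targets = F.softmax(teacher_logits, dim=1)
    kd = -(soft_targets * F.log_softmax(student_logits, dim=1)).sum(dim=1).mean()
    ce = F.cross_entropy(student_logits, labels)
    return kd_ratio * kd + ce

# Toy usage with random tensors.
logits_s, logits_t = torch.randn(8, 10), torch.randn(8, 10)
labels = torch.randint(0, 10, (8,))
print(kd_ce_loss(logits_s, logits_t, labels).item())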
Example #7
def supporting_elastic_depth(train_func, run_manager, args,
                             validate_func_dict):
    dynamic_net = run_manager.net
    if isinstance(dynamic_net, nn.DataParallel):
        dynamic_net = dynamic_net.module

    # load stage info
    stage_info_path = os.path.join(run_manager.path, 'depth.stage')
    try:
        with open(stage_info_path) as fp:
            stage_info = json.load(fp)
    except Exception:
        stage_info = {'stage': 0}

    # load pretrained models
    validate_func_dict['depth_list'] = sorted(dynamic_net.depth_list)

    if args.phase == 1:
        model_path = download_url(
            'https://file.lzhu.me/projects/OnceForAll/ofa_checkpoints/ofa_D4_E6_K357',
            model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())
        load_models(run_manager, dynamic_net, model_path=model_path)
    else:
        model_path = download_url(
            'https://file.lzhu.me/projects/OnceForAll/ofa_checkpoints/ofa_D34_E6_K357',
            model_dir='.torch/ofa_checkpoints/%d' % hvd.rank())
        load_models(run_manager, dynamic_net, model_path=model_path)
    # validate after loading weights
    run_manager.write_log(
        '%.3f\t%.3f\t%.3f\t%s' % validate(run_manager, **validate_func_dict),
        'valid')

    depth_stage_list = dynamic_net.depth_list.copy()
    depth_stage_list.sort(reverse=True)
    n_stages = len(depth_stage_list) - 1
    start_stage = n_stages - 1

    for current_stage in range(start_stage, n_stages):
        run_manager.write_log(
            '-' * 30 + 'Supporting Elastic Depth: %s -> %s' %
            (depth_stage_list[:current_stage + 1],
             depth_stage_list[:current_stage + 2]) + '-' * 30, 'valid')

        # add depth list constraints
        supported_depth = depth_stage_list[:current_stage + 2]
        if len(set(dynamic_net.ks_list)) == 1 and len(
                set(dynamic_net.expand_ratio_list)) == 1:
            validate_func_dict['depth_list'] = supported_depth
        else:
            validate_func_dict['depth_list'] = sorted(
                {min(supported_depth),
                 max(supported_depth)})
        dynamic_net.set_constraint(supported_depth, constraint_type='depth')

        # train
        train_func(
            run_manager, args, lambda _run_manager, epoch, is_test: validate(
                _run_manager, epoch, is_test, **validate_func_dict))

        # next stage & reset
        stage_info['stage'] += 1
        run_manager.start_epoch = 0
        run_manager.best_acc = 0.0

        # save and validate
        run_manager.save_model(model_name='depth_stage%d.pth.tar' %
                               stage_info['stage'])
        with open(stage_info_path, 'w') as fp:
            json.dump(stage_info, fp, indent=4)
        validate_func_dict['depth_list'] = sorted(dynamic_net.depth_list)
        run_manager.write_log(
            '%.3f\t%.3f\t%.3f\t%s' %
            validate(run_manager, **validate_func_dict), 'valid')
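Both elastic-depth and elastic-expand training keep their progress in a small JSON stage file so an interrupted run can resume at the right stage. A standalone sketch of that load / bump / save cycle (the file name is illustrative):

import json

stage_info_path = 'depth.stage'   # illustrative path
try:
    with open(stage_info_path) as fp:
        stage_info = json.load(fp)
except Exception:
    stage_info = {'stage': 0}     # fresh run starts at stage 0

stage_info['stage'] += 1          # advance after a stage finishes
with open(stage_info_path, 'w') as fp:
    json.dump(stage_info, fp, indent=4)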