def csgd_prune_pipeline(local_rank, init_hdf5, base_train_config,
                        csgd_train_config, target_deps, centri_strength,
                        pacesetter_dict, succeeding_strategy):
    """Obtain starting weights, run C-SGD training/pruning, then test.

    Steps, in order:

    1. Pick the weights C-SGD starts from. A caller-supplied ``init_hdf5``
       is used as-is. Otherwise ``finish.hdf5`` under
       ``base_train_config.output_dir`` is used, and ``train_main`` is
       invoked first when that file does not exist yet.
    2. Call ``csgd_train_main`` with those weights, asking it to write the
       pruned result to ``pruned.hdf5`` under
       ``csgd_train_config.output_dir``.
    3. On the process whose ``local_rank`` is 0, call ``general_test`` on
       the pruned weights file.

    Args:
        local_rank: Rank of this process; forwarded to both training
            entry points and used to gate the final test.
        init_hdf5: Path of existing base weights, or ``None`` to use (and
            if necessary produce) the base training output.
        base_train_config: Config for the base training run; only its
            ``output_dir`` is read here before it is handed to
            ``train_main``.
        csgd_train_config: Config for the C-SGD run; its ``output_dir``
            and ``network_type`` are read here.
        target_deps: Forwarded unchanged to ``csgd_train_main``.
        centri_strength: Forwarded unchanged to ``csgd_train_main``.
        pacesetter_dict: Forwarded unchanged to ``csgd_train_main``.
        succeeding_strategy: Forwarded unchanged to ``csgd_train_main``.

    Returns:
        None.
    """
    if init_hdf5 is not None:
        # Caller provided base weights: no base training needed.
        csgd_init_weights = init_hdf5
    else:
        # No base weights given: fall back to the base run's final file,
        # training from scratch only if it has not been produced already.
        csgd_init_weights = os.path.join(base_train_config.output_dir,
                                         'finish.hdf5')
        base_weights_missing = not os.path.exists(csgd_init_weights)
        if base_weights_missing:
            train_main(local_rank=local_rank,
                       cfg=base_train_config,
                       use_nesterov=True)

    # C-SGD training followed by pruning; the result lands in pruned.hdf5.
    pruned_weights = os.path.join(csgd_train_config.output_dir, 'pruned.hdf5')
    csgd_kwargs = dict(
        local_rank=local_rank,
        cfg=csgd_train_config,
        target_deps=target_deps,
        succeeding_strategy=succeeding_strategy,
        pacesetter_dict=pacesetter_dict,
        centri_strength=centri_strength,
        pruned_weights=pruned_weights,
        init_hdf5=csgd_init_weights,
        use_nesterov=True,  # TODO init?
    )
    csgd_train_main(**csgd_kwargs)

    # Only rank 0 evaluates the pruned model.
    if local_rank != 0:
        return
    general_test(csgd_train_config.network_type, weights=pruned_weights)
Beispiel #2
0
    # NOTE(review): this is the body of a function whose header is not part
    # of this excerpt. network_type, block_type, weight_decay_strength,
    # batch_size, lrs, gamma_init, start_arg and auto_continue are all
    # expected to come from that enclosing scope.

    # One directory per (network, block type) pair; it is used below both as
    # the training output dir and as the TensorBoard dir.
    log_dir = 'acnet_exps/{}_{}_train'.format(network_type, block_type)

    # The bias weight decay is set to the same value as the regular one.
    weight_decay_bias = weight_decay_strength
    # Build the training config: SGD with momentum 0.9, a single node, no
    # warmup, and the learning-rate schedule fields taken from `lrs`.
    config = get_baseconfig_by_epoch(network_type=network_type,
                                     dataset_name=get_dataset_name_by_model_name(network_type), dataset_subset='train',
                                     global_batch_size=batch_size, num_node=1,
                                     weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9,
                                     max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, cosine_minimum=lrs.cosine_minimum,
                                     lr_decay_factor=lrs.lr_decay_factor,
                                     warmup_epochs=0, warmup_method='linear', warmup_factor=0,
                                     ckpt_iter_period=40000, tb_iter_period=100, output_dir=log_dir,
                                     tb_dir=log_dir, save_weights=None, val_epoch_period=5, linear_final_lr=lrs.linear_final_lr,
                                     weight_decay_bias=weight_decay_bias, deps=None)

    # 'acb' selects the ACNet builder in training (non-deploy) form; any
    # other block type falls back to the plain ConvBuilder.
    if block_type == 'acb':
        builder = ACNetBuilder(base_config=config, deploy=False, gamma_init=gamma_init)
    else:
        builder = ConvBuilder(base_config=config)

    # Train only when the final weights file is not already present.
    target_weights = os.path.join(log_dir, 'finish.hdf5')
    if not os.path.exists(target_weights):
        train_main(local_rank=start_arg.local_rank, cfg=config, convbuilder=builder,
               show_variables=True, auto_continue=auto_continue)

    # For 'acb' runs, rank 0 converts the trained weights into a
    # '<name>_deploy.hdf5' file and tests that file with a deploy-mode
    # builder. Non-'acb' runs are not tested here.
    if block_type == 'acb' and start_arg.local_rank == 0:
        convert_acnet_weights(target_weights, target_weights.replace('.hdf5', '_deploy.hdf5'), eps=1e-5)
        deploy_builder = ACNetBuilder(base_config=config, deploy=True)
        general_test(network_type=network_type, weights=target_weights.replace('.hdf5', '_deploy.hdf5'),
                 builder=deploy_builder)
Beispiel #3
0
    # NOTE(review): this is the body of a function whose header is not part
    # of this excerpt. config, resrep_config, flops_func, deps, start_arg,
    # init_hdf5, auto_continue and network_type are all expected to come
    # from that enclosing scope.

    # Builder used for the training run (no explicit mode passed).
    resrep_builder = ResRepBuilder(base_config=config,
                                   resrep_config=resrep_config)

    # 'depth' is always in the no-L2 keyword list; 'compactor' is added only
    # when weight decay on the compactor is disabled in resrep_config.
    # Presumably these keywords match parameter names to exclude from L2
    # regularization -- confirm against resrep_train_main.
    if resrep_config.weight_decay_on_compactor:
        no_l2_keywords = ['depth']
    else:
        no_l2_keywords = ['depth', 'compactor']

    # Log the FLOPs computed from `deps` before training starts.
    print('######################################################')
    print('start ere, the original flops is ', flops_func(deps))
    print('######################################################')

    # Skip training when the converted final weights already exist in the
    # output directory.
    if not os.path.exists(
            os.path.join(config.output_dir, 'finish_converted.hdf5')):
        resrep_train_main(local_rank=start_arg.local_rank,
                          cfg=config,
                          resrep_config=resrep_config,
                          resrep_builder=resrep_builder,
                          show_variables=True,
                          init_hdf5=init_hdf5,
                          auto_continue=auto_continue,
                          no_l2_keywords=no_l2_keywords)

    # Test the converted weights with a fresh builder in 'deploy' mode.
    # NOTE(review): this call is not gated on start_arg.local_rank, so every
    # process that reaches this point runs the test -- confirm that is
    # intended.
    general_test(network_type=network_type,
                 weights=os.path.join(config.output_dir,
                                      'finish_converted.hdf5'),
                 builder=ResRepBuilder(base_config=config,
                                       resrep_config=resrep_config,
                                       mode='deploy'))