Example #1
def get_se_sites():
    """Read the coordinates data from the CSV file and return a dict of SE sites."""
    coordinates_data = {}
    try:
        csv_file = open('sites.tmp', "r")
    except IOError:
        sys.exit("No coordinates info for SEs. Get the computing sites first with get_ce_sites()")
    lines = csv_file.read().split("\n")
    csv_file.close()
    for row in lines:
        if row != "":
            site = row.split(',')
            # Keep only rows whose latitude and longitude fields parse as numbers.
            if site[1].replace('.', '', 1).replace('-', '', 1).isdigit() and \
               site[2].replace('.', '', 1).replace('-', '', 1).isdigit():
                coordinates_data[site[0]] = [float(site[1]), float(site[2])]
    se_sites = {}
    manager = Manager()
    se_result = manager.listSEs()['Value']
    se_health = service.getHealthyProductionSEs()
    #se_health = service.getAllStorageElementStatus()
    if not se_health['OK']: 
        print "[WARNING]: No health info for SE sites"
    for se in sorted(se_result['Active']):
        se_site = {}
        result = gConfig.getOptionsDict('/Resources/StorageElements/' + se)
        result2 = gConfig.getOptionsDict('/Resources/StorageElements/' + se \
                                        + '/AccessProtocol.1')
        if result['OK'] and result2['OK']:
            result = result['Value']
            result2 = result2['Value']
            se_site = result2
            se_site['Read'] = result['ReadAccess']
            se_site['Write'] = result['WriteAccess']

            if se.split('-')[0] in coordinates_data:
                se_site['Coordinates'] = coordinates_data[se.split('-')[0]]
            else:
                print "[WARNING]: No location info for " + se
            se_sites[se] = se_site

            if se_health['OK'] and se in se_health['Value']:
                se_site['Health'] = se_health['Value'][se][0]


    return se_sites
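For context, a minimal sketch of the sites.tmp layout that get_se_sites() assumes: one site per row as name,latitude,longitude, matched against the SE name prefix before the first '-'. The site names and coordinate values below are illustrative only.

# Hypothetical sites.tmp contents (illustrative values):
#   CERN,46.23,6.05
#   RAL,51.57,-1.31
with open('sites.tmp', 'w') as tmp:
    tmp.write("CERN,46.23,6.05\nRAL,51.57,-1.31\n")
# get_se_sites() would then attach [46.23, 6.05] to any SE named
# 'CERN-<something>', since it looks up se.split('-')[0] in the CSV data.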
Example #2
def main():
    """Do stuff."""
    args = argparser.get_parser()

    # Don't use this, and don't set the learning rate as a linear function
    # of the GPU count; it lowers accuracy.
    # args.batch_size = args.batch_size * torch.cuda.device_count()
    set_seeds(args)
    settings(args)
    logger = CsvLogger(file_name='acc',
                       resume=True,
                       path='results',
                       data_format='csv')
    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch, resume_folder = check_resume_epoch(args)

    dataset_history, dataset2num_classes, masks, shared_layer_info = info_reload(
        resume_from_epoch, args)

    model = build_model(args, dataset_history, dataset2num_classes)
    model = nn.DataParallel(model)
    for dataset in dataset_history:
        args.dataset = dataset
        utils.set_dataset_paths(args)
        model.module.set_dataset(args.dataset)
        train_loader, val_loader, test_loader = load_data(args)
        manager = Manager(args, model, shared_layer_info, masks, train_loader,
                          test_loader)
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch,
                                                  resume_folder)
        model = model.cuda()
        test_acc = manager.validate(resume_from_epoch - 1)
        idx = dataset_history.index(dataset)
        finished = len(dataset_history)
        logger.add(dataset=dataset,
                   idx=idx,
                   finished=finished,
                   acc=round(test_acc, 4))
        logger.save()
    return
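For reference, a minimal sketch of what a check_resume_epoch helper like the one called above might do, mirroring the inline resume logic in Example #4; the signature and the 200-epoch upper bound are assumptions carried over from that example.

import os

def check_resume_epoch(args):
    # Hypothetical helper (cf. Example #4): scan from the highest epoch down
    # and resume from the newest checkpoint found in args.load_folder.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(args.checkpoint_format.format(
                save_folder=resume_folder, epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break
    if getattr(args, 'restore_epoch', 0):
        resume_from_epoch = args.restore_epoch
    return resume_from_epoch, resume_folder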
Example #3
"""
  Documentation:    http://wiki.guildwars2.com/wiki/API

  Threading:        http://www.tutorialspoint.com/python/python_multithreading.htm
"""
from utils.manager import Manager
from helpers.data_mapping import MetaData

""" 
    Main processing
"""
if __name__ == "__main__":
    # Create a manager (a.k.a. Task Master) to do the majority of the work
    # @todo: Provide it a list of tasks which need to be done
    num_threads = 15
    max_queue_size = 50000  # 30,000
    missed_only = False
    
    print('Running main crawler....')
    taskmaster = Manager(num_threads, max_queue_size, missed_only)
    taskmaster.go() 
    
    print('Populating Meta Data.....')
    md = MetaData()
    md.populate()
    print('Done')
    
Example #4
def main():
    """Do stuff."""
    args = parser.parse_args()

    # Don't use this, and don't set the learning rate as a linear function
    # of the GPU count; it lowers accuracy.
    # args.batch_size = args.batch_size * torch.cuda.device_count()
    args.network_width_multiplier = math.sqrt(args.network_width_multiplier)
    args.max_allowed_network_width_multiplier = math.sqrt(
        args.max_allowed_network_width_multiplier)
    if args.mode == 'prune':
        args.save_folder = os.path.join(args.save_folder,
                                        str(args.target_sparsity))
        if args.initial_sparsity != 0.0:
            args.load_folder = os.path.join(args.load_folder,
                                            str(args.initial_sparsity))

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if args.log_path:
        set_logger(args.log_path)

    if args.pruning_ratio_to_acc_record_file and not os.path.isdir(
            args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]):
        os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0])

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(
                args.checkpoint_format.format(save_folder=resume_folder,
                                              epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch:
        print("Resume from epoch: ", resume_from_epoch)
        filepath = args.checkpoint_format.format(save_folder=resume_folder,
                                                 epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        shared_layer_info = checkpoint['shared_layer_info']
        # shared_layer_info[args.dataset]['network_width_multiplier'] = 1.0
        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
        if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[
                args.dataset]:  # TODO, temporary solution
            args.network_width_multiplier = shared_layer_info[
                args.dataset]['network_width_multiplier']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.baseline_acc_file is None or not os.path.isfile(
            args.baseline_acc_file):
        sys.exit(3)
    with open(args.baseline_acc_file, 'r') as jsonfile:
        json_data = json.load(jsonfile)
        baseline_acc = float(json_data[args.dataset])

    if args.mode == 'prune' and not args.pruning_ratio_to_acc_record_file:
        sys.exit(-1)

    if args.arch == 'resnet18':
        model = models.__dict__[args.arch](
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info)
    elif 'vgg' in args.arch:
        custom_cfg = [
            64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
            512, 512, 512, 'M'
        ]
        model = models.__dict__[args.arch](
            custom_cfg,
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info)
    else:
        print('Error!')
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    model = nn.DataParallel(model)
    model = model.cuda()
    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                if 'cuda' in module.weight.data.type():
                    mask = mask.cuda()
                masks[name] = mask
    else:
        # When we expand the network, we need to allocate new masks.
        NEED_ADJUST_MASK = False
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if masks[name].size(1) < module.weight.data.size(1):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif masks[name].size(1) > module.weight.data.size(1):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].
                             size(1), :, :].copy_(masks[name])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1)].copy_(
                            masks[name])
                        masks[name] = mask
            elif args.mode == 'inference':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :, :, :].copy_(
                            masks[name][:mask.size(0), :mask.size(1), :, :])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :].copy_(
                            masks[name][:mask.size(0), :mask.size(1)])
                        masks[name] = mask

    if args.dataset not in shared_layer_info:

        shared_layer_info[args.dataset] = {
            'bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'piggymask': {}
        }

        piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(
                        module, nl.SharableLinear):
                    piggymasks[name] = torch.zeros_like(masks['module.' +
                                                              name],
                                                        dtype=torch.float32)
                    piggymasks[name].fill_(0.01)
                    piggymasks[name] = Parameter(piggymasks[name])
                    module.piggymask = piggymasks[name]
    elif args.finetune_again:
        # reinitialize piggymask
        piggymasks = {}
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                piggymasks[name] = torch.zeros_like(masks['module.' + name],
                                                    dtype=torch.float32)
                piggymasks[name].fill_(0.01)
                piggymasks[name] = Parameter(piggymasks[name])
                module.piggymask = piggymasks[name]
    else:
        # try:
        piggymasks = shared_layer_info[args.dataset]['piggymask']
        # except:
        #    piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(
                        module, nl.SharableLinear):
                    module.piggymask = piggymasks[name]
    shared_layer_info[args.dataset][
        'network_width_multiplier'] = args.network_width_multiplier

    if args.num_classes == 2:
        train_loader = dataset.cifar100_train_loader_two_class(
            args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader_two_class(
            args.dataset, args.val_batch_size)
    elif args.num_classes == 5:
        train_loader = dataset.cifar100_train_loader(args.dataset,
                                                     args.batch_size)
        val_loader = dataset.cifar100_val_loader(args.dataset,
                                                 args.val_batch_size)
    else:
        print("num_classes should be either 2 or 5")
        sys.exit(1)

    # If we are going to save checkpoints in a different folder, recalculate the starting epoch.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    curr_prune_step = begin_prune_step = start_epoch * len(train_loader)
    end_prune_step = curr_prune_step + args.pruning_interval * len(
        train_loader)

    manager = Manager(args, model, shared_layer_info, masks, train_loader,
                      val_loader, begin_prune_step, end_prune_step)
    if args.mode == 'inference':
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch,
                                                  resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    lr_mask = args.lr_mask
    # update all layers
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []
    masks_to_optimize_via_Adam = []
    named_of_masks_to_optimize_via_Adam = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(
                    args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        elif 'piggymask' in name:
            masks_to_optimize_via_Adam.append(param)
            named_of_masks_to_optimize_via_Adam.append(name)
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    optimizer_network = optim.SGD(params_to_optimize_via_SGD,
                                  lr=lr,
                                  weight_decay=0.0,
                                  momentum=0.9,
                                  nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    if masks_to_optimize_via_Adam:
        optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask)
        optimizers.add(optimizer_mask, lr_mask)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)
    """Performs training."""
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if args.mode == 'prune':
        if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder:
            args.epochs = 20 + resume_from_epoch
        logging.info('')
        logging.info('Before pruning: ')
        logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity,
                                                       args.target_sparsity))

        must_pruning_ratio_for_curr_task = 0.0

        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

        if args.network_width_multiplier >= args.max_allowed_network_width_multiplier and json_data[
                '0.0'] < baseline_acc:
            # If we have reached the upper bound and still miss the target
            # accuracy on the current task, we still do pruning.
            logging.info(
                'Reached the network width upper bound but still below the '
                'target accuracy on the current task')
            remain_num_tasks = args.total_num_tasks - len(dataset_history)
            logging.info('remain_num_tasks: {}'.format(remain_num_tasks))
            ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1)
            logging.info('ratio_allow_for_curr_task: {:.4f}'.format(
                ratio_allow_for_curr_task))
            must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task
            if args.initial_sparsity >= must_pruning_ratio_for_curr_task:
                sys.exit(6)

        manager.validate(start_epoch - 1)
        logging.info('')
    elif args.mode == 'finetune':
        if not args.finetune_again:
            manager.pruner.make_finetuning_mask()
            logging.info('Finetune stage...')
        else:
            logging.info('Piggymask Retrain...')
            history_best_avg_val_acc_when_retraining = manager.validate(
                start_epoch - 1)
            num_epochs_that_criterion_does_not_get_better = 0

        stop_lr_mask = True
        if manager.pruner.calculate_curr_task_ratio() == 0.0:
            logging.info(
                'There is no space left in the convolutional layers for the current'
                ' task; we will try to reuse prior experience as long as possible')
            stop_lr_mask = False

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx,
                                                       curr_lrs,
                                                       curr_prune_step)

        avg_val_acc = manager.validate(epoch_idx)

        # if args.mode == 'prune' and (epoch_idx+1) >= (args.pruning_interval + start_epoch) and (
        #     avg_val_acc > history_best_avg_val_acc_when_prune):
        #     pass
        if args.finetune_again:
            if avg_val_acc > history_best_avg_val_acc_when_retraining:
                history_best_avg_val_acc_when_retraining = avg_val_acc

                num_epochs_that_criterion_does_not_get_better = 0
                if args.save_folder is not None:
                    print("Removing saved checkpoint")
                    for path in os.listdir(args.save_folder):
                        if '.pth.tar' in path:
                            os.remove(os.path.join(args.save_folder, path))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()

                history_best_avg_val_acc = avg_val_acc
                manager.save_checkpoint(optimizers, epoch_idx,
                                        args.save_folder)
            else:
                num_epochs_that_criterion_does_not_get_better += 1

            if args.finetune_again and num_epochs_that_criterion_does_not_get_better == 5:
                saved = False
                for try_epoch in range(200, 0, -1):
                    if os.path.exists(
                            args.checkpoint_format.format(
                                save_folder=args.save_folder,
                                epoch=try_epoch)):
                        saved = True
                        print("Found saved checkpoint")
                        break
                if not saved:
                    print("No saved checkpoint..")
                    manager.save_checkpoint(optimizers, epoch_idx,
                                            args.save_folder)
                logging.info("stop retraining")
                sys.exit(0)

        if args.mode == 'finetune':
            if epoch_idx + 1 == 50 or epoch_idx + 1 == 80:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']
            if len(optimizers.lrs) == 2:
                if epoch_idx + 1 == 50:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.2
                if stop_lr_mask and epoch_idx + 1 == 70:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.0

                curr_lrs[1] = param_group['lr']

    if args.save_folder is not None:
        pass
    #     paths = os.listdir(args.save_folder)
    #     if paths and '.pth.tar' in paths[0]:
    #         for checkpoint_file in paths:
    #             os.remove(os.path.join(args.save_folder, checkpoint_file))
    else:
        print('Something is wrong! Block the program with pdb')
        pdb.set_trace()

    if avg_train_acc > 0.95:
        manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
    else:
        logging.info(f"Training Accuracy goal not met ({avg_train_acc})!")
        if args.dataset == "aquatic_mammals" and avg_train_acc > 0.85:
            logging.info("Saving model...")
            manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
        else:
            logging.info("Not saving model...")

    logging.info('-' * 16)

    if args.pruning_ratio_to_acc_record_file:
        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

        if args.mode == 'finetune' and not args.test_piggymask:
            json_data[0.0] = round(avg_val_acc, 4)
            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)
            if avg_train_acc > 0.95 and avg_val_acc >= baseline_acc:
                print("Pass!")
                pass
            elif args.network_width_multiplier >= args.max_allowed_network_width_multiplier and avg_val_acc < baseline_acc:
                print("Option 2")
                if manager.pruner.calculate_curr_task_ratio() == 0.0:
                    sys.exit(5)
                else:
                    sys.exit(0)
            else:
                print("Option 3")
                if args.network_width_multiplier >= args.max_allowed_network_width_multiplier:
                    print("Network Cannot Expand Anymore!")
                    logging.info("Network Cannot Expand Anymore!")
                    if manager.pruner.calculate_curr_task_ratio() == 0.0:
                        sys.exit(5)
                    else:
                        sys.exit(0)

                else:
                    logging.info("It's time to expand the Network")
                    logging.info('Auto expand network')
                    sys.exit(2)

            if manager.pruner.calculate_curr_task_ratio() == 0.0:
                logging.info(
                    'There is no space left in the convolutional layers for the '
                    'current task, so there is no need to prune')
                sys.exit(5)

        elif args.mode == 'prune':
            if avg_train_acc > 0.95:
                json_data[args.target_sparsity] = round(avg_val_acc, 4)
                with open(args.pruning_ratio_to_acc_record_file,
                          'w') as json_file:
                    json.dump(json_data, json_file)
            else:
                sys.exit(6)

            must_pruning_ratio_for_curr_task = 0.0

            if args.network_width_multiplier >= args.max_allowed_network_width_multiplier and json_data[
                    '0.0'] < baseline_acc:
                # If we have reached the upper bound and still miss the target
                # accuracy on the current task, we still do pruning.
                logging.info(
                    'Reached the network width upper bound but still below the '
                    'target accuracy on the current task')
                remain_num_tasks = args.total_num_tasks - len(dataset_history)
                logging.info('remain_num_tasks: {}'.format(remain_num_tasks))
                ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1),
                                                  1)
                logging.info('ratio_allow_for_curr_task: {:.4f}'.format(
                    ratio_allow_for_curr_task))
                must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task
                if args.target_sparsity >= must_pruning_ratio_for_curr_task:
                    sys.exit(6)
Example #5
import os
from utils.manager import Manager
from spider.spcCrawl import XiciCrawl, XilaCrawl, NimaCrawl
from verify.judger import Judger
from db.spcDB import RedisDB

if "__main__" == __name__:
    # db = RedisDB("test")
    spiders = [XiciCrawl(), XilaCrawl(), NimaCrawl()]
    for spi in spiders:
        spi.cocurrent = True
    #     if not spi.db:
    #         spi.db = db
        # spi.run()
        # print(spi.proxyLs)
    # Judger().run()
    manager = Manager()
    manager.run()
    
Example #6
import spotipy
from utils.manager import Manager
from utils.constants import Constants

Constants.init()
Manager.init()
Manager.get_token()
sp = Manager.get_spotify_instance()
songs_ids = Manager.get_songs_ids()

limit = 50
# Delete the saved tracks in batches of `limit` IDs per request.
for i in range(0, len(songs_ids), limit):
    sp.current_user_saved_tracks_delete(tracks=songs_ids[i:i + limit])
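The slices of 50 keep each call within the Spotify Web API's limit of 50 track IDs per remove-saved-tracks request.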
Example #7
from utils.constants import Constants
from utils.manager import Manager

Constants.init()
Manager.init()
Manager.get_token()
'''
Manager.get_spotify_instance()
Manager.get_songs_ids()
Manager.pick_base_song_id()
Manager.get_candidates_ids()
Manager.get_songs_features()
print(Manager.get_similar_songs())
'''
Example #8
def main():
    """Do stuff."""
    args = parser.parse_args()

    # args.batch_size = args.batch_size * torch.cuda.device_count()
    args.network_width_multiplier = math.sqrt(args.network_width_multiplier)

    if args.mode == 'prune':
        args.save_folder = os.path.join(args.save_folder,
                                        str(args.target_sparsity))
        if args.initial_sparsity != 0.0:
            args.load_folder = os.path.join(args.load_folder,
                                            str(args.initial_sparsity))

    if args.pruning_ratio_to_acc_record_file and not os.path.isdir(
            args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]):
        os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0])

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if args.log_path:
        set_logger(args.log_path)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(
                args.checkpoint_format.format(save_folder=resume_folder,
                                              epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch:
        filepath = args.checkpoint_format.format(save_folder=resume_folder,
                                                 epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        shared_layer_info = checkpoint['shared_layer_info']
        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
        if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[
                args.dataset]:
            args.network_width_multiplier = shared_layer_info[
                args.dataset]['network_width_multiplier']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.arch == 'resnet50':
        # num_for_construct = [64, 64, 64*4, 128, 128*4, 256, 256*4, 512, 512*4]
        model = models.__dict__[args.arch](
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info)
    elif 'vgg' in args.arch:
        custom_cfg = [
            64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
            512, 512, 512, 'M'
        ]
        model = models.__dict__[args.arch](
            custom_cfg,
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info,
            progressive_init=args.progressive_init)
    else:
        print('Error!')
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    # Move model to GPU
    model = nn.DataParallel(model)
    model = model.cuda()

    # Load ImageNet-pretrained weights (for 224x224 datasets) only when this is the first task.
    if args.use_imagenet_pretrained and model.module.datasets.index(
            args.dataset) == 0:
        curr_model_state_dict = model.state_dict()
        if args.arch == 'custom_vgg':
            state_dict = model_zoo.load_url(model_urls['vgg16_bn'])
            for name, param in state_dict.items():
                if 'classifier' not in name:
                    curr_model_state_dict['module.' + name].copy_(param)
            curr_model_state_dict['module.features.45.weight'].copy_(
                state_dict['classifier.0.weight'])
            curr_model_state_dict['module.features.45.bias'].copy_(
                state_dict['classifier.0.bias'])
            curr_model_state_dict['module.features.48.weight'].copy_(
                state_dict['classifier.3.weight'])
            curr_model_state_dict['module.features.48.bias'].copy_(
                state_dict['classifier.3.bias'])
            if args.dataset == 'imagenet':
                curr_model_state_dict['module.classifiers.0.weight'].copy_(
                    state_dict['classifier.6.weight'])
                curr_model_state_dict['module.classifiers.0.bias'].copy_(
                    state_dict['classifier.6.bias'])
        elif args.arch == 'resnet50':
            state_dict = model_zoo.load_url(model_urls['resnet50'])
            for name, param in state_dict.items():
                if 'fc' not in name:
                    curr_model_state_dict['module.' + name].copy_(param)
            if args.dataset == 'imagenet':
                curr_model_state_dict['module.classifiers.0.weight'].copy_(
                    state_dict['fc.weight'])
                curr_model_state_dict['module.classifiers.0.bias'].copy_(
                    state_dict['fc.bias'])
        else:
            print(
                "Currently no mapping is defined between the ImageNet pretrained "
                "weights and our model for {}".format(args.arch))
            sys.exit(5)

    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                mask = mask.cuda()
                masks[name] = mask
    else:
        # When we expand the network, we need to allocate new masks.
        NEED_ADJUST_MASK = False
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if masks[name].size(1) < module.weight.data.size(1):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif masks[name].size(1) > module.weight.data.size(1):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].
                             size(1), :, :].copy_(masks[name])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1)].copy_(
                            masks[name])
                        masks[name] = mask
            elif args.mode == 'inference':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:, :, :, :].copy_(
                            masks[name][:mask.size(0), :mask.size(1), :, :])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:, :].copy_(
                            masks[name][:mask.size(0), :mask.size(1)])
                        masks[name] = mask

    if args.dataset not in shared_layer_info:

        shared_layer_info[args.dataset] = {
            'bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'piggymask': {}
        }

        piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(
                        module, nl.SharableLinear):
                    piggymasks[name] = torch.zeros_like(masks['module.' +
                                                              name],
                                                        dtype=torch.float32)
                    piggymasks[name].fill_(0.01)
                    piggymasks[name] = Parameter(piggymasks[name])
                    module.piggymask = piggymasks[name]
    else:
        piggymasks = shared_layer_info[args.dataset]['piggymask']
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(
                        module, nl.SharableLinear):
                    module.piggymask = piggymasks[name]

    shared_layer_info[args.dataset][
        'network_width_multiplier'] = args.network_width_multiplier

    if 'cropped' in args.dataset:
        train_loader = dataset.train_loader_cropped(args.train_path,
                                                    args.batch_size)
        val_loader = dataset.val_loader_cropped(args.val_path,
                                                args.val_batch_size)
    else:
        train_loader = dataset.train_loader(args.train_path, args.batch_size)
        val_loader = dataset.val_loader(args.val_path, args.val_batch_size)

    # If we are going to save checkpoints in a different folder, recalculate the starting epoch.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    curr_prune_step = begin_prune_step = start_epoch * len(train_loader)
    end_prune_step = curr_prune_step + args.pruning_interval * len(
        train_loader)

    manager = Manager(args, model, shared_layer_info, masks, train_loader,
                      val_loader, begin_prune_step, end_prune_step)

    if args.mode == 'inference':
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch,
                                                  resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    lr_mask = args.lr_mask
    # update all layers
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []
    masks_to_optimize_via_Adam = []
    named_of_masks_to_optimize_via_Adam = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(
                    args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        elif 'piggymask' in name:
            masks_to_optimize_via_Adam.append(param)
            named_of_masks_to_optimize_via_Adam.append(name)
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    optimizer_network = optim.SGD(params_to_optimize_via_SGD,
                                  lr=lr,
                                  weight_decay=0.0,
                                  momentum=0.9,
                                  nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    if masks_to_optimize_via_Adam:
        optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask)
        optimizers.add(optimizer_mask, lr_mask)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)
    """Performs training."""
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if args.jsonfile is None or not os.path.isfile(args.jsonfile):
        sys.exit(3)
    with open(args.jsonfile, 'r') as jsonfile:
        json_data = json.load(jsonfile)
        baseline_acc = float(json_data[args.dataset])

    if args.mode == 'prune':
        if args.dataset != 'imagenet':
            history_best_avg_val_acc_when_prune = 0.0
            #history_best_avg_val_acc_when_prune = baseline_acc - 0.005
        else:
            if 'vgg' in args.arch:
                baseline_acc = 0.7336
                history_best_avg_val_acc_when_prune = baseline_acc - 0.005
            elif 'resnet50' in args.arch:
                baseline_acc = 0.7616
                history_best_avg_val_acc_when_prune = baseline_acc - 0.005
            else:
                print('Something is wrong')
                exit(1)

        stop_prune = True

        if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder:
            if args.dataset == 'imagenet':
                args.epochs = 10 + resume_from_epoch
            else:
                args.epochs = 20 + resume_from_epoch
        logging.info('')
        logging.info('Before pruning: ')
        logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity,
                                                       args.target_sparsity))
        manager.validate(start_epoch - 1)
        logging.info('')

    elif args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()

        if args.dataset == 'imagenet':
            manager.validate(0)
            manager.save_checkpoint(optimizers, 0, args.save_folder)
            return

        history_best_avg_val_acc = 0.0
        num_epochs_that_criterion_does_not_get_better = 0
        times_of_decaying_learning_rate = 0

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx,
                                                       curr_lrs,
                                                       curr_prune_step)
        avg_val_acc = manager.validate(epoch_idx)

        if args.mode == 'prune' and (epoch_idx + 1) >= (
                args.pruning_interval + start_epoch) and (
                    avg_val_acc > history_best_avg_val_acc_when_prune):
            stop_prune = False
            history_best_avg_val_acc_when_prune = avg_val_acc
            if args.save_folder is not None:
                paths = os.listdir(args.save_folder)
                if paths and '.pth.tar' in paths[0]:
                    for checkpoint_file in paths:
                        os.remove(
                            os.path.join(args.save_folder, checkpoint_file))
            else:
                print('Something is wrong! Block the program with pdb')
                pdb.set_trace()

            manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)

        if args.mode == 'finetune':

            if avg_val_acc > history_best_avg_val_acc:
                num_epochs_that_criterion_does_not_get_better = 0
                if args.save_folder is not None:
                    paths = os.listdir(args.save_folder)
                    if paths and '.pth.tar' in paths[0]:
                        for checkpoint_file in paths:
                            os.remove(
                                os.path.join(args.save_folder,
                                             checkpoint_file))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()

                history_best_avg_val_acc = avg_val_acc
                manager.save_checkpoint(optimizers, epoch_idx,
                                        args.save_folder)
            else:
                num_epochs_that_criterion_does_not_get_better += 1

            if times_of_decaying_learning_rate >= 3:
                print()
                print(
                    "times_of_decaying_learning_rate reached {}, stopping training".
                    format(times_of_decaying_learning_rate))
                break

            if num_epochs_that_criterion_does_not_get_better >= 5:
                times_of_decaying_learning_rate += 1
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']
                print()
                print("no higher accuracy for {} consecutive epochs, "
                      "decaying the learning rate by a factor of 0.1".format(
                          num_epochs_that_criterion_does_not_get_better))
                num_epochs_that_criterion_does_not_get_better = 0

                if times_of_decaying_learning_rate == 1 and len(
                        optimizers.lrs) == 2:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.2
                    curr_lrs[1] = param_group['lr']

    print('-' * 16)

    if args.pruning_ratio_to_acc_record_file:
        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

    if args.mode == 'finetune' and not args.test_piggymask:
        if args.pruning_ratio_to_acc_record_file:
            json_data[0.0] = round(history_best_avg_val_acc, 4)
            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)

        if history_best_avg_val_acc - baseline_acc > -0.005:  # TODO
            #json_data = {}
            #json_data['acc_before_prune'] = '{:.4f}'.format(history_best_avg_val_acc)
            #with open(args.tmp_benchmark_file, 'w') as jsonfile:
            #    json.dump(json_data, jsonfile)
            pass
        else:
            print("It's time to expand the Network")
            print('Auto expand network')
            sys.exit(2)

        if manager.pruner.calculate_curr_task_ratio() == 0.0:
            print(
                'There is no space left in the convolutional layers for the '
                'current task, so there is no need to prune')
            sys.exit(5)

    elif args.mode == 'prune':
        #        if stop_prune:
        #            print('Acc too low, stop pruning.')
        #            sys.exit(4)
        if args.pruning_ratio_to_acc_record_file:
            json_data[args.target_sparsity] = round(
                history_best_avg_val_acc_when_prune, 4)
            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)
Example #9
# coding: utf-8
from utils.manager import Manager

__author__ = 'deff'
import sys
import os
if len(sys.argv) < 2:
    print("usage: python coeus.py sdk_path [-d]")
    print("-d means delete temp files.")
    print("sdk_path: aar or jar or apk")
    sys.exit(0)


sdk_path = sys.argv[1]

if not os.path.exists(sdk_path):
    sdk_path = os.path.join(os.curdir, sdk_path)
    if not os.path.exists(sdk_path):
        print("please input correct sdk_path.")
        sys.exit(0)

m = Manager(sdk_path)
m.start()

if len(sys.argv) > 2 and sys.argv[2] == "-d":
    m.delete()
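Given the argument handling above, the tool would be invoked as, for example, python coeus.py path/to/sdk.aar -d, where the optional -d flag removes the temp files after the run (the path here is illustrative).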
Example #10
import sys
sys.path.append("../")

from utils.manager import Manager

if "__main__" == __name__:
    try:
        m = Manager()
        m.run()
    except KeyboardInterrupt: 
        exit()
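The sys.path.append("../") call lets the script resolve utils.manager from the parent directory when it is run from its own folder.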
Example #11
def main():
    args = argparser.get_parser()
    set_seeds(args)
    settings(args)

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch, resume_folder = check_resume_epoch(args)

    dataset_history, dataset2num_classes, masks, shared_layer_info = info_reload(resume_from_epoch, args)

    model = build_model(args, dataset_history, dataset2num_classes)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    masks = load_or_build_masks(masks, model, args)

    model = nn.DataParallel(model)

    shared_layer_info = check_if_need_build_shared_layer_info(args, shared_layer_info)

    train_loader, val_loader, test_loader = load_data(args)

    # If we are going to save checkpoints in a different folder, recalculate the starting epoch.
    start_epoch = calculate_start_epoch(args, resume_from_epoch)

    manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader)

    args.training_steps = args.epochs * len(train_loader)
    optimizers, schedulers = set_optimizers(args, model)
    # manager.save_checkpoint(optimizers, 0, args.save_folder)
    manager.load_checkpoint(resume_from_epoch, resume_folder, args)

    """Performs training."""
    curr_lrs = obtain_curr_lrs(optimizers)

    if args.mode == 'prune':
        print('Sparsity ratio: {}'.format(args.one_shot_prune_perc))
        print('Execute one shot pruning ...')
        manager.one_shot_prune(args.one_shot_prune_perc)
        manager.pruner.apply_mask()
    elif args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()
        logging.info('Finetune stage...')

    freeze_modules(model, args)

    max_val_acc = 0
    max_test_acc = 0
    model = model.cuda()
    for epoch_idx in range(start_epoch, args.epochs):
        need_save = False
        manager.train(optimizers, schedulers, epoch_idx, curr_lrs)
        avg_val_acc = manager.validate(epoch_idx)
        manager.val_loader = test_loader
        logging.info("performance on test")
        test_acc = manager.validate(epoch_idx)
        manager.val_loader = val_loader
        if avg_val_acc >= max_val_acc:
            need_save = True
            max_val_acc = avg_val_acc
            max_test_acc = test_acc

        if need_save:
            check_if_need_remove_checkpoint_files(args)
            manager.save_checkpoint(epoch_idx, args.save_folder)
    logging.info('-' * 16)
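For reference, a minimal sketch of an obtain_curr_lrs helper like the one called above, mirroring the inline learning-rate loops in Examples #4 and #7; the body is an assumption based on those examples.

def obtain_curr_lrs(optimizers):
    # Hypothetical helper (cf. Examples #4 and #7): record the current
    # learning rate of the first param group of each optimizer.
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break
    return curr_lrs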