def main(): script_dir = os.path.dirname(__file__) module_path = os.path.abspath(os.path.join(script_dir, '..', '..')) global msglogger # Parse arguments args = parser.get_parser().parse_args() if args.epochs is None: args.epochs = 90 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) msglogger = apputils.config_pylogger( os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir, args.verbose) # Log various details about the execution environment. It is sometimes useful # to refer to past experiment executions and this information may be useful. apputils.log_execution_env_state( filter(None, [args.compress, args.qe_stats_file ]), # remove both None and empty strings msglogger.logdir, gitroot=module_path) msglogger.debug("Distiller: %s", distiller.__version__) if args.evaluate: args.deterministic = True if args.deterministic: distiller.set_deterministic( args.seed) # For experiment reproducability else: if args.seed is not None: distiller.set_seed(args.seed) # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image # classification models, as the input sizes don't change during the run # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3 cudnn.benchmark = True start_epoch = 0 ending_epoch = args.epochs perf_scores_history = [] if args.cpu or not torch.cuda.is_available(): # Set GPU index to -1 if using CPU args.device = 'cpu' args.gpus = -1 else: args.device = 'cuda' if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: raise ValueError( 'ERROR: Argument --gpus must be a comma-separated list of integers only' ) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: raise ValueError( 'ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) if 'cifar' in args.dataset: args.dataset = 'cifar10' args.num_classes = 10 elif 'imagenet' in args.dataset: args.dataset = 'imagenet' args.num_classes = 1000 # Infer the dataset from the model name # args.dataset = distiller.apputils.classification_dataset_str_from_arch(args.arch) # args.num_classes = distiller.apputils.classification_num_classes(args.dataset) if args.earlyexit_thresholds: args.num_exits = len(args.earlyexit_thresholds) + 1 args.loss_exits = [0] * args.num_exits args.losses_exits = [] args.exiterrors = [] # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. # Expanded for hyperspectral datasets # the real load_data call (not wrapper) populates n_classes prior to get_model hyperparams = vars(args) hyperparams.update( {'model': args.arch} ) # for load_data, get_model needs the model name so that hyperparams can be populated prior to retrieving dataset if args.formerly_used_technique is not None: hyperparams.update( {'formerly_used_technique': args.formerly_used_technique}) if args.old_n_components is not None: hyperparams.update({'n_bands': int(args.old_n_components)}) train_loader, val_loader, test_loader, _ = load_data( args, hyperparams=hyperparams) hyperparams = dict((k, v) for k, v in hyperparams.items() if v is not None) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) # Create the model model = create_model(args.pretrained, args.dataset, args.arch, parallel=not args.load_serialized, device_ids=args.gpus, hyperparams=hyperparams) compression_scheduler = None # Create a couple of logging backends. TensorBoardLogger writes log files in a format # that can be read by Google's Tensor Board. PythonLogger writes to the Python logger. tflogger = TensorBoardLogger(msglogger.logdir) pylogger = PythonLogger(msglogger) # capture thresholds for early-exit training if args.earlyexit_thresholds: msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds) # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1 if args.deprecated_resume: msglogger.warning( 'The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.' ) if not args.reset_optimizer: msglogger.warning( 'If you wish to also reset the optimizer, call with: --reset-optimizer' ) args.reset_optimizer = True args.resumed_checkpoint_path = args.deprecated_resume # We can optionally resume from a checkpoint optimizer = None if args.resumed_checkpoint_path: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.resumed_checkpoint_path, model_device=args.device) elif args.load_model_path: model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) if args.reset_optimizer: start_epoch = 0 if optimizer is not None: optimizer = None msglogger.info( '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0' ) # Define loss function (criterion) criterion = nn.CrossEntropyLoss().to(args.device) if optimizer is None: optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) if args.AMC: return automated_deep_compression(model, criterion, optimizer, pylogger, args) if args.greedy: return greedy(model, criterion, optimizer, pylogger, args) # This sample application can be invoked to produce various summary reports. if args.summary: for summary in args.summary: distiller.model_summary(model, summary, args.dataset) return if args.export_onnx is not None: return distiller.export_img_classifier_to_onnx(model, os.path.join( msglogger.logdir, args.export_onnx), args.dataset, add_softmax=True, verbose=False) if args.qe_calibration: return acts_quant_stats_collection(model, criterion, pylogger, args) if args.activation_histograms: return acts_histogram_collection(model, criterion, pylogger, args) activations_collectors = create_activation_stats_collectors( model, *args.activation_stats) # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. train_loader, val_loader, test_loader, _ = load_data( args, hyperparams=hyperparams) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) args.num_classes = hyperparams['n_classes'] if args.sensitivity is not None: sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2]) return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities) if args.evaluate: return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args, compression_scheduler) if args.compress: # The main use-case for this sample application is CNN compression. Compression # requires a compression schedule configuration file in YAML. compression_scheduler = distiller.file_config( model, optimizer, args.compress, compression_scheduler, (start_epoch - 1) if args.resumed_checkpoint_path else None) # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer) model.to(args.device) elif compression_scheduler is None: compression_scheduler = distiller.CompressionScheduler(model) if args.thinnify: #zeros_mask_dict = distiller.create_model_masks_dict(model) assert args.resumed_checkpoint_path is not None, \ "You must use --resume-from to provide a checkpoint file to thinnify" distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None) apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler, name="{}_thinned".format( args.resumed_checkpoint_path.replace( ".pth.tar", "")), dir=msglogger.logdir) print( "Note: your model may have collapsed to random inference, so you may want to fine-tune" ) return args.kd_policy = None if args.kd_teacher: teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus) if args.kd_resume: teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume) dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt) args.kd_policy = distiller.KnowledgeDistillationPolicy( model, teacher, args.kd_temp, dlw) compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs, frequency=1) msglogger.info('\nStudent-Teacher knowledge distillation enabled:') msglogger.info('\tTeacher Model: %s', args.kd_teacher) msglogger.info('\tTemperature: %s', args.kd_temp) msglogger.info('\tLoss Weights (distillation | student | teacher): %s', ' | '.join(['{:.2f}'.format(val) for val in dlw])) msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch) if start_epoch >= ending_epoch: msglogger.error( 'epoch count is too low, starting epoch is {} but total epochs set to {}' .format(start_epoch, ending_epoch)) raise ValueError('Epochs parameter is too low. Nothing to do.') for epoch in range(start_epoch, ending_epoch): # This is the main training loop. msglogger.info('\n') if compression_scheduler: compression_scheduler.on_epoch_begin( epoch, metrics=(vloss if (epoch != start_epoch) else 10**6)) # Train for one epoch with collectors_context(activations_collectors["train"]) as collectors: train(train_loader, model, criterion, optimizer, epoch, compression_scheduler, loggers=[tflogger, pylogger], args=args) distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger]) distiller.log_activation_statsitics( epoch, "train", loggers=[tflogger], collector=collectors["sparsity"]) if args.masks_sparsity: msglogger.info( distiller.masks_sparsity_tbl_summary( model, compression_scheduler)) # evaluate on validation set with collectors_context(activations_collectors["valid"]) as collectors: top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch) distiller.log_activation_statsitics( epoch, "valid", loggers=[tflogger], collector=collectors["sparsity"]) save_collectors_data(collectors, msglogger.logdir) stats = ('Performance/Validation/', OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)])) distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1, loggers=[tflogger]) if compression_scheduler: compression_scheduler.on_epoch_end(epoch, optimizer) # Update the list of top scores achieved so far, and save the checkpoint update_training_scores_history(perf_scores_history, model, top1, top5, epoch, args.num_best_scores) is_best = epoch == perf_scores_history[0].epoch checkpoint_extras = { 'current_top1': top1, 'best_top1': perf_scores_history[0].top1, 'best_epoch': perf_scores_history[0].epoch } apputils.save_checkpoint(epoch, args.arch, model, optimizer=optimizer, scheduler=compression_scheduler, extras=checkpoint_extras, is_best=is_best, name=args.name, dir=msglogger.logdir) # Finally run results on the test set test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
def calib_eval_fn(model): classifier.test(calib_data_loader, model, cc.criterion, [], None, args) model = create_model(args.pretrained, args.dataset, args.arch, parallel=not args.load_serialized, device_ids=args.gpus) args.device = next(model.parameters()).device if args.resumed_checkpoint_path: args.load_model_path = args.resumed_checkpoint_path if args.load_model_path: msglogger.info("Loading checkpoint from %s" % args.load_model_path) model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) dummy_input = torch.rand(*model.input_shape, device=args.device) if args.qe_stats_file: msglogger.info("Loading stats from %s" % args.qe_stats_file) with open(args.qe_stats_file, 'r') as f: act_stats = distiller.yaml_ordered_load(f) else: act_stats = None model, overrides = ptq_greedy_search(model, dummy_input, test_fn, calib_eval_fn=calib_eval_fn, args=args, act_stats=act_stats) # Prepare a compression scheduler yaml config file:
def arbitrary_channel_pruning(config, channels_to_remove, is_parallel): """Test removal of arbitrary channels. The test receives a specification of channels to remove. Based on this specification, the channels are pruned and then physically removed from the model (via a "thinning" process). """ model, zeros_mask_dict = common.setup_test(config.arch, config.dataset, is_parallel) pair = config.module_pairs[0] conv2 = common.find_module_by_name(model, pair[1]) assert conv2 is not None # Test that we can access the weights tensor of the first convolution in layer 1 conv2_p = distiller.model_find_param(model, pair[1] + ".weight") assert conv2_p is not None assert conv2_p.dim() == 4 num_channels = conv2_p.size(1) cnt_nnz_channels = num_channels - len(channels_to_remove) mask = create_channels_mask(conv2_p, channels_to_remove) assert distiller.density_ch(mask) == ( conv2.in_channels - len(channels_to_remove)) / conv2.in_channels # Cool, so now we have a mask for pruning our channels. # Use the mask to prune zeros_mask_dict[pair[1] + ".weight"].mask = mask zeros_mask_dict[pair[1] + ".weight"].apply_mask(conv2_p) all_channels = set([ch for ch in range(num_channels)]) nnz_channels = set( distiller.find_nonzero_channels_list(conv2_p, pair[1] + ".weight")) channels_removed = all_channels - nnz_channels logger.info("Channels removed {}".format(channels_removed)) # Now, let's do the actual network thinning distiller.remove_channels(model, zeros_mask_dict, config.arch, config.dataset, optimizer=None) conv1 = common.find_module_by_name(model, pair[0]) assert conv1 assert conv1.out_channels == cnt_nnz_channels assert conv2.in_channels == cnt_nnz_channels assert conv1.weight.size(0) == cnt_nnz_channels assert conv2.weight.size(1) == cnt_nnz_channels if config.bn_name is not None: bn1 = common.find_module_by_name(model, config.bn_name) assert bn1.running_var.size(0) == cnt_nnz_channels assert bn1.running_mean.size(0) == cnt_nnz_channels assert bn1.num_features == cnt_nnz_channels assert bn1.bias.size(0) == cnt_nnz_channels assert bn1.weight.size(0) == cnt_nnz_channels dummy_input = distiller.get_dummy_input(config.dataset, distiller.model_device(model)) optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.1) run_forward_backward(model, optimizer, dummy_input) # Let's test saving and loading a thinned model. # We save 3 times, and load twice, to make sure to cover some corner cases: # - Make sure that after loading, the model still has hold of the thinning recipes # - Make sure that after a 2nd load, there no problem loading (in this case, the # - tensors are already thin, so this is a new flow) # (1) save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None) model_2 = create_model(False, config.dataset, config.arch, parallel=is_parallel) model(dummy_input) model_2(dummy_input) conv2 = common.find_module_by_name(model_2, pair[1]) assert conv2 is not None with pytest.raises(KeyError): model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar') compression_scheduler = distiller.CompressionScheduler(model) hasattr(model, 'thinning_recipes') run_forward_backward(model, optimizer, dummy_input) # (2) save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None, scheduler=compression_scheduler) model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar') assert hasattr(model_2, 'thinning_recipes') logger.info("test_arbitrary_channel_pruning - Done") # (3) save_checkpoint(epoch=0, arch=config.arch, model=model_2, optimizer=None, scheduler=compression_scheduler) model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar') assert hasattr(model_2, 'thinning_recipes') logger.info("test_arbitrary_channel_pruning - Done 2")
def test_load_lean_checkpoint_2(): checkpoint_filename = '../examples/ssl/checkpoints/checkpoint_trained_dense.pth.tar' model = create_model(False, 'cifar10', 'resnet20_cifar', 0) model = load_lean_checkpoint(model, checkpoint_filename)
def main(): global msglogger script_dir = os.path.dirname(__file__) args = parse_args() # Distiller loggers msglogger = apputils.config_pylogger('logging.conf', args.name, output_dir=args.output_dir) tflogger = TensorBoardLogger(msglogger.logdir) # tflogger.log_gradients = True # pylogger = PythonLogger(msglogger) if args.seed is not None: msglogger.info("Using seed = {}".format(args.seed)) torch.manual_seed(args.seed) np.random.seed(seed=args.seed) args.qe_mode = str(args.qe_mode).split('.')[1] args.qe_clip_acts = str(args.qe_clip_acts).split('.')[1] apputils.log_execution_env_state(sys.argv) if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: msglogger.error( 'ERROR: Argument --gpus must be a comma-separated list of integers only' ) exit(1) if len(args.gpus) > 1: msglogger.error('ERROR: Only single GPU supported for NCF') exit(1) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: msglogger.error( 'ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) exit(1) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) # Save configuration to file config = {k: v for k, v in args.__dict__.items()} config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp()) config['local_timestamp'] = str(datetime.now()) run_dir = msglogger.logdir msglogger.info("Saving config and results to {}".format(run_dir)) if not os.path.exists(run_dir) and run_dir != '': os.makedirs(run_dir) utils.save_config(config, run_dir) # Check that GPUs are actually available use_cuda = not args.no_cuda and torch.cuda.is_available() t1 = time.time() # Load Data training = not (args.eval or args.qe_calibration or args.activation_histograms) msglogger.info('Loading data') if training: train_dataset = CFTrainDataset( os.path.join(args.data, TRAIN_RATINGS_FILENAME), args.negative_samples) train_dataloader = torch.utils.data.DataLoader( dataset=train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items else: train_dataset = None train_dataloader = None nb_users, nb_items = (138493, 26744) test_ratings = load_test_ratings( os.path.join(args.data, TEST_RATINGS_FILENAME)) # noqa: E501 test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME)) msglogger.info( 'Load data done [%.1f s]. #user=%d, #item=%d, #train=%s, #test=%d' % (time.time() - t1, nb_users, nb_items, str(train_dataset.mat.nnz) if training else 'N/A', len(test_ratings))) # Create model model = NeuMF(nb_users, nb_items, mf_dim=args.factors, mf_reg=0., mlp_layer_sizes=args.layers, mlp_layer_regs=[0. for i in args.layers], split_final=args.split_final) if use_cuda: model = model.cuda() msglogger.info(model) msglogger.info("{} parameters".format(utils.count_parameters(model))) # Save model text description with open(os.path.join(run_dir, 'model.txt'), 'w') as file: file.write(str(model)) compression_scheduler = None start_epoch = 0 optimizer = None if args.load: if training: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.load) if args.reset_optimizer: start_epoch = 0 optimizer = None else: model = apputils.load_lean_checkpoint(model, args.load) # Add loss to graph criterion = nn.BCEWithLogitsLoss() if use_cuda: criterion = criterion.cuda() if training and optimizer is None: optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) if args.compress: compression_scheduler = distiller.file_config(model, optimizer, args.compress) model.cuda() # Create files for tracking training valid_results_file = os.path.join(run_dir, 'valid_results.csv') if args.qe_calibration or args.activation_histograms: calib = { 'portion': args.qe_calibration, 'desc_str': 'quantization calibration stats', 'collect_func': partial(distiller.data_loggers.collect_quant_stats, inplace_runtime_check=True, disable_inplace_attrs=True) } hists = { 'portion': args.activation_histograms, 'desc_str': 'activation histograms', 'collect_func': partial(distiller.data_loggers.collect_histograms, activation_stats=None, nbins=2048, save_hist_imgs=True) } d = calib if args.qe_calibration else hists distiller.utils.assign_layer_fq_names(model) num_users = int(np.floor(len(test_ratings) * d['portion'])) msglogger.info( "Generating {} based on {:.1%} of the test-set ({} users)".format( d['desc_str'], d['portion'], num_users)) test_fn = partial(val_epoch, ratings=test_ratings, negs=test_negs, K=args.topk, use_cuda=use_cuda, processes=args.processes, num_users=num_users) d['collect_func'](model=model, test_fn=test_fn, save_dir=run_dir, classes=None) return 0 if args.eval: if args.quantize_eval and args.qe_calibration is None: model.cpu() quantizer = quantization.PostTrainLinearQuantizer.from_args( model, args) dummy_input = (torch.tensor([1]), torch.tensor([1]), torch.tensor([True], dtype=torch.bool)) quantizer.prepare_model(dummy_input) model.cuda() distiller.utils.assign_layer_fq_names(model) if args.eval_fp16: model = model.half() # Calculate initial Hit Ratio and NDCG begin = time.time() hits, ndcgs = val_epoch(model, test_ratings, test_negs, args.topk, use_cuda=use_cuda, processes=args.processes) val_time = time.time() - begin hit_rate = np.mean(hits) msglogger.info( 'Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, val_time = {val_time:.2f}' .format(K=args.topk, hit_rate=hit_rate, ndcg=np.mean(ndcgs), val_time=val_time)) hit_rate = 0 if args.quantize_eval: checkpoint_name = 'quantized' apputils.save_checkpoint(0, 'NCF', model, optimizer=None, extras={'quantized_hr@10': hit_rate}, name='_'.join([args.name, 'quantized']) if args.name else checkpoint_name, dir=msglogger.logdir) return 0 total_samples = len(train_dataloader.sampler) steps_per_epoch = math.ceil(total_samples / args.batch_size) best_hit_rate = 0 best_epoch = 0 for epoch in range(start_epoch, args.epochs): msglogger.info('') model.train() losses = utils.AverageMeter() begin = time.time() if compression_scheduler: compression_scheduler.on_epoch_begin(epoch, optimizer) loader = tqdm.tqdm(train_dataloader) for batch_index, (user, item, label) in enumerate(loader): user = torch.autograd.Variable(user, requires_grad=False) item = torch.autograd.Variable(item, requires_grad=False) label = torch.autograd.Variable(label, requires_grad=False) if use_cuda: user = user.cuda(async=True) item = item.cuda(async=True) label = label.cuda(async=True) if compression_scheduler: compression_scheduler.on_minibatch_begin( epoch, batch_index, steps_per_epoch, optimizer) outputs = model(user, item, torch.tensor([False], dtype=torch.bool)) loss = criterion(outputs, label) if compression_scheduler: compression_scheduler.before_backward_pass( epoch, batch_index, steps_per_epoch, loss, optimizer, return_loss_components=False) losses.update(loss.data.item(), user.size(0)) optimizer.zero_grad() loss.backward() optimizer.step() if compression_scheduler: compression_scheduler.on_minibatch_end(epoch, batch_index, steps_per_epoch, optimizer) # Save stats to file description = ( 'Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'.format( epoch, loss=losses)) loader.set_description(description) steps_completed = batch_index + 1 if steps_completed % args.log_freq == 0: stats_dict = OrderedDict() stats_dict['Loss'] = losses.avg stats = ('Performance/Training/', stats_dict) params = model.named_parameters( ) if args.log_params_histograms else None distiller.log_training_progress(stats, params, epoch, steps_completed, steps_per_epoch, args.log_freq, [tflogger]) tflogger.log_model_buffers(model, ['tracked_min', 'tracked_max'], 'Quant/Train/Acts/TrackedMinMax', epoch, steps_completed, steps_per_epoch, args.log_freq) train_time = time.time() - begin begin = time.time() hits, ndcgs = val_epoch(model, test_ratings, test_negs, args.topk, use_cuda=use_cuda, output=valid_results_file, epoch=epoch, processes=args.processes) val_time = time.time() - begin if compression_scheduler: compression_scheduler.on_epoch_end(epoch, optimizer) hit_rate = np.mean(hits) mean_ndcgs = np.mean(ndcgs) stats_dict = OrderedDict() stats_dict['HR@{0}'.format(args.topk)] = hit_rate stats_dict['NDCG@{0}'.format(args.topk)] = mean_ndcgs stats = ('Performance/Validation/', stats_dict) distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1, loggers=[tflogger]) msglogger.info( 'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, AvgTrainLoss = {loss.avg:.4f}, ' 'train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format( epoch=epoch, K=args.topk, hit_rate=hit_rate, ndcg=mean_ndcgs, loss=losses, train_time=train_time, val_time=val_time)) is_best = False if hit_rate > best_hit_rate: best_hit_rate = hit_rate is_best = True best_epoch = epoch extras = { 'current_hr@10': hit_rate, 'best_hr@10': best_hit_rate, 'best_epoch': best_epoch } apputils.save_checkpoint(epoch, 'NCF', model, optimizer, compression_scheduler, extras, is_best, dir=run_dir) if args.threshold is not None: if np.mean(hits) >= args.threshold: msglogger.info("Hit threshold of {}".format(args.threshold)) break
def _init_learner(args): # Create the model model = create_model(args.pretrained, args.dataset, args.arch, parallel=not args.load_serialized, device_ids=args.gpus) compression_scheduler = None # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1 if args.deprecated_resume: msglogger.warning( 'The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.' ) if not args.reset_optimizer: msglogger.warning( 'If you wish to also reset the optimizer, call with: --reset-optimizer' ) args.reset_optimizer = True args.resumed_checkpoint_path = args.deprecated_resume optimizer = None start_epoch = 0 if args.resumed_checkpoint_path: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.resumed_checkpoint_path, model_device=args.device) elif args.load_model_path: model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) if args.reset_optimizer: start_epoch = 0 if optimizer is not None: optimizer = None msglogger.info( '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0' ) if optimizer is None: optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.debug('Optimizer Type: %s', type(optimizer)) msglogger.debug('Optimizer Args: %s', optimizer.defaults) if args.compress: # The main use-case for this sample application is CNN compression. Compression # requires a compression schedule configuration file in YAML. compression_scheduler = distiller.file_config( model, optimizer, args.compress, compression_scheduler, (start_epoch - 1) if args.resumed_checkpoint_path else None) # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer) model.to(args.device) elif compression_scheduler is None: compression_scheduler = distiller.CompressionScheduler(model) ending_epoch = args.epochs if start_epoch >= ending_epoch: msglogger.error( 'epoch count is too low, starting epoch is {} but total epochs set to {}' .format(start_epoch, ending_epoch)) raise ValueError('Epochs parameter is too low. Nothing to do.') return model, compression_scheduler, optimizer, start_epoch, ending_epoch
def image_classifier_ptq_lapq(model, criterion, loggers, args): # data loader function for splitting the validation set. args = deepcopy(args) effective_test_size_bak = args.effective_test_size args.effective_test_size = args.lapq_eval_size eval_data_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True, fixed_subset=True) args.effective_test_size = effective_test_size_bak test_data_loader = classifier.load_data(args, load_train=False, load_val=False, load_test=True) model = model.eval() device = next(model.parameters()).device if args.lapq_eval_memoize_dataloader: images_batches = [] targets_batches = [] for images, targets in eval_data_loader: images_batches.append(images.to(device)) targets_batches.append(targets.to(device)) memoized_data_loader = [(torch.cat(images_batches), torch.cat(targets_batches))] else: memoized_data_loader = None def eval_fn(model): if memoized_data_loader: loss = 0 for images, targets in memoized_data_loader: outputs = model(images) loss += criterion(outputs, targets).item() loss = loss / len(memoized_data_loader) else: _, _, loss = classifier.test(eval_data_loader, model, criterion, loggers, None, args) return loss def test_fn(model): top1, top5, loss = classifier.test(test_data_loader, model, criterion, loggers, None, args) return OrderedDict([('top-1', top1), ('top-5', top5), ('loss', loss)]) args.device = device if args.resumed_checkpoint_path: args.load_model_path = args.resumed_checkpoint_path if args.load_model_path: msglogger.info("Loading checkpoint from %s" % args.load_model_path) model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) quantizer = distiller.quantization.PostTrainLinearQuantizer.from_args(model, args) dummy_input = torch.rand(*model.input_shape, device=args.device) model, qp_dict = lapq.ptq_coordinate_search(quantizer, dummy_input, eval_fn, test_fn=test_fn, **lapq.cmdline_args_to_dict(args)) results = test_fn(quantizer.model) msglogger.info("Arch: %s \tTest: \t top1 = %.3f \t top5 = %.3f \t loss = %.3f" % (args.arch, results['top-1'], results['top-5'], results['loss'])) distiller.yaml_ordered_save('%s.quant_params_dict.yaml' % args.arch, qp_dict) distiller.apputils.save_checkpoint(0, args.arch, model, extras={'top1': results['top-1'], 'qp_dict': qp_dict}, name=args.name, dir=msglogger.logdir)
def main(): script_dir = os.path.dirname(__file__) module_path = os.path.abspath(os.path.join(script_dir, '..', '..')) global msglogger # Parse arguments args = parser.get_parser().parse_args() if args.epochs is None: args.epochs = 90 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) msglogger = apputils.config_pylogger( os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir) # Log various details about the execution environment. It is sometimes useful # to refer to past experiment executions and this information may be useful. apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path) msglogger.debug("Distiller: %s", distiller.__version__) start_epoch = 0 ending_epoch = args.epochs perf_scores_history = [] if args.evaluate: args.deterministic = True if args.deterministic: # Experiment reproducibility is sometimes important. Pete Warden expounded about this # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/ distiller.set_deterministic( ) # Use a well-known seed, for repeatability of experiments else: # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image # classification models, as the input sizes don't change during the run # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3 cudnn.benchmark = True if args.cpu or not torch.cuda.is_available(): # Set GPU index to -1 if using CPU args.device = 'cpu' args.gpus = -1 else: args.device = 'cuda' if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: raise ValueError( 'ERROR: Argument --gpus must be a comma-separated list of integers only' ) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: raise ValueError( 'ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) # Infer the dataset from the model name args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet' args.num_classes = 10 if args.dataset == 'cifar10' else 1000 # Create the model model = create_model(args.pretrained, args.dataset, args.arch, parallel=not args.load_serialized, device_ids=args.gpus) compression_scheduler = None optimizer = None if args.resumed_checkpoint_path: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.resumed_checkpoint_path, use_swa_model=args.use_swa_model, model_device=args.device) elif args.load_model_path: model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) if args.reset_optimizer: start_epoch = 0 if optimizer is not None: optimizer = None msglogger.info( '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0' ) # Create a couple of logging backends. TensorBoardLogger writes log files in a format # that can be read by Google's Tensor Board. PythonLogger writes to the Python logger. tflogger = TensorBoardLogger(msglogger.logdir) pylogger = PythonLogger(msglogger) # Define loss function (criterion) criterion = nn.CrossEntropyLoss().to(args.device) if optimizer is None: optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) # This sample application can be invoked to produce various summary reports. if args.summary: return summarize_model(model, args.dataset, which_summary=args.summary) activations_collectors = create_activation_stats_collectors( model, *args.activation_stats) if args.qe_calibration: msglogger.info('Quantization calibration stats collection enabled:') msglogger.info( '\tStats will be collected for {:.1%} of test dataset'.format( args.qe_calibration)) msglogger.info( '\tSetting constant seeds and converting model to serialized execution' ) distiller.set_deterministic() model = distiller.make_non_parallel_copy(model) activations_collectors.update( create_quantization_stats_collector(model)) args.evaluate = True args.effective_test_size = args.qe_calibration # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. train_loader, val_loader, test_loader, _ = apputils.load_data( args.dataset, os.path.expanduser(args.data), args.batch_size, args.workers, args.validation_split, args.deterministic, args.effective_train_size, args.effective_valid_size, args.effective_test_size) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) if args.evaluate: return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args, compression_scheduler)
def main(): script_dir = os.path.dirname(__file__) module_path = os.path.abspath(os.path.join(script_dir, '..', '..')) global msglogger # Parse arguments args = parser.get_parser().parse_args() if args.epochs is None: args.epochs = 90 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) msglogger = apputils.config_pylogger( os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir) # Log various details about the execution environment. It is sometimes useful # to refer to past experiment executions and this information may be useful. apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path) msglogger.debug("Distiller: %s", distiller.__version__) start_epoch = 0 ending_epoch = args.epochs perf_scores_history = [] if args.deterministic: # Experiment reproducibility is sometimes important. Pete Warden expounded about this # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/ # In Pytorch, support for deterministic execution is still a bit clunky. if args.workers > 1: raise ValueError( 'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1' ) # Use a well-known seed, for repeatability of experiments distiller.set_deterministic() else: # This issue: https://github.com/pytorch/pytorch/issues/3659 # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled. cudnn.benchmark = True if args.cpu or not torch.cuda.is_available(): # Set GPU index to -1 if using CPU args.device = 'cpu' args.gpus = -1 else: args.device = 'cuda' if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: raise ValueError( 'ERROR: Argument --gpus must be a comma-separated list of integers only' ) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: raise ValueError( 'ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) # Infer the dataset from the model name args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet' args.num_classes = 10 if args.dataset == 'cifar10' else 1000 if args.earlyexit_thresholds: args.num_exits = len(args.earlyexit_thresholds) + 1 args.loss_exits = [0] * args.num_exits args.losses_exits = [] args.exiterrors = [] # Create the model model = create_model(args.pretrained, args.dataset, args.arch, parallel=not args.load_serialized, device_ids=args.gpus) compression_scheduler = None # Create a couple of logging backends. TensorBoardLogger writes log files in a format # that can be read by Google's Tensor Board. PythonLogger writes to the Python logger. tflogger = TensorBoardLogger(msglogger.logdir) pylogger = PythonLogger(msglogger) # capture thresholds for early-exit training if args.earlyexit_thresholds: msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds) # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1 if args.deprecated_resume: msglogger.warning( 'The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.' ) if not args.reset_optimizer: msglogger.warning( 'If you wish to also reset the optimizer, call with: --reset-optimizer' ) args.reset_optimizer = True args.resumed_checkpoint_path = args.deprecated_resume # We can optionally resume from a checkpoint optimizer = None if args.resumed_checkpoint_path: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.resumed_checkpoint_path, model_device=args.device) elif args.load_model_path: model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) if args.reset_optimizer: start_epoch = 0 if optimizer is not None: optimizer = None msglogger.info( '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0' ) # Define loss function (criterion) criterion = nn.CrossEntropyLoss().to(args.device) if optimizer is None: optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) if args.AMC: return automated_deep_compression(model, criterion, optimizer, pylogger, args) if args.greedy: return greedy(model, criterion, optimizer, pylogger, args) # This sample application can be invoked to produce various summary reports. if args.summary: return summarize_model(model, args.dataset, which_summary=args.summary) activations_collectors = create_activation_stats_collectors( model, *args.activation_stats) if args.qe_calibration: msglogger.info('Quantization calibration stats collection enabled:') msglogger.info( '\tStats will be collected for {:.1%} of test dataset'.format( args.qe_calibration)) msglogger.info( '\tSetting constant seeds and converting model to serialized execution' ) distiller.set_deterministic() model = distiller.make_non_parallel_copy(model) activations_collectors.update( create_quantization_stats_collector(model)) args.evaluate = True args.effective_test_size = args.qe_calibration # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. train_loader, val_loader, test_loader, _ = apputils.load_data( args.dataset, os.path.expanduser(args.data), args.batch_size, args.workers, args.validation_split, args.deterministic, args.effective_train_size, args.effective_valid_size, args.effective_test_size) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) if args.sensitivity is not None: sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2]) return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities) if args.evaluate: return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args, compression_scheduler) if args.compress: # The main use-case for this sample application is CNN compression. Compression # requires a compression schedule configuration file in YAML. compression_scheduler = distiller.file_config( model, optimizer, args.compress, compression_scheduler, (start_epoch - 1) if args.resumed_checkpoint_path else None) # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer) model.to(args.device) elif compression_scheduler is None: compression_scheduler = distiller.CompressionScheduler(model) if args.thinnify: #zeros_mask_dict = distiller.create_model_masks_dict(model) assert args.resumed_checkpoint_path is not None, \ "You must use --resume-from to provide a checkpoint file to thinnify" distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None) apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler, name="{}_thinned".format( args.resumed_checkpoint_path.replace( ".pth.tar", "")), dir=msglogger.logdir) print( "Note: your model may have collapsed to random inference, so you may want to fine-tune" ) return args.kd_policy = None if args.kd_teacher: teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus) if args.kd_resume: teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume) dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt) args.kd_policy = distiller.KnowledgeDistillationPolicy( model, teacher, args.kd_temp, dlw) compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs, frequency=1) msglogger.info('\nStudent-Teacher knowledge distillation enabled:') msglogger.info('\tTeacher Model: %s', args.kd_teacher) msglogger.info('\tTemperature: %s', args.kd_temp) msglogger.info('\tLoss Weights (distillation | student | teacher): %s', ' | '.join(['{:.2f}'.format(val) for val in dlw])) msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch) if start_epoch >= ending_epoch: msglogger.error( 'epoch count is too low, starting epoch is {} but total epochs set to {}' .format(start_epoch, ending_epoch)) raise ValueError('Epochs parameter is too low. Nothing to do.') for epoch in range(start_epoch, ending_epoch): # This is the main training loop. msglogger.info('\n') if compression_scheduler: compression_scheduler.on_epoch_begin( epoch, metrics=(vloss if (epoch != start_epoch) else 10**6)) # Train for one epoch with collectors_context(activations_collectors["train"]) as collectors: train(train_loader, model, criterion, optimizer, epoch, compression_scheduler, loggers=[tflogger, pylogger], args=args) distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger]) distiller.log_activation_statsitics( epoch, "train", loggers=[tflogger], collector=collectors["sparsity"]) if args.masks_sparsity: msglogger.info( distiller.masks_sparsity_tbl_summary( model, compression_scheduler)) # evaluate on validation set with collectors_context(activations_collectors["valid"]) as collectors: top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch) distiller.log_activation_statsitics( epoch, "valid", loggers=[tflogger], collector=collectors["sparsity"]) save_collectors_data(collectors, msglogger.logdir) stats = ('Performance/Validation/', OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)])) distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1, loggers=[tflogger]) if compression_scheduler: compression_scheduler.on_epoch_end(epoch, optimizer) # Update the list of top scores achieved so far, and save the checkpoint update_training_scores_history(perf_scores_history, model, top1, top5, epoch, args.num_best_scores) is_best = epoch == perf_scores_history[0].epoch checkpoint_extras = { 'current_top1': top1, 'best_top1': perf_scores_history[0].top1, 'best_epoch': perf_scores_history[0].epoch } apputils.save_checkpoint(epoch, args.arch, model, optimizer=optimizer, scheduler=compression_scheduler, extras=checkpoint_extras, is_best=is_best, name=args.name, dir=msglogger.logdir) # Finally run results on the test set test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
def main(): script_dir = os.path.dirname(__file__) module_path = os.path.abspath(os.path.join(script_dir, '..', '..')) global msglogger # Parse arguments args = parser.get_parser().parse_args() if args.epochs is None: args.epochs = 200 if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) msglogger = apputils.config_pylogger( os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir) # Log various details about the execution environment. It is sometimes useful # to refer to past experiment executions and this information may be useful. apputils.log_execution_env_state( filter(None, [args.compress, args.qe_stats_file ]), # remove both None and empty strings msglogger.logdir, gitroot=module_path) msglogger.debug("Distiller: %s", distiller.__version__) if args.evaluate: args.deterministic = True if args.deterministic: distiller.set_deterministic( args.seed) # For experiment reproducability else: if args.seed is not None: distiller.set_seed(args.seed) # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image # classification models, as the input sizes don't change during the run # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3 cudnn.benchmark = True start_epoch = 0 ending_epoch = args.epochs perf_scores_history = [] if args.cpu or not torch.cuda.is_available(): # Set GPU index to -1 if using CPU args.device = 'cpu' args.gpus = -1 else: args.device = 'cuda' if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: raise ValueError( 'ERROR: Argument --gpus must be a comma-separated list of integers only' ) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: raise ValueError( 'ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) # Infer the dataset from the model name # TODO args.dataset = 'coco' # args.num_classes = 21 # wc -l ~/data/VOC2012/voc-model-labels.txt if args.load_vgg19 and args.arch != 'vgg19': raise ValueError( '``load_vgg19`` should be set only when vgg19 is used') model = create_pose_estimation_model(args.pretrained, args.dataset, args.arch, load_vgg19=args.load_vgg19, parallel=not args.load_serialized, device_ids=args.gpus) compression_scheduler = None # Create a couple of logging backends. TensorBoardLogger writes log files in a format # that can be read by Google's Tensor Board. PythonLogger writes to the Python logger. tflogger = TensorBoardLogger(msglogger.logdir) pylogger = PythonLogger(msglogger) # <editor-fold desc=">>> Load Model"> # We can optionally resume from a checkpoint optimizer = None if args.resumed_checkpoint_path: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.resumed_checkpoint_path, model_device=args.device) elif args.load_model_path: model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) if args.reset_optimizer: start_epoch = 0 if optimizer is not None: optimizer = None msglogger.info( '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0' ) # </editor-fold> # Define loss function (criterion) # get_loss(saved_for_loss, heat_temp, heat_weight,vec_temp, vec_weight) criterion = { 'shufflenetv2': shufflenetv2_get_loss, 'vgg19': vgg19_get_loss, 'hourglass': hourglass_get_loss, }[args.arch] if optimizer is None: trainable_vars = [ param for param in model.parameters() if param.requires_grad ] optimizer = torch.optim.SGD(trainable_vars, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) # TODO: load lr_scheduler lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=5, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=3, min_lr=0, eps=1e-08) if args.AMC: return automated_deep_compression(model, criterion, optimizer, pylogger, args) if args.greedy: return greedy(model, criterion, optimizer, pylogger, args) # This sample application can be invoked to produce various summary reports. if args.summary: for summary in args.summary: distiller.model_summary(model, summary, args.dataset) return if args.export_onnx is not None: return distiller.export_img_classifier_to_onnx(model, os.path.join( msglogger.logdir, args.export_onnx), args.dataset, add_softmax=True, verbose=False) if args.qe_calibration: return acts_quant_stats_collection(model, criterion, pylogger, args) if args.activation_histograms: return acts_histogram_collection(model, criterion, pylogger, args) print('Building activations_collectors...') activations_collectors = create_activation_stats_collectors( model, *args.activation_stats) # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. print('Loading data...') train_loader, val_loader, test_loader, _ = load_data(args) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) if args.sensitivity is not None: sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2]) return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities) if args.evaluate: return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args, compression_scheduler) if args.compress: # The main use-case for this sample application is CNN compression. Compression # requires a compression schedule configuration file in YAML. compression_scheduler = distiller.file_config( model, optimizer, args.compress, compression_scheduler, (start_epoch - 1) if args.resumed_checkpoint_path else None) # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer) model.to(args.device) elif compression_scheduler is None: compression_scheduler = distiller.CompressionScheduler(model) if args.thinnify: # zeros_mask_dict = distiller.create_model_masks_dict(model) assert args.resumed_checkpoint_path is not None, \ "You must use --resume-from to provide a checkpoint file to thinnify" distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None) apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler, name="{}_thinned".format( args.resumed_checkpoint_path.replace( ".pth.tar", "")), dir=msglogger.logdir) print( "Note: your model may have collapsed to random inference, so you may want to fine-tune" ) return if start_epoch >= ending_epoch: msglogger.error( 'epoch count is too low, starting epoch is {} but total epochs set to {}' .format(start_epoch, ending_epoch)) raise ValueError('Epochs parameter is too low. Nothing to do.') for epoch in range(start_epoch, ending_epoch): # This is the main training loop. msglogger.info('\n') if compression_scheduler: compression_scheduler.on_epoch_begin( epoch, metrics=(total_loss if (epoch != start_epoch) else 10**6)) # Train for one epoch with collectors_context(activations_collectors["train"]) as collectors: train(train_loader, model, criterion, optimizer, epoch, compression_scheduler, loggers=[tflogger, pylogger], args=args) distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger]) distiller.log_activation_statsitics( epoch, "train", loggers=[tflogger], collector=collectors["sparsity"]) if args.masks_sparsity: msglogger.info( distiller.masks_sparsity_tbl_summary( model, compression_scheduler)) # evaluate on validation set with collectors_context(activations_collectors["valid"]) as collectors: loss = validate(val_loader, model, criterion, [pylogger], args, epoch) distiller.log_activation_statsitics( epoch, "valid", loggers=[tflogger], collector=collectors["sparsity"]) save_collectors_data(collectors, msglogger.logdir) lr_scheduler.step(loss) stats = ('Performance/Validation/', OrderedDict([('Loss', loss)])) distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1, loggers=[tflogger]) if compression_scheduler: compression_scheduler.on_epoch_end(epoch, optimizer) # Update the list of top scores achieved so far, and save the checkpoint update_training_scores_history(perf_scores_history, model, loss, epoch, args.num_best_scores) is_best = epoch == perf_scores_history[0].epoch checkpoint_extras = { 'current_loss': loss, 'best_loss': perf_scores_history[0].loss, 'best_epoch': perf_scores_history[0].epoch } apputils.save_checkpoint(epoch, args.arch, model, optimizer=optimizer, scheduler=compression_scheduler, extras=checkpoint_extras, is_best=is_best, name=args.name, dir=msglogger.logdir) # Finally run results on the test set test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
# TODO(barrh): args.deprecated_resume is deprecated since v0.3.1 if args.deprecated_resume: msglogger.warning('The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.') if not args.reset_optimizer: msglogger.warning('If you wish to also reset the optimizer, call with: --reset-optimizer') args.reset_optimizer = True args.resumed_checkpoint_path = args.deprecated_resume # We can optionally resume from a checkpoint optimizer = None if args.resumed_checkpoint_path: model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint( model, args.resumed_checkpoint_path, model_device=args.device) elif args.load_model_path: model = apputils.load_lean_checkpoint(model, args.load_model_path, model_device=args.device) if args.reset_optimizer: start_epoch = 0 if optimizer is not None: optimizer = None msglogger.info('\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0') # Define loss function (criterion) criterion = nn.CrossEntropyLoss().to(args.device) if optimizer is None: optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults)