def main(): """ Main body of script. """ args = parse_args() args.seed = init_rand(seed=args.seed) _, log_file_exist = initialize_logging( logging_dir_path=args.save_dir, logging_file_name=args.logging_file_name, script_args=args, log_packages=args.log_packages, log_pip_packages=args.log_pip_packages) use_cuda, batch_size = prepare_pt_context(num_gpus=args.num_gpus, batch_size=args.batch_size) net = prepare_model(model_name=args.model, use_pretrained=args.use_pretrained, pretrained_model_file_path=args.resume.strip(), use_cuda=use_cuda) real_net = net.module if hasattr(net, "module") else net assert (hasattr(real_net, "num_classes")) num_classes = real_net.num_classes ds_metainfo = get_dataset_metainfo(dataset_name=args.dataset) ds_metainfo.update(args=args) train_data = get_train_data_source(ds_metainfo=ds_metainfo, batch_size=batch_size, num_workers=args.num_workers) val_data = get_val_data_source(ds_metainfo=ds_metainfo, batch_size=batch_size, num_workers=args.num_workers) optimizer, lr_scheduler, start_epoch = prepare_trainer( net=net, optimizer_name=args.optimizer_name, wd=args.wd, momentum=args.momentum, lr_mode=args.lr_mode, lr=args.lr, lr_decay_period=args.lr_decay_period, lr_decay_epoch=args.lr_decay_epoch, lr_decay=args.lr_decay, num_epochs=args.num_epochs, state_file_path=args.resume_state) if args.save_dir and args.save_interval: param_names = ds_metainfo.val_metric_capts + ds_metainfo.train_metric_capts + [ "Train.Loss", "LR" ] lp_saver = TrainLogParamSaver( checkpoint_file_name_prefix="{}_{}".format(ds_metainfo.short_label, args.model), last_checkpoint_file_name_suffix="last", best_checkpoint_file_name_suffix=None, last_checkpoint_dir_path=args.save_dir, best_checkpoint_dir_path=None, last_checkpoint_file_count=2, best_checkpoint_file_count=2, checkpoint_file_save_callback=save_params, checkpoint_file_exts=(".pth", ".states"), save_interval=args.save_interval, num_epochs=args.num_epochs, param_names=param_names, acc_ind=ds_metainfo.saver_acc_ind, # bigger=[True], # mask=None, score_log_file_path=os.path.join(args.save_dir, "score.log"), score_log_attempt_value=args.attempt, best_map_log_file_path=os.path.join(args.save_dir, "best_map.log")) else: lp_saver = None train_net(batch_size=batch_size, num_epochs=args.num_epochs, start_epoch1=args.start_epoch, train_data=train_data, val_data=val_data, net=net, optimizer=optimizer, lr_scheduler=lr_scheduler, lp_saver=lp_saver, log_interval=args.log_interval, num_classes=num_classes, val_metric=get_composite_metric( ds_metainfo.val_metric_names, ds_metainfo.val_metric_extra_kwargs), train_metric=get_composite_metric( ds_metainfo.train_metric_names, ds_metainfo.train_metric_extra_kwargs), use_cuda=use_cuda)
def main(): """ Main body of script. """ args = parse_args() args.seed = init_rand(seed=args.seed) # args.num_gpus = 1 # args.num_epochs = 500 # args.dataset = "CIFAR10" # args.model = "resnet20_cifar10" args.use_pretrained = False num_non_res = 3 pretrained_model_file_path = '/exdrive/resnet20-cifar10/unfrozen-training/by-stack/non-res-stack-3-models/cifar10-non-res-stack-2-teacher.pth' _, log_file_exist = initialize_logging( logging_dir_path=args.save_dir, logging_file_name=args.logging_file_name, script_args=args, log_packages=args.log_packages, log_pip_packages=args.log_pip_packages) use_cuda, batch_size = prepare_pt_context(num_gpus=args.num_gpus, batch_size=args.batch_size) # Get model net = prepare_model( model_name=args.model, use_pretrained=args.use_pretrained, # pretrained_model_file_path=pretrained_model_file_path, pretrained_model_file_path=args.resume.strip(), use_cuda=use_cuda, remove_module=True ) # True if using our own trained model waits; o.w. False real_net = net.module if hasattr(net, "module") else net assert (hasattr(real_net, "num_classes")) num_classes = real_net.num_classes ds_metainfo = get_dataset_metainfo(dataset_name=args.dataset) ds_metainfo.update(args=args) train_data = get_train_data_source(ds_metainfo=ds_metainfo, batch_size=batch_size, num_workers=args.num_workers) val_data = get_val_data_source(ds_metainfo=ds_metainfo, batch_size=batch_size, num_workers=args.num_workers) optimizer, lr_scheduler, start_epoch = prepare_trainer( net=net, optimizer_name=args.optimizer_name, wd=args.wd, momentum=args.momentum, lr_mode=args.lr_mode, lr=args.lr, lr_decay_period=args.lr_decay_period, lr_decay_epoch=args.lr_decay_epoch, lr_decay=args.lr_decay, num_epochs=args.num_epochs, state_file_path=args.resume_state) if args.save_dir and args.save_interval: param_names = ds_metainfo.val_metric_capts + ds_metainfo.train_metric_capts + [ "Train.Loss", "LR" ] lp_saver = TrainLogParamSaver( checkpoint_file_name_prefix="{}_{}".format(ds_metainfo.short_label, args.model), last_checkpoint_file_name_suffix="last", best_checkpoint_file_name_suffix=None, last_checkpoint_dir_path=args.save_dir, best_checkpoint_dir_path=None, last_checkpoint_file_count=2, best_checkpoint_file_count=2, checkpoint_file_save_callback=save_params, checkpoint_file_exts=(".pth", ".states"), save_interval=args.save_interval, num_epochs=args.num_epochs, param_names=param_names, acc_ind=ds_metainfo.saver_acc_ind, # bigger=[True], # mask=None, score_log_file_path=os.path.join(args.save_dir, "score.log"), score_log_attempt_value=args.attempt, best_map_log_file_path=os.path.join(args.save_dir, "best_map.log")) else: lp_saver = None print(net) # summary(net, (3, 32, 32)) if torch.cuda.is_available(): print('cuda available - sending net to gpu') print('training using cuda = ', use_cuda) net.cuda() print('\n\nTraining nonresnet20 on cifar10 with num_non_res=', num_non_res) train_net(batch_size=batch_size, num_epochs=args.num_epochs, start_epoch1=args.start_epoch, train_data=train_data, val_data=val_data, net=net, optimizer=optimizer, lr_scheduler=lr_scheduler, lp_saver=lp_saver, log_interval=args.log_interval, num_classes=num_classes, val_metric=get_composite_metric( ds_metainfo.val_metric_names, ds_metainfo.val_metric_extra_kwargs), train_metric=get_composite_metric( ds_metainfo.train_metric_names, ds_metainfo.train_metric_extra_kwargs), use_cuda=use_cuda)