def main():
    args = parser.parse_args()
    set_random_seed(args.seed)
    model_dir = args.model_dir
    model_dir.mkdir(parents=True, exist_ok=True)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = VGGClassifier().to(device)
    crit = torch.nn.CrossEntropyLoss()

    # stage 1
    lr = 2e-5
    size = (224, 224)
    epochs = 20
    batch_size = 16
    train_dl, valid_dl, _, _ = fetch_dataloader(args.data_dir, size,
                                                batch_size, num_workers=0)
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad], lr)
    train(model, train_dl, valid_dl, crit, optimizer, epochs, device,
          model_dir / "best.pth")
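# fetch_dataloader is used throughout these snippets but not defined in them.
# A minimal sketch of a helper matching the four-value unpacking above; the
# ImageFolder layout, the "train"/"valid" directory names, and the return
# order are assumptions, not the original implementation.
import torch
from torchvision import datasets, transforms

def fetch_dataloader_sketch(data_dir, size, batch_size, num_workers=0):
    """Hypothetical helper returning (train_dl, valid_dl, train_ds, valid_ds)."""
    tfm = transforms.Compose([
        transforms.Resize(size),
        transforms.ToTensor(),
    ])
    train_ds = datasets.ImageFolder(f"{data_dir}/train", transform=tfm)
    valid_ds = datasets.ImageFolder(f"{data_dir}/valid", transform=tfm)
    train_dl = torch.utils.data.DataLoader(
        train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_dl = torch.utils.data.DataLoader(
        valid_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_dl, valid_dl, train_ds, valid_ds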
def train_model(args, params, loss_fn, model, CViter, network):
    start_epoch = 0
    best_AUC = 0

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), params.learning_rate,
                                 betas=(0.9, 0.999), eps=1e-08,
                                 weight_decay=params.weight_decay,
                                 amsgrad=False)

    if args.resume:
        logging.info('Resuming checkpoint: {}'.format(args.resume))
        start_epoch, best_AUC, model, optimizer = resume_checkpoint(
            args, model, optimizer, CViter)

    logging.info("fetch_dataloader")
    dataloaders = fetch_dataloader(['train', 'val'], params)

    loss_track = []
    for epoch in range(start_epoch, start_epoch + params.epochs):
        logging.warning('CV [{}/{},{}/{}], Training Epoch: [{}/{}]'.format(
            CViter[1] + 1, params.CV_iters - 1, CViter[0] + 1, params.CV_iters,
            epoch + 1, start_epoch + params.epochs))
        # keep track of training loss
        loss_track = loss_track + train(args, dataloaders, model, loss_fn,
                                        optimizer, epoch)

        # evaluate on validation set
        loss_track.append(
            get_eval_matrix(validate(dataloaders['val'], model, loss_fn))[1])

        learning_rate_decay(optimizer, args.lrDecay)

    gc.collect()
    del optimizer
    return loss_track
def main():
    args = parser.parse_args()
    AUCs = defaultdict(list)

    # define loss function
    loss_fn = UnevenWeightBCE_loss

    netlist = model_loader.get_model_list(args.network)
    for network in netlist:
        plt.clf()
        args.network = network
        set_logger(args.model_dir, args.network, args.log)
        params = set_params(args.model_dir, args.network)
        model = model_loader.loadModel(args.network, params.dropout_rate)
        model.cuda()
        cudnn.benchmark = True

        if args.train:
            for CViter in range(params.CV_iters):
                logging.warning(
                    'Cross Validation on iteration {}'.format(CViter + 1))
                AUCs[network].append(
                    save_ROC(args, params.CV_iters,
                             outputs=validate(
                                 train_model(args, params, loss_fn, model,
                                             CViter, network),
                                 resume_model(args, model, CViter),
                                 loss_fn)[1]))
                get_next_CV_set(params.CV_iters)
                model.apply(model_loader.weight_reset)
            # add the AUC SD to the current model result
            add_AUC_to_ROC(args, params.CV_iters, AUCs[network])
        else:
            logging.info('plotting ROC on full dataset for {}'.format(
                args.network))
            # validate model on the full dataset, display ROC curve
            for CViter in range(params.CV_iters):
                AUCs[network].append(
                    save_ROC(args, 'Full_dataset',
                             outputs=validate(
                                 fetch_dataloader([], params),
                                 resume_model(args, model, CViter),
                                 loss_fn)[1]))
            # add the AUC SD to the current model result
            add_AUC_to_ROC(args, 'Full_dataset', AUCs[network])

    plot_AUC_SD(AUCs, netlist, args.model_dir, args.train)
def train_model(args, params, loss_fn, model, CViter, network):
    start_epoch = 0
    best_AUC = 0

    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(), params.learning_rate,
                                 betas=(0.9, 0.999), eps=1e-08,
                                 weight_decay=params.weight_decay,
                                 amsgrad=False)

    if args.resume:
        logging.info('Resuming checkpoint: {}'.format(args.resume))
        start_epoch, best_AUC, model, optimizer = resume_checkpoint(
            args, model, optimizer, CViter)

    logging.info("fetch_dataloader")
    dataloaders = fetch_dataloader(['train', 'val'], params)

    loss_track = []
    for epoch in range(start_epoch, start_epoch + params.epochs):
        logging.warning('CV [{}/{},{}/{}], Training Epoch: [{}/{}]'.format(
            CViter[1] + 1, params.CV_iters - 1, CViter[0] + 1, params.CV_iters,
            epoch + 1, start_epoch + params.epochs))
        # keep track of training loss
        loss_track = loss_track + train(args, dataloaders, model, loss_fn,
                                        optimizer, params.epochs)

        # evaluate on validation set
        val_loss, AUC = get_AUC(validate(dataloaders['val'], model, loss_fn))
        logging.warning(' Loss {loss:.4f}; AUC {AUC:.4f}\n'.format(
            loss=val_loss, AUC=AUC))

        # remember best AUC and save checkpoint
        if best_AUC < AUC and args.storebest:
            logging.warning(' Saving Best AUC model\n')
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_AUC': AUC,
                    'optimizer': optimizer.state_dict(),
                }, args, CViter)
        best_AUC = max(best_AUC, AUC)

        learning_rate_decay(optimizer, args.lrDecay)

    gc.collect()
    del optimizer
    return loss_track
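# learning_rate_decay is called above but not defined in these snippets. A
# minimal sketch of the usual multiplicative decay applied to every parameter
# group; the exact decay semantics of the original are an assumption.
def learning_rate_decay_sketch(optimizer, decay_factor):
    """Scale each parameter group's learning rate by decay_factor."""
    for group in optimizer.param_groups:
        group['lr'] *= decay_factor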
def main():
    args = parser.parse_args()
    evalmatices = defaultdict(list)

    # define loss function
    loss_list = get_loss_list()

    netlist = model_loader.get_model_list(args.network)
    plt.clf()
    set_logger(args.model_dir, args.network, args.log)
    params = set_params(args.model_dir, args.network)
    model = model_loader.loadModel(params, netname=args.network,
                                   dropout_rate=params.dropout_rate)
    model.cuda()
    cudnn.benchmark = True

    for loss in loss_list:
        print(loss)
        args.loss = loss
        loss_fn = get_loss(loss)
        if args.lrDecay != 1.0:
            args.loss = args.loss + '_{}LrD_'.format(str(args.lrDecay))
        for Testiter in range(params.CV_iters):
            for CViter in range(params.CV_iters - 1):
                if CViter != 0 or Testiter != 0:
                    model.apply(model_loader.weight_ini)
                logging.warning(
                    'Cross Validation on iteration {}/{}, Nested CV on {}/{}'
                    .format(Testiter + 1, params.CV_iters, CViter + 1,
                            params.CV_iters - 1))
                evalmatices[loss].append(
                    get_eval_matrix(outputs=validate(
                        fetch_dataloader([], params),
                        resume_model(args, model, (Testiter, CViter)),
                        loss_fn)[1]))
            get_next_CV_set(params.CV_iters)

    plot_AUC_SD(args.network, evalmatices, loss_list, args.lrDecay)
def get_predicted_probs(model):
    """Returns predicted probs, labels for the given model."""
    params = {'batch_size': 100, 'num_workers': 10, 'cuda': FLAGS.cuda}
    data_loaders = data_loader.fetch_dataloader(['dev'], model.datafolder,
                                                params)
    dev_loader = data_loaders['dev']

    if FLAGS.cuda:
        torch_model = torch.load(model.filepath)
    else:
        torch_model = torch.load(model.filepath, map_location='cpu')

    scores = None
    actual_labels = None
    print("Computing probabilities for model: ", model.name)
    num_batch = 0
    for images, labels in dev_loader:
        if FLAGS.cuda:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
        outputs = torch_model(images)
        print("Processing batch #", num_batch)
        num_batch += 1
        if FLAGS.cuda:
            # inception models return (logits, aux_logits) tuples
            if model.name.lower().startswith("inception"):
                outputs = outputs[0].cuda()
            else:
                outputs = outputs.cuda()
        if scores is None:
            scores = outputs[:, 1]
        else:
            scores = torch.cat([scores.cpu(), outputs[:, 1].cpu()])
        scores = scores.detach().cpu()
        if actual_labels is None:
            actual_labels = labels
        else:
            actual_labels = torch.cat([actual_labels.cpu(), labels.cpu()])
        actual_labels = actual_labels.detach().cpu()

    print("Done. Scores shape: ", scores.shape)
    return scores.detach().cpu().numpy(), actual_labels.detach().cpu().numpy()
def train():
    params = {
        'batch_size': FLAGS.batch_size,
        'num_workers': FLAGS.num_workers,
        'cuda': FLAGS.cuda
    }
    data_loaders = data_loader.fetch_dataloader(['train', 'dev'],
                                                FLAGS.data_folder, params)
    train_loader = data_loaders['train']
    dev_loader = data_loaders['dev']

    model = LogisticRegression(input_size, num_classes).cuda() if FLAGS.cuda \
        else LogisticRegression(input_size, num_classes)

    # Loss and Optimizer
    # Softmax is internally computed.
    # Set parameters to be updated.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Training the Model
    start_time_secs = time.time()
    train_dev_error_graph_filename = get_train_dev_error_graph_filename()
    train_loss_graph_filename = get_training_loss_graph_filename()
    num_iteration = 0
    for epoch in range(FLAGS.max_iter):
        for i, (images, labels) in enumerate(train_loader):
            if FLAGS.cuda:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)
            images = images.view(-1, input_size)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if (i + 1) % 10 == 0:
                # len(train_loader) is already the number of batches per epoch
                print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' %
                      (epoch + 1, FLAGS.max_iter, i + 1, len(train_loader),
                       loss.item()))
                append_to_file(train_loss_graph_filename,
                               "%d,%.4f" % (num_iteration, loss.item()))
            num_iteration += 1

        train_acc = eval_on_train_set(model, train_loader)
        dev_acc, y_dev_predicted, y_dev_true = eval_on_dev_set(
            model, dev_loader)
        append_to_file(
            train_dev_error_graph_filename,
            '%s,%s,%s' % (epoch, train_acc.item() / 100,
                          dev_acc.item() / 100))

    print('Training Complete')
    end_time_secs = time.time()
    training_duration_secs = end_time_secs - start_time_secs

    # Test the Model on dev data, and on the train set to see how we do there.
    print('Final Evaluations after TRAINING...')
    train_accuracy = eval_on_train_set(model, train_loader)
    dev_accuracy, y_dev_predicted, y_dev_true = eval_on_dev_set(
        model, dev_loader)

    experiment_result_string = "-------------------\n"
    experiment_result_string += "\nDev Accuracy: {}%".format(dev_accuracy)
    experiment_result_string += "\nTrain Accuracy: {}%".format(train_accuracy)
    experiment_result_string += "\nTraining time(secs): {}".format(
        training_duration_secs)
    experiment_result_string += "\nMax training iterations: {}".format(
        FLAGS.max_iter)
    experiment_result_string += "\nTraining time / Max training iterations: {}".format(
        1.0 * training_duration_secs / FLAGS.max_iter)
    print(experiment_result_string)

    # Save report to file
    write_contents_to_file(get_experiment_report_filename(),
                           experiment_result_string)

    # Generate confusion matrix
    util.create_confusion_matrices(y_dev_predicted, y_dev_true,
                                   get_confusion_matrix_filename())
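# LogisticRegression is not defined in this snippet. Because
# nn.CrossEntropyLoss applies log-softmax internally, multinomial logistic
# regression reduces to a single linear layer returning raw logits; a minimal
# sketch matching the (input_size, num_classes) constructor used above.
import torch.nn as nn

class LogisticRegressionSketch(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        # Raw logits; the softmax is folded into CrossEntropyLoss.
        return self.linear(x)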
# Load the parameters from json file
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), \
    "No json configuration file found at {}".format(json_path)
params = utils.Params(json_path)

# use GPU if available
params.cuda = torch.cuda.is_available()
print(params.cuda)

# Set the logger and create the input data pipeline
utils.set_logger(os.path.join(args.model_dir, 'train.log'))
logging.info("Loading the datasets...")

# fetch dataloaders
dataloaders = data_loader.fetch_dataloader(['train', 'val'], args.data_dir,
                                           params)
train_dl = dataloaders['train']
val_dl = dataloaders['val']
logging.info("- done.")

# Define the model and optimizer
model = net.FashioNet('resnet50', params.num_classes,
                      params.train_blocks).cuda() if params.cuda \
    else net.Net(params)
summary(model, (3, 112, 112))
optimizer = optim.Adamax(model.parameters(), lr=params.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
                                                 patience=3)
# alternative: CosineAnnealingLR(optimizer, 10)

# fetch loss function and metrics
loss_fn = torch.nn.CrossEntropyLoss().cuda() if params.cuda \
    else torch.nn.CrossEntropyLoss()
metrics = net.metrics
if __name__ == '__main__':
    start = time.time()
    params = {
        "batch_size": 64,
        "num_workers": 4,
        "shuffle": True
    }
    params_training = {
        "learning_rate": 1e-1,
        "dropout_rate": 0.8,
        "num_epoch": 20
    }
    path = 'data'
    train_data = os.path.join(path, 'train_mix')
    test_data = os.path.join(path, 'test_mix')
    evl_data = os.path.join(path, 'val_mix')

    dataloaders = data_loader.fetch_dataloader(['train', 'val', 'test'], path,
                                               params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']
    test_dl = dataloaders['test']

    train(train_dl, params_training)
    end = time.time()
    print('Training Time: ', end - start)
        optimizer = optim.Adam(para_dic, lr=params.lr, betas=(0.9, 0.999),
                               eps=1e-8, weight_decay=params.weight_decay)
        # print(optimizer)
    elif params.optimizer == 'SGD':
        optimizer = optim.SGD(para_dic, lr=params.lr, momentum=0.9,
                              nesterov=True,
                              weight_decay=params.weight_decay)
    logger.info(model)

    # Create the input data pipeline
    logger.info("Loading the datasets...")

    # fetch dataloaders
    train_dl = data_loader.fetch_dataloader('train', params)
    test_dl = data_loader.fetch_dataloader('test', params)
    logger.info("- done.")

    Myacc = []
    if params.mode == 'train' or params.mode == 'load_train':
        # Train the model
        logger.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate(model, ad_net, grl, ad_net_m, grl_m, Myacc,
                           train_dl, test_dl, optimizer, loss_fn, metrics,
                           params, args.model_dir, logger,
                           params.restore_file)
    elif params.mode == 'test':
        test_only(model, train_dl, test_dl, optimizer, loss_fn, metrics,
                  params, args.model_dir, logger, params.restore_file)
    else:
        print('mode input error!')
# check output folder exists and whether it is a relative path
if not os.path.isdir(params.output_dir):
    os.mkdir(params.output_dir)
writer = utils.set_writer(params.output_dir)

params.device = torch.device('cuda:{}'.format(args.cuda)
                             if torch.cuda.is_available() and args.cuda
                             else 'cpu')

# set random seed
torch.manual_seed(11052018)
if params.device.type == 'cuda':
    torch.cuda.manual_seed(11052018)

# input
dataloader = fetch_dataloader(params, train=False)

# load model
model = model.STN(getattr(model, params.stn_module), params).to(params.device)
utils.load_checkpoint(args.restore_file, model)

# run inference
print('\nEvaluating with model:\n', model)
print('\n.. and parameters:\n', pprint.pformat(params))
accuracy = evaluate(model, dataloader, writer, params)
visualize_sample(model, dataloader.dataset, writer, params, None)
print('Evaluation accuracy: {:.5f}'.format(accuracy))
writer.close()
# use GPU if available
params.cuda = torch.cuda.is_available()

# Set the random seed for reproducible experiments
torch.manual_seed(230)
if params.cuda:
    torch.cuda.manual_seed(230)

# Get the logger
utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Create the input data pipeline
logging.info("Creating the dataset...")

# fetch dataloaders
dataloaders = data_loader.fetch_dataloader(['test'], args.data_dir, params)
test_dl = dataloaders['test']
logging.info("- done.")

# Define the model
model = net.Net(params).cuda() if params.cuda else net.Net(params)

loss_fn = net.loss_fn
metrics = net.metrics

logging.info("Starting evaluation")

# Reload weights from the saved file
utils.load_checkpoint(
    os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)
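# utils.load_checkpoint is assumed to restore weights from a .pth.tar file. A
# sketch of the conventional implementation; the checkpoint key names
# ('state_dict', 'optim_dict') are assumptions, not the original code.
import torch

def load_checkpoint_sketch(checkpoint_path, model, optimizer=None):
    """Load model (and optionally optimizer) state from a checkpoint file."""
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optim_dict'])
    return checkpoint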
params.beam_blur = 30
params.batch_size = int(params.batch_size)
params.numIter = int(params.numIter)
params.noise_dims = int(params.noise_dims)
params.label_dims = int(params.label_dims)
params.gkernlen = int(params.gkernlen)
params.step_size = int(params.step_size)
params.iters_step = 100
params.save_step = 100
params.iters = 0
params.noise_level = 0.1
params.iters_scheme = [0, 0, 0, 1]
params.max_res = len(params.iters_scheme)
params.size_temp = 64

# fetch dataloader
dataloader = fetch_dataloader(train_path, params)
gen_loss_list = []
dis_loss_list = []

# Define the models
if args.model == 'shallow':
    generator = Generator(params)
    discriminator = Discriminator(params)
elif args.model == 'deep':
    generator = ResGenerator(params)
    discriminator = ResDiscriminator(params)
if params.cuda:
    generator.cuda()
    discriminator.cuda()

# Define the optimizers
optimizer_G = torch.optim.Adam(
params = utils.Params(json_path)

# Add attributes to params
params.output_dir = output_dir
params.lambda_gp = 10.0
params.n_critic = 1
params.cuda = torch.cuda.is_available()
params.batch_size = int(params.batch_size)
params.numIter = int(params.numIter)
params.noise_dims = int(params.noise_dims)
params.label_dims = int(params.label_dims)
params.gkernlen = int(params.gkernlen)

# fetch dataloader
dataloader = fetch_dataloader(train_path, params)

# Define the models
generator = Generator(params)
discriminator = Discriminator(params)
if params.cuda:
    generator.cuda()
    discriminator.cuda()

# Define the optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=params.lr_gen,
                               betas=(params.beta1_gen, params.beta2_gen))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=params.lr_dis,
                               betas=(params.beta1_dis, params.beta2_dis))
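# params.lambda_gp and params.n_critic above suggest WGAN-GP style training. A
# sketch of the standard gradient-penalty term (Gulrajani et al., 2017); the
# 4-D image shape and the discriminator signature are assumptions.
import torch

def gradient_penalty_sketch(discriminator, real, fake, lambda_gp=10.0):
    """Penalize the discriminator's gradient norm on interpolated samples."""
    alpha = torch.rand(real.size(0), 1, 1, 1, device=real.device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interp = discriminator(interp)
    grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()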
def main():
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), \
        "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the random seed for reproducible experiments
    random.seed(230)
    torch.manual_seed(230)
    np.random.seed(230)
    torch.cuda.manual_seed(230)
    warnings.filterwarnings("ignore")

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders, considering full-set vs. sub-set scenarios
    if params.subset_percent < 1.0:
        train_dl = data_loader.fetch_subset_dataloader('train', params)
    else:
        train_dl = data_loader.fetch_dataloader('train', params)
    dev_dl = data_loader.fetch_dataloader('dev', params)
    logging.info("- done.")

    """ Load student and teacher model """
    if "distill" in params.model_version:
        # Specify the student models
        if params.model_version == "cnn_distill":  # 5-layer plain CNN
            print("Student model: {}".format(params.model_version))
            model = net.Net(params).cuda()
        elif params.model_version == "shufflenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()
        elif params.model_version == "mobilenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()
        elif params.model_version == 'resnet18_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet18(num_classes=args.num_class).cuda()
        elif params.model_version == 'resnet50_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet50(num_classes=args.num_class).cuda()
        elif params.model_version == "alexnet_distill":
            print("Student model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()
        elif params.model_version == "vgg19_distill":
            print("Student model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()
        elif params.model_version == "googlenet_distill":
            print("Student model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()
        elif params.model_version == "resnext29_distill":
            print("Student model: {}".format(params.model_version))
            model = resnext.CifarResNeXt(cardinality=8, depth=29,
                                         num_classes=args.num_class).cuda()
        elif params.model_version == "densenet121_distill":
            print("Student model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        # optimizer
        if params.model_version == "cnn_distill":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9, weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        # warmup the learning rate in the first epoch
        warmup_scheduler = utils.WarmUpLR(optimizer,
                                          iter_per_epoch * args.warm)

        # specify loss function
        if args.self_training:
            print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>self training>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
            loss_fn_kd = loss_kd_self
        else:
            loss_fn_kd = loss_kd

        """
        Specify the pre-trained teacher models for knowledge distillation.
        Checkpoints can be obtained by regular training or by downloading our
        pretrained models. For a model pretrained on multiple GPUs, use
        "nn.DataParallel" to correctly load the model weights.
""" if params.teacher == "resnet18": print("Teacher model: {}".format(params.teacher)) teacher_model = resnet.ResNet18(num_classes=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/best.pth.tar' if args.pt_teacher: # poorly-trained teacher for Defective KD experiments teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/0.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "alexnet": print("Teacher model: {}".format(params.teacher)) teacher_model = alexnet.alexnet(num_classes=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_alexnet/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "googlenet": print("Teacher model: {}".format(params.teacher)) teacher_model = googlenet.GoogleNet(num_class=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_googlenet/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "vgg19": print("Teacher model: {}".format(params.teacher)) teacher_model = models.vgg19_bn(num_classes=args.num_class) teacher_checkpoint = 'experiments/pretrained_teacher_models/base_vgg19/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "resnet50": print("Teacher model: {}".format(params.teacher)) teacher_model = resnet.ResNet50(num_classes=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/best.pth.tar' if args.pt_teacher: # poorly-trained teacher for Defective KD experiments teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/50.pth.tar' elif params.teacher == "resnet101": print("Teacher model: {}".format(params.teacher)) teacher_model = resnet.ResNet101(num_classes=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet101/best.pth.tar' teacher_model = teacher_model.cuda() elif params.teacher == "densenet121": print("Teacher model: {}".format(params.teacher)) teacher_model = densenet.densenet121( num_class=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_densenet121/best.pth.tar' # teacher_model = nn.DataParallel(teacher_model).cuda() elif params.teacher == "resnext29": print("Teacher model: {}".format(params.teacher)) teacher_model = resnext.CifarResNeXt( cardinality=8, depth=29, num_classes=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/best.pth.tar' if args.pt_teacher: # poorly-trained teacher for Defective KD experiments teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/50.pth.tar' teacher_model = nn.DataParallel(teacher_model).cuda() elif params.teacher == "mobilenet_v2": print("Teacher model: {}".format(params.teacher)) teacher_model = mobilenet.mobilenetv2( class_num=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_mobilenet_v2/best.pth.tar' elif params.teacher == "shufflenet_v2": print("Teacher model: {}".format(params.teacher)) teacher_model = shufflenet.shufflenetv2( class_num=args.num_class).cuda() teacher_checkpoint = 'experiments/pretrained_teacher_models/base_shufflenet_v2/best.pth.tar' utils.load_checkpoint(teacher_checkpoint, teacher_model) # Train the model with KD logging.info("Starting training for {} epoch(s)".format( params.num_epochs)) train_and_evaluate_kd(model, teacher_model, train_dl, dev_dl, optimizer, loss_fn_kd, warmup_scheduler, params, args, args.restore_file) # non-KD mode: regular training to 
    # non-KD mode: regular training to obtain a baseline model
    else:
        print("Train base model")
        if params.model_version == "cnn":
            model = net.Net(params).cuda()
        elif params.model_version == "mobilenet_v2":
            print("model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()
        elif params.model_version == "shufflenet_v2":
            print("model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()
        elif params.model_version == "alexnet":
            print("model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()
        elif params.model_version == "vgg19":
            print("model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()
        elif params.model_version == "googlenet":
            print("model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()
        elif params.model_version == "densenet121":
            print("model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()
        elif params.model_version == "resnet18":
            model = resnet.ResNet18(num_classes=args.num_class).cuda()
        elif params.model_version == "resnet50":
            model = resnet.ResNet50(num_classes=args.num_class).cuda()
        elif params.model_version == "resnet101":
            model = resnet.ResNet101(num_classes=args.num_class).cuda()
        elif params.model_version == "resnet152":
            model = resnet.ResNet152(num_classes=args.num_class).cuda()
        elif params.model_version == "resnext29":
            model = resnext.CifarResNeXt(cardinality=8, depth=29,
                                         num_classes=args.num_class).cuda()
        # model = nn.DataParallel(model).cuda()

        # specify loss function
        if args.regularization:
            print(">>>>>>>>>>>>>>>>>>>>>>>>Loss of Regularization>>>>>>>>>>>>>>>>>>>>>>>>")
            loss_fn = loss_kd_regularization
        elif args.label_smoothing:
            print(">>>>>>>>>>>>>>>>>>>>>>>>Label Smoothing>>>>>>>>>>>>>>>>>>>>>>>>")
            loss_fn = loss_label_smoothing
        else:
            print(">>>>>>>>>>>>>>>>>>>>>>>>Normal Training>>>>>>>>>>>>>>>>>>>>>>>>")
            loss_fn = nn.CrossEntropyLoss()

        if args.double_training:  # double training, compare to self-KD
            print(">>>>>>>>>>>>>>>>>>>>>>>>Double Training>>>>>>>>>>>>>>>>>>>>>>>>")
            checkpoint = 'experiments/pretrained_teacher_models/base_' + \
                str(params.model_version) + '/best.pth.tar'
            utils.load_checkpoint(checkpoint, model)

        if params.model_version == "cnn":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9, weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(optimizer,
                                          iter_per_epoch * args.warm)

        # Train the model
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate(model, train_dl, dev_dl, optimizer, loss_fn,
                           params, args.model_dir, warmup_scheduler, args,
                           args.restore_file)
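# loss_kd is referenced above but not defined in these snippets. The standard
# knowledge-distillation objective (Hinton et al., 2015) mixes a softened KL
# term against the teacher with hard-label cross-entropy; a sketch, with alpha
# and T as assumed hyperparameter names.
import torch.nn.functional as F

def loss_kd_sketch(student_logits, labels, teacher_logits, alpha=0.9, T=4.0):
    """Weighted sum of temperature-T distillation loss and hard-label CE."""
    soft = F.kl_div(F.log_softmax(student_logits / T, dim=1),
                    F.softmax(teacher_logits / T, dim=1),
                    reduction='batchmean') * (T * T)
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1.0 - alpha) * hard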
def train():
    params = {
        'batch_size': FLAGS.batch_size,
        'num_workers': FLAGS.num_workers,
        'cuda': FLAGS.cuda
    }
    data_loaders = data_loader.fetch_dataloader(['train', 'dev'],
                                                FLAGS.data_folder, params)
    train_loader = data_loaders['train']
    dev_loader = data_loaders['dev']

    learning_rate = FLAGS.learning_rate
    model = get_model()

    # Loss and Optimizer
    # Softmax is internally computed.
    # Set parameters to be updated.
    criterion = nn.CrossEntropyLoss()
    # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=FLAGS.l2_regularization)

    # Training the Model
    start_time_secs = time.time()
    train_dev_error_graph_filename = get_train_dev_error_graph_filename()
    train_loss_graph_filename = get_training_loss_graph_filename()
    num_iteration = 0
    print("Model arch: ", model)
    print("Model size is ",
          sum([param.nelement() for param in model.parameters()]))
    dev_accuracy_list = []
    for epoch in range(FLAGS.max_iter):
        for i, (images, labels) in enumerate(train_loader):
            if FLAGS.cuda:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            if FLAGS.model_name.lower().startswith("inception"):
                outputs, _ = model(images)
            else:
                outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_data_set_size = len(train_loader.dataset)
            num_steps = total_data_set_size // batch_size
            if (i + 1) % 10 == 0:
                print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' %
                      (epoch + 1, FLAGS.max_iter, i + 1, num_steps,
                       loss.item()))
                append_to_file(train_loss_graph_filename,
                               "%d,%.4f" % (num_iteration, loss.item()))
            num_iteration += 1

        train_acc = eval_on_train_set(model, train_loader)
        dev_acc, y_dev_predicted, y_dev_true = eval_on_dev_set(
            model, dev_loader)
        dev_precision, dev_recall, dev_f1score = \
            util.compute_precision_recall_f1_score(y_dev_predicted,
                                                   y_dev_true)
        dev_accuracy_list.append(dev_acc)
        append_to_file(
            train_dev_error_graph_filename,
            '%s,%s,%s,%s,%s,%s' % (epoch, train_acc.item() / 100.0,
                                   dev_acc.item() / 100.0,
                                   str(dev_precision), str(dev_recall),
                                   str(dev_f1score)))
        if (epoch + 1) % FLAGS.save_model_every_num_epoch == 0:
            print('Checkpointing model...')
            torch.save(model, get_model_checkpoint_path())

    print('Training Complete')
    end_time_secs = time.time()
    training_duration_secs = end_time_secs - start_time_secs
    print('Checkpointing FINAL trained model...')
    torch.save(model, get_model_checkpoint_path())

    print('Final Evaluations after TRAINING...')
    # Test on the trained model to see how we do on the train set as well.
    train_accuracy = eval_on_train_set(model, train_loader)

    # Test the Model on dev data
    dev_accuracy, y_dev_predicted, y_dev_true = eval_on_dev_set(
        model, dev_loader)
    dev_precision, dev_recall, dev_f1score = \
        util.compute_precision_recall_f1_score(y_dev_predicted, y_dev_true)
    dev_accuracy_list.append(dev_accuracy)
    best_dev_accuracy = max(dev_accuracy_list)
    best_dev_accuracy_index = dev_accuracy_list.index(best_dev_accuracy)
    best_dev_accuracy_epoch = best_dev_accuracy_index + 1

    experiment_result_string = "-------------------\n"
    experiment_result_string += "\nDev Accuracy: {}%".format(dev_accuracy)
    experiment_result_string += "\nBest Dev Accuracy over training: {}% seen at epoch {}".format(
        best_dev_accuracy, best_dev_accuracy_epoch)
    experiment_result_string += "\nDev Precision: {}%".format(dev_precision)
    experiment_result_string += "\nDev Recall: {}%".format(dev_recall)
    experiment_result_string += "\nDev F1 Score: {}%".format(dev_f1score)
    experiment_result_string += "\nTrain Accuracy: {}%".format(train_accuracy)
    experiment_result_string += "\nTraining time(secs): {}".format(
        training_duration_secs)
    experiment_result_string += "\nMax training iterations: {}".format(
        FLAGS.max_iter)
    experiment_result_string += "\nTraining time / Max training iterations: {}".format(
        1.0 * training_duration_secs / FLAGS.max_iter)
    print(experiment_result_string)

    # Save report to file
    write_contents_to_file(get_experiment_report_filename(),
                           experiment_result_string)

    # Generate confusion matrix
    util.create_confusion_matrices(y_dev_predicted, y_dev_true,
                                   get_confusion_matrix_filename())
    return best_dev_accuracy
if not os.path.isdir(params.output_dir):
    os.mkdir(params.output_dir)
writer = utils.set_writer(params.output_dir if args.restore_file is None
                          else os.path.dirname(args.restore_file))

params.device = torch.device('cuda:{}'.format(args.cuda)
                             if torch.cuda.is_available() and args.cuda
                             else 'cpu')
print('device: ', params.device)

# set random seed
torch.manual_seed(11052018)
if params.device.type == 'cuda':
    torch.cuda.manual_seed(11052018)

# input
train_dataloader = fetch_dataloader(params, train=True)
val_dataloader = fetch_dataloader(params, train=False)

# construct model
# dims out (pytorch affine grid requires 2x3 matrix output; else perspective
# transform requires 8)
model = model.STN(getattr(model, params.stn_module), params).to(params.device)

# initialize
initialize(model)
capacity = sum(p.numel() for p in model.parameters())

loss_fn = torch.nn.CrossEntropyLoss().to(params.device)
optimizer = torch.optim.Adam([{
    'params': model.transformer.parameters(),
    'lr': params.transformer_lr
}, {
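# The Adam call above is truncated mid parameter-group list, so the remaining
# groups are left as-is. Generically, per-group learning rates let the STN's
# localization head train more gently than the task head; a self-contained
# sketch with hypothetical module names and rates.
import torch
import torch.nn as nn

class TinySTNSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.transformer = nn.Linear(10, 6)  # localization head (2x3 affine)
        self.classifier = nn.Linear(10, 2)   # task head

model_sketch = TinySTNSketch()
optimizer_sketch = torch.optim.Adam([
    {'params': model_sketch.transformer.parameters(), 'lr': 1e-4},
    {'params': model_sketch.classifier.parameters(), 'lr': 1e-3},
])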