def main():
    args = parser.parse_args()

    set_random_seed(args.seed)

    model_dir = args.model_dir
    model_dir.mkdir(parents=True, exist_ok=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = VGGClassifier().to(device)
    crit = torch.nn.CrossEntropyLoss()

    # stage 1
    lr = 2e-5
    size = (224, 224)
    epochs = 20
    batch_size = 16
    train_dl, valid_dl, _, _ = fetch_dataloader(args.data_dir,
                                                size,
                                                batch_size,
                                                num_workers=0)
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad], lr)
    train(model, train_dl, valid_dl, crit, optimizer, epochs, device,
          model_dir / "best.pth")
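The snippet above unpacks four values from fetch_dataloader. A minimal sketch of a compatible helper, assuming an ImageFolder-style directory layout and that the last two return values are the underlying datasets (both are guesses, not part of the original):

import torch
from torchvision import datasets, transforms


def fetch_dataloader(data_dir, size, batch_size, num_workers=0):
    # resize to the requested input size and convert to tensors
    tfm = transforms.Compose([transforms.Resize(size), transforms.ToTensor()])
    train_ds = datasets.ImageFolder(f"{data_dir}/train", transform=tfm)
    valid_ds = datasets.ImageFolder(f"{data_dir}/valid", transform=tfm)
    train_dl = torch.utils.data.DataLoader(train_ds, batch_size=batch_size,
                                           shuffle=True, num_workers=num_workers)
    valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=batch_size,
                                           shuffle=False, num_workers=num_workers)
    return train_dl, valid_dl, train_ds, valid_ds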
Example #2
def train_model(args, params, loss_fn, model, CViter, network):
    start_epoch = 0
    best_AUC = 0
    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 params.learning_rate,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=params.weight_decay,
                                 amsgrad=False)

    if args.resume:
        logging.info('Resuming checkpoint: {}'.format(args.resume))
        start_epoch, best_AUC, model, optimizer = resume_checkpoint(
            args, model, optimizer, CViter)

    logging.info("fetch_dataloader")
    dataloaders = fetch_dataloader(['train', 'val'], params)
    loss_track = []

    for epoch in range(start_epoch, start_epoch + params.epochs):
        logging.warning('CV [{}/{},{}/{}], Training Epoch: [{}/{}]'.format(
            CViter[1] + 1, params.CV_iters - 1, CViter[0] + 1, params.CV_iters,
            epoch + 1, start_epoch + params.epochs))

        # keep track of training loss
        loss_track = loss_track + train(args, dataloaders, model, loss_fn,
                                        optimizer, epoch)

        # evaluate on validation set
        loss_track.append(
            get_eval_matrix(validate(dataloaders['val'], model, loss_fn))[1])

        learning_rate_decay(optimizer, args.lrDecay)
    gc.collect()
    del optimizer
    return loss_track
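The learning_rate_decay helper called above is not shown. A minimal sketch, assuming args.lrDecay is a multiplicative factor applied to every parameter group once per epoch:

def learning_rate_decay(optimizer, decay_factor):
    # scale the learning rate of every parameter group by decay_factor
    for param_group in optimizer.param_groups:
        param_group['lr'] *= decay_factor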
Example #3
def main():
    args = parser.parse_args()
    AUCs = defaultdict(list)

    # define loss function
    loss_fn = UnevenWeightBCE_loss
    netlist = model_loader.get_model_list(args.network)
    for network in netlist:
        plt.clf()
        args.network = network
        set_logger(args.model_dir, args.network, args.log)

        params = set_params(args.model_dir, args.network)

        model = model_loader.loadModel(args.network, params.dropout_rate)
        model.cuda()

        cudnn.benchmark = True

        if args.train:
            for CViter in range(params.CV_iters):
                logging.warning(
                    'Cross Validation on iteration {}'.format(CViter + 1))
                AUCs[network].append(
                    save_ROC(args,
                             params.CV_iters,
                             outputs=validate(
                                 train_model(args, params, loss_fn, model,
                                             CViter),
                                 resume_model(args, model, CViter),
                                 loss_fn)[1]))
                get_next_CV_set(params.CV_iters)
                model.apply(model_loader.weight_reset)

            #add the AUC SD to the current model result
            add_AUC_to_ROC(args, params.CV_iters, AUCs[network])
        else:
            logging.info('plotting ROC on full dataset for {}'.format(
                args.network))
            for CViter in range(params.CV_iters):
                AUCs[network].append(
                    save_ROC(args,
                             'Full_dataset',
                             outputs=validate(
                                 fetch_dataloader([], params),
                                 resume_model(args, model, CViter),
                                 loss_fn)[1])
                )  #validate model on the full dataset, display ROC curve
            #add the AUC SD to the current model result
            add_AUC_to_ROC(args, 'Full_dataset', AUCs[network])

    plot_AUD_SD(AUCs, netlist, args.model_dir, args.train)
Example #4
def train_model(args, params, loss_fn, model, CViter, network):
    start_epoch = 0
    best_AUC = 0
    # define optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 params.learning_rate,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=params.weight_decay,
                                 amsgrad=False)

    if args.resume:
        logging.info('Resuming checkpoint: {}'.format(args.resume))
        start_epoch, best_AUC, model, optimizer = resume_checkpoint(
            args, model, optimizer, CViter)

    logging.info("fetch_dataloader")
    dataloaders = fetch_dataloader(['train', 'val'], params)
    loss_track = []

    for epoch in range(start_epoch, start_epoch + params.epochs):
        logging.warning('CV [{}/{},{}/{}], Training Epoch: [{}/{}]'.format(
            CViter[1] + 1, params.CV_iters - 1, CViter[0] + 1, params.CV_iters,
            epoch + 1, start_epoch + params.epochs))

        loss_track = loss_track + train(
            args, dataloaders, model, loss_fn, optimizer,
            params.epochs)  #keep track of training loss

        # evaluate on validation set
        val_loss, AUC = get_AUC(validate(dataloaders['val'], model, loss_fn))
        logging.warning('    Loss {loss:.4f};  AUC {AUC:.4f}\n'.format(
            loss=val_loss, AUC=AUC))

        # remember best AUC and save checkpoint
        if best_AUC < AUC and args.storebest:
            logging.warning('    Saving Best AUC model\n')
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_AUC': AUC,
                    'optimizer': optimizer.state_dict(),
                }, args, CViter)
        best_AUC = max(best_AUC, AUC)
        learning_rate_decay(optimizer, args.lrDecay)
    gc.collect()
    del optimizer
    return loss_track
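The save_checkpoint and resume_checkpoint helpers used above are also project-specific. A rough sketch, assuming one checkpoint file per cross-validation iteration under args.model_dir (the filename scheme is invented for illustration):

import os
import torch


def save_checkpoint(state, args, CViter):
    # state carries 'epoch', 'state_dict', 'best_AUC' and 'optimizer' as built above
    path = os.path.join(args.model_dir, 'checkpoint_CV{}_{}.pth.tar'.format(*CViter))
    torch.save(state, path)


def resume_checkpoint(args, model, optimizer, CViter):
    path = os.path.join(args.model_dir, 'checkpoint_CV{}_{}.pth.tar'.format(*CViter))
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch'], checkpoint['best_AUC'], model, optimizer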
Example #5
def main():
    args = parser.parse_args()
    evalmatices = defaultdict(list)

    # define loss function
    loss_list = get_loss_list()
    netlist = model_loader.get_model_list(args.network)

    plt.clf()
    set_logger(args.model_dir, args.network, args.log)

    params = set_params(args.model_dir, args.network)

    model = model_loader.loadModel(params,
                                   netname=args.network,
                                   dropout_rate=params.dropout_rate)
    model.cuda()

    cudnn.benchmark = True
    for loss in loss_list:
        print(loss)
        args.loss = loss
        loss_fn = get_loss(loss)
        if args.lrDecay != 1.0:
            args.loss = args.loss + '_{}LrD_'.format(str(args.lrDecay))
        for Testiter in range(params.CV_iters):
            for CViter in range(params.CV_iters - 1):
                if CViter != 0 or Testiter != 0:
                    model.apply(model_loader.weight_ini)
                    logging.warning(
                        'Cross Validation on iteration {}/{}, Nested CV on {}/{}'
                        .format(Testiter + 1, params.CV_iters, CViter + 1,
                                params.CV_iters - 1))

                    evalmatices[loss].append(
                        get_eval_matrix(outputs=validate(
                            fetch_dataloader([], params),
                            resume_model(args, model, (Testiter,
                                                       CViter)), loss_fn)[1]))
                get_next_CV_set(params.CV_iters)

    plot_AUC_SD(args.network, evalmatices, loss_list, args.lrDecay)
def get_predicted_probs(model):
    """Retruns predicted probs, labels for given model. """
    params = {'batch_size': 100, 'num_workers': 10, 'cuda': FLAGS.cuda}
    data_loaders = data_loader.fetch_dataloader(['dev'], model.datafolder,
                                                params)
    dev_loader = data_loaders['dev']
    if FLAGS.cuda:
        torch_model = torch.load(model.filepath)
    else:
        torch_model = torch.load(model.filepath, map_location='cpu')
    scores = None
    actual_labels = None
    print("Computing probabilities for model: ", model.name)
    num_batch = 0
    for images, labels in dev_loader:
        if FLAGS.cuda:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
        outputs = torch_model(images)
        print("Processing batch #", num_batch)
        num_batch += 1
        if FLAGS.cuda:
            if model.name.lower().startswith("inception"):
                outputs = outputs[0].cuda()
            else:
                outputs = outputs.cuda()

        if scores is None:
            scores = outputs[:, 1]
        else:
            scores = torch.cat([scores.cpu(), outputs[:, 1].cpu()])
            scores = scores.detach().cpu()

        if actual_labels is None:
            actual_labels = labels
        else:
            actual_labels = torch.cat([actual_labels.cpu(), labels.cpu()])
            actual_labels = actual_labels.detach().cpu()

    print("Done. Scores shape: ", scores.shape)
    return scores.detach().cpu().numpy(), actual_labels.detach().cpu().numpy()
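As a usage sketch (a model object with name, filepath and datafolder attributes is assumed, as in the function above), the returned arrays can be fed directly into scikit-learn:

from sklearn.metrics import roc_auc_score, roc_curve

scores, labels = get_predicted_probs(model)
auc = roc_auc_score(labels, scores)      # area under the ROC curve
fpr, tpr, _ = roc_curve(labels, scores)  # points for plotting the curve
print("Dev AUC: {:.4f}".format(auc))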
Example #7
def train():
    params = {
        'batch_size': FLAGS.batch_size,
        'num_workers': FLAGS.num_workers,
        'cuda': FLAGS.cuda
    }
    data_loaders = data_loader.fetch_dataloader(['train', 'dev'],
                                                FLAGS.data_folder, params)
    train_loader = data_loaders['train']
    dev_loader = data_loaders['dev']

    model = LogisticRegression(input_size, num_classes).cuda() if FLAGS.cuda \
        else LogisticRegression(input_size, num_classes)

    # Loss and Optimizer
    # Softmax is internally computed.
    # Set parameters to be updated.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Training the Model
    start_time_secs = time.time()
    train_dev_error_graph_filename = get_train_dev_error_graph_filename()

    train_loss_graph_filename = get_training_loss_graph_filename()

    num_iteration = 0

    for epoch in range(FLAGS.max_iter):
        for i, (images, labels) in enumerate(train_loader):

            if FLAGS.cuda:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            images = Variable(images.view(-1, input_size))
            labels = Variable(labels)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            if (i + 1) % 10 == 0:
                # len(train_loader) already counts batches per epoch
                print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' %
                      (epoch + 1, FLAGS.max_iter, i + 1,
                       len(train_loader), loss.item()))
            append_to_file(train_loss_graph_filename,
                           "%d,%.4f" % (num_iteration, loss.item()))
            num_iteration += 1

        train_acc = eval_on_train_set(model, train_loader)
        dev_acc, y_dev_predicted, y_dev_true = eval_on_dev_set(
            model, dev_loader)
        append_to_file(
            train_dev_error_graph_filename,
            '%s,%s,%s' % (epoch, train_acc.item() / 100, dev_acc.item() / 100))

    print('Training Complete')
    end_time_secs = time.time()
    training_duration_secs = end_time_secs - start_time_secs

    # Test the Model on dev data
    print('Final Evaluations after TRAINING...')
    train_accuracy = eval_on_train_set(model, train_loader)
    # Test on the train model to see how we do on that as well.
    dev_accuracy, y_dev_predicted, y_dev_true = eval_on_dev_set(
        model, dev_loader)

    experiment_result_string = "-------------------\n"
    experiment_result_string += "\nDev Accuracy: {}%".format(dev_accuracy)
    experiment_result_string += "\nTrain Accuracy: {}%".format(train_accuracy)
    experiment_result_string += "\nTraining time(secs): {}".format(
        training_duration_secs)
    experiment_result_string += "\nMax training iterations: {}".format(
        FLAGS.max_iter)
    experiment_result_string += "\nTraining time / Max training iterations: {}".format(
        1.0 * training_duration_secs / FLAGS.max_iter)

    print(experiment_result_string)
    # Save report to file
    write_contents_to_file(get_experiment_report_filename(),
                           experiment_result_string)

    # Generate confusion matrix
    util.create_confusion_matrices(y_dev_predicted, y_dev_true,
                                   get_confusion_matrix_filename())
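The LogisticRegression model above is not defined in this snippet. Since nn.CrossEntropyLoss applies softmax internally, a single linear layer is enough; a minimal sketch:

import torch.nn as nn


class LogisticRegression(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x):
        # return raw logits; nn.CrossEntropyLoss applies log-softmax itself
        return self.linear(x)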
Example #8
    # Load the parameters from json file
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # use GPU if available
    params.cuda = torch.cuda.is_available()
    print(params.cuda)
    # Set the random seed for reproducible experiments

    # Create the input data pipeline
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))
    logging.info("Loading the datasets...")

    # fetch dataloaders
    dataloaders = data_loader.fetch_dataloader(['train', 'val'], args.data_dir, params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']

    logging.info("- done.")

    # Define the model and optimizer
    model = net.FashioNet('resnet50', params.num_classes, params.train_blocks).cuda() if params.cuda else net.Net(params)
    summary(model, (3, 112, 112))
    optimizer = optim.Adamax(model.parameters(), lr=params.learning_rate)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3)  # CosineAnnealingLR(optimizer, 10)
    # fetch loss function and metrics
    loss_fn = torch.nn.CrossEntropyLoss().cuda()
    metrics = net.metrics
Example #9

if __name__ == '__main__':
    start = time.time()

    params = {
        "batch_size": 64,
        "num_workers": 4,
        "shuffle": True
    }

    params_training = {
        "learning_rate": 1e-1,
        "dropout_rate": 0.8,
        "num_epoch": 20
    }

    path = 'data'
    train_data = os.path.join(path, 'train_mix')
    test_data = os.path.join(path, 'test_mix')
    evl_data = os.path.join(path, 'val_mix')

    dataloaders = data_loader.fetch_dataloader(['train', 'val', 'test'], path, params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']
    test_dl = dataloaders['test']

    train(train_dl, params_training)

    end = time.time()
    print('Training Time: ', end-start)
Example #10
                               betas=(0.9, 0.999),
                               eps=1e-8,
                               weight_decay=params.weight_decay)
        #print(optimizer)
    elif params.optimizer == 'SGD':
        optimizer = optim.SGD(para_dic,
                              lr=params.lr,
                              momentum=0.9,
                              nesterov=True,
                              weight_decay=params.weight_decay)

    logger.info(model)
    # Create the input data pipeline
    logger.info("Loading the datasets...")
    # fetch dataloaders
    train_dl = data_loader.fetch_dataloader('train', params)
    test_dl = data_loader.fetch_dataloader('test', params)
    logger.info("- done.")
    Myacc = []
    if params.mode == 'train' or params.mode == 'load_train':
        # Train the model
        logger.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate(model, ad_net, grl, ad_net_m, grl_m, Myacc,
                           train_dl, test_dl, optimizer, loss_fn, metrics,
                           params, args.model_dir, logger, params.restore_file)
    elif params.mode == 'test':
        test_only(model, train_dl, test_dl, optimizer, loss_fn, metrics,
                  params, args.model_dir, logger, params.restore_file)
    else:
        print('mode input error!')
Example #11
    # check output folder exist and if it is rel path
    if not os.path.isdir(params.output_dir):
        os.mkdir(params.output_dir)

    writer = utils.set_writer(params.output_dir)

    params.device = torch.device('cuda:{}'.format(
        args.cuda) if torch.cuda.is_available() and args.cuda else 'cpu')

    # set random seed
    torch.manual_seed(11052018)
    if params.device.type == 'cuda': torch.cuda.manual_seed(11052018)

    # input
    dataloader = fetch_dataloader(params, train=False)

    # load model
    model = model.STN(getattr(model, params.stn_module),
                      params).to(params.device)
    utils.load_checkpoint(args.restore_file, model)

    # run inference
    print('\nEvaluating with model:\n', model)
    print('\n.. and parameters:\n', pprint.pformat(params))
    accuracy = evaluate(model, dataloader, writer, params)
    visualize_sample(model, dataloader.dataset, writer, params, None)
    print('Evaluation accuracy: {:.5f}'.format(accuracy))

    writer.close()
    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # fetch dataloaders
    dataloaders = data_loader.fetch_dataloader(['test'], args.data_dir, params)
    test_dl = dataloaders['test']

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)
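Several snippets rely on utils.load_checkpoint. A common shape for it, assuming checkpoints store 'state_dict' and optionally 'optimizer' keys (this is a sketch, not the project's actual helper):

import torch


def load_checkpoint(checkpoint_path, model, optimizer=None):
    # restore model (and optionally optimizer) state from a saved checkpoint
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is not None and 'optimizer' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint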
Example #13
    params.beam_blur = 30

    params.batch_size = int(params.batch_size)
    params.numIter = int(params.numIter)
    params.noise_dims = int(params.noise_dims)
    params.label_dims = int(params.label_dims)
    params.gkernlen = int(params.gkernlen)
    params.step_size = int(params.step_size)
    params.iters_step, params.save_step, params.iters, params.noise_level = 100, 100, 0, 0.1
    params.iters_scheme = [0, 0, 0, 1]
    params.max_res = len(params.iters_scheme)
    params.size_temp = 64
    # fetch dataloader
    dataloader, gen_loss_list, dis_loss_list = fetch_dataloader(
        train_path, params), [], []

    # Define the models
    if args.model == 'shallow':
        generator = Generator(params)
        discriminator = Discriminator(params)
    elif args.model == 'deep':
        generator = ResGenerator(params)
        discriminator = ResDiscriminator(params)

    if params.cuda:
        generator.cuda()
        discriminator.cuda()

    # Define the optimizers
    optimizer_G, params.gen_loss_list = torch.optim.Adam(
Example #14
    params = utils.Params(json_path)

    # Add attributes to params
    params.output_dir = output_dir
    params.lambda_gp = 10.0
    params.n_critic = 1
    params.cuda = torch.cuda.is_available()

    params.batch_size = int(params.batch_size)
    params.numIter = int(params.numIter)
    params.noise_dims = int(params.noise_dims)
    params.label_dims = int(params.label_dims)
    params.gkernlen = int(params.gkernlen)

    # fetch dataloader
    dataloader = fetch_dataloader(train_path, params)

    # Define the models
    generator = Generator(params)
    discriminator = Discriminator(params)
    if params.cuda:
        generator.cuda()
        discriminator.cuda()

    # Define the optimizers
    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=params.lr_gen,
                                   betas=(params.beta1_gen, params.beta2_gen))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=params.lr_dis,
                                   betas=(params.beta1_dis, params.beta2_dis))
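params.lambda_gp and params.n_critic set above are the usual WGAN-GP knobs. For context, a standard gradient-penalty term looks roughly like the sketch below (not part of this snippet; 4-D image batches are assumed):

import torch


def gradient_penalty(discriminator, real, fake, lambda_gp):
    # interpolate randomly between real and generated samples
    alpha = torch.rand(real.size(0), 1, 1, 1, device=real.device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interp = discriminator(interp)
    grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    # penalize deviation of the gradient norm from 1
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()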
Example #15
def main():
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the random seed for reproducible experiments
    random.seed(230)
    torch.manual_seed(230)
    np.random.seed(230)
    torch.cuda.manual_seed(230)
    warnings.filterwarnings("ignore")

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders, considering full-set vs. sub-set scenarios
    if params.subset_percent < 1.0:
        train_dl = data_loader.fetch_subset_dataloader('train', params)
    else:
        train_dl = data_loader.fetch_dataloader('train', params)

    dev_dl = data_loader.fetch_dataloader('dev', params)

    logging.info("- done.")
    """
    Load student and teacher model
    """
    if "distill" in params.model_version:

        # Specify the student models
        if params.model_version == "cnn_distill":  # 5-layers Plain CNN
            print("Student model: {}".format(params.model_version))
            model = net.Net(params).cuda()

        elif params.model_version == "shufflenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "mobilenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()

        elif params.model_version == 'resnet18_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet18(num_classes=args.num_class).cuda()

        elif params.model_version == 'resnet50_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet50(num_classes=args.num_class).cuda()

        elif params.model_version == "alexnet_distill":
            print("Student model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()

        elif params.model_version == "vgg19_distill":
            print("Student model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()

        elif params.model_version == "googlenet_distill":
            print("Student model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()

        elif params.model_version == "resnext29_distill":
            print("Student model: {}".format(params.model_version))
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()

        elif params.model_version == "densenet121_distill":
            print("Student model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        # optimizer
        if params.model_version == "cnn_distill":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(
            optimizer, iter_per_epoch *
            args.warm)  # warmup the learning rate in the first epoch

        # specify loss function
        if args.self_training:
            print(
                '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>self training>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
            )
            loss_fn_kd = loss_kd_self
        else:
            loss_fn_kd = loss_kd
        """ 
            Specify the pre-trained teacher models for knowledge distillation.
            Checkpoints can be obtained by regular training or by downloading our pretrained models.
            For a model pretrained on multiple GPUs, use "nn.DataParallel" to correctly load the model weights.
        """
        if params.teacher == "resnet18":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet18(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/0.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "alexnet":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = alexnet.alexnet(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_alexnet/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "googlenet":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = googlenet.GoogleNet(num_class=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_googlenet/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "vgg19":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = models.vgg19_bn(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_vgg19/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "resnet50":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet50(num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/50.pth.tar'

        elif params.teacher == "resnet101":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet101(num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet101/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "densenet121":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = densenet.densenet121(
                num_class=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_densenet121/best.pth.tar'
            # teacher_model = nn.DataParallel(teacher_model).cuda()

        elif params.teacher == "resnext29":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnext.CifarResNeXt(
                cardinality=8, depth=29, num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/50.pth.tar'
                teacher_model = nn.DataParallel(teacher_model).cuda()

        elif params.teacher == "mobilenet_v2":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = mobilenet.mobilenetv2(
                class_num=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_mobilenet_v2/best.pth.tar'

        elif params.teacher == "shufflenet_v2":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = shufflenet.shufflenetv2(
                class_num=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_shufflenet_v2/best.pth.tar'

        utils.load_checkpoint(teacher_checkpoint, teacher_model)

        # Train the model with KD
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate_kd(model, teacher_model, train_dl, dev_dl,
                              optimizer, loss_fn_kd, warmup_scheduler, params,
                              args, args.restore_file)

    # non-KD mode: regular training to obtain a baseline model
    else:
        print("Train base model")
        if params.model_version == "cnn":
            model = net.Net(params).cuda()

        elif params.model_version == "mobilenet_v2":
            print("model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "shufflenet_v2":
            print("model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "alexnet":
            print("model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()

        elif params.model_version == "vgg19":
            print("model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()

        elif params.model_version == "googlenet":
            print("model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()

        elif params.model_version == "densenet121":
            print("model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        elif params.model_version == "resnet18":
            model = resnet.ResNet18(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet50":
            model = resnet.ResNet50(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet101":
            model = resnet.ResNet101(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet152":
            model = resnet.ResNet152(num_classes=args.num_class).cuda()

        elif params.model_version == "resnext29":
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()
            # model = nn.DataParallel(model).cuda()

        if args.regularization:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Loss of Regularization>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_kd_regularization
        elif args.label_smoothing:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Label Smoothing>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_label_smoothing
        else:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Normal Training>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = nn.CrossEntropyLoss()
            if args.double_training:  # double training, compare to self-KD
                print(
                    ">>>>>>>>>>>>>>>>>>>>>>>>Double Training>>>>>>>>>>>>>>>>>>>>>>>>"
                )
                checkpoint = 'experiments/pretrained_teacher_models/base_' + str(
                    params.model_version) + '/best.pth.tar'
                utils.load_checkpoint(checkpoint, model)

        if params.model_version == "cnn":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(optimizer,
                                          iter_per_epoch * args.warm)

        # Train the model
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate(model, train_dl, dev_dl, optimizer, loss_fn, params,
                           args.model_dir, warmup_scheduler, args,
                           args.restore_file)
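utils.WarmUpLR above ramps the learning rate up during the first epoch(s). A typical implementation as a PyTorch scheduler, shown here as a sketch (the project's actual helper may differ):

from torch.optim.lr_scheduler import _LRScheduler


class WarmUpLR(_LRScheduler):
    """Linearly ramp the learning rate from ~0 to the base LR over total_iters steps."""

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # called on each scheduler.step(); last_epoch counts the steps taken so far
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8)
                for base_lr in self.base_lrs]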
Example #16
def train():
    params = {
        'batch_size': FLAGS.batch_size,
        'num_workers': FLAGS.num_workers,
        'cuda': FLAGS.cuda
    }
    data_loaders = data_loader.fetch_dataloader(['train', 'dev'],
                                                FLAGS.data_folder, params)
    train_loader = data_loaders['train']
    dev_loader = data_loaders['dev']
    learning_rate = FLAGS.learning_rate

    model = get_model()

    # Loss and Optimizer
    # Softmax is internally computed.
    # Set parameters to be updated.
    criterion = nn.CrossEntropyLoss()
    #optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=FLAGS.l2_regularization)

    # Training the Model
    start_time_secs = time.time()
    train_dev_error_graph_filename = get_train_dev_error_graph_filename()
    train_loss_graph_filename = get_training_loss_graph_filename()
    num_iteration = 0

    print("Model arch: ", model)
    print("Model size is ",
          sum([param.nelement() for param in model.parameters()]))
    dev_accuracy_list = []
    for epoch in range(FLAGS.max_iter):
        for i, (images, labels) in enumerate(train_loader):

            if FLAGS.cuda:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            images = Variable(images)
            labels = Variable(labels)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            if FLAGS.model_name.lower().startswith("inception"):
                outputs, _ = model(images)
            else:
                outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            total_data_set_size = len(train_loader.dataset)
            num_steps = total_data_set_size // FLAGS.batch_size
            if (i + 1) % 10 == 0:
                print(
                    'Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' %
                    (epoch + 1, FLAGS.max_iter, i + 1, num_steps, loss.item()))
            append_to_file(train_loss_graph_filename,
                           "%d,%.4f" % (num_iteration, loss.item()))
            num_iteration += 1

        train_acc = eval_on_train_set(model, train_loader)
        dev_acc, y_dev_predicted, y_dev_true = eval_on_dev_set(
            model, dev_loader)
        dev_precision, dev_recall, dev_f1score = util.compute_precision_recall_f1_score(
            y_dev_predicted, y_dev_true)
        dev_accuracy_list.append(dev_acc)
        append_to_file(
            train_dev_error_graph_filename, '%s,%s,%s,%s,%s,%s' %
            (epoch, train_acc.item() / 100.0, dev_acc.item() / 100.0,
             str(dev_precision), str(dev_recall), str(dev_f1score)))

        if (epoch + 1) % FLAGS.save_model_every_num_epoch == 0:
            print('Checkpointing model...')
            torch.save(model, get_model_checkpoint_path())

    print('Training Complete')
    end_time_secs = time.time()
    training_duration_secs = end_time_secs - start_time_secs

    print('Checkpointing FINAL trained model...')
    torch.save(model, get_model_checkpoint_path())

    print('Final Evaluations after TRAINING...')
    # Test on the train model to see how we do on that as well.
    train_accuracy = eval_on_train_set(model, train_loader)
    # Test the Model on dev data
    dev_accuracy, y_dev_predicted, y_dev_true = eval_on_dev_set(
        model, dev_loader)
    dev_precision, dev_recall, dev_f1score = util.compute_precision_recall_f1_score(
        y_dev_predicted, y_dev_true)

    dev_accuracy_list.append(dev_accuracy)
    best_dev_accuracy = max(dev_accuracy_list)
    best_dev_accuracy_index = dev_accuracy_list.index(best_dev_accuracy)
    best_dev_accuracy_epoch = best_dev_accuracy_index + 1

    experiment_result_string = "-------------------\n"
    experiment_result_string += "\nDev Accuracy: {}%".format(dev_accuracy)
    experiment_result_string += "\nBest Dev Accuracy over training: {}% seen at epoch {}".format(
        best_dev_accuracy, best_dev_accuracy_epoch)
    experiment_result_string += "\nDev Precision: {}%".format(dev_precision)
    experiment_result_string += "\nDev Recall: {}%".format(dev_recall)
    experiment_result_string += "\nDev F1 Score: {}%".format(dev_f1score)
    experiment_result_string += "\nTrain Accuracy: {}%".format(train_accuracy)
    experiment_result_string += "\nTraining time(secs): {}".format(
        training_duration_secs)
    experiment_result_string += "\nMax training iterations: {}".format(
        FLAGS.max_iter)
    experiment_result_string += "\nTraining time / Max training iterations: {}".format(
        1.0 * training_duration_secs / FLAGS.max_iter)

    print(experiment_result_string)
    # Save report to file
    write_contents_to_file(get_experiment_report_filename(),
                           experiment_result_string)

    # Generate confusion matrix
    util.create_confusion_matrices(y_dev_predicted, y_dev_true,
                                   get_confusion_matrix_filename())

    return best_dev_accuracy
Example #17
    if not os.path.isdir(params.output_dir):
        os.mkdir(params.output_dir)

    writer = utils.set_writer(params.output_dir if args.restore_file is None
                              else os.path.dirname(args.restore_file))

    params.device = torch.device('cuda:{}'.format(
        args.cuda) if torch.cuda.is_available() and args.cuda else 'cpu')
    print('device: ', params.device)

    # set random seed
    torch.manual_seed(11052018)
    if params.device.type == 'cuda': torch.cuda.manual_seed(11052018)

    # input
    train_dataloader = fetch_dataloader(params, train=True)
    val_dataloader = fetch_dataloader(params, train=False)

    # construct model
    # dims out (pytorch affine grid requires 2x3 matrix output; else perspective transform requires 8)
    model = model.STN(getattr(model, params.stn_module),
                      params).to(params.device)
    # initialize
    initialize(model)
    capacity = sum(p.numel() for p in model.parameters())

    loss_fn = torch.nn.CrossEntropyLoss().to(params.device)
    optimizer = torch.optim.Adam([{
        'params': model.transformer.parameters(),
        'lr': params.transformer_lr
    }, {