Example #1
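# Snippet from a TensorBoard visualizer class: __init__ creates a tf_logger.Logger
# and a plain-text loss log. Assumes the enclosing class plus `os`, `time`, and
# `tf_logger` imports from the surrounding module (not shown in this excerpt).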
def __init__(self, opt, name='train'):
    # self.opt = opt
    # self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name))
    # self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt')
    self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name))
    self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt')
    with open(self.log_name, "a") as log_file:
        now = time.strftime("%c")
        log_file.write('================ Training Loss (%s) ================\n' % now)
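
# Assumed module-level context for this snippet (not shown in the excerpt):
# imports of os, tqdm, torch, torch.autograd.Variable, tf_logger, utils and
# bi_deco.datasets, a parsed `opt` options namespace, and a test() helper.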
def train_alexnet(experiment_name, resume_experiment=False):
    prefix = "only_alexnet"
    print("loading classifier")
    logger = tf_logger.Logger("tf_log/{}_{}".format(prefix, experiment_name))
    classifier = Alexnet_Deco(
        batch_norm2d=True
    )
    experiment_epoch = -1

    if resume_experiment:
        raise NotImplementedError("not implemented for pointnet")

    if opt.gpu != "-1":
        print("loading classifier in GPU")
        classifier.cuda()

    # train_loader, test_loader = bi_deco.datasets.washington.load_dataset(
    train_loader, test_loader = bi_deco.datasets.washington_alexnet.load_dataset(
        data_dir='/scratch/dataset/',
        split=opt.split,
        batch_size=opt.batch_size,
        rgb=True,
    )

    print("loss and optimizer")
    crossEntropyLoss = torch.nn.CrossEntropyLoss().cuda()
    if experiment_epoch > 40 and opt.decimate_lr:
        learning_rate = opt.lr / 10
    else:
        learning_rate = opt.lr

    if opt.use_adam:
        class_optimizer = torch.optim.Adam(utils.get_trainable_params(classifier), lr=3e-4)
    else:
        class_optimizer = torch.optim.SGD(
            utils.get_trainable_params(classifier),
            lr=learning_rate,
            momentum=0.9,
            nesterov=True,
            weight_decay=0.0001,
        )
        # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        #     class_optimizer, 'min',
        #     patience=3,
        #     verbose=True,
        # )

    last_test_accuracy = -1
    for epoch in range(experiment_epoch + 1, opt.nepoch):
        if opt.decimate_lr and epoch == 40:
            class_optimizer.param_groups[0]['lr'] = learning_rate / 10.

        classifier.train()
        progress_bar = tqdm.tqdm(total=len(train_loader))  # one progress bar per epoch

        for step, (inputs, labels) in enumerate(train_loader, 0):
            if opt.skip_training and step > 5:
                break
            progress_bar.update(1)

            images = Variable(inputs)
            labels = Variable(labels).long()
            if opt.gpu != "-1":
                images, labels = images.cuda(), labels.cuda()
            class_optimizer.zero_grad()
            outputs_0 = classifier(images)
            loss_0 = crossEntropyLoss(outputs_0, labels)
            loss_0.backward()
            class_optimizer.step()

            # target_Variable = torch.LongTensor(opt.batch_size)
            # labels = target_Variable.copy_(labels)

            # inputs, labels = Variable(inputs), Variable(labels)
            # if opt.gpu != "-1":
            #     inputs, labels = inputs.cuda(), labels.cuda()
            # class_pred = classifier(inputs)
            # class_loss = crossEntropyLoss(class_pred, labels)
            # class_optimizer.zero_grad()
            # class_loss.backward()
            # class_optimizer.step()
            # loss_ = class_loss.data[0]

            logger.scalar_summary("loss/train_loss", loss_0.data[0], epoch * len(train_loader) + step)
            progress_bar.set_description("epoch {} lr {} last test acc {}".format(
                epoch, class_optimizer.param_groups[0]['lr'], last_test_accuracy))

        del inputs
        del labels
        test_accuracy, test_loss = test(crossEntropyLoss, classifier, opt, test_loader)
        last_test_accuracy = test_accuracy

        logger.scalar_summary("loss/test_loss", test_loss, epoch * len(train_loader) + step)
        logger.scalar_summary("loss/test_accuracy", test_accuracy, epoch * len(train_loader) + step)
        # if scheduler:
        #    scheduler.step(test_loss)

        if not opt.skip_training:
            try:
                os.mkdir('state_dicts/')
            except OSError:
                pass
            torch.save(classifier.state_dict(),
                       'state_dicts/{}{}cls_model_{:d}.pth'.format(prefix, experiment_name, epoch))
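
# Separate snippet: optimizer and training-script setup for a second model,
# `model_0`; `parameters_0`, `lr`, `opt`, and `model_0` are assumed to be
# defined earlier in the same module.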
optimizer_0 = torch.optim.SGD(params=parameters_0,
                              lr=lr,
                              weight_decay=opt.wd,
                              momentum=0.9,
                              nesterov=True)

# Training
num_epoch = opt.nepoch
if opt.model:
    model_0.load_state_dict(torch.load(opt.model))
    print("LOADING MODEL SNAPSHOT")

import time
experiment_name = time.strftime("%Y_%m_%d-%H_%M_%S")
prefix = "working_alexnet"
logger = tf_logger.Logger("tf_log/{}_{}".format(prefix, experiment_name))
tensorboard_step = 0


def train(epoch, lr):
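    # Per-epoch training pass for `model_0`: resets the running accuracy
    # counters and logs each step under a global `tensorboard_step` counter
    # (assumes `dataloader` and `model_0` are defined at module level).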
    global tensorboard_step
    global parameters_0
    global optimizer_0
    global correct_train_0
    global total_train_0
    correct_train_0 = 0.0
    total_train_0 = 0.0
    model_0.train()
    for i, (images, labels) in enumerate(dataloader):
        tensorboard_step += 1