def __init__(self, opt, name='train'):
    """Set up a TF-event logger and a plain-text loss log under opt.log_dir.

    Args:
        opt: options object; only ``opt.log_dir`` is read here.
        name: sub-directory name for the TF event files (default 'train').
    """
    self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name))
    self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt')
    # Append a timestamped session header so successive runs are
    # distinguishable in the shared text log.
    timestamp = time.strftime("%c")
    header = '================ Training Loss (%s) ================\n' % timestamp
    with open(self.log_name, "a") as log_file:
        log_file.write(header)
def train_alexnet(experiment_name, resume_experiment=False):
    """Train the AlexNet-only classifier on the Washington RGB split.

    Args:
        experiment_name: tag used for the tensorboard log dir and checkpoints.
        resume_experiment: resuming is not supported; raises if True.

    Raises:
        NotImplementedError: if ``resume_experiment`` is True.

    Reads module-level ``opt`` for all hyper-parameters; writes checkpoints
    to ``state_dicts/`` after every epoch unless ``opt.skip_training``.
    """
    prefix = "only_alexnet"
    print("loading classifier")
    logger = tf_logger.Logger("tf_log/{}_{}".format(prefix, experiment_name))
    classifier = Alexnet_Deco(batch_norm2d=True)

    experiment_epoch = -1
    if resume_experiment:
        # Fixed message: original said "pointnet" inside the alexnet trainer.
        raise NotImplementedError("resuming is not implemented for alexnet")

    if opt.gpu != "-1":
        print("loading classifier in GPU")
        classifier.cuda()

    train_loader, test_loader = bi_deco.datasets.washington_alexnet.load_dataset(
        data_dir='/scratch/dataset/',
        split=opt.split,
        batch_size=opt.batch_size,
        rgb=True,
    )

    print("loss and optimizer")
    # Single criterion reused for both training and test(); the original
    # rebuilt an identical CrossEntropyLoss every epoch.
    criterion = torch.nn.CrossEntropyLoss().cuda()

    # experiment_epoch is -1 unless resuming, so this branch is currently
    # dead; kept for when resume support lands.
    if experiment_epoch > 40 and opt.decimate_lr:
        learning_rate = opt.lr / 10
    else:
        learning_rate = opt.lr

    if opt.use_adam:
        class_optimizer = torch.optim.Adam(utils.get_trainable_params(classifier), lr=3e-4)
    else:
        class_optimizer = torch.optim.SGD(
            utils.get_trainable_params(classifier),
            lr=learning_rate,
            momentum=0.9,
            nesterov=True,
            weight_decay=0.0001,
        )

    steps_per_epoch = len(train_loader)
    last_test_accuracy = -1
    for epoch in range(experiment_epoch + 1, opt.nepoch):
        # Step-wise LR decimation: divide by 10 once at epoch 40.
        if opt.decimate_lr and epoch == 40:
            class_optimizer.param_groups[0]['lr'] = learning_rate / 10.

        classifier.train()
        # One bar per epoch sized to this epoch's batches (the original
        # hard-coded 50 epochs into the total and never closed the bar).
        progress_bar = tqdm.tqdm(total=steps_per_epoch)
        for step, (inputs, labels) in enumerate(train_loader, 0):
            if opt.skip_training and step > 5:
                break
            progress_bar.update(1)

            images = Variable(inputs).cuda()
            targets = Variable(labels).cuda().long()

            class_optimizer.zero_grad()
            outputs = classifier(images)
            loss = criterion(outputs, targets)
            loss.backward()
            class_optimizer.step()

            # Global step must advance by batches per epoch, not by
            # opt.nepoch as the original did.
            global_step = step + steps_per_epoch * epoch
            # Log the scalar value, not the Variable itself.
            logger.scalar_summary("loss/train_loss", loss.data[0], global_step)
            # Accuracy belongs in the description string; the original passed
            # it as tqdm's `refresh` positional argument by mistake.
            progress_bar.set_description(
                "epoch {} lr {} acc {}".format(
                    epoch, class_optimizer.param_groups[0]['lr'], last_test_accuracy))
            del inputs, labels
        progress_bar.close()

        test_accuracy, test_loss = test(criterion, classifier, opt, test_loader)
        last_test_accuracy = test_accuracy
        # Test metrics are per-epoch: log them at the epoch boundary step.
        epoch_end_step = steps_per_epoch * (epoch + 1)
        logger.scalar_summary("loss/test_loss", test_loss, epoch_end_step)
        logger.scalar_summary("loss/test_accuracy", test_accuracy, epoch_end_step)

        if not opt.skip_training:
            # Best-effort mkdir: ignore "already exists".
            try:
                os.mkdir('state_dicts/')
            except OSError:
                pass
            torch.save(
                classifier.state_dict(),
                'state_dicts/{}{}cls_model_{:d}.pth'.format(prefix, experiment_name, epoch))
optimizer_0 = torch.optim.SGD(params=parameters_0, lr=lr, weight_decay=opt.wd, momentum=0.9, nesterov=True) # Training num_epoch = opt.nepoch if opt.model: model_0.load_state_dict(torch.load(opt.model)) print("LOADING MODEL SNAPSHOT") import time experiment_name = time.strftime("%Y_%m_%d-%H_%M_%S") prefix = "working_alexnet" logger = tf_logger.Logger("tf_log/{}_{}".format(prefix, experiment_name)) tensorboard_step = 0 def train(epoch, lr): global tensorboard_step global parameters_0 global optimizer_0 global correct_train_0 global total_train_0 correct_train_0 = 0.0 total_train_0 = 0.0 model_0.train() for i, (images, labels) in enumerate(dataloader): tensorboard_step += 1