import os
import shutil
from datetime import datetime

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm

## project-local helpers assumed importable in this repo:
## Dataset, MyNet, AverageMeter, SimpleLogger, evaluate, _save_checkpoint


def main(config, resume):
    ## parameters
    batch_size = config.get('batch_size', 32)
    start_epoch = config['epoch']['start']
    max_epoch = config['epoch']['max']
    lr = config.get('lr', 0.0005)
    use_conf = config.get('use_conf', False)

    ## paths: everything for this run goes under save_path/<timestamp>/
    save_path = config['save_path']
    timestamp = datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S")
    save_path = os.path.join(save_path, timestamp)
    result_path = os.path.join(save_path, 'result')
    os.makedirs(result_path, exist_ok=True)
    model_path = os.path.join(save_path, 'model')
    os.makedirs(model_path, exist_ok=True)
    ## keep a copy of the training script alongside the results
    dest = shutil.copy('train.py', save_path)
    print("save to: ", dest)

    ## cuda or cpu
    if config['n_gpu'] == 0 or not torch.cuda.is_available():
        device = torch.device("cpu")
        print("using CPU")
    else:
        device = torch.device("cuda:0")

    ## dataloaders
    dataset = Dataset(phase='train', do_augmentations=False)
    data_loader = DataLoader(
        dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )
    val_dataset = Dataset(phase='val', do_augmentations=False)
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )

    ## optional few-shot subset, revisited after each epoch with a larger batch
    do_few_shot = True
    fs_data_loader = None
    if do_few_shot:
        fs_dataset = Dataset(
            phase='train',
            do_augmentations=False,
            metafile_path='metadata/detection_train_images.json')
        fs_data_loader = DataLoader(
            fs_dataset,
            batch_size=128,
            num_workers=1,
            shuffle=True,
            pin_memory=True,
        )

    ## CNN model
    output_dim = 3
    model = MyNet(output_dim).to(device)
    model.train()
    print(model)

    ## per-sample loss (reduction='none') so confidence weights can be
    ## applied before averaging
    criterion = nn.CrossEntropyLoss(reduction='none')

    ## optimizer and LR schedule
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=lr, weight_decay=0, amsgrad=False)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10], gamma=0.1)

    loss_avg = AverageMeter()
    acc_avg = AverageMeter()
    fs_loss_avg = AverageMeter()
    fs_acc_avg = AverageMeter()
    logger = SimpleLogger(['train_loss', 'train_acc', 'val_loss', 'val_acc'])

    ## training loop
    for epoch in range(start_epoch, max_epoch):
        ## reset all per-epoch meters (the original reset only loss_avg,
        ## so the other averages silently ran across epochs)
        loss_avg.reset()
        acc_avg.reset()
        fs_loss_avg.reset()
        fs_acc_avg.reset()

        for batch_idx, batch in tqdm(
                enumerate(data_loader),
                total=len(data_loader),
                ncols=80,
                desc=f'training epoch {epoch}',
        ):
            data = batch[0].to(device)
            gt_lbls = batch[1].to(device)
            gt_gt_lbls = batch[2].to(device)

            optimizer.zero_grad()

            ## forward pass; out holds the class logits, shape [B, NC]
            out = model(data)
            preds = out.argmax(dim=-1)
            ## entropy-based confidence weight per sample, in [0, 1]
            weights = model.compute_entropy_weight(out)

            ## per-sample loss, shape [B]
            class_loss = criterion(out, gt_lbls)
            if use_conf:
                ## confidence-weighted loss; the (1 - w)^2 penalty keeps the
                ## model from driving all weights to zero
                loss = (class_loss * weights ** 2 + (1 - weights) ** 2).mean()
            else:
                loss = class_loss.mean()

            ## record metrics; a sample also counts as positive when its
            ## second ("gt_gt") label is > 2
            loss_avg.update(loss.item(), batch_size)
            positive = ((gt_lbls == preds) | (gt_gt_lbls > 2)).sum()
            batch_acc = positive.float() / batch_size
            acc_avg.update(batch_acc.item(), batch_size)

            ## backward pass and update
            loss.backward()
            optimizer.step()

        ## end of epoch: log training metrics
        logger.update(loss_avg.avg, 'train_loss')
        logger.update(acc_avg.avg, 'train_acc')
        print("train loss: ", loss_avg.avg)
        print("train acc: ", acc_avg.avg)

        ## extra pass over the few-shot subset
        if do_few_shot and fs_data_loader is not None:
            for batch_idx, batch in tqdm(
                    enumerate(fs_data_loader),
                    total=len(fs_data_loader),
                    ncols=80,
                    desc=f'few-shot epoch {epoch}',
            ):
                data = batch[0].to(device)
                gt_lbls = batch[1].to(device)
                gt_gt_lbls = batch[2].to(device)

                optimizer.zero_grad()

                out = model(data)
                preds = out.argmax(dim=-1)
                weights = model.compute_entropy_weight(out)

                class_loss = criterion(out, gt_lbls)
                if use_conf:
                    loss = (class_loss * weights ** 2 + (1 - weights) ** 2).mean()
                else:
                    loss = class_loss.mean()

                ## this loader has no drop_last, so normalize by the actual
                ## batch size rather than the configured one
                positive = ((gt_lbls == preds) | (gt_gt_lbls > 2)).sum()
                batch_acc = positive.float() / data.shape[0]
                fs_loss_avg.update(loss.item(), data.shape[0])
                fs_acc_avg.update(batch_acc.item(), data.shape[0])

                loss.backward()
                optimizer.step()

            print("fs train loss: ", fs_loss_avg.avg)
            print("fs train acc: ", fs_acc_avg.avg)

        ## validation; checkpoint whenever val accuracy hits a new best
        if val_data_loader is not None:
            log = evaluate(model.eval(), val_data_loader, device, use_conf=use_conf)
            model.train()
            logger.update(log['loss'], 'val_loss')
            logger.update(log['acc'], 'val_acc')
            print("val loss: ", log['loss'])
            print("val acc: ", log['acc'])

            best_idx = logger.get_best('val_acc', best='max')
            if best_idx == epoch:
                print('save ckpt')
                _save_checkpoint(model_path, epoch, model)

        lr_scheduler.step()
        print()

    ## save final model
    _save_checkpoint(model_path, epoch, model)
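## ---------------------------------------------------------------------------
## main() relies on project helpers that are not defined in this file
## (AverageMeter, SimpleLogger, evaluate, _save_checkpoint, and the model's
## compute_entropy_weight method). The two sketches below are hypothetical
## reconstructions consistent with how they are called above, not the
## project's actual implementations.
## ---------------------------------------------------------------------------
import math


class AverageMeter:
    """Running average of a scalar metric, matching the update(val, n) /
    avg / reset() usage in main()."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / max(self.count, 1)


def compute_entropy_weight(logits):
    """Plausible per-sample confidence weight in [0, 1]: 1 for a peaked
    softmax, 0 for a uniform one. In main() it is a method on MyNet."""
    probs = torch.softmax(logits, dim=-1)
    entropy = -(probs * probs.clamp_min(1e-8).log()).sum(dim=-1)  ## [B]
    max_entropy = math.log(logits.shape[-1])  ## entropy of the uniform dist
    return 1.0 - entropy / max_entropy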
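## Example invocation. The config keys are exactly those read by main()
## above; the values here are illustrative placeholders, not the project's
## defaults.
if __name__ == '__main__':
    config = {
        'batch_size': 32,
        'epoch': {'start': 0, 'max': 20},
        'lr': 5e-4,
        'use_conf': True,
        'save_path': 'runs',
        'n_gpu': 1,
    }
    main(config, resume=False)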
## ---------------------------------------------------------------------------
## Separate snippet: a minimal regression training loop. BATCH_SIZE,
## INITIAL_LEARNING_RATE, N_EPOCHS, XTrain/YTrain/XTest, MyDataset, device,
## and this MyNet variant are assumed to be defined earlier in the original
## file (the excerpt starts mid-script).
## ---------------------------------------------------------------------------
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

## torch.autograd.Variable is deprecated; plain tensors carry gradients now
YTest = torch.as_tensor(YTest, dtype=torch.float32)

train_set = MyDataset(x=XTrain, y=YTrain)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

model = MyNet(n_in=1, n_out=1).to(device)

## predictions of the untrained model, kept for later comparison; run in
## eval mode and without building an autograd graph
model.eval()
with torch.no_grad():
    YPRED = model(XTest)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=INITIAL_LEARNING_RATE)
scheduler = StepLR(optimizer, step_size=20)

for epoch in range(N_EPOCHS):
    model.train()
    epoch_loss = 0.0
    for i, (x, y) in enumerate(train_loader):
        ## .to(device) returns a new tensor; the result must be assigned
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        ## accumulate a float, not the tensor, so the graph can be freed
        epoch_loss += loss.item()

        loss.backward()
        optimizer.step()

    ## the scheduler was created but never stepped in the original; step it
    ## once per epoch so the LR decay actually takes effect
    scheduler.step()
    epoch_loss = epoch_loss / len(train_loader)
    print(f"epoch {epoch}: mean train loss {epoch_loss:.6f}")
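## After training, test-set predictions can be produced the same way as the
## initial YPRED pass: eval mode, no autograd graph. This assumes XTest is
## already a float tensor (its conversion is outside this excerpt).
model.eval()
with torch.no_grad():
    YPRED = model(XTest.to(device))
    test_loss = criterion(YPRED, YTest.to(device)).item()
print(f"test MSE: {test_loss:.6f}")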