import logging
import os
import time

import torch
import torch.optim as optim

# Net, get_loader, and AverageMeter are project-level helpers assumed to be
# imported at module scope in the original file.


def train(args):
    # CONFIGS = yaml.load(open(args.config))  # deprecated, please set the configs in parse_args()

    # Set device
    if torch.cuda.is_available():
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device.strip()
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")  # Not suggested

    # Set save folder & logging config
    subfolder = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
    if not args.save_folder or not os.path.isdir(args.save_folder):
        print("Warning: invalid value of 'save_folder', using default './save_folder'.")
        save_folder = "./save_folder"
    else:
        save_folder = args.save_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    save_folder = os.path.join(save_folder, subfolder)
    os.mkdir(save_folder)
    # TODO: logging

    # Load Dataset
    trainloader = get_loader(args.train_gtfile,
                             batch_size=args.batch_size,
                             num_thread=args.num_workers)
    valloader = get_loader(args.val_gtfile,
                           batch_size=args.batch_size,
                           num_thread=args.num_workers)

    # Init Net
    model = Net(numAngle=args.num_angle, numRho=args.num_rho, backbone=args.backbone)
    if args.resume:
        model.load_state_dict(torch.load(args.resume))
    model = torch.nn.DataParallel(model).to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters())

    # Loss
    criterion = torch.nn.CrossEntropyLoss()
    losses = AverageMeter()

    # Start Training
    model.train()
    iter_id = 0  # iteration id starts from 1 ('iter' renamed to avoid shadowing the builtin)
    for epoch in range(args.max_epoch):
        for batch in trainloader:
            start = time.time()
            iter_id += 1
            img_tensor, gt_tensor = batch
            # Move both tensors to the training device; the original left them
            # on the CPU, which breaks the loss when preds come back from the GPU.
            img_tensor = img_tensor.to(device)
            gt_tensor = gt_tensor.to(device)
            optimizer.zero_grad()

            # Forwarding
            preds = model(img_tensor)

            # Calculate Loss
            loss = criterion(preds, gt_tensor)
            loss.backward()
            optimizer.step()
            losses.update(loss.item(), args.batch_size)

            if iter_id % args.show_interval == 0:
                logging.info(
                    f"Training [{epoch}/{args.max_epoch}][{iter_id}] "
                    f"Loss:{losses.avg:.4f} Time:{time.time() - start:.1f}s")
            if iter_id % args.val_interval == 0:
                pass  # TODO: run validation on valloader
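# The loop above relies on an AverageMeter helper that is not shown in the
# excerpt. A minimal sketch consistent with the update()/avg usage above
# (this is an assumption, not necessarily the repo's actual code):
class AverageMeter:
    """Tracks a running average of a scalar such as the loss."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # val: latest value; n: number of samples it was averaged over
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count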
import os
import random
import time
from os.path import isfile

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter  # or tensorboardX, depending on the repo

# CONFIGS, args, logger, Net, get_loader, train, validate, save_checkpoint,
# and DayHourMinute are defined elsewhere in the original file/repo.


def main():
    logger.info(args)

    assert os.path.isdir(CONFIGS["DATA"]["DIR"])

    if CONFIGS['TRAIN']['SEED'] is not None:
        random.seed(CONFIGS['TRAIN']['SEED'])
        torch.manual_seed(CONFIGS['TRAIN']['SEED'])
        cudnn.deterministic = True

    model = Net(numAngle=CONFIGS["MODEL"]["NUMANGLE"],
                numRho=CONFIGS["MODEL"]["NUMRHO"],
                backbone=CONFIGS["MODEL"]["BACKBONE"])

    if CONFIGS["TRAIN"]["DATA_PARALLEL"]:
        logger.info("Model Data Parallel")
        model = nn.DataParallel(model).cuda()
    else:
        model = model.cuda(device=CONFIGS["TRAIN"]["GPU_ID"])

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=CONFIGS["OPTIMIZER"]["LR"],
                                 weight_decay=CONFIGS["OPTIMIZER"]["WEIGHT_DECAY"])

    # learning rate scheduler
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=CONFIGS["OPTIMIZER"]["STEPS"],
                                         gamma=CONFIGS["OPTIMIZER"]["GAMMA"])

    best_acc1 = 0
    start_epoch = 0
    if args.resume:
        if isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # The original stored this in args.start_epoch but never read it
            # back; assign to the local used by the epoch loop below instead.
            start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    # dataloader
    train_loader = get_loader(CONFIGS["DATA"]["DIR"],
                              CONFIGS["DATA"]["LABEL_FILE"],
                              batch_size=CONFIGS["DATA"]["BATCH_SIZE"],
                              num_thread=CONFIGS["DATA"]["WORKERS"],
                              split='train')
    val_loader = get_loader(CONFIGS["DATA"]["VAL_DIR"],
                            CONFIGS["DATA"]["VAL_LABEL_FILE"],
                            batch_size=1,
                            num_thread=CONFIGS["DATA"]["WORKERS"],
                            split='val')
    logger.info("Data loading done.")

    # Tensorboard summary
    writer = SummaryWriter(log_dir=os.path.join(CONFIGS["MISC"]["TMP"]))

    best_acc = best_acc1
    is_best = False
    start_time = time.time()

    if CONFIGS["TRAIN"]["RESUME"] is not None:
        raise NotImplementedError

    if CONFIGS["TRAIN"]["TEST"]:
        validate(val_loader, model, 0, writer, args)
        return

    logger.info("Start training.")
    for epoch in range(start_epoch, CONFIGS["TRAIN"]["EPOCHS"]):
        train(train_loader, model, optimizer, epoch, writer, args)
        acc = validate(val_loader, model, epoch, writer, args)
        scheduler.step()

        if best_acc < acc:
            is_best = True
            best_acc = acc
        else:
            is_best = False

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc1': best_acc,
            'optimizer': optimizer.state_dict()
        }, is_best, path=CONFIGS["MISC"]["TMP"])

        t = time.time() - start_time
        elapsed = DayHourMinute(t)
        t /= (epoch + 1) - start_epoch  # seconds per epoch
        t = (CONFIGS["TRAIN"]["EPOCHS"] - epoch - 1) * t
        remaining = DayHourMinute(t)
        logger.info(
            "Epoch {0}/{1} finished, auxiliaries saved to {2} .\t"
            "Elapsed {elapsed.days:d} days {elapsed.hours:d} hours {elapsed.minutes:d} minutes.\t"
            "Remaining {remaining.days:d} days {remaining.hours:d} hours {remaining.minutes:d} minutes."
            .format(epoch, CONFIGS["TRAIN"]["EPOCHS"], CONFIGS["MISC"]["TMP"],
                    elapsed=elapsed, remaining=remaining))

    logger.info("Optimization done, ALL results saved to %s." % CONFIGS["MISC"]["TMP"])
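# main() uses two small helpers whose definitions are not in the excerpt.
# Minimal sketches consistent with the call sites above (the field names and
# the shutil-based "best" copy are assumptions, not the repo's actual code):
import shutil


class DayHourMinute:
    """Splits a duration in seconds into days/hours/minutes for logging."""

    def __init__(self, seconds):
        self.days = int(seconds // 86400)
        self.hours = int((seconds % 86400) // 3600)
        self.minutes = int((seconds % 3600) // 60)


def save_checkpoint(state, is_best, path):
    # Always save the latest state; keep a separate copy of the best epoch.
    filename = os.path.join(path, "checkpoint.pth")
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(path, "model_best.pth"))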
# Excerpt from a training script: torch, nn (torch.nn), tqdm, Net, and the
# dataset / hyperparameter variables are defined earlier in the file.
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           shuffle=False)

# Fall back to the CPU when no GPU is available (the original hard-coded 'cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.cuda.device_count() > 1 and device == 'cuda':
    print("Let's use", torch.cuda.device_count(), "GPUs!")

model = Net(num_classes=n_classes)
# Weights pretrained on my machine; now training on all images for 2 epochs.
# model.load_state_dict(torch.load('./epoch_5_val_loss_7.03_auc_0.844.pth'))
model = nn.DataParallel(model).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()

train_loss, val_loss = [], []
for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch, epochs - 1))
    print('-' * 10)

    model.train()
    running_loss = 0
    tk0 = tqdm(train_loader, total=int(len(train_loader)))
    for im, labels in tk0:
        inputs = im["image"].to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.long)
        optimizer.zero_grad()
        outputs = model(inputs)
        # The excerpt cuts off here; the standard continuation computes the
        # loss and takes an optimizer step.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
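    # The script tracks train_loss/val_loss lists but the excerpt stops before
    # the validation phase. A minimal sketch of an eval pass over valid_loader,
    # mirroring the training-side tensor handling (an assumption about the
    # omitted code, not the author's exact loop):
    model.eval()
    running_val_loss = 0
    with torch.no_grad():
        for im, labels in valid_loader:
            inputs = im["image"].to(device, dtype=torch.float)
            labels = labels.to(device, dtype=torch.long)
            outputs = model(inputs)
            running_val_loss += criterion(outputs, labels).item()
    train_loss.append(running_loss / len(train_loader))
    val_loss.append(running_val_loss / len(valid_loader))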
# The head of this DataLoader call (the dataset and batch_size arguments) is
# truncated in the excerpt; only the tail survives.
                                     shuffle=False, num_workers=2)

# dataiter = iter(loader)
# images, labels = dataiter.next()
# print(images)
# images = tensor_to_img(images)
# print(labels)

net = Net(14 * batch_size)
lstm = LSTMLayer(7 * 7 * (16 + 5 * 2), 64, 14 * 14 * (num_class + 5 * 2), 2, batch_size)
lossfunction = Loss(batch_size)
optimizer = optim.Adam([
    {'params': net.parameters()},
    {'params': lstm.parameters(), 'lr': 0.0001},
], lr=0, weight_decay=0)

if load_checkpoint:
    net.load_state_dict(torch.load(SAVE_PATH))

net.cuda()
# Note: lstm is never moved to the GPU here, and this second optimizer
# replaces the grouped one above and drops the lstm parameter group, so the
# LSTM weights would not be updated from this point on.
optimizer = optim.Adam(net.parameters(), lr=0.0001)

for epoch in range(2000):
    for i, data in enumerate(loader, 0):
        # get the inputs (the excerpt is truncated inside this loop body)
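# The snippet restores weights from SAVE_PATH when load_checkpoint is set, but
# the excerpt ends before the matching save. A hypothetical save_weights
# helper showing the usual counterpart; its name and per-epoch placement are
# assumptions, not the author's code:
def save_weights(net, path=SAVE_PATH):
    # state_dict() round-trips with net.load_state_dict(torch.load(path)) above
    torch.save(net.state_dict(), path)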