def main():
    """Entry point for the NSML image-inpainting training script.

    Builds the Inpaint model and an Adam optimizer, registers them with
    NSML, and (in 'train' mode) runs an L1-loss training loop over masked
    images, periodically dumping sample images, running validation, and
    checkpointing once per epoch.

    NOTE(review): this chunk arrived whitespace-mangled; the indentation
    below is reconstructed. It relies on module-level names not visible
    here (get_args, Inpaint, bind_nsml, nsml, dir_data_root, use_nsml,
    trange, tqdm, data_loader_with_split, compose, l1_loss, local_eval,
    vutils) -- confirm against the full file.
    """
    args = get_args()
    # Prefer the first CUDA device when available; otherwise fall back to CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = Inpaint()
    model = model.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=args.lr,
                             betas=(args.beta1, args.beta2))
    # bind_nsml hooks model/optimizer state into NSML save/load.
    # `load` is unused here; loading is presumably driven by the platform.
    save, load = bind_nsml(model, optim)
    if args.pause == 1:
        # NSML re-enters the script in paused mode for inference/submission.
        nsml.paused(scope=locals())
    if args.mode == 'train':
        path_train = os.path.join(dir_data_root, 'train')
        path_train_data = os.path.join(dir_data_root, 'train', 'train_data')
        tr_loader, val_loader = data_loader_with_split(
            path_train, batch_size=args.batch_size)
        postfix = dict()  # metrics dict reused for tqdm postfix / nsml.report
        total_step = 0    # global step counter across all epochs
        for epoch in trange(args.num_epochs, disable=use_nsml):
            pbar = tqdm(enumerate(tr_loader), total=len(tr_loader),
                        disable=use_nsml)
            # Each batch unpacks as (_, masked input, mask, ground truth);
            # the first element is discarded -- TODO confirm its meaning.
            for step, (_, x_input, mask, x_GT) in pbar:
                total_step += 1
                x_GT = x_GT.to(device)
                x_input = x_input.to(device)
                mask = mask.to(device)
                # Feed the mask as extra input channel(s) alongside the image.
                x_mask = torch.cat([x_input, mask], dim=1)
                model.zero_grad()
                x_hat = model(x_mask)
                # compose() presumably pastes predicted pixels into the masked
                # region of the input -- verify against its definition.
                x_composed = compose(x_input, x_hat, mask)
                loss = l1_loss(x_composed, x_GT)
                loss.backward()
                optim.step()
                postfix['loss'] = loss.item()
                if use_nsml:
                    postfix['epoch'] = epoch
                    postfix['step_'] = step
                    postfix['total_step'] = total_step
                    postfix['steps_per_epoch'] = len(tr_loader)
                # Periodic qualitative image dump plus a validation metric.
                # NOTE(review): step 0 of every epoch also triggers this.
                if step % args.eval_every == 0:
                    vutils.save_image(x_GT, 'x_GT.png', normalize=True)
                    vutils.save_image(x_input, 'x_input.png', normalize=True)
                    vutils.save_image(x_hat, 'x_hat.png', normalize=True)
                    vutils.save_image(mask, 'mask.png', normalize=True)
                    metric_eval = local_eval(model, val_loader, path_train_data)
                    postfix['metric_eval'] = metric_eval
                if use_nsml:
                    # NOTE(review): in the mangled source it is ambiguous
                    # whether nsml.report shares the print_every guard with
                    # print(); grouped here -- confirm against the original.
                    if step % args.print_every == 0:
                        print(postfix)
                        nsml.report(**postfix, scope=locals(), step=total_step)
                else:
                    pbar.set_postfix(postfix)
            # Checkpoint once per epoch: NSML-side when on the platform,
            # the local save hook otherwise.
            if use_nsml:
                nsml.save(epoch)
            else:
                save(epoch)
def main():
    """Entry point for the NSML super-resolution training script.

    Parses CLI options, dynamically imports the requested network
    architecture module, builds model/criterion/optimizer on the GPU,
    registers with NSML, and (in train mode) runs the per-epoch train()
    helper, checkpointing after every epoch.

    NOTE(review): this chunk arrived whitespace-mangled; indentation below
    is reconstructed. It relies on module-level names not visible here
    (parser, Logger, bind_nsml, nsml, IS_ON_NSML, DATASET_PATH,
    data_loader_with_split, train) -- confirm against the full file.
    """
    global opt, model
    opt = parser.parse_args()
    cudnn.benchmark = True  # autotune conv kernels (fixed input sizes assumed)
    log = Logger()

    # Building model: the architecture module must expose `Net` and
    # `criterion` factories (looked up by name via getattr).
    module_net = import_module('model.' + opt.network_archi)
    model = getattr(module_net, 'Net')()
    criterion = getattr(module_net, 'criterion')()
    model = model.cuda()
    criterion = criterion.cuda()

    # Setting Optimizer: only trainable (requires_grad) parameters.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=opt.lr)

    # *** Reserved for nsml ***
    bind_nsml(model, optimizer)
    if opt.pause:
        nsml.paused(scope=locals())
    # *** Reserved for nsml *** (end)

    if opt.mode == "train":
        if IS_ON_NSML:
            opt.dataset_path = os.path.join(DATASET_PATH, 'train', 'train_data')
        else:
            opt.dataset_path = '/home/data/nipa_faces_sr_tmp2/train/train_data'  # local datapath

        training_data_loader, val_loader = data_loader_with_split(
            opt.dataset_path, train_split=0.9, batch_size=opt.batchSize)

        # Training
        for epoch in range(opt.nEpochs):
            # NOTE(review): only "edsr*" architectures are trained; with any
            # other network_archi each epoch is a no-op -- confirm intended.
            if opt.network_archi.startswith("edsr"):
                average_epoch_loss_train = train(training_data_loader,
                                                 val_loader, optimizer,
                                                 model, criterion, epoch)
                info = {'train_loss': average_epoch_loss_train}
                nsml.save(str(epoch + 1))  # checkpoint named by 1-based epoch
                for tag, value in info.items():
                    log.scalar_summary(tag, value, epoch)
# NOTE(review): top-level fragment of a training script. It arrived
# whitespace-mangled (indentation reconstructed) and is cut off inside the
# inner batch loop -- everything after `pred = model(x)` is outside this view.
# Relies on names defined elsewhere: cuda, model, loss_fn, base_lr, config,
# bind_nsml, nsml, mode, data_loader_with_split, TRAIN_DATASET_PATH,
# train_split, local_eval, num_epochs.

# Move model and loss to the GPU when CUDA is available (flag set elsewhere).
if cuda:
    model = model.cuda()
    loss_fn = loss_fn.cuda()

# Optimize only parameters that require gradients; small L2 weight decay.
optimizer = Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=base_lr, weight_decay=1e-4)
# Decay the learning rate by 10x every 40 epochs.
scheduler = StepLR(optimizer, step_size=40, gamma=0.1)

bind_nsml(model, optimizer, scheduler)
if config.pause:
    # NSML re-enters the script paused for inference/submission.
    nsml.paused(scope=locals())

if mode == 'train':
    tr_loader, val_loader, val_label_file = data_loader_with_split(
        root=TRAIN_DATASET_PATH, train_split=train_split)
    time_ = datetime.datetime.now()
    num_batches = len(tr_loader)
    # Validation baseline before any training.
    local_eval(model, val_loader, val_label_file)
    for epoch in range(num_epochs):
        # NOTE(review): stepping the scheduler at epoch start is the
        # pre-PyTorch-1.1 convention -- verify against the torch version used.
        scheduler.step()
        model.train()
        for iter_, data in enumerate(tr_loader):
            x, label = data
            if cuda:
                x = x.cuda()
                label = label.cuda()
            pred = model(x)
            # ... (loss/backward/step continue beyond this chunk)
# for lb_id in range(num_classes): # if lbs.count(lb_id) > 150: # continue # targets_only.append(lb_id) # print(targets_only) if config.transfer: # nsml.load(checkpoint='transfer', session='team_286/4_cls_food/89') nsml.load(checkpoint='100', session='team_286/4_cls_food/103') # cv=1 cutmix 0.5 # nsml.load(checkpoint='55', session='team_286/7_icls_face/2') # nsml.load(checkpoint='transfer', session='team_286/8_iret_food/12') # nsml.load(checkpoint='20', session='team_286/9_iret_car/16') nsml.save('resave') sys.exit(0) tr_loader, val_loader, val_label = data_loader_with_split(root=TRAIN_DATASET_PATH, cv_ratio=config.ratio, cv=config.cv, batch_size=C.get()['batch']) time_ = datetime.datetime.now() best_val_top1 = 0 dataiter = iter(tr_loader) num_steps = 100000 // C.get()['batch'] from pystopwatch2 import PyStopwatch for epoch in range(C.get()['epochs']): w = PyStopwatch() metrics = Accumulator() scheduler.step() model.train() cnt = 0 for iter_ in range(num_steps):
# NOTE(review): top-level fragment of a classification training script. It
# arrived whitespace-mangled (indentation reconstructed) and is cut off
# inside the batch loop right after the inputs are moved to the GPU.
# Relies on names defined elsewhere: model, optimizer, scheduler, config,
# bind_nsml, nsml, mode, IS_ON_NSML, data_loader_with_split,
# TRAIN_DATASET_PATH, train_split, batch_size, local_eval, num_epochs, cuda.

bind_nsml(model, optimizer, scheduler)
if config.pause:
    # NSML re-enters the script paused for inference/submission.
    nsml.paused(scope=locals())

if mode == "train":
    # Local (non-NSML) runs log to both ./test.log and the console.
    if not IS_ON_NSML:
        logger = logging.getLogger("ResNet")
        logger.setLevel(logging.INFO)
        fileHandler = logging.FileHandler("./test.log")
        streamHandler = logging.StreamHandler()
        logger.addHandler(fileHandler)
        logger.addHandler(streamHandler)
    tr_loader, val_loader, val_label = data_loader_with_split(
        root=TRAIN_DATASET_PATH, train_split=train_split,
        batch_size=batch_size)
    time_ = datetime.datetime.now()
    num_batches = len(tr_loader)
    # Validation baseline before any training.
    eval_result = local_eval(model, val_loader, val_label)
    for epoch in range(num_epochs):
        epoch_start_time_ = datetime.datetime.now()
        # NOTE(review): per-epoch scheduler step at epoch start is the
        # pre-PyTorch-1.1 convention -- verify against the torch version used.
        scheduler.step()
        model.train()
        for iter_, data in enumerate(tr_loader):
            # Each batch unpacks as (_, image, label); the first element is
            # discarded -- presumably a sample id. TODO confirm.
            _, x, label = data
            if cuda:
                x = x.cuda()
                label = label.cuda()
            # ... (forward/loss/backward continue beyond this chunk)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.) bind_nsml(model, optimizer, scheduler) if config.pause: nsml.paused(scope=locals()) if config.transfer: nsml.load(checkpoint='100', session='team_286/12_idet_food/41') nsml.save('resave') sys.exit(0) if mode == 'train': tr_loader, val_loader, val_label_file = data_loader_with_split( root=TRAIN_DATASET_PATH, train_split=train_split, batch_size=config.batch) time_ = datetime.datetime.now() num_batches = len(tr_loader) local_eval(model, val_loader, val_label_file) best_iou = 0. for epoch in range(num_epochs): metrics = Accumulator() scheduler.step() model.train() cnt = 0 for iter_, data in enumerate(tr_loader): x, label = data label[:, :, 2:] = label[:, :,