def createLang(name, dataDir):
    # NOTE: the call site originally passed an undefined name `splt`;
    # Lang(name) is assumed here since only `name` is in scope.
    lang = Lang(name)
    smalldatasets = DramaDataset(basedir=dataDir, maxFrame=0)
    print("Data size: {}".format(len(smalldatasets)))
    timer = Timer()
    for s1, s2, _ in smalldatasets:
        lang.addSentance(s1)
        lang.addSentance(s2)
    print("Created lang model. Number of words: {}".format(len(lang)))
    print("Total time: {:.2f}".format(timer.getTime()))
    return lang
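# `Lang` and `Timer` are defined elsewhere in this repo. For reference, a
# minimal, hypothetical sketch of the Lang vocabulary wrapper, assuming only
# the interface visible above (addSentance, __len__) and naive whitespace
# tokenization; the repo's real class may differ.
class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.index2word = {}
        self.word2count = {}

    def addSentance(self, sentence):
        # naive whitespace tokenization; a placeholder, not the real rule
        for word in sentence.split():
            if word not in self.word2index:
                idx = len(self.word2index)
                self.word2index[word] = idx
                self.index2word[idx] = word
                self.word2count[word] = 0
            self.word2count[word] += 1

    def __len__(self):
        return len(self.word2index)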
def main(): """Create the model and start the training.""" w, h = map(int, args.input_size.split(',')) args.input_size = (w, h) w, h = map(int, args.crop_size.split(',')) args.crop_size = (h, w) w, h = map(int, args.input_size_target.split(',')) args.input_size_target = (w, h) cudnn.enabled = True cudnn.benchmark = True str_ids = args.gpu_ids.split(',') gpu_ids = [] for str_id in str_ids: gid = int(str_id) if gid >=0: gpu_ids.append(gid) num_gpu = len(gpu_ids) args.multi_gpu = False if num_gpu>1: args.multi_gpu = True Trainer = AD_Trainer(args) Trainer.G = torch.nn.DataParallel( Trainer.G, gpu_ids) Trainer.D1 = torch.nn.DataParallel( Trainer.D1, gpu_ids) Trainer.D2 = torch.nn.DataParallel( Trainer.D2, gpu_ids) else: Trainer = AD_Trainer(args) print(Trainer) trainloader = data.DataLoader( cityscapesDataSet(args.data_dir, args.data_list, max_iters=args.num_steps * args.iter_size * args.batch_size, resize_size=args.input_size, crop_size=args.crop_size, scale=True, mirror=True, mean=IMG_MEAN, autoaug = args.autoaug), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True, drop_last=True) trainloader_iter = enumerate(trainloader) targetloader = data.DataLoader(robotDataSet(args.data_dir_target, args.data_list_target, max_iters=args.num_steps * args.iter_size * args.batch_size, resize_size=args.input_size_target, crop_size=args.crop_size, scale=False, mirror=args.random_mirror, mean=IMG_MEAN, set=args.set, autoaug = args.autoaug_target), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True, drop_last=True) targetloader_iter = enumerate(targetloader) # set up tensor board if args.tensorboard: args.log_dir += '/'+ os.path.basename(args.snapshot_dir) if not os.path.exists(args.log_dir): os.makedirs(args.log_dir) writer = SummaryWriter(args.log_dir) for i_iter in range(args.num_steps): loss_seg_value1 = 0 loss_adv_target_value1 = 0 loss_D_value1 = 0 loss_seg_value2 = 0 loss_adv_target_value2 = 0 loss_D_value2 = 0 adjust_learning_rate(Trainer.gen_opt , i_iter, args) adjust_learning_rate_D(Trainer.dis1_opt, i_iter, args) adjust_learning_rate_D(Trainer.dis2_opt, i_iter, args) for sub_i in range(args.iter_size): # train G # train with source _, batch = trainloader_iter.__next__() _, batch_t = targetloader_iter.__next__() images, labels, _, _ = batch images = images.cuda() labels = labels.long().cuda() images_t, labels_t, _, _ = batch_t images_t = images_t.cuda() labels_t = labels_t.long().cuda() with Timer("Elapsed time in update: %f"): loss_seg1, loss_seg2, loss_adv_target1, loss_adv_target2, loss_me, loss_kl, pred1, pred2, pred_target1, pred_target2, val_loss = Trainer.gen_update(images, images_t, labels, labels_t, i_iter) loss_seg_value1 += loss_seg1.item() / args.iter_size loss_seg_value2 += loss_seg2.item() / args.iter_size loss_adv_target_value1 += loss_adv_target1 / args.iter_size loss_adv_target_value2 += loss_adv_target2 / args.iter_size loss_me_value = loss_me if args.lambda_adv_target1 > 0 and args.lambda_adv_target2 > 0: loss_D1, loss_D2 = Trainer.dis_update(pred1, pred2, pred_target1, pred_target2) loss_D_value1 += loss_D1.item() loss_D_value2 += loss_D2.item() else: loss_D_value1 = 0 loss_D_value2 = 0 del pred1, pred2, pred_target1, pred_target2 if args.tensorboard: scalar_info = { 'loss_seg1': loss_seg_value1, 'loss_seg2': loss_seg_value2, 'loss_adv_target1': loss_adv_target_value1, 'loss_adv_target2': loss_adv_target_value2, 'loss_me_target': loss_me_value, 'loss_kl_target': loss_kl, 'loss_D1': loss_D_value1, 'loss_D2': 
loss_D_value2, 'val_loss': val_loss, } if i_iter % 100 == 0: for key, val in scalar_info.items(): writer.add_scalar(key, val, i_iter) print('exp = {}'.format(args.snapshot_dir)) print( '\033[1m iter = %8d/%8d \033[0m loss_seg1 = %.3f loss_seg2 = %.3f loss_me = %.3f loss_kl = %.3f loss_adv1 = %.3f, loss_adv2 = %.3f loss_D1 = %.3f loss_D2 = %.3f, val_loss=%.3f'%(i_iter, args.num_steps, loss_seg_value1, loss_seg_value2, loss_me_value, loss_kl, loss_adv_target_value1, loss_adv_target_value2, loss_D_value1, loss_D_value2, val_loss)) # clear loss del loss_seg1, loss_seg2, loss_adv_target1, loss_adv_target2, loss_me, loss_kl, val_loss if i_iter >= args.num_steps_stop - 1: print('save model ...') torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '.pth')) torch.save(Trainer.D1.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '_D1.pth')) torch.save(Trainer.D2.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '_D2.pth')) break if i_iter % args.save_pred_every == 0 and i_iter != 0: print('taking snapshot ...') torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth')) torch.save(Trainer.D1.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth')) torch.save(Trainer.D2.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D2.pth')) if args.tensorboard: writer.close()
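# adjust_learning_rate / adjust_learning_rate_D are defined elsewhere in the
# repo. Training loops of this kind usually decay the learning rate with a
# polynomial schedule; a sketch under that assumption (args.learning_rate and
# args.power are assumed field names, not confirmed by this file).
def lr_poly(base_lr, i_iter, max_iter, power):
    # poly decay: lr = base_lr * (1 - iter/max_iter)^power
    return base_lr * ((1 - float(i_iter) / max_iter) ** power)

def adjust_learning_rate(optimizer, i_iter, args):
    lr = lr_poly(args.learning_rate, i_iter, args.num_steps, args.power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr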
def trainer(args):
    modelDir = args.model
    LangFile = os.path.join(modelDir, "Lang.pkl")
    modelFile = args.checkpoint
    MaxEpoch = args.epoch
    BatchSize = args.batch
    DataDir = args.data
    lr = args.lr
    print("=========Use GPU: {}=========\n".format(use_cuda))

    lang, model = Loadmodel(modelDir, LangFile, modelFile, dataDir=DataDir)
    datasets = DramaDataset(basedir=DataDir,
                            maxFrame=0,
                            timeOffset=0.2,
                            startSeries=0,
                            maxSeries=40000,
                            subOffset=0,
                            subMax=None)
    loader = torch.utils.data.DataLoader(datasets,
                                         batch_size=BatchSize,
                                         shuffle=True,
                                         num_workers=1)
    print("Data size\t: {}".format(len(datasets)))
    print("Max epoch\t: {}\nBatch size\t: {}\nLearning rate\t: {}\n".format(
        MaxEpoch, BatchSize, lr))
    print("Start training........\n")

    writer = SummaryWriter(modelDir)
    if use_cuda:
        model.cuda()
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()
    recLoss = Average()
    timer = Timer()
    trainStep = 0

    for epoch in range(MaxEpoch):
        for i, data in enumerate(loader, 1):
            # try:
            pre, nex, imgs = data
            # mix negative pairs into the batch so BCELoss sees both classes
            pre, nex, scores = makeNegSample(pre, nex, negSize=1)
            loss = step(model=model,
                        optimizer=optimizer,
                        criterion=criterion,
                        subtitles=pre,
                        targets=nex,
                        scores=scores,
                        lang=lang)
            recLoss.addData(loss.item())
            writer.add_scalar('loss', loss.item(), trainStep)
            trainStep += 1
            loss = None
            if i % 50 == 0:
                print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                    epoch, i, timer.getAndReset(), recLoss.getAndReset()))
            # except Exception as exp:
            #     print("Step error: {}".format(i))
            #     print(exp)
        if i % 50 != 0:
            print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                epoch, i, timer.getAndReset(), recLoss.getAndReset()))
        modelName = os.path.join(
            modelDir, "SimilarityModel.{}.pth".format(int((epoch + 1) / 10)))
        print("Saving Epoch model: {}.....\n".format(modelName))
        torch.save(model, modelName)
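# makeNegSample is not defined in this file. Since the loop trains a
# similarity model with nn.BCELoss, it presumably pairs each subtitle with
# its true successor (score 1) and with shuffled successors (score 0). A
# minimal sketch under that assumption; for brevity it does not filter out
# the rare shuffled pair that lands back on its true successor.
import random
import torch

def makeNegSample(pre, nex, negSize=1):
    # keep positives with score 1.0, then add negSize mismatched
    # (subtitle, successor) pairs per positive with score 0.0
    pres, nexs, scores = list(pre), list(nex), [1.0] * len(pre)
    for _ in range(negSize):
        shuffled = list(nex)
        random.shuffle(shuffled)
        for p, n in zip(pre, shuffled):
            pres.append(p)
            nexs.append(n)
            scores.append(0.0)
    return pres, nexs, torch.tensor(scores)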
def trainer(args):
    ModalDir = args.modal
    LangFile = os.path.join(ModalDir, "Lang.pkl")
    ModalFile = os.path.join(ModalDir, "MainModal.pth")
    MaxEpoch = args.epoch
    BatchSize = args.batch
    DataDir = args.data
    lr = args.lr
    use_cuda = torch.cuda.is_available()

    # load data
    with open(DataDir, 'rb') as f:
        trainset = pickle.load(f)
    loader = torch.utils.data.DataLoader(trainset,
                                         batch_size=BatchSize,
                                         shuffle=True,
                                         num_workers=2)
    lang, modal = loadModal(ModalDir=ModalDir,
                            LangFile=LangFile,
                            ModalFile=ModalFile,
                            dataset=trainset)
    print("Data size: {}".format(len(trainset)))
    print("Max epoch: {}\nBatch size: {}\nLearning rate: {}\n".format(
        MaxEpoch, BatchSize, lr))
    print("Start training........\n")

    if use_cuda:
        # legacy Variable handling from pre-0.4 PyTorch
        global Variable
        Variable = Variable.cuda
        modal.cuda()
    modal.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(modal.parameters(), lr=lr)
    recLoss = Average()
    timer = Timer()

    # inclusive of the final epoch; the original range(1, MaxEpoch) stopped one short
    for epoch in range(1, MaxEpoch + 1):
        for i, data in enumerate(loader, 1):
            fras, engs = data
            in_fras, in_engs, out_engs = transData(fras, engs, lang)
            outputs, hidden = modal(in_fras, in_engs)
            outputs = flatMutileLength(outputs, out_engs[1])
            out_engs = flatMutileLength(out_engs[0], out_engs[1])
            loss = criterion(outputs, out_engs)
            recLoss.addData(loss.item())  # .item() replaces the deprecated loss.data[0]

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 100 == 0:
                print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                    epoch, i, timer.getAndReset(), recLoss.getAndReset()))
                print("Fra: {}\nEng: {}\nModel: {}\n".format(
                    fras[0], engs[0], predit(modal, lang, fras[0])))
        if epoch % 10 == 0:
            print("Saving epoch model.....\n")
            torch.save(modal,
                       os.path.join(ModalDir, "SubSubModal.{}.pth".format(epoch)))
            print("Fra: {}\nEng: {}\nModel: {}\n".format(
                fras[0], engs[0], predit(modal, lang, fras[0])))
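# flatMutileLength is defined elsewhere. From both call sites it appears to
# flatten a padded batch down to real tokens by each sequence's true length,
# so nn.CrossEntropyLoss never scores padding. A sketch assuming (B, T, C)
# model outputs or (B, T) target indices plus a 1-D tensor of lengths.
import torch

def flatMutileLength(padded, lengths):
    # concatenate only the first lengths[b] timesteps of each sequence;
    # yields (sum(lengths), C) for outputs and (sum(lengths),) for targets
    pieces = [padded[b, :int(l)] for b, l in enumerate(lengths)]
    return torch.cat(pieces, dim=0)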
def main(): """Create the model and start the training.""" print("NUMBER OF CLASSES: ", str(args.num_classes)) w, h = map(int, args.input_size.split(',')) args.input_size = (w, h) w, h = map(int, args.crop_size.split(',')) args.crop_size = (h, w) w, h = map(int, args.gt_size.split(',')) args.gt_size = (w, h) cudnn.enabled = True cudnn.benchmark = True # create result dir if not os.path.exists(args.result_dir): os.makedirs(args.result_dir) str_ids = args.gpu_ids.split(',') gpu_ids = [] for str_id in str_ids: gid = int(str_id) if gid >=0: gpu_ids.append(gid) num_gpu = len(gpu_ids) args.multi_gpu = False if num_gpu>1: args.multi_gpu = True Trainer = AD_Trainer(args) else: Trainer = AD_Trainer(args) TARGET_IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) trainloader = data.DataLoader( cityscapesDataSet(args.data_dir, args.data_list, max_iters=args.num_steps * args.batch_size, resize_size=args.input_size, crop_size=args.crop_size, set=args.set, scale=False, mirror=args.random_mirror, mean=TARGET_IMG_MEAN, autoaug = args.autoaug_target, source_domain=args.source_domain), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True, drop_last=True) trainloader_iter = enumerate(trainloader) ''' trainloader = data.DataLoader( gta5DataSet(args.data_dir, args.data_list, max_iters=args.num_steps * args.batch_size, resize_size=args.input_size, crop_size=args.crop_size, scale=True, mirror=True, mean=TARGET_IMG_MEAN, autoaug = args.autoaug), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True, drop_last=True) trainloader_iter = enumerate(trainloader) trainloader = data.DataLoader( synthiaDataSet(args.data_dir, args.data_list, max_iters=args.num_steps * args.batch_size, resize_size=args.input_size, crop_size=args.crop_size, scale=True, mirror=True, mean=TARGET_IMG_MEAN, autoaug = args.autoaug), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True, drop_last=True) trainloader_iter = enumerate(trainloader)''' # set up tensor board if args.tensorboard: args.log_dir += '/'+ os.path.basename(args.snapshot_dir) if not os.path.exists(args.log_dir): os.makedirs(args.log_dir) writer = SummaryWriter(args.log_dir) # load mIOU best_mIoUs = args.mIOU for i_iter in range(args.num_steps): loss_seg_value = 0 adjust_learning_rate(Trainer.gen_opt , i_iter, args) _, batch = trainloader_iter.__next__() images, labels, _, _ = batch images = images.cuda() labels = labels.long().cuda() with Timer("Elapsed time in update: %f"): loss_seg = Trainer.gen_update(images, labels, i_iter) loss_seg_value += loss_seg.item() if args.tensorboard: scalar_info = { 'loss_seg': loss_seg_value } if i_iter % 100 == 0: for key, val in scalar_info.items(): writer.add_scalar(key, val, i_iter) print('\033[1m iter = %8d/%8d \033[0m loss_seg = %.3f' %(i_iter, args.num_steps, loss_seg_value)) del loss_seg if i_iter % args.save_pred_every == 0 and i_iter != 0: mIoUs, _ = evaluate(args, args.gt_dir, args.gt_list, args.result_dir, Trainer.G) writer.add_scalar('mIOU', round(np.nanmean(mIoUs)*100, 2), int(i_iter/args.save_pred_every)) # (TB) if round(np.nanmean(mIoUs) * 100, 2) > best_mIoUs: print('save model ...') best_mIoUs = round(np.nanmean(mIoUs) * 100, 2) torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'supervised_seg_' + str(i_iter) + '.pth')) if args.tensorboard: writer.close()
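# evaluate() (defined elsewhere) returns one IoU per class, and the loop
# keeps the checkpoint whenever np.nanmean(mIoUs) improves on best_mIoUs.
# For reference, per-class IoU is conventionally computed from a confusion
# matrix like this (a sketch, not this repo's evaluate()):
import numpy as np

def per_class_iou(hist):
    # hist[i, j] counts pixels of ground-truth class i predicted as class j;
    # IoU_i = TP / (TP + FP + FN). Classes absent from both prediction and
    # ground truth give 0/0 = NaN, which is why the loop uses np.nanmean.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))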
def trainer(args):
    modelDir = args.model
    LangFile = os.path.join(modelDir, "Lang.pkl")
    modelFile = args.checkpoint
    MaxEpoch = args.epoch
    BatchSize = args.batch
    DataDir = args.data
    lr = args.lr
    print("=========Use Device: {}=========\n".format(device))
    print("=========SubToSub=========")

    # load data
    datasets = DramaDataset(basedir=DataDir, maxFrame=0, maxSeries=5)
    loader = torch.utils.data.DataLoader(datasets,
                                         batch_size=BatchSize,
                                         shuffle=True,
                                         num_workers=2)
    lang, model = loadModel(modelDir=modelDir,
                            LangFile=LangFile,
                            modelFile=modelFile,
                            dataset=datasets)
    print("Data size\t: {}".format(len(datasets)))
    print("Max epoch\t: {}\nBatch size\t: {}\nLearning rate\t: {}\n".format(
        MaxEpoch, BatchSize, lr))
    print("Start training........\n")

    model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    writer = SummaryWriter(modelDir)
    recLoss = Average()
    timer = Timer()
    trainStep = 0

    for epoch in range(MaxEpoch):
        for i, data in enumerate(loader, 1):
            try:
                pre, nex, imgs = data
                in_pre, in_nex, out_nex = transData(pre, nex, lang)
                outputs, hidden = model(in_pre, in_nex)
                outputs = flatMutileLength(outputs, out_nex[1])
                out_nexs = flatMutileLength(out_nex[0], out_nex[1])
                loss = criterion(outputs, out_nexs)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                recLoss.addData(loss.item())
                writer.add_scalar('loss', loss.item(), trainStep)
                trainStep += 1
                if i % 100 == 0:
                    model.eval()
                    print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                        epoch, i, timer.getAndReset(), recLoss.getAndReset()))
                    print("F: {}\nS: {}\nP: {}\n".format(
                        pre[0], nex[0], predit(model, lang, pre[0])))
                    model.train()
            except Exception as exp:
                print(exp)
        if i % 100 != 0:
            print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                epoch, i, timer.getAndReset(), recLoss.getAndReset()))
        modelName = os.path.join(
            modelDir, "SubSubModel.{}.pth".format(int((epoch + 1) / 5)))
        print("Saving Epoch model: {}.....\n".format(modelName))
        torch.save(model, modelName)
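# transData is defined elsewhere. From its call sites it converts raw
# sentence pairs into index tensors: encoder input, decoder input, and a
# (padded_targets, lengths) pair that flatMutileLength later unpads. A sketch
# under that assumed interface; the SOS/EOS/PAD ids and the tokenization are
# hypothetical, not the repo's actual scheme.
import torch

SOS, EOS, PAD = 0, 1, 2  # hypothetical special-token ids

def transData(pre, nex, lang):
    def encode(sentences, prepend=None, append=None):
        # index each sentence with lang and pad to the batch maximum
        seqs = []
        for s in sentences:
            ids = [lang.word2index[w] for w in s.split() if w in lang.word2index]
            if prepend is not None:
                ids = [prepend] + ids
            if append is not None:
                ids = ids + [append]
            seqs.append(ids)
        lengths = torch.tensor([len(s) for s in seqs])
        padded = torch.full((len(seqs), int(lengths.max())), PAD, dtype=torch.long)
        for b, ids in enumerate(seqs):
            padded[b, :len(ids)] = torch.tensor(ids)
        return padded, lengths

    in_pre, _ = encode(pre)
    in_nex, _ = encode(nex, prepend=SOS)  # decoder input starts with SOS
    out_nex = encode(nex, append=EOS)     # (targets, lengths), ending with EOS
    return in_pre, in_nex, out_nex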
def trainer(args):
    modelDir = args.model
    LangFile = os.path.join(modelDir, "Lang.pkl")
    modelFile = args.checkpoint
    MaxEpoch = args.epoch
    BatchSize = args.batch
    DataDir = args.data
    lr = args.lr
    print("=========Start training ImgSubToSub=========\n")
    print("=========Use Device: {}=========\n".format(device))

    lang, model = Loadmodel(modelDir, LangFile, modelFile, dataDir=DataDir)
    datasets = DramaDataset(basedir=DataDir,
                            maxFrame=1,
                            maxSeries=5,
                            timeOffset=0.2,
                            useBmp=True,
                            randomStart=False,
                            transform=transforms.Compose([
                                transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406],
                                                     [0.229, 0.224, 0.225])
                            ]))
    loader = torch.utils.data.DataLoader(datasets,
                                         batch_size=BatchSize,
                                         shuffle=True,
                                         num_workers=4)
    print("Data size\t: {}".format(len(datasets)))
    print("Max epoch\t: {}\nBatch size\t: {}\nLearning rate\t: {}\n".format(
        MaxEpoch, BatchSize, lr))
    print("Start training........\n")

    writer = SummaryWriter(modelDir)
    model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    recLoss = Average()
    timer = Timer()
    trainStep = 0

    for epoch in range(MaxEpoch):
        for i, data in enumerate(loader, 1):
            try:
                pre, nex, imgs = data
                loss = step(model=model,
                            criterion=criterion,
                            optimizer=optimizer,
                            imgs=imgs[0],
                            subtitles=pre,
                            targets=nex,
                            lang=lang)
                recLoss.addData(loss.item())
                writer.add_scalar('loss', loss.item(), trainStep)
                loss = None
                if i % 50 == 0:
                    print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                        epoch, i, timer.getAndReset(), recLoss.getAndReset()))
                    model.eval()
                    pred = predit(model, lang, imgs[0][:1], pre[0])
                    model.train()
                    print("F: {}\nS: {}\nP: {}\n".format(pre[0], nex[0], pred))
            except Exception as exp:
                print("Step error: {}".format(i))
                print(exp)
            trainStep += 1
        if i % 50 != 0:
            print("Epoch: {:2d}, Step: {:5d}, Time: {:6.3f}, Loss: {:7.5f}".format(
                epoch, i, timer.getAndReset(), recLoss.getAndReset()))
        modelName = os.path.join(
            modelDir, "SubImgModel.{}.pth".format(int((epoch + 1) / 5)))
        print("Saving Epoch model: {}.....\n".format(modelName))
        torch.save(model, modelName)
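# torch.save(model, modelName) pickles the entire module object, not just its
# weights, so loading the checkpoint requires the model's class to be
# importable from the same module path it had at save time. A short usage
# sketch (the filename is one this trainer would produce):
import torch

model = torch.load("SubImgModel.0.pth", map_location="cpu")
# on PyTorch >= 2.6 pass weights_only=False to allow full-module unpickling
model.eval()
# Saving state_dict() instead, as the segmentation trainers above do, avoids
# this coupling between checkpoint and source tree.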