def main():
    """Build the DCGAN model and its data loaders, then optionally train.

    Configuration is read from the module-level ``opt`` and ``device``
    objects. Both a training and a validation loader are constructed; only
    the training loader is consumed here. Training runs when
    ``opt.is_train`` equals the string ``'True'``; otherwise ``"Done!"`` is
    printed.
    """
    model = DCGAN_MODEL(opt, device)
    print("using DCGAN model")

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # cudnn.benchmark = True

    # preparing the training loader
    train_loader = torch.utils.data.DataLoader(
        ImageLoader(
            opt.img_path,
            transforms.Compose([
                # transforms.Scale was deprecated and later removed from
                # torchvision; Resize is its drop-in replacement and, given
                # an int, also rescales keeping the original aspect ratio.
                transforms.Resize(128),
                # we get only the center of that rescaled image
                transforms.CenterCrop(128),
                # random crop within the center crop
                # NOTE(review): both crops are 128, so this appears to be a
                # no-op -- confirm whether a larger center crop was intended.
                transforms.RandomCrop(128),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opt.data_path,
            partition='train'),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.workers,
        pin_memory=True)
    print('Training loader prepared.')

    # preparing validation loader (deterministic: no random augmentation,
    # no shuffling)
    val_loader = torch.utils.data.DataLoader(
        ImageLoader(
            opt.img_path,
            transforms.Compose([
                transforms.Resize(128),
                transforms.CenterCrop(128),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opt.data_path,
            partition='val'),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.workers,
        pin_memory=True)
    print('Validation loader prepared.')

    # Start model training; opt.is_train is compared as a string, not a bool.
    if opt.is_train == 'True':
        model.train(train_loader)
    else:
        print("Done!")
# Evaluation script: run model_upconv7 over the data produced from
# 'demo/demo_imgs/' and print the mean PSNR and MS-SSIM of its outputs.
DCSCN_12 = "model_check_points/DCSCN/DCSCN_model_387epos_L12_noise_1.pt"
# NOTE(review): model_dcscn is loaded but the loop below only uses
# model_upconv7 -- confirm whether it is needed further down.
model_dcscn = torch.load(DCSCN_12)
model_upconv7 = UpConv_7()
model_upconv7.load_pre_train_weights("model_check_points/Upconv_7/anime/noise0_scale2.0x_model.json")

img_dataset = ImageData(img_folder='demo/demo_imgs/',
                        max_patch_per_img=1000,
                        patch_size=96,
                        shrink_size=2,
                        noise_level=1,
                        down_sample_method=Image.BICUBIC,
                        color_mod='RGB')
# Padding is taken from the model being evaluated.
img_data = ImageLoader(img_dataset,
                       up_sample=None,
                       batch_size=1,
                       shuffle=True,
                       pad_img=model_upconv7.offset)  # DCSCN must set pad_img = 0

# One score per batch yielded by the loader.
ssim_score = []
psnr_score = []
for img in tqdm(img_data, ascii=True):
    # Each item unpacks into an input and its reference
    # (presumably low-/high-resolution -- TODO confirm).
    lr, hr = img
    out = model_upconv7.forward_checkpoint(lr)
    psnr_score.append(image_quality.calc_psnr(out, hr))
    ssim_score.append(image_quality.msssim(out, hr))

print("Averge PSNR score: {:.4f}".format(np.mean(psnr_score)))
print("Average MS-SSIM score: {:.4f}".format(np.mean(ssim_score)))
# Resolve the list of input images from the command-line arguments, start
# the loading/detection pipeline and load the pose network.
inputpath = args.inputpath
inputlist = args.inputlist
mode = args.mode
if not os.path.exists(args.outputpath):
    os.mkdir(args.outputpath)

if len(inputlist):
    # Read the list inside a context manager so the file handle is closed
    # instead of being leaked.
    with open(inputlist, 'r') as list_file:
        im_names = list_file.readlines()
elif len(inputpath) and inputpath != '/':
    # Only the top-level directory listing is wanted: bare file names are
    # handed to ImageLoader, so names from sub-directories could not be
    # resolved. Looping over os.walk and overwriting im_names kept the
    # *last* visited directory and left im_names undefined when the
    # directory did not exist.
    top_level = next(os.walk(inputpath), None)
    if top_level is None:
        raise IOError('Error: cannot list input directory {}'.format(inputpath))
    im_names = top_level[2]
else:
    raise IOError('Error: must contain either --indir/--list')

# Load input images
data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                          format='yolo').start()

# Load detection loader
print('Loading YOLO model..')
sys.stdout.flush()
det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
det_processor = DetectionProcessor(det_loader).start()

# Load pose model
pose_dataset = Mscoco()
if args.fast_inference:
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
else:
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
pose_model.cuda()
pose_model.eval()
def main():
    """Train the generator/discriminator pair on the training loader.

    Configuration comes from the module-level ``opt`` and ``device``
    objects. Each batch is moved to ``device``; item 0 is used as the real
    images and items 1-4 are passed to the generator together with a noise
    vector. After every epoch a summary line is printed, and on epochs that
    are a multiple of ``opt.sample_interval`` samples and model weights are
    saved.
    """
    # Loss function
    adversarial_loss = torch.nn.BCELoss()

    # Initialize generator and discriminator
    generator = Generator()
    discriminator = Discriminator()

    # Initialize weights
    generator.apply(weights_init_normal)
    discriminator.apply(weights_init_normal)

    # DataParallel
    generator = nn.DataParallel(generator).to(device)
    discriminator = nn.DataParallel(discriminator).to(device)

    # Dataloader
    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # cudnn.benchmark = True

    # preparing the training loader
    train_loader = torch.utils.data.DataLoader(
        ImageLoader(
            opt.img_path,
            transforms.Compose([
                # transforms.Scale was deprecated and later removed from
                # torchvision; Resize is its drop-in replacement and, given
                # an int, also rescales keeping the original aspect ratio.
                transforms.Resize(128),
                # we get only the center of that rescaled image
                transforms.CenterCrop(128),
                # random crop within the center crop
                # NOTE(review): both crops are 128, so this appears to be a
                # no-op -- confirm whether a larger center crop was intended.
                transforms.RandomCrop(128),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opt.data_path,
            partition='train'),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.workers,
        pin_memory=True)
    print('Training loader prepared.')

    # preparing validation loader (built but not consumed in this function)
    val_loader = torch.utils.data.DataLoader(
        ImageLoader(
            opt.img_path,
            transforms.Compose([
                transforms.Resize(128),
                transforms.CenterCrop(128),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opt.data_path,
            partition='val'),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.workers,
        pin_memory=True)
    print('Validation loader prepared.')

    # Optimizers
    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=opt.lr,
                                   betas=(opt.b1, opt.b2))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=opt.lr,
                                   betas=(opt.b1, opt.b2))

    # ----------
    #  Training
    # ----------
    for epoch in range(opt.n_epochs):
        start_time = time.time()
        # The context manager closes the bar even if a batch raises.
        with tqdm(total=len(train_loader)) as pbar:
            for i, data in enumerate(train_loader):
                # Move every element of the batch to the training device.
                input_var = [item.to(device) for item in data]
                imgs = input_var[0]
                batch_size = imgs.shape[0]

                # Adversarial ground truths, created directly on the device
                # (float32, matching the former FloatTensor conversion).
                valid = torch.ones(batch_size, 1,
                                   dtype=torch.float32, device=device)
                fake = torch.zeros(batch_size, 1,
                                   dtype=torch.float32, device=device)

                # -----------------
                #  Train Generator
                # -----------------
                optimizer_G.zero_grad()

                # Sample noise as generator input. NumPy is kept as the
                # noise source so seeded runs draw the same values as before.
                z = np.random.normal(0, 1, (batch_size, opt.latent_dim))
                z = torch.FloatTensor(z).to(device)

                # Generate a batch of images
                gen_imgs = generator(z, input_var[1], input_var[2],
                                     input_var[3], input_var[4])

                # Loss measures generator's ability to fool the discriminator
                g_loss = adversarial_loss(discriminator(gen_imgs), valid)
                g_loss.backward()
                optimizer_G.step()

                # ---------------------
                #  Train Discriminator
                # ---------------------
                optimizer_D.zero_grad()

                # Measure discriminator's ability to classify real from
                # generated samples; detach() keeps the discriminator update
                # from back-propagating into the generator.
                real_loss = adversarial_loss(discriminator(imgs), valid)
                fake_loss = adversarial_loss(discriminator(gen_imgs.detach()),
                                             fake)
                d_loss = (real_loss + fake_loss) / 2
                d_loss.backward()
                optimizer_D.step()

                pbar.update(1)

        # Summary uses the values of the last batch of the epoch.
        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f] [Time Elapsed: %f]"
            % (epoch, opt.n_epochs, i, len(train_loader), d_loss.item(),
               g_loss.item(), time.time() - start_time))

        if epoch % opt.sample_interval == 0:
            save_samples(epoch, gen_imgs.data[:25])
            # NOTE(review): confirm that weights are meant to be saved only
            # on sampling epochs rather than after every epoch.
            save_model(epoch, generator.state_dict(),
                       discriminator.state_dict())
# Outside training mode, the 'birds' dataset is read unshuffled from its
# 'test' split. bshuffle is otherwise expected to be set before this point.
if not cfg.TRAIN.FLAG:
    if cfg.DATASET_NAME == 'birds':
        bshuffle = False
        split_dir = 'test'

# Get data loader
# Image side length: the base size doubled once per additional branch.
imsize = cfg.TREE.BASE_SIZE * (2 ** (cfg.TREE.BRANCH_NUM-1))
# Rescale to 76/64 of the target size, then take a random crop of the target
# size and flip at random.
# NOTE(review): transforms.Scale is deprecated/removed in recent torchvision
# releases (replaced by transforms.Resize) -- confirm the pinned version.
image_transform = transforms.Compose([
    transforms.Scale(int(imsize * 76 / 64)),
    transforms.RandomCrop(imsize),
    transforms.RandomHorizontalFlip()])
# Only data directories whose path contains "recipe" are supported here.
if cfg.DATA_DIR.find("recipe") != -1:
    from dataloader import ImageLoader
    dataset = ImageLoader(
        img_path="../../data/img_data",
        transform = image_transform,
        data_path = "../../data",
        partition="train")
    print("using recipe1M dataset")
else:
    print(cfg.DATA_DIR, " dataset not found")
    raise ValueError
# NOTE(review): this checks the dataset's truthiness and is stripped under
# python -O -- confirm it is only a sanity check.
assert dataset
# One entry per comma-separated GPU id; the batch size scales with it.
num_gpu = len(cfg.GPU_ID.split(','))
dataloader = torch.utils.data.DataLoader(
    dataset, batch_size=cfg.TRAIN.BATCH_SIZE * num_gpu,
    drop_last=True, shuffle=bshuffle, num_workers=int(cfg.WORKERS))
def call_alphapose(input_dir, output_dir, format='open', batchSize=1):
    """Run detection and pose estimation on the images in ``input_dir``.

    Results collected by the data writer are written as JSON into
    ``output_dir`` and post-processed with ``correct_json_save``.

    Args:
        input_dir: Directory whose top-level files are used as input images.
        output_dir: Directory receiving the JSON output; created if missing.
        format: Forwarded to ``write_json`` as ``_format``.
        batchSize: Batch size for the image/detection loaders and for the
            pose network.

    Raises:
        IOError: If ``input_dir`` cannot be listed (e.g. it does not exist).
    """
    import time  # local import: only needed for the polling sleep below

    if not os.path.exists(output_dir):
        # makedirs also creates missing parent directories.
        os.makedirs(output_dir)

    # Only the top-level listing is wanted: the bare file names are resolved
    # against dir_path=input_dir. Looping over os.walk and overwriting
    # im_names kept the *last* visited directory and left im_names undefined
    # when input_dir did not exist.
    top_level = next(os.walk(input_dir), None)
    if top_level is None:
        raise IOError('Error: cannot list input directory {}'.format(input_dir))
    im_names = top_level[2]
    print(im_names)

    data_loader = ImageLoader(im_names, batchSize=batchSize, format='yolo',
                              dir_path=input_dir).start()
    det_loader = DetectionLoader(data_loader, batchSize=batchSize).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    # Per-image timings: detection ('dt'), pose ('pt'), post-processing ('pn').
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(False).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: hand the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation: feed the crops through the network in chunks
            # of at most batchSize (slicing clamps the final, shorter chunk).
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[start:start + batchSize].cuda())
                for start in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

    # Wait for the writer to drain its queue. Sleeping between polls yields
    # the CPU instead of spinning in a tight loop.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, output_dir, _format=format)
    correct_json_save(output_dir)
    print('Over')
# NOTE(review): the statement that opens this transform pipeline starts
# outside the visible excerpt; the first two lines are kept exactly as found.
transforms.ToTensor(), tensor_normalizer()])
transform_list.append(transform)
print('transforms done')

####################################
# construct ImageLoaders and dataloader
####################################
DATADIR = os.path.join('..', 'Datasets')
BATCHSIZE = 4
# Sub-directory names under DATADIR; each is expected to contain 'photos'
# and 'sketches' folders (see the ImageLoader construction below).
dataset_list = [
    'AR', 'CUHK', 'CUHK_FERET', 'XM2VTS', 'VIPSL0', 'VIPSL1', 'VIPSL2',
    'VIPSL3', 'VIPSL4'
]
# One loader per dataset, pairing its photo folder with its sketch folder.
imageloaders = [
    ImageLoader(os.path.join(DATADIR, dname, 'photos'),
                os.path.join(DATADIR, dname, 'sketches'))
    for dname in dataset_list
]
cs_dataset = ContentStyleDataset(imageloaders, transform_list)
dataloader = DataLoader(cs_dataset,
                        batch_size=BATCHSIZE,
                        shuffle=True,
                        num_workers=4)
print('dataloader done')

############################################################
# instantiate neural networks, loss functions and optimizers
############################################################
# The generator is sized by the number of datasets listed above.
netG = StyleBankNet(len(dataset_list))
netD = Discriminator()
def test():
    """Run detection and pose estimation over the images in ``args.inputpath``.

    Image files are processed in ascending order of the integer value of
    their base name. The module-level ``pose_model`` is used for inference.
    Results are written as JSON to ``args.outputpath``.

    Returns:
        The result list returned by the data writer.

    Raises:
        IOError: If ``args.inputpath`` cannot be listed.
        ValueError: If a file's base name is not an integer (from the
            numeric sort key).
    """
    import time  # local import: only needed for the polling sleep below

    inputpath = args.inputpath

    # Only the top-level listing is wanted. Looping over os.walk and
    # overwriting im_names kept the *last* visited directory and left
    # im_names undefined when the directory did not exist.
    top_level = next(os.walk(inputpath), None)
    if top_level is None:
        raise IOError('Error: cannot list input directory {}'.format(inputpath))
    # Numeric sort so that e.g. "10.jpg" follows "9.jpg".
    im_names = sorted(top_level[2],
                      key=lambda x: int(os.path.splitext(x)[0]))
    print(im_names)

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=1, format='yolo').start()

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=1).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Per-image timings: detection ('dt'), pose ('pt'), post-processing ('pn').
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))

    batchSize = args.posebatch
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: hand the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation: feed the crops through the network in chunks
            # of at most batchSize (slicing clamps the final, shorter chunk).
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[start:start + batchSize].cuda())
                for start in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('===========================> Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Wait for the writer to drain its queue. Sleeping between polls yields
    # the CPU instead of spinning in a tight loop.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)
    return final_result
# Resolve the list of input images from the command-line arguments, start
# the loading/detection pipeline and load the pose network.
# (inputpath is assigned before this excerpt.)
inputlist = args.inputlist
mode = args.mode
if not os.path.exists(args.outputpath):
    os.mkdir(args.outputpath)

if len(inputlist):
    # Read the list inside a context manager so the file handle is closed
    # instead of being leaked.
    with open(inputlist, 'r') as list_file:
        im_names = list_file.readlines()
elif len(inputpath) and inputpath != '/':
    # Only the top-level directory listing is wanted: bare file names are
    # handed to ImageLoader, so names from sub-directories could not be
    # resolved. Looping over os.walk and overwriting im_names kept the
    # *last* visited directory and left im_names undefined when the
    # directory did not exist.
    top_level = next(os.walk(inputpath), None)
    if top_level is None:
        raise IOError('Error: cannot list input directory {}'.format(inputpath))
    im_names = top_level[2]
else:
    raise IOError('Error: must contain either --indir/--list')

# Load input images meanwhile start processes, threads
data_loader = ImageLoader(im_names,
                          batchSize=args.detbatch,
                          format='yolo',
                          reso=int(args.inp_dim)).start()

# Load detection loader
print('Loading YOLO model..')
sys.stdout.flush()  # for multithread displaying
det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
det_processor = DetectionProcessor(det_loader).start()

# Load pose model
pose_dataset = Mscoco()  # is_train, res, joints, rot_factor
if args.fast_inference:
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
else:
    pose_model = InferenNet(4 * 1 + 1, pose_dataset)
pose_model.cuda()
def handle_video(video_file):
    """Estimate 2D keypoints for the pre-split frames of ``video_file``.

    Frames are read from ``outputs/alpha_pose_<video_name>/split_image/``
    (files whose name contains ``png`` or ``jpg``, in sorted order). For
    every entry in the writer's results the candidate with the largest
    ``proposal_score * calculate_area(keypoints)`` is kept. The JSON results
    and a compressed ``<video_name>.npz`` are written to
    ``outputs/alpha_pose_<video_name>``.

    Side effects: overwrites ``args.video``, ``args.inputpath`` and
    ``args.outputpath``, and removes an existing ``vis`` sub-directory of
    the output path.

    Args:
        video_file: Path of the video; only its base name is used to derive
            the input/output directories.

    Returns:
        A float32 numpy array of the selected keypoints, one entry per
        result.

    Raises:
        IOError: If the frame directory cannot be listed.
    """
    import time  # local import: only needed for the polling sleep below

    # =========== common ===============
    args.video = video_file
    base_name = os.path.basename(args.video)
    # NOTE(review): for a name without any '.', rfind returns -1 and this
    # slice drops the last character -- confirm inputs always carry an
    # extension (os.path.splitext would avoid the truncation).
    video_name = base_name[:base_name.rfind('.')]
    # =========== end common ===============

    img_path = f'outputs/alpha_pose_{video_name}/split_image/'

    # =========== image ===============
    args.inputpath = img_path
    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        os.mkdir(args.outputpath)

    # img_path is always a non-empty path different from '/', so the former
    # emptiness check could never fail and has been removed. Only the
    # top-level listing is used: looping over os.walk and overwriting
    # im_names kept the *last* visited directory and raised NameError when
    # the frame directory did not exist.
    top_level = next(os.walk(img_path), None)
    if top_level is None:
        raise IOError(f'Error: cannot list frame directory {img_path}')
    im_names = sorted(f for f in top_level[2] if 'png' in f or 'jpg' in f)

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                              format='yolo').start()
    print(f'Totally {data_loader.datalen} images')
    # =========== end image ===============
    # (A VideoLoader-based input path existed here as commented-out code;
    # frames are read from the split_image directory instead.)

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    # Per-image timings: detection ('dt'), pose ('pt'), post-processing ('pn').
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    writer = DataWriter(args.save_video).start()

    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                # The detection processor delivered no image: stop early.
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: hand the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation: feed the crops through the network in chunks
            # of at most batchSize (slicing clamps the final, shorter chunk).
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[start:start + batchSize].cuda())
                for start in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Wait for the writer to drain its queue. Sleeping between polls yields
    # the CPU instead of spinning in a tight loop.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)

    # Keep, per result entry, the candidate ranked highest by
    # proposal score times keypoint area.
    kpts = []
    for frame in final_result:
        best = max(frame['result'],
                   key=lambda x: x['proposal_score'].data[0] *
                   calculate_area(x['keypoints']))
        kpts.append(best['keypoints'].data.numpy())

    name = f'{args.outputpath}/{video_name}.npz'
    kpts = np.array(kpts).astype(np.float32)
    print('kpts npz save in ', name)
    np.savez_compressed(name, kpts=kpts)

    return kpts
def Alphapose(
        im_names,
        pose_model,
):
    """Run detection and pose estimation and return one set of keypoints.

    Args:
        im_names: Image names handed to ``ImageLoader``.
        pose_model: Pose network; called on input batches moved to CUDA when
            available, otherwise to the CPU.

    Returns:
        The ``'keypoints'`` of the first candidate in the first result
        entry, or ``None`` when the writer produced no results or the first
        entry has an empty ``'result'``.
    """
    import time  # local import: only needed for the polling sleep below

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                              format='yolo').start()

    # Load detection loader
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    det_processor = DetectionProcessor(det_loader).start()

    # Per-image timings: detection ('dt'), pose ('pt'), post-processing ('pn').
    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Init data writer
    writer = DataWriter(args.save_video).start()

    data_len = data_loader.length()
    im_names_desc = tqdm(range(data_len))

    batchSize = args.posebatch
    for _ in im_names_desc:
        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if boxes is None or boxes.nelement() == 0:
                # Nothing detected: hand the bare image to the writer.
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)

            # Pose Estimation: feed the crops through the network in chunks
            # of at most batchSize (slicing clamps the final, shorter chunk).
            datalen = inps.size(0)
            hm = torch.cat([
                pose_model(inps[start:start + batchSize].to(device))
                for start in range(0, datalen, batchSize)
            ])
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu()
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.3f} | pose time: {pt:.2f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    print('Finish Model Running.')
    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    # Wait for the writer to drain its queue. Sleeping between polls yields
    # the CPU instead of spinning in a tight loop.
    while writer.running():
        time.sleep(0.01)
    writer.stop()
    final_result = writer.results()
    # write_json(final_result, args.outputpath)

    # Guard against an empty result list: indexing [0] unconditionally
    # raised IndexError when the writer had collected nothing.
    if final_result and final_result[0]['result']:
        return final_result[0]['result'][0]['keypoints']
    return None