help="preload dataset to RAM") parser.set_defaults(verbose=False) opt = parser.parse_args() with open(opt.config) as f: config = yaml.load(f) generator = OcclusionAwareGenerator( **config['model_params']['generator_params'], **config['model_params']['common_params']) discriminator = MultiScaleDiscriminator( **config['model_params']['discriminator_params'], **config['model_params']['common_params']) kp_detector = KPDetector(**config['model_params']['kp_detector_params'], **config['model_params']['common_params']) dataset = FramesDataset(is_train=(opt.mode == 'train'), **config['dataset_params']) if opt.preload: logging.info('PreLoad Dataset: Start') pre_list = list(range(len(dataset))) import multiprocessing.pool as pool with pool.Pool(4) as pl: buf = pl.map(dataset.preload, pre_list) for idx, (i, v) in enumerate(zip(pre_list, buf)): dataset.buffed[i] = v.copy() buf[idx] = None logging.info('PreLoad Dataset: End') if opt.mode == 'train': save_dir = opt.save_dir logging.info("Start training...") dataset = DatasetRepeater(dataset,
# Declare a keypoint detector
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                         **config['model_params']['common_params'])
if torch.cuda.is_available():
    kp_detector.to(opt.device_ids[0])
# Print network details when the --verbose flag is set
if opt.verbose:
    print(kp_detector)

# Read in dataset details from the "dataset_params" section of the *.yaml config file.
# Refer to ./config/vox-256.yaml for details.
# Data preprocessing happens in this step; the result is loaded into the dataset variable.
dataset = FramesDataset(is_train=(opt.mode == 'train'), **config['dataset_params'])
print("Dataset size: {}, repeat number: {}".format(
    len(dataset), config['train_params']['num_repeats']))

# Create the logging directory
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Copy the config file (*.yaml) into the logging directory
if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
    copy(opt.config, log_dir)

if opt.mode == 'train':
    # Start training
    # TODO: look into this part further
    print("Training...")
    train(config, generator, discriminator, kp_detector, opt.checkpoint,
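# For orientation, the config dict unpacked throughout these scripts has roughly this
# shape. The keys below are taken from the accesses in the surrounding code; the
# values are illustrative placeholders only:
example_config = {
    'model_params': {
        'common_params': {'num_kp': 10, 'kp_variance': 0.01},  # shared by all sub-networks
        'kp_detector_params': {},      # KPDetector kwargs
        'generator_params': {},        # OcclusionAwareGenerator kwargs
        'discriminator_params': {},    # MultiScaleDiscriminator kwargs
    },
    'dataset_params': {},              # FramesDataset kwargs
    'train_params': {'num_repeats': 75},  # used for the epoch-size printout above
}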
bg_predictor = BGMotionPredictor(num_channels=config['model_params']['num_channels'],
                                 **config['model_params']['bg_predictor_params'])
if torch.cuda.is_available():
    bg_predictor.to(opt.device_ids[0])
if opt.verbose:
    print(bg_predictor)

avd_network = AVDNetwork(num_regions=config['model_params']['num_regions'],
                         **config['model_params']['avd_network_params'])
if torch.cuda.is_available():
    avd_network.to(opt.device_ids[0])
if opt.verbose:
    print(avd_network)

# Both 'train' and 'train_avd' modes use the training split.
dataset = FramesDataset(is_train=(opt.mode.startswith('train')), **config['dataset_params'])

if not os.path.exists(log_dir):
    os.makedirs(log_dir)
if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
    copy(opt.config, log_dir)

if opt.mode == 'train':
    print("Training...")
    train(config, generator, region_predictor, bg_predictor, opt.checkpoint,
          log_dir, dataset, opt.device_ids)
elif opt.mode == 'train_avd':
    print("Training Animation via Disentanglement...")
    train_avd(config, generator, region_predictor, bg_predictor, avd_network,
              opt.checkpoint, log_dir, dataset)
elif opt.mode == 'reconstruction':
    print("Reconstruction...")
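# Illustrative invocations of the three modes dispatched above. The flags mirror the
# opt.* attributes used in this script; the script and config file names are
# assumptions, not taken from this excerpt:
#   python run.py --config config/dataset.yaml --mode train --device_ids 0,1
#   python run.py --config config/dataset.yaml --mode train_avd --checkpoint log/checkpoint.pth
#   python run.py --config config/dataset.yaml --mode reconstruction --checkpoint log/checkpoint.pth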
def prediction(config, generator, kp_detector, checkpoint, log_dir):
    dataset = FramesDataset(is_train=True, transform=VideoToTensor(), **config['dataset_params'])
    log_dir = os.path.join(log_dir, 'prediction')
    png_dir = os.path.join(log_dir, 'png')

    if checkpoint is not None:
        Logger.load_cpk(checkpoint, generator=generator, kp_detector=kp_detector)
    else:
        raise AttributeError("Checkpoint should be specified for mode='prediction'.")

    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)

    generator = DataParallelWithCallback(generator)
    kp_detector = DataParallelWithCallback(kp_detector)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(png_dir):
        os.makedirs(png_dir)

    print("Extracting keypoints...")

    kp_detector.eval()
    generator.eval()

    keypoints_array = []

    prediction_params = config['prediction_params']

    for it, x in tqdm(enumerate(dataloader)):
        if prediction_params['train_size'] is not None:
            if it > prediction_params['train_size']:
                break
        with torch.no_grad():
            keypoints = []
            # Run the detector frame by frame and keep the keypoints on the CPU.
            for i in range(x['video'].shape[2]):
                kp = kp_detector(x['video'][:, :, i:(i + 1)])
                kp = {k: v.data.cpu().numpy() for k, v in kp.items()}
                keypoints.append(kp)
            keypoints_array.append(keypoints)

    predictor = PredictionModule(num_kp=config['model_params']['common_params']['num_kp'],
                                 kp_variance=config['model_params']['common_params']['kp_variance'],
                                 **prediction_params['rnn_params']).cuda()

    num_epochs = prediction_params['num_epochs']
    lr = prediction_params['lr']
    bs = prediction_params['batch_size']
    num_frames = prediction_params['num_frames']
    init_frames = prediction_params['init_frames']

    optimizer = torch.optim.Adam(predictor.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=50)

    kp_dataset = KPDataset(keypoints_array, num_frames=num_frames)
    kp_dataloader = DataLoader(kp_dataset, batch_size=bs)

    print("Training prediction...")
    for _ in trange(num_epochs):
        loss_list = []
        for x in kp_dataloader:
            x = {k: v.cuda() for k, v in x.items()}
            gt = {k: v.clone() for k, v in x.items()}
            # Zero out everything after the conditioning frames; the RNN must predict it.
            for k in x:
                x[k][:, init_frames:] = 0
            prediction = predictor(x)

            loss = sum([torch.abs(gt[k][:, init_frames:] - prediction[k][:, init_frames:]).mean()
                        for k in x])

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            loss_list.append(loss.detach().data.cpu().numpy())

        loss = np.mean(loss_list)
        scheduler.step(loss)

    dataset = FramesDataset(is_train=False, transform=VideoToTensor(), **config['dataset_params'])
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)

    print("Make predictions...")
    for it, x in tqdm(enumerate(dataloader)):
        with torch.no_grad():
            x['video'] = x['video'][:, :, :num_frames]
            kp_init = kp_detector(x['video'])
            for k in kp_init:
                kp_init[k][:, init_frames:] = 0

            kp_source = kp_detector(x['video'][:, :, :1])

            kp_video = predictor(kp_init)
            # The conditioning frames keep their detected keypoints; only the rest is predicted.
            for k in kp_video:
                kp_video[k][:, :init_frames] = kp_init[k][:, :init_frames]

            if 'var' in kp_video and prediction_params['predict_variance']:
                kp_video['var'] = kp_init['var'][:, (init_frames - 1):init_frames].repeat(
                    1, kp_video['var'].shape[1], 1, 1, 1)

            out = generate(generator, appearance_image=x['video'][:, :, :1],
                           kp_appearance=kp_source, kp_video=kp_video)

            x['source'] = x['video'][:, :, :1]

            out_video_batch = out['video_prediction'].data.cpu().numpy()
            out_video_batch = np.concatenate(np.transpose(out_video_batch, [0, 2, 3, 4, 1])[0], axis=1)
            imageio.imsave(os.path.join(png_dir, x['name'][0] + '.png'),
                           (255 * out_video_batch).astype(np.uint8))

            image = Visualizer(**config['visualizer_params']).visualize_reconstruction(x, out)
            image_name = x['name'][0] + prediction_params['format']
            imageio.mimsave(os.path.join(log_dir, image_name), image)

            del x, kp_video, kp_source, out
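# KPDataset, used in the training stage above, is assumed to turn each per-video list
# of per-frame keypoint dicts into fixed-length windows for the RNN. A minimal sketch
# under that assumption (window sampling and stacking details may differ from the
# actual implementation; it assumes every video has at least num_frames frames):
import numpy as np
from torch.utils.data import Dataset

class KPDatasetSketch(Dataset):
    def __init__(self, keypoints_array, num_frames):
        self.keypoints_array = keypoints_array
        self.num_frames = num_frames

    def __len__(self):
        return len(self.keypoints_array)

    def __getitem__(self, idx):
        keypoints = self.keypoints_array[idx]  # list of per-frame dicts of (1, num_kp, ...) arrays
        # Sample a random window of num_frames consecutive frames.
        start = np.random.randint(0, max(1, len(keypoints) - self.num_frames + 1))
        window = keypoints[start:start + self.num_frames]
        # Stack each keypoint field over time into a (num_frames, num_kp, ...) array.
        return {k: np.concatenate([frame[k] for frame in window], axis=0).astype('float32')
                for k in window[0]}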
    print(motion_generator)

discriminator = Discriminator(**config['model_params']['discriminator_params'],
                              **config['model_params']['common_params'])
discriminator.to(opt.device_ids[0])
if opt.verbose:
    print(discriminator)

kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                         **config['model_params']['common_params'])
kp_detector.to(opt.device_ids[0])
if opt.verbose:
    print(kp_detector)

dataset = FramesDataset(is_train=('train' in opt.mode), **config['dataset_params'])

if opt.mode == 'train':
    print("Training...")
    train(config, generator, discriminator, kp_detector, opt.checkpoint,
          log_dir, dataset, opt.device_ids)
elif opt.mode == 'reconstruction':
    print("Reconstruction...")
    reconstruction(config, generator, kp_detector, opt.checkpoint, log_dir, dataset)
elif opt.mode == 'transfer':
    print("Transfer...")
    transfer(config, generator, kp_detector, opt.checkpoint, log_dir, dataset)
elif opt.mode == 'prediction':
    print("Prediction...")
    config = yaml.load(f, Loader=yaml.FullLoader)

# Name the log directory after the config file, suffixed with a timestamp.
log_dir = os.path.join(opt.log_dir, os.path.basename(opt.config).split('.')[0])
log_dir += ' ' + strftime("%d-%m-%y %H:%M:%S", gmtime())

reconstruction_module = ReconstructionModule(**config['model_params']['reconstruction_module_params'],
                                             **config['model_params']['common_params'])
reconstruction_module.to(opt.device_ids[0])
if opt.verbose:
    print(reconstruction_module)

segmentation_module = SegmentationModule(**config['model_params']['segmentation_module_params'],
                                         **config['model_params']['common_params'])
segmentation_module.to(opt.device_ids[0])
if opt.verbose:
    print(segmentation_module)

dataset = FramesDataset(is_train=True, **config['dataset_params'])

if not os.path.exists(log_dir):
    os.makedirs(log_dir)
if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
    copy(opt.config, log_dir)

print("Training...")
train(config, reconstruction_module, segmentation_module, opt.checkpoint,
      log_dir, dataset, opt.device_ids)
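# For example, with opt.log_dir='log' and opt.config='config/vox-256.yaml', the naming
# code above yields a directory like (timestamp illustrative):
#   log/vox-256 07-05-21 13:45:12
# Note the name contains spaces and colons; colons are not allowed in file names on
# some platforms (e.g. Windows), so a safer separator may be preferable there.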
plt.axes([0.3, 0.3, 0.5, 0.5])
plt.title('Average Error')
plt.plot(train, 'k:', label='train')
plt.plot(validation, 'r', label='validation')
plt.xlabel('Epoch')
plt.ylabel('Average Error')
plt.legend()

results_dir = basePath + folder
sample_file_name = file_name + '.png'
plt.savefig(results_dir + sample_file_name)


if __name__ == "__main__":
    # Load the video dataset as a collection of frame images.
    face_dataset = FramesDataset(
        'file:///media/aleksandr/Files/@Machine/Github/Boiler/train/annotations.csv',
        'file:///media/aleksandr/Files/@Machine/Github/Boiler/train')

    # Compute statistics of bubble-boundary appearance at every image coordinate,
    # scaled by a factor of 1000.
    SummResult = boundaries_summ_conv(face_dataset, 63 * 12000, 64 * 12000, 1000)

    sample = face_dataset[1]
    fig = plt.figure()
    print(1, sample['frame'].shape, sample['heat_transfer'].shape)
    ax = plt.subplot(11 // 3 + 1, 3, 1 + 1)  # subplot grid coordinates
    plt.tight_layout()
    ax.set_title('Sample #{}'.format(1))
    ax.axis('off')
    print(SummResult)
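# A self-contained version of the error-curve plot above, assuming `train` and
# `validation` are per-epoch loss lists; os.path.join avoids the trailing-slash
# assumption of the string concatenation used in the original:
import os
import matplotlib.pyplot as plt

def plot_average_error(train, validation, results_dir, file_name):
    plt.figure()
    plt.title('Average Error')
    plt.plot(train, 'k:', label='train')
    plt.plot(validation, 'r', label='validation')
    plt.xlabel('Epoch')
    plt.ylabel('Average Error')
    plt.legend()
    plt.savefig(os.path.join(results_dir, file_name + '.png'))
    plt.close()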