help="preload dataset to RAM")
    parser.set_defaults(verbose=False)
    opt = parser.parse_args()
    with open(opt.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    generator = OcclusionAwareGenerator(
        **config['model_params']['generator_params'],
        **config['model_params']['common_params'])
    discriminator = MultiScaleDiscriminator(
        **config['model_params']['discriminator_params'],
        **config['model_params']['common_params'])
    kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                             **config['model_params']['common_params'])

    dataset = FramesDataset(is_train=(opt.mode == 'train'),
                            **config['dataset_params'])
    if opt.preload:
        logging.info('PreLoad Dataset: Start')
        pre_list = list(range(len(dataset)))
        import multiprocessing.pool as pool
        with pool.Pool(4) as pl:
            buf = pl.map(dataset.preload, pre_list)
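        # Move each preloaded sample into the dataset's buffer, then drop the
        # pool's copy so the duplicate can be garbage-collected right away.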
        for idx, (i, v) in enumerate(zip(pre_list, buf)):
            dataset.buffed[i] = v.copy()
            buf[idx] = None
        logging.info('PreLoad Dataset: End')

    if opt.mode == 'train':
        save_dir = opt.save_dir
        logging.info("Start training...")
        dataset = DatasetRepeater(dataset,
                                  config['train_params']['num_repeats'])
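
# For reference -- a minimal sketch of the DatasetRepeater wrapper used above,
# assuming the interface from the first-order-model codebase (one "epoch"
# simply iterates the underlying dataset num_repeats times):
from torch.utils.data import Dataset

class DatasetRepeater(Dataset):
    def __init__(self, dataset, num_repeats=100):
        self.dataset = dataset
        self.num_repeats = num_repeats

    def __len__(self):
        return self.num_repeats * len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx % len(self.dataset)]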
Example #2
    # Declare a key point detector
    kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                             **config['model_params']['common_params'])

    if torch.cuda.is_available():
        kp_detector.to(opt.device_ids[0])

    # Print network details if using --verbose flag
    if opt.verbose:
        print(kp_detector)

    # Read in dataset details, defined in *.yaml config file, "dataset_params" section
    # Refer to ./config/vox-256.yaml for details
    # Data preprocessing happens in this step; the processed data is read into the dataset variable
    dataset = FramesDataset(is_train=(opt.mode == 'train'),
                            **config['dataset_params'])
    print("Dataset size: {}, repeat number: {}".format(
        len(dataset), config['train_params']['num_repeats']))

    # Create the logging directory
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # Copy the config file (*.yaml) into the logging path
    if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
        copy(opt.config, log_dir)

    if opt.mode == 'train':
        # Start training
        # Look into this part further
        print("Training...")
        train(config, generator, discriminator, kp_detector, opt.checkpoint,
              log_dir, dataset, opt.device_ids)
Example #3
    bg_predictor = BGMotionPredictor(
        num_channels=config['model_params']['num_channels'],
        **config['model_params']['bg_predictor_params'])
    if torch.cuda.is_available():
        bg_predictor.to(opt.device_ids[0])
    if opt.verbose:
        print(bg_predictor)

    avd_network = AVDNetwork(num_regions=config['model_params']['num_regions'],
                             **config['model_params']['avd_network_params'])
    if torch.cuda.is_available():
        avd_network.to(opt.device_ids[0])
    if opt.verbose:
        print(avd_network)

    dataset = FramesDataset(is_train=(opt.mode.startswith('train')),
                            **config['dataset_params'])
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
        copy(opt.config, log_dir)

    if opt.mode == 'train':
        print("Training...")
        train(config, generator, region_predictor, bg_predictor,
              opt.checkpoint, log_dir, dataset, opt.device_ids)
    elif opt.mode == 'train_avd':
        print("Training Animation via Disentaglement...")
        train_avd(config, generator, region_predictor, bg_predictor,
                  avd_network, opt.checkpoint, log_dir, dataset)
    elif opt.mode == 'reconstruction':
        print("Reconstruction...")
Example #4
def prediction(config, generator, kp_detector, checkpoint, log_dir):
    dataset = FramesDataset(is_train=True, transform=VideoToTensor(), **config['dataset_params'])
    log_dir = os.path.join(log_dir, 'prediction')
    png_dir = os.path.join(log_dir, 'png')

    if checkpoint is not None:
        Logger.load_cpk(checkpoint, generator=generator, kp_detector=kp_detector)
    else:
        raise AttributeError("Checkpoint should be specified for mode='prediction'.")
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)

    generator = DataParallelWithCallback(generator)
    kp_detector = DataParallelWithCallback(kp_detector)
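    # The DataParallelWithCallback wrappers above come from the sync_batchnorm
    # package bundled with this family of codebases; they behave like
    # nn.DataParallel but also replicate module callbacks, which synchronized
    # BatchNorm layers need.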

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    if not os.path.exists(png_dir):
        os.makedirs(png_dir)

    print("Extracting keypoints...")

    kp_detector.eval()
    generator.eval()

    keypoints_array = []

    prediction_params = config['prediction_params']

    for it, x in tqdm(enumerate(dataloader)):
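        # x['video'] is laid out as (batch, channels, time, height, width);
        # slicing along dim 2 below feeds the detector one frame at a time.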
        if prediction_params['train_size'] is not None:
            if it > prediction_params['train_size']:
                break
        with torch.no_grad():
            keypoints = []
            for i in range(x['video'].shape[2]):
                kp = kp_detector(x['video'][:, :, i:(i + 1)])
                kp = {k: v.data.cpu().numpy() for k, v in kp.items()}
                keypoints.append(kp)
            keypoints_array.append(keypoints)

    predictor = PredictionModule(num_kp=config['model_params']['common_params']['num_kp'],
                                 kp_variance=config['model_params']['common_params']['kp_variance'],
                                 **prediction_params['rnn_params']).cuda()

    num_epochs = prediction_params['num_epochs']
    lr = prediction_params['lr']
    bs = prediction_params['batch_size']
    num_frames = prediction_params['num_frames']
    init_frames = prediction_params['init_frames']

    optimizer = torch.optim.Adam(predictor.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=50)

    kp_dataset = KPDataset(keypoints_array, num_frames=num_frames)

    kp_dataloader = DataLoader(kp_dataset, batch_size=bs)

    print("Training prediction...")
    for _ in trange(num_epochs):
        loss_list = []
        for x in kp_dataloader:
            x = {k: v.cuda() for k, v in x.items()}
            gt = {k: v.clone() for k, v in x.items()}
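            # Zero out every frame after init_frames so the predictor sees
            # only the initial keypoints and must extrapolate the rest.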
            for k in x:
                x[k][:, init_frames:] = 0
            prediction = predictor(x)

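            # L1 error between ground truth and prediction, measured only on
            # the frames the model had to predict: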
            loss = sum([torch.abs(gt[k][:, init_frames:] - prediction[k][:, init_frames:]).mean() for k in x])

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss_list.append(loss.detach().data.cpu().numpy())

        loss = np.mean(loss_list)
        scheduler.step(loss)

    dataset = FramesDataset(is_train=False, transform=VideoToTensor(), **config['dataset_params'])
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)

    print("Make predictions...")
    for it, x in tqdm(enumerate(dataloader)):
        with torch.no_grad():
            x['video'] = x['video'][:, :, :num_frames]
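            # Detect keypoints, then mask everything after init_frames,
            # mirroring the masking the predictor was trained with.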
            kp_init = kp_detector(x['video'])
            for k in kp_init:
                kp_init[k][:, init_frames:] = 0

            kp_source = kp_detector(x['video'][:, :, :1])

            kp_video = predictor(kp_init)
            for k in kp_video:
                kp_video[k][:, :init_frames] = kp_init[k][:, :init_frames]
            if 'var' in kp_video and prediction_params['predict_variance']:
                kp_video['var'] = kp_init['var'][:, (init_frames - 1):init_frames].repeat(1, kp_video['var'].shape[1],
                                                                                          1, 1, 1)
            out = generate(generator, appearance_image=x['video'][:, :, :1], kp_appearance=kp_source,
                           kp_video=kp_video)

            x['source'] = x['video'][:, :, :1]

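            # (B, C, T, H, W) -> (T, H, W, C), then tile the predicted frames
            # side by side into one strip and save it as a single PNG.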
            out_video_batch = out['video_prediction'].data.cpu().numpy()
            out_video_batch = np.concatenate(np.transpose(out_video_batch, [0, 2, 3, 4, 1])[0], axis=1)
            imageio.imsave(os.path.join(png_dir, x['name'][0] + '.png'), (255 * out_video_batch).astype(np.uint8))

            image = Visualizer(**config['visualizer_params']).visualize_reconstruction(x, out)
            image_name = x['name'][0] + prediction_params['format']
            imageio.mimsave(os.path.join(log_dir, image_name), image)

            del x, kp_video, kp_source, out
Example #5
        print(motion_generator)

    discriminator = Discriminator(
        **config['model_params']['discriminator_params'],
        **config['model_params']['common_params'])
    discriminator.to(opt.device_ids[0])
    if opt.verbose:
        print(discriminator)

    kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                             **config['model_params']['common_params'])
    kp_detector.to(opt.device_ids[0])
    if opt.verbose:
        print(kp_detector)

    dataset = FramesDataset(is_train=('train' in opt.mode),
                            **config['dataset_params'])

    if opt.mode == 'train':
        print("Training...")
        train(config, generator, discriminator, kp_detector, opt.checkpoint,
              log_dir, dataset, opt.device_ids)
    elif opt.mode == 'reconstruction':
        print("Reconstruction...")
        reconstruction(config, generator, kp_detector, opt.checkpoint, log_dir,
                       dataset)
    elif opt.mode == 'transfer':
        print("Transfer...")
        transfer(config, generator, kp_detector, opt.checkpoint, log_dir,
                 dataset)
    elif opt.mode == "prediction":
        print("Prediction...")
Example #6
        config = yaml.load(f, Loader=yaml.FullLoader)

    log_dir = os.path.join(opt.log_dir,
                           os.path.basename(opt.config).split('.')[0])
    log_dir += ' ' + strftime("%d-%m-%y %H:%M:%S", gmtime())

    reconstruction_module = ReconstructionModule(
        **config['model_params']['reconstruction_module_params'],
        **config['model_params']['common_params'])
    reconstruction_module.to(opt.device_ids[0])
    if opt.verbose:
        print(reconstruction_module)

    segmentation_module = SegmentationModule(
        **config['model_params']['segmentation_module_params'],
        **config['model_params']['common_params'])
    segmentation_module.to(opt.device_ids[0])
    if opt.verbose:
        print(segmentation_module)

    dataset = FramesDataset(is_train=True, **config['dataset_params'])

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
        copy(opt.config, log_dir)

    print("Training...")
    train(config, reconstruction_module, segmentation_module, opt.checkpoint,
          log_dir, dataset, opt.device_ids)
Example #7
    plt.axes([0.3, 0.3, 0.5, 0.5])
    plt.title('Average Error')
    plt.plot(train, 'k:', label='train')
    plt.plot(validation, 'r', label='validation')
    plt.xlabel('Epoch')
    plt.ylabel('Average Error')
    plt.legend()
    results_dir = basePath + folder
    sample_file_name = file_name + '.png'
    plt.savefig(results_dir + sample_file_name)


if __name__ == "__main__":
    # Load the video dataset as a collection of frames (pictures)
    face_dataset = FramesDataset(
        'file:///media/aleksandr/Files/@Machine/Github/Boiler/train/annotations.csv',
        'file:///media/aleksandr/Files/@Machine/Github/Boiler/train')

    # Compute statistics of bubble-boundary appearance at every image
    # coordinate, scaled by 1000
    SummResult = boundaries_summ_conv(face_dataset, 63 * 12000, 64 * 12000,
                                      1000)

    sample = face_dataset[1]
    fig = plt.figure()
    print(1, sample['frame'].shape, sample['heat_transfer'].shape)
    ax = plt.subplot(11 // 3 + 1, 3, 1 + 1)  # 4x3 grid, second cell
    plt.tight_layout()
    ax.set_title('Sample #{}'.format(1))
    ax.axis('off')
    print(SummResult)