Beispiel #1
0
def main():
    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--net',
                        type=str,
                        help='Net model class',
                        required=True)
    parser.add_argument('--traindb',
                        type=str,
                        help='Training datasets',
                        nargs='+',
                        choices=split.available_datasets,
                        required=True)
    parser.add_argument('--valdb',
                        type=str,
                        help='Validation datasets',
                        nargs='+',
                        choices=split.available_datasets,
                        required=True)
    parser.add_argument('--face',
                        type=str,
                        help='Face crop or scale',
                        required=True,
                        choices=['scale', 'tight'])
    parser.add_argument('--size',
                        type=int,
                        help='Train patch size',
                        required=True)

    parser.add_argument('--batch',
                        type=int,
                        help='Batch size to fit in GPU memory',
                        default=32)
    parser.add_argument('--lr', type=float, default=1e-5, help='Learning rate')
    parser.add_argument('--valint',
                        type=int,
                        help='Validation interval (iterations)',
                        default=500)
    parser.add_argument(
        '--patience',
        type=int,
        help='Patience before dropping the LR [validation intervals]',
        default=10)
    parser.add_argument('--maxiter',
                        type=int,
                        help='Maximum number of iterations',
                        default=20000)
    parser.add_argument('--init', type=str, help='Weight initialization file')
    parser.add_argument('--scratch',
                        action='store_true',
                        help='Train from scratch')

    parser.add_argument('--trainsamples',
                        type=int,
                        help='Limit the number of train samples per epoch',
                        default=-1)
    parser.add_argument(
        '--valsamples',
        type=int,
        help='Limit the number of validation samples per epoch',
        default=6000)

    parser.add_argument('--logint',
                        type=int,
                        help='Training log interval (iterations)',
                        default=100)
    parser.add_argument('--workers',
                        type=int,
                        help='Num workers for data loaders',
                        default=6)
    parser.add_argument('--device', type=int, help='GPU device id', default=0)
    parser.add_argument('--seed', type=int, help='Random seed', default=0)

    parser.add_argument('--debug', action='store_true', help='Activate debug')
    parser.add_argument('--suffix', type=str, help='Suffix to default tag')

    parser.add_argument('--attention',
                        action='store_true',
                        help='Enable Tensorboard log of attention masks')
    parser.add_argument('--log_dir',
                        type=str,
                        help='Directory for saving the training logs',
                        default='runs/binclass/')
    parser.add_argument('--models_dir',
                        type=str,
                        help='Directory for saving the models weights',
                        default='weights/binclass/')

    args = parser.parse_args()

    # Parse arguments
    net_class = getattr(fornet, args.net)
    train_datasets = args.traindb
    val_datasets = args.valdb
    face_policy = args.face
    face_size = args.size

    batch_size = args.batch
    initial_lr = args.lr
    validation_interval = args.valint
    patience = args.patience
    max_num_iterations = args.maxiter
    initial_model = args.init
    train_from_scratch = args.scratch

    max_train_samples = args.trainsamples
    max_val_samples = args.valsamples

    log_interval = args.logint
    num_workers = args.workers
    device = torch.device('cuda:{:d}'.format(
        args.device)) if torch.cuda.is_available() else torch.device('cpu')
    seed = args.seed

    debug = args.debug
    suffix = args.suffix

    enable_attention = args.attention

    weights_folder = args.models_dir
    logs_folder = args.log_dir

    # Random initialization
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    # Load net
    net: nn.Module = net_class().to(device)

    # Loss and optimizers
    criterion = nn.BCEWithLogitsLoss()

    min_lr = initial_lr * 1e-5
    optimizer = optim.Adam(net.get_trainable_parameters(), lr=initial_lr)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        mode='min',
        factor=0.1,
        patience=patience,
        cooldown=2 * patience,
        min_lr=min_lr,
    )

    tag = utils.make_train_tag(
        net_class=net_class,
        traindb=train_datasets,
        face_policy=face_policy,
        patch_size=face_size,
        seed=seed,
        suffix=suffix,
        debug=debug,
    )

    # Model checkpoint paths
    bestval_path = os.path.join(weights_folder, tag, 'bestval.pth')
    last_path = os.path.join(weights_folder, tag, 'last.pth')
    periodic_path = os.path.join(weights_folder, tag, 'it{:06d}.pth')

    os.makedirs(os.path.join(weights_folder, tag), exist_ok=True)

    # Load model
    val_loss = min_val_loss = 10
    epoch = iteration = 0
    net_state = None
    opt_state = None
    if initial_model is not None:
        # If given load initial model
        print('Loading model form: {}'.format(initial_model))
        state = torch.load(initial_model, map_location='cpu')
        net_state = state['net']
    elif not train_from_scratch and os.path.exists(last_path):
        print('Loading model form: {}'.format(last_path))
        state = torch.load(last_path, map_location='cpu')
        net_state = state['net']
        opt_state = state['opt']
        iteration = state['iteration'] + 1
        epoch = state['epoch']
    if not train_from_scratch and os.path.exists(bestval_path):
        state = torch.load(bestval_path, map_location='cpu')
        min_val_loss = state['val_loss']
    if net_state is not None:
        incomp_keys = net.load_state_dict(net_state, strict=False)
        print(incomp_keys)
    if opt_state is not None:
        for param_group in opt_state['param_groups']:
            param_group['lr'] = initial_lr
        optimizer.load_state_dict(opt_state)

    # Initialize Tensorboard
    logdir = os.path.join(logs_folder, tag)
    if iteration == 0:
        # If training from scratch or initialization remove history if exists
        shutil.rmtree(logdir, ignore_errors=True)

    # TensorboardX instance
    tb = SummaryWriter(logdir=logdir)
    if iteration == 0:
        dummy = torch.randn((1, 3, face_size, face_size), device=device)
        dummy = dummy.to(device)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            tb.add_graph(net, [
                dummy,
            ], verbose=False)

    transformer = utils.get_transformer(face_policy=face_policy,
                                        patch_size=face_size,
                                        net_normalizer=net.get_normalizer(),
                                        train=True)

    # Datasets and data loaders
    print('Loading data')
    splits = split.make_splits(dbs={
        'train': train_datasets,
        'val': val_datasets
    })
    train_dfs = [splits['train'][db][0] for db in splits['train']]
    train_roots = [splits['train'][db][1] for db in splits['train']]
    val_roots = [splits['val'][db][1] for db in splits['val']]
    val_dfs = [splits['val'][db][0] for db in splits['val']]

    train_dataset = FrameFaceIterableDataset(
        roots=train_roots,
        dfs=train_dfs,
        scale=face_policy,
        num_samples=max_train_samples,
        transformer=transformer,
        size=face_size,
    )

    val_dataset = FrameFaceIterableDataset(
        roots=val_roots,
        dfs=val_dfs,
        scale=face_policy,
        num_samples=max_val_samples,
        transformer=transformer,
        size=face_size,
    )

    train_loader = DataLoader(
        train_dataset,
        num_workers=num_workers,
        batch_size=batch_size,
    )

    val_loader = DataLoader(
        val_dataset,
        num_workers=num_workers,
        batch_size=batch_size,
    )

    print('Training samples: {}'.format(len(train_dataset)))
    print('Validation samples: {}'.format(len(val_dataset)))

    if len(train_dataset) == 0:
        print('No training samples. Halt.')
        return

    if len(val_dataset) == 0:
        print('No validation samples. Halt.')
        return

    stop = False
    while not stop:

        # Training
        optimizer.zero_grad()

        train_loss = train_num = 0
        train_pred_list = []
        train_labels_list = []
        for train_batch in tqdm(train_loader,
                                desc='Epoch {:03d}'.format(epoch),
                                leave=False,
                                total=len(train_loader) //
                                train_loader.batch_size):
            net.train()
            batch_data, batch_labels = train_batch

            train_batch_num = len(batch_labels)
            train_num += train_batch_num
            train_labels_list.append(batch_labels.numpy().flatten())

            train_batch_loss, train_batch_pred = batch_forward(
                net, device, criterion, batch_data, batch_labels)
            train_pred_list.append(train_batch_pred.flatten())

            if torch.isnan(train_batch_loss):
                raise ValueError('NaN loss')

            train_loss += train_batch_loss.item() * train_batch_num

            # Optimization
            train_batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Logging
            if iteration > 0 and (iteration % log_interval == 0):
                train_loss /= train_num
                tb.add_scalar('train/loss', train_loss, iteration)
                tb.add_scalar('lr', optimizer.param_groups[0]['lr'], iteration)
                tb.add_scalar('epoch', epoch, iteration)

                # Checkpoint
                save_model(net, optimizer, train_loss, val_loss, iteration,
                           batch_size, epoch, last_path)
                train_loss = train_num = 0

            # Validation
            if iteration > 0 and (iteration % validation_interval == 0):

                # Model checkpoint
                save_model(net, optimizer, train_loss, val_loss, iteration,
                           batch_size, epoch, periodic_path.format(iteration))

                # Train cumulative stats
                train_labels = np.concatenate(train_labels_list)
                train_pred = np.concatenate(train_pred_list)
                train_labels_list = []
                train_pred_list = []

                train_roc_auc = roc_auc_score(train_labels, train_pred)
                tb.add_scalar('train/roc_auc', train_roc_auc, iteration)
                tb.add_pr_curve('train/pr', train_labels, train_pred,
                                iteration)

                # Validation
                val_loss = validation_routine(net, device, val_loader,
                                              criterion, tb, iteration, 'val')
                tb.flush()

                # LR Scheduler
                lr_scheduler.step(val_loss)

                # Model checkpoint
                if val_loss < min_val_loss:
                    min_val_loss = val_loss
                    save_model(net, optimizer, train_loss, val_loss, iteration,
                               batch_size, epoch, bestval_path)

                # Attention
                if enable_attention and hasattr(net, 'get_attention'):
                    net.eval()
                    # For each dataframe show the attention for a real,fake couple of frames
                    for df, root, sample_idx, tag in [
                        (train_dfs[0], train_roots[0],
                         train_dfs[0][train_dfs[0]['label'] == False].index[0],
                         'train/att/real'),
                        (train_dfs[0], train_roots[0],
                         train_dfs[0][train_dfs[0]['label'] == True].index[0],
                         'train/att/fake'),
                    ]:
                        record = df.loc[sample_idx]
                        tb_attention(tb, tag, iteration, net, device,
                                     face_size, face_policy, transformer, root,
                                     record)

                if optimizer.param_groups[0]['lr'] == min_lr:
                    print('Reached minimum learning rate. Stopping.')
                    stop = True
                    break

            iteration += 1

            if iteration > max_num_iterations:
                print('Maximum number of iterations reached')
                stop = True
                break

            # End of iteration

        epoch += 1

    # Needed to flush out last events
    tb.close()

    print('Completed')
Beispiel #2
0
# ## Initialization

# In[4]:

print('=' * 20)
model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
print('=' * 20)
net = getattr(fornet, net_model)().eval().to(device)
print('=' * 20)
net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))

# In[5]:

transf = utils.get_transformer(face_policy,
                               face_size,
                               net.get_normalizer(),
                               train=False)

# In[6]:

facedet = BlazeFace().to(device)
facedet.load_weights("../blazeface/blazeface.pth")
facedet.load_anchors("../blazeface/anchors.npy")
videoreader = VideoReader(verbose=False)
video_read_fn = lambda x: videoreader.read_frames(x,
                                                  num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)

# ## Detect faces

# In[7]:
Beispiel #3
0
def main():
    # Args
    parser = argparse.ArgumentParser()

    parser.add_argument('--testsets', type=str, help='Testing datasets', nargs='+', choices=split.available_datasets,
                        required=True)
    parser.add_argument('--testsplits', type=str, help='Test split', nargs='+', default=['val', 'test'],
                        choices=['train', 'val', 'test'])
    parser.add_argument('--faces_df_path', type=str, action='store',
                        help='Path to the Pandas Dataframe obtained from extract_faces.py on the FF++ dataset. '
                             'Required for training/validating on the FF++ dataset.')
    parser.add_argument('--faces_dir', type=str, action='store',
                        help='Path to the directory containing the faces extracted from the FF++ dataset. '
                             'Required for training/validating on the FF++ dataset.')

    # Alternative 1: Specify training params
    parser.add_argument('--net', type=str, help='Net model class')
    parser.add_argument('--traindb', type=str, action='store', help='Dataset used for training')
    parser.add_argument('--face', type=str, help='Face crop or scale', default='scale',
                        choices=['scale', 'tight'])
    parser.add_argument('--size', type=int, help='Train patch size')

    weights_group = parser.add_mutually_exclusive_group(required=True)
    weights_group.add_argument('--weights', type=Path, help='Weight filename', default='bestval.pth')

    # Alternative 2: Specify trained model path
    weights_group.add_argument('--model_path', type=Path, help='Full path of the trained model')

    # Common params
    parser.add_argument('--batch', type=int, help='Batch size to fit in GPU memory', default=128)

    parser.add_argument('--workers', type=int, help='Num workers for data loaders', default=6)
    parser.add_argument('--device', type=int, help='GPU id', default=0)
    parser.add_argument('--seed', type=int, help='Random seed used for training', default=0)

    parser.add_argument('--debug', action='store_true', help='Debug flag', )
    parser.add_argument('--suffix', type=str, help='Suffix to default tag')

    parser.add_argument('--models_dir', type=Path, help='Folder with trained models',
                        default='weights/')

    parser.add_argument('--num_video', type=int, help='Number of real-fake videos to test')
    parser.add_argument('--results_dir', type=Path, help='Output folder',
                        default='results/')

    parser.add_argument('--override', action='store_true', help='Override existing results', )

    args = parser.parse_args()

    device = torch.device('cuda:{}'.format(args.device)) if torch.cuda.is_available() else torch.device('cpu')
    patch_size: int = args.size
    num_workers: int = args.workers
    batch_size: int = args.batch
    net_name: str = args.net
    weights: Path = args.weights
    suffix: str = args.suffix
    face_policy: str = args.face
    models_dir: Path = args.models_dir
    max_num_videos_per_label: int = args.num_video  # number of real-fake videos to test
    model_path: Path = args.model_path
    results_dir: Path = args.results_dir
    debug: bool = args.debug
    override: bool = args.override
    train_datasets = args.traindb
    seed: int = args.seed
    test_sets = args.testsets
    test_splits = args.testsplits
    df_path = args.faces_df_path
    faces_dir = args.faces_dir

    if model_path is None:
        if net_name is None:
            raise RuntimeError('Net name is required if \"model_path\" is not provided')

        model_name = utils.make_train_tag(net_class=getattr(fornet, net_name),
                                          traindb=train_datasets,
                                          face_policy=face_policy,
                                          patch_size=patch_size,
                                          seed=seed,
                                          suffix=suffix,
                                          debug=debug,
                                          )
        model_path = models_dir.joinpath(model_name, weights)

    else:
        # get arguments from the model path
        face_policy = str(model_path).split('face-')[1].split('_')[0]
        patch_size = int(str(model_path).split('size-')[1].split('_')[0])
        net_name = str(model_path).split('net-')[1].split('_')[0]
        model_name = '_'.join(model_path.with_suffix('').parts[-2:])

    # Load net
    net_class = getattr(fornet, net_name)

    # load model
    print('Loading model...')
    state_tmp = torch.load(model_path, map_location='cpu')
    if 'net' not in state_tmp.keys():
        state = OrderedDict({'net': OrderedDict()})
        [state['net'].update({'model.{}'.format(k): v}) for k, v in state_tmp.items()]
    else:
        state = state_tmp
    net: FeatureExtractor = net_class().eval().to(device)

    incomp_keys = net.load_state_dict(state['net'], strict=True)
    print(incomp_keys)
    print('Model loaded!')

    # val loss per-frame
    criterion = nn.BCEWithLogitsLoss(reduction='none')

    # Define data transformers
    test_transformer = utils.get_transformer(face_policy, patch_size, net.get_normalizer(), train=False)

    # datasets and dataloaders (from train_binclass.py)
    print('Loading data...')
    # Check if paths for extracted faces and DataFrames are provided
    for dataset in test_sets:
        if df_path is None or faces_dir is None:
            raise RuntimeError('Specify DataFrame and directory for faces for testing!')
    splits = split.make_splits(faces_df=df_path,faces_dir=faces_dir, dbs={'train': test_sets, 'val': test_sets, 'test': test_sets})
    train_dfs = [splits['train'][db][0] for db in splits['train']]
    train_roots = [splits['train'][db][1] for db in splits['train']]
    val_roots = [splits['val'][db][1] for db in splits['val']]
    val_dfs = [splits['val'][db][0] for db in splits['val']]
    test_dfs = [splits['test'][db][0] for db in splits['test']]
    test_roots = [splits['test'][db][1] for db in splits['test']]

    # Output paths
    out_folder = results_dir.joinpath(model_name)
    out_folder.mkdir(mode=0o775, parents=True, exist_ok=True)

    # Samples selection
    if max_num_videos_per_label is not None:
        dfs_out_train = [select_videos(df, max_num_videos_per_label) for df in train_dfs]
        dfs_out_val = [select_videos(df, max_num_videos_per_label) for df in val_dfs]
        dfs_out_test = [select_videos(df, max_num_videos_per_label) for df in test_dfs]
    else:
        dfs_out_train = train_dfs
        dfs_out_val = val_dfs
        dfs_out_test = test_dfs

    # Extractions list
    extr_list = []
    # Append train and validation set first
    if 'train' in test_splits:
        for idx, dataset in enumerate(test_sets):
            extr_list.append(
                (dfs_out_train[idx], out_folder.joinpath(dataset + '_train.pkl'), train_roots[idx], dataset + ' TRAIN')
            )
    if 'val' in test_splits:
        for idx, dataset in enumerate(test_sets):
            extr_list.append(
                (dfs_out_val[idx], out_folder.joinpath(dataset + '_val.pkl'), val_roots[idx], dataset + ' VAL')
            )
    if 'test' in test_splits:
        for idx, dataset in enumerate(test_sets):
            extr_list.append(
                (dfs_out_test[idx], out_folder.joinpath(dataset + '_test.pkl'), test_roots[idx], dataset + ' TEST')
            )

    for df, df_path, df_root, tag in extr_list:
        if override or not df_path.exists():
            print('\n##### PREDICT VIDEOS FROM {} #####'.format(tag))
            print('Real frames: {}'.format(sum(df['label'] == False)))
            print('Fake frames: {}'.format(sum(df['label'] == True)))
            print('Real videos: {}'.format(df[df['label'] == False]['video'].nunique()))
            print('Fake videos: {}'.format(df[df['label'] == True]['video'].nunique()))
            dataset_out = process_dataset(root=df_root, df=df, net=net, criterion=criterion,
                                          patch_size=patch_size,
                                          face_policy=face_policy, transformer=test_transformer,
                                          batch_size=batch_size,
                                          num_workers=num_workers, device=device, )
            df['score'] = dataset_out['score'].astype(np.float32)
            df['loss'] = dataset_out['loss'].astype(np.float32)
            print('Saving results to: {}'.format(df_path))
            df.to_pickle(str(df_path))

            if debug:
                plt.figure()
                plt.title(tag)
                plt.hist(df[df.label == True].score, bins=100, alpha=0.6, label='FAKE frames')
                plt.hist(df[df.label == False].score, bins=100, alpha=0.6, label='REAL frames')
                plt.legend()

            del (dataset_out)
            del (df)
            gc.collect()

    if debug:
        plt.show()

    print('Completed!')
Beispiel #4
0
def run_nb(modelname):
    # ## Parameters

    # In[2]:
    """
    Choose an architecture between
    - EfficientNetB4
    - EfficientNetB4ST
    - EfficientNetAutoAttB4
    - EfficientNetAutoAttB4ST
    - Xception
    """
    net_model = modelname
    """
    Choose a training dataset between
    - DFDC
    - FFPP
    """
    train_db = 'DFDC'

    # In[3]:

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    face_policy = 'scale'
    face_size = 224
    frames_per_video = 32

    # ## Initialization

    # In[4]:

    print('=' * 20)
    model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
    print('=' * 20)
    net = getattr(fornet, net_model)().eval().to(device)
    print('=' * 20)
    net.load_state_dict(
        load_url(model_url, map_location=device, check_hash=True))

    # In[5]:

    transf = utils.get_transformer(face_policy,
                                   face_size,
                                   net.get_normalizer(),
                                   train=False)

    # In[6]:

    facedet = BlazeFace().to(device)
    facedet.load_weights("../blazeface/blazeface.pth")
    facedet.load_anchors("../blazeface/anchors.npy")
    videoreader = VideoReader(verbose=False)
    video_read_fn = lambda x: videoreader.read_frames(
        x, num_frames=frames_per_video)
    face_extractor = FaceExtractor(video_read_fn=video_read_fn,
                                   facedet=facedet)

    # ## Detect faces

    # In[7]:

    torch.cuda.is_available()

    # In[8]:

    torch.cuda.current_device()

    # In[9]:

    torch.cuda.device(0)

    # In[10]:

    torch.cuda.device_count()

    # In[11]:

    torch.cuda.get_device_name(0)

    # In[12]:

    vid_real_faces = face_extractor.process_video('samples/lynaeydofd.mp4')
    vid_fake_faces = face_extractor.process_video('samples/mqzvfufzoq.mp4')

    # In[13]:

    im_real_face = vid_real_faces[0]['faces'][0]
    im_fake_face = vid_fake_faces[0]['faces'][0]

    # In[14]:

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))

    ax[0].imshow(im_real_face)
    ax[0].set_title('REAL')

    ax[1].imshow(im_fake_face)
    ax[1].set_title('FAKE')

    # ## Predict scores for each frame

    # In[15]:

    # For each frame, we consider the face with the highest confidence score found by BlazeFace (= frame['faces'][0])
    faces_real_t = torch.stack([
        transf(image=frame['faces'][0])['image'] for frame in vid_real_faces
        if len(frame['faces'])
    ])
    faces_fake_t = torch.stack([
        transf(image=frame['faces'][0])['image'] for frame in vid_fake_faces
        if len(frame['faces'])
    ])

    with torch.no_grad():
        faces_real_pred = net(faces_real_t.to(device)).cpu().numpy().flatten()
        faces_fake_pred = net(faces_fake_t.to(device)).cpu().numpy().flatten()

    # In[16]:

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    ax[0].stem([f['frame_idx'] for f in vid_real_faces if len(f['faces'])],
               expit(faces_real_pred),
               use_line_collection=True)
    ax[0].set_title('REAL')
    ax[0].set_xlabel('Frame')
    ax[0].set_ylabel('Score')
    ax[0].set_ylim([0, 1])
    ax[0].grid(True)

    ax[1].stem([f['frame_idx'] for f in vid_fake_faces if len(f['faces'])],
               expit(faces_fake_pred),
               use_line_collection=True)
    ax[1].set_title('FAKE')
    ax[1].set_xlabel('Frame')
    ax[1].set_ylabel('Score')
    ax[1].set_ylim([0, 1])
    ax[1].set_yticks([0, 1], ['REAL', 'FAKE'])

    # In[17]:
    """
    Print average scores.
    An average score close to 0 predicts REAL. An average score close to 1 predicts FAKE.
    """
    print('Average score for REAL video: {:.4f}'.format(
        expit(faces_real_pred.mean())))
    print('Average score for FAKE face: {:.4f}'.format(
        expit(faces_fake_pred.mean())))