Exemple #1
0
    #arg.set_defaults(cam = False)

    arg = arg.parse_args()
    arg.fullres = arg.fullres or arg.cam

    if arg.gpu < 0:
        arg.gpu = None

    print 'Start time:', arg.start
    print 'GPU =', arg.gpu

    gpus = [arg.gpu]
    gpus = mu.set_gpus(gpus)

    if arg.duration_mult is not None:
        pr = sep_params.full()
        step = 0.001 * pr.frame_step_ms
        length = 0.001 * pr.frame_length_ms
        arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult

    fn = getattr(sep_params, arg.model)
    pr = fn(vid_dur=arg.clip_dur)

    if arg.clip_dur is None:
        arg.clip_dur = pr.vid_dur
    pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
    print 'Spectrogram samples:', pr.spec_len
    pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
                                                          pr.train_iters)

    if not os.path.exists(arg.vid_file):
Exemple #2
0
def main(args):
    arg = argparse.ArgumentParser(
        description='Separate on- and off-screen audio from a video')
    arg.add_argument('vid_file', type=str, help='Video file to process')
    arg.add_argument(
        '--duration_mult',
        type=float,
        default=None,
        help=
        'Multiply the default duration of the audio (i.e. %f) by this amount. Should be a power of 2.'
        % sep_params.VidDur)
    arg.add_argument(
        '--mask',
        type=str,
        default=None,
        help=
        "set to 'l' or 'r' to visually mask the left/right half of the video before processing"
    )
    arg.add_argument('--start',
                     type=float,
                     default=0.,
                     help='How many seconds into the video to start')
    arg.add_argument(
        '--model',
        type=str,
        default='full',
        help='Which variation of othe source separation model to run.')
    arg.add_argument('--gpu', type=int, default=0, help='Set to -1 for no GPU')
    arg.add_argument('--out',
                     type=str,
                     default=None,
                     help='Directory to save videos')
    arg.add_argument('--cam', dest='cam', default=False, action='store_true')

    # undocumented/deprecated options
    arg.add_argument('--clip_dur', type=float, default=None)
    arg.add_argument('--duration', type=float, default=None)
    arg.add_argument('--fullres', type=bool, default=True)
    arg.add_argument('--suffix', type=str, default='')
    arg.add_argument('--max_full_height', type=int, default=600)

    arg = arg.parse_args(args)
    arg.fullres = arg.fullres or arg.cam

    if arg.gpu < 0:
        arg.gpu = None

    print 'Start time:', arg.start
    print 'GPU =', arg.gpu

    gpus = [arg.gpu]
    gpus = mu.set_gpus(gpus)

    if arg.duration_mult is not None:
        pr = sep_params.full()
        step = 0.001 * pr.frame_step_ms
        length = 0.001 * pr.frame_length_ms
        arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult

    fn = getattr(sep_params, arg.model)
    pr = fn(vid_dur=arg.clip_dur)

    if arg.clip_dur is None:
        arg.clip_dur = pr.vid_dur
    pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
    print 'Spectrogram samples:', pr.spec_len
    pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
                                                          pr.train_iters)

    if not os.path.exists(arg.vid_file):
        print 'Does not exist:', arg.vid_file
        sys.exit(1)

    if arg.duration is None:
        arg.duration = arg.clip_dur + 0.01

    print arg.duration, arg.clip_dur
    full_dur = arg.duration
    step_dur = arg.clip_dur / 2.
    filled = np.zeros(int(np.ceil(full_dur * pr.samp_sr)), 'bool')
    full_samples_fg = np.zeros(filled.shape, 'float32')
    full_samples_bg = np.zeros(filled.shape, 'float32')
    full_samples_src = np.zeros(filled.shape, 'float32')
    arg.start = ut.make_mod(arg.start, (1. / pr.fps))

    ts = np.arange(arg.start, arg.start + full_dur - arg.clip_dur, step_dur)
    full_ims = [None] * int(np.ceil(full_dur * pr.fps))

    # Process each video chunk
    for t in ut.time_est(ts):
        t = ut.make_mod(t, (1. / pr.fps))
        frame_start = int(t * pr.fps - arg.start * pr.fps)
        ret = run(arg.vid_file,
                  t,
                  arg.clip_dur,
                  pr,
                  gpus[0],
                  mask=arg.mask,
                  arg=arg)
        if ret is None:
            continue
        ims = ret['ims']
        for frame, im in zip(xrange(frame_start, frame_start + len(ims)), ims):
            full_ims[frame] = im

        samples_fg = ret['samples_pred_fg'][:, 0]
        samples_bg = ret['samples_pred_bg'][:, 0]
        samples_src = ret['samples_src'][:, 0]
        samples_src = samples_src[:samples_bg.shape[0]]

        sample_start = int(round((t - arg.start) * pr.samp_sr))
        n = samples_src.shape[0]
        inds = np.arange(sample_start, sample_start + n)
        ok = ~filled[inds]
        full_samples_fg[inds[ok]] = samples_fg[ok]
        full_samples_bg[inds[ok]] = samples_bg[ok]
        full_samples_src[inds[ok]] = samples_src[ok]
        filled[inds] = True

    full_samples_fg = np.clip(full_samples_fg, -1., 1.)
    full_samples_bg = np.clip(full_samples_bg, -1., 1.)
    full_samples_src = np.clip(full_samples_src, -1., 1.)
    full_ims = [x for x in full_ims if x is not None]
    table = [['start =', arg.start], 'fg:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_fg,
                                                   pr.samp_sr)), 'bg:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_bg,
                                                   pr.samp_sr)), 'src:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_src,
                                                   pr.samp_sr))]

    # Write videos
    if arg.out is not None:
        ut.mkdir(arg.out)
        vid_s = arg.vid_file.split('/')[-1].split('.mp4')[0]
        mask_s = '' if arg.mask is None else '_%s' % arg.mask
        cam_s = '' if not arg.cam else '_cam'
        suffix_s = '' if arg.suffix == '' else '_%s' % arg.suffix
        name = '%s%s%s_%s' % (suffix_s, mask_s, cam_s, vid_s)

        def snd(x):
            x = Sound(x, pr.samp_sr)
            x.samples = np.clip(x.samples, -1., 1.)
            return x

        print 'Writing to:', arg.out
        ut.save(pj(arg.out, 'ret%s.pk' % name), ret)
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'fg%s.mp4' % name),
                      snd(full_samples_fg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'bg%s.mp4' % name),
                      snd(full_samples_bg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'src%s.mp4' % name),
                      snd(full_samples_src))
    else:
        print 'Not writing, since --out was not set'

    print 'Video results:'
    ig.show(table)
    return 'fg%s.mp4' % name, 'bg%s.mp4' % name