Example 1
def train(pr, gpus, restore = False, restore_opt = True, 
          num_gpus = None, profile = False):
  print(pr)
  gpus = tfu.set_gpus(gpus)
  with tf.Graph().as_default():
    config = tf.ConfigProto(allow_soft_placement = True)
    sess = tf.InteractiveSession(config = config)
    gpus = gpus[:num_gpus]
    model = Model(pr, sess, gpus, profile = profile)
    model.make_train_model()

    if restore:
      model.restore(restore_opt = restore_opt)
    elif pr.init_path is not None:
      init_ops = []
      if pr.net_type == 'i3d':
        opt_names = ['Adam', 'beta1_power', 'beta2_power', 'Momentum']
        rgb_variable_map = {}
        for variable in tf.global_variables():
          if any(x in variable.name for x in opt_names):
            print('Skipping:', variable.name)
            continue
          if pr.init_from_2d:
            if variable.name.split('/')[0] == 'RGB':
              # if 'moving_mean' in variable.name or 'moving_variance' in variable.name:
              #   continue
              cp_name = (
                variable.name
                .replace('RGB/inception_i3d', 'InceptionV1')
                .replace('Conv3d', 'Conv2d')
                .replace('batch_norm', 'BatchNorm')
                .replace('conv_3d/w', 'weights')
                .replace(':0', ''))
              print('shape of', variable.name, shape(variable))
              v = tf.get_variable(cp_name, shape(variable)[1:], tf.float32)
              #rgb_variable_map[cp_name] = variable
              rgb_variable_map[cp_name] = v
              n = shape(v, 0)
              # Tile the 2D kernel n times along a new temporal axis and
              # rescale by 1/n (I3D-style kernel inflation).
              init_ops.append(variable.assign(
                1.0 / float(n) * tf.tile(ed(v, 0), (n, 1, 1, 1, 1))))
          else:
            if variable.name.split('/')[0] == 'RGB':
              rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        rgb_saver.restore(sess, pr.init_path)
        for x in init_ops:
          print('Running:', x)
          sess.run(x)
      else:
        print('Restoring from init_path:', pr.init_path)
        model.restore(pr.init_path, ul_only = True, restore_opt = False)

    tf.get_default_graph().finalize()

    model.train()
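
A note on the init_from_2d branch above: it bootstraps each 3D kernel from a
2D ImageNet checkpoint by tiling the 2D kernel n times along a new temporal
axis and rescaling by 1/n (I3D-style "inflation"), so a temporally constant
input produces the same response as the original 2D filter. A minimal NumPy
sketch of just that step, assuming TF kernel layout (kh, kw, c_in, c_out);
the name inflate_2d_kernel is invented for illustration:

import numpy as np

def inflate_2d_kernel(w2d, n):
    # w2d: 2D kernel of shape (kh, kw, c_in, c_out). Returns a 3D kernel of
    # shape (n, kh, kw, c_in, c_out): tile along a new leading time axis,
    # then divide by n so the summed response over time is unchanged.
    return np.tile(w2d[np.newaxis], (n, 1, 1, 1, 1)) / float(n)

w2d = np.random.randn(7, 7, 3, 64).astype(np.float32)
w3d = inflate_2d_kernel(w2d, n=7)
assert w3d.shape == (7, 7, 7, 3, 64)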
Example 2
def find_best_iter(pr, gpu, num_iters=10, sample_rate=10, dset_name='val'):
    [gpu] = mu.set_gpus([gpu])
    best_iter = (np.inf, '')
    model_paths = sorted(
        ut.glob(pj(pr.train_dir, 'slow', 'net*.index')),
        key=lambda x: int(x.split('-')[-1].split('.')[0]))[-5:]
    model_paths = list(reversed(model_paths))
    assert len(model_paths), 'no model paths at %s' % pj(
        pr.train_dir, 'slow', 'net*.index')
    for model_path in model_paths:
        model_path = model_path.split('.index')[0]
        print(model_path)
        clf = NetClf(pr, model_path, gpu=gpu)
        clf.init()
        if dset_name == 'train':
            print('train')
            tf_files = sorted(ut.glob(pj(pr.train_list, '*.tf')))
        elif dset_name == 'val':
            tf_files = sorted(ut.glob(pj(pr.val_list, '*.tf')))
        else:
            raise RuntimeError('unknown dset_name: %s' % dset_name)

        import sep_eval
        losses = []
        for ims, _, pair in sep_eval.pair_data(tf_files, pr):
            if abs(hash(pair['ytid_gt'])) % sample_rate == 0:
                res = clf.predict_unmixed(ims, pair['samples_gt'],
                                          pair['samples_bg'])
                # loss = np.mean(np.abs(res['spec_pred_fg'] - res['spec0']))
                # loss += np.mean(np.abs(res['spec_pred_bg'] - res['spec1']))
                loss = 0.
                if 'pit' in pr.loss_types:
                    loss += pit_loss([res['spec0']], [res['spec1']],
                                     [res['spec_pred_fg']],
                                     [res['spec_pred_bg']], pr)
                if 'fg-bg' in pr.loss_types:
                    loss += np.mean(np.abs(res['spec_pred_fg'] - res['spec0']))
                    loss += np.mean(np.abs(res['spec_pred_bg'] - res['spec1']))
                losses.append(loss)
                print('running mean loss:', np.mean(losses))
        loss = np.mean(losses)
        print(model_path, 'Loss:', loss)
        best_iter = min(best_iter, (loss, model_path))
    ut.write_lines(pj(pr.resdir, 'model_path.txt'), [best_iter[1]])
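
find_best_iter calls a pit_loss helper whose source is not shown. For
reference, here is a toy two-source version of the permutation-invariant
training (PIT) idea, assuming a plain L1 distance on spectrograms; the name
pit_loss_l1 is made up, and the real helper takes batched lists plus a params
object:

import numpy as np

def pit_loss_l1(spec0, spec1, pred_a, pred_b):
    # Score both assignments of predictions to references and keep the
    # cheaper one, making the loss invariant to the ordering of sources.
    direct = np.mean(np.abs(pred_a - spec0)) + np.mean(np.abs(pred_b - spec1))
    swapped = np.mean(np.abs(pred_a - spec1)) + np.mean(np.abs(pred_b - spec0))
    return min(direct, swapped)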
Example 3
def train(pr, gpus, restore=False, restore_opt=True):
    print(pr)
    gpus = mu.set_gpus(gpus)
    with tf.Graph().as_default():
        config = tf.ConfigProto(allow_soft_placement=True)
        #config = tf.ConfigProto()
        sess = tf.InteractiveSession(config=config)
        model = Model(pr, sess, gpus)
        model.make_model()

        if restore:
            model.restore(restore_opt=restore_opt)
        elif pr.init_path is not None:
            model.restore(pr.init_path,
                          restore_resnet18_blocks=False,
                          restore_opt=False)

        tf.get_default_graph().finalize()
        model.train()
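
All of these examples call a set_gpus helper (mu.set_gpus / tfu.set_gpus)
whose source is not included. A plausible minimal stand-in, assuming it works
by masking CUDA_VISIBLE_DEVICES; this is a guess for illustration, not the
project's actual implementation:

import os

def set_gpus(gpu_ids):
    # Restrict the process to the given GPUs. After masking, the visible
    # devices are renumbered from 0, so return the remapped indices.
    gpu_ids = [g for g in gpu_ids if g is not None]
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(g) for g in gpu_ids)
    return list(range(len(gpu_ids)))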
Example 4
def train(pr, gpus, restore=False, restore_opt=True, profile=False):
    print(pr)
    gpus = mu.set_gpus(gpus)
    with tf.Graph().as_default():
        config = tf.ConfigProto(allow_soft_placement=True)
        sess = tf.InteractiveSession(config=config)
        model = Model(pr, sess, gpus, profile=profile)
        model.make_model()
        if restore:
            model.restore(restore_opt=restore_opt)
        elif pr.init_path is not None:
            if pr.init_type in ['shift', 'sep']:
                model.restore(pr.init_path,
                              restore_opt=False,
                              init_type=pr.init_type)
            elif pr.init_type == 'i3d':
                opt_names = ['Adam', 'beta1_power', 'beta2_power', 'Momentum']
                rgb_variable_map = {}
                for variable in tf.global_variables():
                    if any(x in variable.name for x in opt_names):
                        print('Skipping:', variable.name)
                        continue
                    if variable.name.split('/')[0] == 'RGB':
                        rgb_variable_map[variable.name.replace(':0',
                                                               '')] = variable
                        print('Restoring:', variable.name)
                rgb_saver = tf.train.Saver(var_list=rgb_variable_map,
                                           reshape=True)
                rgb_saver.restore(sess, pr.init_path)
            elif pr.init_type == 'scratch':
                pass
            else:
                raise RuntimeError('unknown init_type: %s' % pr.init_type)

        tf.get_default_graph().finalize()
        model.train()
Example 5
    arg.add_argument('--suffix', type=str, default='')
    arg.add_argument('--max_full_height', type=int, default=600)

    #arg.set_defaults(cam = False)

    arg = arg.parse_args()
    arg.fullres = arg.fullres or arg.cam

    if arg.gpu < 0:
        arg.gpu = None

    print('Start time:', arg.start)
    print('GPU =', arg.gpu)

    gpus = [arg.gpu]
    gpus = mu.set_gpus(gpus)

    if arg.duration_mult is not None:
        pr = sep_params.full()
        step = 0.001 * pr.frame_step_ms
        length = 0.001 * pr.frame_length_ms
        arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult

    fn = getattr(sep_params, arg.model)
    pr = fn(vid_dur=arg.clip_dur)

    if arg.clip_dur is None:
        arg.clip_dur = pr.vid_dur
    pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
    print('Spectrogram samples:', pr.spec_len)
    pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
                                                          pr.train_iters)
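
The clip_dur arithmetic above converts the STFT geometry (hop and window
length in milliseconds, plus the number of spectrogram frames) into a clip
duration in seconds. A worked example with placeholder values; these are not
the real sep_params defaults:

frame_step_ms, frame_length_ms, spec_len = 10.0, 64.0, 256  # placeholders
duration_mult = 2.0
step = 0.001 * frame_step_ms        # STFT hop in seconds
length = 0.001 * frame_length_ms    # STFT window in seconds
clip_dur = length + step * (0.5 + spec_len) * duration_mult
print(clip_dur)  # 5.194 s with these made-up numbers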
Example 6
def main(args):
    arg = argparse.ArgumentParser(
        description='Separate on- and off-screen audio from a video')
    arg.add_argument('vid_file', type=str, help='Video file to process')
    arg.add_argument(
        '--duration_mult',
        type=float,
        default=None,
        help=
        'Multiply the default duration of the audio (i.e. %f) by this amount. Should be a power of 2.'
        % sep_params.VidDur)
    arg.add_argument(
        '--mask',
        type=str,
        default=None,
        help=
        "set to 'l' or 'r' to visually mask the left/right half of the video before processing"
    )
    arg.add_argument('--start',
                     type=float,
                     default=0.,
                     help='How many seconds into the video to start')
    arg.add_argument(
        '--model',
        type=str,
        default='full',
        help='Which variation of the source separation model to run.')
    arg.add_argument('--gpu', type=int, default=0, help='Set to -1 for no GPU')
    arg.add_argument('--out',
                     type=str,
                     default=None,
                     help='Directory to save videos')
    arg.add_argument('--cam', dest='cam', default=False, action='store_true')

    # undocumented/deprecated options
    arg.add_argument('--clip_dur', type=float, default=None)
    arg.add_argument('--duration', type=float, default=None)
    arg.add_argument('--fullres', type=bool, default=True)
    arg.add_argument('--suffix', type=str, default='')
    arg.add_argument('--max_full_height', type=int, default=600)

    arg = arg.parse_args(args)
    arg.fullres = arg.fullres or arg.cam

    if arg.gpu < 0:
        arg.gpu = None

    print('Start time:', arg.start)
    print('GPU =', arg.gpu)

    gpus = [arg.gpu]
    gpus = mu.set_gpus(gpus)

    if arg.duration_mult is not None:
        pr = sep_params.full()
        step = 0.001 * pr.frame_step_ms
        length = 0.001 * pr.frame_length_ms
        arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult

    fn = getattr(sep_params, arg.model)
    pr = fn(vid_dur=arg.clip_dur)

    if arg.clip_dur is None:
        arg.clip_dur = pr.vid_dur
    pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
    print 'Spectrogram samples:', pr.spec_len
    pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name,
                                                          pr.train_iters)

    if not os.path.exists(arg.vid_file):
        print('Does not exist:', arg.vid_file)
        sys.exit(1)

    if arg.duration is None:
        arg.duration = arg.clip_dur + 0.01

    print(arg.duration, arg.clip_dur)
    full_dur = arg.duration
    step_dur = arg.clip_dur / 2.
    filled = np.zeros(int(np.ceil(full_dur * pr.samp_sr)), 'bool')
    full_samples_fg = np.zeros(filled.shape, 'float32')
    full_samples_bg = np.zeros(filled.shape, 'float32')
    full_samples_src = np.zeros(filled.shape, 'float32')
    arg.start = ut.make_mod(arg.start, (1. / pr.fps))

    ts = np.arange(arg.start, arg.start + full_dur - arg.clip_dur, step_dur)
    full_ims = [None] * int(np.ceil(full_dur * pr.fps))

    # Process each video chunk
    for t in ut.time_est(ts):
        t = ut.make_mod(t, (1. / pr.fps))
        frame_start = int(t * pr.fps - arg.start * pr.fps)
        ret = run(arg.vid_file,
                  t,
                  arg.clip_dur,
                  pr,
                  gpus[0],
                  mask=arg.mask,
                  arg=arg)
        if ret is None:
            continue
        ims = ret['ims']
        for frame, im in zip(range(frame_start, frame_start + len(ims)), ims):
            full_ims[frame] = im

        samples_fg = ret['samples_pred_fg'][:, 0]
        samples_bg = ret['samples_pred_bg'][:, 0]
        samples_src = ret['samples_src'][:, 0]
        samples_src = samples_src[:samples_bg.shape[0]]

        sample_start = int(round((t - arg.start) * pr.samp_sr))
        n = samples_src.shape[0]
        inds = np.arange(sample_start, sample_start + n)
        ok = ~filled[inds]
        full_samples_fg[inds[ok]] = samples_fg[ok]
        full_samples_bg[inds[ok]] = samples_bg[ok]
        full_samples_src[inds[ok]] = samples_src[ok]
        filled[inds] = True

    full_samples_fg = np.clip(full_samples_fg, -1., 1.)
    full_samples_bg = np.clip(full_samples_bg, -1., 1.)
    full_samples_src = np.clip(full_samples_src, -1., 1.)
    full_ims = [x for x in full_ims if x is not None]
    table = [['start =', arg.start], 'fg:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_fg,
                                                   pr.samp_sr)), 'bg:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_bg,
                                                   pr.samp_sr)), 'src:',
             imtable.Video(full_ims, pr.fps, Sound(full_samples_src,
                                                   pr.samp_sr))]

    vid_s = arg.vid_file.split('/')[-1].split('.mp4')[0]
    mask_s = '' if arg.mask is None else '_%s' % arg.mask
    cam_s = '' if not arg.cam else '_cam'
    suffix_s = '' if arg.suffix == '' else '_%s' % arg.suffix
    # Build the output name unconditionally: the return statement below
    # uses it even when --out is not set.
    name = '%s%s%s_%s' % (suffix_s, mask_s, cam_s, vid_s)

    # Write videos
    if arg.out is not None:
        ut.mkdir(arg.out)

        def snd(x):
            x = Sound(x, pr.samp_sr)
            x.samples = np.clip(x.samples, -1., 1.)
            return x

        print('Writing to:', arg.out)
        ut.save(pj(arg.out, 'ret%s.pk' % name), ret)
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'fg%s.mp4' % name),
                      snd(full_samples_fg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'bg%s.mp4' % name),
                      snd(full_samples_bg))
        ut.make_video(full_ims, pr.fps, pj(arg.out, 'src%s.mp4' % name),
                      snd(full_samples_src))
    else:
        print('Not writing, since --out was not set')

    print('Video results:')
    ig.show(table)
    return 'fg%s.mp4' % name, 'bg%s.mp4' % name
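
The chunk loop in main stitches half-overlapping windows into a single output
track: a boolean filled mask records which samples have been written, so in
overlap regions the earlier window wins. A toy, self-contained version of
just that bookkeeping (the buffer and window sizes here are arbitrary):

import numpy as np

total, win = 16, 8
step = win // 2                          # half-overlapping windows
out = np.zeros(total, 'float32')
filled = np.zeros(total, 'bool')
for start in range(0, total - win + 1, step):
    chunk = np.full(win, float(start))   # stand-in for the model's output
    inds = np.arange(start, start + win)
    ok = ~filled[inds]                   # only write samples not yet filled
    out[inds[ok]] = chunk[ok]
    filled[inds] = True
print(out)  # [0.]*8 + [4.]*4 + [8.]*4: earlier windows win in overlaps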
Example 7
        Arg.out = r'E:\Avinash\miscellaneous\project\av_segmentation\multisensory\results'
        Arg.cam = True
        Arg.adapt_cam_thresh = True
        Arg.max_cam_thresh = 35
        Arg.clip_dur = None
        Arg.duration = None
        Arg.fullres = True
        Arg.suffix = ''
        Arg.max_full_height = 600
        Arg.fullres = Arg.fullres or Arg.cam
        if Arg.gpu < 0:
            Arg.gpu = None


arg = Arg(vid_file)
gpus = mu.set_gpus([arg.gpu])

if arg.duration_mult is not None:
    pr = sep_params.full()
    step = 0.001 * pr.frame_step_ms
    length = 0.001 * pr.frame_length_ms
    arg.clip_dur = length + step * (0.5 + pr.spec_len) * arg.duration_mult

fn = getattr(sep_params, arg.model)
pr = fn(vid_dur=arg.clip_dur)

if arg.clip_dur is None:
    arg.clip_dur = pr.vid_dur
pr.input_rms = np.sqrt(0.1**2 + 0.1**2)
print('Spectrogram samples:', pr.spec_len)
pr.model_path = '../results/nets/sep/%s/net.tf-%d' % (pr.name, pr.train_iters)