def extract_frames(args):
  """Extract labeled frames for one clip with ffmpeg.

  Args:
    args: tuple (vid_file, time, label, vid_idx, im_dir, prev_free_time):
      vid_file: path to the source video.
      time: clip start time in seconds.
      label: integer class label (1 = press, 0 = negative).
      vid_idx: string id used in output file names.
      im_dir: directory the extracted .png frames are copied into.
      prev_free_time: start of a short window known to contain no event,
        from which one random "previous" reference frame is drawn.

  Returns:
    List of (im_file, prev_file, label, vid_file) example tuples, one per
    extracted frame of the clip.

  Note: takes a single tuple argument (not separate parameters) so it can be
  mapped over a list of tuples with ut.parmap; the original used Python-2-only
  tuple parameter unpacking (removed by PEP 3113).
  """
  (vid_file, time, label, vid_idx, im_dir, prev_free_time) = args
  examples = []
  # Pull one randomly-chosen reference frame from the event-free window.
  with ut.TmpDir() as tmp_dir:
    free_dur = 0.1
    ut.sys_check('ffmpeg -loglevel warning -ss %f -i "%s" -vf scale=%d:%d -t %f -r %d "%s/%%07d.png"' %
                 (prev_free_time, vid_file, full_dim, full_dim,
                  free_dur, sample_fps, tmp_dir))
    #fname = sorted(ut.glob(pj(tmp_dir, '*.png')))[0]
    assert len(ut.glob(pj(tmp_dir, '*.png'))), 'no frames for prev_free_time'
    fname = random.choice(sorted(ut.glob(pj(tmp_dir, '*.png'))))
    prev_file = pj(im_dir, 'prev_%s_%05d_%d.png' % (vid_idx, 0, label))
    # Quote both paths so files with spaces or shell metacharacters survive
    # the shell (the ffmpeg invocations above already quote their paths).
    ut.sys_check('cp "%s" "%s"' % (fname, prev_file))
  # Extract the clip itself at sample_fps for sample_dur_secs seconds.
  with ut.TmpDir() as tmp_dir:
    # ut.sys_check('ffmpeg -i "%s" -vf scale=%d:%d -ss %f -t %f -r %d "%s/%%07d.png"' % \
    #              (vid_file, full_dim, full_dim, time,
    #               sample_dur_secs, sample_fps, tmp_dir))
    ut.sys_check('ffmpeg -loglevel warning -ss %f -i "%s" -vf scale=%d:%d -t %f -r %d "%s/%%07d.png"' %
                 (time, vid_file, full_dim, full_dim,
                  sample_dur_secs, sample_fps, tmp_dir))
    for frame_idx, fname in enumerate(sorted(ut.glob(pj(tmp_dir, '*.png')))):
      im_file = pj(im_dir, '%s_%05d_%d.png' % (vid_idx, frame_idx, label))
      ut.sys_check('cp "%s" "%s"' % (fname, im_file))
      examples.append((im_file, prev_file, label, vid_file))
  return examples
def get_rec_files(path, needs_done_file):
  """Collect TF record files from a directory or a list of directories.

  Args:
    path: either a single directory path (string) — globbed for '*.tf' and
      '*.tfrecords' — or an iterable of directory paths, each globbed for
      '*.tf' only (matching the original behavior).
    needs_done_file: if True, keep only records that have a sibling
      '<file>_done.txt' marker (signals the record was fully written).

  Returns:
    Sorted list of record file paths.
  """
  # isinstance() instead of the fragile `type(path) != type('')` comparison
  # the original used; same branch semantics for string vs. list inputs.
  if isinstance(path, str):
    rec_files = ut.glob(pj(path, '*.tf')) + ut.glob(pj(path, '*.tfrecords'))
  else:
    rec_files = ut.flatten(ut.glob(pj(x, '*.tf')) for x in path)
  if needs_done_file:
    rec_files = [x for x in rec_files if os.path.exists(x + '_done.txt')]
  return sorted(rec_files)
def rec_files_from_path(path, num_db_files = None):
  # Resolve `path` to a list of record files.  `path` is either a .txt file
  # listing record paths (one per line) or a directory/pattern for ut.glob.
  # num_db_files optionally caps the number returned — note it is only
  # applied in the list-file branch, not the glob branch.
  print 'Path:', path
  if path.endswith('.txt'):
    rec_files = ut.read_lines(path)
    # Drop listed paths that don't exist, then truncate.  Python 2: filter()
    # returns a list here, so slicing is valid.
    rec_files = filter(os.path.exists, rec_files)[:num_db_files]
  else:
    # NOTE(review): ut.glob is called with two args here (path, pattern),
    # unlike the pj(...) single-arg style elsewhere — presumably the project
    # helper joins its arguments; confirm against ut.glob's signature.
    rec_files = sorted(ut.glob(path, '*.tf'))
  return rec_files
def find_best_iter(pr, gpu, num_iters=10, sample_rate=10, dset_name='val'):
  # Evaluate the 5 most recent checkpoints in pr.train_dir/slow on a sampled
  # subset of the train/val record files, and write the path of the
  # lowest-loss checkpoint to pr.resdir/model_path.txt.
  #
  # pr: project/parameter object (provides train_dir, loss_types, lists, ...).
  # gpu: GPU id to run on.
  # num_iters: NOTE(review): unused in this function.
  # sample_rate: evaluate roughly 1/sample_rate of the pairs, chosen by a
  #   deterministic hash of the ground-truth YouTube id.
  # dset_name: 'train' or 'val'; anything else raises RuntimeError.
  [gpu] = mu.set_gpus([gpu])
  # (loss, path) pairs compare lexicographically, so min() below tracks the
  # lowest-loss model; start with +inf so any real loss wins.
  best_iter = (np.inf, '')
  # Checkpoints are named net*-<step>.index; sort numerically by step and
  # keep the last 5, then evaluate newest first.
  model_paths = sorted(
    ut.glob(pj(pr.train_dir, 'slow', 'net*.index')),
    key=lambda x: int(x.split('-')[-1].split('.')[0]))[-5:]
  model_paths = list(reversed(model_paths))
  assert len(model_paths), 'no model paths at %s' % pj(
    pr.train_dir, 'slow', 'net*.index')
  for model_path in model_paths:
    # Strip the '.index' suffix to get the checkpoint prefix TF expects.
    model_path = model_path.split('.index')[0]
    print model_path
    clf = NetClf(pr, model_path, gpu=gpu)
    clf.init()
    if dset_name == 'train':
      print 'train'
      tf_files = sorted(ut.glob(pj(pr.train_list, '*.tf')))
    elif dset_name == 'val':
      tf_files = sorted(ut.glob(pj(pr.val_list, '*.tf')))
    else:
      raise RuntimeError()
    # Imported here rather than at module level — presumably to avoid a
    # circular or heavyweight import; confirm before hoisting.
    import sep_eval
    losses = []
    for ims, _, pair in sep_eval.pair_data(tf_files, pr):
      # Deterministic subsampling: same pairs are picked for every model,
      # so per-model losses are comparable.
      if abs(hash(pair['ytid_gt'])) % sample_rate == 0:
        res = clf.predict_unmixed(ims, pair['samples_gt'], pair['samples_bg'])
        # loss = np.mean(np.abs(res['spec_pred_fg'] - res['spec0']))
        # loss += np.mean(np.abs(res['spec_pred_bg'] - res['spec1']))
        loss = 0.
        # Accumulate whichever loss terms the project config enables.
        if 'pit' in pr.loss_types:
          loss += pit_loss([res['spec0']], [res['spec1']],
                           [res['spec_pred_fg']], [res['spec_pred_bg']], pr)
        if 'fg-bg' in pr.loss_types:
          loss += np.mean(np.abs(res['spec_pred_fg'] - res['spec0']))
          loss += np.mean(np.abs(res['spec_pred_bg'] - res['spec1']))
        losses.append(loss)
        print 'running:', np.mean(losses)
    loss = np.mean(losses)
    print model_path, 'Loss:', loss
    best_iter = min(best_iter, (loss, model_path))
  # Persist only the winning checkpoint path.
  ut.write_lines(pj(pr.resdir, 'model_path.txt'), [best_iter[1]])
def write_data(vid_path, out_dir, train_frac=0.75):
  # Build the train/test CSV datasets: extract frames from annotated videos
  # (meta .txt files under vid_path/train), merge in examples from the
  # 'manu-press' hdf5 databases, shuffle, and write
  # out_dir/{train.csv,test.csv} plus all frame images under out_dir/ims.
  #
  # vid_path: root containing 'train/*.txt' annotation files, each with a
  #   sibling .mp4 of the same basename.
  # out_dir: output directory for images and CSVs.
  # train_frac: fraction of the hdf5 databases assigned to the train split
  #   (the video-derived meta examples all go to train).
  im_dir = ut.mkdir(pj(out_dir, 'ims'))
  in_data = []
  meta_files = sorted(ut.glob(vid_path, 'train', '*.txt'))
  print 'meta files:'
  for x in meta_files:
    print x
  print
  for meta_idx, meta_file in enumerate(meta_files):
    # Time of the most recent 'n' (negative) annotation; used as the
    # event-free window for the clip's reference frame.
    last_prev_time = 0.
    vid_file = meta_file.replace('.txt', '.mp4')
    for clip_idx, ex in enumerate(ut.read_lines(meta_file)):
      prev_time = last_prev_time
      vid_idx = '%05d_%05d' % (meta_idx, clip_idx)
      print ex
      # Each annotation line is '<p|n> <time-in-seconds>'.
      s, time = ex.split()
      time = float(time)
      if s == 'p':
        label = 1
      elif s == 'n':
        label = 0
        # Only negatives update the event-free time for later clips.
        last_prev_time = time
      else:
        raise RuntimeError()
      in_data.append((vid_file, time, label, vid_idx, im_dir, prev_time))
  print 'Writing:', len(in_data), 'sequences'
  # Frame extraction is I/O-heavy; run it in parallel, one tuple per clip.
  meta_examples = ut.flatten(ut.parmap(extract_frames, in_data))
  meta_examples = ut.shuffled_with_seed(meta_examples)
  # add manu examples
  db_files = sorted(
    ut.sys_with_stdout('find ../data/manu-press -name "*.hdf5"').split())
  # Seeded shuffle keeps the train/test split reproducible across runs.
  db_files = ut.shuffled_with_seed(db_files)
  print 'Train fraction:', train_frac
  num_train = int(train_frac * len(db_files))
  db_train = db_files[:num_train]
  db_test = db_files[num_train:]
  train_db_examples = ut.flatten(
    ut.parmap(examples_from_db, [(x, im_dir) for x in db_train]))
  test_db_examples = ut.flatten(
    ut.parmap(examples_from_db, [(x, im_dir) for x in db_test]))
  print 'Number of db train examples:', len(train_db_examples)
  print 'Number of meta examples:', len(meta_examples)
  # CSV rows are 'im_file,prev_file,label,vid_file'.
  train_examples = ut.shuffled_with_seed(meta_examples + train_db_examples)
  ut.write_lines(pj(out_dir, 'train.csv'),
                 ['%s,%s,%d,%s' % x for x in train_examples])
  test_examples = ut.shuffled_with_seed(test_db_examples)
  ut.write_lines(pj(out_dir, 'test.csv'),
                 ['%s,%s,%d,%s' % x for x in test_examples])
def run(vid_file, start_time, dur, pr, gpu, buf=0.05, mask=None, arg=None, net=None):
  # Run source-separation inference on one video segment: extract frames and
  # audio with ffmpeg, optionally mask half the frames, run the net, and
  # return predicted foreground/background samples and spectrograms.
  #
  # vid_file: input video path.       start_time, dur: segment, in seconds.
  # pr: parameter object (fps, samp_sr, num_samples, sampled_frames, ...).
  # gpu: unused here — NOTE(review): the net is passed in pre-built via `net`.
  # buf: extra seconds appended to dur so enough frames/samples are decoded.
  # mask: None, 'l', or 'r' — gray out the left/right half of the frames.
  # arg: options object (max_full_height, fullres, cam).
  # net: initialized-on-demand model exposing init()/predict().
  # Returns a dict of predictions, or None if the clip is too short.
  print pr
  dur = dur + buf
  with ut.TmpDir() as vid_path:
    # Optional downscale cap for the full-resolution frames; -2 keeps the
    # width even as required by most codecs.
    height_s = '-vf "scale=-2:\'min(%d,ih)\'"' % arg.max_full_height if arg.max_full_height > 0 else ''
    # ut.frm interpolates %(name)s from the calling frame's locals
    # (presumably — confirm against ut.frm's implementation).
    # Three extractions: 256x256 frames for the net, full-res frames for
    # visualization, and stereo audio resampled to pr.samp_sr.
    ut.sys_check(
      ut.frm(
        'ffmpeg -loglevel error -ss %(start_time)s -i "%(vid_file)s" -safe 0 '
        '-t %(dur)s -r %(pr.fps)s -vf scale=256:256 "%(vid_path)s/small_%%04d.png"'
      ))
    ut.sys_check(
      ut.frm(
        'ffmpeg -loglevel error -ss %(start_time)s -i "%(vid_file)s" -safe 0 '
        '-t %(dur)s -r %(pr.fps)s %(height_s)s "%(vid_path)s/full_%%04d.png"'
      ))
    ut.sys_check(
      ut.frm(
        'ffmpeg -loglevel error -ss %(start_time)s -i "%(vid_file)s" -safe 0 '
        '-t %(dur)s -ar %(pr.samp_sr)s -ac 2 "%(vid_path)s/sound.wav"')
    )
    if arg.fullres:
      fulls = map(
        ig.load,
        sorted(ut.glob(vid_path, 'full_*.png'))[:pr.sampled_frames])
      fulls = np.array(fulls)
    snd = sound.load_sound(pj(vid_path, 'sound.wav'))
    samples_orig = snd.normalized().samples
    samples_orig = samples_orig[:pr.num_samples]
    samples_src = samples_orig.copy()
    # Clip shorter than the net's required sample count: nothing to do.
    if samples_src.shape[0] < pr.num_samples:
      return None
    ims = map(ig.load, sorted(ut.glob(vid_path, 'small_*.png')))
    ims = np.array(ims)
    # Center-crop the 256x256 frames to the net's 224x224 input.
    # Python 2 integer division keeps these indices integral.
    d = 224
    y = x = ims.shape[1] / 2 - d / 2
    ims = ims[:, y:y + d, x:x + d]
    ims = ims[:pr.sampled_frames]
    # Optionally blank one half of every frame (mid-gray 128) to ablate the
    # corresponding visual evidence; mirror the mask on full-res frames.
    if mask == 'l':
      ims[:, :, :ims.shape[2] / 2] = 128
      if arg.fullres:
        fulls[:, :, :fulls.shape[2] / 2] = 128
    elif mask == 'r':
      ims[:, :, ims.shape[2] / 2:] = 128
      if arg.fullres:
        fulls[:, :, fulls.shape[2] / 2:] = 128
    elif mask is None:
      pass
    else:
      raise RuntimeError()
    # Normalize input loudness to the RMS level the net was trained with.
    samples_src = mu.normalize_rms_np(samples_src[None], pr.input_rms)[0]
    net.init()
    # [None] adds the batch dimension; [0] strips it from each output.
    ret = net.predict(ims[None], samples_src[None])
    samples_pred_fg = ret['samples_pred_fg'][0][:, None]
    samples_pred_bg = ret['samples_pred_bg'][0][:, None]
    spec_pred_fg = ret['spec_pred_fg'][0]
    spec_pred_bg = ret['spec_pred_bg'][0]
    print spec_pred_bg.shape
    spec_mix = ret['spec_mix'][0]
    # Choose the visualization frames: CAM overlay, full-res, or net input.
    if arg.cam:
      cam, vis = find_cam(fulls, samples_orig, arg)
    else:
      if arg.fullres:
        vis = fulls
      else:
        vis = ims
    return dict(ims=vis,
                samples_pred_fg=samples_pred_fg,
                samples_pred_bg=samples_pred_bg,
                samples_mix=ret['samples_mix'][0],
                samples_src=samples_src,
                spec_pred_fg=spec_pred_fg,
                spec_pred_bg=spec_pred_bg,
                spec_mix=spec_mix)