def list_all_files(fldr, extn='.mp4'):
    """Recursively collect files under *fldr* whose name ends with *extn*.

    The extension match is case-insensitive; each hit is passed through
    ``cpath`` (project path normalizer) before being returned.
    """
    wanted = extn.lower()
    return [
        cpath(os.path.join(dirpath, fname))
        for dirpath, _subdirs, fnames in os.walk(fldr)
        for fname in fnames
        if fname.lower().endswith(wanted)
    ]
def save_processed_videos_list(vid_paths, out_fldr_logs):
    """Append a run log listing the processed video paths.

    Writes one path per line to ``<out_fldr_logs>/<YYYY-MM-DD>/run-<ts>.log``,
    creating the dated subfolder as needed. The joined text is normalized
    via ``cpath`` before writing, matching how the logs are read back.
    """
    ofl = os.path.join(out_fldr_logs, datetime.now().date().isoformat())
    # exist_ok avoids the exists()/makedirs() race when runs overlap.
    os.makedirs(ofl, exist_ok=True)
    fname = 'run-{}.log'.format(time.time())
    fpath = os.path.join(ofl, fname)
    with open(fpath, 'wt') as f:
        txt = '\n'.join(vid_paths)
        f.write(cpath(txt))
def load_processed_videos_list(out_fldr_logs):
    """Read every ``.log`` file under *out_fldr_logs* and return the
    accumulated list of previously processed video paths.

    Each line is normalized via ``cpath`` and stripped; blank lines are
    skipped. Returns an empty list when the log folder does not exist.
    """
    collected = []
    if not os.path.exists(out_fldr_logs):
        return collected
    for log_file in list_all_files(out_fldr_logs, extn='.log'):
        with open(log_file, 'rt') as fh:
            lines = fh.read().split('\n')
        entries = [cpath(line).strip() for line in lines if line and line.strip()]
        collected.extend(entries)
    return collected
def gen_new_videos_list(fldr, log_fldr, reprocess=False, limit_size=999999):
    """Return the list of ``.mp4`` files under *fldr* still to be processed.

    Unless *reprocess* is True, paths whose filename matches an entry in
    the run logs under *log_fldr* are skipped. The result is capped at
    *limit_size* entries.
    """
    vid_paths = list_all_files(fldr, extn='.mp4')
    print('videos found: {}'.format(len(vid_paths)))
    # Initialize unconditionally: the summary print below reads `dupes`
    # even when reprocess=True (previously a NameError in that case).
    dupes = []
    if not reprocess:
        for exist_path in load_processed_videos_list(log_fldr):
            exist_fname = cpath(exist_path).split('/')[-1]
            for f in vid_paths:
                # Suffix match: logged entries may carry a different root.
                if f.endswith(exist_fname):
                    dupes.append(f)
        vid_paths = list(sorted(set(vid_paths) - set(dupes)))
    if len(vid_paths) > limit_size:
        print('Limiting batch size: {}'.format(limit_size))
        vid_paths = vid_paths[:limit_size]
    print('Skip extracted videos: [{}] Processing:[{}]'.format(
        len(dupes), len(vid_paths)))
    return vid_paths
def parse_mp4_fname(f):
    """Return the basename of *f* with a trailing ``.mp4`` extension removed.

    Only the suffix is stripped: a filename like ``a.mp4.bak.mp4`` yields
    ``a.mp4.bak`` (the old ``str.replace`` removed every occurrence).
    """
    fname = cpath(f).split('/')[-1]
    if fname.endswith('.mp4'):
        fname = fname[:-len('.mp4')]
    return fname