def test_generator_processor_threads(tmpdir, bees_video, filelists_path, pipeline_config):
    """Feed the bees video through three pipelines and inspect the output.

    The results must be written to exactly one repository file, and that
    file must contain three frames in total.
    """
    repo = Repository(str(tmpdir))

    # Three identically configured pipelines are handed to the processor.
    pipelines = []
    for _ in range(3):
        pipelines.append(
            Pipeline([Image, Timestamp], [PipelineResult], **pipeline_config))

    def make_sink():
        return BBBinaryRepoSink(repo, camId=0)

    processor = GeneratorProcessor(pipelines, make_sink)
    processor(video_generator(bees_video, ts_format="2015",
                              path_filelists=filelists_path))

    stored = list(repo.iter_fnames())
    assert len(stored) == 1

    frame_total = 0
    for path in repo.iter_fnames():
        with open(path, "rb") as handle:
            container = FrameContainer.read(handle)
            frame_total += len(list(container.frames))
    assert frame_total == 3
def get_files(path):
    """Return the repository's file names arranged in rows of four.

    Parameters
    ----------
    path : str
        Root directory handed to ``Repository``.

    Returns
    -------
    numpy.ndarray
        Two-dimensional array with four columns containing the file names in
        the order ``Repository.iter_fnames`` yields them.

    Raises
    ------
    ValueError
        If the number of files is not a multiple of four.
    """
    fnames = list(Repository(path).iter_fnames())
    # -1 lets numpy derive the row count; this replaces the former
    # ``len(...) / 4`` float division followed by an ``int`` cast.
    # NOTE(review): four columns presumably correspond to four cameras;
    # confirm against the callers.
    return np.array(fnames).reshape(-1, 4)
def test_bbb_repo_iter_fnames_filtered(tmpdir):
    """Check that ``iter_fnames`` only yields files accepted by ``fname_filter``.

    The repository holds containers for two cameras. Five file names are
    drawn at random from the camera-0 files of the queried window, and the
    filtered iteration must return exactly those five.
    """
    repo = Repository(str(tmpdir.join('complex_from_to_and_cam')))
    span = 200
    # Camera 0 entries first, then camera 1.
    begin_end_cam_id = [
        (ts, ts + span, cam_id)
        for cam_id in (0, 1)
        for ts in range(0, 10000, span)
    ]
    fill_repository(repo, begin_end_cam_id)

    begin, end, cam = 2500, 5000, 0
    candidates = list(repo.iter_fnames(begin, end, cam))
    drawn = np.random.choice(candidates, size=5, replace=False)
    selected_fnames = {os.path.basename(path) for path in drawn}

    class FnameFilter():
        """Callable that accepts only names contained in ``allowed``."""

        def __init__(self, allowed):
            self.allowed = allowed

        def __call__(self, fname):
            return fname in self.allowed

    accept = FnameFilter(selected_fnames)
    matches = set(repo.iter_fnames(begin, end, cam, fname_filter=accept))
    filtered_fnames = {os.path.basename(path) for path in matches}
    assert filtered_fnames == selected_fnames
def test_generator_processor(tmpdir, bees_image, pipeline_config):
    """Process two short image streams and check both stored containers.

    Each stream yields the bees image twice. Afterwards the repository must
    contain two files with increasing start timestamps, each naming
    'bees.jpeg' as its first data source.
    """
    def image_generator():
        start = time.time()
        source = DataSource.new_message(filename='bees.jpeg')
        for offset in (0, 1):
            yield source, imread(bees_image), start + offset

    repo = Repository(str(tmpdir))
    pipeline = Pipeline([Image, Timestamp], [PipelineResult], **pipeline_config)

    def make_sink():
        return BBBinaryRepoSink(repo, camId=2)

    processor = GeneratorProcessor(pipeline, make_sink)
    # Two separate runs, one generator each.
    processor(image_generator())
    processor(image_generator())

    stored = list(repo.iter_fnames())
    assert len(stored) == 2

    previous_ts = 0
    for path in repo.iter_fnames():
        print("{}: {}".format(path, os.path.getsize(path)))
        with open(path, 'rb') as handle:
            container = FrameContainer.read(handle)
            assert container.dataSources[0].filename == 'bees.jpeg'
            assert previous_ts < container.fromTimestamp
            previous_ts = container.fromTimestamp
def test_bbb_repo_iter_frames_filtered(tmpdir):
    """Check that ``iter_frames`` only yields frames accepted by ``frame_filter``."""
    repo = Repository(str(tmpdir.join('frames_from_to_filtered')))
    repo_start, nFC, span = 0, 10, 1000
    repo_end = repo_start + nFC * span

    windows = [(ts, ts + span, 0) for ts in range(repo_start, repo_end, span)]
    for first, stop, cam_id in windows:
        fc = build_frame_container(first, stop, cam_id)
        fc.init('frames', span)
        # One frame per timestamp; id and timestamp are the same value.
        for idx in range(stop - first):
            frame = fc.frames[idx]
            frame.id = first + idx
            frame.timestamp = first + idx
        repo.add(fc)

    # The query window is the span of the last container added above.
    begin, end, _ = windows[-1]

    timestamps = [frame.timestamp for frame, _ in repo.iter_frames(begin, end)]
    selected_timestamps = set(np.random.choice(timestamps, size=5, replace=False))

    class TimestepFilter():
        """Callable that accepts frames whose timestamp is in ``wanted``."""

        def __init__(self, wanted):
            self.wanted = wanted

        def __call__(self, frame):
            return frame.timestamp in self.wanted

    keep = TimestepFilter(selected_timestamps)
    filtered_frames = list(repo.iter_frames(begin, end, frame_filter=keep))
    filtered_timestamps = {pair[0].timestamp for pair in filtered_frames}
    assert filtered_timestamps == selected_timestamps
def test_bbb_iter_small_frame_window(tmpdir):
    """Tests that iteration returns results if the time window is small."""
    repo = Repository(str(tmpdir.join('frames_from_to_small_window')))
    repo_start, nFC, span = 0, 10, 1000
    repo_end = repo_start + nFC * span

    for first in range(repo_start, repo_end, span):
        stop = first + span
        fc = build_frame_container(first, stop, 0)
        fc.init('frames', span)
        for idx in range(stop - first):
            frame = fc.frames[idx]
            frame.id = first + idx
            frame.timestamp = first + idx
        repo.add(fc)

    # Small window at the start of the first container: frames expected.
    frames_found = list(repo.iter_frames(1, 10))
    assert len(frames_found) > 0

    # Small window at the start of the second container: file names expected.
    fnames_found = list(repo.iter_fnames(1001, 1011))
    assert len(fnames_found) > 0
def test_bbb_repo_iter_fnames_filtered(tmpdir):
    """Verify that ``fname_filter`` restricts what ``iter_fnames`` returns."""
    repo = Repository(str(tmpdir.join('complex_from_to_and_cam')))
    span = 200

    def spans_for(cam_id):
        return [(ts, ts + span, cam_id) for ts in range(0, 10000, span)]

    begin_end_cam_id = spans_for(0) + spans_for(1)
    fill_repository(repo, begin_end_cam_id)

    begin = 2500
    end = 5000
    cam = 0
    in_window = list(repo.iter_fnames(begin, end, cam))
    # Random sample of five files; only their base names are compared.
    sample = np.random.choice(in_window, size=5, replace=False)
    selected_fnames = set(os.path.basename(name) for name in sample)

    class FnameFilter():
        """Accepts a file name only if it is in the given collection."""

        def __init__(self, names):
            self.names = names

        def __call__(self, fname):
            return fname in self.names

    name_filter = FnameFilter(selected_fnames)
    returned = set(repo.iter_fnames(begin, end, cam, fname_filter=name_filter))
    filtered_fnames = set(os.path.basename(name) for name in returned)
    assert filtered_fnames == selected_fnames
def test_bbb_repo_iter_fnames_from_to_and_cam(tmpdir):
    """Check ``iter_fnames`` when restricted by time window and camera id."""
    repo = Repository(str(tmpdir.join('complex_from_to_and_cam')))
    span = 200
    cam0_spans = [(ts, ts + span, 0) for ts in range(0, 10000, span)]
    cam1_spans = [(ts, ts + span, 1) for ts in range(0, 10000, span)]
    begin_end_cam_id = cam0_spans + cam1_spans
    fill_repository(repo, begin_end_cam_id)

    begin, end, cam = 2500, 5000, 0
    fnames = list(repo.iter_fnames(begin, end, cam))
    assert all(os.path.isabs(path) for path in fnames)
    fbasenames = [os.path.basename(path) for path in fnames]
    print(begin_end_cam_id)

    # Expected: every entry of the requested camera overlapping the window.
    expected_fnames = []
    for lo, hi, cam_id in begin_end_cam_id:
        if begin <= hi and lo < end and cam_id == cam:
            full = repo._get_filename(lo, hi, cam_id, extension='bbb')
            expected_fnames.append(os.path.basename(full))
    assert fbasenames == expected_fnames
def test_generator_processor_video(tmpdir, bees_video, filelists_path, pipeline_config):
    """Process the bees video with a single pipeline and check the result.

    Exactly one file with three frames is expected; its first data source
    must carry the video's base name.
    """
    repo = Repository(str(tmpdir))
    pipeline = Pipeline([Image, Timestamp], [PipelineResult], **pipeline_config)

    def make_sink():
        return BBBinaryRepoSink(repo, camId=0)

    processor = GeneratorProcessor(pipeline, make_sink)
    frames_in = video_generator(bees_video, ts_format="2015",
                                path_filelists=filelists_path)
    processor(frames_in)

    stored = list(repo.iter_fnames())
    assert len(stored) == 1

    video_name = os.path.basename(bees_video)
    previous_ts = 0
    frame_total = 0
    for path in repo.iter_fnames():
        print("{}: {}".format(path, os.path.getsize(path)))
        with open(path, "rb") as handle:
            container = FrameContainer.read(handle)
            frame_total += len(list(container.frames))
            assert container.dataSources[0].filename == video_name
            assert previous_ts < container.fromTimestamp
            previous_ts = container.fromTimestamp
    assert frame_total == 3
def test_bbb_repo_open_frame_container(tmpdir):
    """Add a container and re-open it via a timestamp inside its range."""
    repo = Repository(str(tmpdir))
    cam_id = 1
    stored = build_frame_container(1000, 5000, cam_id)
    repo.add(stored)

    # 2000 lies between the container's begin (1000) and end (5000).
    loaded = repo.open(2000, cam_id)
    assert stored.fromTimestamp == loaded.fromTimestamp
    assert stored.toTimestamp == loaded.toTimestamp
def test_bbb_repo_open_frame_container(tmpdir):
    """Check that ``Repository.open`` returns the container that was added."""
    cam_id = 1
    repo = Repository(str(tmpdir))
    original = build_frame_container(1000, 5000, cam_id)
    repo.add(original)

    reopened = repo.open(2000, cam_id)
    # Both ends of the time range must match the added container.
    for attr in ('fromTimestamp', 'toTimestamp'):
        assert getattr(original, attr) == getattr(reopened, attr)
def test_bbb_repo_iter_fnames_2_files_and_1_symlink_per_directory(tmpdir):
    """Check ``iter_fnames`` for containers that overlap their successor.

    Each container covers ``span + 100`` while containers start every
    ``span``; every file must be listed exactly once, in insertion order.
    """
    repo = Repository(str(tmpdir.join('2_files_and_1_symlink_per_directory')))
    span = 500
    begin_end_cam_id = []
    for ts in range(0, 10000, span):
        begin_end_cam_id.append((ts, ts + span + 100, 0))
    fill_repository(repo, begin_end_cam_id)

    fnames = [os.path.basename(path) for path in repo.iter_fnames()]
    expected_fnames = []
    for begin, end, cam_id in begin_end_cam_id:
        full = repo._get_filename(begin, end, cam_id, extension='bbb')
        expected_fnames.append(os.path.basename(full))
    assert fnames == expected_fnames
def test_bbb_repo_end_after_last_file(tmpdir):
    """Iterate with an ``end`` that lies beyond the last stored container."""
    repo = Repository(str(tmpdir.join('complex_from_to')))
    span = 1500
    fill_repository(
        repo, [(ts, ts + span, 0) for ts in range(0, 10000, span)])

    # end=11000 is past the last container's start (< 10000).
    found = list(repo.iter_fnames(2500, 11000))
    assert all(os.path.isabs(path) for path in found)
def test_bbb_repo_end_after_last_file(tmpdir):
    """Check that ``iter_fnames`` copes with an end past the last file."""
    repo = Repository(str(tmpdir.join('complex_from_to')))
    span = 1500
    begin_end_cam_id = []
    for ts in range(0, 10000, span):
        begin_end_cam_id.append((ts, ts + span, 0))
    fill_repository(repo, begin_end_cam_id)

    begin, end = 2500, 11000
    # Every returned name must be an absolute path.
    for path in list(repo.iter_fnames(begin, end)):
        assert os.path.isabs(path)
def test_bbb_repo_iter_fnames_2_files_and_1_symlink_per_directory(tmpdir):
    """Verify that overlapping containers are each listed once by ``iter_fnames``."""
    repo = Repository(str(tmpdir.join('2_files_and_1_symlink_per_directory')))
    span = 500
    # Containers start every `span` but last `span + 100`.
    begin_end_cam_id = [(ts, ts + span + 100, 0) for ts in range(0, 10000, span)]
    fill_repository(repo, begin_end_cam_id)

    def expected_basename(entry):
        return os.path.basename(repo._get_filename(*entry, extension='bbb'))

    listed = [os.path.basename(path) for path in repo.iter_fnames()]
    assert listed == [expected_basename(entry) for entry in begin_end_cam_id]
def test_bbb_repo_get_ts_from_path(tmpdir):
    """Check ``_get_time_from_path`` on fixed paths and as inverse of ``_path_for_dt``."""
    repo = Repository(str(tmpdir))

    known = (
        ('1800/10/01/00/00', datetime(1800, 10, 1, 0, 0, tzinfo=pytz.utc)),
        ('2017/10/15/23/40', datetime(2017, 10, 15, 23, 40, tzinfo=pytz.utc)),
    )
    for path, expected in known:
        assert repo._get_time_from_path(path) == expected

    # Round trip: a datetime on a minute_step boundary maps to a path and back.
    step = repo.minute_step
    dt = datetime(2017, 10, 15, 23, step, tzinfo=pytz.utc)
    path = repo._path_for_dt(dt)
    assert path == '2017/10/15/23/{:02d}'.format(step)
    assert repo._get_time_from_path(path) == dt
def test_bbb_repo_iter_fnames_missing_directories(tmpdir):
    """Check that ``iter_fnames`` lists all files, in order, as absolute paths."""
    repo = Repository(str(tmpdir.join('missing_directories')))
    span = 1500
    begin_end_cam_id = [(ts, ts + span, 0) for ts in range(0, 10000, span)]
    fill_repository(repo, begin_end_cam_id)

    fnames = list(repo.iter_fnames())
    assert all(os.path.isabs(path) for path in fnames)

    fbasenames = [os.path.basename(path) for path in fnames]
    expected_fnames = []
    for begin, end, cam_id in begin_end_cam_id:
        full = repo._get_filename(begin, end, cam_id, extension='bbb')
        expected_fnames.append(os.path.basename(full))
    assert fbasenames == expected_fnames
def test_bbb_repo_iter_fnames_missing_directories(tmpdir):
    """Verify the unrestricted ``iter_fnames`` listing of a sparse repository."""
    repo = Repository(str(tmpdir.join('missing_directories')))
    span = 1500
    begin_end_cam_id = []
    for ts in range(0, 10000, span):
        begin_end_cam_id.append((ts, ts + span, 0))
    fill_repository(repo, begin_end_cam_id)

    def expected_basename(entry):
        return os.path.basename(repo._get_filename(*entry, extension='bbb'))

    listed = list(repo.iter_fnames())
    # Paths must be absolute before they are reduced to base names.
    for path in listed:
        assert os.path.isabs(path)
    listed_basenames = [os.path.basename(path) for path in listed]
    assert listed_basenames == [expected_basename(e) for e in begin_end_cam_id]
def check_repo(path, bees_video):
    """Assert that the repository at ``path`` holds the processed bees video.

    Every container must name the video's base name as first data source,
    start timestamps must increase from file to file, and all files
    together must contain three frames.
    """
    repo = Repository(path)
    video_name = os.path.basename(bees_video)
    previous_ts = 0
    frame_total = 0
    for stored in repo.iter_fnames():
        print("{}: {}".format(stored, os.path.getsize(stored)))
        with open(stored, 'rb') as handle:
            container = FrameContainer.read(handle)
            frame_total += len(list(container.frames))
            assert container.dataSources[0].filename == video_name
            assert previous_ts < container.fromTimestamp
            previous_ts = container.fromTimestamp
    assert frame_total == 3
def check_repo(path, bees_video):
    """Validate the containers written for ``bees_video`` under ``path``."""
    expected_source = os.path.basename(bees_video)
    newest_ts = 0
    total = 0
    for fname in Repository(path).iter_fnames():
        size = os.path.getsize(fname)
        print("{}: {}".format(fname, size))
        with open(fname, 'rb') as stream:
            fc = FrameContainer.read(stream)
            total = total + len(list(fc.frames))
            assert fc.dataSources[0].filename == expected_source
            # Containers have to appear in strictly increasing time order.
            assert newest_ts < fc.fromTimestamp
            newest_ts = fc.fromTimestamp
    assert total == 3
def getDF(path, b, e, camID):
    """Collect all detections of a time window into a ``DataFrame``.

    Iterates the frames of camera ``camID`` between ``b`` and ``e`` in the
    repository at ``path`` and emits one ``Detection10`` row per entry of
    ``detectionsUnion.detectionsDP``. Rows are numbered consecutively from 0.
    """
    repo = Repository(path)
    rows = []
    for frame, container in repo.iter_frames(begin=b, end=e, cam=camID):
        for det in frame.detectionsUnion.detectionsDP:
            # len(rows) is the running row id: one row is appended per detection.
            rows.append(Detection10(
                det.idx, det.xpos, det.ypos, det.radius, det.zRotation,
                list(det.decodedId), len(rows), frame.timestamp,
                container.camId, container.id, frame.id))
    return DataFrame(rows)
def test_bbb_create_symlinks(tmpdir):
    """Check the number and content of symlinks made for long containers."""
    repo = Repository(str(tmpdir))
    step_seconds = 60 * repo.minute_step

    # Container a little longer than two steps: two symlinks expected.
    fname, symlinks = repo._create_file_and_symlinks(
        0, step_seconds * 2 + 10, 0, 'bbb')
    with open(fname, 'w') as out:
        out.write("hello world!")
    assert len(symlinks) == 2
    assert os.path.exists(symlinks[0])
    for link in symlinks:
        with open(link) as linked:
            assert linked.read() == "hello world!"

    # A little longer than three steps: three symlinks.
    _, symlinks = repo._create_file_and_symlinks(
        1045, 1045 + step_seconds * 3 + 5, 0, 'bbb')
    assert len(symlinks) == 3

    # Very short container: no symlink at all.
    _, symlinks = repo._create_file_and_symlinks(
        1045, 1045 + repo.minute_step // 2, 0, 'bbb')
    assert len(symlinks) == 0
def test_generator_processor_threads(tmpdir, bees_video, filelists_path, pipeline_config):
    """Check a ``GeneratorProcessor`` that is given a list of three pipelines."""
    repo = Repository(str(tmpdir))

    def new_pipeline():
        return Pipeline([Image, Timestamp], [PipelineResult], **pipeline_config)

    def new_sink():
        return BBBinaryRepoSink(repo, camId=0)

    gen_processor = GeneratorProcessor(
        [new_pipeline() for _ in range(3)], new_sink)
    source = video_generator(bees_video, ts_format='2015',
                             path_filelists=filelists_path)
    gen_processor(source)

    # All output is expected in a single file ...
    assert len(list(repo.iter_fnames())) == 1

    # ... which has to contain three frames.
    counted = 0
    for fname in repo.iter_fnames():
        with open(fname, 'rb') as stream:
            fc = FrameContainer.read(stream)
            counted = counted + len(list(fc.frames))
    assert counted == 3
def handle(self, *args, **options):
    """Register every frame container of a repository in the database.

    For each file of the repository at ``options['repo_path']`` one
    ``FrameContainer`` row and one ``Frame`` row per contained frame are
    saved.

    The container row and its frame rows are written inside the same
    ``transaction.atomic()`` block, so a failure while saving frames does
    not leave a container without its frames behind.

    Start with: ``python manage.py make_db_repo [repo_path]``
    """
    repo = Repository(options['repo_path'])
    # Materialised as a list before being handed to try_tqdm, as before.
    fnames = list(repo.iter_fnames())
    for fn in try_tqdm(fnames):
        fc = load_frame_container(fn)
        with transaction.atomic():
            fco = FrameContainer(fc_id=fc.id, fc_path=fn,
                                 video_name=fc.dataSources[0].filename)
            fco.save()
            for frame in fc.frames:
                Frame(fc=fco, frame_id=frame.id, index=frame.frameIdx,
                      timestamp=frame.timestamp).save()
def test_bbb_create_symlinks(tmpdir):
    """Verify ``_create_file_and_symlinks`` for different container lengths."""
    repo = Repository(str(tmpdir))
    content = "hello world!"

    def links_for(begin, length):
        return repo._create_file_and_symlinks(begin, begin + length, 0, 'bbb')

    fname, symlinks = links_for(0, 60 * repo.minute_step * 2 + 10)
    with open(fname, 'w') as target:
        target.write(content)
    assert len(symlinks) == 2
    assert os.path.exists(symlinks[0])
    # Reading through every symlink must give the file's content.
    for symlink in symlinks:
        with open(symlink) as via_link:
            assert via_link.read() == content

    _, symlinks = links_for(1045, 60 * repo.minute_step * 3 + 5)
    assert len(symlinks) == 3

    _, symlinks = links_for(1045, repo.minute_step // 2)
    assert len(symlinks) == 0
def test_bbb_iter_small_frame_window(tmpdir):
    """Tests that small time windows still produce results."""
    repo = Repository(str(tmpdir.join('frames_from_to_small_window')))
    repo_start = 0
    nFC = 10
    span = 1000
    repo_end = repo_start + nFC * span

    def add_container(first, stop, cam_id):
        # One frame per timestamp in [first, stop).
        fc = build_frame_container(first, stop, cam_id)
        fc.init('frames', span)
        for pos, stamp in enumerate(range(first, stop)):
            fc.frames[pos].id = stamp
            fc.frames[pos].timestamp = stamp
        repo.add(fc)

    for ts in range(repo_start, repo_end, span):
        add_container(ts, ts + span, 0)

    begin, end = 1, 10
    hits = list(repo.iter_frames(begin, end))
    assert len(hits) > 0

    begin, end = 1001, 1011
    hits = list(repo.iter_fnames(begin, end))
    assert len(hits) > 0
def test_bbb_repo_find_single_file_per_timestamp(tmpdir):
    """Check ``Repository.find`` when each timestamp belongs to one file."""
    repo = Repository(str(tmpdir))
    span = 60 * 10
    fill_repository(
        repo, [(ts, ts + span, 0) for ts in range(0, 100000, span)])

    # The first two containers start at 0 and at `span`.
    for start in (0, span):
        expected = repo._get_filename(start, start + span, 0, 'bbb')
        assert repo.find(start)[0] == expected

    # A timestamp beyond the filled range yields nothing.
    assert repo.find(1000000) == []
def test_bbb_repo_iter_fnames_from_to(tmpdir):
    """Check ``iter_fnames`` when restricted to a time window."""
    repo = Repository(str(tmpdir.join('complex_from_to')))
    span = 1500
    begin_end_cam_id = [(ts, ts + span, 0) for ts in range(0, 10000, span)]
    fill_repository(repo, begin_end_cam_id)

    begin, end = 2500, 5000
    fnames = list(repo.iter_fnames(begin, end))
    assert all(os.path.isabs(path) for path in fnames)
    fbasenames = [os.path.basename(path) for path in fnames]

    # Entries overlapping the window.
    slice_begin_end_cam_id = [
        entry for entry in begin_end_cam_id
        if begin <= entry[1] and entry[0] < end
    ]
    print(slice_begin_end_cam_id)

    expected_fnames = []
    for lo, hi, cam_id in slice_begin_end_cam_id:
        full = repo._get_filename(lo, hi, cam_id, extension='bbb')
        expected_fnames.append(os.path.basename(full))
    print(expected_fnames)
    print(fbasenames)
    assert fbasenames == expected_fnames
def test_bbb_repo_iter_fnames_from_to(tmpdir):
    """Verify the files ``iter_fnames`` returns for a begin/end window."""
    repo = Repository(str(tmpdir.join('complex_from_to')))
    span = 1500
    begin_end_cam_id = []
    for ts in range(0, 10000, span):
        begin_end_cam_id.append((ts, ts + span, 0))
    fill_repository(repo, begin_end_cam_id)

    begin = 2500
    end = 5000

    def overlaps(entry):
        return begin <= entry[1] and entry[0] < end

    def expected_basename(entry):
        return os.path.basename(repo._get_filename(*entry, extension='bbb'))

    found = list(repo.iter_fnames(begin, end))
    for path in found:
        assert os.path.isabs(path)
    found_basenames = [os.path.basename(path) for path in found]

    in_window = [entry for entry in begin_end_cam_id if overlaps(entry)]
    print(in_window)
    wanted = [expected_basename(entry) for entry in in_window]
    print(wanted)
    print(found_basenames)
    assert found_basenames == wanted
def test_no_detection(tmpdir, pipeline_config):
    """Store the result for an all-zero image and expect no detections.

    One file with one frame must be written; the frame's detection list is
    empty and the data source name is kept.
    """
    repo = Repository(str(tmpdir))
    sink = BBBinaryRepoSink(repo, camId=0)
    pipeline = Pipeline([Image, Timestamp], [PipelineResult], **pipeline_config)

    blank = np.zeros((3000, 4000), dtype=np.uint8)
    results = pipeline([blank, 0])
    sink.add_frame(DataSource.new_message(filename='source'), results, 0)
    sink.finish()

    assert len(list(repo.iter_fnames())) == 1

    for path in repo.iter_fnames():
        with open(path, 'rb') as handle:
            container = FrameContainer.read(handle)
            assert len(container.frames) == 1
            assert container.dataSources[0].filename == 'source'
            only_frame = container.frames[0]
            assert len(only_frame.detectionsUnion.detectionsDP) == 0
def __init__(self):
    """Load frames, tracked paths and ground-truth paths named in ``config``.

    ``self.frames``, ``self.tracked`` and ``self.truth`` start as ``None``.
    Loading stops at the first missing input (an error line is printed), so
    the attributes after that point keep their ``None`` value.

    The frames come from the first file the repository returns for the
    one-minute window starting at ``config.DATE`` / ``config.TIME``.
    """
    # Function-local import: the surrounding file only shows `datetime`
    # itself in use, and timedelta is needed for the window end.
    from datetime import timedelta

    self.frames = None
    self.truth = None
    self.tracked = None

    if not os.path.exists(config.DATA_FOLDER):
        # Parenthesised form prints the same text on Python 2 and 3.
        print('Error: folder not found')
        return

    repo = Repository.load(config.DATA_FOLDER)
    start_time = datetime(
        config.DATE[0], config.DATE[1], config.DATE[2],
        config.TIME[0], config.TIME[1], tzinfo=pytz.utc)
    # Adding a timedelta rolls over correctly at minute 59; building the
    # end with `minute + 1` raised ValueError in that case.
    end_time = start_time + timedelta(minutes=1)

    for fname in repo.iter_fnames(begin=start_time, end=end_time):
        frame_container = load_frame_container(fname)
        self.frames = list(frame_container.frames)
        # break because we only load the first fname
        break

    if not os.path.exists(config.TRACKED_PATHS_FILE):
        print('Error: file not found')
        return
    # NOTE(review): pickle.load can execute arbitrary code; these files
    # must come from a trusted source.
    with open(config.TRACKED_PATHS_FILE, 'rb') as tracked_paths_file:
        tracked_input = pickle.load(tracked_paths_file)
        self.tracked = tracked_input['paths']

    if not os.path.exists(config.TRUTH_PATHS_FILE):
        print('Error: file not found')
        return
    with open(config.TRUTH_PATHS_FILE, 'rb') as truth_paths_file:
        truth_input = pickle.load(truth_paths_file)
        self.truth = truth_input['paths']
def __init__(self):
    """Load frames, tracked paths and ground-truth paths named in ``config``.

    ``self.frames``, ``self.tracked`` and ``self.truth`` start as ``None``.
    Loading stops at the first missing input (an error line is printed), so
    the attributes after that point keep their ``None`` value.

    The frames come from the first file the repository returns for the
    one-minute window starting at ``config.DATE`` / ``config.TIME``.
    """
    # Function-local import: the surrounding file only shows `datetime`
    # itself in use, and timedelta is needed for the window end.
    from datetime import timedelta

    self.frames = None
    self.truth = None
    self.tracked = None

    if not os.path.exists(config.DATA_FOLDER):
        # Parenthesised form prints the same text on Python 2 and 3.
        print('Error: folder not found')
        return

    repo = Repository.load(config.DATA_FOLDER)
    start_time = datetime(
        config.DATE[0], config.DATE[1], config.DATE[2],
        config.TIME[0], config.TIME[1], tzinfo=pytz.utc)
    # Adding a timedelta rolls over correctly at minute 59; building the
    # end with `minute + 1` raised ValueError in that case.
    end_time = start_time + timedelta(minutes=1)

    for fname in repo.iter_fnames(begin=start_time, end=end_time):
        frame_container = load_frame_container(fname)
        self.frames = list(frame_container.frames)
        # break because we only load the first fname
        break

    if not os.path.exists(config.TRACKED_PATHS_FILE):
        print('Error: file not found')
        return
    # NOTE(review): pickle.load can execute arbitrary code; these files
    # must come from a trusted source.
    with open(config.TRACKED_PATHS_FILE, 'rb') as tracked_paths_file:
        tracked_input = pickle.load(tracked_paths_file)
        self.tracked = tracked_input['paths']

    if not os.path.exists(config.TRUTH_PATHS_FILE):
        print('Error: file not found')
        return
    with open(config.TRUTH_PATHS_FILE, 'rb') as truth_paths_file:
        truth_input = pickle.load(truth_paths_file)
        self.truth = truth_input['paths']
def test_bbb_repo_find_multiple_file_per_timestamp(tmpdir):
    """Check ``find`` when two cameras provide a file for each timestamp."""
    repo = Repository(str(tmpdir))
    span = 500
    begin, end = 1000, 100000
    # Camera 0 entries first, then camera 1.
    begin_end_cam_id = [
        (ts, ts + span, cam_id)
        for cam_id in (0, 1)
        for ts in range(begin, end, span)
    ]
    fill_repository(repo, begin_end_cam_id)

    # (queried timestamp, expected container begin, expected number of files)
    cases = (
        (0, 0, 0),
        (1050, 1000, 2),
        (1499, 1000, 2),
        (1500, 1500, 2),
    )
    for timestamp, expected_begin, expected_count in cases:
        find_and_assert_begin(repo, timestamp, expect_begin=expected_begin,
                              nb_files_found=expected_count)
def test_generator_processor_video(tmpdir, bees_video, filelists_path, pipeline_config):
    """Run one pipeline over the bees video and validate the stored file."""
    repo = Repository(str(tmpdir))

    def new_sink():
        return BBBinaryRepoSink(repo, camId=0)

    gen_processor = GeneratorProcessor(
        Pipeline([Image, Timestamp], [PipelineResult], **pipeline_config),
        new_sink)
    gen_processor(video_generator(bees_video, ts_format='2015',
                                  path_filelists=filelists_path))

    assert len(list(repo.iter_fnames())) == 1

    expected_source = os.path.basename(bees_video)
    newest_ts = 0
    counted = 0
    for fname in repo.iter_fnames():
        size = os.path.getsize(fname)
        print("{}: {}".format(fname, size))
        with open(fname, 'rb') as stream:
            fc = FrameContainer.read(stream)
            counted = counted + len(list(fc.frames))
            assert fc.dataSources[0].filename == expected_source
            # Start timestamps have to increase from file to file.
            assert newest_ts < fc.fromTimestamp
            newest_ts = fc.fromTimestamp
    assert counted == 3
def test_bbb_repo_iter_fnames_from_to_and_cam(tmpdir):
    """Verify ``iter_fnames`` for a time window combined with a camera id."""
    repo = Repository(str(tmpdir.join('complex_from_to_and_cam')))
    span = 200

    def spans_for(cam_id):
        return [(ts, ts + span, cam_id) for ts in range(0, 10000, span)]

    begin_end_cam_id = spans_for(0) + spans_for(1)
    fill_repository(repo, begin_end_cam_id)

    begin = 2500
    end = 5000
    cam = 0

    def selected(entry):
        # Overlaps the window and belongs to the requested camera.
        return begin <= entry[1] and entry[0] < end and entry[2] == cam

    found = list(repo.iter_fnames(begin, end, cam))
    for path in found:
        assert os.path.isabs(path)
    found_basenames = [os.path.basename(path) for path in found]
    print(begin_end_cam_id)

    wanted = [
        os.path.basename(repo._get_filename(*entry, extension='bbb'))
        for entry in begin_end_cam_id if selected(entry)
    ]
    assert found_basenames == wanted
def test_bbb_repo_add_frame_container(tmpdir):
    """Add one container and find it again through several timestamps."""
    repo = Repository(str(tmpdir))
    cam_id = 1
    fc = build_frame_container(1000, 5000, cam_id)
    repo.add(fc)

    expected_fname = os.path.basename(
        repo._get_filename(fc.fromTimestamp, fc.toTimestamp, cam_id, 'bbb'))

    # All three timestamps lie inside the container's range.
    for timestamp in (1000, 1500, 2500):
        found = repo.find(timestamp)
        assert os.path.basename(found[0]) == expected_fname
def test_bbb_repo_find_single_file_per_timestamp(tmpdir):
    """Verify that ``find`` returns the one container covering a timestamp."""
    repo = Repository(str(tmpdir))
    span = 60 * 10
    begin_end_cam_id = []
    for ts in range(0, 100000, span):
        begin_end_cam_id.append((ts, ts + span, 0))
    fill_repository(repo, begin_end_cam_id)

    first_file = repo._get_filename(0, span, 0, 'bbb')
    assert repo.find(0)[0] == first_file

    second_start = 60 * 10
    second_file = repo._get_filename(second_start, second_start + span, 0, 'bbb')
    assert repo.find(second_start)[0] == second_file

    # Nothing was stored this late.
    assert repo.find(1000000) == []
def test_bbb_repo_path_for_ts(tmpdir):
    """Check ``_path_for_dt`` on fixed datetimes and on the current time."""
    repo = Repository(str(tmpdir))
    assert repo._path_for_dt(datetime(1970, 1, 20, 8, 25)) == '1970/01/20/08/20'

    repo = Repository(str(tmpdir))
    assert repo._path_for_dt(datetime(2012, 2, 29, 8, 55)) == '2012/02/29/08/40'

    now = datetime.now(pytz.utc)
    print(now.utcoffset())
    path = repo._path_for_dt(now)

    # Minutes are expected to be rounded down to a multiple of minute_step.
    steps_passed = math.floor(now.minute / repo.minute_step)
    expected_minutes = int(steps_passed * repo.minute_step)
    expected_dt = now.replace(minute=expected_minutes, second=0, microsecond=0)
    print(expected_dt.utcoffset())
    assert repo._get_time_from_path(path) == expected_dt
def test_bbb_iter_frames_from_to(tmpdir):
    """Tests that only frames in given range are iterated."""
    repo = Repository(str(tmpdir.join('frames_from_to')))
    repo_start, nFC, span = 0, 10, 1000
    nFrames = nFC * span
    repo_end = repo_start + nFrames

    for first in range(repo_start, repo_end, span):
        stop = first + span
        container = build_frame_container(first, stop, 0)
        container.init('frames', span)
        # One frame per timestamp; id and timestamp are the same value.
        for idx in range(stop - first):
            frame = container.frames[idx]
            frame.id = first + idx
            frame.timestamp = first + idx
        repo.add(container)

    def check_tstamp_invariant(begin, end):
        """Helper to check if begin <= tstamp < end is true for all frames."""
        for frame, fc in repo.iter_frames(begin, end):
            # frame container invariant
            assert begin < fc.toTimestamp
            assert fc.fromTimestamp < end
            # frame invariant
            assert begin <= frame.timestamp < end

    windows = (
        # repo_start < start < end < repo_end
        (repo_start + 10, repo_end - 10),
        # start < repo_start < end < repo_end
        (repo_start - 10, repo_end - 10),
        # start < end < repo_start < repo_end
        (repo_start - 20, repo_start - 10),
        # repo_start < start < repo_end < end
        (repo_start + 10, repo_end + 10),
        # repo_start < repo_end < start < end
        (repo_end + 10, repo_end + 20),
    )
    for window_begin, window_end in windows:
        check_tstamp_invariant(window_begin, window_end)

    # check whole length
    assert sum(1 for _ in repo.iter_frames()) == nFrames

    # check with begin = None
    assert sum(1 for _ in repo.iter_frames(end=repo_end - span)) == nFrames - span

    # check with end = None
    assert sum(1 for _ in repo.iter_frames(begin=span)) == nFrames - span
def test_bbb_repo_add_frame_container(tmpdir):
    """Verify that an added container is found for timestamps in its range."""
    cam_id = 1
    repo = Repository(str(tmpdir))
    container = build_frame_container(1000, 5000, cam_id)
    repo.add(container)

    def found_basename(timestamp):
        return os.path.basename(repo.find(timestamp)[0])

    full_name = repo._get_filename(
        container.fromTimestamp, container.toTimestamp, cam_id, 'bbb')
    wanted = os.path.basename(full_name)

    assert found_basename(1000) == wanted
    assert found_basename(1500) == wanted
    assert found_basename(2500) == wanted
def example_experiment_repo(request):
    """Build a temporary repository filled for four cameras.

    The filled range starts at the current time and spans
    ``6 * 7 * 24 * 3600`` timestamp units, cut into containers of
    ``int(1024 / 3)`` units each. The directory is removed again through a
    finalizer registered on ``request``.

    Returns the tuple ``(repo, begin, end)``.
    """
    tmpdir = tempfile.mkdtemp(prefix=os.path.dirname(__file__) + "_tmpdir_")
    repo = Repository(tmpdir)

    one_video = int(1024 / 3)
    begin = int(time.time())
    end = begin + 6 * 7 * 24 * 3600

    # Grouped by camera: all entries of camera 0, then camera 1, ...
    begin_end_cam_id = [
        (ts, ts + one_video, cam_id)
        for cam_id in range(4)
        for ts in range(begin, end, one_video)
    ]
    fill_repository(repo, begin_end_cam_id)

    request.addfinalizer(lambda: shutil.rmtree(tmpdir))
    return repo, begin, end
def test_bbb_iter_frames_from_to(tmpdir):
    """Tests that only frames in given range are iterated."""
    repo = Repository(str(tmpdir.join('frames_from_to')))
    repo_start = 0
    nFC = 10
    span = 1000
    nFrames = nFC * span
    repo_end = repo_start + nFrames

    def add_container(first, stop, cam_id):
        # One frame per timestamp in [first, stop).
        fc = build_frame_container(first, stop, cam_id)
        fc.init('frames', span)
        for pos, stamp in enumerate(range(first, stop)):
            fc.frames[pos].id = stamp
            fc.frames[pos].timestamp = stamp
        repo.add(fc)

    for ts in range(repo_start, repo_end, span):
        add_container(ts, ts + span, 0)

    def check_tstamp_invariant(begin, end):
        """Helper to check if begin <= tstamp < end is true for all frames."""
        for frame, fc in repo.iter_frames(begin, end):
            # frame container invariant
            assert begin < fc.toTimestamp
            assert fc.fromTimestamp < end
            # frame invariant
            assert begin <= frame.timestamp < end

    def count_frames(**window):
        return len([frame for frame, _ in repo.iter_frames(**window)])

    # repo_start < start < end < repo_end
    check_tstamp_invariant(repo_start + 10, repo_end - 10)
    # start < repo_start < end < repo_end
    check_tstamp_invariant(repo_start - 10, repo_end - 10)
    # start < end < repo_start < repo_end
    check_tstamp_invariant(repo_start - 20, repo_start - 10)
    # repo_start < start < repo_end < end
    check_tstamp_invariant(repo_start + 10, repo_end + 10)
    # repo_start < repo_end < start < end
    check_tstamp_invariant(repo_end + 10, repo_end + 20)

    # check whole length
    assert count_frames() == nFrames
    # check with begin = None
    assert count_frames(end=repo_end - span) == nFrames - span
    # check with end = None
    assert count_frames(begin=span) == nFrames - span
def __init__(self):
    """Load the frames and source file name selected in ``config``.

    ``self.frames`` and ``self.source`` start as ``None`` and keep that
    value when the data folder is missing, when no file matches, or when
    loading fails. Loading stays best-effort, but a failure is now reported
    instead of being swallowed silently.

    The data comes from the first file the repository returns for the
    one-minute window starting at ``config.DATE`` / ``config.TIME``.
    """
    # Function-local import: the surrounding file only shows `datetime`
    # itself in use, and timedelta is needed for the window end.
    from datetime import timedelta

    self.frames = None
    self.source = None

    if not os.path.exists(config.DATA_FOLDER):
        # Parenthesised form prints the same text on Python 2 and 3.
        print('Error: folder not found')
        return

    try:
        repo = Repository.load(config.DATA_FOLDER)
        start_time = datetime(
            config.DATE[0], config.DATE[1], config.DATE[2],
            config.TIME[0], config.TIME[1], tzinfo=pytz.utc)
        # Adding a timedelta rolls over correctly at minute 59; building
        # the end with `minute + 1` raised ValueError in that case.
        end_time = start_time + timedelta(minutes=1)

        for fname in repo.iter_fnames(begin=start_time, end=end_time):
            frame_container = load_frame_container(fname)
            self.frames = list(frame_container.frames)
            self.source = frame_container.dataSources[0].filename
            # break because we only load the first fname
            break
    except Exception as err:
        # `except Exception` lets KeyboardInterrupt and SystemExit
        # propagate, which the former bare `except:` did not.
        print('Error: could not load frames: {}'.format(err))
def process_video(args):
    """Run the pipeline over one video and store the results.

    Reads ``num_threads``, ``repo_output_path``, ``video_path``,
    ``timestamp_format`` and ``text_root_path`` from ``args``. One pipeline
    is created per requested thread; results go to a bb_binary repository
    under the camera id parsed from the video file name.
    """
    config = get_auto_config()

    logger.info('Initializing {} pipeline(s)'.format(args.num_threads))
    plines = []
    for _ in range(args.num_threads):
        plines.append(Pipeline([Image, Timestamp], [PipelineResult], **config))

    logger.info('Loading bb_binary repository {}'.format(args.repo_output_path))
    repo = Repository(args.repo_output_path)

    camId, _, _ = parse_video_fname(args.video_path)
    logger.info('Parsed camId = {}'.format(camId))

    def make_sink():
        return BBBinaryRepoSink(repo, camId=camId)

    gen_processor = GeneratorProcessor(plines, make_sink)

    logger.info('Processing video frames from {}'.format(args.video_path))
    frames = video_generator(
        args.video_path, args.timestamp_format, args.text_root_path)
    gen_processor(frames)
def process_video(video_path, repo_output_path, ts_format, text_root_path, rank):
    """Run the pipeline over one video inside worker ``rank``.

    Results are stored in the bb_binary repository
    ``<repo_output_path>/process_<rank>``. ffmpeg's stderr is written to
    ``process_<rank>_ffmpeg_stderr.log`` in the current working directory;
    that file is now closed once processing ends, also when it fails.

    Parameters
    ----------
    video_path : path of the video; the camera id is parsed from its name.
    repo_output_path : parent directory of the per-process repository.
    ts_format : timestamp format handed to ``video_generator``.
    text_root_path : text root path handed to ``video_generator``.
    rank : identifier of this worker, used in log lines and file names.
    """
    # Imports are kept function-local as in the original.
    # NOTE(review): presumably so that each worker process initialises
    # theano on its own; confirm before moving them to module level.
    import theano
    from pipeline import Pipeline
    from pipeline.cmdline import logger
    from pipeline.pipeline import GeneratorProcessor, get_auto_config
    from pipeline.io import BBBinaryRepoSink, video_generator
    from pipeline.objects import PipelineResult, Image, Timestamp
    from bb_binary import Repository, parse_video_fname

    def info(msg):
        logger.info('Process {}: {}'.format(rank, msg))

    def log_callback(frame_idx):
        info('Processing frame {} from {}'.format(frame_idx, video_path))

    repo_output_path = os.path.join(repo_output_path,
                                    'process_{}'.format(rank))
    info('Theano compile dir: {}'.format(theano.config.base_compiledir))
    info('Output dir: {}'.format(repo_output_path))

    config = get_auto_config()
    info('Initializing pipeline')
    pipeline = Pipeline([Image, Timestamp], [PipelineResult], **config)

    info('Loading bb_binary repository {}'.format(repo_output_path))
    repo = Repository(repo_output_path)

    camId, _, _ = parse_video_fname(video_path)
    info('Parsed camId = {}'.format(camId))

    gen_processor = GeneratorProcessor(
        pipeline, lambda: BBBinaryRepoSink(repo, camId=camId))

    # The context manager closes the log file; it used to stay open.
    with open('process_{}_ffmpeg_stderr.log'.format(rank), 'w') as ffmpeg_stderr_fd:
        info('Processing video frames from {}'.format(video_path))
        gen_processor(
            video_generator(video_path, ts_format, text_root_path,
                            log_callback, ffmpeg_stderr_fd))
def test_bbb_repo_path_for_ts(tmpdir):
    """Verify the directory path ``_path_for_dt`` derives from a datetime."""
    repo = Repository(str(tmpdir))
    early = repo._path_for_dt(datetime(1970, 1, 20, 8, 25))
    assert early == '1970/01/20/08/20'

    repo = Repository(str(tmpdir))
    leap_day = repo._path_for_dt(datetime(2012, 2, 29, 8, 55))
    assert leap_day == '2012/02/29/08/40'

    # Current time: the path must decode to `now` rounded down to a step.
    now = datetime.now(pytz.utc)
    print(now.utcoffset())
    current = repo._path_for_dt(now)
    expected_minutes = int(
        math.floor(now.minute / repo.minute_step) * repo.minute_step)
    rounded = now.replace(minute=expected_minutes, second=0, microsecond=0)
    print(rounded.utcoffset())
    assert repo._get_time_from_path(current) == rounded
def process_video(video_path, repo_output_path, ts_format, text_root_path, rank):
    """Run the pipeline over one video inside worker ``rank``.

    Results are stored in the bb_binary repository
    ``<repo_output_path>/process_<rank>``. ffmpeg's stderr is written to
    ``process_<rank>_ffmpeg_stderr.log`` in the current working directory;
    that file is now closed once processing ends, also when it fails.

    Parameters
    ----------
    video_path : path of the video; the camera id is parsed from its name.
    repo_output_path : parent directory of the per-process repository.
    ts_format : timestamp format handed to ``video_generator``.
    text_root_path : text root path handed to ``video_generator``.
    rank : identifier of this worker, used in log lines and file names.
    """
    # Imports are kept function-local as in the original.
    # NOTE(review): presumably so that each worker process initialises
    # theano on its own; confirm before moving them to module level.
    import theano
    from pipeline import Pipeline
    from pipeline.cmdline import logger
    from pipeline.pipeline import GeneratorProcessor, get_auto_config
    from pipeline.io import BBBinaryRepoSink, video_generator
    from pipeline.objects import PipelineResult, Image, Timestamp
    from bb_binary import Repository, parse_video_fname

    def info(msg):
        logger.info(f"Process {rank}: {msg}")

    def log_callback(frame_idx):
        info(f"Processing frame {frame_idx} from {video_path}")

    repo_output_path = os.path.join(repo_output_path, f"process_{rank}")
    info(f"Theano compile dir: {theano.config.base_compiledir}")
    info(f"Output dir: {repo_output_path}")

    config = get_auto_config()
    info("Initializing pipeline")
    pipeline = Pipeline([Image, Timestamp], [PipelineResult], **config)

    info(f"Loading bb_binary repository {repo_output_path}")
    repo = Repository(repo_output_path)

    camId, _, _ = parse_video_fname(video_path)
    info(f"Parsed camId = {camId}")

    gen_processor = GeneratorProcessor(
        pipeline, lambda: BBBinaryRepoSink(repo, camId=camId))

    # The context manager closes the log file; it used to stay open.
    with open(f"process_{rank}_ffmpeg_stderr.log", "w") as ffmpeg_stderr_fd:
        info(f"Processing video frames from {video_path}")
        gen_processor(
            video_generator(video_path, ts_format, text_root_path,
                            log_callback, ffmpeg_stderr_fd))
def __init__(self):
    """Load the frames and source name of the first matching frame container.

    Looks in ``config.DATA_FOLDER`` for frame containers within the
    one-minute window starting at ``config.DATE`` / ``config.TIME`` (UTC).
    On success ``self.frames`` holds the list of frames of the first
    container found and ``self.source`` the filename of its first data
    source.  Both stay ``None`` when the folder is missing, nothing matches,
    or loading fails; a failure is reported on stdout instead of being
    silently discarded.
    """
    # Local import: only ``datetime`` is known to be imported at file level.
    from datetime import timedelta

    self.frames = None
    self.source = None

    if not os.path.exists(config.DATA_FOLDER):
        print('Error: folder not found')
        return

    try:
        repo = Repository.load(config.DATA_FOLDER)
        start_time = datetime(
            config.DATE[0], config.DATE[1], config.DATE[2],
            config.TIME[0], config.TIME[1],
            tzinfo=pytz.utc)
        # Adding a timedelta instead of "minute + 1" keeps this valid when
        # the configured minute is 59.
        end_time = start_time + timedelta(minutes=1)

        for fname in repo.iter_fnames(begin=start_time, end=end_time):
            frame_container = load_frame_container(fname)
            self.frames = list(frame_container.frames)
            self.source = frame_container.dataSources[0].filename
            # break because we only load the first fname
            break
    except Exception as exc:
        # Loading stays best-effort, but the reason is no longer hidden.
        print('Error: could not load data: {}'.format(exc))
def test_bbb_repo_get_ts_from_path(tmpdir):
    """Check that ``Repository._get_time_from_path`` parses repository paths.

    Literal paths must map to the matching UTC datetimes, and the method must
    invert ``_path_for_dt`` for a datetime whose minute equals
    ``repo.minute_step``.
    """
    repo = Repository(str(tmpdir))

    literal_cases = (
        ('1800/10/01/00/00', datetime(1800, 10, 1, 0, 0, tzinfo=pytz.utc)),
        ('2017/10/15/23/40', datetime(2017, 10, 15, 23, 40, tzinfo=pytz.utc)),
    )
    for repo_path, expected_dt in literal_cases:
        assert repo._get_time_from_path(repo_path) == expected_dt

    # test inverse to path_for_ts
    step = repo.minute_step
    original_dt = datetime(2017, 10, 15, 23, step, tzinfo=pytz.utc)
    roundtrip_path = repo._path_for_dt(original_dt)
    assert roundtrip_path == '2017/10/15/23/{:02d}'.format(step)
    assert repo._get_time_from_path(roundtrip_path) == original_dt
def test_bbb_repo_iter_frames_filtered(tmpdir):
    """Check that ``Repository.iter_frames`` honours a ``frame_filter`` callable.

    The repository is filled with 10 containers of 1000 frames each, every
    frame carrying its timestamp as id.  Five timestamps are drawn at random
    and a filter accepting only those must yield exactly those timestamps.
    """
    repo = Repository(str(tmpdir.join('frames_from_to_filtered')))
    first_ts = 0
    container_count = 10
    frames_per_container = 1000
    last_ts = first_ts + container_count * frames_per_container

    for fc_begin in range(first_ts, last_ts, frames_per_container):
        fc_end = fc_begin + frames_per_container
        container = build_frame_container(fc_begin, fc_end, 0)
        container.init('frames', frames_per_container)
        for offset in range(frames_per_container):
            stamp = fc_begin + offset
            frame = container.frames[offset]
            frame.id = stamp
            frame.timestamp = stamp
        repo.add(container)

    # NOTE: fc_begin / fc_end still hold the values of the last loop
    # iteration, so the queries below cover the last container's time span.
    all_stamps = [entry[0].timestamp
                  for entry in repo.iter_frames(fc_begin, fc_end)]
    selected_timestamps = set(
        np.random.choice(all_stamps, size=5, replace=False))

    class TimestepFilter():
        """Callable accepting only frames whose timestamp was selected."""

        def __init__(self, timesteps):
            self.timesteps = timesteps

        def __call__(self, frame):
            return frame.timestamp in self.timesteps

    accept_selected = TimestepFilter(selected_timestamps)
    filtered_frames = list(
        repo.iter_frames(fc_begin, fc_end, frame_filter=accept_selected))
    filtered_timestamps = {entry[0].timestamp for entry in filtered_frames}
    assert (filtered_timestamps == selected_timestamps)
def main():
    """Re-match stored ground-truth path entries against freshly loaded detections.

    Steps:
      1. Load the first frame container found in ``config.DATA_FOLDER`` for
         the one-minute window starting at ``config.DATE`` / ``config.TIME``
         (UTC) and build one ``DetectionSet`` (with kd-tree) per frame in
         ``config.FRAME_START .. config.FRAME_END``.
      2. Load the pickled truth from ``config.PATHS_FILE`` and verify that
         its ``'source'`` equals the loaded data source.
      3. For every stored path entry look up the nearest detection and
         replace the stored detection id with its index when the distance is
         within ``MATCH_DISTANCE_LIMIT``, otherwise with ``None``.
      4. Write the updated truth back to ``config.PATHS_FILE``.

    Errors (missing folder/file, mismatching sources) are reported on stdout
    and end the function early.
    """
    # Local import: only ``datetime`` is known to be imported at file level.
    from datetime import timedelta

    # loading data
    if not os.path.exists(config.DATA_FOLDER):
        print('Error: folder not found')
        return

    dset_store = ds.DetectionSetStore()

    repo = Repository.load(config.DATA_FOLDER)
    start_time = datetime(
        config.DATE[0], config.DATE[1], config.DATE[2],
        config.TIME[0], config.TIME[1],
        tzinfo=pytz.utc)
    # Adding a timedelta instead of "minute + 1" keeps this valid when the
    # configured minute is 59.
    end_time = start_time + timedelta(minutes=1)

    fnames = repo.iter_fnames(begin=start_time, end=end_time)
    for fname in fnames:
        frame_container = load_frame_container(fname)

        cam = frame_container.camId
        dset_store.source = frame_container.dataSources[0].filename

        previous_timestamp = None
        frame_index = config.FRAME_START
        selected_frames = list(frame_container.frames)[
            config.FRAME_START:config.FRAME_END + 1]
        for frame in selected_frames:
            timestamp = ds.TimeStamp(frame_index, cam)
            timestamp.connect_with_previous(previous_timestamp)
            previous_timestamp = timestamp

            dset = ds.DetectionSet()
            dset_store.store[timestamp] = dset

            data = convert_frame_to_numpy(frame)
            for detection_data in data:
                dset.add_detection(ds.Detection(
                    detection_data['idx'],
                    timestamp,
                    # rotated, otherwise will be portrait orientation
                    np.array([detection_data['ypos'], detection_data['xpos']]),
                    detection_data['localizerSaliency'],
                    # reversed, we want least significant bit last
                    detection_data['decodedId'][::-1]
                ))

            dset.build_kd_tree()
            frame_index += 1

        # break because we only load the first fname
        break

    # loading truth
    if not os.path.isfile(config.PATHS_FILE):
        print('Error: file not found')
        return

    # NOTE(security): pickle can execute arbitrary code while loading; only
    # use paths files that come from a trusted source.
    with open(config.PATHS_FILE, 'rb') as paths_file:
        truth = pickle.load(paths_file)

    if truth['source'] != dset_store.source:
        print('Error: data sources do not match')
        return

    paths_input = truth['paths']

    # match
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost -- confirm that every entry (matched or not) is
    # meant to be rewritten.
    for tag_paths in paths_input.values():
        for path in tag_paths.values():
            # Snapshot the items because the entries are rewritten in the loop.
            for frame, detection_data in list(path.items()):
                old_detection_id, pos_x, pos_y, readability = detection_data
                timestamp = dset_store.get_timestamp(frame)

                new_detection_id = None
                distance = None

                if timestamp is not None and readability < 3:
                    dset = dset_store.get(timestamp)
                    distances, indices = dset.kd_tree.query([pos_x, pos_y], k=1)
                    distance = distances[0][0]
                    index = indices[0][0]

                    if distance <= MATCH_DISTANCE_LIMIT:
                        new_detection_id = index

                # use this if you're matching to the same output for test purposes:
                # if new_detection_id != old_detection_id:
                #     print('mismatch old: ' + str(old_detection_id) + ', new: ' + str(new_detection_id))

                path[frame] = (new_detection_id, pos_x, pos_y, readability)

    # saving truth
    with open(config.PATHS_FILE, 'wb') as paths_file:
        pickle.dump(truth, paths_file)

    print('done')
def test_bbb_repo_iter_fnames_empty(tmpdir):
    """A repository without any stored container must yield no filenames."""
    empty_root = str(tmpdir.join('empty'))
    found = list(Repository(empty_root).iter_fnames())
    assert found == []
def load_data(self):
    """Load detections of the configured frame range into ``self.dset_store``.

    Reads the first frame container found in ``config.DATA_FOLDER`` starting
    at ``config.DATE`` / ``config.TIME`` (UTC), creates one ``DetectionSet``
    per frame in ``config.FRAME_START .. config.FRAME_END`` and updates the
    data-load progress bar and label while doing so.  Inputs are blocked
    during loading and always unblocked afterwards.  A loading failure is
    reported on stdout instead of being silently discarded.
    """
    if not os.path.exists(config.DATA_FOLDER):
        print('Error: folder not found')
        return

    self.block_inputs(True)

    self.dset_store = ds.DetectionSetStore()
    self.path_manager = None
    self.paths_load_progress.setValue(0)
    self.paths_load_label.setText('')

    try:
        repo = Repository(config.DATA_FOLDER)
        start_time = datetime(
            config.DATE[0], config.DATE[1], config.DATE[2],
            config.TIME[0], config.TIME[1],
            tzinfo=pytz.utc)

        fnames = repo.iter_fnames(begin=start_time)
        for fname in fnames:
            frame_container = load_frame_container(fname)

            cam = frame_container.camId
            # frame_container.fromTimestamp  # already available
            # frame_container.toTimestamp    # already available

            self.dset_store.source = frame_container.dataSources[0].filename

            previous_timestamp = None

            self.data_load_progress.setMaximum(
                config.FRAME_END + 1 - config.FRAME_START)
            self.app.processEvents()

            frame_index = config.FRAME_START
            selected_frames = list(frame_container.frames)[
                config.FRAME_START:config.FRAME_END + 1]
            for frame in selected_frames:
                # timestamp = frame.timestamp  # not included yet
                # frame.id                     # not included yet

                timestamp = ds.TimeStamp(frame_index, cam)
                timestamp.connect_with_previous(previous_timestamp)
                previous_timestamp = timestamp

                dset = ds.DetectionSet()
                self.dset_store.store[timestamp] = dset

                data = convert_frame_to_numpy(frame)
                for detection_data in data:
                    dset.add_detection(ds.Detection(
                        detection_data['idx'],
                        timestamp,
                        # rotated, otherwise will be portrait orientation
                        np.array([detection_data['ypos'], detection_data['xpos']]),
                        detection_data['localizerSaliency'],
                        # reversed, we want least significant bit last
                        detection_data['decodedId'][::-1]
                    ))

                frame_index += 1

                self.data_load_progress.setValue(frame_index - config.FRAME_START)
                self.app.processEvents()

            self.data_load_label.setText(
                str(len(self.dset_store.store)) + ' frames loaded')
            self.app.processEvents()

            # break because we only load the first fname
            break
    except Exception as exc:
        # Loading stays best-effort, but the reason is no longer hidden.
        print('Error: could not load data: {}'.format(exc))
    finally:
        # Re-enable the inputs no matter how loading ended.
        self.block_inputs(False)