def __init__(self, fn, canonicalize=True, only_union_types=False, sort_by_time=False, dat=None):
  self.data_version = None
  self._only_union_types = only_union_types

  ext = None
  if not dat:
    _, ext = os.path.splitext(urllib.parse.urlparse(fn).path)
    if ext not in ('', '.bz2'):  # old rlogs weren't bz2 compressed
      raise Exception(f"unknown extension {ext}")

    with FileReader(fn) as f:
      dat = f.read()

  if ext == ".bz2" or dat.startswith(b'BZh9'):
    dat = bz2.decompress(dat)

  ents = capnp_log.Event.read_multiple_bytes(dat)

  _ents = []
  try:
    for e in ents:
      _ents.append(e)
  except capnp.KjException:
    warnings.warn("Corrupted events detected", RuntimeWarning)

  self._ents = list(sorted(_ents, key=lambda x: x.logMonoTime) if sort_by_time else _ents)
  self._ts = [x.logMonoTime for x in self._ents]
def __init__(self, fn, canonicalize=True, only_union_types=False, sort_by_time=False):
  data_version = None
  _, ext = os.path.splitext(urllib.parse.urlparse(fn).path)
  with FileReader(fn) as f:
    dat = f.read()

  if ext == "":
    # old rlogs weren't bz2 compressed
    ents = capnp_log.Event.read_multiple_bytes(dat)
  elif ext == ".bz2":
    dat = bz2.decompress(dat)
    ents = capnp_log.Event.read_multiple_bytes(dat)
  else:
    raise Exception(f"unknown extension {ext}")

  self._ents = list(sorted(ents, key=lambda x: x.logMonoTime) if sort_by_time else ents)
  self._ts = [x.logMonoTime for x in self._ents]
  self.data_version = data_version
  self._only_union_types = only_union_types
def __init__(self, fn, canonicalize=True):
  _, ext = os.path.splitext(fn)
  data_version = None

  with FileReader(fn) as f:
    dat = f.read()

  # decompress file
  if ext == ".gz" and ("log_" in fn or "log2" in fn):
    dat = zlib.decompress(dat, zlib.MAX_WBITS | 32)
  elif ext == ".bz2":
    dat = bz2.decompress(dat)
  elif ext == ".7z":
    with libarchive.public.memory_reader(dat) as aa:
      mdat = []
      for it in aa:
        for bb in it.get_blocks():
          mdat.append(bb)
      dat = b''.join(mdat)  # archive blocks are bytes

  # TODO: extension shouldn't be a proxy for DeviceType
  if ext == "":
    if dat[:1] == b"[":  # dat is bytes; slice instead of indexing
      needs_conversion = True
      ents = [json.loads(x) for x in dat.strip().split(b"\n")[:-1]]
      if "_" in fn:
        data_version = fn.split("_")[1]
    else:
      # old rlogs weren't bz2 compressed
      needs_conversion = False
      ents = event_read_multiple_bytes(dat)
  elif ext == ".gz":
    if "log_" in fn:
      # Zero data file.
      ents = [json.loads(x) for x in dat.strip().split(b"\n")[:-1]]
      needs_conversion = True
    elif "log2" in fn:
      needs_conversion = False
      ents = event_read_multiple_bytes(dat)
    else:
      raise Exception("unknown extension")
  elif ext == ".bz2":
    needs_conversion = False
    ents = event_read_multiple_bytes(dat)
  elif ext == ".7z":
    needs_conversion = True
    ents = [json.loads(x) for x in dat.strip().split(b"\n")]
  else:
    raise Exception("unknown extension")

  if needs_conversion:
    # TODO: should we call convert_old_pkt_to_new to generate this?
    self._ts = [x[0][0] * 1e9 for x in ents]
  else:
    self._ts = [x.logMonoTime for x in ents]

  self.data_version = data_version
  self._do_conversion = needs_conversion and canonicalize
  self._ents = ents
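# A hedged usage sketch for the three __init__ variants above: it assumes
# they belong to a class named LogReader that is iterable over the decoded
# capnp events (the class name and iteration protocol are assumptions, not
# shown in these snippets).
def _example_read_rlog(path="rlog.bz2"):
  lr = LogReader(path, sort_by_time=True)  # LogReader name is assumed
  for evt in lr:
    # .which() is the capnp accessor for the active union member name
    print(evt.logMonoTime, evt.which())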
def index_stream(fn, typ):
  assert typ in ("hevc", )

  with FileReader(fn) as f:
    assert os.path.exists(f.name), fn
    index, prefix = vidindex(f.name, typ)
    probe = ffprobe(f.name, typ)

  return {
    'index': index,
    'global_prefix': prefix,
    'probe': probe
  }
def write_thread(self):
  try:
    with FileReader(self.fn) as f:
      while True:
        r = f.read(1024 * 1024)
        if len(r) == 0:
          break
        self.proc.stdin.write(r)
  finally:
    self.proc.stdin.close()
def write_thread(self):
  try:
    with FileReader(self.fn) as f:
      while True:
        r = f.read(1024 * 1024)
        if len(r) == 0:
          break
        self.proc.stdin.write(r)
  except BrokenPipeError:
    pass
  finally:
    self.proc.stdin.close()
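# A sketch of how writer threads like the two above are typically wired up
# (the subprocess command and function names here are illustrative, not from
# the snippets): stream a FileReader into a decoder's stdin from a thread so
# the whole file never has to sit in memory, while the caller drains stdout.
import subprocess
import threading

def _example_feed_decoder(fn):
  proc = subprocess.Popen(["bzip2", "-d"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def pump():
    try:
      with FileReader(fn) as f:
        while True:
          chunk = f.read(1024 * 1024)
          if not chunk:
            break
          proc.stdin.write(chunk)
    except BrokenPipeError:
      pass  # decoder exited early; nothing left to feed it
    finally:
      proc.stdin.close()

  t = threading.Thread(target=pump)
  t.start()
  out = proc.stdout.read()  # drain concurrently to avoid pipe deadlock
  t.join()
  proc.wait()
  return out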
def fingerprint_video(fn):
  with FileReader(fn) as f:
    header = f.read(4)
  if len(header) == 0:
    raise DataUnreadableError(f"{fn} is empty")
  elif header == b"\x00\xc0\x12\x00":
    return FrameType.raw
  elif header == b"\x00\x00\x00\x01":
    if 'hevc' in fn:
      return FrameType.h265_stream
    else:
      raise NotImplementedError(fn)
  else:
    raise NotImplementedError(fn)
def unbzip_frames(url):
  with FileReader(url) as f:
    dat = f.read()

  data = bz2.decompress(dat)

  res = []
  for y_start in range(0, len(data), FRAME_WIDTH * FRAME_HEIGHT + UV_SIZE * 2):
    u_start = y_start + FRAME_WIDTH * FRAME_HEIGHT
    v_start = u_start + UV_SIZE
    y = np.frombuffer(data[y_start:u_start], dtype=np.uint8).reshape((FRAME_HEIGHT, FRAME_WIDTH))
    u = np.frombuffer(data[u_start:v_start], dtype=np.uint8).reshape((UV_HEIGHT, UV_WIDTH))
    v = np.frombuffer(data[v_start:v_start + UV_SIZE], dtype=np.uint8).reshape((UV_HEIGHT, UV_WIDTH))
    res.append((y, u, v))

  return res
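# The slicing in unbzip_frames assumes planar YUV420: a full-resolution Y
# plane followed by quarter-resolution U and V planes, i.e. (assumed, since
# the constants aren't defined in this file) UV_WIDTH == FRAME_WIDTH // 2,
# UV_HEIGHT == FRAME_HEIGHT // 2, and UV_SIZE == UV_WIDTH * UV_HEIGHT. Each
# frame then occupies FRAME_WIDTH * FRAME_HEIGHT + 2 * UV_SIZE bytes, which
# is exactly the stride of the range() loop above. A minimal inverse sketch:
def _pack_yuv420(y, u, v):
  # repack one (y, u, v) triple into the contiguous layout the loop slices apart
  return y.tobytes() + u.tobytes() + v.tobytes()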
def get(self, num, count=1, pix_fmt="yuv420p"):
  assert 0 < num + count <= self.frame_count

  frame_dats = []
  with FileReader(self.fn) as f:
    for i in range(num, num + count):
      pos, length, _ = self.index[i]
      f.seek(pos)
      frame_dats.append(f.read(length))

  # BytesIO, not StringIO: the muxed Matroska output is bytes
  of = BytesIO()
  mkvindex.simple_gen(of, self.config_record, self.w, self.h, frame_dats)

  r = decompress_video_data(of.getvalue(), "matroska", self.w, self.h, pix_fmt)
  assert len(r) == count
  return r
def fingerprint_video(fn):
  with FileReader(fn) as f:
    header = f.read(4)
  if len(header) == 0:
    raise DataUnreadableError("%s is empty" % fn)
  elif header == b"\x00\xc0\x12\x00":
    return FrameType.raw
  elif header == b"\x00\x00\x00\x01":
    if 'hevc' in fn:
      return FrameType.h265_stream
    elif os.path.basename(fn) in ("camera", "acamera"):
      return FrameType.h264_pstream
    else:
      raise NotImplementedError(fn)
  elif header == b"\x00\x00\x00\x1c":
    return FrameType.h264_mp4
  elif header == b"\x1a\x45\xdf\xa3":
    return FrameType.ffv1_mkv
  else:
    raise NotImplementedError(fn)
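# Hedged usage sketch for fingerprint_video: dispatch on the detected frame
# type before picking an indexer. The FrameType members come from the
# branches above and the indexers appear elsewhere in this file; the
# dispatch itself is illustrative, not the library's actual entry point.
def _example_index(fn):
  ftype = fingerprint_video(fn)
  if ftype == FrameType.h265_stream:
    return index_stream(fn, "hevc")
  elif ftype == FrameType.ffv1_mkv:
    return index_mkv(fn)
  elif ftype == FrameType.h264_mp4:
    return index_mp4(fn)
  raise NotImplementedError(ftype)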
def get_gop(self, num):
  frame_b, frame_e, offset_b, offset_e = self._lookup_gop(num)
  assert frame_b <= num < frame_e

  num_frames = frame_e - frame_b

  with FileReader(self.fn) as f:
    f.seek(offset_b)
    rawdat = f.read(offset_e - offset_b)

  if num < self.first_iframe:
    assert self.prefix_frame_data
    rawdat = self.prefix_frame_data + rawdat

  rawdat = self.prefix + rawdat

  skip_frames = 0
  if num < self.first_iframe:
    skip_frames = self.num_prefix_frames

  return frame_b, num_frames, skip_frames, rawdat
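# Usage sketch for the get_gop variant above (the pstream one, given the
# prefix_frame_data/num_prefix_frames fields it reads): decode the returned
# Annex-B buffer, then step past the carried-over prefix frames to land on
# frame `num`. decompress_video_data's signature is taken from its use in
# get() above; the final indexing is an assumption about the return contract.
def _example_decode_frame(reader, num):
  frame_b, num_frames, skip_frames, rawdat = reader.get_gop(num)
  frames = decompress_video_data(rawdat, "h264", reader.w, reader.h, "yuv420p")
  return frames[skip_frames + (num - frame_b)]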
def _ffmpeg_fcamera_input_for_frame_info(frame_info):
  st = time.time()

  fn, num, count, cache_prefix = frame_info
  assert fn.endswith('.hevc')

  sindex = index_stream(fn, "hevc", cache_prefix=cache_prefix)
  index = sindex['index']
  prefix = sindex['global_prefix']
  probe = sindex['probe']

  frame_e = num + count
  frame_b = num
  # must start decoding on an i-frame
  while index[frame_b, 0] != HEVC_SLICE_I:
    frame_b -= 1
  offset_b = index[frame_b, 1]
  offset_e = index[frame_e, 1]
  assert frame_b <= num < frame_e
  skip = num - frame_b

  w = probe['streams'][0]['width']
  h = probe['streams'][0]['height']
  assert (h, w) == (874, 1164)

  st2 = time.time()
  with FileReader(fn) as f:
    f.seek(offset_b)
    input_data = f.read(offset_e - offset_b)
  et = time.time()

  get_time = et - st
  get_time2 = et - st2
  if get_time > 10.0:
    print("TOOK OVER 10 seconds to fetch", frame_info, get_time, get_time2)

  return prefix, input_data, skip, count
def get_gop(self, num):
  frame_b, frame_e = self._lookup_gop(num)
  assert frame_b <= num < frame_e

  num_frames = frame_e - frame_b

  with FileReader(self.fn) as f:
    rawdat = []

    sample_i = frame_b
    while sample_i < frame_e:
      size = self.sample_sizes[sample_i]
      start_offset = self.sample_offsets[sample_i]

      # try to read contiguously because a read could actually be an HTTP request
      sample_i += 1
      while sample_i < frame_e and size < 10000000 and start_offset + size == self.sample_offsets[sample_i]:
        size += self.sample_sizes[sample_i]
        sample_i += 1

      f.seek(start_offset)
      sampledat = f.read(size)

      # read length-prefixed NALUs and output in Annex-B
      i = 0
      while i < len(sampledat):
        nal_len, = struct.unpack(">I", sampledat[i:i + 4])
        rawdat.append(b"\x00\x00\x00\x01" + sampledat[i + 4:i + 4 + nal_len])
        i = i + 4 + nal_len
      assert i == len(sampledat)

  rawdat = self.prefix + b''.join(rawdat)

  return frame_b, num_frames, 0, rawdat
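# The inner loop in get_gop above converts MP4-style length-prefixed NALUs
# (each NALU preceded by a 4-byte big-endian size) to Annex-B framing (each
# NALU preceded by a 00 00 00 01 start code). The same conversion as a
# standalone sketch, directly restating that loop:
import struct

def _avcc_to_annexb(sampledat):
  out = []
  i = 0
  while i < len(sampledat):
    nal_len, = struct.unpack(">I", sampledat[i:i + 4])
    out.append(b"\x00\x00\x00\x01" + sampledat[i + 4:i + 4 + nal_len])
    i += 4 + nal_len
  assert i == len(sampledat), "truncated NALU"
  return b''.join(out)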
def get_log_data(segment):
  r, n = segment.rsplit("--", 1)
  with FileReader(get_url(r, n)) as f:
    return (segment, f.read())
def index_mp4(fn):
  with FileReader(fn) as f:
    return vidindex_mp4(f.name)
def index_mkv(fn):
  with FileReader(fn) as f:
    probe = ffprobe(f.name, "matroska")
    with open(f.name, "rb") as d_f:
      config_record, index = mkvindex.mkvindex(d_f)
  return {
    'probe': probe,
    'config_record': config_record,
    'index': index
  }
def write_thread():
  for fn in fns:
    with FileReader(fn) as f:
      decompress_proc.stdin.write(f.read())
  decompress_proc.stdin.close()
def index_pstream(fns, typ, cache_prefix=None):
  if typ != "h264":
    raise NotImplementedError(typ)

  if not fns:
    raise DataUnreadableError("chffr h264 requires contiguous files")

  out_fns = [cache_path_for_file_path(fn, cache_prefix) for fn in fns]
  # list, not a map iterator: out_exists is consumed twice below
  out_exists = [os.path.exists(out_fn) for out_fn in out_fns]
  if all(out_exists):
    return

  # load existing index files to avoid re-doing work
  existing_indexes = []
  for out_fn, exists in zip(out_fns, out_exists):
    existing = None
    if exists:
      with open(out_fn, "rb") as cache_file:
        existing = pickle.load(cache_file)
    existing_indexes.append(existing)

  # probe the first file
  if existing_indexes[0]:
    probe = existing_indexes[0]['probe']
  else:
    with FileReader(fns[0]) as f:
      probe = ffprobe(f.name, typ)

  global_prefix = None

  # get the video index of all the segments in this stream
  indexes = []
  for i, fn in enumerate(fns):
    if existing_indexes[i]:
      index = existing_indexes[i]['index']
      prefix = existing_indexes[i]['global_prefix']
    else:
      with FileReader(fn) as f:
        index, prefix = vidindex(f.name, typ)
    if i == 0:
      # assert prefix
      if not prefix:
        raise DataUnreadableError("vidindex failed for %s" % fn)
      global_prefix = prefix
    indexes.append(index)

  assert global_prefix

  if np.sum(indexes[0][:, 0] == H264_SLICE_I) <= 1:
    print("pstream %s is unseekable. pre-decompressing all the segments..." % (fns[0]))
    pstream_predecompress(fns, probe, indexes, global_prefix, cache_prefix)
    return

  # generate what's required to make each segment self-contained
  # (the partial GOP from the end of each segment is put aside to add
  #  to the start of the following segment)
  prefix_data = [b"" for _ in fns]
  prefix_index = [[] for _ in fns]
  for i in range(len(fns) - 1):
    if indexes[i + 1][0, 0] == H264_SLICE_I and indexes[i + 1][0, 1] <= 1:
      # next file happens to start with an I-frame, no need to use this file's end
      continue

    index = indexes[i]
    if i == 0 and np.sum(index[:, 0] == H264_SLICE_I) <= 1:
      raise NotImplementedError("No I-frames in pstream.")

    # find the last GOP in the index
    frame_b = len(index) - 1
    while frame_b > 0 and index[frame_b, 0] != H264_SLICE_I:
      frame_b -= 1

    assert frame_b >= 0
    assert index[frame_b, 0] == H264_SLICE_I

    end_len = len(index) - frame_b

    with FileReader(fns[i]) as vid:
      vid.seek(index[frame_b, 1])
      end_data = vid.read()

    prefix_data[i + 1] = end_data
    prefix_index[i + 1] = index[frame_b:-1]
    # indexes[i] = index[:frame_b]

  for i, fn in enumerate(fns):
    cache_path = out_fns[i]

    if os.path.exists(cache_path):
      continue

    segment_index = {
      'index': indexes[i],
      'global_prefix': global_prefix,
      'probe': probe,
      'prefix_frame_data': prefix_data[i],  # data to prefix the first GOP with
      'num_prefix_frames': len(prefix_index[i]),  # number of frames to skip in the first GOP
    }

    with atomic_write_in_dir(cache_path, mode="wb", overwrite=True) as cache_file:
      pickle.dump(segment_index, cache_file, -1)
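# Hedged usage sketch for index_pstream: given the contiguous "camera"
# segment files of one route (the path list is illustrative), build the
# per-segment index caches, then load one back the same way the function
# itself does with pickle.
def _example_index_route(segment_fns, cache_prefix=None):
  index_pstream(segment_fns, "h264", cache_prefix=cache_prefix)
  with open(cache_path_for_file_path(segment_fns[0], cache_prefix), "rb") as f:
    return pickle.load(f)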