def read(self, ll=None):
    """Read bytes starting at the current position, going through the chunk cache.

    When ``self._force_download`` is set the cache is bypassed and the call is
    delegated to ``self.read_aux(ll=ll)``. Otherwise the requested range is
    served chunk by chunk: every ``CHUNK_SIZE``-aligned chunk overlapping the
    range is loaded from ``CACHE_DIR`` when a file for it exists, or fetched
    with ``self.read_aux(ll=CHUNK_SIZE)`` and written to the cache first.

    Args:
        ll: Number of bytes to read from ``self._pos``. ``None`` reads up to
            the offset returned by ``self.get_length()``.

    Returns:
        On the cached path, the bytes of the requested range; ``self._pos`` is
        set to the end of the range. On the forced-download path, whatever
        ``self.read_aux`` returns.
    """
    if self._force_download:
        return self.read_aux(ll=ll)

    file_begin = self._pos
    file_end = (self._pos + ll) if ll is not None else self.get_length()

    # Align with the chunks we store: start at the latest chunk boundary that
    # lies before or at the current file position.
    position = (file_begin // CHUNK_SIZE) * CHUNK_SIZE

    # The URL hash does not change between chunks, so compute it once.
    url_hash = hash_256(self._url)

    # Collect the slices and join once at the end; repeatedly extending an
    # immutable bytes object copies the accumulated data on every chunk.
    pieces = []
    while True:
        # The cursor is parked on the chunk start before fetching
        # (presumably read_aux reads from self._pos -- confirm).
        self._pos = position

        # NOTE(review): this is true division, so under Python 3 the cache file
        # names end in e.g. "_3.0". Left untouched on purpose: switching to
        # floor division would orphan every chunk already in the cache.
        chunk_number = self._pos / CHUNK_SIZE
        file_name = url_hash + "_" + str(chunk_number)
        full_path = os.path.join(CACHE_DIR, str(file_name))

        if not os.path.exists(full_path):
            # Chunk is not cached yet: download it and store it.
            data = self.read_aux(ll=CHUNK_SIZE)
            with atomic_write_in_dir(full_path, mode="wb") as new_cached_file:
                new_cached_file.write(data)
        else:
            with open(full_path, "rb") as cached_file:
                data = cached_file.read()

        # Keep only the part of this chunk that falls inside the request.
        pieces.append(data[max(0, file_begin - position):min(CHUNK_SIZE, file_end - position)])

        position += CHUNK_SIZE
        if position >= file_end:
            self._pos = file_end
            return b"".join(pieces)
def cache_inner(fn, *args, **kwargs):
    """Call the wrapped ``func`` for ``fn``, reusing a pickled result when one exists.

    The optional ``cache_prefix`` keyword is consumed here and is not forwarded
    to ``func``. When ``cache_path_for_file_path`` yields a falsy path, caching
    is skipped and ``func`` is simply called.
    """
    prefix = kwargs.pop('cache_prefix', None)
    cache_path = cache_path_for_file_path(fn, prefix)

    # Cache hit: hand back the stored value without calling func.
    if cache_path and os.path.exists(cache_path):
        with open(cache_path, "rb") as stored:
            return pickle.load(stored)

    # Cache miss (or caching disabled): compute, then persist if we have a path.
    result = func(fn, *args, **kwargs)
    if cache_path:
        with atomic_write_in_dir(cache_path, mode="wb", overwrite=True) as target:
            pickle.dump(result, target, -1)
    return result
def get_length(self):
    """Return the length of the file, caching it on the instance and on disk.

    Lookup order: the value already stored in ``self._length``, then the
    ``<hash>_length`` file in ``CACHE_DIR`` (skipped when
    ``self._force_download`` is set), then ``self.get_length_online()``.
    A length obtained online is written to the cache file unless
    ``self._force_download`` is set.
    """
    cached = self._length
    if cached is not None:
        return cached

    length_path = os.path.join(CACHE_DIR, hash_256(self._url) + "_length")

    if os.path.exists(length_path) and not self._force_download:
        with open(length_path, "r") as length_file:
            self._length = int(length_file.read())
        return self._length

    self._length = self.get_length_online()
    if self._force_download:
        # Forced downloads never touch the on-disk cache.
        return self._length

    with atomic_write_in_dir(length_path, mode="w") as length_file:
        length_file.write(str(self._length))
    return self._length
def index_pstream(fns, typ, cache_prefix=None):
    """Build and cache a per-segment index for a sequence of h264 stream files.

    For every file in ``fns`` a pickle is written to the path given by
    ``cache_path_for_file_path(fn, cache_prefix)`` (unless it already exists)
    holding the keys ``index``, ``global_prefix``, ``probe``,
    ``prefix_frame_data`` and ``num_prefix_frames``. If the first segment has
    at most one I-slice, the work is handed to ``pstream_predecompress``
    instead.

    Args:
        fns: Segment file names, in stream order.
        typ: Stream type; only ``"h264"`` is supported.
        cache_prefix: Passed through to ``cache_path_for_file_path``.

    Raises:
        NotImplementedError: ``typ`` is not ``"h264"``.
        DataUnreadableError: ``fns`` is empty, or ``vidindex`` returned no
            prefix for the first file.
    """
    if typ != "h264":
        raise NotImplementedError(typ)

    if not fns:
        raise DataUnreadableError("chffr h264 requires contiguous files")

    out_fns = [cache_path_for_file_path(fn, cache_prefix) for fn in fns]
    # Must be a real list: it is consumed by all() and then again by zip().
    # A bare map() is a one-shot iterator on Python 3, which left
    # existing_indexes short or misaligned.
    out_exists = [os.path.exists(out_fn) for out_fn in out_fns]
    if all(out_exists):
        return

    # load existing index files to avoid re-doing work
    existing_indexes = []
    for out_fn, exists in zip(out_fns, out_exists):
        existing = None
        if exists:
            with open(out_fn, "rb") as cache_file:
                existing = pickle.load(cache_file)
        existing_indexes.append(existing)

    # probe the first file
    if existing_indexes[0]:
        probe = existing_indexes[0]['probe']
    else:
        with FileReader(fns[0]) as f:
            probe = ffprobe(f.name, typ)

    global_prefix = None

    # get the video index of all the segments in this stream
    indexes = []
    for i, fn in enumerate(fns):
        if existing_indexes[i]:
            index = existing_indexes[i]['index']
            prefix = existing_indexes[i]['global_prefix']
        else:
            with FileReader(fn) as f:
                index, prefix = vidindex(f.name, typ)
        if i == 0:
            # The first segment's prefix is used for the whole stream.
            if not prefix:
                raise DataUnreadableError("vidindex failed for %s" % fn)
            global_prefix = prefix
        indexes.append(index)

    assert global_prefix

    if np.sum(indexes[0][:, 0] == H264_SLICE_I) <= 1:
        print("pstream %s is unseekable. pre-decompressing all the segments..." % (fns[0]))
        pstream_predecompress(fns, probe, indexes, global_prefix, cache_prefix)
        return

    # generate what's required to make each segment self-contained
    # (the partial GOP from the end of each segment is put aside to add
    # to the start of the following segment)
    prefix_data = ["" for _ in fns]
    prefix_index = [[] for _ in fns]
    for i in range(len(fns) - 1):
        if indexes[i + 1][0, 0] == H264_SLICE_I and indexes[i + 1][0, 1] <= 1:
            # next file happens to start with an i-frame, no need to use this file's end
            continue

        index = indexes[i]
        if i == 0 and np.sum(index[:, 0] == H264_SLICE_I) <= 1:
            raise NotImplementedError("No I-frames in pstream.")

        # find the last GOP in the index: walk back to the last I-slice row
        frame_b = len(index) - 1
        while frame_b > 0 and index[frame_b, 0] != H264_SLICE_I:
            frame_b -= 1

        assert frame_b >= 0
        assert index[frame_b, 0] == H264_SLICE_I

        # Read everything from that I-slice's offset to the end of the file.
        with FileReader(fns[i]) as vid:
            vid.seek(index[frame_b, 1])
            end_data = vid.read()

        prefix_data[i + 1] = end_data
        prefix_index[i + 1] = index[frame_b:-1]
        # indexes[i] itself is intentionally left untrimmed.

    for i, fn in enumerate(fns):
        cache_path = out_fns[i]

        if os.path.exists(cache_path):
            continue

        segment_index = {
            'index': indexes[i],
            'global_prefix': global_prefix,
            'probe': probe,
            'prefix_frame_data': prefix_data[i],  # data to prefix the first GOP with
            'num_prefix_frames': len(prefix_index[i]),  # number of frames to skip in the first GOP
        }

        with atomic_write_in_dir(cache_path, mode="wb", overwrite=True) as cache_file:
            pickle.dump(segment_index, cache_file, -1)
def pstream_predecompress(fns, probe, indexes, global_prefix, cache_prefix, multithreaded=False):
    """Decode a whole h264 stream once and store each segment as an FFV1 Matroska file.

    All files in ``fns`` are fed, concatenated, into a single ffmpeg decoder.
    The raw yuv420p frames it produces are split per segment and re-encoded
    with a second ffmpeg process into the ``.predecom.mkv`` cache path of that
    segment. A pickle with the keys ``predecom``, ``index``, ``probe`` and
    ``global_prefix`` is then written to the segment's regular cache path.
    Segments whose ``.predecom.mkv`` already exists are skipped, but their
    frames are still read from the decoder and discarded.

    Args:
        fns: Segment file names, in stream order.
        probe: Probe result; ``probe['streams'][0]`` supplies width and height.
        indexes: One index array per file; ``index.shape[0] - 1`` frames are
            taken for each segment.
        global_prefix: Stored unchanged in each written index pickle.
        cache_prefix: Passed through to ``cache_path_for_file_path``.
        multithreaded: Use ffmpeg ``-threads 0`` instead of ``-threads 1``.

    Raises:
        DataUnreadableError: A frame could not be read in full from the
            decoder, or the decoder exited with a non-zero status.
        AssertionError: ``fns`` and ``indexes`` differ in length, or the
            encoder exited with a non-zero status.
    """
    assert len(fns) == len(indexes)
    out_fns = [cache_path_for_file_path(fn, cache_prefix, extension=".predecom.mkv") for fn in fns]
    # Must be a real list: it is consumed by all() and again by zip() below
    # (a bare map() is a one-shot iterator on Python 3).
    out_exists = [os.path.exists(out_fn) for out_fn in out_fns]
    if all(out_exists):
        return

    w = probe['streams'][0]['width']
    h = probe['streams'][0]['height']

    # yuv420p: 1.5 bytes per pixel. Floor division keeps this an int, which
    # stdout.read() requires.
    frame_size = w * h * 3 // 2
    threads = "0" if multithreaded else "1"

    # One null sink shared by every ffmpeg child, closed when we are done.
    with open(os.devnull, "wb") as devnull:
        decompress_proc = subprocess.Popen(
            ["ffmpeg",
             "-threads", threads,
             "-vsync", "0",
             "-f", "h264",
             "-i", "pipe:0",
             "-threads", threads,
             "-f", "rawvideo",
             "-pix_fmt", "yuv420p",
             "pipe:1"],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=devnull)

        def write_thread():
            # Feed every segment into the decoder. stdin is closed even when a
            # read fails so the decoder sees EOF instead of waiting forever.
            try:
                for src_fn in fns:
                    with FileReader(src_fn) as f:
                        decompress_proc.stdin.write(f.read())
            finally:
                decompress_proc.stdin.close()

        def read_frame():
            # Pull exactly one raw frame from the decoder. `fn` is the segment
            # currently being processed by the loop below.
            frame = None
            try:
                frame = decompress_proc.stdout.read(frame_size)
            except (IOError, ValueError):
                pass
            if not frame or len(frame) != frame_size:
                raise DataUnreadableError("pre-decompression failed for %s" % fn)
            return frame

        t = threading.Thread(target=write_thread)
        t.daemon = True
        t.start()

        try:
            for fn, out_fn, out_exist, index in zip(fns, out_fns, out_exists, indexes):
                # NOTE(review): one frame fewer than index rows -- presumably
                # the last row is an end marker; confirm against vidindex.
                num_frames = index.shape[0] - 1

                if out_exist:
                    # Already cached: drain this segment's frames so the next
                    # segment starts at the right place in the decoder output.
                    for _ in range(num_frames):
                        read_frame()
                    continue

                with atomic_write_in_dir(out_fn, mode="w+b", overwrite=True) as out_tmp:
                    compress_proc = subprocess.Popen(
                        ["ffmpeg",
                         "-threads", threads,
                         "-y",
                         "-vsync", "0",
                         "-f", "rawvideo",
                         "-pix_fmt", "yuv420p",
                         "-s", "%dx%d" % (w, h),
                         "-i", "pipe:0",
                         "-threads", threads,
                         "-f", "matroska",
                         "-vcodec", "ffv1",
                         "-g", "0",
                         out_tmp.name],
                        stdin=subprocess.PIPE, stderr=devnull)
                    try:
                        for _ in range(num_frames):
                            compress_proc.stdin.write(read_frame())
                        compress_proc.stdin.close()
                    except BaseException:
                        # Deliberately broad: the child must not outlive us,
                        # whatever interrupted the loop.
                        compress_proc.kill()
                        compress_proc.wait()
                        raise

                    # Wait outside the assert so the encoder is still waited
                    # for when assertions are disabled with -O.
                    compress_rc = compress_proc.wait()
                    assert compress_rc == 0

                cache_path = cache_path_for_file_path(fn, cache_prefix)
                with atomic_write_in_dir(cache_path, mode="wb", overwrite=True) as cache_file:
                    pickle.dump({
                        'predecom': os.path.basename(out_fn),
                        'index': index,
                        'probe': probe,
                        'global_prefix': global_prefix,
                    }, cache_file, -1)

        except BaseException:
            decompress_proc.kill()
            raise

        finally:
            t.join()

        rc = decompress_proc.wait()
        if rc != 0:
            raise DataUnreadableError(fns[0])