Example #1
def index_video(fn, frame_type=None, cache_prefix=None):
    cache_path = cache_path_for_file_path(fn, cache_prefix)

    if os.path.exists(cache_path):
        return

    if frame_type is None:
        frame_type = fingerprint_video(fn)

    if frame_type == FrameType.h264_pstream:
        # hack: try to index the whole route now
        route = Route.from_file_path(fn)

        camera_paths = route.camera_paths()
        if fn not in camera_paths:
            raise DataUnreadableError(
                "Not a contiguous route camera file: {}".format(fn))

        print("no pstream cache for %s, indexing route %s now" %
              (fn, route.name))
        index_pstream(camera_paths, "h264", cache_prefix)
    elif frame_type == FrameType.h265_stream:
        index_stream(fn, "hevc", cache_prefix=cache_prefix)
    elif frame_type == FrameType.h264_mp4:
        index_mp4(fn, cache_prefix=cache_prefix)
Example #2
def vidindex(fn, typ):
    vidindex_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "vidindex")
    vidindex = os.path.join(vidindex_dir, "vidindex")

    subprocess.check_call(["make"],
                          cwd=vidindex_dir,
                          stdout=open("/dev/null", "w"))

    with tempfile.NamedTemporaryFile() as prefix_f, \
         tempfile.NamedTemporaryFile() as index_f:
        try:
            subprocess.check_call(
                [vidindex, typ, fn, prefix_f.name, index_f.name])
        except subprocess.CalledProcessError:
            raise DataUnreadableError("vidindex failed on file %s" % fn)
        with open(index_f.name, "rb") as f:
            index = f.read()
        with open(prefix_f.name, "rb") as f:
            prefix = f.read()

    # each row is (frame type, byte offset); the final row is a sentinel:
    # (0xFFFFFFFF, total file size)
    index = np.frombuffer(index, np.uint32).reshape(-1, 2)

    assert index[-1, 0] == 0xFFFFFFFF
    assert index[-1, 1] == os.path.getsize(fn)

    return index, prefix
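
Column 1 of the returned index holds each frame's byte offset, and the sentinel row holds the file size, so adjacent offsets delimit whole frames. A minimal sketch of slicing it (the path is a placeholder):

import numpy as np

index, prefix = vidindex("video.hevc", "hevc")  # placeholder path
frame_offsets = index[:-1, 1]       # start offset of every frame
frame_sizes = np.diff(index[:, 1])  # the sentinel supplies the final end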
Example #3
def decompress_video_data(rawdat, vid_fmt, w, h, pix_fmt, multithreaded=False):
    # using a tempfile is much faster than proc.communicate for some reason

    with tempfile.TemporaryFile() as tmpf:
        tmpf.write(rawdat)
        tmpf.seek(0)

        proc = subprocess.Popen([
            "ffmpeg", "-threads", "0" if multithreaded else "1", "-vsync", "0",
            "-f", vid_fmt, "-flags2", "showall", "-i", "pipe:0", "-threads",
            "0" if multithreaded else "1", "-f", "rawvideo", "-pix_fmt",
            pix_fmt, "pipe:1"
        ],
                                stdin=tmpf,
                                stdout=subprocess.PIPE,
                                stderr=open("/dev/null"))

        # dat = proc.communicate()[0]
        dat = proc.stdout.read()
        if proc.wait() != 0:
            raise DataUnreadableError("ffmpeg failed")

    if pix_fmt == "rgb24":
        ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, h, w, 3)
    elif pix_fmt == "yuv420p":
        ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, (h * w * 3 // 2))
    elif pix_fmt == "yuv444p":
        ret = np.frombuffer(dat, dtype=np.uint8).reshape(-1, 3, h, w)
    else:
        raise NotImplementedError

    return ret
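
A hedged usage sketch; the path and the 1164x874 frame dimensions are placeholder assumptions and must match the actual stream:

with open("video.h264", "rb") as f:  # placeholder path
    rawdat = f.read()
frames = decompress_video_data(rawdat, "h264", 1164, 874, "rgb24")
print(frames.shape)  # (num_frames, 874, 1164, 3)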
Example #4
def read_frame():
    # nested helper excerpted from pstream_predecompress (Example #10);
    # decompress_proc, frame_size and fn come from the enclosing scope
    frame = None
    try:
        frame = decompress_proc.stdout.read(frame_size)
    except (IOError, ValueError):
        pass
    # a short or empty read means the decoder exited mid-stream
    if frame is None or len(frame) != frame_size:
        raise DataUnreadableError("pre-decompression failed for %s" % fn)
    return frame
Example #5
def index_log(fn):
    index_log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "index_log")
    index_log = os.path.join(index_log_dir, "index_log")
    phonelibs_dir = os.path.join(OP_PATH, 'phonelibs')

    subprocess.check_call(["make", "PHONELIBS=" + phonelibs_dir],
                          cwd=index_log_dir,
                          stdout=open("/dev/null", "w"))

    try:
        dat = subprocess.check_output([index_log, fn, "-"])
    except subprocess.CalledProcessError:
        raise DataUnreadableError("%s capnp is corrupted/truncated" % fn)
    return np.frombuffer(dat, dtype=np.uint64)
Example #6
def ffprobe(fn, fmt=None):
    cmd = [
        "ffprobe", "-v", "quiet", "-print_format", "json", "-show_format",
        "-show_streams"
    ]
    if fmt:
        cmd += ["-f", fmt]
    cmd += [fn]

    try:
        ffprobe_output = subprocess.check_output(cmd)
    except subprocess.CalledProcessError:
        raise DataUnreadableError(fn)

    return json.loads(ffprobe_output)
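
A minimal usage sketch; the path is a placeholder, and the keys mirror how pstream_predecompress (Example #10) reads the probe result:

probe = ffprobe("video.hevc", "hevc")  # placeholder path
w = probe['streams'][0]['width']
h = probe['streams'][0]['height']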
Example #7
def fingerprint_video(fn):
    with FileReader(fn) as f:
        header = f.read(4)
    if len(header) == 0:
        raise DataUnreadableError("%s is empty" % fn)
    elif header == b"\x00\xc0\x12\x00":
        return FrameType.raw
    elif header == b"\x00\x00\x00\x01":
        if 'hevc' in fn:
            return FrameType.h265_stream
        elif os.path.basename(fn) in ("camera", "acamera"):
            return FrameType.h264_pstream
        else:
            raise NotImplementedError(fn)
    elif header == b"\x00\x00\x00\x1c":
        return FrameType.h264_mp4
    elif header == b"\x1a\x45\xdf\xa3":
        return FrameType.ffv1_mkv
    else:
        raise NotImplementedError(fn)
Example #8
def vidindex_mp4(fn):
    try:
        xmls = subprocess.check_output(
            ["MP4Box", fn, "-diso", "-out", "/dev/stdout"])
    except subprocess.CalledProcessError as e:
        raise DataUnreadableError(fn)

    tree = ET.fromstring(xmls)

    def parse_content(s):
        # content attributes look like "data:application/octet-string,<hex>"
        prefix = "data:application/octet-string,"
        assert s.startswith(prefix)
        return bytes.fromhex(s[len(prefix):])

    avc_element = tree.find(".//AVCSampleEntryBox")
    width = int(avc_element.attrib['Width'])
    height = int(avc_element.attrib['Height'])

    sps_element = avc_element.find(
        ".//AVCDecoderConfigurationRecord/SequenceParameterSet")
    pps_element = avc_element.find(
        ".//AVCDecoderConfigurationRecord/PictureParameterSet")

    sps = parse_content(sps_element.attrib['content'])
    pps = parse_content(pps_element.attrib['content'])

    media_header = tree.find("MovieBox/TrackBox/MediaBox/MediaHeaderBox")
    time_scale = int(media_header.attrib['TimeScale'])

    sample_sizes = [
        int(entry.attrib['Size']) for entry in tree.findall(
            "MovieBox/TrackBox/MediaBox/MediaInformationBox/SampleTableBox/SampleSizeBox/SampleSizeEntry"
        )
    ]

    sample_dependency = [
        entry.attrib['dependsOnOther'] == "yes" for entry in tree.findall(
            "MovieBox/TrackBox/MediaBox/MediaInformationBox/SampleTableBox/SampleDependencyTypeBox/SampleDependencyEntry"
        )
    ]

    assert len(sample_sizes) == len(sample_dependency)

    chunk_offsets = [
        int(entry.attrib['offset']) for entry in tree.findall(
            "MovieBox/TrackBox/MediaBox/MediaInformationBox/SampleTableBox/ChunkOffsetBox/ChunkEntry"
        )
    ]

    sample_chunk_table = [(
        int(entry.attrib['FirstChunk']) - 1,
        int(entry.attrib['SamplesPerChunk'])
    ) for entry in tree.findall(
        "MovieBox/TrackBox/MediaBox/MediaInformationBox/SampleTableBox/SampleToChunkBox/SampleToChunkEntry"
    )]

    sample_offsets = [None for _ in sample_sizes]

    sample_i = 0
    for i, (first_chunk, samples_per_chunk) in enumerate(sample_chunk_table):
        if i == len(sample_chunk_table) - 1:
            last_chunk = len(chunk_offsets) - 1
        else:
            last_chunk = sample_chunk_table[i + 1][0] - 1
        for k in range(first_chunk, last_chunk + 1):
            sample_offset = chunk_offsets[k]
            for _ in range(samples_per_chunk):
                sample_offsets[sample_i] = sample_offset
                sample_offset += sample_sizes[sample_i]
                sample_i += 1

    assert sample_i == len(sample_sizes)

    pts_offset_table = [(
        int(entry.attrib['CompositionOffset']),
        int(entry.attrib['SampleCount'])
    ) for entry in tree.findall(
        "MovieBox/TrackBox/MediaBox/MediaInformationBox/SampleTableBox/CompositionOffsetBox/CompositionOffsetEntry"
    )]
    sample_pts_offset = [0 for _ in sample_sizes]
    sample_i = 0
    for dt, count in pts_offset_table:
        for _ in range(count):
            sample_pts_offset[sample_i] = dt
            sample_i += 1

    sample_time_table = [(
        int(entry.attrib['SampleDelta']), int(entry.attrib['SampleCount'])
    ) for entry in tree.findall(
        "MovieBox/TrackBox/MediaBox/MediaInformationBox/SampleTableBox/TimeToSampleBox/TimeToSampleEntry"
    )]
    sample_time = [None for _ in sample_sizes]
    cur_ts = 0
    sample_i = 0
    for dt, count in sample_time_table:
        for _ in range(count):
            # decode time plus composition offset gives the PTS, in ms
            sample_time[sample_i] = (
                cur_ts + sample_pts_offset[sample_i]) * 1000 // time_scale

            cur_ts += dt
            sample_i += 1

    sample_time.sort()  # because we only decode GOPs in PTS order

    return {
        'width': width,
        'height': height,
        'sample_offsets': sample_offsets,
        'sample_sizes': sample_sizes,
        'sample_dependency': sample_dependency,
        'sample_time': sample_time,
        'sps': sps,
        'pps': pps
    }
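
A hedged sketch of using the returned index to pull a single sample's bytes out of the file (the path is a placeholder):

info = vidindex_mp4("video.mp4")  # placeholder path
with open("video.mp4", "rb") as f:
    f.seek(info['sample_offsets'][0])
    first_sample = f.read(info['sample_sizes'][0])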
Example #9
def index_pstream(fns, typ, cache_prefix=None):
    if typ != "h264":
        raise NotImplementedError(typ)

    if not fns:
        raise DataUnreadableError("chffr h264 requires contiguous files")

    out_fns = [cache_path_for_file_path(fn, cache_prefix) for fn in fns]
    out_exists = [os.path.exists(out_fn) for out_fn in out_fns]
    if all(out_exists):
        return

    # load existing index files to avoid re-doing work
    existing_indexes = []
    for out_fn, exists in zip(out_fns, out_exists):
        existing = None
        if exists:
            with open(out_fn, "rb") as cache_file:
                existing = pickle.load(cache_file)
        existing_indexes.append(existing)

    # probe the first file
    if existing_indexes[0]:
        probe = existing_indexes[0]['probe']
    else:
        with FileReader(fns[0]) as f:
            probe = ffprobe(f.name, typ)

    global_prefix = None

    # get the video index of all the segments in this stream
    indexes = []
    for i, fn in enumerate(fns):
        if existing_indexes[i]:
            index = existing_indexes[i]['index']
            prefix = existing_indexes[i]['global_prefix']
        else:
            with FileReader(fn) as f:
                index, prefix = vidindex(f.name, typ)
        if i == 0:
            # the first segment must supply the stream's global prefix
            if not prefix:
                raise DataUnreadableError("vidindex failed for %s" % fn)
            global_prefix = prefix
        indexes.append(index)

    assert global_prefix

    if np.sum(indexes[0][:, 0] == H264_SLICE_I) <= 1:
        print(
            "pstream %s is unseekable. pre-decompressing all the segments..." %
            (fns[0]))
        pstream_predecompress(fns, probe, indexes, global_prefix, cache_prefix)
        return

    # generate what's required to make each segment self-contained
    # (the partial GOP from the end of each segment is put aside and
    #  prepended to the start of the following segment)
    prefix_data = [b"" for _ in fns]
    prefix_index = [[] for _ in fns]
    for i in range(len(fns) - 1):
        if indexes[i + 1][0, 0] == H264_SLICE_I and indexes[i + 1][0, 1] <= 1:
            # the next file happens to start with an I-frame, so this file's
            # trailing partial GOP isn't needed
            continue

        index = indexes[i]
        if i == 0 and np.sum(index[:, 0] == H264_SLICE_I) <= 1:
            raise NotImplementedError("No I-frames in pstream.")

        # find the last GOP in the index
        frame_b = len(index) - 1
        while frame_b > 0 and index[frame_b, 0] != H264_SLICE_I:
            frame_b -= 1

        assert frame_b >= 0
        assert index[frame_b, 0] == H264_SLICE_I

        end_len = len(index) - frame_b

        with FileReader(fns[i]) as vid:
            vid.seek(index[frame_b, 1])
            end_data = vid.read()

        prefix_data[i + 1] = end_data
        prefix_index[i + 1] = index[frame_b:-1]
        # indexes[i] = index[:frame_b]

    for i, fn in enumerate(fns):
        cache_path = out_fns[i]

        if os.path.exists(cache_path):
            continue

        segment_index = {
            'index': indexes[i],
            'global_prefix': global_prefix,
            'probe': probe,
            # data to prefix the first GOP with
            'prefix_frame_data': prefix_data[i],
            # number of frames to skip in the first GOP
            'num_prefix_frames': len(prefix_index[i]),
        }

        with atomic_write_in_dir(cache_path, mode="wb",
                                 overwrite=True) as cache_file:
            pickle.dump(segment_index, cache_file, -1)
Example #10
def pstream_predecompress(fns,
                          probe,
                          indexes,
                          global_prefix,
                          cache_prefix,
                          multithreaded=False):
    assert len(fns) == len(indexes)
    out_fns = [
        cache_path_for_file_path(fn, cache_prefix, extension=".predecom.mkv")
        for fn in fns
    ]
    out_exists = [os.path.exists(out_fn) for out_fn in out_fns]
    if all(out_exists):
        return

    w = probe['streams'][0]['width']
    h = probe['streams'][0]['height']

    frame_size = w * h * 3 // 2  # yuv420p: 1.5 bytes per pixel

    decompress_proc = subprocess.Popen([
        "ffmpeg", "-threads", "0" if multithreaded else "1", "-vsync", "0",
        "-f", "h264", "-i", "pipe:0", "-threads",
        "0" if multithreaded else "1", "-f", "rawvideo", "-pix_fmt", "yuv420p",
        "pipe:1"
    ],
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=open("/dev/null", "wb"))

    def write_thread():
        for fn in fns:
            with FileReader(fn) as f:
                decompress_proc.stdin.write(f.read())
        decompress_proc.stdin.close()

    def read_frame():
        frame = None
        try:
            frame = decompress_proc.stdout.read(frame_size)
        except (IOError, ValueError):
            pass
        if frame is None or frame == "" or len(frame) != frame_size:
            raise DataUnreadableError("pre-decompression failed for %s" % fn)
        return frame

    t = threading.Thread(target=write_thread)
    t.daemon = True
    t.start()

    try:
        for fn, out_fn, out_exist, index in zip(fns, out_fns, out_exists,
                                                indexes):
            if out_exist:
                # segment already cached: still consume its frames so the
                # shared decompressor stays in sync (index has a sentinel row)
                for fi in range(index.shape[0] - 1):
                    read_frame()
                continue

            with atomic_write_in_dir(out_fn, mode="w+b",
                                     overwrite=True) as out_tmp:
                compress_proc = subprocess.Popen([
                    "ffmpeg", "-threads", "0" if multithreaded else "1", "-y",
                    "-vsync", "0", "-f", "rawvideo", "-pix_fmt", "yuv420p",
                    "-s",
                    "%dx%d" % (w, h), "-i", "pipe:0", "-threads",
                    "0" if multithreaded else "1", "-f", "matroska", "-vcodec",
                    "ffv1", "-g", "0", out_tmp.name
                ],
                                                 stdin=subprocess.PIPE,
                                                 stderr=subprocess.DEVNULL)
                try:
                    for fi in range(index.shape[0] - 1):
                        frame = read_frame()
                        compress_proc.stdin.write(frame)
                    compress_proc.stdin.close()
                except:
                    compress_proc.kill()
                    raise

                assert compress_proc.wait() == 0

            cache_path = cache_path_for_file_path(fn, cache_prefix)
            with atomic_write_in_dir(cache_path, mode="wb",
                                     overwrite=True) as cache_file:
                pickle.dump(
                    {
                        'predecom': os.path.basename(out_fn),
                        'index': index,
                        'probe': probe,
                        'global_prefix': global_prefix,
                    }, cache_file, -1)

    except:
        decompress_proc.kill()
        raise
    finally:
        t.join()

    rc = decompress_proc.wait()
    if rc != 0:
        raise DataUnreadableError(fns[0])