Python flatten Examples, contentai_metadata_flatten.main.flatten Python Examples

Example #1

0

Show file

File: test_library.py Project: ezavesky/metadata-flatten-extractor

def test_split_timing():
    """Check that multi-part assets are assembled using per-part 'timing' files.

    Every ``part*`` directory found under ``PATH_TEST_SPLIT`` is flattened into
    one shared temporary result directory, passing that part's ``timing.txt``
    as ``time_offset_source``.  Each generated comskip CSV must then contain
    events starting in the first hour, the second hour and beyond two hours.

    NOTE(review): if no ``*comskip*csv*`` file is produced the checking loop
    body never runs and the test passes vacuously -- confirm this is intended.
    """
    # goal is to check the assembly of multiple parts using a 'timing' file

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    path_temp = Path(tempfile.mkdtemp())
    try:
        for asset_dir in PATH_TEST_SPLIT.rglob("part*"):
            if not asset_dir.is_dir():
                continue
            # test straight result parse (return value is not needed here;
            # only the files written into path_temp are inspected below)
            flatten(
                {
                    "path_content": str(
                        asset_dir.joinpath("test.mp4").resolve()),
                    "path_result": str(path_temp),
                    "time_offset_source": str(asset_dir.joinpath("timing.txt"))
                },
                args=[])

        for path_gen_csv in path_temp.rglob("*comskip*csv*"):
            df = pd.read_csv(str(path_gen_csv))
            # want to confirm that events exist in times from at least 0-59m, 60-119m, 120m+
            assert len(df[df["time_begin"] < 3600])
            assert len(df[(df["time_begin"] > 3600) & (df["time_begin"] < 7200)])
            assert len(df[df["time_begin"] > 7200])
    finally:
        # cleanup must also happen when an assertion above fails, otherwise
        # every failing run leaves a temporary directory behind
        shutil.rmtree(path_temp)

Example #2

0

Show file

File: test_main_parse.py Project: ezavesky/metadata-flatten-extractor

def test_generator():
    """Exercise generator selection, time offset and compression via ``flatten``.

    All scenarios target the same temporary result directory and use the
    ``azure_videoindexer`` extractor on ``PATH_TEST``.  The scenarios cover:
    default generators, three spellings of "no generator", a single
    ``flattened_csv`` generator, a ``--time_offset`` run compared row by row
    against the un-shifted run, and a ``--no_compression`` run.
    """
    path_temp = Path(tempfile.mkdtemp()).resolve()
    # argument prefix shared by every command-line style invocation below
    args_base = ["--path_content", str(PATH_TEST.resolve()), "--extractor", "azure_videoindexer"]

    # test only single input (extractor)
    dict_result = flatten(args=args_base + ["--verbose", "--path_result", str(path_temp)])
    assert "generated" in dict_result
    assert 2 == len(dict_result['generated'])
    shutil.rmtree(str(path_temp))   # cleanup

    # with no output (v1.3.0+): empty-string generator passed through the config dict
    dict_result = flatten({"path_content": str(PATH_TEST.resolve()), "extractor": "azure_videoindexer",
                           "generator": "", "verbose": True, "path_result": str(path_temp)}, args=[])
    assert "generated" not in dict_result
    assert not [x for x in path_temp.rglob("*") if not x.is_dir()]

    # with no output (v1.3.0+): None generator passed on the argument list
    dict_result = flatten(args=args_base + ["--generator", None, "--verbose", "--path_result", str(path_temp)])
    assert "generated" not in dict_result
    assert not [x for x in path_temp.rglob("*") if not x.is_dir()]

    # with no output (v1.3.0+): whitespace-only generator passed on the argument list
    dict_result = flatten(args=args_base + ["--generator", " ", "--verbose", "--path_result", str(path_temp)])
    assert "generated" not in dict_result
    assert not [x for x in path_temp.rglob("*") if not x.is_dir()]

    # test only single input and output (extractor + generator)
    dict_result = flatten(args=args_base + ["--generator", "flattened_csv", "--path_result", str(path_temp)])
    assert "generated" in dict_result and 1 == len(dict_result['generated'])
    assert len([x for x in path_temp.rglob("*") if not x.is_dir()]) == 1
    # the CSV is loaded before cleanup so the frame outlives the file on disk
    df_single = pd.read_csv(dict_result['generated'][0]['path']).sort_values(["time_begin", "tag"])
    shutil.rmtree(str(path_temp))   # cleanup
    assert len(df_single) > 0

    # test time offset
    time_offset = 5
    dict_result = flatten(args=args_base + ["--time_offset", str(time_offset),
                                            "--generator", "flattened_csv", "--path_result", str(path_temp)])
    assert "generated" in dict_result and 1 == len(dict_result['generated'])
    df_offset = pd.read_csv(dict_result['generated'][0]['path']).sort_values(["time_begin", "tag"])
    shutil.rmtree(str(path_temp))   # cleanup
    assert len(df_offset) > 0

    # make sure same data: every row (not just the first) must be shifted by the offset;
    # both frames were sorted identically above, so rows are compared position by position
    assert len(df_offset) == len(df_single)
    for time_shifted, time_plain in zip(df_offset["time_begin"], df_single["time_begin"]):
        assert abs((time_shifted - time_plain) - time_offset) < 0.1

    # test non-compressed version
    dict_uncompressed = flatten(args=args_base + ["--no_compression", "--generator", "flattened_csv",
                                                  "--path_result", str(path_temp)])
    assert "generated" in dict_uncompressed and 1 == len(dict_uncompressed['generated'])
    # dict_result still refers to the (compressed) time-offset run above
    assert dict_result['generated'][0]['path'].endswith(".gz")
    assert not dict_uncompressed['generated'][0]['path'].endswith(".gz")
    shutil.rmtree(str(path_temp))   # cleanup

Example #3

0

Show file

File: test_main_parse.py Project: ezavesky/metadata-flatten-extractor

def test_programmatic():
    """Call ``flatten`` programmatically with incomplete and complete arguments.

    Invocations missing either the content path or the result path must not
    return a ``"data"`` entry.  A complete invocation must return more than
    one ``"data"`` item, and pointing at the containing directory instead of
    the ``test.mp4`` file must return the same number of items.
    """
    path_temp = Path(tempfile.mkdtemp()).resolve()
    result_dir = str(path_temp)
    content_file = str(PATH_TEST.joinpath("test.mp4").resolve())

    # test bad input or output
    only_result = flatten(args=["--path_result", result_dir])
    assert "data" not in only_result
    only_content = flatten(args=["--path_content", content_file])
    assert "data" not in only_content

    # test straight result parse
    from_file = flatten(args=["--path_content", content_file, "--path_result", result_dir])
    assert "data" in from_file
    num_results_long = len(from_file['data'])
    assert num_results_long > 1
    shutil.rmtree(result_dir)   # cleanup

    # test directory input instead of file
    content_dir = str(PATH_TEST.resolve())
    from_dir = flatten(args=["--path_content", content_dir, "--path_result", result_dir])
    assert "data" in from_dir
    assert num_results_long == len(from_dir['data'])
    shutil.rmtree(result_dir)   # cleanup

Example #4

0

Show file

File: test_main_parse.py Project: ezavesky/metadata-flatten-extractor

def test_cli():
    """Compare the ``contentai-metadata-flatten`` command with a direct ``flatten`` call.

    The console command is run against ``test.mp4`` with a temporary result
    directory and must leave at least one file there.  A programmatic call
    with the same content and result paths must then report the same number
    of ``"generated"`` entries as files the command produced.
    """
    # local import: only this test launches an external process
    import subprocess

    path_temp = Path(tempfile.mkdtemp())

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.  The exit status is deliberately not checked,
    # matching the previous os.system call; the file count below is what
    # detects a failed run.
    subprocess.run([
        "contentai-metadata-flatten",
        "--path_result", str(path_temp),
        "--path_content", str(PATH_TEST.joinpath('test.mp4').resolve()),
    ])
    list_result_cli = [x for x in path_temp.rglob("*") if not x.is_dir()]
    print(list_result_cli)
    assert len(list_result_cli) > 0

    # test straight result parse
    dict_result = flatten({"path_content": str(PATH_TEST.joinpath("test.mp4").resolve()),
                           "path_result": str(path_temp)}, args=[])
    assert "generated" in dict_result and len(dict_result['generated'])
    print(dict_result['generated'])
    assert len(list_result_cli) == len(dict_result['generated'])
    shutil.rmtree(path_temp)   # cleanup


# # test all frames

Example #5

0

Show file

File: test_main_parse.py Project: johndpope/metadata-flatten-extractor

def test_programmatic():
    """Exercise ``flatten`` end to end when it returns a list of result paths.

    Covers: missing input/output arguments (empty result), file versus
    directory content paths (same result count), restricting to one extractor,
    restricting to one extractor plus the ``flattened_csv`` generator, a
    ``--time_offset`` run compared row by row with the un-shifted run, and a
    ``--no_compression`` run compared with the compressed one.
    """
    path_temp = Path(tempfile.mkdtemp()).resolve()

    # test bad input or output
    list_result = flatten(args=["--path_result", str(path_temp)])
    assert len(list_result) == 0
    list_result = flatten(
        args=["--path_content",
              str(PATH_TEST.joinpath("test.mp4").resolve())])
    assert len(list_result) == 0

    # test straight result parse
    list_result = flatten(args=[
        "--path_content",
        str(PATH_TEST.joinpath("test.mp4").resolve()), "--path_result",
        str(path_temp)
    ])
    num_results_long = len(list_result)
    assert num_results_long >= 2
    shutil.rmtree(str(path_temp))  # cleanup

    # test directory input instead of file
    list_result = flatten(args=[
        "--path_content",
        str(PATH_TEST.resolve()), "--path_result",
        str(path_temp)
    ])
    assert num_results_long == len(list_result)
    shutil.rmtree(str(path_temp))  # cleanup

    # test only single input (extractor)
    list_result = flatten(args=[
        "--path_content",
        str(PATH_TEST.resolve()), "--extractor", "azure_videoindexer",
        "--verbose", "--path_result",
        str(path_temp)
    ])
    assert 2 == len(list_result)
    shutil.rmtree(str(path_temp))  # cleanup

    # test only single input and output (extractor + generator)
    list_result = flatten(args=[
        "--path_content",
        str(PATH_TEST.resolve()), "--extractor", "azure_videoindexer",
        "--generator", "flattened_csv", "--path_result",
        str(path_temp)
    ])
    assert 1 == len(list_result)
    # the CSV is loaded before cleanup so the frame outlives the file on disk
    df_single = pd.read_csv(list_result[0]).sort_values(["time_begin", "tag"])
    shutil.rmtree(str(path_temp))  # cleanup
    assert len(df_single) > 0

    # test time offset
    time_offset = 5
    list_result = flatten(args=[
        "--path_content",
        str(PATH_TEST.resolve()), "--extractor", "azure_videoindexer",
        "--time_offset",
        str(time_offset), "--generator", "flattened_csv", "--path_result",
        str(path_temp)
    ])
    assert 1 == len(list_result)
    df_offset = pd.read_csv(list_result[0]).sort_values(["time_begin", "tag"])
    shutil.rmtree(str(path_temp))  # cleanup
    assert len(df_offset) > 0

    # make sure same data: every row (not just the first) must be shifted by
    # the offset; both frames were sorted identically above, so rows are
    # compared position by position
    assert len(df_offset) == len(df_single)
    for time_shifted, time_plain in zip(df_offset["time_begin"],
                                        df_single["time_begin"]):
        assert abs((time_shifted - time_plain) - time_offset) < 0.1

    # test non-compressed version
    list_uncompressed = flatten(args=[
        "--path_content",
        str(PATH_TEST.resolve()), "--extractor", "azure_videoindexer",
        "--no_compression", "--generator", "flattened_csv", "--path_result",
        str(path_temp)
    ])
    # list_result still refers to the (compressed) time-offset run above
    assert len(list_result) == len(list_uncompressed)
    for path_compressed, path_plain in zip(list_result, list_uncompressed):
        assert path_compressed.endswith(".gz")
        assert not path_plain.endswith(".gz")
    shutil.rmtree(str(path_temp))  # cleanup