def test_split_timing():
    """Check the assembly of multiple parts using a 'timing' file.

    Every ``part*`` directory found under ``PATH_TEST_SPLIT`` is flattened
    into one shared temporary result directory, passing that part's
    ``timing.txt`` as ``time_offset_source``.  Each generated comskip CSV is
    then required to contain events starting in the first hour, in the second
    hour, and after the second hour.
    """
    logging.getLogger().setLevel(logging.INFO)
    path_temp = Path(tempfile.mkdtemp())
    try:
        for asset_dir in PATH_TEST_SPLIT.rglob("part*"):
            if not asset_dir.is_dir():
                continue
            # test straight result parse
            flatten(
                {
                    "path_content": str(asset_dir.joinpath("test.mp4").resolve()),
                    "path_result": str(path_temp),
                    "time_offset_source": str(asset_dir.joinpath("timing.txt")),
                },
                args=[],
            )

        # NOTE(review): the checks below are assumed to run once, after all
        # parts were processed -- confirm against the intended layout.
        # NOTE(review): if no file matches the pattern the loop body never
        # runs and the test passes without checking anything.
        for path_gen_csv in path_temp.rglob("*comskip*csv*"):
            df = pd.read_csv(str(path_gen_csv))
            time_begin = df["time_begin"]
            # want to confirm that events exist in times from at least
            # 0-59m, 60-119m, 120m+
            assert len(df[time_begin < 3600])
            assert len(df[(time_begin > 3600) & (time_begin < 7200)])
            assert len(df[time_begin > 7200])
    finally:
        # always remove the scratch directory, even when an assertion fails
        shutil.rmtree(path_temp, ignore_errors=True)
def test_generator():
    """Exercise ``flatten`` with different ``--generator`` selections.

    Covers: no generator argument (two generated outputs expected), an
    empty / ``None`` / blank generator (no ``generated`` key and no files
    written), a single ``flattened_csv`` generator, the ``--time_offset``
    option (every row's ``time_begin`` must shift by the offset), and the
    ``--no_compression`` option (output path must not end in ``.gz``).
    """
    path_temp = Path(tempfile.mkdtemp()).resolve()
    path_content = str(PATH_TEST.resolve())
    path_result = str(path_temp)

    def _files_written():
        # every non-directory entry currently below the result directory
        return [x for x in path_temp.rglob("*") if not x.is_dir()]

    try:
        # test only single input (extractor)
        dict_result = flatten(args=[
            "--path_content", path_content,
            "--extractor", "azure_videoindexer",
            "--verbose",
            "--path_result", path_result,
        ])
        assert "generated" in dict_result
        assert 2 == len(dict_result['generated'])
        shutil.rmtree(path_result)  # cleanup

        # with no output (v1.3.0+), empty generator passed programmatically
        dict_result = flatten(
            {
                "path_content": path_content,
                "extractor": "azure_videoindexer",
                "generator": "",
                "verbose": True,
                "path_result": path_result,
            },
            args=[],
        )
        assert "generated" not in dict_result
        assert not _files_written()

        # with no output (v1.3.0+), generator given as None on the arg list
        dict_result = flatten(args=[
            "--path_content", path_content,
            "--extractor", "azure_videoindexer",
            "--generator", None,
            "--verbose",
            "--path_result", path_result,
        ])
        assert "generated" not in dict_result
        assert not _files_written()

        # with no output (v1.3.0+), generator given as a blank string
        dict_result = flatten(args=[
            "--path_content", path_content,
            "--extractor", "azure_videoindexer",
            "--generator", " ",
            "--verbose",
            "--path_result", path_result,
        ])
        assert "generated" not in dict_result
        assert not _files_written()

        # test only single input and output (extractor + generator)
        dict_result = flatten(args=[
            "--path_content", path_content,
            "--extractor", "azure_videoindexer",
            "--generator", "flattened_csv",
            "--path_result", path_result,
        ])
        assert "generated" in dict_result and 1 == len(dict_result['generated'])
        assert len(_files_written()) == 1
        df_single = pd.read_csv(
            dict_result['generated'][0]['path']).sort_values(["time_begin", "tag"])
        shutil.rmtree(path_result)  # cleanup
        assert len(df_single) > 0

        # test time offset
        time_offset = 5
        dict_result = flatten(args=[
            "--path_content", path_content,
            "--extractor", "azure_videoindexer",
            "--time_offset", str(time_offset),
            "--generator", "flattened_csv",
            "--path_result", path_result,
        ])
        assert "generated" in dict_result and 1 == len(dict_result['generated'])
        df_offset = pd.read_csv(
            dict_result['generated'][0]['path']).sort_values(["time_begin", "tag"])
        shutil.rmtree(path_result)  # cleanup
        assert len(df_offset) > 0

        # make sure same data: compare every row pairwise (both frames are
        # sorted the same way), not just the first row over and over
        assert len(df_offset) == len(df_single)
        for begin_offset, begin_single in zip(df_offset["time_begin"],
                                              df_single["time_begin"]):
            assert abs((begin_offset - begin_single) - time_offset) < 0.1

        # test non-compressed version
        dict_uncompressed = flatten(args=[
            "--path_content", path_content,
            "--extractor", "azure_videoindexer",
            "--no_compression",
            "--generator", "flattened_csv",
            "--path_result", path_result,
        ])
        assert "generated" in dict_uncompressed and 1 == len(dict_uncompressed['generated'])
        # dict_result still refers to the (compressed) time-offset run above
        assert dict_result['generated'][0]['path'].endswith(".gz")
        assert not dict_uncompressed['generated'][0]['path'].endswith(".gz")
    finally:
        # always remove the scratch directory, even when an assertion fails
        shutil.rmtree(path_result, ignore_errors=True)
def test_programmatic():
    """Call ``flatten`` programmatically with bad and good path arguments.

    With only a result path or only a content path, the result must not
    contain a ``data`` key.  With both, ``data`` must hold more than one
    entry, and passing the containing directory instead of the file must
    yield the same number of entries.
    """
    path_temp = Path(tempfile.mkdtemp()).resolve()
    path_result = str(path_temp)
    path_video = str(PATH_TEST.joinpath("test.mp4").resolve())
    try:
        # test bad input or output
        dict_result = flatten(args=["--path_result", path_result])
        assert "data" not in dict_result
        dict_result = flatten(args=["--path_content", path_video])
        assert "data" not in dict_result

        # test straight result parse
        dict_result = flatten(args=[
            "--path_content", path_video,
            "--path_result", path_result,
        ])
        assert "data" in dict_result
        num_results_long = len(dict_result['data'])
        assert num_results_long > 1
        shutil.rmtree(path_result)  # cleanup

        # test directory input instead of file
        dict_result = flatten(args=[
            "--path_content", str(PATH_TEST.resolve()),
            "--path_result", path_result,
        ])
        assert "data" in dict_result and num_results_long == len(dict_result['data'])
    finally:
        # always remove the scratch directory, even when an assertion fails
        shutil.rmtree(path_result, ignore_errors=True)
def test_cli():
    """Run the ``contentai-metadata-flatten`` command and compare with the API.

    The command-line entry point is executed against ``test.mp4`` and must
    leave at least one file in the result directory.  The same inputs are
    then passed to ``flatten`` directly, and the number of ``generated``
    entries must equal the number of files the command produced.
    """
    # local import, mirroring the function-scope import style used here before
    import subprocess

    path_temp = Path(tempfile.mkdtemp())
    path_video = str(PATH_TEST.joinpath("test.mp4").resolve())
    try:
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.  The exit status is deliberately not checked;
        # success is judged by the files that were produced.
        subprocess.run(
            [
                "contentai-metadata-flatten",
                "--path_result", str(path_temp),
                "--path_content", path_video,
            ],
            check=False,
        )
        list_result_cli = [x for x in path_temp.rglob("*") if not x.is_dir()]
        print(list_result_cli)
        assert len(list_result_cli) > 0

        # test straight result parse
        dict_result = flatten(
            {"path_content": path_video, "path_result": str(path_temp)},
            args=[],
        )
        assert "generated" in dict_result and len(dict_result['generated'])
        print(dict_result['generated'])
        assert len(list_result_cli) == len(dict_result['generated'])
    finally:
        # always remove the scratch directory, even when an assertion fails
        shutil.rmtree(path_temp, ignore_errors=True)


# # test all frames
def test_programmatic():
    """Call ``flatten`` and inspect its result as a sequence of output paths.

    Covers bad input/output arguments (empty result), file versus directory
    input (same number of results), a single extractor (two results), a
    single extractor plus the ``flattened_csv`` generator (one CSV), the
    ``--time_offset`` option (every row's ``time_begin`` must shift by the
    offset) and ``--no_compression`` (paths must not end in ``.gz``).

    NOTE(review): another function with this same name is visible earlier in
    the source; if both live in one module, only the later definition is
    bound to the name -- confirm which one is intended to run.
    """
    path_temp = Path(tempfile.mkdtemp()).resolve()
    path_result = str(path_temp)
    path_dir = str(PATH_TEST.resolve())
    path_video = str(PATH_TEST.joinpath("test.mp4").resolve())
    try:
        # test bad input or output
        list_result = flatten(args=["--path_result", path_result])
        assert len(list_result) == 0
        list_result = flatten(args=["--path_content", path_video])
        assert len(list_result) == 0

        # test straight result parse
        list_result = flatten(args=[
            "--path_content", path_video,
            "--path_result", path_result,
        ])
        num_results_long = len(list_result)
        assert num_results_long >= 2
        shutil.rmtree(path_result)  # cleanup

        # test directory input instead of file
        list_result = flatten(args=[
            "--path_content", path_dir,
            "--path_result", path_result,
        ])
        assert num_results_long == len(list_result)
        shutil.rmtree(path_result)  # cleanup

        # test only single input (extractor)
        list_result = flatten(args=[
            "--path_content", path_dir,
            "--extractor", "azure_videoindexer",
            "--verbose",
            "--path_result", path_result,
        ])
        assert 2 == len(list_result)
        shutil.rmtree(path_result)  # cleanup

        # test only single input and output (extractor + generator)
        list_result = flatten(args=[
            "--path_content", path_dir,
            "--extractor", "azure_videoindexer",
            "--generator", "flattened_csv",
            "--path_result", path_result,
        ])
        assert 1 == len(list_result)
        df_single = pd.read_csv(list_result[0]).sort_values(["time_begin", "tag"])
        shutil.rmtree(path_result)  # cleanup
        assert len(df_single) > 0

        # test time offset
        time_offset = 5
        list_result = flatten(args=[
            "--path_content", path_dir,
            "--extractor", "azure_videoindexer",
            "--time_offset", str(time_offset),
            "--generator", "flattened_csv",
            "--path_result", path_result,
        ])
        assert 1 == len(list_result)
        df_offset = pd.read_csv(list_result[0]).sort_values(["time_begin", "tag"])
        shutil.rmtree(path_result)  # cleanup
        assert len(df_offset) > 0

        # make sure same data: compare every row pairwise (both frames are
        # sorted the same way), not just the first row over and over
        assert len(df_offset) == len(df_single)
        for begin_offset, begin_single in zip(df_offset["time_begin"],
                                              df_single["time_begin"]):
            assert abs((begin_offset - begin_single) - time_offset) < 0.1

        # test non-compressed version
        list_uncompressed = flatten(args=[
            "--path_content", path_dir,
            "--extractor", "azure_videoindexer",
            "--no_compression",
            "--generator", "flattened_csv",
            "--path_result", path_result,
        ])
        assert len(list_result) == len(list_uncompressed)
        # list_result still refers to the (compressed) time-offset run above
        for path_compressed, path_plain in zip(list_result, list_uncompressed):
            assert path_compressed.endswith(".gz")
            assert not path_plain.endswith(".gz")
    finally:
        # always remove the scratch directory, even when an assertion fails
        shutil.rmtree(path_result, ignore_errors=True)