def test_pair_directories():
    """pair() on two directories yields the cross product of SAMPLES_1 and SAMPLES_2."""
    first_dir = SAMPLES_DIR / "sample_1"
    second_dir = SAMPLES_DIR / "sample_2"

    expected = [(left, right) for left in SAMPLES_1 for right in SAMPLES_2]

    result = files.pair(first_dir, second_dir, extensions=[".log"])
    assert sort_sublist(result) == expected
def match(
    *paths: Path,
    length: int = DEFAULT_LENGTH,
    extensions: Optional[Iterable[str]] = None,
) -> Dict[Tuple[Path, Path], float]:
    """
    Finds similar audio files in paths.

    Args:
        paths: Input paths, forwarded unchanged to ``files.pair`` which decides
            how they are combined into pairs.
        length: Specifies how many seconds of the input audio to take for
            analysis. Defaults to ``DEFAULT_LENGTH``.
        extensions: Take only files with given extensions. It has no effect on
            paths that already have extension.

    Returns:
        A dictionary where key is a pair of filepaths and value is a score
        between them.
    """
    pairs = list(files.pair(*paths, extensions=extensions))

    # A file may appear in many pairs; deduplicate so ``fingerprints.calc`` is
    # invoked only once per distinct path.
    unique_paths = list(set(itertools.chain.from_iterable(pairs)))
    calc = functools.partial(fingerprints.calc, length=length)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Executor.map yields results in input order, so zipping against the
        # same list lines every path up with its own result.
        fps = dict(zip(unique_paths, executor.map(calc, unique_paths)))

    # multiprocessing.Pool.starmap would let us skip the wrapper that unpacks
    # the arguments. However, multiprocessing.Pool doesn't play nicely with
    # coverage and requires an explicit 'pool.join' call.
    with concurrent.futures.ProcessPoolExecutor() as pool:
        fingerprint_pairs = ((fps[a], fps[b]) for a, b in pairs)
        scores = pool.map(_compare, fingerprint_pairs)
        return dict(zip(pairs, scores))
def test_pair_glob():
    """pair() on a single wildcard path yields each pair among the first three SAMPLES_1 entries."""
    pattern = SAMPLES_DIR / "sample_1/*.log"

    index_pairs = ((0, 1), (0, 2), (1, 2))
    expected = [(SAMPLES_1[i], SAMPLES_1[j]) for i, j in index_pairs]

    result = files.pair(pattern)
    assert sort_sublist(result) == expected
def test_pair_files_in_a_directory():
    """pair() on one directory yields each pair among the first three SAMPLES_1 entries."""
    sample_dir = SAMPLES_DIR / "sample_1"

    index_pairs = ((0, 1), (0, 2), (1, 2))
    expected = [(SAMPLES_1[i], SAMPLES_1[j]) for i, j in index_pairs]

    result = files.pair(sample_dir, extensions=[".log"])
    assert sort_sublist(result) == expected
def test_pair_a_file_and_all_files_in_a_directory():
    """pair() on a file plus a directory pairs SAMPLES_1[0] with each of the first three SAMPLES_1 entries.

    The expected result includes the self-pair (SAMPLES_1[0], SAMPLES_1[0]).
    """
    single_file = SAMPLES_DIR / "sample_1/take-1.log"
    sample_dir = SAMPLES_DIR / "sample_1"

    expected = [(SAMPLES_1[0], SAMPLES_1[i]) for i in range(3)]

    result = files.pair(single_file, sample_dir, extensions=[".log"])
    assert sort_sublist(result) == expected
def test_pair_one_file():
    """pair() on a single file raises NotEnoughFiles with the expected message."""
    lone_file = SAMPLES_DIR / "sample_1/take-1.log"

    with pytest.raises(NotEnoughFiles) as excinfo:
        files.pair(lone_file)

    # Checked after the block: statements following the raising call inside
    # the `with` body would never execute.
    message = str(excinfo.value)
    assert message == "Not enough input files."
def test_pair_two_files():
    """pair() on two files yields the single pair (SAMPLES_1[0], SAMPLES_1[1])."""
    take_one = SAMPLES_DIR / "sample_1/take-1.log"
    take_two = SAMPLES_DIR / "sample_1/take-2.log"

    expected = [(SAMPLES_1[0], SAMPLES_1[1])]

    result = files.pair(take_one, take_two)
    assert sort_sublist(result) == expected