def test_different_n():
    """Twenty items split with n=5 come back as four full five-item lists."""
    chunk_size = 5
    source = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] * 2

    expected = [
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
    ]

    assert group(source, chunk_size) == expected
def testGroup():
    """Grouping by three key functions produces a three-level nested dict.

    Each key function picks one position of the tuple; the innermost level
    maps the third element to the list of matching tuples.
    """
    records = [
        ('I', 'A', 1),
        ('I', 'A', 2),
        ('I', 'B', 1),
        ('II', 'A', 1),
        ('II', 'B', 1),
    ]
    # One key function per nesting level, passed positionally after the items.
    key_funcs = (lambda i: i[0], lambda i: i[1], lambda i: i[2])

    result = group(records, *key_funcs)

    wanted = {
        'I': {
            'A': {
                1: [('I', 'A', 1)],
                2: [('I', 'A', 2)],
            },
            'B': {
                1: [('I', 'B', 1)],
            },
        },
        'II': {
            'A': {
                1: [('II', 'A', 1)],
            },
            'B': {
                1: [('II', 'B', 1)],
            },
        },
    }
    assert result == wanted
def test_different_iterable_size():
    """Twenty items split with n=3 leave a final list of only two items."""
    source = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] * 2
    chunk_size = 3

    expected = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 1, 2],
        [3, 4, 5],
        [6, 7, 8],
        [9, 10],  # remainder: shorter than chunk_size
    ]

    assert group(source, chunk_size) == expected
def test_passing_in_generator():
    """A generator input is accepted and split the same way as a list."""
    values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    lazy_values = (value for value in values)
    # Sanity check: make sure we really hand over a generator, not a list.
    assert isinstance(lazy_values, types.GeneratorType)

    chunk_size = 3
    result = group(lazy_values, chunk_size)

    assert result == [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
def print_hashes(args):
    """Hash every file in ``args["files_list"]`` and report the results.

    Args:
        args: Mapping of options. Keys read here: "files_list", "debug",
            "output", "format", "max_len", "group" and "threshold".

    Each file is hashed with ``telfhash_single``. When neither "output" nor
    "format" is set, one line per file is printed immediately. Afterwards
    the collected results are handed to the TSV or JSON formatter if
    requested, and, when grouping is enabled and there is more than one
    result, the groups returned by ``grouping.group`` are printed.
    """
    exclude_list = build_exclude_list()
    results = []

    for filepath in args["files_list"]:
        entry = telfhash_single(
            filepath, debug=args["debug"], exclude_list=exclude_list
        )
        results.append(entry)

        # Plain console mode only applies when neither an output target nor
        # an explicit format was requested.
        if args["output"] is not None or args["format"] is not None:
            continue

        # Print right away so the user sees progress while hashing runs.
        # The first column is left-aligned and padded to args["max_len"] so
        # the second column lines up vertically.
        if entry["telfhash"] is None:
            line = "{:<{max_len}} {msg}".format(
                filepath, max_len=args["max_len"], msg=entry["msg"]
            )
        else:
            line = "{:<{max_len}} {}".format(
                entry["file"], entry["telfhash"], max_len=args["max_len"]
            )
        print(line)

    output_format = args["format"]
    if output_format == "tsv":
        output_format_tsv(args, results)
    elif output_format == "json":
        output_format_json(args, results)

    # Grouping needs at least two results to compare against each other.
    if not (args["group"] and len(results) > 1):
        return

    groups = grouping.group(results, threshold=args["threshold"])
    print()

    for number, members in enumerate(groups["grouped"], start=1):
        print("Group {}:".format(number))
        for member in members:
            print(" {}".format(member))

    ungrouped = groups["nogroup"]
    if len(ungrouped) > 0:
        print("Ungrouped:")
        for member in ungrouped:
            print(" {}".format(member))

    print()
def group(telfhash_results, threshold=50):
    """Group files by the TLSH distance between their telfhashes.

    This is a thin pass-through to ``grouping.group``.

    Args:
        telfhash_results: The output of the telfhash.telfhash function
            call, i.e. a list of telfhash data for the files.
        threshold: [Optional] TLSH distance threshold used to decide
            whether files are considered related. Defaults to 50.

    Returns:
        Whatever ``grouping.group`` returns, unchanged.
        NOTE(review): print_hashes reads "grouped" and "nogroup" keys from
        the same call's result, so this is presumably a mapping rather
        than a bare tuple of tuples -- confirm against grouping.group.
    """
    return grouping.group(telfhash_results, threshold=threshold)
def test_passing_in_tuple():
    """A tuple input is split into lists, with a short final list."""
    source = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    chunk_size = 3

    result = group(source, chunk_size)

    assert result == [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10],
    ]
def test_split_10_ints_by_3():
    """Ten ints split with n=3 give three full lists and one leftover item."""
    numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    chunk_size = 3

    expected = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10],  # leftover element
    ]

    assert group(numbers, chunk_size) == expected
iterable = list(iterable) for i in range(0, len(iterable), n): result.append(iterable[i:i + n]) return result # another way # iterator = iter(iterable) # result = list(iter(lambda: list(islice(iterator, n)), [])) # return result if __name__ == '__main__': iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] n = 3 ret = group(iterable, n) print(ret) # TESTS import types from grouping import group def test_split_10_ints_by_3(): iterable = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] n = 3 actual = group(iterable, n) expected = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10]] assert actual == expected