def test_length_list():
    """len() on a Clumper must report the number of rows it holds."""
    cases = [
        ([], 0),
        ([{"a": 1}], 1),
        ([{"a": 1} for _ in range(100)], 100),
    ]
    for rows, expected in cases:
        assert len(Clumper(rows)) == expected
def test_correct_keys_kept():
    """Exploding must preserve existing keys and add/rename exactly as requested."""
    data = [
        {"a": 1, "b": 1, "items": [1, 2]},
        {"a": 2, "b": 1, "items": [3, 4]},
    ]
    base = {"a", "b"}
    # Positional argument keeps the exploded key under its own name.
    assert set(Clumper(data).explode("items").keys()) == base | {"items"}
    # Mixing positional and keyword forms yields both names.
    assert set(Clumper(data).explode("items", foobar="items").keys()) == base | {"items", "foobar"}
    # Keyword form alone: new key named after the keyword.
    assert set(Clumper(data).explode(items="items").keys()) == base | {"items"}
    assert set(Clumper(data).explode(item="items").keys()) == base | {"item"}
    # Exploding onto an existing key overwrites it.
    assert set(Clumper(data).explode(a="items").keys()) == base
def explore(
    report_path: str = typer.Argument(..., help="Report log to visualise."),
    no_trim: bool = typer.Option(
        False, is_flag=True, help="Flag to not reduce parametrized calls."),
    port: int = typer.Option(8002, help="Port to serve the dashboard on."),
):
    """Starts up a pytest reportlog dashboard."""
    # Reshape the reportlog into the hierarchy/duration structure the
    # dashboard front-end expects.
    parsed = Clumper.read_jsonl(report_path).pipe(parse_test_info, trim=not no_trim)
    res = parsed.pipe(to_hierarchy_dict, hierarchy_col="hierarchy", value_col="duration")

    # Serve everything as static files out of a temporary folder: copy the
    # bundled assets, then drop the freshly computed data next to them.
    static_dir = Path(tempfile.mkdtemp()) / "static"
    shutil.copytree(src=resource_filename("pytest_duration_insights", "static"), dst=static_dir)
    Clumper(res, listify=False).write_json(static_dir / "data.json")
    tree_res = Node.from_dict(res).to_value_dict()
    Clumper(tree_res, listify=False).write_json(static_dir / "treedata.json")

    # A bit hacky, but the stdlib http.server does the job for local viewing.
    subprocess.run(
        ["python", "-m", "http.server", str(port), "--directory", str(static_dir)]
    )
def test_write_missing_keys(tmp_path):
    """Test that function works with missing keys."""
    rows = [{"a": "1", "b": "2"}, {"c": "3"}]
    target = temp_file(tmp_path)
    Clumper(rows).write_csv(target)
    assert Clumper(rows).equals(Clumper.read_csv(target))
def test_no_mutate_query(pokemon):
    """Regression test: running the same query twice must give the same result size."""
    def dual_typed(d):
        return len(d["type"]) == 2

    first = Clumper(pokemon).keep(dual_typed).mutate(type=lambda d: d["type"][0])
    second = Clumper(pokemon).keep(dual_typed).mutate(type=lambda d: d["type"][0])
    assert len(first) == len(second)
def test_not_keep_correct_keys():
    """Imploding after a group-by must report every key seen anywhere in the data."""
    rows = [
        {"a": 1, "b": 1, "item": 1},
        {"a": 1, "b": 1, "item": 2},
        {"a": 1, "b": 1, "item": 1},
        {"a": 2, "b": 2, "c": 2, "item": 3},
        {"a": 2, "b": 2, "c": 2, "item": 2},
    ]
    result = Clumper(rows).group_by("a", "b").implode(items="item").keys()
    assert set(result) == {"a", "b", "c", "items"}
def notifications(request):
    """Build the personal notification feed and tag cloud for a course.

    Redirects to the login page when the request has no course attached.
    Staff users may view the feed as another user via the ``?as=<username>``
    query parameter.

    Returns a dict with ``my_feed`` (a Clumper over comments, notes,
    projects and discussions relevant to the user) and ``tag_cloud``
    (the ten most-used tags in the course).
    """
    c = request.course
    if not c:
        return HttpResponseRedirect('/accounts/login/')
    user = request.user
    # Py3 fix: dict.has_key() was removed — use the `in` operator.
    if user.is_staff and 'as' in request.GET:
        user = get_object_or_404(User, username=request.GET['as'])

    class_feed = []

    # personal feed: every asset the user touched (annotations or comments)
    my_assets = {}
    for n in SherdNote.objects.filter(author=user, asset__course=c):
        my_assets[str(n.asset_id)] = 1
    for comment in Comment.objects.filter(user=user):
        if c == getattr(comment.content_object, 'course', None):
            my_assets[str(comment.object_pk)] = 1

    my_discussions = [
        d.collaboration_id for d in DiscussionIndex.objects.filter(
            participant=user,
            collaboration__context=request.collaboration_context)
    ]

    my_feed = Clumper(
        Comment.objects.filter(
            content_type=ContentType.objects.get_for_model(Asset),
            object_pk__in=my_assets.keys()).order_by(
                '-submit_date'),  # so the newest ones show up
        SherdNote.objects.filter(
            asset__in=my_assets.keys(),
            # no global annotations
            # warning: if we include global annotations
            # we need to stop it from autocreating one on-view
            # of the asset somehow
            range1__isnull=False).order_by('-added'),
        Project.objects.filter(Q(participants=user.pk) | Q(author=user.pk),
                               course=c).order_by('-modified'),
        DiscussionIndex.with_permission(
            request,
            DiscussionIndex.objects.filter(
                Q(Q(asset__in=my_assets.keys())
                  | Q(collaboration__in=my_discussions)
                  | Q(collaboration__user=request.user)
                  | Q(collaboration__group__user=request.user),
                  participant__isnull=False)).order_by('-modified')),
    )

    tags = Tag.objects.usage_for_queryset(
        SherdNote.objects.filter(asset__course=c), counts=True)
    # only top 10 tags.  Py3 fix: the positional `cmp` comparator (and the
    # cmp() builtin) were removed — the old comparator sorted by count
    # descending, which a key function expresses directly.
    tag_cloud = calculate_cloud(
        sorted(tags, key=lambda t: t.count, reverse=True)[:10])

    return {
        'my_feed': my_feed,
        'tag_cloud': tag_cloud,
    }
def test_correct_values_value():
    """impute with strategy="value" must fill the gaps with the given fallback."""
    rows = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]
    imputed = Clumper(rows).mutate(b=impute("b", strategy="value", fallback=0)).collect()
    assert [row["b"] for row in imputed] == [2, 3, 0, 6, 0]
def test_group_combos_one_group():
    """A single-key group-by must enumerate exactly that key's unique values."""
    combos = it.product([1, 2, 3, 4, 5], [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([
        {"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos
    ])
    for key, expected in (("a", [True, False]), ("b", ["a", "b"])):
        found = clump.group_by(key)._group_combos()
        assert sorted(row[key] for row in found) == sorted(expected)
def base_clumper():
    """Build a small 26-row Clumper, one row per lowercase letter."""
    rows = [
        {"data": [idx, idx], "i": idx, "c": letter}
        for idx, letter in enumerate("abcdefghijklmnopqrstuvwxyz")
    ]
    return Clumper(rows)
def test_correct_values_prev():
    """impute with strategy="prev" must carry the last seen value forward."""
    rows = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]
    imputed = Clumper(rows).mutate(b=impute("b", strategy="prev")).collect()
    assert [row["b"] for row in imputed] == [2, 3, 3, 6, 6]
def test_mutability_insurance():
    """A Clumper must never hold on to the caller's original list object."""
    original = [{"a": 1}, {"b": 2}]
    stored = Clumper(original).blob
    assert stored is not original
def test_iteration():
    """Just make sure that we can iterate."""
    values = [1, 2, 3, 4, 5]
    assert list(Clumper(values)) == values
def test_explode_many(n, k):
    """Exploding one nested list must yield the full n*k cartesian product."""
    rows = [{"i": idx, "nested": list(range(k))} for idx in range(n)]
    exploded = Clumper(rows).explode(j="nested")
    assert exploded.count("j") == n * k
def test_case_zero():
    """Aggregations on an empty Clumper must degrade gracefully."""
    empty = Clumper([])
    # Scalar reductions have no answer on zero rows.
    for reduction in (empty.mean, empty.max, empty.min, empty.sum):
        assert reduction("i") is None
    # Set-like reductions return empty results instead.
    assert empty.unique("i") == []
    assert empty.n_unique("i") == 0
def test_keep_does_not_mutate():
    """Regression test: `.keep()` must filter a copy, never the input list."""
    rows = [{"a": 1}, {"a": 2}]
    kept = Clumper(rows).keep(lambda d: d["a"] == 1)
    assert len(rows) == 2
    assert len(kept) == 1
def test_with_groups(n):
    """Grouped aggregation must account for every row exactly once."""
    combos = it.product(range(1, n + 1), [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([
        {"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos
    ])
    length = len(clump)
    # Each row carries r=1, so summing the per-group sums recounts the rows.
    total = clump.group_by("a", "b").agg(r=("r", "sum")).sum("r")
    assert total == length
def test_mutate_group_aware():
    """Does `row_number` reset during mutate if a group is active?"""
    # bool(i % 2) is the idiomatic spelling of `True if i % 2 else False`.
    data = [{"bool": bool(i % 2)} for i in range(20)]
    clump = Clumper(data).group_by("bool").mutate(r=row_number())
    assert len(clump) == len(data)
    assert clump.groups == ("bool",)
    # 20 rows split over 2 groups -> row numbers 1..10 within each group.
    assert set(clump.unique("r")) == set(range(1, 11))
def test_read_csv_nulls():
    """Test that null rows are discarded."""
    expected = Clumper([{"a": "1", "b": "2"}, {"a": "2", "c": "4"}])
    assert Clumper.read_csv("tests/data/null.csv").equals(expected)
def test_left_join_no_overlap():
    """A left join on a key with no matches must leave the left side untouched."""
    left = [
        {"a": 1, "b": 1, "d": 1},
        {"a": 1, "b": 2, "d": 1},
        {"a": 1, "b": 5, "d": 1},
    ]
    right = [
        {"b": 1, "c": 1, "d": 100},
        {"b": 2, "c": 2, "d": 200},
        {"b": 2, "c": 20, "d": 200},
    ]
    joined = (
        Clumper(left)
        .left_join(Clumper(right), mapping={"d": "d"}, lsuffix="left", rsuffix="right")
        .collect()
    )
    # None of the right-hand `d` values match, so the join is a no-op.
    assert joined == left
def test_can_rename_key(keyname):
    """We should be able to change the keyname"""
    data = {
        "f1": {"p1": 1, "p2": 2},
        "f2": {"p1": 3, "p2": 4},
        "f3": {"p1": 5, "p2": 6},
    }
    expected = [
        {"p1": 1, "p2": 2, keyname: "f1"},
        {"p1": 3, "p2": 4, keyname: "f2"},
        {"p1": 5, "p2": 6, keyname: "f3"},
    ]
    flattened = Clumper(data, listify=False).flatten_keys(keyname=keyname).collect()
    assert flattened == expected
def test_len_appropriate_dict_input():
    """A dict input counts as a single row, not one row per key."""
    single = Clumper({"a": 1, "b": 2, "c": 3})
    assert len(single) == 1
def test_subsets_sizes(keys, size):
    """Every subset produced by a group-by must have the expected size."""
    combos = it.product([1, 2], [1, 2], [True, False], ["a", "b"])
    clump = Clumper([
        {"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos
    ])
    grouped = clump.group_by(*keys)
    assert all(len(subset) == size for subset in grouped._subsets())
def test_write_empty_csv(data, tmp_path):
    """Test that null cells are exported correctly as empty strings"""
    target = temp_file(tmp_path)
    Clumper(data).write_csv(target)
    round_tripped = Clumper.read_csv(target, na_values="ignore")
    assert Clumper(data).collect() == round_tripped.collect()
def test_group_combos_two_groups():
    """Two binary group keys must yield 2 x 2 = 4 combinations."""
    combos = it.product([1, 2, 3, 4, 5], [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([
        {"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos
    ])
    found = clump.group_by("a", "b")._group_combos()
    assert len(found) == 4
def test_write_csv_fieldnames(data, tmp_path):
    """Test that fieldnames of Clumper match the headers in the exported csv file"""
    target = temp_file(tmp_path)
    Clumper(data).write_csv(target)
    round_tripped = Clumper.read_csv(target)
    # Every key of the original data must appear among the read-back headers.
    assert set(Clumper(data).keys()).issubset(round_tripped.keys())
def test_unpack_dict():
    """Unpacking the nested "data" key must expand to 10 rows."""
    unpacked = Clumper(d).unpack("data")
    assert len(unpacked) == 10
def test_read_csv(dtype_data, dtype, tmp_path):
    """Test that the correct dtype is returned when dtype argument is not None."""
    target = temp_file(tmp_path)
    Clumper(dtype_data).write_csv(target)
    round_tripped = Clumper.read_csv(target, dtype=dtype)
    assert Clumper(dtype_data).equals(round_tripped)
def to_json(self, path):
    """Write `self.data` to `path` as JSON (indent=2) and return the result."""
    clump = Clumper(self.data)
    return clump.write_json(path, indent=2)
def test_write_csv_n_positive(data, tmp_path):
    """Test that the correct number of rows is exported"""
    target = temp_file(tmp_path)
    Clumper(data).head(n=10).write_csv(target)
    round_tripped = Clumper.read_csv(target, na_values="ignore")
    assert Clumper(data).head(n=10).collect() == round_tripped.collect()