def test_write_missing_keys(tmp_path):
    """Test that writing and reading CSV works when some rows are missing keys."""
    missing_key = [{"a": "1", "b": "2"}, {"c": "3"}]
    path = temp_file(tmp_path)
    Clumper(missing_key).write_csv(path)
    reader = Clumper.read_csv(path)
    assert Clumper(missing_key).equals(reader)
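# Note: the `temp_file` helper used above is not shown in this section. A minimal
# sketch of what it might look like (an assumption, not the project's actual
# implementation) is a function that builds a throwaway CSV path inside pytest's
# `tmp_path` fixture directory:
def temp_file(tmp_path):
    # hypothetical helper: return a CSV path inside the tmp_path fixture directory
    return str(tmp_path / "temp.csv")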
def test_group_combos_one_group():
    prod = it.product([1, 2, 3, 4, 5], [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([{"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in prod])
    res = clump.group_by("a")._group_combos()
    assert list(sorted(r["a"] for r in res)) == list(sorted([True, False]))
    res = clump.group_by("b")._group_combos()
    assert list(sorted(r["b"] for r in res)) == list(sorted(["a", "b"]))
def test_correct_keys_kept():
    """
    Make sure that we keep the correct names of the keys.
    """
    data = [{"a": 1, "b": 1, "items": [1, 2]}, {"a": 2, "b": 1, "items": [3, 4]}]
    assert set(Clumper(data).explode("items").keys()) == {"items", "a", "b"}
    assert set(Clumper(data).explode("items", foobar="items").keys()) == {
        "items",
        "a",
        "b",
        "foobar",
    }
    assert set(Clumper(data).explode(items="items").keys()) == {"items", "a", "b"}
    assert set(Clumper(data).explode(item="items").keys()) == {"item", "a", "b"}
    assert set(Clumper(data).explode(a="items").keys()) == {"a", "b"}
def test_local_read_write_same_lines(tmp_path, lines, expected):
    """Test that the locally written JSON file has the same number of lines as the original."""
    path = str(tmp_path / "pokemon_copy.json")
    writer = Clumper.read_json("tests/data/pokemon.json", lines)
    writer.write_json(path)
    reader = Clumper.read_json(path)
    assert len(reader) == len(writer)
def test_local_read_write_content_same(tmp_path):
    """Test that the written JSON file is the same as what is read locally."""
    path = str(tmp_path / "pokemon_copy.json")
    writer = Clumper.read_json("tests/data/pokemon.json")
    writer.write_json(path)
    reader = Clumper.read_json(path)
    assert reader.collect() == writer.collect()
def test_length_list():
    """
    Basic tests to ensure that len() works as expected.
    """
    assert len(Clumper([])) == 0
    assert len(Clumper([{"a": 1}])) == 1
    assert len(Clumper([{"a": 1} for i in range(100)])) == 100
def explore(
    report_path: str = typer.Argument(..., help="Report log to visualise."),
    no_trim: bool = typer.Option(
        False, is_flag=True, help="Flag to not reduce parametrized calls."
    ),
    port: int = typer.Option(8002, help="Port to serve the dashboard on."),
):
    """Starts up a pytest reportlog dashboard."""
    # Use clumper to arrange the data appropriately.
    res = (
        Clumper.read_jsonl(report_path)
        .pipe(parse_test_info, trim=not no_trim)
        .pipe(to_hierarchy_dict, hierarchy_col="hierarchy", value_col="duration")
    )

    # We serve everything as static files from a temporary folder.
    tmpdir = tempfile.mkdtemp()
    orig = resource_filename("pytest_duration_insights", "static")
    shutil.copytree(src=orig, dst=Path(tmpdir) / "static")
    Clumper(res, listify=False).write_json(Path(tmpdir) / "static" / "data.json")
    tree_res = Node.from_dict(res).to_value_dict()
    Clumper(tree_res, listify=False).write_json(Path(tmpdir) / "static" / "treedata.json")

    # This is a bit hacky, but it does the job.
    subprocess.run(
        [
            "python",
            "-m",
            "http.server",
            str(port),
            "--directory",
            str(Path(tmpdir) / "static"),
        ]
    )
def test_local_read_write_content_same(tmp_path):
    """Test that the written JSONL file is the same as what is read locally."""
    path = str(tmp_path / "cards_copy.jsonl")
    writer = Clumper.read_jsonl("tests/data/cards.jsonl")
    writer.write_jsonl(path)
    reader = Clumper.read_jsonl(path)
    assert reader.collect() == writer.collect()
def test_with_groups(n):
    """
    We should never count more rows than we have in the original data.
    """
    prod = it.product(range(1, n + 1), [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([{"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in prod])
    length = len(clump)
    n_items = clump.group_by("a", "b").agg(r=("r", "sum")).sum("r")
    assert n_items == length
def test_mutate_group_aware():
    """
    Does `row_number` reset during mutate if a group is active?
    """
    data = [{"bool": True if i % 2 else False} for i in range(20)]
    clump = Clumper(data).group_by("bool").mutate(r=row_number())
    assert len(clump) == len(data)
    assert clump.groups == ("bool",)
    assert set(clump.unique("r")) == {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
def test_local_read_write_same_lines(tmp_path, lines, expected):
    """Test that the locally written file has the same number of lines as expected."""
    path = tmp_path / "cards_copy.jsonl"
    writer = Clumper.read_jsonl("tests/data/cards.jsonl", lines)
    writer.write_jsonl(path)
    reader = Clumper.read_jsonl(str(path))
    assert len(reader) == len(writer)
def test_read_csv_nulls():
    """Test that null values are discarded when reading a CSV file."""
    assert Clumper.read_csv("tests/data/null.csv").equals(
        Clumper([{"a": "1", "b": "2"}, {"a": "2", "c": "4"}])
    )
def test_no_mutate_query(pokemon):
    """
    Regression test: running the same keep/mutate query twice must give the same
    result. This was an error that happened in the past.
    """
    r1 = (
        Clumper(pokemon)
        .keep(lambda d: len(d["type"]) == 2)
        .mutate(type=lambda d: d["type"][0])
    )
    r2 = (
        Clumper(pokemon)
        .keep(lambda d: len(d["type"]) == 2)
        .mutate(type=lambda d: d["type"][0])
    )
    assert len(r1) == len(r2)
def test_read_multiple_yaml(tmp_path, copies):
    """
    Test that yaml files can be read given a pattern.
    """
    writer = Clumper.read_yaml("tests/data/demo-nested.yml")
    for i in range(copies):
        writer.write_yaml(tmp_path / f"demo-nested-{i}.yml")
    reader = Clumper.read_yaml(str(tmp_path / "*.yml"))
    assert len(reader) == copies * len(writer)
def test_read_multiple_csv(tmp_path, copies):
    """
    Test that csv files can be read given a pattern.
    """
    writer = Clumper.read_csv("tests/data/monopoly.csv")
    for i in range(copies):
        writer.write_csv(tmp_path / f"monopoly_copy_{i}.csv")
    reader = Clumper.read_csv(str(tmp_path / "*.csv"))
    assert len(reader) == copies * len(writer)
def test_read_multiple_json(tmp_path, copies):
    """
    Test that json files can be read given a pattern.
    """
    writer = Clumper.read_json("tests/data/pokemon.json")
    for i in range(copies):
        writer.write_json(tmp_path / f"pokemon_copy_{i}.json")
    reader = Clumper.read_json(str(tmp_path / "*.json"))
    assert len(reader) == copies * len(writer)
def test_case_zero():
    empty_c = Clumper([])
    assert empty_c.mean("i") is None
    assert empty_c.max("i") is None
    assert empty_c.min("i") is None
    assert empty_c.sum("i") is None
    assert empty_c.unique("i") == []
    assert empty_c.n_unique("i") == 0
def test_read_multiple_jsonl(tmp_path, copies):
    """
    Test that jsonl files can be read given a pattern.
    """
    writer = Clumper.read_jsonl("tests/data/cards.jsonl")
    for i in range(copies):
        writer.write_jsonl(tmp_path / f"cards_copy_{i}.jsonl")
    reader = Clumper.read_jsonl(str(tmp_path / "*.jsonl"))
    assert len(reader) == copies * len(writer)
    reader = Clumper.read_jsonl(list(Path(tmp_path).glob("*.jsonl")))
    assert len(reader) == copies * len(writer)
def history(
    n: int = typer.Option(10, help="How many rows should the table show."),
    only_failures: bool = typer.Option(False, is_flag=True, help="Only show failures."),
    date: str = typer.Option(None, is_flag=True, help="Only show specific date."),
    name: str = typer.Option(None, is_flag=True, help="Only show jobs with specific name."),
):
    """Shows a table with job status."""
    clump = Clumper.read_jsonl(heartbeat_path()).sort(lambda _: _["start"], reverse=True)
    if only_failures:
        clump = clump.keep(lambda _: _["status"] != "success")
    if name:
        clump = clump.keep(lambda _: name in _["name"])
    if date:
        clump = clump.keep(lambda _: date in _["start"])
    table = Table(title=None)
    table.add_column("status")
    table.add_column("date")
    table.add_column("name")
    table.add_column("logfile")
    for d in clump.head(n).collect():
        table.add_row(
            f"[{'red' if d['status'] == 'fail' else 'green'}]{d['status']}[/]",
            d["start"],
            d["name"],
            d["logpath"],
        )
    print(table)
def base_clumper():
    data = [
        {"data": [i for _ in range(2)], "i": i, "c": c}
        for i, c in enumerate("abcdefghijklmnopqrstuvwxyz")
    ]
    return Clumper(data)
def test_correct_values_value():
    list_dicts = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]
    res = (
        Clumper(list_dicts)
        .mutate(b=impute("b", strategy="value", fallback=0))
        .collect()
    )
    assert [d["b"] for d in res] == [2, 3, 0, 6, 0]
def test_correct_values_prev():
    list_dicts = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]
    res = Clumper(list_dicts).mutate(b=impute("b", strategy="prev")).collect()
    assert [d["b"] for d in res] == [2, 3, 3, 6, 6]
def test_mutability_insurance():
    """
    We don't want to change the original data going in. Ever.
    """
    data = [{"a": 1}, {"b": 2}]
    blob = Clumper(data).blob
    assert id(data) != id(blob)
def test_multi_file_add_path_many():
    """We check the path again, but now for data that is flat."""
    c = Clumper.read_yaml("tests/data/demo-flat-*.yaml", add_path=True)
    paths = c.map(lambda d: d["read_path"]).collect()
    assert set(paths) == {"tests/data/demo-flat-1.yaml", "tests/data/demo-flat-2.yaml"}
def test_iteration():
    """
    Just make sure that we can iterate.
    """
    data = [1, 2, 3, 4, 5]
    blob = [i for i in Clumper(data)]
    assert data == blob
def notifications(request):
    c = request.course
    if not c:
        return HttpResponseRedirect('/accounts/login/')
    user = request.user
    if user.is_staff and 'as' in request.GET:
        user = get_object_or_404(User, username=request.GET['as'])

    class_feed = []

    # personal feed
    my_assets = {}
    for n in SherdNote.objects.filter(author=user, asset__course=c):
        my_assets[str(n.asset_id)] = 1
    for comment in Comment.objects.filter(user=user):
        if c == getattr(comment.content_object, 'course', None):
            my_assets[str(comment.object_pk)] = 1
    my_discussions = [
        d.collaboration_id for d in DiscussionIndex.objects.filter(
            participant=user,
            collaboration__context=request.collaboration_context)
    ]

    my_feed = Clumper(
        Comment.objects.filter(
            content_type=ContentType.objects.get_for_model(Asset),
            object_pk__in=my_assets.keys()
        ).order_by('-submit_date'),  # so the newest ones show up
        SherdNote.objects.filter(
            asset__in=my_assets.keys(),
            # no global annotations
            # warning: if we include global annotations
            # we need to stop it from autocreating one on-view
            # of the asset somehow
            range1__isnull=False
        ).order_by('-added'),
        Project.objects.filter(
            Q(participants=user.pk) | Q(author=user.pk), course=c
        ).order_by('-modified'),
        DiscussionIndex.with_permission(
            request,
            DiscussionIndex.objects.filter(
                Q(Q(asset__in=my_assets.keys())
                  | Q(collaboration__in=my_discussions)
                  | Q(collaboration__user=request.user)
                  | Q(collaboration__group__user=request.user),
                  participant__isnull=False)
            ).order_by('-modified')),
    )

    tags = Tag.objects.usage_for_queryset(
        SherdNote.objects.filter(asset__course=c), counts=True)

    # only top 10 tags
    tag_cloud = calculate_cloud(
        sorted(tags, key=lambda t: t.count, reverse=True)[:10])

    return {
        'my_feed': my_feed,
        'tag_cloud': tag_cloud,
    }
def test_explode_many(n, k):
    """
    Ensure we do cartesian product elegantly with one nested set.
    """
    data = [{"i": i, "nested": [j for j in range(k)]} for i in range(n)]
    c = Clumper(data).explode(j="nested").count("j")
    assert c == n * k
def test_not_keep_correct_keys():
    data = [
        {"a": 1, "b": 1, "item": 1},
        {"a": 1, "b": 1, "item": 2},
        {"a": 1, "b": 1, "item": 1},
        {"a": 2, "b": 2, "c": 2, "item": 3},
        {"a": 2, "b": 2, "c": 2, "item": 2},
    ]
    keys = Clumper(data).group_by("a", "b").implode(items="item").keys()
    assert set(keys) == {"a", "b", "c", "items"}
def test_keep_does_not_mutate():
    """
    The original data must not be changed; an earlier version of `keep` did mutate it.
    """
    data = [{"a": 1}, {"a": 2}]
    c = Clumper(data).keep(lambda d: d["a"] == 1)
    assert len(data) == 2
    assert len(c) == 1
def test_paths_are_added():
    """When add_path=True we need to add the path information."""
    paths = (
        Clumper.read_jsonl("tests/data/*.jsonl", add_path=True)
        .map(lambda d: d["read_path"])
        .collect()
    )
    assert set(paths) == {"tests/data/cards.jsonl", "tests/data/cards-more.jsonl"}