Esempio n. 1
0
def test_length_list():
    """``len`` on a Clumper must report the number of rows it holds."""
    assert len(Clumper([])) == 0
    single_row = Clumper([{"a": 1}])
    assert len(single_row) == 1
    many_rows = Clumper([{"a": 1} for _ in range(100)])
    assert len(many_rows) == 100
Esempio n. 2
0
def test_correct_keys_kept():
    """
    Make sure that we keep the correct names of the keys.

    Covers positional explode, explicit renames via keyword arguments,
    and overwriting an existing key.
    """
    rows = [
        {"a": 1, "b": 1, "items": [1, 2]},
        {"a": 2, "b": 1, "items": [3, 4]},
    ]

    def keys_after_explode(*args, **kwargs):
        # Small helper: explode with the given spec and collect the keys.
        return set(Clumper(rows).explode(*args, **kwargs).keys())

    assert keys_after_explode("items") == {"items", "a", "b"}
    assert keys_after_explode("items", foobar="items") == {"items", "a", "b", "foobar"}
    assert keys_after_explode(items="items") == {"items", "a", "b"}
    assert keys_after_explode(item="items") == {"item", "a", "b"}
    # Exploding into an existing key overwrites it rather than adding one.
    assert keys_after_explode(a="items") == {"a", "b"}
def explore(
        report_path: str = typer.Argument(...,
                                          help="Report log to visualise."),
        no_trim: bool = typer.Option(
            False, is_flag=True,
            help="Flag to not reduce parametrized calls."),
        port: int = typer.Option(8002, help="Port to serve the dashboard on."),
):
    """Starts up a pytest reportlog dashboard."""
    # Arrange the reportlog data into the hierarchy the dashboard expects.
    parsed = Clumper.read_jsonl(report_path).pipe(parse_test_info,
                                                 trim=not no_trim)
    res = parsed.pipe(to_hierarchy_dict,
                      hierarchy_col="hierarchy",
                      value_col="duration")

    # We serve everything as static files from a temporary folder.
    static_dir = Path(tempfile.mkdtemp()) / "static"
    bundled = resource_filename("pytest_duration_insights", "static")
    shutil.copytree(src=bundled, dst=static_dir)
    Clumper(res, listify=False).write_json(static_dir / "data.json")
    tree_res = Node.from_dict(res).to_value_dict()
    Clumper(tree_res, listify=False).write_json(static_dir / "treedata.json")

    # This is a bit hacky, but the stdlib static file server does the job.
    subprocess.run([
        "python",
        "-m",
        "http.server",
        str(port),
        "--directory",
        str(static_dir),
    ])
Esempio n. 4
0
def test_write_missing_keys(tmp_path):
    """A csv round-trip must survive rows that lack some of the keys."""
    rows = [{"a": "1", "b": "2"}, {"c": "3"}]
    target = temp_file(tmp_path)
    Clumper(rows).write_csv(target)
    restored = Clumper.read_csv(target)
    assert Clumper(rows).equals(restored)
Esempio n. 5
0
def test_no_mutate_query(pokemon):
    """
    Regression test: running the same keep/mutate query twice used to
    yield different results because the first run mutated shared state.
    """
    def first_type_of_dual_typed(data):
        # keep only dual-typed entries, then reduce to the primary type
        return (Clumper(data)
                .keep(lambda d: len(d["type"]) == 2)
                .mutate(type=lambda d: d["type"][0]))

    first_run = first_type_of_dual_typed(pokemon)
    second_run = first_type_of_dual_typed(pokemon)
    assert len(first_run) == len(second_run)
Esempio n. 6
0
def test_not_keep_correct_keys():
    """Grouped implode keeps every key, even ones absent from some groups."""
    rows = [
        {"a": 1, "b": 1, "item": 1},
        {"a": 1, "b": 1, "item": 2},
        {"a": 1, "b": 1, "item": 1},
        {"a": 2, "b": 2, "c": 2, "item": 3},
        {"a": 2, "b": 2, "c": 2, "item": 2},
    ]

    imploded = Clumper(rows).group_by("a", "b").implode(items="item")
    assert set(imploded.keys()) == {"a", "b", "c", "items"}
Esempio n. 7
0
def notifications(request):
    """Build the personal notification feed and tag cloud for a course.

    Redirects to the login page when no course is attached to the request.
    Staff members may view the feed as another user via the ``?as=<username>``
    query parameter.

    Returns a template context dict with ``my_feed`` and ``tag_cloud``.
    """
    c = request.course

    if not c:
        return HttpResponseRedirect('/accounts/login/')

    user = request.user
    # `dict.has_key()` was removed in Python 3; the `in` operator works on
    # Django QueryDicts under both Python 2 and 3.
    if user.is_staff and 'as' in request.GET:
        user = get_object_or_404(User, username=request.GET['as'])

    # Personal feed: assets this user annotated or commented on.
    my_assets = {}
    for n in SherdNote.objects.filter(author=user, asset__course=c):
        my_assets[str(n.asset_id)] = 1
    for comment in Comment.objects.filter(user=user):
        if c == getattr(comment.content_object, 'course', None):
            my_assets[str(comment.object_pk)] = 1
    my_discussions = [
        d.collaboration_id for d in DiscussionIndex.objects.filter(
            participant=user,
            collaboration__context=request.collaboration_context)
    ]

    my_feed = Clumper(
        Comment.objects.filter(
            content_type=ContentType.objects.get_for_model(Asset),
            object_pk__in=my_assets.keys()).order_by(
                '-submit_date'),  # so the newest ones show up
        SherdNote.objects.filter(
            asset__in=my_assets.keys(),
            # no global annotations
            # warning: if we include global annotations
            # we need to stop it from autocreating one on-view
            # of the asset somehow
            range1__isnull=False).order_by('-added'),
        Project.objects.filter(Q(participants=user.pk) | Q(author=user.pk),
                               course=c).order_by('-modified'),
        DiscussionIndex.with_permission(
            request,
            DiscussionIndex.objects.filter(
                Q(Q(asset__in=my_assets.keys())
                  | Q(collaboration__in=my_discussions)
                  | Q(collaboration__user=request.user)
                  | Q(collaboration__group__user=request.user),
                  participant__isnull=False)).order_by('-modified')),
    )

    tags = Tag.objects.usage_for_queryset(
        SherdNote.objects.filter(asset__course=c), counts=True)

    # Only the top 10 tags, most-used first. The old `sorted(tags, cmp_func)`
    # form relied on `cmp()` and the positional cmp argument, both removed in
    # Python 3; `key=` with reverse=True reproduces the same stable,
    # descending-by-count order.
    tag_cloud = calculate_cloud(
        sorted(tags, key=lambda tag: tag.count, reverse=True)[:10])

    return {
        'my_feed': my_feed,
        'tag_cloud': tag_cloud,
    }
Esempio n. 8
0
def test_correct_values_value():
    """impute(strategy="value") fills missing keys with the fallback value."""
    rows = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]

    filled = (Clumper(rows)
              .mutate(b=impute("b", strategy="value", fallback=0))
              .collect())

    assert [row["b"] for row in filled] == [2, 3, 0, 6, 0]
def test_group_combos_one_group():
    """_group_combos must enumerate every distinct value of a single group key."""
    combos = it.product([1, 2, 3, 4, 5], [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([{"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos])
    for key, expected in (("a", [True, False]), ("b", ["a", "b"])):
        found = clump.group_by(key)._group_combos()
        assert sorted(row[key] for row in found) == sorted(expected)
Esempio n. 10
0
def base_clumper():
    """Fixture helper: a 26-row Clumper, one row per lowercase letter."""
    letters = "abcdefghijklmnopqrstuvwxyz"
    rows = [{"data": [i, i], "i": i, "c": letter}
            for i, letter in enumerate(letters)]
    return Clumper(rows)
Esempio n. 11
0
def test_correct_values_prev():
    """impute(strategy="prev") carries the previous value into missing keys."""
    rows = [
        {"a": 1, "b": 2},
        {"a": 2, "b": 3},
        {"a": 3},
        {"a": 4, "b": 6},
        {"a": 5},
    ]

    filled = Clumper(rows).mutate(b=impute("b", strategy="prev")).collect()

    assert [row["b"] for row in filled] == [2, 3, 3, 6, 6]
Esempio n. 12
0
def test_mutability_insurance():
    """
    We don't want to change the original data going in. Ever.

    The blob the Clumper holds must be a distinct object from the list
    the caller passed in, so later mutations cannot leak back out.
    """
    data = [{"a": 1}, {"b": 2}]
    blob = Clumper(data).blob
    # `is not` is the idiomatic identity check; comparing id() values is
    # equivalent but roundabout.
    assert blob is not data
Esempio n. 13
0
def test_iteration():
    """A Clumper must be directly iterable, yielding its rows in order."""
    rows = [1, 2, 3, 4, 5]
    assert list(Clumper(rows)) == rows
Esempio n. 14
0
def test_explode_many(n, k):
    """
    Ensure we do cartesian product elegantly with one nested set:
    exploding a k-element list over n rows yields n * k values.
    """
    rows = [{"i": i, "nested": list(range(k))} for i in range(n)]
    exploded = Clumper(rows).explode(j="nested")
    assert exploded.count("j") == n * k
Esempio n. 15
0
def test_case_zero():
    """Aggregations on an empty Clumper degrade gracefully instead of raising."""
    empty = Clumper([])
    for aggregate in (empty.mean, empty.max, empty.min, empty.sum):
        assert aggregate("i") is None
    assert empty.unique("i") == []
    assert empty.n_unique("i") == 0
Esempio n. 16
0
def test_keep_does_not_mutate():
    """
    The original data must not be changed. This happened originally:
    `keep` must filter a copy and leave the caller's list intact.
    """
    rows = [{"a": 1}, {"a": 2}]
    kept = Clumper(rows).keep(lambda d: d["a"] == 1)
    assert len(rows) == 2
    assert len(kept) == 1
Esempio n. 17
0
def test_with_groups(n):
    """
    We should never count more rows than we have in the original data:
    summing a grouped sum-aggregate of the constant column "r" must
    reproduce the total row count.
    """
    combos = it.product(range(1, n + 1), [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    clump = Clumper([{"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos])
    expected = len(clump)
    total = clump.group_by("a", "b").agg(r=("r", "sum")).sum("r")
    assert total == expected
Esempio n. 18
0
def test_mutate_group_aware():
    """
    Does `row_number` reset during mutate if a group is active?
    With 20 rows split into two groups of 10, row numbers run 1..10.
    """
    rows = [{"bool": bool(i % 2)} for i in range(20)]
    clump = Clumper(rows).group_by("bool").mutate(r=row_number())
    assert len(clump) == len(rows)
    assert clump.groups == ("bool",)
    assert set(clump.unique("r")) == set(range(1, 11))
Esempio n. 19
0
def test_read_csv_nulls():
    """Test that null rows are discarded when reading a csv file."""
    expected = Clumper([
        {"a": "1", "b": "2"},
        {"a": "2", "c": "4"},
    ])
    assert Clumper.read_csv("tests/data/null.csv").equals(expected)
Esempio n. 20
0
def test_left_join_no_overlap():
    """A left join on a key with no matching values leaves the left side as-is."""
    left_rows = [
        {"a": 1, "b": 1, "d": 1},
        {"a": 1, "b": 2, "d": 1},
        {"a": 1, "b": 5, "d": 1},
    ]
    # "d" on the right never equals 1, so no row can match.
    right_rows = [
        {"b": 1, "c": 1, "d": 100},
        {"b": 2, "c": 2, "d": 200},
        {"b": 2, "c": 20, "d": 200},
    ]
    joined = (Clumper(left_rows)
              .left_join(Clumper(right_rows),
                         mapping={"d": "d"},
                         lsuffix="left",
                         rsuffix="right")
              .collect())
    assert joined == left_rows
Esempio n. 21
0
def test_can_rename_key(keyname):
    """We should be able to change the keyname"""
    nested = {
        "f1": {"p1": 1, "p2": 2},
        "f2": {"p1": 3, "p2": 4},
        "f3": {"p1": 5, "p2": 6},
    }

    expected = [
        {"p1": 1, "p2": 2, keyname: "f1"},
        {"p1": 3, "p2": 4, keyname: "f2"},
        {"p1": 5, "p2": 6, keyname: "f3"},
    ]

    flattened = Clumper(nested, listify=False).flatten_keys(keyname=keyname)
    assert flattened.collect() == expected
Esempio n. 22
0
def test_len_appropriate_dict_input():
    """You can pass a dictionary, but then the length should be 1. Not the number of keys."""
    single_row = Clumper({"a": 1, "b": 2, "c": 3})
    assert len(single_row) == 1
Esempio n. 23
0
def test_subsets_sizes(keys, size):
    """Every subset produced by grouping on `keys` must contain `size` rows."""
    combos = it.product([1, 2], [1, 2], [True, False], ["a", "b"])
    clump = Clumper([{"r": 1, "i": i, "j": j, "a": a, "b": b} for i, j, a, b in combos])
    subset_lengths = [len(subset) for subset in clump.group_by(*keys)._subsets()]
    assert all(length == size for length in subset_lengths)
Esempio n. 24
0
def test_write_empty_csv(data, tmp_path):
    """Test that null cells are exported correctly as empty strings"""
    target = temp_file(tmp_path)
    Clumper(data).write_csv(target)
    restored = Clumper.read_csv(target, na_values="ignore")
    assert Clumper(data).collect() == restored.collect()
Esempio n. 25
0
def test_group_combos_two_groups():
    """Two group keys with two values each must yield 2 * 2 = 4 combinations."""
    rows = [
        {"r": 1, "i": i, "j": j, "a": a, "b": b}
        for i, j, a, b in it.product(
            [1, 2, 3, 4, 5], [-0.1, 0.0, 0.1], [True, False], ["a", "b"])
    ]
    grouped = Clumper(rows).group_by("a", "b")
    assert len(grouped._group_combos()) == 4
Esempio n. 26
0
def test_write_csv_fieldnames(data, tmp_path):
    """Test that fieldnames of Clumper match the headers in the exported csv file"""
    target = temp_file(tmp_path)
    Clumper(data).write_csv(target)
    restored = Clumper.read_csv(target)
    # Every original key must be present among the headers read back.
    assert set(Clumper(data).keys()).issubset(restored.keys())
Esempio n. 27
0
def test_unpack_dict():
    """Unpacking the nested "data" key should expand into 10 rows."""
    unpacked = Clumper(d).unpack("data")
    assert len(unpacked) == 10
Esempio n. 28
0
def test_read_csv(dtype_data, dtype, tmp_path):
    """Test that the correct dtype is returned when dtype argument is not None."""
    target = temp_file(tmp_path)
    Clumper(dtype_data).write_csv(target)
    restored = Clumper.read_csv(target, dtype=dtype)
    assert Clumper(dtype_data).equals(restored)
Esempio n. 29
0
 def to_json(self, path):
     """Serialise this object's data to `path` as json with a 2-space indent."""
     clump = Clumper(self.data)
     return clump.write_json(path, indent=2)
Esempio n. 30
0
def test_write_csv_n_positive(data, tmp_path):
    """Test that the correct number of rows is exported"""
    target = temp_file(tmp_path)
    Clumper(data).head(n=10).write_csv(target)
    restored = Clumper.read_csv(target, na_values="ignore")
    assert Clumper(data).head(n=10).collect() == restored.collect()