Ejemplo n.º 1
0
def test_all():
    """Iter().all() should drain the target iterator into a list."""
    source = iter(range(10))
    expected = list(range(10))

    collected = glom(source, Iter().all())

    assert collected == expected
    # the source iterator must have been fully consumed
    assert next(source, None) is None
    # .all() reprs the same as the explicit (Iter(), list) tuple spec
    assert repr(Iter().all()) == repr((Iter(), list))
Ejemplo n.º 2
0
def test_filter():
    """Exercise Iter().filter() with a callable, with no argument, and with a Check spec."""

    def is_odd(x):
        return x % 2

    odd_spec = Iter().filter(is_odd)
    odds = glom(RANGE_5, odd_spec)
    assert list(odds) == [1, 3]

    # let's just make sure we're actually streaming just in case:
    # interleave pulls from the filtered stream and from the raw counter
    counter = count()
    odds = glom(counter, odd_spec)
    assert next(odds) == 1
    assert next(odds) == 3
    assert next(counter) == 4
    assert next(counter) == 5
    assert next(odds) == 7

    # a bare filter() keeps only the truthy items
    flags = [True, False, False, True, False]
    truthy_only = Iter().filter().all()
    assert glom(flags, truthy_only) == [True, True]

    # a Check that defaults to SKIP acts as the filter predicate
    imags = [0j, 1j, 2, 2j, 3j]
    imag_check = Check(T.imag.real, type=float, one_of=(0, 2), default=SKIP)
    checked = Iter().filter(imag_check).all()
    assert glom(imags, checked) == [0j, 2j]

    filter_repr = repr(Iter().filter(T.a.b))
    assert filter_repr.startswith('Iter().filter(T.a.b)')
Ejemplo n.º 3
0
def test_iter_composition():
    """Iter specs can be chained in a tuple and nested inside one another."""
    numbers = list(range(10))

    # two Iter() specs in sequence followed by list give back the input
    chained = (Iter(), Iter(), list)
    assert glom(numbers, chained) == numbers

    # inner Iter maps each sublist mod 4; outer flattens and deduplicates
    nested_target = [numbers] * 3
    mod4_unique = Iter(Iter(lambda x: x % 4)).flatten().unique()
    result = glom(nested_target, mod4_unique)
    assert list(result) == [0, 1, 2, 3]
Ejemplo n.º 4
0
def test_chunked():
    """Iter().chunked(3) groups items in threes; the chunks can be mapped further."""
    numbers = list(range(9))

    chunks = glom(numbers, Iter().chunked(3))
    assert list(chunks) == [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    chunk_sums = glom(numbers, Iter().chunked(3).map(sum))
    assert list(chunk_sums) == [3, 12, 21]
Ejemplo n.º 5
0
def test_unique():
    """Iter().unique() drops repeated items and has a readable repr."""
    numbers = list(range(10))

    # already-unique input passes through unchanged
    deduped = glom(numbers, Iter().unique())
    assert list(deduped) == numbers

    # after mapping mod 4 only the first four distinct values remain
    mod4_deduped = glom(numbers, Iter(lambda x: x % 4).unique())
    assert list(mod4_deduped) == numbers[:4]

    assert repr(Iter().unique(T.a)) == 'Iter().unique(T.a)'
Ejemplo n.º 6
0
def test_split_flatten():
    """Iter().split() breaks the stream on None; .flatten() joins the pieces again."""
    stream_with_gaps = [1, None, None, 2, 3, None, 4]

    pieces = glom(stream_with_gaps, Iter().split())
    assert list(pieces) == [[1], [2, 3], [4]]

    rejoined = glom(stream_with_gaps, Iter().split().flatten())
    assert list(rejoined) == [1, 2, 3, 4]

    split_repr = repr(Iter().split(sep=None, maxsplit=2))
    assert split_repr == 'Iter().split(sep=None, maxsplit=2)'

    flatten_repr = repr(Iter(T.a.b[1]).flatten())
    assert flatten_repr == 'Iter(T.a.b[1]).flatten()'
Ejemplo n.º 7
0
def test_slice():
    """Cover Iter().slice() with 1, 2 and 3 arguments, plus Iter().limit()."""
    counter = count()

    first_three = glom(counter, Iter().slice(3))
    assert list(first_three) == [0, 1, 2]
    # slicing must not have consumed more of the counter than it yielded
    assert next(counter) == 3

    start_stop = glom(range(10), Iter().slice(1, 5))
    assert list(start_stop) == [1, 2, 3, 4]

    stepped_spec = Iter().slice(1, 6, 2)
    stepped = glom(range(10), stepped_spec)
    assert list(stepped) == [1, 3, 5]
    assert repr(Iter().slice(1, 6, 2)) == 'Iter().slice(1, 6, 2)'

    limited = glom(range(10), Iter().limit(3))
    assert list(limited) == [0, 1, 2]
    assert repr(Iter().limit(3)) == 'Iter().limit(3)'

    # a limit larger than the input just yields everything
    over_limit = glom(range(5), Iter().limit(10))
    assert list(over_limit) == [0, 1, 2, 3, 4]

    # test broken args: too many positional arguments
    with pytest.raises(TypeError):
        Iter().slice(1, 2, 3, 4)
Ejemplo n.º 8
0
def test_windowed():
    """Iter().windowed(3) yields sliding 3-tuples and composes with filter/map."""
    numbers = list(range(5))

    window_spec = Iter().windowed(3)
    windows = glom(numbers, window_spec)
    assert list(windows) == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
    assert repr(window_spec) == 'Iter().windowed(3)'

    # keep only windows whose first element is odd, then sum each one
    odd_start_sums = window_spec.filter(lambda x: bool(x[0] % 2)).map(sum)
    sums = glom(numbers, odd_start_sums)
    assert next(sums) == 6

    sums = glom(range(10), odd_start_sums)
    assert list(sums) == [6, 12, 18, 24]
Ejemplo n.º 9
0
def test_first():
    """Iter().first() returns the first matching item, or the default if none match."""
    first_imag = Iter().first(T.imag)

    values = iter([1, 2, 3j, 4])
    found = glom(values, first_imag)
    assert found == 3j
    # iteration stopped at the match, so the item after it is still available
    assert next(values) == 4
    assert repr(first_imag) == '(Iter(), First(T.imag))'

    first_imag_or_zero = Iter().first(T.imag, default=0)
    values = iter([1, 2, 4])
    found = glom(values, first_imag_or_zero)
    assert found == 0
    assert repr(first_imag_or_zero) == '(Iter(), First(T.imag, default=0))'
def test_check_pkg(pkg):
    """Check that check_pkg reports the expected errors for the ``pkg`` fixture."""
    reports = check_pkg(pkg.descriptor)

    # only errors in the bad files
    sources_per_report = glom(reports, [("tables", ["source"])])
    all_sources = glom(sources_per_report, Iter().flatten())
    assert all("bad" in fp for fp in all_sources)

    counter = defaultdict(int)

    def _proc(err):
        # tally each error code as a side effect, and return its position
        counter[err["code"]] += 1
        return err["row-number"], err["column-number"]

    nested_positions = glom(reports, [("tables", [("errors", [_proc])])])
    err_row_cols = tuple(flatten_list(nested_positions))

    # incorrect type: (99, 2) int > float, and (101, 3) bool -> string
    assert counter["type-or-format-error"] == 2

    # FIXME: not clear why missing value detection fails
    if counter["missing-value"] == 0:
        pytest.xfail("cannot detect missing-value, not clear why")

    # missing value: (11, 9)
    assert counter["missing-value"] == 1
    assert (11, 9, 99, 2, 101, 3) == err_row_cols
Ejemplo n.º 11
0
def test_ref():
    """Exercise recursive Ref specs on nested lists and on an ElementTree."""
    list_of_items = Ref('item', [Ref('item')])
    assert glom([[[]]], list_of_items) == [[[]]]

    # check that it recurses downwards and dies on int iteration
    with pytest.raises(Exception):
        glom([[[1]]], Ref('item', [Ref('item')]))

    tuple_ref = Ref('item', (T[1], Ref('item')))
    assert repr(tuple_ref) == "Ref('item', (T[1], Ref('item')))"

    # recursive spec turning an element into a dict with nested children
    element_as_dict = {
        "tag": "tag",
        "text": "text",
        "attrib": "attrib",
        "children": (iter, [Ref('ElementTree')]),
    }
    etree2dicts = Ref('ElementTree', element_as_dict)

    # recursive spec turning an element into a (tag, children) tuple
    element_as_tuple = (T.tag, Iter(Ref('ElementTree')).all())
    etree2tuples = Fill(Ref('ElementTree', element_as_tuple))

    markup = '''
    <html>
      <head>
        <title>the title</title>
      </head>
      <body id="the-body">
        <p>A paragraph</p>
      </body>
    </html>'''
    etree = ElementTree.fromstring(markup)

    # results are not inspected; both specs just have to run without raising
    glom(etree, etree2dicts)
    glom(etree, etree2tuples)
Ejemplo n.º 12
0
def test_faulty_iterate():
    """A registered ``iterate`` handler that raises should surface as TypeError."""

    def _raising_iterate(obj):
        raise RuntimeError('oops')

    glommer = Glommer()
    glommer.register(str, iterate=_raising_iterate)

    iter_to_list = (Iter(), list)
    with pytest.raises(TypeError):
        glommer.glom('abc', iter_to_list)
Ejemplo n.º 13
0
def test_iter():
    """Basic Iter behavior: subspec mapping, laziness, SKIP/STOP, bad kwargs."""
    digits = ['1', '2', '3']
    assert list(glom(digits, Iter(int))) == [1, 2, 3]

    # the result is lazy: pulling two items consumes exactly two from the counter
    counter = count()
    plus_one = glom(counter, Iter(lambda t: t + 1))
    first, second = next(plus_one), next(plus_one)
    assert (first, second) == (1, 2)
    assert next(counter) == 2

    # the Iter output can be fed to any callable taking an iterable
    enumerated = glom(['1', '2', '3'], (Iter(int), enumerate))
    assert list(enumerated) == [(0, 1), (1, 2), (2, 3)]

    # SKIP items are dropped, STOP ends the iteration
    assert list(glom([1, SKIP, 2], Iter())) == [1, 2]
    assert list(glom([1, STOP, 2], Iter())) == [1]

    with pytest.raises(TypeError):
        Iter(nonexistent_kwarg=True)
Ejemplo n.º 14
0
def test_while():
    """Check Iter().takewhile() and Iter().dropwhile(), including their reprs."""
    cnt = count()
    out = glom(cnt, Iter().takewhile(lambda x: x < 3))
    assert list(out) == [0, 1, 2]
    # takewhile had to pull 3 from cnt to see the predicate fail, so 4 is next
    assert next(cnt) == 4
    # the comparison must sit outside repr(): repr(a == b) is a non-empty
    # string ('True' or 'False') and would make the assert pass unconditionally
    assert repr(Iter().takewhile(T.a)) == 'Iter().takewhile(T.a)'

    range_iter = iter(range(7))
    out = glom(range_iter, Iter().dropwhile(lambda x: x < 3 or x > 5))
    assert list(out) == [3, 4, 5, 6]  # 6 still here despite the x>5 above

    # predicate is true for every item, so everything is dropped
    out = glom(range(10), Iter().dropwhile(lambda x: x >= 0).limit(10))
    assert list(out) == []

    # the predicate can itself be a glom spec (here a tuple chain)
    out = glom(range(8), Iter().dropwhile((T.bit_length(), lambda x: x < 3)))
    assert list(out) == [4, 5, 6, 7]
    assert repr(Iter().dropwhile(T.a)) == 'Iter().dropwhile(T.a)'
def extraires_deputes(archive, deputes, deputes_partis, deputes_groupes):
    """Extract deputies from a zip archive into three CSV files.

    Every archive member whose name matches ``ACTEUR_RE`` is passed to
    ``parser_deputes`` (with ``archive=`` set to the open zip file) and the
    result is then mapped through ``spec_depute``. For each resulting record,
    the ``"partis"`` and ``"groupes"`` entries are popped and written as rows
    to their own files; what remains is written as one row of the deputies
    file.

    Args:
        archive: the zip archive to read; passed straight to ``ZipFile``.
        deputes: output location for the deputies CSV. Must provide
            ``.open(mode, newline=...)`` — presumably a ``pathlib.Path``;
            confirm against callers.
        deputes_partis: output location for the party-membership CSV
            (same kind of object as ``deputes``).
        deputes_groupes: output location for the group-membership CSV
            (same kind of object as ``deputes``).
    """
    # The csv module requires files handed to a writer to be opened with
    # newline="" so that the writer's own "\r\n" line terminator is not
    # translated a second time (which yields "\r\r\n" on Windows).
    with ZipFile(archive) as arc, deputes.open(
        "w", newline=""
    ) as f_deputes, deputes_partis.open(
        "w", newline=""
    ) as f_partis, deputes_groupes.open("w", newline="") as f_groupes:
        # "groupes" and "partis" go to their own files, not the main one
        w = csv.DictWriter(
            f_deputes,
            fieldnames=[f for f in spec_depute if f not in ["groupes", "partis"]],
        )
        wp = csv.DictWriter(f_partis, fieldnames=spec_membre)
        wg = csv.DictWriter(f_groupes, fieldnames=[*spec_membre, "relation"])

        w.writeheader()
        wp.writeheader()
        wg.writeheader()

        # lazily select the archive members to process
        acteurs = (a for a in arc.namelist() if ACTEUR_RE.search(a))

        for d in glom(
            acteurs, Iter(partial(parser_deputes, archive=arc)).map(spec_depute)
        ):
            # pop the nested collections first so only the fields declared
            # for the main writer are left in d
            wp.writerows(d.pop("partis"))
            wg.writerows(d.pop("groupes"))
            w.writerow(d)
Ejemplo n.º 16
0
def generer_fichier_deputes(
    deputes_path,
    groupes_path,
    partis_path,
    deputes_groupes_path,
    deputes_partis_path,
    dest,
):
    """Build the consolidated deputies CSV and write it LZMA-compressed.

    Reads the five input CSVs with pandas, attaches to each deputy a
    ``groupe`` label (with its ``relation``) and a ``parti`` label, each
    formatted as ``"nom (sigle)"``, then writes one row per deputy to ``dest``.

    Args:
        deputes_path: CSV of deputies.
        groupes_path: CSV of groups; its ``code``, ``nom`` and ``sigle``
            columns are used.
        partis_path: CSV of parties; its ``code``, ``nom`` and ``sigle``
            columns are used.
        deputes_groupes_path: CSV linking deputies to groups; its ``code``,
            ``code_depute``, ``relation`` and ``date_fin`` columns are used.
        deputes_partis_path: CSV linking deputies to parties; its ``code``,
            ``code_depute`` and ``date_fin`` columns are used.
        dest: path of the output file, opened with ``lzma.open`` in text mode.
    """
    deputes = pd.read_csv(deputes_path)

    # --- groups: keep one open-ended membership per deputy ----------------
    groupes = pd.read_csv(groupes_path)
    deputes_groupes = pd.read_csv(deputes_groupes_path).join(
        groupes.set_index("code")[["nom", "sigle"]], on="code")
    deputes_groupes = (
        # rows without an end date only
        deputes_groupes[deputes_groupes.date_fin.isnull()]
        .sort_values(["code_depute", "relation"])
        # keep "P" (president) rather than "M" (member): "P" sorts last
        .drop_duplicates("code_depute", keep="last")
        .set_index("code_depute"))
    deputes_groupes[
        "groupe"] = deputes_groupes.nom + " (" + deputes_groupes.sigle + ")"

    # --- parties: open-ended memberships, reduced to a labelled Series ----
    partis = pd.read_csv(partis_path)
    deputes_partis = pd.read_csv(deputes_partis_path).join(
        partis.set_index("code")[["nom", "sigle"]], on="code")
    deputes_partis = deputes_partis[
        deputes_partis.date_fin.isnull()].set_index("code_depute")
    deputes_partis = deputes_partis.nom + " (" + deputes_partis.sigle + ")"
    # the Series name becomes the column name in the join below
    deputes_partis.name = "parti"

    deputes = deputes.join(deputes_groupes[["groupe", "relation"]],
                           on=["code"]).join(deputes_partis, on=["code"])

    # newline="" is what the csv module requires of files handed to a writer;
    # without it the writer's "\r\n" terminator is translated again on
    # platforms whose native line separator is not "\n".
    with lzma.open(dest, "wt", newline="") as f, id_from_file(
            "circonscriptions_legislatives.csv") as id_circos, id_from_file(
                "deputes.csv") as id_deputes:

        # glom spec applied to each row tuple; its keys are also the CSV header
        spec = {
            "id":
            Invoke(id_deputes).specs(code=T.code),
            "circonscription_id":
            Invoke(id_circos).specs(code=T.circonscription),
            # columns copied through unchanged
            **{
                c: getattr(T, c)
                for c in [
                    "code",
                    "nom",
                    "prenom",
                    "sexe",
                    "date_naissance",
                    "legislature",
                    "date_debut_mandat",
                ]
            },
            # missing values (per pd.isna) are replaced by the given default
            "groupe":
            Coalesce(T.groupe, skip=pd.isna, default=""),
            "parti":
            Coalesce(T.parti, skip=pd.isna, default=""),
            "date_fin_mandat":
            Coalesce(T.date_fin_mandat, skip=pd.isna, default=NULL),
            "relation":
            Coalesce(T.relation, skip=pd.isna, default=""),
            # always written as NULL
            "profession":
            Val(NULL),
        }

        w = csv.DictWriter(f, fieldnames=spec)
        w.writeheader()
        w.writerows(glom(deputes.itertuples(), Iter(spec)))
Ejemplo n.º 17
0
def test_map():
    """Iter().map() applies a callable to every item and has a readable repr."""
    doubling = Iter().map(lambda x: x * 2)

    doubled = glom(RANGE_5, doubling)
    assert list(doubled) == [0, 2, 4, 6, 8]

    map_repr = repr(Iter().map(T.a.b))
    assert map_repr.startswith('Iter().map(T.a.b)')