def test_all():
    """``Iter().all()`` collects the whole target iterator into a list."""
    source = iter(range(10))
    collected = glom(source, Iter().all())

    assert collected == list(range(10))
    # Nothing may be left in the source iterator afterwards.
    assert next(source, None) is None
    # all() has the same repr as the explicit (Iter(), list) tuple spec.
    assert repr(Iter().all()) == repr((Iter(), list))
def test_filter():
    """Exercise ``Iter().filter()`` with a callable, with no argument, and with a Check spec."""

    def is_odd(x):
        return x % 2

    odd_spec = Iter().filter(is_odd)
    assert list(glom(RANGE_5, odd_spec)) == [1, 3]

    # let's just make sure we're actually streaming just in case
    counter = count()
    odds = glom(counter, odd_spec)
    assert next(odds) == 1
    assert next(odds) == 3
    # Pulling from the shared counter directly skips values the filter never sees.
    assert next(counter) == 4
    assert next(counter) == 5
    assert next(odds) == 7

    # With no argument, filter() keeps only the truthy items.
    bools = [True, False, False, True, False]
    truthy_spec = Iter().filter().all()
    assert glom(bools, truthy_spec) == [True, True]

    # A Check spec that defaults to SKIP drops the items failing the check.
    imags = [0j, 1j, 2, 2j, 3j]
    imag_check = Check(T.imag.real, type=float, one_of=(0, 2), default=SKIP)
    check_spec = Iter().filter(imag_check).all()
    assert glom(imags, check_spec) == [0j, 2j]

    assert repr(Iter().filter(T.a.b)).startswith('Iter().filter(T.a.b)')
def test_iter_composition():
    """Chained and nested ``Iter`` specs compose as expected."""
    numbers = list(range(10))

    # Two Iter() specs in a row followed by list round-trip the input.
    chained_spec = (Iter(), Iter(), list)
    assert glom(numbers, chained_spec) == numbers

    # An Iter nested inside an Iter, flattened and de-duplicated.
    nested_spec = Iter(Iter(lambda x: x % 4)).flatten().unique()
    remainders = glom([numbers] * 3, nested_spec)
    assert list(remainders) == [0, 1, 2, 3]
def test_chunked():
    """``Iter().chunked(3)`` yields lists of three, which can then be mapped."""
    numbers = list(range(9))

    chunks = glom(numbers, Iter().chunked(3))
    assert list(chunks) == [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    chunk_sums = glom(numbers, Iter().chunked(3).map(sum))
    assert list(chunk_sums) == [3, 12, 21]
def test_unique():
    """``Iter().unique()`` keeps the first occurrence of each item."""
    numbers = list(range(10))

    # Already-unique input passes through untouched.
    assert list(glom(numbers, Iter().unique())) == numbers

    # After mapping to x % 4 only the first four distinct values remain.
    mod_spec = Iter(lambda x: x % 4).unique()
    assert list(glom(numbers, mod_spec)) == numbers[:4]

    assert repr(Iter().unique(T.a)) == 'Iter().unique(T.a)'
def test_split_flatten():
    """``split()`` groups items between ``None`` separators; ``flatten()`` undoes it."""
    stream_with_gaps = [1, None, None, 2, 3, None, 4]

    groups = glom(stream_with_gaps, Iter().split())
    assert list(groups) == [[1], [2, 3], [4]]

    flattened = glom(stream_with_gaps, Iter().split().flatten())
    assert list(flattened) == [1, 2, 3, 4]

    split_repr = repr(Iter().split(sep=None, maxsplit=2))
    assert split_repr == 'Iter().split(sep=None, maxsplit=2)'

    flatten_repr = repr(Iter(T.a.b[1]).flatten())
    assert flatten_repr == 'Iter(T.a.b[1]).flatten()'
def test_slice():
    """Cover ``Iter().slice()`` / ``Iter().limit()`` results, reprs and bad arguments."""
    # slice(3) on an endless counter takes exactly three items and no more.
    cnt = count()
    first_three = glom(cnt, Iter().slice(3))
    assert list(first_three) == [0, 1, 2]
    assert next(cnt) == 3

    start_stop = glom(range(10), Iter().slice(1, 5))
    assert list(start_stop) == [1, 2, 3, 4]

    stepped_spec = Iter().slice(1, 6, 2)
    assert list(glom(range(10), stepped_spec)) == [1, 3, 5]
    assert repr(Iter().slice(1, 6, 2)) == 'Iter().slice(1, 6, 2)'

    limited = glom(range(10), Iter().limit(3))
    assert list(limited) == [0, 1, 2]
    assert repr(Iter().limit(3)) == 'Iter().limit(3)'

    # A limit larger than the input simply yields everything.
    over_limited = glom(range(5), Iter().limit(10))
    assert list(over_limited) == [0, 1, 2, 3, 4]

    # test broken args
    with pytest.raises(TypeError):
        Iter().slice(1, 2, 3, 4)
def test_windowed():
    """``Iter().windowed(3)`` yields sliding 3-tuples and chains with filter/map."""
    numbers = list(range(5))

    window_spec = Iter().windowed(3)
    windows = glom(numbers, window_spec)
    assert list(windows) == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
    assert repr(window_spec) == 'Iter().windowed(3)'

    # Keep the windows whose first element is odd, then sum each of them.
    odd_sum_spec = window_spec.filter(lambda x: bool(x[0] % 2)).map(sum)

    sums = glom(numbers, odd_sum_spec)
    assert next(sums) == 6

    sums = glom(range(10), odd_sum_spec)
    assert list(sums) == [6, 12, 18, 24]
def test_first():
    """``Iter().first()`` returns the first match, or the default when none is found."""
    first_imag = Iter().first(T.imag)
    values = iter([1, 2, 3j, 4])
    assert glom(values, first_imag) == 3j
    # Iteration stopped at the match: the item after it is still available.
    assert next(values) == 4
    assert repr(first_imag) == '(Iter(), First(T.imag))'

    first_imag_or_zero = Iter().first(T.imag, default=0)
    values = iter([1, 2, 4])
    assert glom(values, first_imag_or_zero) == 0
    assert repr(first_imag_or_zero) == '(Iter(), First(T.imag, default=0))'
def test_check_pkg(pkg):
    """Validate the reports from ``check_pkg``: affected files, error codes and cells."""
    reports = check_pkg(pkg.descriptor)

    # only errors in the bad files
    sources = glom(glom(reports, [("tables", ["source"])]), Iter().flatten())
    assert all("bad" in fp for fp in sources)

    counter = defaultdict(int)

    def _proc(err):
        # Tally the error code as a side effect and return the offending cell.
        counter[err["code"]] += 1
        return (err["row-number"], err["column-number"])

    nested_cells = glom(reports, [("tables", [("errors", [_proc])])])
    err_row_cols = tuple(flatten_list(nested_cells))

    # incorrect type: (99, 2) int > float, and (101, 3) bool -> string
    assert counter["type-or-format-error"] == 2

    # FIXME: not clear why missing value detection fails
    if counter["missing-value"] == 0:
        pytest.xfail("cannot detect missing-value, not clear why")

    # missing value: (11, 9)
    assert counter["missing-value"] == 1
    assert (11, 9, 99, 2, 101, 3) == err_row_cols
def test_ref():
    """``Ref`` supports recursive specs, has a readable repr, and can walk an ElementTree."""
    assert glom([[[]]], Ref('item', [Ref('item')])) == [[[]]]

    # check that it recurses downwards and dies on int iteration
    with pytest.raises(Exception):
        glom([[[1]]], Ref('item', [Ref('item')]))

    nested_ref = Ref('item', (T[1], Ref('item')))
    assert repr(nested_ref) == "Ref('item', (T[1], Ref('item')))"

    # Recursive spec turning each element into a dict with a list of child dicts.
    etree2dicts = Ref(
        'ElementTree',
        {
            "tag": "tag",
            "text": "text",
            "attrib": "attrib",
            "children": (iter, [Ref('ElementTree')]),
        },
    )
    # Recursive spec turning each element into a (tag, children) tuple.
    etree2tuples = Fill(
        Ref('ElementTree', (T.tag, Iter(Ref('ElementTree')).all()))
    )

    document = (
        ' <html> <head> <title>the title</title> </head>'
        ' <body id="the-body"> <p>A paragraph</p> </body> </html>'
    )
    etree = ElementTree.fromstring(document)

    # Only checks that both recursive specs run without raising.
    glom(etree, etree2dicts)
    glom(etree, etree2tuples)
def test_faulty_iterate():
    """A registered ``iterate`` handler that raises is expected to surface as TypeError."""

    def bad_iter(obj):
        raise RuntimeError('oops')

    glommer = Glommer()
    glommer.register(str, iterate=bad_iter)

    with pytest.raises(TypeError):
        glommer.glom('abc', (Iter(), list))
def test_iter():
    """Basic ``Iter`` behaviour: subspecs, laziness, SKIP/STOP, and bad kwargs."""
    digits = ['1', '2', '3']
    assert list(glom(digits, Iter(int))) == [1, 2, 3]

    # Iter is lazy: two items drawn from the result advance the counter by two.
    cnt = count()
    incremented = glom(cnt, Iter(lambda t: t + 1))
    assert (next(incremented), next(incremented)) == (1, 2)
    assert next(cnt) == 2

    enumerated = glom(digits, (Iter(int), enumerate))
    assert list(enumerated) == [(0, 1), (1, 2), (2, 3)]

    # SKIP drops a single item, STOP ends the iteration.
    assert list(glom([1, SKIP, 2], Iter())) == [1, 2]
    assert list(glom([1, STOP, 2], Iter())) == [1]

    with pytest.raises(TypeError):
        Iter(nonexistent_kwarg=True)
def test_while():
    """Check ``Iter().takewhile()`` and ``Iter().dropwhile()``, including their reprs."""
    cnt = count()
    out = glom(cnt, Iter().takewhile(lambda x: x < 3))
    assert list(out) == [0, 1, 2]
    # The item that failed the predicate (3) was consumed, so the counter resumes at 4.
    assert next(cnt) == 4
    # The comparison must sit outside repr(): repr(a == b) is a non-empty
    # string and would make the assertion pass unconditionally.
    assert repr(Iter().takewhile(T.a)) == 'Iter().takewhile(T.a)'

    range_iter = iter(range(7))
    out = glom(range_iter, Iter().dropwhile(lambda x: x < 3 or x > 5))
    assert list(out) == [3, 4, 5, 6]  # 6 still here despite the x>5 above

    # A predicate that never fails drops everything.
    out = glom(range(10), Iter().dropwhile(lambda x: x >= 0).limit(10))
    assert list(out) == []

    # The predicate may itself be a spec (here: a tuple chaining T and a callable).
    out = glom(range(8), Iter().dropwhile((T.bit_length(), lambda x: x < 3)))
    assert list(out) == [4, 5, 6, 7]
    # Same parenthesis placement as for takewhile above.
    assert repr(Iter().dropwhile(T.a)) == 'Iter().dropwhile(T.a)'
def extraires_deputes(archive, deputes, deputes_partis, deputes_groupes):
    """Extract deputies from a zip archive into three CSV files.

    Every archive member whose name matches ``ACTEUR_RE`` is parsed with
    ``parser_deputes`` and reshaped with the ``spec_depute`` glom spec. The
    resulting record's ``"partis"`` and ``"groupes"`` entries are written to
    their own files; the rest of the record goes to the main deputies file.

    Args:
        archive: Zip archive to read, passed to ``ZipFile``.
        deputes: Output target for deputies; must provide ``.open("w")``.
        deputes_partis: Output target for party memberships.
        deputes_groupes: Output target for group memberships.
    """
    # The main file gets every spec_depute field except the two nested collections.
    champs_imbriques = ["groupes", "partis"]
    champs_deputes = [f for f in spec_depute if f not in champs_imbriques]

    # NOTE(review): the csv docs recommend opening csv files with newline="" —
    # confirm what .open() accepts on these targets before changing it.
    with ZipFile(archive) as arc:
        with deputes.open("w") as f_deputes, \
                deputes_partis.open("w") as f_partis, \
                deputes_groupes.open("w") as f_groupes:
            writer_deputes = csv.DictWriter(f_deputes, fieldnames=champs_deputes)
            writer_partis = csv.DictWriter(f_partis, fieldnames=spec_membre)
            writer_groupes = csv.DictWriter(
                f_groupes, fieldnames=[*spec_membre, "relation"]
            )
            for writer in (writer_deputes, writer_partis, writer_groupes):
                writer.writeheader()

            acteurs = filter(ACTEUR_RE.search, arc.namelist())
            pipeline = Iter(partial(parser_deputes, archive=arc)).map(spec_depute)
            for depute in glom(acteurs, pipeline):
                # Pop the nested collections so only flat fields remain for the main row.
                writer_partis.writerows(depute.pop("partis"))
                writer_groupes.writerows(depute.pop("groupes"))
                writer_deputes.writerow(depute)
def generer_fichier_deputes(
    deputes_path,
    groupes_path,
    partis_path,
    deputes_groupes_path,
    deputes_partis_path,
    dest,
):
    """Build the consolidated deputies CSV and write it LZMA-compressed to ``dest``.

    Group and party memberships whose ``date_fin`` is null are labelled
    ``"nom (sigle)"`` and joined onto the deputies table by ``code``. Each row
    is then reshaped with a glom spec and written with ``csv.DictWriter``.

    Args:
        deputes_path: CSV of deputies.
        groupes_path: CSV of groups (``code``, ``nom``, ``sigle`` columns are used).
        partis_path: CSV of parties (``code``, ``nom``, ``sigle`` columns are used).
        deputes_groupes_path: CSV linking deputies to groups.
        deputes_partis_path: CSV linking deputies to parties.
        dest: Path of the LZMA-compressed CSV to write.
    """
    deputes = pd.read_csv(deputes_path)

    # --- groups: one open-ended membership per deputy ---
    groupes = pd.read_csv(groupes_path)
    liens_groupes = pd.read_csv(deputes_groupes_path)
    liens_groupes = liens_groupes.join(
        groupes.set_index("code")[["nom", "sigle"]], on="code"
    )
    liens_groupes = liens_groupes[liens_groupes.date_fin.isnull()]
    liens_groupes = liens_groupes.sort_values(["code_depute", "relation"])
    # keep "P" (president) rather than "M" (member)
    liens_groupes = liens_groupes.drop_duplicates("code_depute", keep="last")
    deputes_groupes = liens_groupes.set_index("code_depute")
    deputes_groupes["groupe"] = (
        deputes_groupes.nom + " (" + deputes_groupes.sigle + ")"
    )

    # --- parties: open-ended memberships, reduced to a labelled Series ---
    partis = pd.read_csv(partis_path)
    liens_partis = pd.read_csv(deputes_partis_path)
    liens_partis = liens_partis.join(
        partis.set_index("code")[["nom", "sigle"]], on="code"
    )
    liens_partis = liens_partis[liens_partis.date_fin.isnull()]
    liens_partis = liens_partis.set_index("code_depute")
    deputes_partis = liens_partis.nom + " (" + liens_partis.sigle + ")"
    deputes_partis.name = "parti"

    deputes = deputes.join(deputes_groupes[["groupe", "relation"]], on=["code"])
    deputes = deputes.join(deputes_partis, on=["code"])

    # Columns copied straight from the row under the same name.
    champs_directs = [
        "code",
        "nom",
        "prenom",
        "sexe",
        "date_naissance",
        "legislature",
        "date_debut_mandat",
    ]

    with lzma.open(dest, "wt") as f, \
            id_from_file("circonscriptions_legislatives.csv") as id_circos, \
            id_from_file("deputes.csv") as id_deputes:
        # Insertion order of the spec is the column order of the output file.
        spec = {
            "id": Invoke(id_deputes).specs(code=T.code),
            "circonscription_id": Invoke(id_circos).specs(code=T.circonscription),
        }
        spec.update((c, getattr(T, c)) for c in champs_directs)
        # Missing (NaN) values fall back to the given default.
        spec.update(
            {
                "groupe": Coalesce(T.groupe, skip=pd.isna, default=""),
                "parti": Coalesce(T.parti, skip=pd.isna, default=""),
                "date_fin_mandat": Coalesce(
                    T.date_fin_mandat, skip=pd.isna, default=NULL
                ),
                "relation": Coalesce(T.relation, skip=pd.isna, default=""),
                "profession": Val(NULL),
            }
        )

        # The spec dict itself is passed as fieldnames, as in the original call.
        writer = csv.DictWriter(f, fieldnames=spec)
        writer.writeheader()
        writer.writerows(glom(deputes.itertuples(), Iter(spec)))
def test_map():
    """``Iter().map()`` applies a callable to every item and has a readable repr."""
    doubling_spec = Iter().map(lambda x: x * 2)
    doubled = glom(RANGE_5, doubling_spec)
    assert list(doubled) == [0, 2, 4, 6, 8]

    map_repr = repr(Iter().map(T.a.b))
    assert map_repr.startswith('Iter().map(T.a.b)')