def test_extract_csv_json_equal(input_type):
    """Delimited-text and JSON input with the same content extract identically.

    The ST_DATA header and rows are rendered as delimited lines (tab-separated
    when `input_type` is "tsv", comma-separated otherwise) and read through
    `au._read(..., input_type)`; the same rows are also read as JSON.  Both
    readings are passed to `au.extract` with identical keyword arguments and
    the two results must compare equal.

    NOTE(review): `input_type` is presumably supplied by a parametrizing
    caller/decorator that is not visible in this block.
    """
    separator = "\t" if input_type == "tsv" else ","
    columns = ST_DATA["header"]

    # Header line first, then one line per row with values in header order.
    header_line = separator.join(columns)
    data_lines = [
        separator.join(str(record[col]) for col in columns)
        for record in ST_DATA["rows"]
    ]
    delimited_lines = [header_line] + data_lines

    extract_kwargs = {
        "filename_format": "{age_group}//{now_dead}//{name}.csv",
        "url_format": "{name}_{debut_season}.com",
        "meta": ["group={age_group}"],
    }

    from_json = au.extract(
        *au._read(json_stream(ST_DATA["rows"]), "json"), **extract_kwargs)
    from_delimited = au.extract(
        *au._read(delimited_lines, input_type), **extract_kwargs)
    eq_(from_json, from_delimited)
def test_extract():
    """Check the per-row info and the subpath list returned by `au.extract`.

    For the ST_DATA rows, verifies the formatted URL, the formatted filename
    (the `//` separators of `filename_format` show up as ordinary path
    separators), the automatically collected `meta_args`, and each row's
    `subpath`, as well as the overall list of subpaths.
    """
    rows_info, all_subpaths = au.extract(
        ST_DATA["rows"],
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv")

    expected_subpaths = [
        "adult",
        "kid",
        op.join("adult", "no"),
        op.join("adult", "yes"),
        op.join("kid", "no"),
    ]
    eq_(all_subpaths, expected_subpaths)

    expected_urls = ["will_1.com", "bob_2.com", "scott_1.com", "max_2.com"]
    eq_([entry["url"] for entry in rows_info], expected_urls)

    expected_filenames = [
        op.join(*parts)
        for parts in [
            ("kid", "no", "will.csv"),
            ("adult", "yes", "bob.csv"),
            ("adult", "no", "scott.csv"),
            ("kid", "no", "max.csv"),
        ]
    ]
    eq_([entry["filename"] for entry in rows_info], expected_filenames)

    # One expected metadata dict per row, in row order.
    expected_meta = [
        {
            "name": name,
            "age_group": age_group,
            "debut_season": debut_season,
            "now_dead": now_dead,
        }
        for name, age_group, debut_season, now_dead in [
            ("will", "kid", "1", "no"),
            ("bob", "adult", "2", "yes"),
            ("scott", "adult", "1", "no"),
            ("max", "kid", "2", "no"),
        ]
    ]
    for entry, wanted in zip(rows_info, expected_meta):
        assert_dict_equal(entry["meta_args"], wanted)

    expected_row_subpaths = [
        op.join("kid", "no"),
        op.join("adult", "yes"),
        op.join("adult", "no"),
        op.join("kid", "no"),
    ]
    eq_([entry["subpath"] for entry in rows_info], expected_row_subpaths)
def test_extract_disable_autometa():
    """With `exclude_autometa="*"`, only explicitly requested metadata remains.

    Each row's `meta_args` is expected to hold nothing but the `group` entry
    produced by the `meta=["group={age_group}"]` argument.
    """
    rows_info, _subpaths = au.extract(
        ST_DATA["rows"],
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv",
        exclude_autometa="*",
        meta=["group={age_group}"])

    observed = [entry["meta_args"] for entry in rows_info]
    expected = [{"group": group} for group in ("kid", "adult", "adult", "kid")]
    eq_(observed, expected)
def test_extract_exclude_autometa_regexp():
    """With `exclude_autometa="ea"`, matching fields are left out of `meta_args`.

    The expected `meta_args` keep only `name` and `age_group`; the
    `debut_season` and `now_dead` fields (both containing "ea") are absent.
    """
    info, _ = au.extract(
        ST_DATA["rows"],
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv",
        exclude_autometa="ea")
    expects = [
        {"name": "will", "age_group": "kid"},
        {"name": "bob", "age_group": "adult"},
        {"name": "scott", "age_group": "adult"},
        {"name": "max", "age_group": "kid"},
    ]

    meta_args = [d["meta_args"] for d in info]
    # zip() stops at the shorter input, so without this check a short or
    # empty result would make the loop below pass without comparing anything.
    eq_(len(meta_args), len(expects))
    for actual, expect in zip(meta_args, expects):
        assert_dict_equal(actual, expect)