def test_list_defaulter():
    """A list-based defaulter keeps listed values and substitutes the rest.

    Values found in the allowed list pass through unchanged; anything else
    is expected to come back as the fallback string "other".
    """
    allowed = ["foo", "bar"]
    standardize = namestand.combine([
        namestand.defaulter(allowed, "other"),
    ])
    names = ["gah", "bar", "foo"]
    expected = ["other", "bar", "foo"]
    assert standardize(names) == expected
def test_translator():
    """A chain of translators abbreviates column names as expected.

    The converter first applies ``namestand.downscore`` and then each
    (pattern, replacement) pair below, in order. ``cols`` is defined
    outside this function; only a few positions of the result are checked.
    """
    abbreviations = [
        ("estimate_", "est_"),
        ("percent_", "pct_"),
        ("margin_of_error_", "moe_"),
        ("employment_status", "status"),
        ("population", "pop"),
        ("_years_and_over", "y"),
        ("_civilian", "_civ"),
        ("_labor_force", "_lf"),
    ]
    steps = [namestand.downscore]
    for pattern, replacement in abbreviations:
        steps.append(namestand.translator(pattern, replacement))
    convert = namestand.combine(steps)

    converted = convert(cols)
    assert converted[2] == "geography"
    assert converted[3] == "est_status_pop_16y"
    assert converted[6] == "pct_moe_status_pop_16y"
    assert converted[-1] == "pct_moe_status_in_lf_civ_lf_employed"
import glob
import itertools
import os
import re
import sys

import namestand
import us


def flatten(x):
    """Concatenate an iterable of iterables into a single flat list."""
    return list(itertools.chain.from_iterable(x))


# Short alias for building translation steps.
t = namestand.translator

# (pattern, replacement) pairs applied, in this order, after downscoring.
# Order matters: "recent_decision_date" is handled before the shorter
# "decision_date" that it contains.
_TRANSLATION_RULES = [
    (re.compile(r"address(\d)"), r"address_\1"),
    (re.compile(r"(number|nbr)"), r"no"),
    (re.compile(r"(visa_type|visa_class|case_type)"), "visa_type"),
    ("recent_decision_date", "last_event_date"),
    ("decision_date", "last_event_date"),
    ("last_sig_event", "case_status"),
    ("case_num", "case_no"),
    ("emp_", "employer_"),
    ("num_aliens", "no_workers_requested"),
    # NOTE(review): "requsted" looks like a misspelling present in some
    # input headers -- confirm against the source files.
    ("no_workers_requsted", "no_workers_requested"),
    ("npc_submitted_date", "case_received_date"),
]

# Column-name standardizer: downscore first, then every rule above.
standardizer = namestand.combine(
    [namestand.downscore] + list(itertools.starmap(t, _TRANSLATION_RULES))
)

# Matches a fiscal-year marker such as "FY2015" or "FY15".
year_pat = re.compile(r"FY(\d+)")


def get_fy_from_path(path):
    """Return the fiscal year encoded in the last component of *path*.

    The text after the final "/" is searched for ``FY<digits>``. A number
    of 99 or less is treated as a two-digit year and offset by 2000;
    anything larger is returned as-is.

    Raises AttributeError if the last component contains no ``FY<digits>``
    marker (the failed search returns None).
    """
    filename = path.rsplit("/", 1)[-1]
    year = int(year_pat.search(filename).group(1))
    if year <= 99:
        year += 2000
    return year


# Matches "H-2A", "H2A", "H-2B" or "H2B", capturing the trailing letter.
visa_type_pat = re.compile(r"H-?2(A|B)")
def test_fn_defaulter():
    """``falsey_replacer`` swaps falsey entries for the given placeholder.

    ``None`` and ``False`` are expected to become "NOPE"; the non-empty
    strings are expected to pass through untouched.
    """
    replace_falsey = namestand.combine([
        namestand.falsey_replacer("NOPE"),
    ])
    values = [None, False, "hi", "there"]
    expected = ["NOPE", "NOPE", "hi", "there"]
    assert replace_falsey(values) == expected
import six


def flatten(x):
    """Concatenate an iterable of iterables into a single flat list."""
    return list(itertools.chain.from_iterable(x))


# Short alias for building translation steps.
t = namestand.translator

# Column-name standardizer: downscore first, then apply each translation in
# the order listed. Order matters where one pattern contains another (e.g.
# "recent_decision_date" before "decision_date", "att_agent" before "att_").
standardizer = namestand.combine([
    namestand.downscore,
    t(re.compile(r"address(\d)"), r"address_\1"),
    t(re.compile(r"(number|nbr)"), r"no"),
    t(re.compile(r"(visa_type|visa_class|case_type)"), "visa_type"),
    t("recent_decision_date", "last_event_date"),
    t("decision_date", "last_event_date"),
    t("last_sig_event", "case_status"),
    t("case_num", "case_no"),
    t("emp_", "employer_"),
    t("num_aliens", "n_requested"),
    # NOTE(review): "requsted" looks like a misspelling present in some
    # input headers -- confirm against the source files.
    t("no_workers_requsted", "n_requested"),
    # This rule used to be listed twice in a row; the second copy could
    # never match once the first had run, so only one is kept.
    t("no_workers_requested", "n_requested"),
    t("no_workers_certified", "n_certified"),
    t("npc_submitted_date", "case_received_date"),
    t("att_agent", "agent_attorney"),
    t("att_", "agent_attorney_"),
    t("occ_title", "job_title"),
    t("alien_work", "worksite"),
    t("worksite_location", "worksite"),
    # Anchored: only rewrites names that are exactly one of these two.
    t(re.compile("^(agent_attorney_firm|lawfirm_name)$"), "agent_attorney_firm_name"),
])

# Matches a fiscal-year marker such as "FY2015" or "FY15".
year_pat = re.compile(r"FY(\d+)")