def crosswalk(table, api_obj):
    '''Apply every registered crosswalk that matches the table and API object.

    A rule matches when its column is present in api_obj.vars_and_vals and
    its schema equals table.__table_args__['schema']. For a matching rule the
    column's value string is split with splitter, each value is translated
    through the rule's mapping, and the results are re-joined with OR and
    written back to api_obj.vars_and_vals. A dict mapping falls back to the
    untranslated value for unknown keys; any other mapping is called as
    mapping(val, api_obj=api_obj). When the re-joined string differs from the
    original it is also recorded in api_obj.subs under the same column.

    api_obj is modified in place and returned.
    '''
    pums_schema = BasePums.get_schema_name()
    pums5_schema = BasePums5.get_schema_name()

    # Rules are tried in this order; more than one rule may rewrite the same
    # column during a single call.
    rules = [
        {"column": "industry_iocode", "schema": "bea", "mapping": industry_iocode_func},
        {"column": "commodity_iocode", "schema": "bea", "mapping": iocode_map},
        {"column": "naics", "schema": "bls", "mapping": pums_to_bls_naics_map},
        {"column": "naics", "schema": "bls", "mapping": pums_to_growth_map,
         "table": GrowthI, "avoid": CesYi},
        {"column": "soc", "schema": "bls", "mapping": pums_to_bls_soc_map},
        {"column": "soc", "schema": "onet", "mapping": onet_parents},
        {"column": "cip", "schema": "onet", "mapping": onet_cip_parents},
        # cbp uses same naics coding as bls
        {"column": "naics", "schema": "cbp", "mapping": pums_to_bls_naics_map},
        {"column": "naics", "schema": pums_schema, "mapping": naics_map},
        {"column": "cip", "schema": pums_schema, "mapping": truncate_cip},
        {"column": "geo", "schema": pums_schema, "mapping": pums_parent_puma},
        {"column": "naics", "schema": pums5_schema, "mapping": naics_map},
        {"column": "cip", "schema": pums5_schema, "mapping": truncate_cip},
        {"column": "geo", "schema": pums5_schema, "mapping": pums_parent_puma},
        {"column": "geo", "schema": "chr", "mapping": chr_parents},
    ]

    # Tables named by some rule's "table" key: such a table only accepts the
    # rules that target it and skips every other rule.
    # NOTE(review): the reverse does not hold -- a rule with a "table" key is
    # still applied to other tables of the same schema unless they are listed
    # under "avoid". Confirm this is intended.
    exclusive_tables = {rule["table"] for rule in rules if "table" in rule}

    for rule in rules:
        col = rule['column']
        translate = rule['mapping']
        only_for = rule.get('table')
        skip_for = rule.get('avoid')

        # Rule explicitly opts out of this table.
        if skip_for and table.full_name() == skip_for.full_name():
            continue
        # Rule is irrelevant to this request or to this table's schema.
        if col not in api_obj.vars_and_vals:
            continue
        if table.__table_args__['schema'] != rule['schema']:
            continue
        # Exclusive tables ignore rules that are not aimed at them.
        if table in exclusive_tables:
            aimed_here = only_for and only_for.__tablename__ == table.__tablename__
            if not aimed_here:
                continue

        original_str = api_obj.vars_and_vals[col]
        pieces = splitter(original_str)
        if isinstance(translate, dict):
            mapped = [translate.get(piece, piece) for piece in pieces]
        else:
            mapped = [translate(piece, api_obj=api_obj) for piece in pieces]

        joined = OR.join(mapped)
        api_obj.vars_and_vals[col] = joined

        # Record a substitution only when the crosswalk changed something.
        if original_str != joined:
            api_obj.subs[col] = joined

    return api_obj
''' Script used to generate the query that makes up the search table ''' from datausa.pums.abstract_models import BasePums pums_schema_name = BasePums.get_schema_name() # Industry and Occupation Z-scoring attrs = [("soc", "{}.yo".format(pums_schema_name), "avg_wage", [0, 1, 2, 3]), ("naics", "{}.yi".format(pums_schema_name), "num_ppl", [0, 1, 2])] qry = '''SELECT g.{0} as id, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.name as display, a.level::text as sumlevel, -1 as is_stem, a.url_name as url_name FROM {1} g LEFT JOIN pums_attrs.pums_{0} a ON (a.id = g.{0} and a.level = g.{0}_level) CROSS JOIN (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE {0}_level={3} AND year=2014) stats WHERE g.{0}_level = {3} AND g.year = 2014''' queries = [] for attr, table, metric, levels in attrs: for level in levels: queries.append(qry.format(attr, table, metric, level)) #print queries[0] # CIP codes cip_qry = '''SELECT g.{0}, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.name as display, a.level::text as sumlevel, a.is_stem as is_stem, a.url_name as url_name FROM {1} g LEFT JOIN attrs.course a ON (a.id = g.{0}) CROSS JOIN (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE char_length({0}) = {3} AND year=2014) stats
''' Script used to generate the query that makes up the search table ''' from datausa.pums.abstract_models import BasePums pums_schema_name = BasePums.get_schema_name() # Industry and Occupation Z-scoring attrs = [("soc", "{}.yo".format(pums_schema_name), "avg_wage", [0, 1, 2, 3]), ("naics", "{}.yi".format(pums_schema_name), "num_ppl", [0, 1, 2])] qry = '''SELECT g.{0} as id, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.name as display, a.level::text as sumlevel, -1 as is_stem, a.url_name as url_name, a.keywords as keywords FROM {1} g LEFT JOIN pums_attrs.pums_{0} a ON (a.id = g.{0} and a.level = g.{0}_level) CROSS JOIN (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE {0}_level={3} AND year=2015) stats WHERE g.{0}_level = {3} AND g.year = 2015''' queries = [] for attr, table, metric, levels in attrs: for level in levels: queries.append(qry.format(attr, table, metric, level)) #print queries[0] # CIP codes cip_qry = '''SELECT g.{0}, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.name as display, a.level::text as sumlevel, a.is_stem as is_stem, a.url_name as url_name, a.keywords as keywords FROM {1} g LEFT JOIN attrs.course a ON (a.id = g.{0})