예제 #1
0
def gen_combos(tables, colname, val):
    '''Generate the logical condition combinations required to optionally
    join two tables.

    The tables considered are those returned by ``tables_by_col(tables,
    colname)``. For every pair of them one OR-condition is produced that
    accepts a row when:

    * both tables match the requested value(s), or
    * the first table matches and the second table's column equals None, or
    * the first table's column equals None and the second table matches.

    When ``colname`` is ``consts.YEAR`` and ``val`` is ``consts.LATEST`` or
    ``consts.OLDEST``, the value is looked up per table in
    ``TableManager.table_years``. If exactly one table is relevant, a single
    equality condition against ``val`` is returned. Otherwise the result is
    an empty list.
    '''
    relevant_tables = tables_by_col(tables, colname)
    table_pairs = list(itertools.combinations(relevant_tables, 2))

    if not table_pairs:
        if len(relevant_tables) != 1:
            return []
        # Only a single table is referenced: use the bare attribute name
        # (anything before the last "." is dropped).
        safe_colname = colname.rsplit(".", 1)[-1]
        return [getattr(relevant_tables[0], safe_colname) == val]

    combos = []
    for left, right in table_pairs:
        left_vals = splitter(val)
        right_vals = splitter(val)
        if colname == consts.YEAR and val in (consts.LATEST, consts.OLDEST):
            # Each table carries its own latest/oldest year.
            left_years = TableManager.table_years[left.full_name()]
            right_years = TableManager.table_years[right.full_name()]
            left_vals = [left_years[val]]
            right_vals = [right_years[val]]

        both_match = and_(
            getattr(left, colname).in_(left_vals),
            getattr(right, colname).in_(right_vals))
        # `== None` is deliberate: the comparison builds an expression
        # object rather than a Python bool.
        only_left = and_(
            getattr(left, colname).in_(left_vals),
            getattr(right, colname) == None)
        only_right = and_(
            getattr(left, colname) == None,
            getattr(right, colname).in_(right_vals))
        combos.append(or_(both_match, only_left, only_right))
    return combos
예제 #2
0
def where_filters(table, where_str):
    """Translate a ``where`` query string into a list of filter expressions.

    ``where_str`` is split into clauses with ``splitter``. Each clause has
    the form ``<column>:<condition>``, where ``<column>`` is either a single
    attribute name on ``table`` or two attribute names joined by ``/`` (a
    ratio of two columns). ``<condition>`` is decoded by
    ``parse_method_and_val`` into ``(method, value, negate)``.

    Supported methods:

    * ``ne`` / ``gt`` / ``lt`` -- comparison of a single column with value.
    * ``rt`` / ``rg`` -- ratio of the two columns is less / greater than
      value, guarded so that the denominator is non-zero.
    * anything else -- looked up as a method on the column and called with
      value.

    Returns an empty list when ``where_str`` is falsy.

    Raises:
        ValueError: if a ratio method is used with a single column, or a
            single-column method is used with a ratio column. A clause that
            does not contain exactly one ``:`` also raises ValueError from
            the tuple unpacking.
    """
    if not where_str:
        return []
    filts = []

    for where in splitter(where_str):
        colname, cond = where.split(":")
        # Reset both on every clause so that a column resolved for an
        # earlier clause can never leak into this one.
        col = None
        cols = None
        if "/" in colname:
            cols = [getattr(table, c) for c in colname.split("/")]
        else:
            col = getattr(table, colname)
        method, value, negate = parse_method_and_val(cond)

        if method in ("rt", "rg"):
            if cols is None:
                raise ValueError(
                    "method '{}' requires a ratio column 'a/b', got '{}'"
                    .format(method, colname))
            ratio = cols[0] / cols[1]
            compare = ratio < value if method == "rt" else ratio > value
            # Guard against division by zero in the generated expression.
            expr = and_(cols[1] != 0, compare)
        else:
            if col is None:
                raise ValueError(
                    "method '{}' cannot be applied to ratio column '{}'"
                    .format(method, colname))
            if method == "ne":
                expr = col != value
            elif method == "gt":
                expr = col > value
            elif method == "lt":
                expr = col < value
            else:
                expr = getattr(col, method)(value)

        if negate:
            expr = ~expr
        filts.append(expr)
    return filts
예제 #3
0
파일: api.py 프로젝트: DataUSA/datausa-api
def where_filters(table, where_str):
    """Build filter expressions from a ``where`` query string.

    The string is split into clauses by ``splitter``; every clause looks
    like ``<column>:<condition>``. ``<column>`` names one attribute of
    ``table``, or two attributes separated by ``/`` when a ratio of columns
    is being filtered. ``parse_method_and_val`` turns ``<condition>`` into
    a ``(method, value, negate)`` triple.

    Methods ``ne``, ``gt`` and ``lt`` compare a single column to value;
    ``rt`` and ``rg`` test whether the ratio of the two columns is below /
    above value (only where the denominator is non-zero); any other method
    name is called on the column with value as its argument. When
    ``negate`` is truthy the resulting expression is inverted with ``~``.

    Returns an empty list for a falsy ``where_str``.

    Raises:
        ValueError: when the method and the column form do not match
            (ratio method on a single column, or single-column method on a
            ratio), or when a clause does not split into exactly two parts
            on ``:``.
    """
    if not where_str:
        return []
    filts = []

    for where in splitter(where_str):
        colname, cond = where.split(":")
        # Both are cleared per clause; otherwise a stale column from the
        # previous clause would be reused silently.
        col = None
        cols = None
        if "/" in colname:
            cols = [getattr(table, c) for c in colname.split("/")]
        else:
            col = getattr(table, colname)
        method, value, negate = parse_method_and_val(cond)

        is_ratio_method = method in ("rt", "rg")
        if is_ratio_method and cols is None:
            raise ValueError(
                "method '{}' requires a ratio column 'a/b', got '{}'"
                .format(method, colname))
        if not is_ratio_method and col is None:
            raise ValueError(
                "method '{}' cannot be applied to ratio column '{}'"
                .format(method, colname))

        if method == "ne":
            expr = col != value
        elif method == "gt":
            expr = col > value
        elif method == "lt":
            expr = col < value
        elif method == "rt":
            # The non-zero check keeps the division well defined.
            expr = and_(cols[1] != 0, cols[0] / cols[1] < value)
        elif method == "rg":
            expr = and_(cols[1] != 0, cols[0] / cols[1] > value)
        else:
            expr = getattr(col, method)(value)

        if negate:
            expr = ~expr
        filts.append(expr)
    return filts
예제 #4
0
def multitable_value_filters(tables, api_obj):
    '''Examine the values passed in query args (e.g. year=2014 or
    geo=04000US25) and build filters according to the crosswalk mode.

    If auto-crosswalk is not enabled, special logic (gen_combos) is required
    to preserve null values so the user will see that no value is available.
    Otherwise, when auto-crosswalk is enabled, one filter per related table
    is produced: a latest/oldest year request is resolved through
    ``TableManager.table_years`` (and recorded via ``api_obj.set_year``),
    while any other value is first passed through ``crosswalk`` for that
    table.

    Return the list of filters to be applied.
    '''
    filts = []

    for colname, val in api_obj.vars_and_vals.items():
        related_tables = tables_by_col(tables, colname)

        if not api_obj.auto_crosswalk:
            filts.extend(gen_combos(related_tables, colname, val))
            continue

        for table in related_tables:
            wants_edge_year = (colname == consts.YEAR
                               and val in (consts.LATEST, consts.OLDEST))
            if wants_edge_year:
                my_year = TableManager.table_years[table_name(table)][val]
                # Rows whose year equals None are kept as well.
                filt = or_(table.year == my_year, table.year == None)
                api_obj.set_year(my_year)
            else:
                # Crosswalk a throwaway ApiObject holding only this column.
                single_var_obj = ApiObject(vars_and_vals={colname: val},
                                           limit=None,
                                           exclude=None)
                walked = crosswalk(table, single_var_obj)
                new_vals = splitter(walked.vars_and_vals[colname])
                filt = getattr(table, colname).in_(new_vals)
            filts.append(filt)
    return filts
예제 #5
0
def where_filters(tables, api_obj):
    '''Process the where query argument from an API call.

    ``api_obj.where`` is split into clauses with ``splitter``. Each clause
    has the form ``<target_var>.<filter_col>:<condition>``.

    * When ``<filter_col>`` is ``sumlevel``, the first key of
      ``api_obj.shows_and_levels`` is used to find a ``<key>_filter``
      attribute on the first table that has ``target_var``. If that
      attribute exists it is called with the condition.
    * Otherwise, for every column returned by ``get_column_from_tables``,
      the filter column is looked up on that column's table and combined
      with the condition through ``make_filter``.

    Returns the list of filters, or an empty list when ``api_obj.where`` is
    falsy. A clause without exactly one ``:`` or without a ``.`` in its
    column part raises ValueError from the tuple unpacking.
    '''
    if not api_obj.where:
        return []
    filts = []

    for where in splitter(api_obj.where):
        colname, cond = where.split(":")
        target_var, filt_col = colname.rsplit(".", 1)

        if filt_col == 'sumlevel':
            # list() keeps this working on Python 3, where dict views
            # cannot be indexed.
            level_name = list(api_obj.shows_and_levels.keys())[0]
            # The result is unused here. The call is kept in case the
            # lookup validates target_var -- NOTE(review): confirm and
            # drop it if it has no such effect.
            get_column_from_tables(tables, target_var, False)
            table = tables_by_col(tables, target_var, return_first=True)
            filter_func = getattr(
                table, "{}_filter".format(level_name), None)
            if hasattr(table, "{}_filter".format(level_name)):
                filts.append(filter_func(cond))
        else:
            for col in get_column_from_tables(tables, target_var, False):
                # Use a separate name for the resolved column: rebinding
                # filt_col would break the lookup for the next table.
                table_filt_col = getattr(col.class_, filt_col)
                filts.append(make_filter(table_filt_col, cond))
    return filts
예제 #6
0
def copy_where_literals(api_obj):
    """Copy ``where`` clauses into ``api_obj.vars_and_vals``.

    Each clause of ``api_obj.where`` (as split by ``splitter``) has the
    form ``<column>:<condition>``. The condition is stored under the column
    name unless that name is already present in ``vars_and_vals``. Nothing
    happens when ``api_obj`` has no ``where`` attribute or it is falsy.

    Returns the same ``api_obj`` that was passed in.
    """
    if not (hasattr(api_obj, "where") and api_obj.where):
        return api_obj

    for clause in splitter(api_obj.where):
        colname, cond = clause.split(":")
        # Existing entries win over values taken from the where clause.
        api_obj.vars_and_vals.setdefault(colname, cond)
    return api_obj
예제 #7
0
파일: api.py 프로젝트: DataUSA/datausa-api
def copy_where_literals(api_obj):
    """Mirror the clauses of ``api_obj.where`` into ``api_obj.vars_and_vals``.

    ``api_obj.where`` is split with ``splitter``; each resulting clause is
    expected to look like ``<column>:<condition>``. A column that is not
    yet a key of ``vars_and_vals`` is added with its condition as the
    value; existing keys are left untouched. If ``api_obj`` lacks a
    ``where`` attribute, or it is falsy, the object is returned unchanged.

    Returns ``api_obj`` itself.
    """
    has_where = hasattr(api_obj, "where") and api_obj.where
    if has_where:
        for clause in splitter(api_obj.where):
            key, literal = clause.split(":")
            if key in api_obj.vars_and_vals:
                # Never overwrite a value that was given explicitly.
                continue
            api_obj.vars_and_vals[key] = literal
    return api_obj
예제 #8
0
def crosswalk(table, api_obj):
    '''Given a table and an API object, determine if any crosswalks need
    to be performed and apply them.

    A fixed registry lists, per (column, schema) combination, the mapping
    used to translate requested values. A mapping is either a dict (values
    missing from it are kept as they are) or a callable invoked as
    ``mapping(val, api_obj=api_obj)``. A registry entry may also name a
    table it applies to exclusively and a table it must skip.

    Entries are processed in order. For each entry whose column is present
    in ``api_obj.vars_and_vals`` and whose schema equals the table's schema,
    the values are split with ``splitter``, translated, re-joined with
    ``OR`` and written back. If the joined string changed, it is also
    recorded in ``api_obj.subs``.

    Returns the (mutated) ``api_obj``.
    '''
    pums_schema_name = BasePums.get_schema_name()
    pums5_schema_name = BasePums5.get_schema_name()

    # (column, schema, mapping, exclusive target table, table to avoid)
    registered_crosswalks = [
        ("industry_iocode", "bea", industry_iocode_func, None, None),
        ("commodity_iocode", "bea", iocode_map, None, None),
        ("naics", "bls", pums_to_bls_naics_map, None, None),
        ("naics", "bls", pums_to_growth_map, GrowthI, CesYi),
        ("soc", "bls", pums_to_bls_soc_map, None, None),
        ("soc", "onet", onet_parents, None, None),
        ("cip", "onet", onet_cip_parents, None, None),

        # cbp uses same naics coding as bls
        ("naics", "cbp", pums_to_bls_naics_map, None, None),
        ("naics", pums_schema_name, naics_map, None, None),
        ("cip", pums_schema_name, truncate_cip, None, None),
        ("geo", pums_schema_name, pums_parent_puma, None, None),
        ("naics", pums5_schema_name, naics_map, None, None),
        ("cip", pums5_schema_name, truncate_cip, None, None),
        ("geo", pums5_schema_name, pums_parent_puma, None, None),
        ("geo", "chr", chr_parents, None, None),
    ]
    # Tables that only accept the crosswalk registered specifically for them.
    exclusives = {
        entry[3] for entry in registered_crosswalks if entry[3] is not None
    }

    for column, schema, mapping, target_table, avoid in registered_crosswalks:
        if avoid and table.full_name() == avoid.full_name():
            continue

        applies = (column in api_obj.vars_and_vals
                   and table.__table_args__['schema'] == schema)
        if not applies:
            continue

        if table in exclusives and (
                not target_table
                or target_table.__tablename__ != table.__tablename__):
            continue

        curr_vals_str = api_obj.vars_and_vals[column]
        curr_vals = splitter(curr_vals_str)
        if isinstance(mapping, dict):
            new_vals = [mapping.get(val, val) for val in curr_vals]
        else:
            new_vals = [mapping(val, api_obj=api_obj) for val in curr_vals]
        new_val_str = OR.join(new_vals)
        api_obj.vars_and_vals[column] = new_val_str

        # detect if any changes actually happened
        if curr_vals_str != new_val_str:
            api_obj.subs[column] = new_val_str
    return api_obj
예제 #9
0
def crosswalk(table, api_obj):
    '''Given a table and an API object, apply whichever registered
    crosswalks are relevant to that table.

    Every registry entry names a column, a schema and a mapping, and may
    optionally name a ``table`` (the only table the entry may be used
    with, among tables marked exclusive) and an ``avoid`` table (skipped
    outright). The mapping is a dict -- unknown values pass through
    unchanged -- or a callable invoked as ``mapping(val, api_obj=api_obj)``.

    For each entry, in registry order, whose column appears in
    ``api_obj.vars_and_vals`` and whose schema matches
    ``table.__table_args__['schema']``, the current value string is split
    with ``splitter``, mapped, joined back together with ``OR`` and stored.
    A changed value string is additionally noted in ``api_obj.subs``.

    Returns ``api_obj`` after these in-place updates.
    '''
    pums_schema_name = BasePums.get_schema_name()
    pums5_schema_name = BasePums5.get_schema_name()

    registered_crosswalks = [
        {"column": "industry_iocode", "schema": "bea", "mapping": industry_iocode_func},
        {"column": "commodity_iocode", "schema": "bea", "mapping": iocode_map},
        {"column": "naics", "schema": "bls", "mapping": pums_to_bls_naics_map},
        {"column": "naics", "schema": "bls", "mapping": pums_to_growth_map, "table": GrowthI, "avoid": CesYi},
        {"column": "soc", "schema": "bls", "mapping": pums_to_bls_soc_map},
        {"column": "soc", "schema": "onet", "mapping": onet_parents},
        {"column": "cip", "schema": "onet", "mapping": onet_cip_parents},

        # cbp uses same naics coding as bls
        {"column": "naics", "schema": "cbp", "mapping": pums_to_bls_naics_map},
        {"column": "naics", "schema": pums_schema_name, "mapping": naics_map},
        {"column": "cip", "schema": pums_schema_name, "mapping": truncate_cip},
        {"column": "geo", "schema": pums_schema_name, "mapping": pums_parent_puma},
        {"column": "naics", "schema": pums5_schema_name, "mapping": naics_map},
        {"column": "cip", "schema": pums5_schema_name, "mapping": truncate_cip},
        {"column": "geo", "schema": pums5_schema_name, "mapping": pums_parent_puma},
        {"column": "geo", "schema": "chr", "mapping": chr_parents}

    ]
    # Tables for which only their own dedicated entry may be applied.
    exclusives = {}
    for entry in registered_crosswalks:
        if "table" in entry:
            exclusives[entry["table"]] = True

    for entry in registered_crosswalks:
        column = entry['column']
        schema = entry['schema']
        mapping = entry['mapping']
        target_table = entry.get('table')
        avoid = entry.get('avoid')

        if avoid and table.full_name() == avoid.full_name():
            continue

        if not (column in api_obj.vars_and_vals.keys()
                and table.__table_args__['schema'] == schema):
            continue

        if table in exclusives:
            is_own_entry = (
                target_table
                and target_table.__tablename__ == table.__tablename__)
            if not is_own_entry:
                continue

        curr_vals_str = api_obj.vars_and_vals[column]
        if isinstance(mapping, dict):
            new_vals = [mapping.get(val, val)
                        for val in splitter(curr_vals_str)]
        else:
            new_vals = [mapping(val, api_obj=api_obj)
                        for val in splitter(curr_vals_str)]
        new_val_str = OR.join(new_vals)
        api_obj.vars_and_vals[column] = new_val_str

        # detect if any changes actually happened
        if new_val_str != curr_vals_str:
            api_obj.subs[column] = new_val_str
    return api_obj
예제 #10
0
파일: api.py 프로젝트: ANTco/datausa-api
def process_value_filters(table, vars_and_vals, api_obj):
    """Build one filter expression per entry of ``vars_and_vals``.

    * A ``consts.YEAR`` entry whose value is ``consts.LATEST`` or
      ``consts.OLDEST`` is resolved to a concrete year through
      ``TableManager.table_years`` for this table; the year is compared
      with ``table.year`` and recorded via ``api_obj.set_year``.
    * A value containing ``consts.OR`` is split with ``splitter`` and
      becomes an ``in_`` test on the column.
    * Any other value becomes an equality test on the column.

    Returns the filters in the iteration order of ``vars_and_vals``.
    """
    filts = []
    for var, val in vars_and_vals.items():
        if var == consts.YEAR and val in (consts.LATEST, consts.OLDEST):
            my_year = TableManager.table_years[table_name(table)][val]
            filt = table.year == my_year
            api_obj.set_year(my_year)
        else:
            has_alternatives = consts.OR in val
            column = getattr(table, var)
            if has_alternatives:
                filt = column.in_(splitter(val))
            else:
                filt = column == val
        filts.append(filt)
    return filts
예제 #11
0
def process_value_filters(table, vars_and_vals, api_obj):
    """Turn the ``vars_and_vals`` mapping into a list of filters on ``table``.

    For each ``(var, val)`` pair:

    * if ``var`` is ``consts.YEAR`` and ``val`` is ``consts.LATEST`` or
      ``consts.OLDEST``, the actual year is read from
      ``TableManager.table_years`` for this table, passed to
      ``api_obj.set_year`` and used in an equality test on ``table.year``;
    * if ``val`` contains ``consts.OR``, the pieces returned by
      ``splitter`` are used in an ``in_`` test on the column;
    * otherwise the column is tested for equality with ``val``.

    Returns the list of filters, one per pair.
    """
    filts = []
    for var, val in vars_and_vals.items():
        is_edge_year = (var == consts.YEAR
                        and val in [consts.LATEST, consts.OLDEST])
        if is_edge_year:
            years = TableManager.table_years[table_name(table)]
            my_year = years[val]
            filts.append(table.year == my_year)
            api_obj.set_year(my_year)
            continue

        if consts.OR in val:
            filts.append(getattr(table, var).in_(splitter(val)))
        else:
            filts.append(getattr(table, var) == val)
    return filts