def required_tables(cls, api_obj):
    '''Greedily pick the tables needed to cover every variable of a query.

    The variables to cover are ``api_obj.vars_needed`` plus
    ``api_obj.where_vars()``, plus ``api_obj.order`` when it is set and
    listed in ``cls.possible_variables``. On every pass the candidates
    returned by ``cls.list_partial_tables`` are ranked by their score
    (highest first), one table is taken, and that table's column names are
    removed from the set of variables still to be covered.

    Once a first table has been selected, a candidate is only accepted if
    it shares at least one column name with the tables picked so far; for
    now that is the only definition of "joinable".

    :param api_obj: query description providing ``vars_needed``,
        ``where_vars()`` and ``order``.
    :returns: list of the selected tables, in selection order.
    :raises DataUSAException: when no candidate covers the remaining
        variables, when no candidate can be joined to the tables already
        selected, or when the selected table covers none of the remaining
        variables.
    '''
    vars_needed = api_obj.vars_needed + api_obj.where_vars()
    if api_obj.order and api_obj.order in cls.possible_variables:
        vars_needed = vars_needed + [api_obj.order]

    # Variables that still have to be provided by some table.
    universe = set(vars_needed)
    tables_to_use = []
    # Column names of every table selected so far (used for joinability).
    covered_cols = set()

    while universe:
        candidates = cls.list_partial_tables(universe, api_obj)
        if not candidates:
            # Previously this surfaced as a bare IndexError from pop(0).
            raise DataUSAException(
                "no table found for variables: {}".format(
                    ", ".join(str(var) for var in universe)))

        # Highest score first; ties keep the order of candidates.items().
        ranked = sorted(candidates.items(), key=operator.itemgetter(1),
                        reverse=True)

        for tbl, _score in ranked:
            tbl_cols = set(str(c.key) for c in get_columns(tbl))
            # The very first table needs no join partner; later ones must
            # share a column name with what has already been selected.
            if not tables_to_use or covered_cols & tbl_cols:
                break
        else:
            raise DataUSAException("can't join tables!")

        if not universe & tbl_cols:
            # Selecting this table would make no progress, so the loop
            # would never terminate.
            raise DataUSAException(
                "no table found for variables: {}".format(
                    ", ".join(str(var) for var in universe)))

        tables_to_use.append(tbl)
        covered_cols |= tbl_cols
        # remove the acquired columns from the universe
        universe -= tbl_cols

    return tables_to_use
def query(table, api_obj):
    '''Build and run the query described by ``api_obj`` against ``table``.

    Filters are collected from ``process_value_filters``, ``where_filters``
    and ``sumlevel_filtering``. If any entry of
    ``api_obj.shows_and_levels`` differs from ``consts.ALL`` and the table
    defines ``gen_show_level_filters``, those filters are added as well.

    When ``api_obj.values`` is set, the selected columns are the table's
    primary-key columns followed by ``api_obj.values``; otherwise they are
    whatever ``get_columns(table)`` returns.

    :param table: model class exposing ``query`` and ``__table__``.
    :param api_obj: request description (``vars_and_vals``,
        ``shows_and_levels``, ``values``, ``where``, ``order``, ``sort``,
        ``limit``, ``subs``).
    :returns: the result of ``simple_format`` for the fetched rows.
    :raises DataUSAException: when ``api_obj.order`` is not the key of one
        of the table's columns.
    '''
    vars_and_vals = api_obj.vars_and_vals
    shows_and_levels = api_obj.shows_and_levels
    values = api_obj.values

    filters = process_value_filters(table, vars_and_vals)
    filters += where_filters(table, api_obj.where)
    filters += sumlevel_filtering(table, api_obj)

    if values:
        pk = [col for col in table.__table__.columns if col.primary_key]
        cols = pk + values
    else:
        cols = get_columns(table)

    needs_show_filter = any(v != consts.ALL
                            for v in shows_and_levels.values())
    if needs_show_filter and hasattr(table, "gen_show_level_filters"):
        filters += table.gen_show_level_filters(shows_and_levels)

    qry = table.query.with_entities(*cols).filter(*filters)

    if api_obj.order:
        # Never interpolate the requested order into SQL text: resolve it
        # to one of the table's own columns and let SQLAlchemy render it.
        sortable = dict((col.key, col) for col in table.__table__.columns)
        if api_obj.order not in sortable:
            raise DataUSAException("Bad order parameter", api_obj.order)
        order_col = sortable[api_obj.order]
        # Anything other than an explicit "desc" sorts ascending.
        if api_obj.sort == "desc":
            qry = qry.order_by(order_col.desc())
        else:
            qry = qry.order_by(order_col.asc())

    if api_obj.limit:
        qry = qry.limit(api_obj.limit)

    data = qry.all()
    return simple_format(table, cols, data, api_obj.subs)
def query(table, api_obj, stream=False):
    '''Build the query described by ``api_obj`` and format its result.

    Filters come from ``process_value_filters``, ``where_filters`` and
    ``sumlevel_filtering``. The selected columns are the primary-key
    columns not already named in ``api_obj.values`` followed by the
    requested values, or ``get_columns(table)`` when no values are given;
    columns named in ``api_obj.exclude`` are then dropped.

    Optional table hooks are applied when present: ``crosswalk_join``
    rewrites the base query, and ``JOINED_FILTER`` routes the query and
    filters through ``handle_join``. When streaming, or when
    ``api_obj.display_names`` is set, ``use_attr_names`` may replace both
    the query and the column list.

    :param table: model class exposing ``query`` and ``__table__``.
    :param api_obj: request description (``vars_and_vals``, ``values``,
        ``exclude``, ``where``, ``display_names``, ``order``, ``sort``,
        ``limit``).
    :param stream: when true, return ``stream_format(...)`` instead of
        ``simple_format(...)``.
    :returns: the formatted result of the (not yet executed) query.
    :raises DataUSAException: when ``api_obj.order`` is neither in
        ``TableManager.possible_variables`` nor ``'abs(pct_change)'``.
    '''
    vars_and_vals = api_obj.vars_and_vals
    values = api_obj.values
    exclude = api_obj.exclude

    filters = process_value_filters(table, vars_and_vals, api_obj)
    filters += where_filters(table, api_obj.where)
    filters += sumlevel_filtering(table, api_obj)

    if values:
        pk = [col for col in table.__table__.columns
              if col.primary_key and col.key not in values]
        cols = pk + [getattr(table, col_name) for col_name in values]
    else:
        cols = get_columns(table)

    if exclude:
        # A column may be a plain name or an object with a ``key``. Resolve
        # the name first, then test it: the previous ``and ... or``
        # expression fell through to ``col.key`` for an excluded string
        # column and raised AttributeError.
        cols = [col for col in cols
                if (col if isinstance(col, basestring) else col.key)
                not in exclude]

    qry = table.query
    if hasattr(table, "crosswalk_join"):
        qry = table.crosswalk_join(qry)

    if stream or api_obj.display_names:
        qry, cols = use_attr_names(table, qry, cols)
    qry = qry.with_entities(*cols)

    if hasattr(table, "JOINED_FILTER"):
        qry, filters = handle_join(qry, filters, table, api_obj)

    qry = qry.filter(*filters)

    if api_obj.order:
        # 'abs(pct_change)' is the one computed ordering that is allowed
        # even though it is not a plain variable name.
        order_by_abs_change = api_obj.order == 'abs(pct_change)'
        if (not order_by_abs_change and
                api_obj.order not in TableManager.possible_variables):
            raise DataUSAException("Bad order parameter", api_obj.order)

        if order_by_abs_change:
            target_col = func.abs(table.pct_change)
        else:
            target_col = getattr(table, api_obj.order)

        # Anything other than an explicit "desc" sorts ascending; NULLs
        # are always placed last.
        direction = desc if api_obj.sort == "desc" else asc
        qry = qry.order_by(direction(target_col).nullslast())

    if api_obj.limit:
        qry = qry.limit(api_obj.limit)

    if stream:
        return stream_format(table, cols, qry, api_obj)
    return simple_format(table, cols, qry, api_obj)
def table_has_some_cols(cls, table, vars_needed):
    '''
    Count how many of the required variables are columns of ``table``.

    Returns the number of names in ``vars_needed`` that match the key of a
    column reported by ``get_columns(table)``, or None when nothing
    matches. A single shared variable is enough to be counted; no minimum
    overlap is enforced.
    '''
    column_keys = {column.key for column in get_columns(table)}
    shared = column_keys.intersection(vars_needed)
    return len(shared) if shared else None  # TODO review this
def table_has_cols(cls, table, vars_needed):
    '''Return True when every name in ``vars_needed`` is the key of a
    column reported by ``get_columns(table)``.'''
    available = {column.key for column in get_columns(table)}
    return available.issuperset(vars_needed)
def table_has_cols(cls, table, vars_needed):
    '''Tell whether ``table`` provides all of the required variables.

    Compares the names in ``vars_needed`` with the keys of the columns
    returned by ``get_columns(table)`` and returns True only if none of
    the required names is missing.
    '''
    column_keys = {column.key for column in get_columns(table)}
    required = set(vars_needed)
    return required <= column_keys