Beispiel #1
0
 def __init__(self, **kwargs):
     """Build the API query object from keyword arguments.

     Every keyword must be one of the names in ``allowed``; each accepted
     keyword is stored as an attribute of the same name.  After that the
     numeric and list-like settings are normalised.

     Raises:
         DataUSAException: if a keyword outside ``allowed`` is supplied.
     """
     allowed = [
         "vars_needed", "vars_and_vals", "values", "shows_and_levels",
         "force", "where", "order", "sort", "limit", "exclude",
         "auto_crosswalk", "display_names", "offset"
     ]
     # Defaults for optional settings; kwargs below may override them.
     self._year = None
     self.auto_crosswalk = False
     self.display_names = False
     self.offset = None
     self.vars_and_vals = {}
     for keyword, value in kwargs.items():
         if keyword not in allowed:
             raise DataUSAException("Invalid ApiObject attribute")
         setattr(self, keyword, value)

     # ``limit`` is optional: fall back to None instead of raising
     # AttributeError when the caller did not supply it.
     limit = getattr(self, "limit", None)
     self.limit = int(limit) if limit else limit
     if self.offset:
         self.offset = int(self.offset)

     self.subs = {}
     self.table_list = []
     self.warnings = []

     # ``exclude`` is optional too; when given it is a comma-separated
     # string that is split into a list of names.
     exclude = getattr(self, "exclude", None)
     self.exclude = exclude.split(",") if exclude else exclude

     # NOTE(review): "year" is not an accepted keyword, so this branch can
     # only fire if the class itself provides ``year`` -- confirm.
     if hasattr(self, "year") and self.year != ALL:
         self._year = self.year
     self.force_schema = None

     # Coerce both flags to real booleans using the same accepted values,
     # so a literal True works for display_names as it does for
     # auto_crosswalk.
     truthy = [True, 'true', '1']
     self.auto_crosswalk = self.auto_crosswalk in truthy
     self.display_names = self.display_names in truthy
Beispiel #2
0
 def required_tables(cls, api_obj):
     '''Choose tables that together cover every variable the query needs.

     The needed variables are ``api_obj.vars_needed`` plus
     ``api_obj.where_vars()``, plus ``api_obj.order`` when it is one of
     ``cls.possible_variables``.  Tables are picked greedily by the score
     returned from ``cls.list_partial_tables`` until no variable is left
     uncovered.  Every table after the first must share at least one
     column name with the tables already picked.

     Returns the list of chosen tables, in the order they were picked.
     Raises DataUSAException when no remaining candidate can be joined.
     '''
     def column_names(tbl):
         return [str(c.key) for c in get_columns(tbl)]

     needed = api_obj.vars_needed + api_obj.where_vars()
     if api_obj.order and api_obj.order in cls.possible_variables:
         needed = needed + [api_obj.order]

     remaining = set(needed)
     chosen = []
     seen_cols = []
     while remaining:
         # rank the candidate tables, best score first
         scored = cls.list_partial_tables(remaining, api_obj)
         ranked = sorted(scored.items(), key=operator.itemgetter(1),
                         reverse=True)
         tbl, _score = ranked.pop(0)
         if chosen:
             # joinable currently means: shares at least one column name
             # with the tables picked so far
             while set(seen_cols).isdisjoint(column_names(tbl)):
                 if not ranked:
                     raise DataUSAException("can't join tables!")
                 tbl, _score = ranked.pop(0)
         chosen.append(tbl)
         picked_cols = column_names(tbl)
         seen_cols.extend(picked_cols)
         # whatever this table provides no longer needs to be found
         remaining = remaining.difference(picked_cols)
     return chosen
Beispiel #3
0
def handle_ordering(tables, api_obj):
    '''Build the ORDER BY expression requested through the API.

    The direction is descending only when ``api_obj.sort`` is exactly
    "desc"; any other value means ascending.  The ordering column is
    looked up in ``tables`` and NULL values are sorted last.

    Raises DataUSAException when ``api_obj.order`` is not one of
    ``TableManager.possible_variables``.
    '''
    if api_obj.sort == "desc":
        direction = "desc"
    else:
        direction = "asc"

    order_var = api_obj.order
    if order_var not in TableManager.possible_variables:
        raise DataUSAException("Bad order parameter", order_var)

    column = get_column_from_tables(tables, order_var)
    # direction names the column method to call: .asc() or .desc()
    directed = getattr(column, direction)
    return directed().nullslast()
Beispiel #4
0
def query(table, api_obj, stream=False):
    """Run the single-table query described by ``api_obj`` and format it.

    Args:
        table: model class to query; the code uses ``table.query`` and
            ``table.__table__.columns`` and, when present, the optional
            ``crosswalk_join`` and ``JOINED_FILTER`` attributes.
        api_obj: request description; reads ``vars_and_vals``, ``values``,
            ``exclude``, ``where``, ``order``, ``sort`` and ``limit``.
        stream: when true, columns go through ``use_attr_names`` and the
            result is produced by ``stream_format`` instead of
            ``simple_format``.

    Returns:
        Whatever ``stream_format`` / ``simple_format`` returns.

    Raises:
        DataUSAException: if ``api_obj.order`` is neither a known variable
            nor the special value 'abs(pct_change)'.
    """
    vars_and_vals = api_obj.vars_and_vals
    values = api_obj.values
    exclude = api_obj.exclude

    filters = process_value_filters(table, vars_and_vals, api_obj)
    filters += where_filters(table, api_obj.where)
    filters += sumlevel_filtering(table, api_obj)

    if values:
        # always include the primary-key columns alongside requested values
        pk = [
            col for col in table.__table__.columns
            if col.primary_key and col.key not in values
        ]
        cols = pk + values
    else:
        cols = get_columns(table)

    if exclude:
        # ``cols`` may mix column objects with plain column-name strings.
        # Compare by name in both cases; the previous expression fell
        # through to ``col.key`` for an excluded string and raised
        # AttributeError.
        cols = [
            col for col in cols
            if getattr(col, "key", col) not in exclude
        ]

    qry = table.query

    if hasattr(table, "crosswalk_join"):
        qry = table.crosswalk_join(qry)

    if stream:
        qry, cols = use_attr_names(table, qry, cols)
    qry = qry.with_entities(*cols)

    if hasattr(table, "JOINED_FILTER"):
        qry, filters = handle_join(qry, filters, table, api_obj)

    qry = qry.filter(*filters)

    if api_obj.order:
        sort = "desc" if api_obj.sort == "desc" else "asc"
        # The order value is interpolated into raw SQL below, so only
        # known variables (plus one explicitly allowed expression) pass.
        if (api_obj.order not in TableManager.possible_variables
                and api_obj.order != 'abs(pct_change)'):
            raise DataUSAException("Bad order parameter", api_obj.order)
        sort_stmt = text("{} {} NULLS LAST".format(api_obj.order, sort))
        qry = qry.order_by(sort_stmt)
    if api_obj.limit:
        qry = qry.limit(api_obj.limit)

    if stream:
        return stream_format(table, cols, qry, api_obj)

    return simple_format(table, cols, qry, api_obj)
Beispiel #5
0
def api_join_view(csv=None):
    """Answer a join request: build the API object, pick tables, run the query.

    The API object is built with a default limit of 500.  A limit above
    80000 is rejected with DataUSAException.  ``csv`` is forwarded to
    ``join_api.joinable_query`` as ``csv_format``.
    """
    api_obj = build_api_obj(default_limit=500)

    limit_too_large = api_obj.limit and api_obj.limit > 80000
    if limit_too_large:
        raise DataUSAException("Limit parameter must be less than 80,000")

    tables = manager.required_tables(api_obj)
    return join_api.joinable_query(
        tables, api_obj, manager.table_years, csv_format=csv
    )
Beispiel #6
0
 def all_tables(cls, api_obj):
     '''Return every registered table able to answer the query on its own.

     A table qualifies when it has all of the needed variables
     (``api_obj.vars_needed`` plus ``api_obj.order`` when that is one of
     ``cls.possible_variables``) and ``TableManager.table_can_show``
     accepts it.  The result is sorted by ``median_moe``, ascending.

     Raises DataUSAException when no table qualifies.
     '''
     vars_needed = api_obj.vars_needed
     # Add the ordering variable once, before the loop.  Doing this per
     # table appended a duplicate entry on every iteration.
     if api_obj.order and api_obj.order in cls.possible_variables:
         vars_needed = vars_needed + [api_obj.order]

     candidates = [
         table for table in registered_models
         if TableManager.table_has_cols(table, vars_needed)
         and TableManager.table_can_show(table, api_obj)
     ]
     if not candidates:
         raise DataUSAException("No tables can match the specified query.")
     return sorted(candidates, key=attrgetter('median_moe'))
Beispiel #7
0
 def list_partial_tables(cls, vars_needed, api_obj):
     '''Score every registered table that covers part of ``vars_needed``.

     Returns a dict mapping table -> score, where the score is the overlap
     size reported by ``TableManager.table_has_some_cols`` minus a penalty
     derived from the table's ``median_moe``.  Tables with no overlap, or
     rejected by ``TableManager.table_can_show``, are left out.

     Raises DataUSAException when no table qualifies.
     '''
     scores = {}
     for table in registered_models:
         overlap_size = TableManager.table_has_some_cols(table, vars_needed)
         if not overlap_size > 0:
             continue
         if not TableManager.table_can_show(table, api_obj):
             continue
         # Tie-breaker: callers pick the highest score first, so the
         # penalty is subtracted.  For a positive median_moe it is
         # 1 - 1/median_moe, otherwise 0.
         # NOTE(review): for 0 < median_moe < 1 the penalty is negative,
         # which raises the score -- confirm that is intended.
         moe = table.median_moe
         if moe > 0:
             penalty = 1 - (1.0 / moe)
         else:
             penalty = 0
         scores[table] = overlap_size - penalty

     if scores:
         return scores
     raise DataUSAException("No tables can match the specified query.")
Beispiel #8
0
 def __init__(self, **kwargs):
     """Build the API query object from keyword arguments.

     Every keyword must be one of the names in ``allowed``; each accepted
     keyword is stored as an attribute of the same name.

     Raises:
         DataUSAException: if a keyword outside ``allowed`` is supplied.
     """
     allowed = [
         "vars_needed", "vars_and_vals", "values", "shows_and_levels",
         "force", "where", "order", "sort", "limit", "exclude"
     ]
     self._year = None
     for keyword, value in kwargs.items():
         if keyword not in allowed:
             raise DataUSAException("Invalid ApiObject attribute")
         setattr(self, keyword, value)

     # ``limit`` is optional: fall back to None instead of raising
     # AttributeError when the caller did not supply it.
     limit = getattr(self, "limit", None)
     self.limit = int(limit) if limit else limit

     self.subs = {}
     self.table_list = []

     # ``exclude`` is optional too; when given it is a comma-separated
     # string that is split into a list of names.
     exclude = getattr(self, "exclude", None)
     self.exclude = exclude.split(",") if exclude else exclude

     # NOTE(review): "year" is not an accepted keyword, so this branch can
     # only fire if the class itself provides ``year`` -- confirm.
     if hasattr(self, "year") and self.year != ALL:
         self._year = self.year
     self.force_schema = None