def call_resource(path, qs):
    """Resolve ``path`` to its resource class and describe the query behind it.

    The path is matched against the app's URL map to find the view's resource
    class; ``qs`` is parsed into keyword arguments, merged with the arguments
    captured from the URL, and stripped of every name in ``IGNORE_FIELDS``
    before the resource's query is built and counted.

    Returns:
        dict: the request (``path``, ``qs``), the derived S3 ``name``, the
        ``query``, the ``schema`` (falling back to ``resource.schema``), the
        ``resource`` instance, the row ``count``, a naive UTC ``timestamp``,
        and the parsed ``fields`` and ``kwargs``.
    """
    application = task_utils.get_app()
    url_adapter = application.url_map.bind('')
    endpoint, url_arguments = url_adapter.match(path)
    view_function = application.view_functions[endpoint]
    resource = view_function.view_class()

    fields, parsed_kwargs = parse_kwargs(resource, qs)
    kwargs = utils.extend(url_arguments, parsed_kwargs)
    for ignored_name in IGNORE_FIELDS:
        kwargs.pop(ignored_name, None)

    # build_query's result is normalised to a 3-tuple; the model is not needed here.
    query, _model, schema = unpack(resource.build_query(**kwargs), 3)
    count, _ = counts.get_count(query, db.session, resource.use_estimated_counts)

    s3_name = get_s3_name(path, qs)
    effective_schema = schema or resource.schema
    return {
        'path': path,
        'qs': qs,
        'name': s3_name,
        'query': query,
        'schema': effective_schema,
        'resource': resource,
        'count': count,
        'timestamp': datetime.datetime.utcnow(),
        'fields': fields,
        'kwargs': kwargs,
    }
def get(self, **kwargs):
    """Return one seek-paginated page of itemized records.

    A request naming several `committee_id` values is served through
    `join_committee_queries`, which builds one subquery per committee and
    combines them with `UNION ALL`; this avoids slow queries when any of the
    committees has many records. A request with zero or one committee runs
    the ordinary query and is paged with this resource's `cap`.

    Raises:
        exceptions.ApiError: (status 422) if more than ten `committee_id`
            values are supplied.
    """
    requested_committees = kwargs.get('committee_id', [])
    committee_total = len(requested_committees)

    if committee_total > 10:
        raise exceptions.ApiError(
            'Can only specify up to ten values for "committee_id".',
            status_code=422,
        )

    if committee_total <= 1:
        query = self.build_query(**kwargs)
        count, _ = counts.get_count(query, models.db.session)
        return utils.fetch_seek_page(
            query,
            kwargs,
            self.index_column,
            count=count,
            cap=self.cap,
        )

    # Several committees: the joined query already carries its own count.
    query, count = self.join_committee_queries(kwargs)
    return utils.fetch_seek_page(query, kwargs, self.index_column, count=count)
def get(self, **kwargs):
    """Return one page of `models.EFilings` results for the given filters.

    The query is built from `kwargs`, counted through `counts.get_count`,
    and handed to `utils.fetch_page` together with that count.
    """
    filings_query = self.build_query(**kwargs)
    total, _ = counts.get_count(filings_query, models.db.session)
    return utils.fetch_page(
        filings_query,
        kwargs,
        model=models.EFilings,
        count=total,
    )
def test_use_estimated_counts_over_threshold(self, get_query_plan_mock):
    """A planned row count of 2,000,000 is returned as-is and flagged as an estimate."""
    schedule_a_query = db.session.query(models.ScheduleA)

    # Stub the query plan so it reports 2,000,000 estimated rows.
    plan_line = 'Seq Scan on fec_fitem_sched_a (cost=0.00..10.60 rows=2000000 width=1289)'
    get_query_plan_mock.return_value = [(plan_line,)]

    count, estimate = counts.get_count(schedule_a_query, db.session)

    self.assertEqual(count, 2000000)
    self.assertEqual(estimate, True)
def get(self, **kwargs):
    """Get itemized resources.

    If multiple values are passed for `committee_id`, create a subquery for each
    and combine with `UNION ALL`. This is necessary to avoid slow queries when
    one or more relevant committees has many records.

    Raises:
        exceptions.ApiError: (status 422) when `last_index` is supplied without
            any `last_<sort option>` value and without `sort_null_only`, or
            when more than ten `committee_id` values are supplied.
    """
    if kwargs.get("last_index"):
        # Seek pagination needs the sort value that accompanied `last_index`;
        # `sort_null_only` is the one case where no sort value is needed.
        has_sort_cursor = any(
            kwargs.get("last_{}".format(option)) is not None
            for option in self.sort_options
        )
        if not has_sort_cursor and not kwargs.get("sort_null_only"):
            # Adjacent literals instead of backslash continuations inside one
            # literal, so neither backslashes nor source indentation leak into
            # the message returned to the client.
            message = (
                "When paginating through results, both values from the "
                "previous page's `last_indexes` object are needed. For more information, "
                "see https://api.open.fec.gov/developers/. Please add one of the following "
                "filters to your query: `sort_null_only`=True, {}"
            ).format(
                ", ".join("`last_" + option + "`" for option in self.sort_options)
            )
            raise exceptions.ApiError(message, status_code=422)

    committee_ids = kwargs.get('committee_id', [])
    if len(committee_ids) > 10:
        raise exceptions.ApiError(
            'Can only specify up to ten values for "committee_id".',
            status_code=422,
        )
    if len(committee_ids) > 1:
        query, count = self.join_committee_queries(kwargs)
        return utils.fetch_seek_page(query, kwargs, self.index_column, count=count)

    query = self.build_query(**kwargs)
    count, _ = counts.get_count(query, models.db.session)
    return utils.fetch_seek_page(query, kwargs, self.index_column, count=count, cap=self.cap)
def build_committee_query(self, kwargs, committee_id):
    """Build the sorted page subquery and row count for one committee.

    The request's `kwargs` are reused with `committee_id` narrowed to the
    single given committee, and the query is built with
    `_apply_options=False`.

    Returns:
        tuple: ``(page_query, count)`` where ``page_query`` is the `results`
        of a non-eager seek page and ``count`` comes from `counts.get_count`.
    """
    single_committee_kwargs = utils.extend(kwargs, {'committee_id': [committee_id]})
    query = self.build_query(_apply_options=False, **single_committee_kwargs)

    sort = kwargs['sort']
    hide_null = kwargs['sort_hide_null']
    query, _ = sorting.sort(query, sort, model=self.model, hide_null=hide_null)

    # The page is requested with count=-1 and eager=False; the count returned
    # to the caller is computed separately below.
    seek_page = utils.fetch_seek_page(
        query,
        kwargs,
        self.index_column,
        count=-1,
        eager=False,
    )
    page_query = seek_page.results

    count, _ = counts.get_count(query, models.db.session)
    return page_query, count
def get(self, *args, **kwargs):
    """Return one page of results for this resource.

    The query is built from the arguments, counted (honouring
    `self.use_estimated_counts`), and passed to `utils.fetch_page` along with
    this resource's model, join columns, aliases, index column and cap.
    `multi` is set when `kwargs['sort']` is a list or tuple.
    """
    query = self.build_query(*args, **kwargs)
    count, _ = counts.get_count(query, models.db.session, self.use_estimated_counts)

    # A list/tuple `sort` is passed on as multi=True.
    sort_is_sequence = isinstance(kwargs['sort'], (list, tuple))

    page_options = dict(
        count=count,
        model=self.model,
        join_columns=self.join_columns,
        aliases=self.aliases,
        index_column=self.index_column,
        cap=self.cap,
        multi=sort_is_sequence,
    )
    return utils.fetch_page(query, kwargs, **page_options)
def test_schedule_e_efile_uses_exact_count(self, get_query_plan_mock):
    """Schedule E efile counts are exact even when the plan estimates 6,000,000 rows."""
    # Five Schedule E efile rows plus one e-filing record.
    for _ in range(5):
        factories.ScheduleEEfileFactory()
    factories.EFilingsFactory(file_number=123)
    db.session.flush()

    efile_query = db.session.query(models.ScheduleEEfile)

    # Stub the query plan so it reports 6,000,000 estimated rows.
    plan_line = 'Seq Scan on real_efile_se_f57_vw (cost=0.00..10.60 rows=6000000 width=1289)'
    get_query_plan_mock.return_value = [(plan_line,)]

    view = sched_e.ScheduleEEfileView()
    count, estimate = counts.get_count(efile_query, db.session, view.use_estimated_counts)

    # Always use exact count for Schedule E efile
    self.assertEqual(count, 5)
    self.assertEqual(estimate, False)
def test_use_actual_counts_under_threshold(self, get_query_plan_mock):
    """With a planned estimate of 200 rows, the exact count of 2 is returned."""
    # Two Schedule A receipts in the same two-year period, one per report year.
    shared_fields = {'two_year_transaction_period': 2016}
    factories.ScheduleAFactory(
        report_year=2016,
        contribution_receipt_date=datetime.date(2016, 1, 1),
        **shared_fields
    )
    factories.ScheduleAFactory(
        report_year=2015,
        contribution_receipt_date=datetime.date(2015, 1, 1),
        **shared_fields
    )

    schedule_a_query = db.session.query(models.ScheduleA)

    # Stub the query plan so it reports 200 estimated rows.
    plan_line = 'Seq Scan on fec_fitem_sched_a (cost=0.00..10.60 rows=200 width=1289)'
    get_query_plan_mock.return_value = [(plan_line,)]

    count, estimate = counts.get_count(schedule_a_query, db.session)

    self.assertEqual(count, 2)
    self.assertEqual(estimate, False)