def setUpClass(cls):
    cls.db2_name = 'cchq_ucr_tests'
    db_conn_parts = settings.SQL_REPORTING_DATABASE_URL.split('/')
    db_conn_parts[-1] = cls.db2_name
    cls.db2_url = '/'.join(db_conn_parts)

    # setup patches
    cls.connection_string_patch = patch(
        'corehq.sql_db.connections.connection_manager.get_connection_string')

    def connection_string_for_engine(engine_id):
        if engine_id == 'engine-1':
            return settings.SQL_REPORTING_DATABASE_URL
        else:
            return cls.db2_url

    mock_manager = cls.connection_string_patch.start()
    mock_manager.side_effect = connection_string_for_engine

    # setup data sources
    data_source_template = get_sample_data_source()
    cls.ds_1 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_1.engine_id = 'engine-1'
    cls.ds_1.save()
    cls.ds_2 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_2.engine_id = 'engine-2'
    cls.ds_2.save()

    cls.db_context = temporary_database(cls.db2_name)
    cls.db_context.__enter__()

    cls.ds1_adapter = IndicatorSqlAdapter(cls.ds_1)
    cls.ds2_adapter = IndicatorSqlAdapter(cls.ds_2)
def setUpClass(cls):
    cls.db2_name = 'cchq_ucr_tests'
    db_conn_parts = settings.SQL_REPORTING_DATABASE_URL.split('/')
    db_conn_parts[-1] = cls.db2_name
    cls.db2_url = '/'.join(db_conn_parts)

    # setup patches
    cls.engine_id_patches = (
        # unfortunately we need to patch this directly in modules that import it as well
        patch('corehq.apps.userreports.sql.connection.get_engine_id'),
        patch('corehq.apps.userreports.sql.adapter.get_engine_id'),
        patch('corehq.apps.userreports.reports.data_source.get_engine_id'),
    )
    cls.connection_string_patch = patch(
        'corehq.db.connection_manager.get_connection_string')

    for engine_id_patch in cls.engine_id_patches:
        mock_engine_id_method = engine_id_patch.start()
        mock_engine_id_method.side_effect = lambda x: x.engine_id

    def connection_string_for_engine(engine_id):
        if engine_id == 'engine-1':
            return settings.SQL_REPORTING_DATABASE_URL
        else:
            return cls.db2_url

    mock_manager = cls.connection_string_patch.start()
    mock_manager.side_effect = connection_string_for_engine

    # setup data sources
    data_source_template = get_sample_data_source()
    cls.ds_1 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_1.engine_id = 'engine-1'
    cls.ds_1.save()
    cls.ds_2 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_2.engine_id = 'engine-2'
    cls.ds_2.save()

    # use db1 engine to create db2 http://stackoverflow.com/a/8977109/8207
    cls.root_engine = create_engine(settings.SQL_REPORTING_DATABASE_URL)
    conn = cls.root_engine.connect()
    conn.execute('commit')
    try:
        conn.execute('CREATE DATABASE {}'.format(cls.db2_name))
    except ProgrammingError:
        # optimistically assume it failed because the database was already created
        pass
    conn.close()

    cls.ds1_adapter = IndicatorSqlAdapter(cls.ds_1)
    cls.ds2_adapter = IndicatorSqlAdapter(cls.ds_2)
def export_data_source(request, domain, config_id):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()

    try:
        params = process_url_params(request.GET, table.columns)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = _("There was a problem executing your query, please make "
                    "sure your parameters are valid.")
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)
def test_column_uniqueness_when_truncated(self):
    problem_spec = {
        "display_name": "practicing_lessons",
        "property_name": "long_column",
        "choices": [
            "duplicate_choice_1",
            "duplicate_choice_2",
        ],
        "select_style": "multiple",
        "column_id": "a_very_long_base_selection_column_name_with_limited_room",
        "type": "choice_list",
    }
    data_source_config = DataSourceConfiguration(
        domain='test',
        display_name='foo',
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[problem_spec],
    )
    adapter = IndicatorSqlAdapter(data_source_config)
    adapter.rebuild_table()
    # ensure we can save data to the table.
    adapter.save({
        '_id': uuid.uuid4().hex,
        'domain': 'test',
        'doc_type': 'CommCareCase',
        'long_column': 'duplicate_choice_1',
    })
    # and query it back
    q = adapter.get_query_object()
    self.assertEqual(1, q.count())
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(
        CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                               database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(
                doc['_id'], e))

    if not is_static:
        config.meta.build.finished = True
        config.save()
def export_aggregate_ucr(request, domain, table_id):
    table_definition = get_object_or_404(
        AggregateTableDefinition, domain=domain, table_id=table_id
    )
    aggregate_table_adapter = IndicatorSqlAdapter(table_definition)
    url = reverse('export_aggregate_ucr', args=[domain, table_definition.table_id])
    return export_sql_adapter_view(request, domain, aggregate_table_adapter, url)
def rebuild_aggregate_ucr(request, domain, table_id):
    table_definition = get_object_or_404(
        AggregateTableDefinition, domain=domain, table_id=table_id
    )
    aggregate_table_adapter = IndicatorSqlAdapter(table_definition)
    aggregate_table_adapter.rebuild_table()
    populate_aggregate_table_data_task.delay(table_definition.id)
    messages.success(request, 'Table rebuild successfully started.')
    return HttpResponseRedirect(reverse(AggregateUCRView.urlname, args=[domain, table_id]))
def setUp(self):
    super(ReportDataTest, self).setUp()
    # Create report
    self.domain = 'test-ucr-report-data'
    self.data_source = DataSourceConfiguration(
        domain=self.domain,
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[{
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": 'name'
            },
            "column_id": 'name',
            "display_name": 'name',
            "datatype": "string"
        }, {
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": 'number'
            },
            "column_id": 'number',
            "display_name": 'number',
            "datatype": "integer"
        }],
    )
    self.data_source.validate()
    self.data_source.save()
    IndicatorSqlAdapter(self.data_source).rebuild_table()
    self.addCleanup(self.data_source.delete)

    # initialize a report on the data
    self.report_config = ReportConfiguration(
        domain=self.domain,
        config_id=self.data_source._id,
        aggregation_columns=['doc_id'],
        columns=[{
            "type": "field",
            "field": "name",
            "column_id": "name",
            "display": "Name",
            "aggregation": "simple",
        }, {
            "type": "field",
            "field": "number",
            "column_id": "number",
            "display": "Number",
            "aggregation": "simple",
        }],
        filters=[],
        configured_charts=[],
    )
    self.report_config.save()
    self.addCleanup(self.report_config.delete)
def _get_aggregation_from_primary_table(aggregate_table_definition, column_id,
                                        sqlalchemy_agg_fn, last_update):
    primary_data_source = aggregate_table_definition.data_source
    primary_data_source_adapter = IndicatorSqlAdapter(primary_data_source)
    with primary_data_source_adapter.session_helper.session_context() as session:
        primary_table = primary_data_source_adapter.get_table()
        aggregation_sql_column = primary_table.c[column_id]
        query = session.query(sqlalchemy_agg_fn(aggregation_sql_column))
        return session.execute(query).scalar()
def delete_data_source_shared(domain, config_id, request=None):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(config)
    adapter.drop_table()
    config.delete()
    if request:
        messages.success(
            request,
            _(u'Data source "{}" has been deleted.'.format(config.display_name))
        )
def page_context(self):
    context = super(PreviewAggregateUCRView, self).page_context
    adapter = IndicatorSqlAdapter(self.table_definition)
    q = adapter.get_query_object()
    context.update({
        'columns': q.column_descriptions,
        'data': [list(row) for row in q[:20]],
        'total_rows': q.count(),
    })
    return context
def bootstrap(self, configs=None):
    # build adapters for all relevant configs and make sure their tables exist
    if configs is None:
        configs = self.get_all_configs()

    self.table_adapters = [
        IndicatorSqlAdapter(config) for config in configs
    ]
    self.rebuild_tables_if_necessary()
    self.bootstrapped = True
    self.last_bootstrapped = datetime.utcnow()
def page_context(self):
    config, is_static = get_datasource_config_or_404(self.config_id, self.domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    return {
        'data_source': config,
        'columns': q.column_descriptions,
        'data': q[:20],
        'total_rows': q.count(),
    }
def _check_weekly_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(self.weekly_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()
    doc_id_column = aggregate_table.c['doc_id']
    week_column = aggregate_table.c['week']

    # before december the case should not exist
    self.assertEqual(
        0,
        aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column <= '2017-12-17',
        ).count())

    # from the monday in december when the case was opened, the case should exist,
    # but should not be flagged as pregnant
    for monday in ('2017-12-18', '2017-12-25', '2018-01-01'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday,
        ).one()
        self.assertEqual(self.case_name, row.name)
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(0, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # from the monday of the EDD the case should exist, and be flagged as pregnant
    for monday in ('2018-01-15', '2018-01-22', '2018-01-29'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday,
        ).one()
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(1, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # the monday of the march visit, the case should exist, be flagged as pregnant, and there is a form
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-03-12',
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(1, row.fu_forms_in_month)

    # but the monday after there are no forms again
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-03-19',
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(None, row.fu_forms_in_month)

    # the week of april 9, the case should exist, be flagged as pregnant, and there are 2 forms
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-04-09',
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(2, row.fu_forms_in_month)
def handle(self, engine_id, **options):
    db_alias = connection_manager.get_django_db_alias(engine_id)
    call_options = copy(options)
    call_options['database'] = db_alias
    call_command('migrate', **call_options)

    for ds in StaticDataSourceConfiguration.by_domain(DASHBOARD_DOMAIN):
        if engine_id == ds.engine_id or engine_id in ds.mirrored_engine_ids:
            adapter = IndicatorSqlAdapter(ds, engine_id=engine_id)
            adapter.build_table()
def export_data_source(request, domain, config_id):
    config, _ = get_datasource_config_or_404(config_id, domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()

    try:
        params = process_url_params(request.GET, table.columns)
        allowed_formats = [
            Format.CSV,
            Format.HTML,
            Format.XLS,
            Format.XLS_2007,
        ]
        if params.format not in allowed_formats:
            msg = ugettext_lazy('format must be one of the following: {}').format(
                ', '.join(allowed_formats))
            return HttpResponse(msg, status=400)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # xls format has limit of 65536 rows
    # First row is taken up by headers
    if params.format == Format.XLS and q.count() >= 65535:
        keyword_params = dict(**request.GET)
        # use default format
        if 'format' in keyword_params:
            del keyword_params['format']
        return HttpResponseRedirect(
            '%s?%s' % (
                reverse('export_configurable_data_source', args=[domain, config._id]),
                urlencode(keyword_params)
            )
        )

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = ugettext_lazy("There was a problem executing your query, "
                                "please make sure your parameters are valid.")
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)
def preview_data_source(request, domain, config_id):
    config, is_static = get_datasource_config_or_404(config_id, domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    context = _shared_context(domain)
    context.update({
        'data_source': config,
        'columns': q.column_descriptions,
        'data': q[:20],
        'total_rows': q.count(),
    })
    return render(request, "userreports/preview_data.html", context)
def test_weekly_aggregation(self):
    # generate our table
    aggregate_table_adapter = IndicatorSqlAdapter(self.weekly_aggregate_table_definition)
    aggregate_table_adapter.rebuild_table()

    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()

    # confirm it's also idempotent
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()
def setUpClass(cls):
    super(BaseICDSDatasourceTest, cls).setUpClass()
    cls._call_center_domain_mock.start()
    cls.static_datasource = StaticDataSourceConfiguration.wrap(
        cls.get_json(cls.datasource_filename)
    )
    cls.domain = cls.static_datasource.domains[0]
    cls.datasource = StaticDataSourceConfiguration._get_datasource_config(
        cls.static_datasource,
        cls.domain,
    )
    cls.casefactory = CaseFactory(domain=cls.domain)

    adapter = IndicatorSqlAdapter(cls.datasource)
    adapter.build_table()
def rebuild_indicators(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding').format(config.table_id)
    failure = _('There was an error rebuilding Your UCR table {}.').format(config.table_id)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        adapter = IndicatorSqlAdapter(config)
        if not id_is_static(indicator_config_id):
            # Save the start time now in case anything goes wrong. This way we'll be
            # able to see if the rebuild started a long time ago without finishing.
            config.meta.build.initiated = datetime.datetime.utcnow()
            config.meta.build.finished = False
            config.save()

        adapter.rebuild_table()
        _iteratively_build_table(config)
def _check_basic_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(self.basic_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()
    doc_id_column = aggregate_table.c['doc_id']

    # the case should appear in the aggregate table exactly once
    self.assertEqual(
        1,
        aggregate_query.filter(doc_id_column == self.case_id).count())

    row = aggregate_query.filter(doc_id_column == self.case_id).one()
    self.assertEqual(self.case_name, row.name)
    self.assertEqual('2018-01-21', row.pregnancy_start_date)
    self.assertEqual(3, row.fu_forms)
def test_table_population(self):
    adapter = IndicatorSqlAdapter(self.config)
    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {"start_time": now, "end_time": now + one_hour, "person": "al"},
        {"start_time": now + one_hour, "end_time": now + (one_hour * 2), "person": "chris"},
        {"start_time": now + (one_hour * 2), "end_time": now + (one_hour * 3), "person": "katie"},
    ]
    doc = _test_doc(form={'time_logs': logs})

    # Save this document into the table
    adapter.save(doc)

    # Get rows from the table
    rows = adapter.get_query_object()
    retrieved_logs = [
        {
            'start_time': r.start_time,
            'end_time': r.end_time,
            'person': r.person,
        } for r in rows
    ]

    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs,
        logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )
def _get_distinct_values(data_source_configuration, column_config,
                         expansion_limit=DEFAULT_MAXIMUM_EXPANSION):
    """
    Return a tuple. The first item is a list of distinct values in the given
    ExpandedColumn no longer than expansion_limit. The second is a boolean which
    is True if the number of distinct values in the column is greater than the
    limit.

    :param data_source_configuration:
    :param column_config:
    :param expansion_limit:
    :return:
    """
    from corehq.apps.userreports.sql import IndicatorSqlAdapter
    adapter = IndicatorSqlAdapter(data_source_configuration)
    too_many_values = False

    table = adapter.get_table()
    if not table.exists(bind=adapter.engine):
        return [], False

    if column_config.field not in table.c:
        raise ColumnNotFoundError(
            _('The column "{}" does not exist in the report source! '
              'Please double check your report configuration.').format(column_config.field)
        )
    column = table.c[column_config.field]

    query = adapter.session_helper.Session.query(column).limit(expansion_limit + 1).distinct()
    result = query.all()
    distinct_values = [x[0] for x in result]
    if len(distinct_values) > expansion_limit:
        distinct_values = distinct_values[:expansion_limit]
        too_many_values = True

    return distinct_values, too_many_values
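# A minimal usage sketch of the helper above (hypothetical caller, for illustration
# only): given a data source configuration and a report column spec with a `field`
# attribute, it returns the truncated list of distinct values plus a flag that says
# whether the limit was hit. `build_filter_choices` is an assumed name, not part of
# the codebase.
def build_filter_choices(data_source_configuration, column_config):
    choices, too_many_values = _get_distinct_values(
        data_source_configuration, column_config, expansion_limit=10)
    if too_many_values:
        # more than 10 distinct values exist; only the first 10 are usable as choices
        pass
    return choices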
def populate_aggregate_table_data_task(aggregate_table_id):
    definition = AggregateTableDefinition.objects.get(id=aggregate_table_id)
    return populate_aggregate_table_data(IndicatorSqlAdapter(definition))
def _adapter(self):
    return IndicatorSqlAdapter(self.report.config)
def _get_sql_adapter(domain_name, data_source_json):
    from corehq.apps.userreports.sql import IndicatorSqlAdapter
    data_source = _make_data_source_for_domain(data_source_json, domain_name)
    return IndicatorSqlAdapter(data_source)
def deactivate(self):
    if not self.is_static:
        self.is_deactivated = True
        self.save()
        IndicatorSqlAdapter(self).drop_table()
def populate_aggregate_table_data_for_time_period(aggregate_table_adapter, window):
    """
    For a given period (start/end) - populate all data in the aggregate table
    associated with that period.
    """
    doing_time_aggregation = window is not None
    if doing_time_aggregation:
        aggregation_params = {
            window.start.name: window.start.value,
            window.end.name: window.end.value,
        }
    else:
        aggregation_params = {}

    primary_column_adapters = list(
        aggregate_table_adapter.config.get_primary_column_adapters())
    primary_table = IndicatorSqlAdapter(
        aggregate_table_adapter.config.data_source).get_table()
    all_query_columns = [
        pca.to_sqlalchemy_query_column(primary_table, aggregation_params)
        for pca in primary_column_adapters
    ]
    for secondary_table in aggregate_table_adapter.config.secondary_tables.all():
        sqlalchemy_secondary_table = IndicatorSqlAdapter(
            secondary_table.data_source).get_table()
        for column_adapter in secondary_table.get_column_adapters():
            all_query_columns.append(
                column_adapter.to_sqlalchemy_query_column(
                    sqlalchemy_secondary_table, aggregation_params))

    select_statement = sqlalchemy.select(all_query_columns)

    # now construct join
    select_table = primary_table
    for secondary_table in aggregate_table_adapter.config.secondary_tables.all():
        sqlalchemy_secondary_table = IndicatorSqlAdapter(
            secondary_table.data_source).get_table()
        # apply join filters along with period start/end filters (if necessary) for related model
        join_conditions = [
            (primary_table.c[secondary_table.join_column_primary] ==
             sqlalchemy_secondary_table.c[secondary_table.join_column_secondary])
        ]
        if doing_time_aggregation:
            join_conditions.extend([
                sqlalchemy_secondary_table.c[secondary_table.time_window_column] >= window.start.value,
                sqlalchemy_secondary_table.c[secondary_table.time_window_column] < window.end.value,
            ])
        select_table = select_table.outerjoin(
            sqlalchemy_secondary_table, sqlalchemy.and_(*join_conditions))

    select_statement = select_statement.select_from(select_table)

    # apply period start/end filters for primary model
    # to match, start should be before the end of the period and end should be after the start
    # this makes the first and last periods inclusive.
    if doing_time_aggregation:
        select_statement = select_statement.where(
            primary_table.c[window.start.mapped_column_id] < window.end.value)
        select_statement = select_statement.where(
            sqlalchemy.or_(
                primary_table.c[window.end.mapped_column_id] == None,  # noqa this is sqlalchemy
                primary_table.c[window.end.mapped_column_id] >= window.start.value))

    for primary_column_adapter in primary_column_adapters:
        if primary_column_adapter.is_groupable():
            select_statement = select_statement.group_by(
                primary_column_adapter.to_sqlalchemy_query_column(
                    primary_table, aggregation_params))

    aggregate_table = aggregate_table_adapter.get_table()
    primary_key_columns = [
        aggregate_table.c[spec.column_id]
        for spec in primary_column_adapters if spec.is_primary_key()
    ]
    secondary_column_ids = [
        spec.column_id
        for spec in aggregate_table_adapter.config.get_column_adapters()
        if not spec.is_primary_key()
    ]
    insert_statement = insert(aggregate_table).from_select(
        aggregate_table.c, select_statement)
    insert_statement = insert_statement.on_conflict_do_update(
        index_elements=primary_key_columns,
        set_={k: insert_statement.excluded[k] for k in secondary_column_ids},
    )
    with aggregate_table_adapter.session_helper.session_context() as session:
        session.execute(insert_statement)
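# A minimal usage sketch of the function above (illustrative names, not from the
# codebase): for an aggregate table definition with no time-based columns, the whole
# table can be populated in one pass by passing window=None, which skips every
# period start/end filter.
def populate_non_windowed_aggregate_table(aggregate_table_definition):
    adapter = IndicatorSqlAdapter(aggregate_table_definition)
    populate_aggregate_table_data_for_time_period(adapter, window=None)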
def _rebuild_table_get_query_object(self):
    rebuild_indicators(self.datasource._id)
    adapter = IndicatorSqlAdapter(self.datasource)
    return adapter.get_query_object()
def setUp(self):
    self.config = get_sample_data_source()
    self.pillow = ConfigurableIndicatorPillow()
    self.pillow.bootstrap(configs=[self.config])
    self.adapter = IndicatorSqlAdapter(self.config)
    self.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)