class IndicatorPillowTestBase(TestCase):

    def setUp(self):
        self.config = get_sample_data_source()
        self.config.save()
        self.adapter = IndicatorSqlAdapter(self.config)
        self.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)

    def tearDown(self):
        self.config.delete()
        self.adapter.drop_table()

    @patch('corehq.apps.userreports.specs.datetime')
    def _check_sample_doc_state(self, expected_indicators, datetime_mock):
        datetime_mock.utcnow.return_value = self.fake_time_now
        self.assertEqual(1, self.adapter.get_query_object().count())
        row = self.adapter.get_query_object()[0]
        for k in row.keys():
            v = getattr(row, k)
            if isinstance(expected_indicators[k], decimal.Decimal):
                self.assertAlmostEqual(expected_indicators[k], v)
            else:
                self.assertEqual(
                    expected_indicators[k], v,
                    'mismatched property: {} (expected {}, was {})'.format(
                        k, expected_indicators[k], v
                    )
                )
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()
        redis_key = _get_redis_key_for_config(config)

    adapter.rebuild_table()
    relevant_ids_chunk = []
    for relevant_id in iterate_doc_ids_in_domain_by_type(
            config.domain,
            config.referenced_doc_type,
            chunk_size=CHUNK_SIZE,
            database=couchdb):
        relevant_ids_chunk.append(relevant_id)
        if len(relevant_ids_chunk) >= CHUNK_SIZE:
            redis_client.sadd(redis_key, *relevant_ids_chunk)
            _build_indicators(indicator_config_id, relevant_ids_chunk)
            relevant_ids_chunk = []

    if relevant_ids_chunk:
        redis_client.sadd(redis_key, *relevant_ids_chunk)
        _build_indicators(indicator_config_id, relevant_ids_chunk)
def export_data_source(request, domain, config_id):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()

    try:
        params = process_url_params(request.GET, table.columns)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = _("There was a problem executing your query, please make "
                    "sure your parameters are valid.")
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))

    if not is_static:
        config.meta.build.finished = True
        config.save()
def _build_indicators(indicator_config_id, relevant_ids):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
            redis_client.srem(redis_key, doc.get('_id'))
        except Exception as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))

    if not is_static(indicator_config_id):
        redis_client.delete(redis_key)
        config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if config.meta.build.initiated == current_config.meta.build.initiated:
                current_config.meta.build.finished = True
                current_config.save()
def _get_distinct_values(data_source_configuration, column_config, expansion_limit=DEFAULT_MAXIMUM_EXPANSION):
    """
    Return a tuple. The first item is a list of distinct values in the given
    ExpandedColumn no longer than expansion_limit. The second is a boolean which
    is True if the number of distinct values in the column is greater than the
    limit.

    :param data_source_configuration:
    :param column_config:
    :param expansion_limit:
    :return:
    """
    from corehq.apps.userreports.sql import IndicatorSqlAdapter
    adapter = IndicatorSqlAdapter(data_source_configuration)
    too_many_values = False

    table = adapter.get_table()
    if not table.exists(bind=adapter.engine):
        return [], False

    if column_config.field not in table.c:
        raise ColumnNotFoundError(_(
            'The column "{}" does not exist in the report source! '
            'Please double check your report configuration.').format(column_config.field)
        )
    column = table.c[column_config.field]

    query = adapter.session_helper.Session.query(column).limit(expansion_limit + 1).distinct()
    result = query.all()
    distinct_values = [x[0] for x in result]
    if len(distinct_values) > expansion_limit:
        distinct_values = distinct_values[:expansion_limit]
        too_many_values = True

    return distinct_values, too_many_values
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()
        redis_key = _get_redis_key_for_config(config)

    adapter.rebuild_table()
    relevant_ids = get_doc_ids_in_domain_by_type(
        config.domain,
        config.referenced_doc_type,
        database=couchdb,
    )
    for docs in chunked(relevant_ids, 1000):
        redis_client.sadd(redis_key, *docs)

    _build_indicators(indicator_config_id, relevant_ids)
def setUpClass(cls):
    cls.db2_name = 'cchq_ucr_tests'
    db_conn_parts = settings.SQL_REPORTING_DATABASE_URL.split('/')
    db_conn_parts[-1] = cls.db2_name
    cls.db2_url = '/'.join(db_conn_parts)

    # setup patches
    cls.connection_string_patch = patch('corehq.sql_db.connections.connection_manager.get_connection_string')

    def connection_string_for_engine(engine_id):
        if engine_id == 'engine-1':
            return settings.SQL_REPORTING_DATABASE_URL
        else:
            return cls.db2_url

    mock_manager = cls.connection_string_patch.start()
    mock_manager.side_effect = connection_string_for_engine

    # setup data sources
    data_source_template = get_sample_data_source()
    cls.ds_1 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_1.engine_id = 'engine-1'
    cls.ds_1.save()
    cls.ds_2 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_2.engine_id = 'engine-2'
    cls.ds_2.save()

    cls.db_context = temporary_database(cls.db2_name)
    cls.db_context.__enter__()

    cls.ds1_adapter = IndicatorSqlAdapter(cls.ds_1)
    cls.ds2_adapter = IndicatorSqlAdapter(cls.ds_2)
class IndicatorPillowTestBase(TestCase):

    def setUp(self):
        super(IndicatorPillowTestBase, self).setUp()
        self.config = get_sample_data_source()
        self.config.save()
        self.adapter = IndicatorSqlAdapter(self.config)
        self.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)

    def tearDown(self):
        self.config.delete()
        self.adapter.drop_table()
        super(IndicatorPillowTestBase, self).tearDown()

    @patch('corehq.apps.userreports.specs.datetime')
    def _check_sample_doc_state(self, expected_indicators, datetime_mock):
        datetime_mock.utcnow.return_value = self.fake_time_now
        self.assertEqual(1, self.adapter.get_query_object().count())
        row = self.adapter.get_query_object()[0]
        for k in row.keys():
            v = getattr(row, k)
            if isinstance(expected_indicators[k], decimal.Decimal):
                self.assertAlmostEqual(expected_indicators[k], v)
            else:
                self.assertEqual(
                    expected_indicators[k], v,
                    'mismatched property: {} (expected {}, was {})'.format(
                        k, expected_indicators[k], v))
def _delete_data_source_shared(request, domain, config_id):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.drop_table()
    config.delete()
    messages.success(request,
                     _(u'Data source "{}" has been deleted.'.format(config.display_name)))
def delete_data_source(request, domain, config_id):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.drop_table()
    config.delete()
    messages.success(request,
                     _(u'Data source "{}" has been deleted.'.format(config.display_name)))
    return HttpResponseRedirect(reverse('configurable_reports_home', args=[domain]))
def rebuild_aggregate_ucr(request, domain, table_id):
    table_definition = get_object_or_404(
        AggregateTableDefinition, domain=domain, table_id=table_id
    )
    aggregate_table_adapter = IndicatorSqlAdapter(table_definition)
    aggregate_table_adapter.rebuild_table()
    populate_aggregate_table_data_task.delay(table_definition.id)
    messages.success(request, 'Table rebuild successfully started.')
    return HttpResponseRedirect(reverse(AggregateUCRView.urlname, args=[domain, table_id]))
def export_data_source(request, domain, config_id):
    config, _ = get_datasource_config_or_404(config_id, domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()

    try:
        params = process_url_params(request.GET, table.columns)
        allowed_formats = [
            Format.CSV,
            Format.HTML,
            Format.XLS,
            Format.XLS_2007,
        ]
        if params.format not in allowed_formats:
            msg = ugettext_lazy('format must be one of the following: {}').format(', '.join(allowed_formats))
            return HttpResponse(msg, status=400)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # xls format has limit of 65536 rows
    # First row is taken up by headers
    if params.format == Format.XLS and q.count() >= 65535:
        keyword_params = dict(**request.GET)
        # use default format
        if 'format' in keyword_params:
            del keyword_params['format']
        return HttpResponseRedirect(
            '%s?%s' % (
                reverse('export_configurable_data_source', args=[domain, config._id]),
                urlencode(keyword_params)
            )
        )

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = ugettext_lazy(
                "There was a problem executing your query, "
                "please make sure your parameters are valid."
            )
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)
def page_context(self):
    context = super(PreviewAggregateUCRView, self).page_context
    adapter = IndicatorSqlAdapter(self.table_definition)
    q = adapter.get_query_object()
    context.update({
        'columns': q.column_descriptions,
        'data': [list(row) for row in q[:20]],
        'total_rows': q.count(),
    })
    return context
def delete_data_source_shared(domain, config_id, request=None):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(config)
    adapter.drop_table()
    config.delete()
    if request:
        messages.success(
            request,
            _(u'Data source "{}" has been deleted.'.format(
                config.display_name))
        )
def _get_aggregation_from_primary_table(aggregate_table_definition, column_id, sqlalchemy_agg_fn, last_update):
    primary_data_source = aggregate_table_definition.data_source
    primary_data_source_adapter = IndicatorSqlAdapter(primary_data_source)
    with primary_data_source_adapter.session_helper.session_context() as session:
        primary_table = primary_data_source_adapter.get_table()
        aggregation_sql_column = primary_table.c[column_id]
        query = session.query(sqlalchemy_agg_fn(aggregation_sql_column))
        return session.execute(query).scalar()
def page_context(self):
    config, is_static = get_datasource_config_or_404(self.config_id, self.domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    return {
        'data_source': config,
        'columns': q.column_descriptions,
        'data': q[:20],
        'total_rows': q.count(),
    }
def _check_weekly_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(
        self.weekly_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()

    doc_id_column = aggregate_table.c['doc_id']
    week_column = aggregate_table.c['week']

    # before december the case should not exist
    self.assertEqual(
        0,
        aggregate_query.filter(doc_id_column == self.case_id,
                               week_column <= '2017-12-17').count())

    # from the monday in december when the case was opened, the case should exist,
    # but should not be flagged as pregnant
    for monday in ('2017-12-18', '2017-12-25', '2018-01-01'):
        row = aggregate_query.filter(doc_id_column == self.case_id,
                                     week_column == monday).one()
        self.assertEqual(self.case_name, row.name)
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(0, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # from the monday of the EDD the case should exist, and be flagged as pregnant
    for monday in ('2018-01-15', '2018-01-22', '2018-01-29'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday,
        ).one()
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(1, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # the monday of the march visit, the case should exist, be flagged as pregnant, and there is a form
    row = aggregate_query.filter(doc_id_column == self.case_id,
                                 week_column == '2018-03-12').one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(1, row.fu_forms_in_month)

    # but the monday after there are no forms again
    row = aggregate_query.filter(doc_id_column == self.case_id,
                                 week_column == '2018-03-19').one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(None, row.fu_forms_in_month)

    # the week of april 9, the case should exist, be flagged as pregnant, and there are 2 forms
    row = aggregate_query.filter(doc_id_column == self.case_id,
                                 week_column == '2018-04-09').one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(2, row.fu_forms_in_month)
def handle(self, engine_id, **options):
    db_alias = connection_manager.get_django_db_alias(engine_id)
    call_options = copy(options)
    call_options['database'] = db_alias
    call_command('migrate', **call_options)

    for ds in StaticDataSourceConfiguration.by_domain(DASHBOARD_DOMAIN):
        if engine_id == ds.engine_id or engine_id in ds.mirrored_engine_ids:
            adapter = IndicatorSqlAdapter(ds, engine_id=engine_id)
            adapter.build_table()
def page_context(self):
    config, is_static = get_datasource_config_or_404(
        self.config_id, self.domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    return {
        'data_source': config,
        'columns': q.column_descriptions,
        'data': q[:20],
        'total_rows': q.count(),
    }
def export_data_source(request, domain, config_id):
    config, _ = get_datasource_config_or_404(config_id, domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()

    try:
        params = process_url_params(request.GET, table.columns)
        allowed_formats = [
            Format.CSV,
            Format.HTML,
            Format.XLS,
            Format.XLS_2007,
        ]
        if params.format not in allowed_formats:
            msg = ugettext_lazy(
                'format must be one of the following: {}').format(
                    ', '.join(allowed_formats))
            return HttpResponse(msg, status=400)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # xls format has limit of 65536 rows
    # First row is taken up by headers
    if params.format == Format.XLS and q.count() >= 65535:
        keyword_params = dict(**request.GET)
        # use default format
        if 'format' in keyword_params:
            del keyword_params['format']
        return HttpResponseRedirect(
            '%s?%s' % (reverse('export_configurable_data_source',
                               args=[domain, config._id]),
                       urlencode(keyword_params)))

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = ugettext_lazy("There was a problem executing your query, "
                                "please make sure your parameters are valid.")
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)
def test_weekly_aggregation(self):
    # generate our table
    aggregate_table_adapter = IndicatorSqlAdapter(self.weekly_aggregate_table_definition)
    aggregate_table_adapter.rebuild_table()
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()

    # confirm it's also idempotent
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()
def test_weekly_aggregation(self):
    # generate our table
    aggregate_table_adapter = IndicatorSqlAdapter(
        self.weekly_aggregate_table_definition)
    aggregate_table_adapter.rebuild_table()
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()

    # confirm it's also idempotent
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()
def preview_data_source(request, domain, config_id):
    config, is_static = get_datasource_config_or_404(config_id, domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    context = _shared_context(domain)
    context.update({
        'data_source': config,
        'columns': q.column_descriptions,
        'data': q[:20],
        'total_rows': q.count(),
    })
    return render(request, "userreports/preview_data.html", context)
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()

    adapter.rebuild_table()
    _iteratively_build_table(config)
def setUpClass(cls):
    cls.db2_name = 'cchq_ucr_tests'
    db_conn_parts = settings.SQL_REPORTING_DATABASE_URL.split('/')
    db_conn_parts[-1] = cls.db2_name
    cls.db2_url = '/'.join(db_conn_parts)

    # setup patches
    cls.engine_id_patches = (
        # unfortunately we need to patch this directly in modules that import it as well
        patch('corehq.apps.userreports.sql.connection.get_engine_id'),
        patch('corehq.apps.userreports.sql.adapter.get_engine_id'),
        patch('corehq.apps.userreports.reports.data_source.get_engine_id'),
    )
    cls.connection_string_patch = patch(
        'corehq.db.connection_manager.get_connection_string')

    for engine_id_patch in cls.engine_id_patches:
        mock_engine_id_method = engine_id_patch.start()
        mock_engine_id_method.side_effect = lambda x: x.engine_id

    def connection_string_for_engine(engine_id):
        if engine_id == 'engine-1':
            return settings.SQL_REPORTING_DATABASE_URL
        else:
            return cls.db2_url

    mock_manager = cls.connection_string_patch.start()
    mock_manager.side_effect = connection_string_for_engine

    # setup data sources
    data_source_template = get_sample_data_source()
    cls.ds_1 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_1.engine_id = 'engine-1'
    cls.ds_1.save()
    cls.ds_2 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_2.engine_id = 'engine-2'
    cls.ds_2.save()

    # use db1 engine to create db2 http://stackoverflow.com/a/8977109/8207
    cls.root_engine = create_engine(settings.SQL_REPORTING_DATABASE_URL)
    conn = cls.root_engine.connect()
    conn.execute('commit')
    try:
        conn.execute('CREATE DATABASE {}'.format(cls.db2_name))
    except ProgrammingError:
        # optimistically assume it failed because it was already created.
        pass
    conn.close()

    cls.ds1_adapter = IndicatorSqlAdapter(cls.ds_1)
    cls.ds2_adapter = IndicatorSqlAdapter(cls.ds_2)
def setUpClass(cls):
    super(BaseICDSDatasourceTest, cls).setUpClass()
    cls._call_center_domain_mock.start()
    cls.static_datasource = StaticDataSourceConfiguration.wrap(
        cls.get_json(cls.datasource_filename)
    )
    cls.domain = cls.static_datasource.domains[0]
    cls.datasource = StaticDataSourceConfiguration._get_datasource_config(
        cls.static_datasource,
        cls.domain,
    )
    cls.casefactory = CaseFactory(domain=cls.domain)

    adapter = IndicatorSqlAdapter(cls.datasource)
    adapter.build_table()
def rebuild_indicators(indicator_config_id):
    config = DataSourceConfiguration.get(indicator_config_id)
    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.rebuild_table()
    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        if config.filter.filter(doc):
            try:
                adapter.save(doc)
            except DataError as e:
                logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))
def get_choices_from_data_source_column(query_context):
    adapter = IndicatorSqlAdapter(query_context.report.config)
    table = adapter.get_table()
    sql_column = table.c[query_context.report_filter.field]
    query = adapter.session_helper.Session.query(sql_column)
    if query_context.query:
        query = query.filter(sql_column.contains(query_context.query))

    try:
        return [
            Choice(v[0], v[0])
            for v in query.distinct().order_by(sql_column).limit(query_context.limit).offset(query_context.offset)
        ]
    except ProgrammingError:
        return []
def export_aggregate_ucr(request, domain, table_id):
    table_definition = get_object_or_404(
        AggregateTableDefinition, domain=domain, table_id=table_id
    )
    aggregate_table_adapter = IndicatorSqlAdapter(table_definition)
    url = reverse('export_aggregate_ucr', args=[domain, table_definition.table_id])
    return export_sql_adapter_view(request, domain, aggregate_table_adapter, url)
def setUp(self):
    self.config = get_sample_data_source()
    self.pillow = ConfigurableIndicatorPillow()
    self.engine = self.pillow.get_sql_engine()
    self.pillow.bootstrap(configs=[self.config])
    self.adapter = IndicatorSqlAdapter(self.engine, self.config)
    self.adapter.rebuild_table()
def setUp(self):
    self.config = get_sample_data_source()
    self.config.save()
    self.pillow = ConfigurableIndicatorPillow()
    self.pillow.bootstrap(configs=[self.config])
    self.adapter = IndicatorSqlAdapter(self.config)
    self.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)
def rebuild_indicators(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding').format(config.table_id)
    failure = _('There was an error rebuilding your UCR table {}.').format(config.table_id)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        adapter = IndicatorSqlAdapter(config)
        if not id_is_static(indicator_config_id):
            # Save the start time now in case anything goes wrong. This way we'll be
            # able to see if the rebuild started a long time ago without finishing.
            config.meta.build.initiated = datetime.datetime.utcnow()
            config.meta.build.finished = False
            config.save()

        adapter.rebuild_table()
        _iteratively_build_table(config)
def _check_basic_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(
        self.basic_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()

    doc_id_column = aggregate_table.c['doc_id']

    # the case should appear in the aggregate table exactly once
    self.assertEqual(
        1,
        aggregate_query.filter(doc_id_column == self.case_id).count())

    row = aggregate_query.filter(doc_id_column == self.case_id).one()
    self.assertEqual(self.case_name, row.name)
    self.assertEqual('2018-01-21', row.pregnancy_start_date)
    self.assertEqual(3, row.fu_forms)
def setUp(self):
    self.config = get_sample_data_source()
    self.pillow = ConfigurableIndicatorPillow()
    self.engine = connection_manager.get_engine(DEFAULT_ENGINE_ID)
    self.pillow.bootstrap(configs=[self.config])
    self.adapter = IndicatorSqlAdapter(self.config)
    self.adapter.rebuild_table()
    self.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)
def test_column_uniqueness_when_truncated(self):
    problem_spec = {
        "display_name": "practicing_lessons",
        "property_name": "long_column",
        "choices": [
            "duplicate_choice_1",
            "duplicate_choice_2",
        ],
        "select_style": "multiple",
        "column_id": "a_very_long_base_selection_column_name_with_limited_room",
        "type": "choice_list",
    }
    data_source_config = DataSourceConfiguration(
        domain='test',
        display_name='foo',
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[problem_spec],
    )
    adapter = IndicatorSqlAdapter(data_source_config)
    adapter.rebuild_table()
    # ensure we can save data to the table.
    adapter.save({
        '_id': uuid.uuid4().hex,
        'domain': 'test',
        'doc_type': 'CommCareCase',
        'long_column': 'duplicate_choice_1',
    })
    # and query it back
    q = adapter.get_query_object()
    self.assertEqual(1, q.count())
def get_choices(data_source, filter, search_term=None, limit=20, page=0):
    # todo: we may want to log this as soon as mobile UCR stops hitting this
    # for misconfigured filters
    if not isinstance(filter, DynamicChoiceListFilter):
        return []

    adapter = IndicatorSqlAdapter(data_source)
    table = adapter.get_table()
    sql_column = table.c[filter.field]
    query = adapter.session_helper.Session.query(sql_column)
    if search_term:
        query = query.filter(sql_column.contains(search_term))

    offset = page * limit
    try:
        return [v[0] for v in query.distinct().order_by(sql_column).limit(limit).offset(offset)]
    except ProgrammingError:
        return []
def _check_basic_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(self.basic_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()

    doc_id_column = aggregate_table.c['doc_id']

    # the case should appear in the aggregate table exactly once
    self.assertEqual(1, aggregate_query.filter(
        doc_id_column == self.case_id,
    ).count())

    row = aggregate_query.filter(
        doc_id_column == self.case_id,
    ).one()
    self.assertEqual(self.case_name, row.name)
    self.assertEqual('2018-01-21', row.pregnancy_start_date)
    self.assertEqual(3, row.fu_forms)
def setUp(self):
    super(ReportDataTest, self).setUp()
    # Create report
    self.domain = 'test-ucr-report-data'
    self.data_source = DataSourceConfiguration(
        domain=self.domain,
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[{
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": 'name'
            },
            "column_id": 'name',
            "display_name": 'name',
            "datatype": "string"
        }, {
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": 'number'
            },
            "column_id": 'number',
            "display_name": 'number',
            "datatype": "integer"
        }],
    )
    self.data_source.validate()
    self.data_source.save()
    IndicatorSqlAdapter(self.data_source).rebuild_table()
    self.addCleanup(self.data_source.delete)

    # initialize a report on the data
    self.report_config = ReportConfiguration(
        domain=self.domain,
        config_id=self.data_source._id,
        aggregation_columns=['doc_id'],
        columns=[{
            "type": "field",
            "field": "name",
            "column_id": "name",
            "display": "Name",
            "aggregation": "simple",
        }, {
            "type": "field",
            "field": "number",
            "column_id": "number",
            "display": "Number",
            "aggregation": "simple",
        }],
        filters=[],
        configured_charts=[]
    )
    self.report_config.save()
    self.addCleanup(self.report_config.delete)
class IndicatorPillowTest(TestCase):

    def setUp(self):
        folder = os.path.join(os.path.dirname(__file__), 'data', 'configs')
        sample_file = os.path.join(folder, 'sample_indicator_config.json')
        self.pillow = ConfigurableIndicatorPillow()
        self.engine = self.pillow.get_sql_engine()
        with open(sample_file) as f:
            structure = json.loads(f.read())
        self.config = DataSourceConfiguration.wrap(structure)
        self.pillow.bootstrap(configs=[self.config])
        self.adapter = IndicatorSqlAdapter(self.engine, self.config)
        self.adapter.rebuild_table()

    def tearDown(self):
        self.adapter.drop_table()
        self.engine.dispose()

    def testFilter(self):
        # note: this is a silly test now that python_filter always returns true
        not_matching = [
            dict(doc_type="NotCommCareCase", domain='user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='not-user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='user-reports', type='not-ticket'),
        ]
        for document in not_matching:
            self.assertTrue(self.pillow.python_filter(document))

        self.assertTrue(self.pillow.python_filter(
            dict(doc_type="CommCareCase", domain='user-reports', type='ticket')
        ))

    def testChangeTransport(self):
        # indicators
        sample_doc, expected_indicators = get_sample_doc_and_indicators()
        self.pillow.change_transport(sample_doc)
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(1, rows.rowcount)
            row = rows.fetchone()
            for k, v in row.items():
                self.assertEqual(expected_indicators[k], v)
def bootstrap(self, configs=None):
    # sets up the initial stuff
    if configs is None:
        configs = self.get_all_configs()

    self.table_adapters = [
        IndicatorSqlAdapter(config)
        for config in configs
    ]
    self.rebuild_tables_if_necessary()
    self.bootstrapped = True
    self.last_bootstrapped = datetime.utcnow()
def setUp(self):
    folder = os.path.join(os.path.dirname(__file__), 'data', 'configs')
    sample_file = os.path.join(folder, 'sample_indicator_config.json')
    self.pillow = ConfigurableIndicatorPillow()
    self.engine = self.pillow.get_sql_engine()
    with open(sample_file) as f:
        structure = json.loads(f.read())
    self.config = DataSourceConfiguration.wrap(structure)
    self.pillow.bootstrap(configs=[self.config])
    self.adapter = IndicatorSqlAdapter(self.engine, self.config)
    self.adapter.rebuild_table()
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)

    adapter = IndicatorSqlAdapter(get_engine(), config)
    adapter.rebuild_table()
    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))
    adapter.engine.dispose()
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(StaticDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = StaticDataSourceConfiguration.by_id(indicator_config_id)
        rev = 'static'
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        rev = config._rev
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    client = get_redis_client().client.get_client()
    redis_key = 'ucr_queue-{}:{}'.format(indicator_config_id, rev)

    if len(client.smembers(redis_key)) > 0:
        relevant_ids = client.smembers(redis_key)
    else:
        adapter.rebuild_table()
        relevant_ids = get_doc_ids_in_domain_by_type(config.domain, config.referenced_doc_type,
                                                     database=couchdb)
        if relevant_ids:
            client.sadd(redis_key, *relevant_ids)

    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
            client.srem(redis_key, doc.get('_id'))
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))

    if not is_static:
        client.delete(redis_key)
        config.meta.build.finished = True
        config.save()
def rebuild_indicators(indicator_config_id):
    is_static = indicator_config_id.startswith(
        CustomDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = CustomDataSourceConfiguration.by_id(indicator_config_id)
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()
    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type,
                               database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception('problem saving document {} to table. {}'.format(
                doc['_id'], e))

    if not is_static:
        config.meta.build.finished = True
        config.save()
def test_table_population(self):
    adapter = IndicatorSqlAdapter(self.config)
    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {"start_time": now, "end_time": now + one_hour, "person": "al"},
        {"start_time": now + one_hour, "end_time": now + (one_hour * 2), "person": "chris"},
        {"start_time": now + (one_hour * 2), "end_time": now + (one_hour * 3), "person": "katie"},
    ]
    doc = _test_doc(form={"time_logs": logs})

    # Save this document into the table
    adapter.save(doc)

    # Get rows from the table
    rows = adapter.get_query_object()
    retrieved_logs = [
        {"start_time": r.start_time, "end_time": r.end_time, "person": r.person}
        for r in rows
    ]

    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs,
        logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )
def _get_distinct_values(data_source_configuration, column_config,
                         expansion_limit=DEFAULT_MAXIMUM_EXPANSION):
    """
    Return a tuple. The first item is a list of distinct values in the given
    ExpandedColumn no longer than expansion_limit. The second is a boolean which
    is True if the number of distinct values in the column is greater than the
    limit.

    :param data_source_configuration:
    :param column_config:
    :param expansion_limit:
    :return:
    """
    from corehq.apps.userreports.sql import IndicatorSqlAdapter
    adapter = IndicatorSqlAdapter(data_source_configuration)
    too_many_values = False

    table = adapter.get_table()
    if not table.exists(bind=adapter.engine):
        return [], False

    if column_config.field not in table.c:
        raise ColumnNotFoundError(
            _('The column "{}" does not exist in the report source! '
              'Please double check your report configuration.').format(
                  column_config.field))
    column = table.c[column_config.field]

    query = adapter.session_helper.Session.query(column).limit(
        expansion_limit + 1).distinct()
    result = query.all()
    distinct_values = [x[0] for x in result]
    if len(distinct_values) > expansion_limit:
        distinct_values = distinct_values[:expansion_limit]
        too_many_values = True

    return distinct_values, too_many_values
def test_table_population(self):
    adapter = IndicatorSqlAdapter(self.config)
    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {
            "start_time": now,
            "end_time": now + one_hour,
            "person": "al"
        },
        {
            "start_time": now + one_hour,
            "end_time": now + (one_hour * 2),
            "person": "chris"
        },
        {
            "start_time": now + (one_hour * 2),
            "end_time": now + (one_hour * 3),
            "person": "katie"
        },
    ]
    doc = _test_doc(form={'time_logs': logs})

    # Save this document into the table
    adapter.save(doc)

    # Get rows from the table
    rows = adapter.get_query_object()
    retrieved_logs = [
        {
            'start_time': r.start_time,
            'end_time': r.end_time,
            'person': r.person,
        } for r in rows
    ]

    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs, logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )
def setUp(self):
    self.config = get_sample_data_source()
    self.pillow = ConfigurableIndicatorPillow()
    self.pillow.bootstrap(configs=[self.config])
    self.adapter = IndicatorSqlAdapter(self.config)
    self.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)
def populate_aggregate_table_data_task(aggregate_table_id):
    definition = AggregateTableDefinition.objects.get(id=aggregate_table_id)
    return populate_aggregate_table_data(IndicatorSqlAdapter(definition))
def _adapter(self):
    return IndicatorSqlAdapter(self.report.config)
def _get_sql_adapter(domain_name, data_source_json):
    from corehq.apps.userreports.sql import IndicatorSqlAdapter
    data_source = _make_data_source_for_domain(data_source_json, domain_name)
    return IndicatorSqlAdapter(data_source)
def deactivate(self):
    if not self.is_static:
        self.is_deactivated = True
        self.save()
        IndicatorSqlAdapter(self).drop_table()
def _rebuild_table_get_query_object(self):
    rebuild_indicators(self.datasource._id)
    adapter = IndicatorSqlAdapter(self.datasource)
    return adapter.get_query_object()