def test_column_uniqueness_when_truncated(self):
    problem_spec = {
        "display_name": "practicing_lessons",
        "property_name": "long_column",
        "choices": [
            "duplicate_choice_1",
            "duplicate_choice_2",
        ],
        "select_style": "multiple",
        "column_id": "a_very_long_base_selection_column_name_with_limited_room",
        "type": "choice_list",
    }
    data_source_config = DataSourceConfiguration(
        domain='test',
        display_name='foo',
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[problem_spec],
    )
    adapter = IndicatorSqlAdapter(data_source_config)
    adapter.rebuild_table()
    # ensure we can save data to the table.
    adapter.save({
        '_id': uuid.uuid4().hex,
        'domain': 'test',
        'doc_type': 'CommCareCase',
        'long_column': 'duplicate_choice_1',
    })
    # and query it back
    q = Session.query(adapter.get_table())
    self.assertEqual(1, q.count())

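# A minimal sketch of the IndicatorSqlAdapter lifecycle the test above
# exercises: rebuild the table, write a doc, query it back, drop the table.
# `config` is assumed to be a DataSourceConfiguration like the one constructed
# above; the adapter methods shown (rebuild_table, save, get_table, drop_table)
# are the ones used throughout this file.
def _adapter_lifecycle_sketch(config):
    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()  # drop and recreate the backing SQL table
    adapter.save({           # write one document through the indicators
        '_id': uuid.uuid4().hex,
        'domain': config.domain,
        'doc_type': config.referenced_doc_type,
    })
    count = Session.query(adapter.get_table()).count()
    adapter.drop_table()     # clean up
    return count
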
def export_data_source(request, domain, config_id):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()
    try:
        params = process_url_params(request.GET, table.columns)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = _("There was a problem executing your query, please make "
                    "sure your parameters are valid.")
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)

def _get_distinct_values(data_source_configuration, column_config, expansion_limit=DEFAULT_MAXIMUM_EXPANSION):
    """
    Return a tuple. The first item is a list of distinct values in the given
    ExpandedColumn, no longer than expansion_limit. The second is a boolean
    which is True if the number of distinct values in the column is greater
    than the limit.

    :param data_source_configuration: the DataSourceConfiguration backing the report
    :param column_config: the ExpandedColumn to pull distinct values from
    :param expansion_limit: maximum number of distinct values to return
    :return: ([distinct values], too_many_values)
    """
    from corehq.apps.userreports.sql import IndicatorSqlAdapter
    adapter = IndicatorSqlAdapter(data_source_configuration)
    too_many_values = False

    table = adapter.get_table()
    if not table.exists(bind=adapter.engine):
        return [], False

    if column_config.field not in table.c:
        raise ColumnNotFoundError(_(
            'The column "{}" does not exist in the report source! '
            'Please double check your report configuration.').format(column_config.field)
        )
    column = table.c[column_config.field]

    query = adapter.session_helper.Session.query(column).limit(expansion_limit + 1).distinct()
    result = query.all()
    distinct_values = [x[0] for x in result]
    if len(distinct_values) > expansion_limit:
        distinct_values = distinct_values[:expansion_limit]
        too_many_values = True

    return distinct_values, too_many_values

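# A hedged usage sketch for _get_distinct_values: callers check the
# `too_many_values` flag to know whether the expansion is complete. The helper
# name `_expand_column_sketch` is illustrative, not part of the code above.
def _expand_column_sketch(data_source_config, column_config):
    values, too_many_values = _get_distinct_values(data_source_config, column_config)
    if too_many_values:
        # more distinct values exist than the limit allows; only the first
        # expansion_limit values were returned, so the expansion is partial
        pass
    # e.g. build one output column per distinct value
    return [(column_config.field, value) for value in values]
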
def export_data_source(request, domain, config_id):
    config, _ = get_datasource_config_or_404(config_id, domain)
    adapter = IndicatorSqlAdapter(config)
    q = adapter.get_query_object()
    table = adapter.get_table()
    try:
        params = process_url_params(request.GET, table.columns)
        allowed_formats = [
            Format.CSV,
            Format.HTML,
            Format.XLS,
            Format.XLS_2007,
        ]
        if params.format not in allowed_formats:
            msg = ugettext_lazy('format must be one of the following: {}').format(', '.join(allowed_formats))
            return HttpResponse(msg, status=400)
    except UserQueryError as e:
        return HttpResponse(e.message, status=400)

    q = q.filter_by(**params.keyword_filters)
    for sql_filter in params.sql_filters:
        q = q.filter(sql_filter)

    # xls format has limit of 65536 rows
    # First row is taken up by headers
    if params.format == Format.XLS and q.count() >= 65535:
        keyword_params = dict(**request.GET)
        # use default format
        if 'format' in keyword_params:
            del keyword_params['format']
        return HttpResponseRedirect(
            '%s?%s' % (
                reverse('export_configurable_data_source', args=[domain, config._id]),
                urlencode(keyword_params)
            )
        )

    # build export
    def get_table(q):
        yield table.columns.keys()
        for row in q:
            yield row

    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as tmpfile:
        try:
            tables = [[config.table_id, get_table(q)]]
            export_from_tables(tables, tmpfile, params.format)
        except exc.DataError:
            msg = ugettext_lazy(
                "There was a problem executing your query, "
                "please make sure your parameters are valid."
            )
            return HttpResponse(msg, status=400)
        return export_response(Temp(path), params.format, config.display_name)

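# The export above streams rows through a generator so the whole result set is
# never held in memory: the first yielded row is the header, the rest are data
# rows pulled lazily from the SQLAlchemy query. A standalone sketch of the same
# pattern (the function name is illustrative, not part of the view above):
def _rows_for_export(table, query):
    yield table.columns.keys()  # header row first
    for row in query:           # then data rows, streamed from the DB cursor
        yield row
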
def _get_aggregation_from_primary_table(aggregate_table_definition, column_id, sqlalchemy_agg_fn, last_update):
    primary_data_source = aggregate_table_definition.data_source
    primary_data_source_adapter = IndicatorSqlAdapter(primary_data_source)
    with primary_data_source_adapter.session_helper.session_context() as session:
        primary_table = primary_data_source_adapter.get_table()
        aggregation_sql_column = primary_table.c[column_id]
        query = session.query(sqlalchemy_agg_fn(aggregation_sql_column))
        return session.execute(query).scalar()

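# A hedged example of calling _get_aggregation_from_primary_table with a
# standard SQLAlchemy aggregate (sqlalchemy.func.max here; func.count,
# func.sum, etc. work the same way). `table_definition` is assumed to be an
# aggregate table definition like the ones used in the tests in this file.
import sqlalchemy

def _max_of_column_sketch(table_definition, column_id, last_update):
    return _get_aggregation_from_primary_table(
        aggregate_table_definition=table_definition,
        column_id=column_id,
        sqlalchemy_agg_fn=sqlalchemy.func.max,
        last_update=last_update,
    )
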
def _check_weekly_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(self.weekly_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()

    doc_id_column = aggregate_table.c['doc_id']
    week_column = aggregate_table.c['week']

    # before December the case should not exist
    self.assertEqual(0, aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column <= '2017-12-17',
    ).count())

    # from the Monday in December when the case was opened, the case should exist,
    # but should not be flagged as pregnant
    for monday in ('2017-12-18', '2017-12-25', '2018-01-01'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday,
        ).one()
        self.assertEqual(self.case_name, row.name)
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(0, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # from the Monday of the EDD the case should exist, and be flagged as pregnant
    for monday in ('2018-01-15', '2018-01-22', '2018-01-29'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday,
        ).one()
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(1, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # on the Monday of the March visit, the case should exist, be flagged as pregnant, and have a form
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-03-12',
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(1, row.fu_forms_in_month)

    # but the Monday after there are no forms again
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-03-19',
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(None, row.fu_forms_in_month)

    # in the week of April 9, the case should exist, be flagged as pregnant, and have 2 forms
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-04-09',
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(2, row.fu_forms_in_month)

def get_choices_from_data_source_column(query_context):
    adapter = IndicatorSqlAdapter(query_context.report.config)
    table = adapter.get_table()
    sql_column = table.c[query_context.report_filter.field]
    query = adapter.session_helper.Session.query(sql_column)
    if query_context.query:
        query = query.filter(sql_column.contains(query_context.query))

    try:
        return [
            Choice(v[0], v[0])
            for v in query.distinct().order_by(sql_column).limit(query_context.limit).offset(query_context.offset)
        ]
    except ProgrammingError:
        return []

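# A usage sketch for get_choices_from_data_source_column. The real
# query_context type is not shown in this file, so a namedtuple stand-in with
# just the attributes the function reads (report.config, report_filter.field,
# query, limit, offset) is used here; it is hypothetical.
from collections import namedtuple

_QueryContext = namedtuple('_QueryContext', ['report', 'report_filter', 'query', 'limit', 'offset'])

def _choices_sketch(report, report_filter):
    context = _QueryContext(report=report, report_filter=report_filter,
                            query='amb', limit=20, offset=0)
    # returns at most 20 Choice(value, value) pairs whose column value
    # contains 'amb', ordered by the column
    return get_choices_from_data_source_column(context)
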
def _check_basic_results(self):
    aggregate_table_adapter = IndicatorSqlAdapter(self.basic_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()

    doc_id_column = aggregate_table.c['doc_id']

    # the case should appear exactly once in the basic aggregate table
    self.assertEqual(1, aggregate_query.filter(
        doc_id_column == self.case_id,
    ).count())

    row = aggregate_query.filter(
        doc_id_column == self.case_id,
    ).one()
    self.assertEqual(self.case_name, row.name)
    self.assertEqual('2018-01-21', row.pregnancy_start_date)
    self.assertEqual(3, row.fu_forms)

def get_choices(data_source, filter, search_term=None, limit=20, page=0):
    # todo: we may want to log this as soon as mobile UCR stops hitting this
    # for misconfigured filters
    if not isinstance(filter, DynamicChoiceListFilter):
        return []

    adapter = IndicatorSqlAdapter(data_source)
    table = adapter.get_table()
    sql_column = table.c[filter.field]
    query = adapter.session_helper.Session.query(sql_column)
    if search_term:
        query = query.filter(sql_column.contains(search_term))

    offset = page * limit
    try:
        return [v[0] for v in query.distinct().order_by(sql_column).limit(limit).offset(offset)]
    except ProgrammingError:
        return []

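# get_choices pages with OFFSET = page * limit, so page numbering is
# zero-based: page=0 returns the first 20 distinct values, page=2 skips the
# first 40. A hedged call, assuming a data source and a DynamicChoiceListFilter
# like those referenced above (the helper name is illustrative):
def _third_page_of_choices(data_source, dynamic_filter):
    # distinct, ordered values 41-60 matching 'amb'
    return get_choices(data_source, dynamic_filter, search_term='amb', limit=20, page=2)
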
class IndicatorPillowTest(TestCase):

    def setUp(self):
        folder = os.path.join(os.path.dirname(__file__), 'data', 'configs')
        sample_file = os.path.join(folder, 'sample_indicator_config.json')
        self.pillow = ConfigurableIndicatorPillow()
        self.engine = self.pillow.get_sql_engine()
        with open(sample_file) as f:
            structure = json.loads(f.read())
        self.config = DataSourceConfiguration.wrap(structure)
        self.pillow.bootstrap(configs=[self.config])
        self.adapter = IndicatorSqlAdapter(self.engine, self.config)
        self.adapter.rebuild_table()

    def tearDown(self):
        self.adapter.drop_table()
        self.engine.dispose()

    def testFilter(self):
        # note: this is a silly test now that python_filter always returns true
        not_matching = [
            dict(doc_type="NotCommCareCase", domain='user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='not-user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='user-reports', type='not-ticket'),
        ]
        for document in not_matching:
            self.assertTrue(self.pillow.python_filter(document))
        self.assertTrue(self.pillow.python_filter(
            dict(doc_type="CommCareCase", domain='user-reports', type='ticket')
        ))

    def testChangeTransport(self):
        # indicators
        sample_doc, expected_indicators = get_sample_doc_and_indicators()
        self.pillow.change_transport(sample_doc)
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(1, rows.rowcount)
            row = rows.fetchone()
            for k, v in row.items():
                self.assertEqual(expected_indicators[k], v)

def test_table_population(self):
    engine = get_engine()
    adapter = IndicatorSqlAdapter(engine, self.config)

    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {"start_time": now, "end_time": now + one_hour, "person": "al"},
        {"start_time": now + one_hour, "end_time": now + (one_hour * 2), "person": "chris"},
        {"start_time": now + (one_hour * 2), "end_time": now + (one_hour * 3), "person": "katie"},
    ]
    doc = _test_doc(form={'time_logs': logs})

    # Save this document into the table
    adapter.save(doc)

    # Get rows from the table
    with engine.connect() as connection:
        rows = connection.execute(adapter.get_table().select())
        retrieved_logs = [
            {
                'start_time': r[3],
                'end_time': r[4],
                'person': r[5],
            }
            for r in rows
        ]

    # Clean up
    engine.dispose()

    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs,
        logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )

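# The test above reads repeat rows back by position (r[3], r[4], r[5]) because
# the leading columns of the data source table hold document metadata. A sketch
# of the same read done by column name instead, which is less brittle; it
# assumes the table's column names match the indicator ids used in the logs.
def _read_time_logs_by_name(adapter, engine):
    with engine.connect() as connection:
        rows = connection.execute(adapter.get_table().select())
        return [
            {'start_time': r['start_time'], 'end_time': r['end_time'], 'person': r['person']}
            for r in rows
        ]
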
def table_exists(self):
    adapter = IndicatorSqlAdapter(get_engine(), self)
    table = adapter.get_table()
    return table.exists()

class IndicatorPillowTest(TestCase):

    def setUp(self):
        self.config = get_sample_data_source()
        self.pillow = ConfigurableIndicatorPillow()
        self.engine = self.pillow.get_sql_engine()
        self.pillow.bootstrap(configs=[self.config])
        self.adapter = IndicatorSqlAdapter(self.engine, self.config)
        self.adapter.rebuild_table()

    def tearDown(self):
        self.adapter.drop_table()
        self.engine.dispose()

    def test_filter(self):
        # note: this is a silly test now that python_filter always returns true
        not_matching = [
            dict(doc_type="NotCommCareCase", domain='user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='not-user-reports', type='ticket'),
            dict(doc_type="CommCareCase", domain='user-reports', type='not-ticket'),
        ]
        for document in not_matching:
            self.assertTrue(self.pillow.python_filter(document))
        self.assertTrue(self.pillow.python_filter(
            dict(doc_type="CommCareCase", domain='user-reports', type='ticket')
        ))

    def test_change_transport(self):
        sample_doc, _ = get_sample_doc_and_indicators()
        self.pillow.change_transport(sample_doc)
        self._check_sample_doc_state()

    def test_rebuild_indicators(self):
        self.config.save()
        sample_doc, _ = get_sample_doc_and_indicators()
        CommCareCase.get_db().save_doc(sample_doc)
        rebuild_indicators(self.config._id)
        self._check_sample_doc_state()

    def test_bad_integer_datatype(self):
        self.config.save()
        bad_ints = ['a', '', None]
        for bad_value in bad_ints:
            self.pillow.change_transport({
                '_id': uuid.uuid4().hex,
                'doc_type': 'CommCareCase',
                'domain': 'user-reports',
                'type': 'ticket',
                'priority': bad_value,
            })
        # make sure we saved rows to the table for everything
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(len(bad_ints), rows.rowcount)

    def _check_sample_doc_state(self):
        _, expected_indicators = get_sample_doc_and_indicators()
        with self.engine.begin() as connection:
            rows = connection.execute(sqlalchemy.select([self.adapter.get_table()]))
            self.assertEqual(1, rows.rowcount)
            row = rows.fetchone()
            for k, v in row.items():
                if isinstance(expected_indicators[k], decimal.Decimal):
                    self.assertAlmostEqual(expected_indicators[k], v)
                else:
                    self.assertEqual(expected_indicators[k], v)