def handle(self, domain, old_data_source_id, new_data_source_id, **options):
    old_config, _ = get_datasource_config(old_data_source_id, domain)
    new_config, _ = get_datasource_config(new_data_source_id, domain)

    assert old_config.referenced_doc_type == new_config.referenced_doc_type
    old_filter = old_config.get_case_type_or_xmlns_filter()
    new_filter = new_config.get_case_type_or_xmlns_filter()
    assert set(old_filter) == set(new_filter)

    old_adapter = get_indicator_adapter(old_config)
    new_adapter = get_indicator_adapter(new_config)

    old_table = old_adapter.get_table()
    new_table = new_adapter.get_table()

    assert hasattr(old_table.columns, options['date_column'])
    column = getattr(old_table.columns, options['date_column'])

    new_adapter.build_table(initiated_by=options['initiated'], source='migrate_ucr')

    end_date = date(2016, 1, 1)
    query = self.insert_query(old_table, new_table, column, end_date=end_date)
    self.run_query(new_adapter, query)

    start_date = end_date
    end_date = end_date + relativedelta(months=1)
    while start_date < date.today():
        query = self.insert_query(old_table, new_table, column, start_date, end_date)
        self.run_query(new_adapter, query)
        start_date += relativedelta(months=1)
        end_date += relativedelta(months=1)

    query = self.insert_query(old_table, new_table, column, start_date)
    self.run_query(new_adapter, query)

def setUpClass(cls):
    super(UCRAggregationTest, cls).setUpClass()
    # cleanup any previous data
    cls._cleanup_data()

    # setup app
    factory = AppFactory(domain=cls.domain)
    # parent case module, incl opening child cases of main type
    m_parent, f_parent = factory.new_basic_module('Parent Module', cls.parent_case_type)
    factory.form_opens_case(f_parent, case_type=cls.parent_case_type)
    factory.form_opens_case(f_parent, case_type=cls.case_type, is_subcase=True)
    # main module
    m0, f0 = factory.new_basic_module('A Module', cls.case_type)
    f1 = factory.new_form(m0)
    f1.source = cls._get_xform()
    factory.form_requires_case(f1, case_type=cls.case_type, update={
        cp[0]: '/data/{}'.format(cp[0]) for cp in cls.case_properties
    })

    cls.followup_form = f1
    cls.app = factory.app
    cls.app.save()

    # create form and case ucrs
    cls.form_data_source = get_form_data_source(cls.app, cls.followup_form)
    cls.case_data_source = get_case_data_source(cls.app, cls.case_type)
    cls.parent_case_data_source = get_case_data_source(cls.app, cls.parent_case_type)

    # create some data - first just create the case
    cls.parent_case_id = cls._create_parent_case(cls.parent_name)
    cls.case_id = cls._create_case(cls.parent_case_id)
    for fu_date in cls.fu_visit_dates:
        cls._submit_followup_form(cls.case_id, received_on=fu_date)

    # the closed case causes there to be some data with an end_column
    cls.closed_case_id = cls._create_closed_case()

    # populate the UCRs with the data we just created
    cls.form_adapter = get_indicator_adapter(cls.form_data_source)
    cls.case_adapter = get_indicator_adapter(cls.case_data_source)
    cls.parent_case_adapter = get_indicator_adapter(cls.parent_case_data_source)

    cls.form_adapter.rebuild_table()
    cls.case_adapter.rebuild_table()
    cls.parent_case_adapter.rebuild_table()

    _iteratively_build_table(cls.form_data_source)
    _iteratively_build_table(cls.case_data_source)
    _iteratively_build_table(cls.parent_case_data_source)

    # setup AggregateTableDefinition
    cls.monthly_aggregate_table_definition = cls._get_monthly_aggregate_table_definition()
    cls.weekly_aggregate_table_definition = cls._get_weekly_aggregate_table_definition()
    cls.basic_aggregate_table_definition = cls._get_basic_aggregate_table_definition()

    # and adapter
    cls.monthly_adapter = get_indicator_adapter(cls.monthly_aggregate_table_definition)

def handle(self, domain, type_, case_type_or_xmlns, data_source_ids, **options):
    assert type_ in ('xform', 'case')
    self.referenced_type = CASE_DOC_TYPE if type_ == 'case' else XFORM_DOC_TYPE

    configs = []
    for data_source_id in data_source_ids:
        config, _ = get_datasource_config(data_source_id, domain)
        assert config.asynchronous
        assert config.referenced_doc_type == self.referenced_type
        configs.append(config)

    for config in configs:
        adapter = get_indicator_adapter(config)
        adapter.build_table(initiated_by=options['initiated'], source='async_rebuild_table')

    self.domain = domain
    self.case_type_or_xmlns = case_type_or_xmlns
    self.bulk = options['bulk']
    self.database = options['database']
    self.config_ids = [config._id for config in configs]

    ids = []
    for id_ in self._get_ids_to_process():
        ids.append(id_)
        if len(ids) > 999:
            self._save_ids(ids)
            ids = []
    self._save_ids(ids)

    for config in configs:
        if not config.is_static:
            config.meta.build.rebuilt_asynchronously = True
            config.save()

def recalculate_stagnant_cases():
    domain = 'icds-cas'
    config_ids = [
        'static-icds-cas-static-ccs_record_cases_monthly_v2',
        'static-icds-cas-static-ccs_record_cases_monthly_tableau_v2',
        'static-icds-cas-static-child_cases_monthly_v2',
    ]

    stagnant_cases = set()

    for config_id in config_ids:
        config, is_static = get_datasource_config(config_id, domain)
        adapter = get_indicator_adapter(config)
        case_ids = _find_stagnant_cases(adapter)
        celery_task_logger.info(
            "Found {} stagnant cases in config {}".format(len(case_ids), config_id)
        )
        stagnant_cases = stagnant_cases.union(set(case_ids))
        celery_task_logger.info(
            "Total number of stagnant cases is now {}".format(len(stagnant_cases))
        )

    case_accessor = CaseAccessors(domain)
    num_stagnant_cases = len(stagnant_cases)
    current_case_num = 0
    for case_ids in chunked(stagnant_cases, 1000):
        current_case_num += len(case_ids)
        cases = case_accessor.get_cases(list(case_ids))
        for case in cases:
            publish_case_saved(case, send_post_save_signal=False)
        celery_task_logger.info(
            "Resaved {} / {} cases".format(current_case_num, num_stagnant_cases)
        )

def test_array_type_column(self):
    problem_spec = {
        "column_id": "referral_health_problem",
        "datatype": "array",
        "type": "expression",
        "expression": {
            "type": "split_string",
            "string_expression": {
                "type": "property_name",
                "property_name": "referral_health_problem",
            }
        },
    }
    data_source_config = DataSourceConfiguration(
        domain='test',
        display_name='foo',
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[problem_spec],
    )
    adapter = get_indicator_adapter(data_source_config)
    adapter.rebuild_table()
    self.addCleanup(adapter.drop_table)

    # ensure we can save data to the table.
    adapter.save({
        '_id': uuid.uuid4().hex,
        'domain': 'test',
        'doc_type': 'CommCareCase',
        'referral_health_problem': 'bleeding convulsions',
    })

    # and query it back
    qs = adapter.get_query_object()
    self.assertEqual(1, qs.count())
    self.assertEqual(qs.first().referral_health_problem, ['bleeding', 'convulsions'])

def _setup_ucr_tables():
    with mock.patch('corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'):
        with override_settings(SERVER_ENVIRONMENT=TEST_ENVIRONMENT):
            configs = StaticDataSourceConfiguration.by_domain(TEST_DOMAIN)
            adapters = [get_indicator_adapter(config) for config in configs]

            for adapter in adapters:
                try:
                    adapter.drop_table()
                except Exception:
                    pass
                adapter.build_table()

    engine = connection_manager.get_engine('aaa-data')
    metadata = sqlalchemy.MetaData(bind=engine)
    metadata.reflect(bind=engine, extend_existing=True)

    for file_name in os.listdir(INPUT_PATH):
        with open(os.path.join(INPUT_PATH, file_name), encoding='utf-8') as f:
            table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
            table = metadata.tables[table_name]
            columns = [
                '"{}"'.format(c.strip())  # quote to preserve case
                for c in f.readline().split(',')
            ]
            postgres_copy.copy_from(
                f, table, engine, format='csv' if six.PY3 else b'csv',
                null='' if six.PY3 else b'', columns=columns
            )

def handle(self, domain, data_source_id, *args, **kwargs):
    config, _ = get_datasource_config(data_source_id, domain)
    adapter = get_indicator_adapter(config)
    q = adapter.get_query_object()
    document_store = get_document_store_for_doc_type(domain, config.referenced_doc_type)
    bad_rows = []
    for row in with_progress_bar(q, length=q.count()):
        doc_id = row.doc_id
        doc = document_store.get_document(doc_id)

        current_rows = config.get_all_values(doc)
        if len(current_rows) > 1:
            raise ValueError("this command doesn't work for datasources returning multiple rows per doc")

        try:
            current_row = current_rows[0]
        except IndexError:
            # indexing an empty list raises IndexError, not KeyError:
            # the doc emitted no rows, so there is nothing to compare
            continue

        # don't compare the 'inserted_at' columns
        current_row = [val for val in current_row if val.column.database_column_name != 'inserted_at']

        for val in current_row:
            try:
                inserted_value = getattr(row, val.column.database_column_name)
                if (inserted_value != val.value
                        or row.inserted_at.replace(tzinfo=pytz.utc) < parse_datetime(doc['server_modified_on'])):
                    bad_rows.append({
                        'doc_id': row.doc_id,
                        'column_name': val.column.database_column_name,
                        'inserted_at': row.inserted_at.isoformat(),
                        'server_modified_on': doc['server_modified_on'],
                        'stored_value': getattr(row, val.column.database_column_name),
                        'desired_value': val.value,
                        'message': ('column mismatch'
                                    if inserted_value != val.value
                                    else "modified date early"),
                    })
            except AttributeError:
                bad_rows.append({
                    'doc_id': row.doc_id,
                    'column_name': val.column.database_column_name,
                    'inserted_at': 'missing',
                    'server_modified_on': doc['server_modified_on'],
                    'stored_value': 'missing',
                    'desired_value': val.value,
                    'message': 'doc missing',
                })

    filename = 'datasource_mismatches_{}_{}.csv'.format(
        data_source_id[-8:],
        datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    )
    with open(filename, 'w', encoding='utf-8') as f:
        headers = ['doc_id', 'column_name', 'inserted_at', 'server_modified_on',
                   'stored_value', 'desired_value', 'message']
        writer = csv.DictWriter(f, headers)
        writer.writeheader()
        writer.writerows(bad_rows)

    print("Found {} mismatches. Check {} for more details".format(len(bad_rows), filename))

def test_add_nullable_column(self):
    self._setup_data_source('add_nullable_col')

    # assert the new date column isn't in the config yet
    insp = reflection.Inspector.from_engine(self.engine)
    table_name = get_table_name(self.config.domain, self.config.table_id)
    self.assertEqual(
        len([c for c in insp.get_columns(table_name) if c['name'] == 'new_date']), 0
    )

    # add the column to the config
    config = self._get_config('add_nullable_col')
    self.addCleanup(config.delete)
    config.configured_indicators.append({
        "column_id": "new_date",
        "type": "raw",
        "display_name": "new_date opened",
        "datatype": "datetime",
        "property_name": "other_opened_on",
        "is_nullable": True
    })
    config.save()
    adapter = get_indicator_adapter(config)
    engine = adapter.engine

    # mock rebuild_table to ensure the column is added without a full table rebuild
    pillow = get_case_pillow(ucr_configs=[config])
    pillow.processors[0].rebuild_table = mock.MagicMock()
    self.assertFalse(pillow.processors[0].rebuild_table.called)

    insp = reflection.Inspector.from_engine(engine)
    self.assertEqual(
        len([c for c in insp.get_columns(table_name) if c['name'] == 'new_date']), 1
    )

def _init_table(cls, data_source_id):
    datasource_id = StaticDataSourceConfiguration.get_doc_id(cls.domain, data_source_id)
    datasource = StaticDataSourceConfiguration.by_id(datasource_id)
    adapter = get_indicator_adapter(datasource)
    adapter.build_table()
    cls.adapters.append(adapter)
    return adapter

def _rebuild_sql_tables(self, adapters):
    # todo move this code to sql adapter rebuild_if_necessary
    tables_by_engine = defaultdict(dict)
    for adapter in adapters:
        sql_adapter = get_indicator_adapter(adapter.config)
        tables_by_engine[sql_adapter.engine_id][sql_adapter.get_table().name] = sql_adapter

    _assert = soft_assert(to='@'.join(['czue', 'dimagi.com']))
    _notify_cory = lambda msg, obj: _assert(False, msg, obj)

    for engine_id, table_map in tables_by_engine.items():
        engine = connection_manager.get_engine(engine_id)
        with engine.begin() as connection:
            migration_context = get_migration_context(connection, table_map.keys())
            raw_diffs = compare_metadata(migration_context, metadata)
            diffs = reformat_alembic_diffs(raw_diffs)

        tables_to_rebuild = get_tables_to_rebuild(diffs, table_map.keys())
        for table_name in tables_to_rebuild:
            sql_adapter = table_map[table_name]
            if not sql_adapter.config.is_static:
                try:
                    rev_before_rebuild = sql_adapter.config.get_db().get_rev(sql_adapter.config._id)
                    self.rebuild_table(sql_adapter)
                except TableRebuildError as e:
                    _notify_cory(six.text_type(e), sql_adapter.config.to_json())
            else:
                self.rebuild_table(sql_adapter)

def setUpModule():
    if isinstance(Domain.get_db(), Mock):
        # needed to skip setUp for javascript tests thread on Travis
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()

    domain = create_domain('champ-cameroon')
    with override_settings(SERVER_ENVIRONMENT='production'):
        configs = StaticDataSourceConfiguration.by_domain(domain.name)
        adapters = [get_indicator_adapter(config) for config in configs]

        for adapter in adapters:
            adapter.build_table()

        engine = connection_manager.get_engine(UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = get_table_name(domain.name, file_name[:-4])
                table = metadata.tables[table_name]
                postgres_copy.copy_from(
                    f, table, engine, format='csv' if six.PY3 else b'csv',
                    null='' if six.PY3 else b'', header=True
                )

    _call_center_domain_mock.stop()

def tearDownModule():
    if settings.USE_PARTITIONED_DATABASE:
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    with override_settings(SERVER_ENVIRONMENT='icds'):
        configs = StaticDataSourceConfiguration.by_domain('icds-cas')
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            if adapter.config.table_id == 'static-child_health_cases':
                # hack because this is in a migration
                adapter.clear_table()
                continue
            adapter.drop_table()

        engine = connection_manager.get_engine(ICDS_UCR_ENGINE_ID)
        with engine.begin() as connection:
            metadata = sqlalchemy.MetaData(bind=engine)
            metadata.reflect(bind=engine, extend_existing=True)
            table = metadata.tables['ucr_table_name_mapping']
            delete = table.delete()
            connection.execute(delete)

    LocationType.objects.filter(domain='icds-cas').delete()
    SQLLocation.objects.filter(domain='icds-cas').delete()

    Domain.get_by_name('icds-cas').delete()
    _call_center_domain_mock.stop()

def _rebuild_sql_tables(self, adapters):
    tables_by_engine = defaultdict(dict)
    for adapter in adapters:
        sql_adapter = get_indicator_adapter(adapter.config)
        try:
            tables_by_engine[sql_adapter.engine_id][sql_adapter.get_table().name] = sql_adapter
        except BadSpecError:
            _soft_assert = soft_assert(to='{}@{}'.format('jemord', 'dimagi.com'))
            _soft_assert(False, "Broken data source {}".format(adapter.config.get_id))

    _assert = soft_assert(notify_admins=True)
    _notify_rebuild = lambda msg, obj: _assert(False, msg, obj)

    for engine_id, table_map in tables_by_engine.items():
        engine = connection_manager.get_engine(engine_id)
        table_names = list(table_map)
        with engine.begin() as connection:
            migration_context = get_migration_context(connection, table_names)
            raw_diffs = compare_metadata(migration_context, metadata)
            diffs = reformat_alembic_diffs(raw_diffs)

        tables_to_rebuild = get_tables_to_rebuild(diffs, table_names)
        for table_name in tables_to_rebuild:
            sql_adapter = table_map[table_name]
            if not sql_adapter.config.is_static:
                try:
                    self.rebuild_table(sql_adapter)
                except TableRebuildError as e:
                    _notify_rebuild(six.text_type(e), sql_adapter.config.to_json())
            else:
                self.rebuild_table(sql_adapter)

        tables_to_migrate = get_tables_to_migrate(diffs, table_names)
        tables_to_migrate -= tables_to_rebuild
        migrate_tables(engine, raw_diffs, tables_to_migrate)

def test_column_uniqueness_when_truncated(self):
    problem_spec = {
        "display_name": "practicing_lessons",
        "property_name": "long_column",
        "choices": [
            "duplicate_choice_1",
            "duplicate_choice_2",
        ],
        "select_style": "multiple",
        "column_id": "a_very_long_base_selection_column_name_with_limited_room",
        "type": "choice_list",
    }
    data_source_config = DataSourceConfiguration(
        domain='test',
        display_name='foo',
        referenced_doc_type='CommCareCase',
        table_id=uuid.uuid4().hex,
        configured_filter={},
        configured_indicators=[problem_spec],
    )
    adapter = get_indicator_adapter(data_source_config)
    adapter.rebuild_table()

    # ensure we can save data to the table.
    adapter.save({
        '_id': uuid.uuid4().hex,
        'domain': 'test',
        'doc_type': 'CommCareCase',
        'long_column': 'duplicate_choice_1',
    })
    adapter.refresh_table()

    # and query it back
    q = adapter.get_query_object()
    self.assertEqual(1, q.count())

def test_table_population(self):
    adapter = get_indicator_adapter(self.config)
    # Delete and create table
    adapter.rebuild_table()

    # Create a doc
    now = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    logs = [
        {"start_time": now, "end_time": now + one_hour, "person": "al"},
        {"start_time": now + one_hour, "end_time": now + (one_hour * 2), "person": "chris"},
        {"start_time": now + (one_hour * 2), "end_time": now + (one_hour * 3), "person": "katie"},
    ]
    doc = _test_doc(form={'time_logs': logs})

    # Save this document into the table
    adapter.save(doc)
    adapter.refresh_table()

    # Get rows from the table
    rows = adapter.get_query_object()
    retrieved_logs = [
        {
            'start_time': r.start_time,
            'end_time': r.end_time,
            'person': r.person,
        }
        for r in rows
    ]
    # Check those rows against the expected result
    self.assertItemsEqual(
        retrieved_logs,
        logs,
        "The repeat data saved in the data source table did not match the expected data!"
    )

def test_stale_rebuild(self):
    # rebuild indicators in another test will save this
    later_config = DataSourceConfiguration.get(self.config._id)
    later_config.save()
    self.assertNotEqual(self.config._rev, later_config._rev)
    with self.assertRaises(StaleRebuildError):
        self.pillow.processors[0].rebuild_table(get_indicator_adapter(self.config))

def setUp(self):
    self.config = get_data_source_with_related_doc_type()
    self.config.save()
    self.pillow = get_case_pillow(topics=['case-sql'], ucr_configs=[self.config], processor_chunk_size=0)
    self.adapter = get_indicator_adapter(self.config)

    self.pillow.get_change_feed().get_latest_offsets()

def handle(self, file_path, *args, **options):
    domain = 'icds-cas'
    data_source_id = StaticDataSourceConfiguration.get_doc_id(domain, PERSON_TABLE_ID)
    config = StaticDataSourceConfiguration.by_id(data_source_id)
    adapter = get_indicator_adapter(config)
    session_helper = connection_manager.get_session_helper(adapter.engine_id)
    person_table_name = get_table_name(domain, PERSON_TABLE_ID)
    awc_location_table_name = get_table_name(domain, AWC_LOCATION_TABLE_ID)
    session = session_helper.Session

    with open(
        os.path.join(os.path.dirname(__file__), 'sql_scripts', 'nos_of_deaths.sql'),
        encoding='utf-8'
    ) as f:
        sql_script = f.read()

    rows = session.execute(
        sql_script % {
            'person_table_name': person_table_name,
            'awc_location_table_name': awc_location_table_name
        }
    )

    with open(file_path, 'w', encoding='utf-8') as file_object:
        writer = csv.writer(file_object)
        writer.writerow([
            'State',
            'District',
            'AWC',
            'Month',
            'Deaths',
        ])
        writer.writerows(rows)

def _create_data_source(cls):
    cls.data_sources = {}
    cls.adapters = {}
    for backend_id in UCR_BACKENDS:
        config = DataSourceConfiguration(
            backend_id=backend_id,
            domain=cls.domain,
            display_name=cls.domain,
            referenced_doc_type='CommCareCase',
            table_id="foo",
            configured_filter={
                "type": "boolean_expression",
                "operator": "eq",
                "expression": {
                    "type": "property_name",
                    "property_name": "type"
                },
                "property_value": cls.case_type,
            },
            configured_indicators=[
                {
                    "type": "expression",
                    "expression": {
                        "type": "property_name",
                        "property_name": 'state'
                    },
                    "column_id": 'indicator_col_id_state',
                    "display_name": 'indicator_display_name_state',
                    "datatype": "string"
                },
                {
                    "type": "expression",
                    "expression": {
                        "type": "property_name",
                        "property_name": 'city'
                    },
                    "column_id": 'indicator_col_id_city',
                    "display_name": 'indicator_display_name_city',
                    "datatype": "string"
                },
                {
                    "type": "expression",
                    "expression": {
                        "type": "property_name",
                        "property_name": 'number'
                    },
                    "column_id": 'indicator_col_id_number',
                    "datatype": "integer"
                },
            ],
        )
        config.validate()
        config.save()
        rebuild_indicators(config._id)
        adapter = get_indicator_adapter(config)
        adapter.refresh_table()
        cls.data_sources[backend_id] = config
        cls.adapters[backend_id] = adapter

def assertDataSourceAccurate(self, expected_locations):
    adapter = get_indicator_adapter(self.data_source_config)
    query = adapter.get_query_object()
    data_source = query.all()
    self.assertItemsEqual(
        expected_locations,
        [row[-1] for row in data_source]
    )

def setUpClass(cls):
    super(ChunkedUCRProcessorTest, cls).setUpClass()
    cls.config = get_sample_data_source()
    cls.config.save()
    cls.adapter = get_indicator_adapter(cls.config)
    cls.adapter.build_table()
    cls.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)
    cls.pillow = get_case_pillow(processor_chunk_size=100, ucr_configs=[cls.config])

def setUpClass(cls):
    super(AsyncIndicatorTest, cls).setUpClass()
    cls.config = get_data_source_with_related_doc_type()
    cls.config.asynchronous = True
    cls.config.save()
    cls.adapter = get_indicator_adapter(cls.config)
    cls.pillow = get_case_pillow(ucr_configs=[cls.config])
    cls.pillow.get_change_feed().get_latest_offsets()

def setUpClass(cls):
    super(IndicatorPillowTest, cls).setUpClass()
    cls.config = get_sample_data_source()
    cls.config.save()
    cls.adapter = get_indicator_adapter(cls.config)
    cls.adapter.build_table()
    cls.fake_time_now = datetime(2015, 4, 24, 12, 30, 8, 24886)
    cls.pillow = _get_pillow([cls.config])

def setUpClass(cls):
    super(DataSourceConfigurationPartitionTest, cls).setUpClass()
    cls.data_source = get_sample_data_source()
    cls.data_source.sql_settings.partition_config = [
        SQLPartition(column=cls.column, subtype=cls.subtype, constraint=cls.constraint)
    ]
    cls.data_source.save()
    cls.adapter = get_indicator_adapter(cls.data_source)
    cls.adapter.build_table()

def bootstrap(self, configs=None):
    # sets up the initial stuff
    if configs is None:
        configs = self.get_all_configs()

    self.table_adapters = [
        get_indicator_adapter(config, can_handle_laboratory=True)
        for config in configs
    ]
    self.rebuild_tables_if_necessary()
    self.bootstrapped = True
    self.last_bootstrapped = datetime.utcnow()

def delete_all_ucr_tables_for_domain(domain):
    """
    For a given domain, delete all the known UCR data source tables

    This only deletes "known" data sources for the domain.
    To identify "orphaned" tables, see the prune_old_datasources
    management command.
    """
    for config in get_datasources_for_domain(domain):
        adapter = get_indicator_adapter(config)
        adapter.drop_table()

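# Hypothetical usage sketch, not part of the source: how a cleanup flow might call
# the helper above before removing a domain entirely. The domain name and the
# wrapper function are made up; only delete_all_ucr_tables_for_domain itself comes
# from the snippet above.
def _example_domain_teardown():
    # drop every known UCR table for the domain; orphaned tables would still need
    # the prune_old_datasources management command mentioned in the docstring
    delete_all_ucr_tables_for_domain('example-domain')
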
def delete_data_source_shared(domain, config_id, request=None):
    config = get_document_or_404(DataSourceConfiguration, domain, config_id)
    adapter = get_indicator_adapter(config)
    adapter.drop_table()
    config.delete()
    if request:
        messages.success(
            request,
            _(u'Data source "{}" has been deleted.'.format(config.display_name))
        )

def page_context(self):
    config, is_static = get_datasource_config_or_404(self.config_id, self.domain)
    adapter = get_indicator_adapter(config)
    q = adapter.get_query_object()
    return {
        'data_source': config,
        'columns': q.column_descriptions,
        'data': q[:20],
        'total_rows': q.count(),
    }

def _create_data_source(cls):
    cls.data_sources = {}
    cls.adapters = {}
    config = DataSourceConfiguration(
        domain=cls.domain,
        display_name=cls.domain,
        referenced_doc_type='CommCareCase',
        table_id="foo",
        configured_filter={
            "type": "boolean_expression",
            "operator": "eq",
            "expression": {
                "type": "property_name",
                "property_name": "type"
            },
            "property_value": cls.case_type,
        },
        configured_indicators=[
            {
                "type": "expression",
                "expression": {
                    "type": "property_name",
                    "property_name": 'my_date'
                },
                "column_id": 'date_as_string',
                "display_name": 'date_as_string',
                "datatype": "string"
            },
            {
                "type": "expression",
                "expression": {
                    "type": "property_name",
                    "property_name": 'my_date'
                },
                "column_id": 'date_as_date',
                "datatype": "date"
            },
            {
                "type": "expression",
                "expression": {
                    "type": "property_name",
                    "property_name": "my_datetime",
                },
                "column_id": "datetime_as_datetime",
                "datatype": "datetime"
            }
        ],
    )
    config.validate()
    config.save()
    rebuild_indicators(config._id)
    adapter = get_indicator_adapter(config)
    cls.data_sources[UCR_SQL_BACKEND] = config
    cls.adapters[UCR_SQL_BACKEND] = adapter

def rebuild_aggregate_ucr(request, domain, table_id):
    table_definition = get_object_or_404(
        AggregateTableDefinition, domain=domain, table_id=table_id
    )
    aggregate_table_adapter = get_indicator_adapter(table_definition)
    aggregate_table_adapter.rebuild_table(
        initiated_by=request.user.username, source='rebuild_aggregate_ucr'
    )
    populate_aggregate_table_data_task.delay(table_definition.id)
    messages.success(request, 'Table rebuild successfully started.')
    return HttpResponseRedirect(
        reverse(AggregateUCRView.urlname, args=[domain, table_id])
    )

def setUpClass(cls):
    super(AsyncIndicatorTest, cls).setUpClass()
    cls.pillow = get_kafka_ucr_pillow()
    cls.config = get_data_source_with_related_doc_type()
    cls.config.asynchronous = True
    cls.config.save()
    cls.adapter = get_indicator_adapter(cls.config)
    cls.pillow.bootstrap(configs=[cls.config])
    with trap_extra_setup(KafkaUnavailableError):
        cls.pillow.get_change_feed().get_latest_offsets()

def _save_document_helper(indicator, doc):
    eval_context = EvaluationContext(doc)
    something_failed = False
    configs_to_remove = []
    configs = dict()
    for config_id in indicator.indicator_config_ids:
        try:
            configs[config_id] = _get_config(config_id)
        except (ResourceNotFound, StaticDataSourceConfigurationNotFoundError):
            celery_task_logger.info(
                "{} no longer exists, skipping".format(config_id))
            configs_to_remove.append(config_id)
            continue
        except ESError:
            celery_task_logger.info(
                "ES errored when trying to retrieve config")
            something_failed = True
            continue

    for config_id, config in six.iteritems(configs):
        adapter = None
        try:
            adapter = get_indicator_adapter(config, can_handle_laboratory=True)
            adapter.save(doc, eval_context)
            eval_context.reset_iteration()
        except (ProtocolError, ReadTimeout):
            celery_task_logger.info(
                "Riak error when saving config: {}".format(config_id))
            something_failed = True
        except RequestError:
            celery_task_logger.info(
                "Couch error when saving config: {}".format(config_id))
            something_failed = True
        except (ESError, ConnectionTimeout):
            # a database had an issue so log it and go on to the next document
            celery_task_logger.info(
                "ES error when saving config: {}".format(config_id))
            something_failed = True
        except (DatabaseError, InternalError):
            # a database had an issue so log it and go on to the next document
            celery_task_logger.info(
                "psql error when saving config: {}".format(config_id))
            something_failed = True
        except Exception as e:
            # getting the config could fail before the adapter is set
            if adapter:
                adapter.handle_exception(doc, e)
            something_failed = True
        else:
            configs_to_remove.append(config_id)

    rebuild_related_docs = any(
        config.icds_rebuild_related_docs
        for config in six.itervalues(configs) if config
    )
    return (not something_failed, configs_to_remove, rebuild_related_docs)

def _build_indicators(config, document_store, relevant_ids):
    adapter = get_indicator_adapter(config, raise_errors=True)

    for doc in document_store.iter_documents(relevant_ids):
        if config.asynchronous:
            AsyncIndicator.update_record(
                doc.get('_id'), config.referenced_doc_type, config.domain, [config._id]
            )
        else:
            # save is a noop if the filter doesn't match
            adapter.best_effort_save(doc)

def _iteratively_build_table(config, resume_helper=None, in_place=False, limit=-1):
    resume_helper = resume_helper or DataSourceResumeHelper(config)
    indicator_config_id = config._id
    case_type_or_xmlns_list = config.get_case_type_or_xmlns_filter()
    completed_ct_xmlns = resume_helper.get_completed_case_type_or_xmlns()
    if completed_ct_xmlns:
        case_type_or_xmlns_list = [
            case_type_or_xmlns
            for case_type_or_xmlns in case_type_or_xmlns_list
            if case_type_or_xmlns not in completed_ct_xmlns
        ]

    for case_type_or_xmlns in case_type_or_xmlns_list:
        relevant_ids = []
        document_store = get_document_store_for_doc_type(
            config.domain, config.referenced_doc_type,
            case_type_or_xmlns=case_type_or_xmlns)

        for i, relevant_id in enumerate(document_store.iter_document_ids()):
            if i >= limit > -1:
                break
            relevant_ids.append(relevant_id)
            if len(relevant_ids) >= ID_CHUNK_SIZE:
                _build_indicators(config, document_store, relevant_ids)
                relevant_ids = []

        if relevant_ids:
            _build_indicators(config, document_store, relevant_ids)

        resume_helper.add_completed_case_type_or_xmlns(case_type_or_xmlns)

    resume_helper.clear_resume_info()
    if not id_is_static(indicator_config_id):
        if in_place:
            config.meta.build.finished_in_place = True
        else:
            config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # check that a new build has not yet started
            if in_place:
                if config.meta.build.initiated_in_place == current_config.meta.build.initiated_in_place:
                    current_config.meta.build.finished_in_place = True
            else:
                if config.meta.build.initiated == current_config.meta.build.initiated:
                    current_config.meta.build.finished = True
            current_config.save()

    adapter = get_indicator_adapter(config, raise_errors=True)

def _build_indicators(config, document_store, relevant_ids):
    adapter = get_indicator_adapter(config, raise_errors=True, load_source='build_indicators')

    for doc in document_store.iter_documents(relevant_ids):
        if config.asynchronous:
            AsyncIndicator.update_record(
                doc.get('_id'), config.referenced_doc_type, config.domain, [config._id]
            )
        else:
            # save is a noop if the filter doesn't match
            adapter.best_effort_save(doc)

def _check_weekly_results(self):
    aggregate_table_adapter = get_indicator_adapter(
        self.weekly_aggregate_table_definition)
    aggregate_table = aggregate_table_adapter.get_table()
    aggregate_query = aggregate_table_adapter.get_query_object()

    doc_id_column = aggregate_table.c['doc_id']
    week_column = aggregate_table.c['week']

    # before december the case should not exist
    self.assertEqual(
        0,
        aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column <= '2017-12-17'
        ).count())

    # from the monday in december when the case was opened, the case should exist,
    # but should not be flagged as pregnant
    for monday in ('2017-12-18', '2017-12-25', '2018-01-01'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday
        ).one()
        self.assertEqual(self.case_name, row.name)
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(0, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # from the monday of the EDD the case should exist, and be flagged as pregnant
    for monday in ('2018-01-15', '2018-01-22', '2018-01-29'):
        row = aggregate_query.filter(
            doc_id_column == self.case_id,
            week_column == monday,
        ).one()
        self.assertEqual(1, row.open_in_month)
        self.assertEqual(1, row.pregnant_in_month)
        self.assertEqual(None, row.fu_forms_in_month)

    # the monday of the march visit, the case should exist, be flagged as pregnant, and there is a form
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-03-12'
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(1, row.fu_forms_in_month)

    # but the monday after there are no forms again
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-03-19'
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(None, row.fu_forms_in_month)

    # the week of april 9, the case should exist, be flagged as pregnant, and there are 2 forms
    row = aggregate_query.filter(
        doc_id_column == self.case_id,
        week_column == '2018-04-09'
    ).one()
    self.assertEqual(1, row.open_in_month)
    self.assertEqual(1, row.pregnant_in_month)
    self.assertEqual(2, row.fu_forms_in_month)

def test_weekly_aggregation(self):
    # generate our table
    aggregate_table_adapter = get_indicator_adapter(self.weekly_aggregate_table_definition)
    aggregate_table_adapter.rebuild_table()

    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()

    # confirm it's also idempotent
    populate_aggregate_table_data(aggregate_table_adapter)
    self._check_weekly_results()

def _get_engine_ids(self, data_sources, engine_id):
    engine_ids = set()
    for data_source in data_sources:
        if engine_id and data_source.engine_id != engine_id:
            continue
        # Magic: getting the table adds the table to the global sqlalchemy metadata object
        adapter = get_indicator_adapter(data_source)
        adapter.get_table()
        engine_ids.add(adapter.engine_id)

    return engine_ids

def handle(self, *args, **options):
    fake_change_doc = {'doc_type': CASE_DOC_TYPE, 'domain': DOMAIN}

    for data_source_id in DATA_SOURCES:
        print("processing data source %s" % data_source_id)
        data_source, is_static = get_datasource_config(data_source_id, DOMAIN)
        assert is_static
        adapter = get_indicator_adapter(data_source)
        table = adapter.get_table()
        for case_id in self._get_case_ids_to_process(adapter, table, data_source_id):
            change = FakeChange(case_id, fake_change_doc)
            AsyncIndicator.update_from_kafka_change(change, [data_source_id])

def test_raise_error_for_missing_table(self):
    adapter = get_indicator_adapter(self.config, raise_errors=True)
    adapter.drop_table()
    doc = {
        "_id": '123',
        "domain": "domain",
        "doc_type": "CommCareCase",
        "name": 'bob'
    }
    with self.assertRaises(TableNotFoundWarning):
        adapter.best_effort_save(doc)

def _create_data_source(cls):
    cls.data_source = DataSourceConfiguration(
        domain=cls.domain,
        display_name=cls.domain,
        referenced_doc_type='CommCareCase',
        table_id="foo",
        configured_filter={
            "type": "boolean_expression",
            "operator": "eq",
            "expression": {
                "type": "property_name",
                "property_name": "type"
            },
            "property_value": cls.case_type,
        },
        configured_indicators=[
            {
                "type": "expression",
                "expression": {
                    "type": "property_name",
                    "property_name": 'state'
                },
                "column_id": 'indicator_col_id_state',
                "display_name": 'indicator_display_name_state',
                "datatype": "string"
            },
            {
                "type": "expression",
                "expression": {
                    "type": "property_name",
                    "property_name": 'city'
                },
                "column_id": 'indicator_col_id_city',
                "display_name": 'indicator_display_name_city',
                "datatype": "string"
            },
            {
                "type": "expression",
                "expression": {
                    "type": "property_name",
                    "property_name": 'number'
                },
                "column_id": 'indicator_col_id_number',
                "datatype": "integer"
            },
        ],
    )
    cls.data_source.validate()
    cls.data_source.save()
    rebuild_indicators(cls.data_source._id)
    adapter = get_indicator_adapter(cls.data_source)
    adapter.refresh_table()
    cls.adapter = adapter

def bootstrap(self, configs=None):
    # sets up the initial stuff
    if configs is None:
        configs = self.get_all_configs()

    self.table_adapters_by_domain = defaultdict(list)
    for config in configs:
        self.table_adapters_by_domain[config.domain].append(
            get_indicator_adapter(config, can_handle_laboratory=True))

    self.rebuild_tables_if_necessary()
    self.bootstrapped = True
    self.last_bootstrapped = datetime.utcnow()

def _build_indicators(config, document_store, relevant_ids, resume_helper):
    adapter = get_indicator_adapter(config, raise_errors=True, can_handle_laboratory=True)

    last_id = None
    for doc in document_store.iter_documents(relevant_ids):
        # save is a noop if the filter doesn't match
        adapter.best_effort_save(doc)
        last_id = doc.get('_id')
        resume_helper.remove_id(last_id)

    if last_id:
        resume_helper.add_id(last_id)

def handle(self, domain, old_data_source_id, new_data_source_id, **options):
    old_config, _ = get_datasource_config(old_data_source_id, domain)
    new_config, _ = get_datasource_config(new_data_source_id, domain)

    assert old_config.referenced_doc_type == new_config.referenced_doc_type
    old_filter = old_config.get_case_type_or_xmlns_filter()
    new_filter = new_config.get_case_type_or_xmlns_filter()
    assert set(old_filter) == set(new_filter)

    old_adapter = get_indicator_adapter(old_config)
    new_adapter = get_indicator_adapter(new_config)

    old_table = old_adapter.get_table()
    new_table = new_adapter.get_table()

    assert hasattr(old_table.columns, options['date_column'])
    column = getattr(old_table.columns, options['date_column'])

    new_adapter.build_table()

    end_date = date(2016, 1, 1)
    query = self.insert_query(old_table, new_table, column, end_date=end_date)
    self.run_query(new_adapter, query)

    start_date = end_date
    end_date = end_date + relativedelta(months=1)
    while start_date < date.today():
        query = self.insert_query(old_table, new_table, column, start_date, end_date)
        self.run_query(new_adapter, query)
        start_date += relativedelta(months=1)
        end_date += relativedelta(months=1)

    query = self.insert_query(old_table, new_table, column, start_date)
    self.run_query(new_adapter, query)

def _create_data_source(cls):
    cls.data_sources = {}
    cls.adapters = {}
    config = DataSourceConfiguration(
        domain=cls.domain,
        display_name=cls.domain,
        referenced_doc_type='CommCareCase',
        table_id="foo",
        configured_filter={
            "type": "boolean_expression",
            "operator": "eq",
            "expression": {
                "type": "property_name",
                "property_name": "type"
            },
            "property_value": cls.case_type,
        },
        configured_indicators=[{
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": 'my_date'
            },
            "column_id": 'date_as_string',
            "display_name": 'date_as_string',
            "datatype": "string"
        }, {
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": 'my_date'
            },
            "column_id": 'date_as_date',
            "datatype": "date"
        }, {
            "type": "expression",
            "expression": {
                "type": "property_name",
                "property_name": "my_datetime",
            },
            "column_id": "datetime_as_datetime",
            "datatype": "datetime"
        }],
    )
    config.validate()
    config.save()
    rebuild_indicators(config._id)
    adapter = get_indicator_adapter(config)
    adapter.refresh_table()
    cls.data_sources[UCR_SQL_BACKEND] = config
    cls.adapters[UCR_SQL_BACKEND] = adapter

def resume_building_indicators(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding in {}').format(config.table_id, config.domain)
    failure = _('There was an error rebuilding your UCR table {} in {}.').format(config.table_id, config.domain)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        resume_helper = DataSourceResumeHelper(config)

        adapter = get_indicator_adapter(config)
        adapter.log_table_build(
            initiated_by=initiated_by,
            source='resume_building_indicators',
        )
        _iteratively_build_table(config, resume_helper)

def _create_data_source(cls):
    cls.data_sources = {}
    cls.adapters = {}

    # this is a hack to have both sql and es backends created in a class
    # method. alternative would be to have these created on each test run
    for backend_id in UCR_BACKENDS:
        config = DataSourceConfiguration(
            backend_id=backend_id,
            domain=cls.domain,
            display_name=cls.domain,
            referenced_doc_type='CommCareCase',
            table_id="foo",
            configured_filter={
                "type": "boolean_expression",
                "operator": "eq",
                "expression": {
                    "type": "property_name",
                    "property_name": "type"
                },
                "property_value": cls.case_type,
            },
            configured_indicators=[
                {
                    "type": "expression",
                    "expression": {
                        "type": "property_name",
                        "property_name": 'first_name'
                    },
                    "column_id": 'indicator_col_id_first_name',
                    "display_name": 'indicator_display_name_first_name',
                    "datatype": "string"
                },
                {
                    "type": "expression",
                    "expression": {
                        "type": "property_name",
                        "property_name": 'number'
                    },
                    "column_id": 'indicator_col_id_number',
                    "datatype": "integer"
                },
            ],
        )
        config.validate()
        config.save()
        rebuild_indicators(config._id)
        adapter = get_indicator_adapter(config)
        adapter.refresh_table()
        cls.data_sources[backend_id] = config
        cls.adapters[backend_id] = adapter

def save_document(doc_ids):
    lock_keys = []
    for doc_id in doc_ids:
        lock_keys.append(get_async_indicator_modify_lock_key(doc_id))

    with CriticalSection(lock_keys):
        indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids)
        if not indicators:
            return

        first_indicator = indicators[0]
        processed_indicators = []
        failed_indicators = []

        for i in indicators:
            assert i.domain == first_indicator.domain
            assert i.doc_type == first_indicator.doc_type

        indicator_by_doc_id = {i.doc_id: i for i in indicators}
        doc_store = get_document_store(first_indicator.domain, first_indicator.doc_type)
        for doc in doc_store.iter_documents(doc_ids):
            indicator = indicator_by_doc_id[doc['_id']]

            eval_context = EvaluationContext(doc)
            for config_id in indicator.indicator_config_ids:
                adapter = None
                try:
                    config = _get_config(config_id)
                    adapter = get_indicator_adapter(config, can_handle_laboratory=True)
                    adapter.save(doc, eval_context)
                    eval_context.reset_iteration()
                except (ESError, RequestError, ConnectionTimeout):
                    # couch or es had an issue so don't log it and go on to the next doc
                    failed_indicators.append(indicator.pk)
                    break
                except Exception as e:
                    # getting the config could fail before the adapter is set
                    if adapter:
                        adapter.handle_exception(doc, e)
                    failed_indicators.append(indicator.pk)
                    break
            else:
                processed_indicators.append(indicator.pk)

        AsyncIndicator.objects.filter(pk__in=processed_indicators).delete()
        AsyncIndicator.objects.filter(pk__in=failed_indicators).update(
            date_queued=None,
            unsuccessful_attempts=F('unsuccessful_attempts') + 1
        )

def setUp(self):
    config1 = get_data_source_with_related_doc_type()
    config1.save()
    config2 = get_data_source_with_related_doc_type()
    config2.table_id = 'other-config'
    config2.save()
    self.configs = [config1, config2]
    self.adapters = [get_indicator_adapter(c) for c in self.configs]

    # one pillow that has one config, the other has both configs
    self.pillow1 = get_case_pillow(topics=['case-sql'], ucr_configs=[config1], processor_chunk_size=0)
    self.pillow2 = get_case_pillow(topics=['case-sql'], ucr_configs=self.configs, processor_chunk_size=0)

    self.pillow1.get_change_feed().get_latest_offsets()

def get_distinct_values(data_source_configuration, column_config, expansion_limit=DEFAULT_MAXIMUM_EXPANSION):
    """
    Return a tuple. The first item is a list of distinct values in the given
    ExpandedColumn, no longer than expansion_limit. The second is a boolean
    which is True if the number of distinct values in the column is greater
    than the limit.

    :param data_source_configuration:
    :param column_config:
    :param expansion_limit:
    :return:
    """
    adapter = get_indicator_adapter(data_source_configuration)
    return adapter.get_distinct_values(column_config.field, expansion_limit)

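# Hypothetical usage sketch, not part of the source: consuming the
# (values, over_limit) tuple that get_distinct_values documents above. `config` is
# assumed to be a DataSourceConfiguration and `column` an expanded-column object
# exposing a `field` attribute; the limit of 10 and the function name are made up.
def _example_expand_column(config, column):
    values, over_limit = get_distinct_values(config, column, expansion_limit=10)
    if over_limit:
        # the column has more distinct values than we are willing to expand;
        # a caller would typically warn the user and continue with the truncated list
        print("Column {} exceeded the expansion limit of 10".format(column.field))
    return values
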
def tearDownModule():
    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    configs = StaticDataSourceConfiguration.by_domain('champ-cameroon')
    adapters = [get_indicator_adapter(config) for config in configs]

    for adapter in adapters:
        adapter.drop_table()

    Domain.get_by_name('champ-cameroon').delete()
    _call_center_domain_mock.stop()

def setUp(self):
    config1 = get_data_source_with_related_doc_type()
    config1.save()
    config2 = get_data_source_with_related_doc_type()
    config2.table_id = 'other-config'
    config2.save()
    self.configs = [config1, config2]
    self.adapters = [get_indicator_adapter(c) for c in self.configs]

    # one pillow that has one config, the other has both configs
    self.pillow1 = _get_pillow([config1])
    self.pillow2 = _get_pillow(self.configs)

    self.pillow1.get_change_feed().get_latest_offsets()

def rebuild_indicators_in_place(indicator_config_id, initiated_by=None):
    config = _get_config_by_id(indicator_config_id)
    success = _('Your UCR table {} has finished rebuilding').format(config.table_id)
    failure = _('There was an error rebuilding your UCR table {}.').format(config.table_id)
    send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        adapter = get_indicator_adapter(config, can_handle_laboratory=True)
        if not id_is_static(indicator_config_id):
            config.meta.build.initiated_in_place = datetime.utcnow()
            config.meta.build.finished_in_place = False
            config.save()

        adapter.build_table()
        _iteratively_build_table(config, in_place=True)

def rebuild_indicators(indicator_config_id, initiated_by=None, limit=-1, source=None,
                       engine_id=None, diffs=None, trigger_time=None, domain=None):
    config = get_ucr_datasource_config_by_id(indicator_config_id)
    if trigger_time is not None and trigger_time < config.last_modified:
        return

    success = _('Your UCR table {} has finished rebuilding in {}').format(
        config.table_id, config.domain)
    failure = _('There was an error rebuilding your UCR table {} in {}.').format(
        config.table_id, config.domain)
    send = False
    if limit == -1:
        send = toggles.SEND_UCR_REBUILD_INFO.enabled(initiated_by)
    with notify_someone(initiated_by, success_message=success, error_message=failure, send=send):
        adapter = get_indicator_adapter(config)

        if engine_id:
            if getattr(adapter, 'all_adapters', None):
                adapter = [
                    adapter_ for adapter_ in adapter.all_adapters
                    if adapter_.engine_id == engine_id
                ][0]
            elif adapter.engine_id != engine_id:
                raise AssertionError("Engine ID does not match adapter")

        if not id_is_static(indicator_config_id):
            # Save the start time now in case anything goes wrong. This way we'll be
            # able to see if the rebuild started a long time ago without finishing.
            config.meta.build.initiated = datetime.utcnow()
            config.meta.build.finished = False
            config.meta.build.rebuilt_asynchronously = False
            config.save()

        skip_log = bool(limit > 0)  # don't store log for temporary report builder UCRs
        adapter.rebuild_table(initiated_by=initiated_by, source=source, skip_log=skip_log, diffs=diffs)
        _iteratively_build_table(config, limit=limit)

def bootstrap(self, configs=None):
    configs = self.get_filtered_configs(configs)
    if not configs:
        pillow_logging.warning("UCR pillow has no configs to process")

    self.table_adapters_by_domain = defaultdict(list)
    for config in configs:
        self.table_adapters_by_domain[config.domain].append(
            get_indicator_adapter(config, raise_errors=True))

    self.rebuild_tables_if_necessary()
    self.bootstrapped = True
    self.last_bootstrapped = datetime.utcnow()

def setUpClass(cls):
    super(BaseICDSDatasourceTest, cls).setUpClass()
    cls._call_center_domain_mock.start()
    cls.static_datasource = StaticDataSourceConfiguration.wrap(
        cls.get_json(cls.datasource_filename)
    )
    cls.domain = cls.static_datasource.domains[0]
    cls.datasource = StaticDataSourceConfiguration._get_datasource_config(
        cls.static_datasource,
        cls.domain,
    )
    cls.casefactory = CaseFactory(domain=cls.domain)

    cls.adapter = get_indicator_adapter(cls.datasource)
    cls.adapter.rebuild_table()

def test_add_non_nullable_column(self):
    self._setup_data_source('add_non_nullable_col')

    # assert new date isn't in the config
    insp = reflection.Inspector.from_engine(self.engine)
    table_name = get_table_name(self.config.domain, self.config.table_id)
    self.assertEqual(
        len([c for c in insp.get_columns(table_name) if c['name'] == 'new_date']), 0
    )

    # add the column to the config
    config = self._get_config('add_non_nullable_col')
    self.addCleanup(config.delete)
    config.configured_indicators.append({
        "column_id": "new_date",
        "type": "raw",
        "display_name": "new_date opened",
        "datatype": "datetime",
        "property_name": "other_opened_on",
        "is_nullable": False
    })
    config.save()
    adapter = get_indicator_adapter(config)
    engine = adapter.engine

    # mock rebuild table to ensure the table is rebuilt
    pillow = get_kafka_ucr_pillow()
    pillow.processors[0].rebuild_table = MagicMock()
    pillow.bootstrap([config])
    self.assertTrue(pillow.processors[0].rebuild_table.called)

    # column doesn't exist because rebuild table was mocked
    insp = reflection.Inspector.from_engine(engine)
    self.assertEqual(
        len([c for c in insp.get_columns(table_name) if c['name'] == 'new_date']), 0
    )

    # Another time without the mock to ensure the column is there
    pillow = get_kafka_ucr_pillow()
    pillow.bootstrap([config])
    insp = reflection.Inspector.from_engine(engine)
    self.assertEqual(
        len([c for c in insp.get_columns(table_name) if c['name'] == 'new_date']), 1
    )

def setUpClass(cls):
    super(UCRMultiDBTest, cls).setUpClass()
    cls.db2_name = 'cchq_ucr_tests'
    db_conn_parts = connections.connection_manager.get_connection_string('default').split('/')
    db_conn_parts[-1] = cls.db2_name
    cls.db2_url = '/'.join(db_conn_parts)

    cls.context_manager = connections.override_engine('engine-2', cls.db2_url)
    cls.context_manager.__enter__()

    # setup data sources
    data_source_template = get_sample_data_source()
    cls.ds_1 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_1.engine_id = 'engine-1'
    cls.ds_1.save()
    cls.ds_2 = DataSourceConfiguration.wrap(data_source_template.to_json())
    cls.ds_2.engine_id = 'engine-2'
    cls.ds_2.save()

    cls.db_context = temporary_database(cls.db2_name)
    cls.db_context.__enter__()

    cls.ds1_adapter = get_indicator_adapter(cls.ds_1)
    cls.ds2_adapter = get_indicator_adapter(cls.ds_2)