def print_stats(self, domain, short=True, diffs_only=False):
    """Print a Couch-vs-SQL comparison for every doc type in ``domain``.

    For each form and case doc type the ids found in Couch and in SQL are
    handed to ``self._print_status`` together with the diff counts read from
    the diff database. Doc types that only appear in the diff stats are
    reported last, with empty id sets.

    :param domain: name of the domain to report on
    :param short: forwarded unchanged to ``self._print_status``
    :param diffs_only: forwarded to ``self._print_status``; when true and
        nothing differed, a "no differences" message is printed
    :returns: the accumulated (OR-ed) result of all ``self._print_status`` calls
    """
    status = get_couch_sql_migration_status(domain)
    print("Couch to SQL migration status for {}: {}".format(domain, status))

    diff_db = get_diff_db(domain)
    try:
        remaining_stats = diff_db.get_diff_stats()
    except OperationalError:
        # Stats could not be read: report every doc type with zero diffs.
        remaining_stats = {}

    def report(doc_type, couch_ids, sql_ids):
        # Consume the entry so that only unexpected doc types are left over.
        diff_count, docs_with_diffs = remaining_stats.pop(doc_type, (0, 0))
        return self._print_status(
            doc_type, couch_ids, sql_ids,
            diff_count, docs_with_diffs, short, diffs_only
        )

    has_diffs = False
    for doc_type in doc_types():
        has_diffs |= report(
            doc_type,
            set(get_form_ids_by_type(domain, doc_type)),
            set(FormAccessorSQL.get_form_ids_in_domain_by_type(domain, doc_type)),
        )

    has_diffs |= report(
        "XFormInstance-Deleted",
        set(get_doc_ids_in_domain_by_type(
            domain, "XFormInstance-Deleted", XFormInstance.get_db())),
        set(FormAccessorSQL.get_deleted_form_ids_in_domain(domain)),
    )

    has_diffs |= report(
        'CommCareCase',
        set(get_case_ids_in_domain(domain)),
        set(CaseAccessorSQL.get_case_ids_in_domain(domain)),
    )

    has_diffs |= report(
        'CommCareCase-Deleted',
        set(get_doc_ids_in_domain_by_type(
            domain, "CommCareCase-Deleted", XFormInstance.get_db())),
        set(CaseAccessorSQL.get_deleted_case_ids_in_domain(domain)),
    )

    # Whatever is still in the stats belongs to doc types not handled above.
    for doc_type, (diff_count, docs_with_diffs) in remaining_stats.items():
        has_diffs |= self._print_status(
            doc_type, set(), set(),
            diff_count, docs_with_diffs, short, diffs_only
        )

    if diffs_only and not has_diffs:
        print(shell_green("No differences found between old and new docs!"))
    return has_diffs
def get_diff_stats(self, domain):
    """Collect per-doc-type migration discrepancies for ``domain``.

    A doc type is included in the result only when it has recorded diffs or
    when the number of ids in Couch differs from the number in SQL.

    :param domain: name of the domain to inspect
    :returns: dict mapping doc type to a tuple
        ``(couch_count, sql_count, diff_count, num_docs_with_diffs)``
    """
    db = get_diff_db(domain)
    diff_stats = db.get_diff_stats()

    stats = {}

    def _update_stats(doc_type, couch_count, sql_count):
        # Pops the entry so that unhandled doc types remain in diff_stats.
        diff_count, num_docs_with_diffs = diff_stats.pop(doc_type, (0, 0))
        if diff_count or couch_count != sql_count:
            stats[doc_type] = (couch_count, sql_count, diff_count, num_docs_with_diffs)

    for doc_type in doc_types():
        form_ids_in_couch = len(set(get_form_ids_by_type(domain, doc_type)))
        form_ids_in_sql = len(set(
            FormAccessorSQL.get_form_ids_in_domain_by_type(domain, doc_type)))
        _update_stats(doc_type, form_ids_in_couch, form_ids_in_sql)

    form_ids_in_couch = len(set(get_doc_ids_in_domain_by_type(
        domain, "XFormInstance-Deleted", XFormInstance.get_db())))
    form_ids_in_sql = len(set(FormAccessorSQL.get_deleted_form_ids_in_domain(domain)))
    _update_stats("XFormInstance-Deleted", form_ids_in_couch, form_ids_in_sql)

    case_ids_in_couch = len(set(get_case_ids_in_domain(domain)))
    case_ids_in_sql = len(set(CaseAccessorSQL.get_case_ids_in_domain(domain)))
    _update_stats("CommCareCase", case_ids_in_couch, case_ids_in_sql)

    if self.strict:
        # only care about these in strict mode
        case_ids_in_couch = len(set(get_doc_ids_in_domain_by_type(
            domain, "CommCareCase-Deleted", XFormInstance.get_db())))
        case_ids_in_sql = len(set(CaseAccessorSQL.get_deleted_case_ids_in_domain(domain)))
        _update_stats("CommCareCase-Deleted", case_ids_in_couch, case_ids_in_sql)

    # _update_stats pops from diff_stats, so iterate over a snapshot of the
    # keys; iterating the live key view while popping raises RuntimeError on
    # Python 3.
    for key in list(diff_stats):
        _update_stats(key, 0, 0)

    return stats
def _user_ids_in_domain(self):
    """Return the set of CommCareUser and WebUser doc ids in ``self.domain``."""
    from corehq.apps.domain.dbaccessors import get_doc_ids_in_domain_by_type

    return {
        user_id
        for doc_type in ('CommCareUser', 'WebUser')
        for user_id in get_doc_ids_in_domain_by_type(self.domain, doc_type)
    }
def rebuild_indicators(indicator_config_id):
    """Rebuild the indicator table for the given data source config.

    The table is rebuilt, the ids of all referenced documents in the config's
    domain are queued in a Redis set (in chunks of 1000), and the ids are then
    handed to ``_build_indicators``.

    :param indicator_config_id: id of the data source configuration
    """
    config = _get_config_by_id(indicator_config_id)
    table_adapter = IndicatorSqlAdapter(config)
    source_db = _get_db(config.referenced_doc_type)
    redis = get_redis_client().client.get_client()
    queue_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        build_meta = config.meta.build
        build_meta.initiated = datetime.datetime.utcnow()
        build_meta.finished = False
        config.save()
        # The key is computed again now that the config has been saved.
        queue_key = _get_redis_key_for_config(config)

    table_adapter.rebuild_table()
    doc_ids = get_doc_ids_in_domain_by_type(
        config.domain,
        config.referenced_doc_type,
        database=source_db,
    )
    for id_chunk in chunked(doc_ids, 1000):
        redis.sadd(queue_key, *id_chunk)

    _build_indicators(indicator_config_id, doc_ids)
def test_deleted_case_migration(self):
    """Soft-deleted cases (and their forms) migrate to SQL without diffs."""
    parent_case_id, child_case_id = uuid.uuid4().hex, uuid.uuid4().hex
    parent_case = create_and_save_a_case(
        self.domain_name, case_id=parent_case_id, case_name='test parent')
    child_case = create_and_save_a_case(
        self.domain_name, case_id=child_case_id, case_name='test child')
    set_parent_case(self.domain_name, child_case, parent_case)

    # Two case-creation forms plus the form that sets the parent index.
    form_ids = self._get_form_ids()
    self.assertEqual(3, len(form_ids))

    deletion_id = 'test-deletion-with-cases'
    FormAccessors(self.domain.name).soft_delete_forms(
        form_ids, datetime.utcnow(), deletion_id)
    CaseAccessors(self.domain.name).soft_delete_cases(
        [parent_case_id, child_case_id], datetime.utcnow(), deletion_id)

    deleted_in_couch = get_doc_ids_in_domain_by_type(
        self.domain_name, "CommCareCase-Deleted", XFormInstance.get_db())
    self.assertEqual(2, len(deleted_in_couch))

    self._do_migration_and_assert_flags(self.domain_name)

    deleted_in_sql = CaseAccessorSQL.get_deleted_case_ids_in_domain(self.domain_name)
    self.assertEqual(2, len(deleted_in_sql))
    self._compare_diffs([])

    parent_transactions = CaseAccessorSQL.get_transactions(parent_case_id)
    self.assertEqual(2, len(parent_transactions))
    first, second = parent_transactions[0], parent_transactions[1]
    self.assertTrue(first.is_case_create)
    self.assertTrue(second.is_form_transaction)

    child_transactions = CaseAccessorSQL.get_transactions(child_case_id)
    self.assertEqual(2, len(child_transactions))
    first, second = child_transactions[0], child_transactions[1]
    self.assertTrue(first.is_case_create)
    self.assertTrue(second.is_case_index)
def get_primary_db_case_ids(domain, doc_type):
    """Return the case ids of ``doc_type`` from the domain's primary backend.

    SQL-backed domains are answered by ``get_sql_case_ids``; otherwise the
    ids are read from the Couch database of ``CommCareCase`` and returned as
    a set.
    """
    if not should_use_sql_backend(domain):
        couch_db = CommCareCase.get_db()
        return set(get_doc_ids_in_domain_by_type(domain, doc_type, couch_db))
    return get_sql_case_ids(domain, doc_type)
def get_primary_db_form_ids(domain, doc_type, startdate, enddate):
    """Return the form ids of ``doc_type`` from the domain's primary backend.

    ``startdate`` and ``enddate`` are only applied for SQL-backed domains;
    the Couch branch ignores them and returns a set of all matching ids.
    """
    if not should_use_sql_backend(domain):
        # date filtering not supported for couch
        # NOTE(review): form ids are read through CommCareCase.get_db() --
        # presumably forms and cases share one Couch database; confirm.
        couch_db = CommCareCase.get_db()
        return set(get_doc_ids_in_domain_by_type(domain, doc_type, couch_db))
    return get_sql_form_ids(domain, doc_type, startdate, enddate)
def test_hqsubmission_migration(self):
    """A form stored with doc_type 'HQSubmission' is migrated as a form."""
    form = create_and_save_a_form(self.domain_name)
    form.doc_type = 'HQSubmission'
    form.save()

    couch_ids = get_doc_ids_in_domain_by_type(
        self.domain_name, "HQSubmission", XFormInstance.get_db())
    self.assertEqual(1, len(couch_ids))

    self._do_migration_and_assert_flags(self.domain_name)

    migrated_ids = self._get_form_ids()
    self.assertEqual(1, len(migrated_ids))
    self._compare_diffs([])
def test_get_doc_ids_in_domain_by_type_nomatch(self):
    """A doc in the queried domain but with another doc_type is not returned."""
    doc = {
        '_id': uuid.uuid4().hex,
        'domain': 'match-domain',
        'doc_type': 'nomatch-type',
    }
    self.db.save_doc(doc)
    try:
        ids = get_doc_ids_in_domain_by_type('match-domain', 'match-type', self.db)
        self.assertEqual(0, len(ids))
    finally:
        # Remove the fixture even when the assertion fails, so it cannot
        # leak into other tests that use the same database.
        self.db.delete_doc(doc)
def test_get_doc_ids_in_domain_by_type(self):
    """Only the id of the doc with the requested type is returned."""
    user_role = UserRole(domain=self.domain)
    group = Group(domain=self.domain)
    xform = XFormInstance(domain=self.domain)

    # Register each cleanup right after its save: if a later save fails,
    # the documents that were already written are still deleted.
    for doc in (user_role, group, xform):
        doc.save()
        self.addCleanup(doc.delete)

    # Unpacking into a one-element list also asserts exactly one id came back.
    [doc_id] = get_doc_ids_in_domain_by_type(self.domain, UserRole)
    self.assertEqual(doc_id, user_role.get_id)
def test_deleted_form_migration(self):
    """A soft-deleted form is migrated as a deleted form without diffs."""
    form = create_and_save_a_form(self.domain_name)
    accessor = FormAccessors(self.domain.name)
    accessor.soft_delete_forms([form.form_id], datetime.utcnow(), 'test-deletion')

    deleted_in_couch = get_doc_ids_in_domain_by_type(
        self.domain_name, "XFormInstance-Deleted", XFormInstance.get_db())
    self.assertEqual(1, len(deleted_in_couch))

    self._do_migration_and_assert_flags(self.domain_name)

    deleted_in_sql = FormAccessorSQL.get_deleted_form_ids_in_domain(self.domain_name)
    self.assertEqual(1, len(deleted_in_sql))
    self._compare_diffs([])
def get_diff_stats(self, domain):
    """Summarise Couch/SQL discrepancies per doc type for ``domain``.

    Only doc types with recorded diffs, or whose Couch and SQL id counts
    differ, appear in the result.

    :param domain: name of the domain to inspect
    :returns: dict of ``doc_type -> (couch_count, sql_count, diff_count,
        num_docs_with_diffs)``
    """
    db = get_diff_db(domain)
    diff_stats = db.get_diff_stats()

    stats = {}

    def _update_stats(doc_type, couch_count, sql_count):
        # Removes the doc type from diff_stats; leftovers are handled below.
        diff_count, num_docs_with_diffs = diff_stats.pop(doc_type, (0, 0))
        if diff_count or couch_count != sql_count:
            stats[doc_type] = (couch_count, sql_count, diff_count, num_docs_with_diffs)

    for doc_type in doc_types():
        form_ids_in_couch = len(set(get_form_ids_by_type(domain, doc_type)))
        form_ids_in_sql = len(set(FormAccessorSQL.get_form_ids_in_domain_by_type(domain, doc_type)))
        _update_stats(doc_type, form_ids_in_couch, form_ids_in_sql)

    form_ids_in_couch = len(set(get_doc_ids_in_domain_by_type(
        domain, "XFormInstance-Deleted", XFormInstance.get_db())
    ))
    form_ids_in_sql = len(set(FormAccessorSQL.get_deleted_form_ids_in_domain(domain)))
    _update_stats("XFormInstance-Deleted", form_ids_in_couch, form_ids_in_sql)

    case_ids_in_couch = len(set(get_case_ids_in_domain(domain)))
    case_ids_in_sql = len(set(CaseAccessorSQL.get_case_ids_in_domain(domain)))
    _update_stats("CommCareCase", case_ids_in_couch, case_ids_in_sql)

    if self.strict:
        # only care about these in strict mode
        case_ids_in_couch = len(set(get_doc_ids_in_domain_by_type(
            domain, "CommCareCase-Deleted", XFormInstance.get_db())
        ))
        case_ids_in_sql = len(set(CaseAccessorSQL.get_deleted_case_ids_in_domain(domain)))
        _update_stats("CommCareCase-Deleted", case_ids_in_couch, case_ids_in_sql)

    # Copy the remaining keys first: _update_stats mutates diff_stats, and
    # changing a dict's size while iterating it raises RuntimeError on
    # Python 3.
    leftover_doc_types = list(diff_stats)
    for key in leftover_doc_types:
        _update_stats(key, 0, 0)

    return stats
def handle(self, *args, **options):
    """Copy all RepeatRecord docs of a domain from the old db to the new one.

    ``args[0]`` is the domain name. Documents are read from the database of
    ``Domain`` and bulk-saved, without their ``_rev``, into the database of
    ``RepeatRecord`` in chunks of 250. Progress is printed after each chunk.
    """
    domain = args[0]
    old_db = Domain.get_db()
    new_db = RepeatRecord.get_db()
    # Copying a database onto itself would be pointless; bail out early.
    assert old_db.dbname != new_db.dbname
    doc_ids = get_doc_ids_in_domain_by_type(domain, 'RepeatRecord', old_db)
    count = len(doc_ids)
    chunksize = 250
    copied = 0
    for docs in chunked(iter_docs(old_db, doc_ids, chunksize), chunksize):
        for doc in docs:
            # The source revision is dropped before saving with new_edits=False.
            doc.pop('_rev', None)
        new_db.bulk_save(docs, new_edits=False)
        # Count what has actually been saved so the progress line includes
        # the chunk that was just written.
        copied += len(docs)
        print('checked %s / %s' % (copied, count))
def test_edited_deleted_form(self):
    """A form edited long ago and then soft-deleted migrates without diffs."""
    form = create_and_save_a_form(self.domain_name)
    form.edited_on = datetime.utcnow() - timedelta(days=400)
    form.save()

    accessor = FormAccessors(self.domain.name)
    accessor.soft_delete_forms([form.form_id], datetime.utcnow(), 'test-deletion')

    deleted_in_couch = get_doc_ids_in_domain_by_type(
        form.domain, "XFormInstance-Deleted", XFormInstance.get_db())
    self.assertEqual(deleted_in_couch, [form.form_id])

    self._do_migration_and_assert_flags(form.domain)

    deleted_in_sql = FormAccessorSQL.get_deleted_form_ids_in_domain(form.domain)
    self.assertEqual(deleted_in_sql, [form.form_id])
    self._compare_diffs([])
def test_edited_deleted_form(self):
    """Migrate a soft-deleted form whose ``edited_on`` is 400 days old."""
    form = create_and_save_a_form(self.domain_name)
    long_ago = datetime.utcnow() - timedelta(days=400)
    form.edited_on = long_ago
    form.save()

    FormAccessors(self.domain.name).soft_delete_forms(
        [form.form_id],
        datetime.utcnow(),
        'test-deletion',
    )

    # Before the migration the form is listed as deleted in Couch ...
    couch_db = XFormInstance.get_db()
    self.assertEqual(
        get_doc_ids_in_domain_by_type(form.domain, "XFormInstance-Deleted", couch_db),
        [form.form_id],
    )

    self._do_migration_and_assert_flags(form.domain)

    # ... and afterwards as deleted in SQL, with no diffs recorded.
    self.assertEqual(
        FormAccessorSQL.get_deleted_form_ids_in_domain(form.domain),
        [form.form_id],
    )
    self._compare_diffs([])
def rebuild_indicators(indicator_config_id):
    """Rebuild the indicator table for one data source configuration.

    Ids still to be processed are tracked in a Redis set keyed by config id
    and revision. If that set is non-empty its members are processed;
    otherwise the table is rebuilt from scratch and the set is filled with
    all referenced doc ids of the config's domain.

    :param indicator_config_id: id of the data source configuration; ids
        starting with the static prefix are loaded as static configs
    """
    is_static = indicator_config_id.startswith(StaticDataSourceConfiguration._datasource_id_prefix)
    if is_static:
        config = StaticDataSourceConfiguration.by_id(indicator_config_id)
        rev = 'static'
    else:
        config = DataSourceConfiguration.get(indicator_config_id)
        rev = config._rev
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.save()

    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    client = get_redis_client().client.get_client()
    redis_key = 'ucr_queue-{}:{}'.format(indicator_config_id, rev)

    # Read the pending set once: this saves a round trip and guarantees that
    # the ids checked for emptiness are the same ids that get processed.
    relevant_ids = client.smembers(redis_key)
    if not relevant_ids:
        adapter.rebuild_table()
        relevant_ids = get_doc_ids_in_domain_by_type(config.domain, config.referenced_doc_type,
                                                     database=couchdb)
        if relevant_ids:
            client.sadd(redis_key, *relevant_ids)

    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
            client.srem(redis_key, doc.get('_id'))
        except DataError as e:
            # The id stays in the Redis set because srem was not reached.
            logging.exception('problem saving document {} to table. {}'.format(doc['_id'], e))

    if not is_static:
        client.delete(redis_key)
        config.meta.build.finished = True
        config.save()
def handle(self, *args, **options):
    """Find the RepeatRecord of a domain with the latest ``last_checked``.

    ``args[0]`` is the domain name. Every RepeatRecord doc in the database
    of ``Domain`` is wrapped and inspected; progress is printed every 100
    docs and whenever a later ``last_checked`` value is found.
    """
    domain = args[0]
    db = Domain.get_db()
    doc_ids = get_doc_ids_in_domain_by_type(domain, 'RepeatRecord', db)
    count = len(doc_ids)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('found %s doc ids' % count)
    latest = datetime.min
    latest_doc = None
    for i, doc in enumerate(iter_docs(db, doc_ids)):
        wrapped = RepeatRecord.wrap(doc)
        if i % 100 == 0:
            print('checked %s / %s' % (i, count))
        # Records without a last_checked value are skipped.
        if wrapped.last_checked and wrapped.last_checked > latest:
            latest = wrapped.last_checked
            latest_doc = wrapped
            print('new latest: %s' % latest)

    if latest_doc:
        print('latest repeater date is %s' % latest)
        print('latest repeater is %s' % latest_doc._id)
    else:
        print('no relevant repeaters found')
def test_get_doc_ids_in_domain_by_type_initial_empty(self):
    """No ids are returned for a domain/doc type with no saved docs."""
    ids = get_doc_ids_in_domain_by_type('some-domain', 'some-doc-type', self.db)
    self.assertEqual(0, len(ids))
def print_stats(self, domain, short=True, diffs_only=False):
    """Print a Couch-vs-SQL comparison for every doc type in ``domain``.

    Form ids are always read from Couch and SQL. Case information comes from
    the doc counts stored in the state db when they exist, and from Couch and
    SQL otherwise. Doc types that only appear in the diff stats are reported
    last, with empty id sets.

    :param domain: name of the domain to report on
    :param short: forwarded to ``self._print_status``; when false, the ids
        of cases that could not be loaded from Couch are printed as well
    :param diffs_only: forwarded to ``self._print_status``; when true and
        nothing differed, a "no differences" message is printed
    :returns: truthy when any difference was reported
    """
    status = get_couch_sql_migration_status(domain)
    print("Couch to SQL migration status for {}: {}".format(domain, status))

    db = open_state_db(domain, self.state_dir)
    try:
        remaining_stats = db.get_diff_stats()
    except OperationalError:
        # Stats could not be read: report every doc type with zero diffs.
        remaining_stats = {}

    def report(doc_type, couch_ids, sql_ids):
        # Consume the entry so that only unexpected doc types are left over.
        diff_count, docs_with_diffs = remaining_stats.pop(doc_type, (0, 0))
        return self._print_status(
            doc_type, couch_ids, sql_ids,
            diff_count, docs_with_diffs, short, diffs_only,
        )

    has_diffs = False
    for doc_type in doc_types():
        couch_form_ids = set(get_form_ids_by_type(domain, doc_type))
        if doc_type == "XFormInstance":
            # HQSubmission docs are counted together with XFormInstance.
            couch_form_ids.update(get_doc_ids_in_domain_by_type(
                domain, "HQSubmission", XFormInstance.get_db()))
        sql_form_ids = set(
            FormAccessorSQL.get_form_ids_in_domain_by_type(domain, doc_type))
        has_diffs |= report(doc_type, couch_form_ids, sql_form_ids)

    has_diffs |= report(
        "XFormInstance-Deleted",
        set(get_doc_ids_in_domain_by_type(
            domain, "XFormInstance-Deleted", XFormInstance.get_db())),
        set(FormAccessorSQL.get_deleted_form_ids_in_domain(domain)),
    )

    ZERO = Counts(0, 0)
    if db.has_doc_counts():
        doc_counts = db.get_doc_counts()
        couch_missing_cases = doc_counts.get("CommCareCase-couch", ZERO).missing
    else:
        doc_counts = None
        couch_missing_cases = 0

    for doc_type in CASE_DOC_TYPES:
        if doc_counts is not None:
            counts = doc_counts.get(doc_type, ZERO)
            if counts.missing:
                couch_case_ids = db.get_missing_doc_ids(doc_type)
            else:
                couch_case_ids = set()
            # NOTE(review): the counts record itself (not a set of ids) is
            # passed as the SQL argument here -- presumably _print_status
            # accepts both; confirm.
            sql_case_ids = counts
        elif doc_type == "CommCareCase":
            couch_case_ids = set(get_case_ids_in_domain(domain))
            sql_case_ids = set(CaseAccessorSQL.get_case_ids_in_domain(domain))
        elif doc_type == "CommCareCase-Deleted":
            couch_case_ids = set(get_doc_ids_in_domain_by_type(
                domain, "CommCareCase-Deleted", XFormInstance.get_db()))
            sql_case_ids = set(
                CaseAccessorSQL.get_deleted_case_ids_in_domain(domain))
        else:
            raise NotImplementedError(doc_type)

        has_diffs |= report(doc_type, couch_case_ids, sql_case_ids)

        if doc_type == "CommCareCase" and couch_missing_cases:
            has_diffs = True
            print(shell_red(
                "%s cases could not be loaded from Couch" % couch_missing_cases))
            if not short:
                for case_id in db.get_missing_doc_ids("CommCareCase-couch"):
                    print(case_id)

    # Whatever is still in the stats belongs to doc types not handled above.
    for doc_type, (diff_count, docs_with_diffs) in remaining_stats.items():
        has_diffs |= self._print_status(
            doc_type, set(), set(),
            diff_count, docs_with_diffs, short, diffs_only,
        )

    if diffs_only and not has_diffs:
        print(shell_green("No differences found between old and new docs!"))
    return has_diffs
def get_doc_ids(self, domain):
    """Yield ``(document class, doc ids)`` for each type in ``self.doc_types``.

    The ids are those returned by ``get_doc_ids_in_domain_by_type`` for
    ``domain`` and the doc type.
    """
    for doc_type in self.doc_types:
        yield (
            get_document_class_by_doc_type(doc_type),
            get_doc_ids_in_domain_by_type(domain, doc_type),
        )
def _user_ids_in_domain(self):
    """Collect the ids of all CommCareUser and WebUser docs in ``self.domain``.

    :returns: a set of doc ids
    """
    from corehq.apps.domain.dbaccessors import get_doc_ids_in_domain_by_type

    ids_per_type = [
        set(get_doc_ids_in_domain_by_type(self.domain, doc_type))
        for doc_type in ('CommCareUser', 'WebUser')
    ]
    return set().union(*ids_per_type)
def get_primary_db_case_ids(domain, doc_type, startdate, enddate):
    """Return the case ids of ``doc_type`` from the domain's primary backend.

    ``startdate`` and ``enddate`` are only applied for SQL-backed domains;
    the Couch branch ignores them and returns a set of all matching ids.
    """
    if not should_use_sql_backend(domain):
        # date filtering not supported for couch
        couch_db = CommCareCase.get_db()
        return set(get_doc_ids_in_domain_by_type(domain, doc_type, couch_db))
    return get_sql_case_ids(domain, doc_type, startdate, enddate)