def _get_cases_to_process(self, domain):
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        cases = CommCareCaseSQL.objects.using(db).filter(
            domain=domain, type='household', closed=True)
        for case in cases:
            yield case.case_id

def handle(self, domain, case_type, start_from_db=None, **options):
    print("Resyncing messaging models for %s/%s ..." % (domain, case_type))

    db_aliases = get_db_aliases_for_partitioned_query()
    db_aliases.sort()
    if start_from_db:
        if start_from_db not in db_aliases:
            raise CommandError("DB alias not recognized: %s" % start_from_db)
        index = db_aliases.index(start_from_db)
        db_aliases = db_aliases[index:]

    print("Iterating over databases: %s" % db_aliases)

    for db_alias in db_aliases:
        print("")
        print("Creating tasks for cases in %s ..." % db_alias)
        case_ids = list(
            CommCareCaseSQL.objects
            .using(db_alias)
            .filter(domain=domain, type=case_type, deleted=False)
            .values_list('case_id', flat=True)
        )
        for case_id in with_progress_bar(case_ids):
            sync_case_for_messaging.delay(domain, case_id)

def get_metas(self, model=BlobMeta):
    metas = []
    for db in get_db_aliases_for_partitioned_query():
        metas.extend(model.objects.using(db).all())
        if model is BlobMeta:
            metas.extend(get_form_attachment_blob_metas_by_key(None, db))
    return metas

def delete_old_images():
    start = datetime.utcnow()
    max_age = start - timedelta(days=90)
    db = get_blob_db()

    def _get_query(db_name, max_age=max_age):
        return BlobMeta.objects.using(db_name).filter(
            content_type='image/jpeg',
            type_code=CODES.form_attachment,
            domain='icds-cas',
            created_on__lt=max_age,
        )

    run_again = False
    for db_name in get_db_aliases_for_partitioned_query():
        bytes_deleted = 0
        metas = list(_get_query(db_name)[:1000])
        if metas:
            for meta in metas:
                bytes_deleted += meta.content_length or 0
            db.bulk_delete(metas=metas)
            datadog_counter('commcare.icds_images.bytes_deleted', value=bytes_deleted)
            datadog_counter('commcare.icds_images.count_deleted', value=len(metas))
            run_again = True

    if run_again:
        delete_old_images.delay()

def _get_cases_to_process(self):
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        for case_type in ('household', 'household_member'):
            cases = CommCareCaseSQL.objects.using(db).filter(
                domain=self.domain, type=case_type)
            for case in cases:
                yield case

def delete_all_forms_and_blob_metadata(cls):
    for db in get_db_aliases_for_partitioned_query():
        with connections[db].cursor() as cursor:
            cursor.execute("""
                DELETE FROM blobs_blobmeta;
                DELETE FROM form_processor_xformattachmentsql;
                DELETE FROM form_processor_xforminstancesql;
            """)

def handle(self, **options):
    verbose = options['verbosity'] >= 2
    if verbose:
        global log_sql
        log_sql = log_sql_verbose

    for db in get_db_aliases_for_partitioned_query():
        if _index_exists(db, UNIQIE_INDEX_NAME):
            print(self.style.SUCCESS(
                'Unique index already exists on db: {}'.format(db)))
            continue

        _add_temp_index(db)
        case_ids = _get_case_ids_with_dupe_indices(db)
        attempts = 0
        while case_ids and attempts < 3:
            attempts += 1
            print('{} cases found with duplicate indices. DB: {}, attempt: {}'.format(
                len(case_ids), db, attempts))
            _delete_duplicate_indices(case_ids, db)
            case_ids = _get_case_ids_with_dupe_indices(db)

        if case_ids:
            print(self.style.ERROR(
                '{} cases still have duplicate '
                'indices after 3 attempts for db: {}'.format(len(case_ids), db)))
            grouped_indices = groupby(
                CommCareCaseIndexSQL.objects.using(db).filter(case_id__in=case_ids),
                key=lambda c: c.case_id)
            for case_id, indices in grouped_indices:
                print('--> Case: {}\n'.format(case_id))
                print('    {}'.format('\n    '.join(unicode(i) for i in indices)))
                print('\n')
        else:
            print(self.style.WARNING(
                'Attempting to create unique index and constraint for db: {}'.format(db)))
            try:
                _add_unique_constraint_to_case_index_table(db)
            except Exception as e:
                print(self.style.ERROR(
                    'Failed to create unique constraint on DB {}: {}'.format(db, e)))
                print(self.style.WARNING('Temporary index left in place'))
            else:
                print(self.style.SUCCESS('Unique constraint added to db {}'.format(db)))
                _drop_index(db, IDENTIFIER_INDEX_NAME)

def delete_schedule_instances_by_case_id(domain, case_id):
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        CaseTimedScheduleInstance,
        CaseAlertScheduleInstance,
    )
    for cls in (CaseAlertScheduleInstance, CaseTimedScheduleInstance):
        for db_name in get_db_aliases_for_partitioned_query():
            cls.objects.using(db_name).filter(domain=domain, case_id=case_id).delete()

def _delete_all_sql_sharded_models(model_class, domain=None):
    assert issubclass(model_class, PartitionedModel)
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        query = model_class.objects.using(db)
        if domain:
            # filter() returns a new queryset; it must be reassigned, or the
            # unfiltered delete() below would wipe every row in the shard
            query = query.filter(domain=domain)
        query.delete()

def sql_db_aliases(self):
    all_db_aliases = get_db_aliases_for_partitioned_query() if self.is_sharded() \
        else [router.db_for_read(self.model_class)]
    if self.limit_db_aliases:
        db_aliases = list(set(all_db_aliases) & set(self.limit_db_aliases))
        assert db_aliases, 'Limited DBs not in expected list: {} {}'.format(
            all_db_aliases, self.limit_db_aliases)
        return db_aliases
    return all_db_aliases

def delete_all_sql_forms(cls, domain=None):
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    logger.debug("Deleting all SQL xforms for domain %s", domain)
    params = {"type_code__in": [CODES.form_xml, CODES.form_attachment]}
    if domain:
        params["domain"] = domain
    for db in get_db_aliases_for_partitioned_query():
        BlobMeta.objects.using(db).filter(**params).delete()
    cls._delete_all_sql_sharded_models(XFormInstanceSQL, domain)

def _get_sql_cases_modified_on_date(self, date):
    num_cases = 0
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        num_cases += (
            CommCareCaseSQL.objects
            .using(db)
            .filter(server_modified_on__gte=date,
                    server_modified_on__lt=date + relativedelta(months=1))
            .count()
        )
    return num_cases

def execute(self, domain_name):
    if not self.is_app_installed():
        return
    model = self.get_model_class()
    for db_name in get_db_aliases_for_partitioned_query():
        model.objects.using(db_name).filter(
            **{self.domain_filter_kwarg: domain_name}
        ).delete()

def _get_case_id_batches(self):
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        case_ids = (
            CommCareCaseSQL.objects
            .using(db)
            .filter(domain=self.domain, type=CASE_TYPE_EPISODE,
                    closed=False, deleted=False)
            .values_list('case_id', flat=True)
        )
        yield case_ids

async def wipe_blobdb(commit=False):
    """Wipe shards in parallel"""
    coros = [
        wipe_shard(dbname, commit)
        for dbname in get_db_aliases_for_partitioned_query()
    ]
    bytes_deleted_list = await asyncio.gather(*coros)
    return sum(bytes_deleted_list)

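# Usage sketch (not from the original source): drive the coroutine above from
# synchronous code with asyncio.run. Assumes no event loop is already running
# and that commit=False is a dry run, as the signature suggests.
import asyncio

would_delete = asyncio.run(wipe_blobdb(commit=False))
print("would delete %d bytes across all shards" % would_delete)
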
def handle(self, **options):
    sharded_models = list(get_all_sharded_models())
    for database in get_db_aliases_for_partitioned_query():
        for model in sharded_models:
            invalid_data = get_count_of_unmatched_models_by_shard(database, model)
            if invalid_data:
                for shard_id, count in invalid_data:
                    print('found {} unexpected {}s in {} (shard {}).'.format(
                        count, model.__name__, database, shard_id))

def get_deleted_form_ids_in_domain(self, domain):
    result = []
    for db_name in get_db_aliases_for_partitioned_query():
        result.extend(
            self.using(db_name)
            .annotate(state_deleted=F('state').bitand(XFormInstance.DELETED))
            .filter(domain=domain, state_deleted=XFormInstance.DELETED)
            .values_list('form_id', flat=True)
        )
    return result

def setUpClass(cls):
    super(TestRunSql, cls).setUpClass()
    cls.db = TemporaryFilesystemBlobDB()
    for db in get_db_aliases_for_partitioned_query():
        with connections[db].cursor() as cursor:
            cursor.execute("""
                DROP TRIGGER IF EXISTS legacy_xform_attachment_insert_not_allowed
                ON form_processor_xformattachmentsql;
            """)
    # this test requires a clean slate (no forms or blob metadata)
    cls.delete_all_forms_and_blob_metadata()

def _case_to_case_index_ratio(self):
    if not should_use_sql_backend(self.domain):
        self.stdout.write('\nUnable to get case to index ratio of Couch domain\n')
        return

    db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
    case_query = CommCareCaseSQL.objects.using(db_name).filter(domain=self.domain)
    index_query = CommCareCaseIndexSQL.objects.using(db_name).filter(domain=self.domain)
    case_count = estimate_row_count(case_query, db_name)
    case_index_count = estimate_row_count(index_query, db_name)
    self._print_value('Ratio of cases to case indices: 1 : ',
                      case_index_count / case_count)

def _case_to_case_index_ratio(self):
    db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
    case_query = CommCareCase.objects.using(db_name).filter(domain=self.domain)
    index_query = CommCareCaseIndex.objects.using(db_name).filter(domain=self.domain)
    case_count = estimate_row_count(case_query, db_name)
    case_index_count = estimate_row_count(index_query, db_name)
    self._print_value('Ratio of cases to case indices: 1 : ',
                      case_index_count / case_count)

def _get_sql_forms_received_on_date(self, date):
    num_forms = 0
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        num_forms += (
            XFormInstanceSQL.objects
            .using(db)
            .filter(received_on__gte=date,
                    received_on__lt=date + relativedelta(months=1))
            .filter(state=XFormInstanceSQL.NORMAL)
            .exclude(xmlns=DEVICE_LOG_XMLNS)
            .count()
        )
    return num_forms

def tearDownClass(cls):
    cls.db.close()
    super(TestRunSql, cls).tearDownClass()
    for db in get_db_aliases_for_partitioned_query():
        with connections[db].cursor() as cursor:
            cursor.execute("""
                CREATE TRIGGER legacy_xform_attachment_insert_not_allowed
                BEFORE INSERT ON form_processor_xformattachmentsql
                EXECUTE PROCEDURE insert_not_allowed();
            """)

def _get_cases(self):
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        ccs_record_case_ids = (
            CommCareCaseSQL.objects
            .using(db)
            .filter(domain=self.domain, type='ccs_record', closed=False)
            .values_list('case_id', flat=True)
        )
        for case_ids in chunked(ccs_record_case_ids, 100):
            cases = self.case_accessor.get_cases(list(case_ids))
            for case in cases:
                yield case

def _get_sql_cases_by_doc_type(domain, startdate=None, enddate=None):
    counter = Counter()
    for db_alias in get_db_aliases_for_partitioned_query():
        queryset = CommCareCaseSQL.objects.using(db_alias).filter(domain=domain)
        if startdate is not None:
            queryset = queryset.filter(server_modified_on__gte=startdate)
        if enddate is not None:
            queryset = queryset.filter(server_modified_on__lt=enddate)
        counter['CommCareCase'] += queryset.filter(deleted=False).count()
        counter['CommCareCase-Deleted'] += queryset.filter(deleted=True).count()
    return counter

def first_form_received_on(self):
    min_date = datetime(2200, 1, 1)
    for db in get_db_aliases_for_partitioned_query():
        result = XFormInstanceSQL.objects.using(db).filter(
            domain=self.domain).aggregate(Min('received_on'))
        date = result.get('received_on__min')
        if date and date < min_date:
            min_date = date
    if min_date.year == 2200:
        return None
    else:
        return min_date

def get_case_owner_ids(domain):
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    db_aliases = get_db_aliases_for_partitioned_query()
    owner_ids = set()
    for db_alias in db_aliases:
        owner_ids.update(
            fast_distinct_in_domain(CommCareCaseSQL, 'owner_id', domain, using=db_alias))
    return owner_ids

def setup_days_records(cls, day):
    for doc_type, ucr_table_mapping in UCR_MAPPING.items():
        for table_id, doc_type_filters in ucr_table_mapping.items():
            for doc_type_filter in doc_type_filters:
                for db in get_db_aliases_for_partitioned_query():
                    cls.objects.get_or_create(
                        db_alias=db,
                        day=day,
                        doc_type=doc_type,
                        table_id=table_id,
                        doc_type_filter=doc_type_filter,
                    )

def iter_sql_cases_with_sorted_transactions(domain):
    sql = f"""
        SELECT cx.case_id
        FROM {CommCareCaseSQL._meta.db_table} cx
            INNER JOIN {CaseTransaction._meta.db_table} tx
                ON cx.case_id = tx.case_id
        WHERE cx.domain = %s AND tx.details LIKE %s
    """
    reason = f'%{SortTransactionsRebuild._REASON}%'
    for dbname in get_db_aliases_for_partitioned_query():
        with CommCareCaseSQL.get_cursor_for_partition_db(dbname) as cursor:
            cursor.execute(sql, [domain, reason])
            yield from {case_id for case_id, in cursor.fetchall()}

def handle(self, **options):
    verbose = options['verbose']
    csv_mode = options['csv']
    if csv_mode:
        print('shard_id,model_name,doc_count,valid/invalid')
    for database in get_db_aliases_for_partitioned_query():
        if verbose:
            print('Checking database {}...'.format(database))
        shard_info = get_database_shard_info_for_testing(database)
        if csv_mode:
            print(shard_info.to_csv())
        else:
            print(shard_info)

def delete_timed_schedule_instances_for_schedule(cls, schedule_id):
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        TimedScheduleInstance,
        CaseTimedScheduleInstance,
    )
    if cls not in (TimedScheduleInstance, CaseTimedScheduleInstance):
        raise TypeError("Expected TimedScheduleInstance or CaseTimedScheduleInstance")
    _validate_uuid(schedule_id)
    for db_name in get_db_aliases_for_partitioned_query():
        cls.objects.using(db_name).filter(timed_schedule_id=schedule_id).delete()

def _case_to_case_index_ratio(self):
    if not should_use_sql_backend(self.domain):
        self.stdout.write('\nUnable to get case to index ratio of Couch domain\n')
        return

    db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
    case_count = _get_count_from_explain(
        db_name,
        CommCareCaseSQL.objects.using(db_name).filter(domain=self.domain)
    )
    case_index_count = _get_count_from_explain(
        db_name,
        CommCareCaseIndexSQL.objects.using(db_name).filter(domain=self.domain)
    )
    # true division, not floor division: the ratio is meaningful as a float
    self._print_value('Ratio of cases to case indices',
                      case_count / float(case_index_count))

def _get_ids_to_process(self):
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    dbs = get_db_aliases_for_partitioned_query()
    if self.database:
        dbs = [db for db in dbs if db == self.database]
    for db in dbs:
        ids_ = self._get_ids(db)
        num_ids = len(ids_)
        print("processing %d docs from db %s" % (num_ids, db))
        for i, id_ in enumerate(ids_):
            yield id_
            if i % 1000 == 0:
                print("processed %d / %d docs from db %s" % (i, num_ids, db))

def run_messaging_rule(domain, rule_id):
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return
    progress_helper = MessagingRuleProgressHelper(rule_id)
    total_cases_count = CaseES().domain(domain).case_type(rule.case_type).count()
    progress_helper.set_total_cases_to_be_processed(total_cases_count)
    db_aliases = get_db_aliases_for_partitioned_query()
    progress_helper.set_initial_progress(shard_count=len(db_aliases))
    for db_alias in db_aliases:
        run_messaging_rule_for_shard.delay(domain, rule_id, db_alias)

def _get_person_case_ids_to_process():
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query
    dbs = get_db_aliases_for_partitioned_query()
    for db in dbs:
        case_ids = (
            CommCareCaseSQL.objects
            .using(db)
            .filter(domain=DOMAIN, type="person")
            .values_list('case_id', flat=True)
        )
        num_case_ids = len(case_ids)
        print("processing %d docs from db %s" % (num_case_ids, db))
        for i, case_id in enumerate(case_ids):
            yield case_id
            if i % 1000 == 0:
                print("processed %d / %d docs from db %s" % (i, num_case_ids, db))

def get_sql_case_ids(domain, doc_type, startdate, enddate):
    sql_ids = set()
    deleted = doc_type == 'CommCareCase-Deleted'
    for db_alias in get_db_aliases_for_partitioned_query():
        queryset = (CommCareCaseSQL.objects.using(db_alias)
                    .filter(domain=domain, deleted=deleted))
        if startdate:
            queryset = queryset.filter(server_modified_on__gte=startdate)
        if enddate:
            queryset = queryset.filter(server_modified_on__lt=enddate)
        sql_ids.update(list(queryset.values_list('case_id', flat=True)))
    return sql_ids

def _get_stale_data(run_config):
    for db in get_db_aliases_for_partitioned_query():
        print(f"Starting db {db}")
        matching_records_for_db = _get_primary_data_for_db(db, run_config)
        chunk_size = 1000
        for chunk in chunked(matching_records_for_db, chunk_size):
            doc_ids = [val[0] for val in chunk]
            ucr_insertion_dates = _get_ucr_insertion_dates(
                run_config.domain, run_config.table_id, doc_ids)
            for doc_id, doc_type, sql_modified_on in chunk:
                ucr_insert_date = ucr_insertion_dates.get(doc_id)
                # A record is stale if it never reached the UCR table, or if the
                # UCR copy predates the SQL modification by more than one second
                # (the tolerance handles small time drift between databases)
                if (not ucr_insert_date
                        or (sql_modified_on - ucr_insert_date) > timedelta(seconds=1)):
                    ucr_date_string = ucr_insert_date.isoformat() if ucr_insert_date else ''
                    yield (doc_id, doc_type, ucr_date_string, sql_modified_on.isoformat())

def run_case_update_rules_for_domain(domain, now=None):
    now = now or datetime.utcnow()

    run_record = DomainCaseRuleRun.objects.create(
        domain=domain,
        started_on=datetime.utcnow(),
        status=DomainCaseRuleRun.STATUS_RUNNING,
    )

    if should_use_sql_backend(domain):
        for db in get_db_aliases_for_partitioned_query():
            run_case_update_rules_for_domain_and_db.delay(domain, now, run_record.pk, db=db)
    else:
        # explicitly pass db=None so that the serial task decorator has access
        # to db in the key generation
        run_case_update_rules_for_domain_and_db.delay(domain, now, run_record.pk, db=None)

def handle(self, **options):
    verbose = options['verbosity'] >= 2
    if verbose:
        global log_sql
        log_sql = log_sql_verbose

    for db in get_db_aliases_for_partitioned_query():
        if _index_exists(db, UNIQIE_INDEX_NAME):
            print(self.style.SUCCESS('Unique index already exists on db: {}'.format(db)))
            continue

        _add_temp_index(db)
        case_ids = _get_case_ids_with_dupe_indices(db)
        attempts = 0
        while case_ids and attempts < 3:
            attempts += 1
            print('{} cases found with duplicate indices. DB: {}, attempt: {}'.format(
                len(case_ids), db, attempts))
            _delete_duplicate_indices(case_ids, db)
            case_ids = _get_case_ids_with_dupe_indices(db)

        if case_ids:
            print(self.style.ERROR(
                '{} cases still have duplicate '
                'indices after 3 attempts for db: {}'.format(len(case_ids), db)))
            grouped_indices = groupby(
                CommCareCaseIndexSQL.objects.using(db).filter(case_id__in=case_ids),
                key=lambda c: c.case_id)
            for case_id, indices in grouped_indices:
                print('--> Case: {}\n'.format(case_id))
                print('    {}'.format('\n    '.join(six.text_type(i) for i in indices)))
                print('\n')
        else:
            print(self.style.WARNING(
                'Attempting to create unique index and constraint for db: {}'.format(db)))
            try:
                _add_unique_constraint_to_case_index_table(db)
            except Exception as e:
                print(self.style.ERROR(
                    'Failed to create unique constraint on DB {}: {}'.format(db, e)))
                print(self.style.WARNING('Temporary index left in place'))
            else:
                print(self.style.SUCCESS('Unique constraint added to db {}'.format(db)))
                _drop_index(db, IDENTIFIER_INDEX_NAME)

def check_db_tables(app_configs, **kwargs):
    from corehq.sql_db.routers import ICDS_REPORTS_APP, ICDS_MODEL
    from corehq.sql_db.models import PartitionedModel
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query

    errors = []

    # some apps only apply to specific envs
    env_specific_apps = {
        ICDS_MODEL: settings.ICDS_ENVS,
        ICDS_REPORTS_APP: settings.ICDS_ENVS,
    }

    skip = (
        'warehouse',  # remove this once the warehouse tables are created
    )

    def _check_model(model_class, using=None):
        try:
            model_class._default_manager.using(using).all().exists()
        except Exception as e:
            return Error('Error querying model on database "{}": "{}.{}": {}.{}({})'.format(
                using or 'default',
                model_class._meta.app_label, model_class.__name__,
                e.__class__.__module__, e.__class__.__name__,
                e
            ))

    for model in apps.get_models():
        app_label = model._meta.app_label
        if app_label in skip:
            continue
        enabled_envs = env_specific_apps.get(app_label)
        if enabled_envs and settings.SERVER_ENVIRONMENT not in enabled_envs:
            continue
        if issubclass(model, PartitionedModel):
            for db in get_db_aliases_for_partitioned_query():
                error = _check_model(model, using=db)
                error and errors.append(error)
        else:
            error = _check_model(model)
            error and errors.append(error)
    return errors

def sync_cases(self, domain):
    db_aliases = get_db_aliases_for_partitioned_query()
    db_aliases.sort()

    if should_use_sql_backend(domain):
        case_accessor = CaseReindexAccessor(domain)
        case_ids = (case.case_id for case in iter_all_rows(case_accessor))
    else:
        changes = _get_case_iterator(domain).iter_all_changes()
        case_ids = (case.id for case in changes)

    next_event = time.time() + 10
    for i, case_id in enumerate(case_ids):
        sync_case_for_messaging.delay(domain, case_id)
        if time.time() > next_event:
            print("Queued %d cases for domain %s" % (i + 1, domain))
            next_event = time.time() + 10

def handle(self, db_name, **options):
    db_names = get_db_aliases_for_partitioned_query()
    if db_name or len(db_names) == 1:
        _update_forms_in_db(db_name or db_names[0])
    else:
        if not confirm('Running without "db_name" will execute on ALL databases in parallel. Continue?'):
            raise CommandError('abort')
        greenlets = []
        for db_name in db_names:
            g = gevent.spawn(_update_forms_in_db, db_name)
            greenlets.append(g)
        gevent.joinall(greenlets)
        try:
            for job in greenlets:
                job.get()
        except Exception:
            traceback.print_exc()

def get_count_of_active_schedule_instances_due(domain, due_before):
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        AlertScheduleInstance,
        TimedScheduleInstance,
        CaseAlertScheduleInstance,
        CaseTimedScheduleInstance,
    )

    classes = (AlertScheduleInstance, TimedScheduleInstance,
               CaseAlertScheduleInstance, CaseTimedScheduleInstance)

    result = 0
    for db_alias in get_db_aliases_for_partitioned_query():
        for cls in classes:
            result += cls.objects.using(db_alias).filter(
                domain=domain,
                active=True,
                next_event_due__lt=due_before,
            ).count()
    return result

def get_last_growth_monitoring_form(domain, case_id):
    transactions = CaseAccessorSQL.get_transactions_for_case_rebuild(case_id)
    form_ids = [t.form_id for t in transactions if t.form_id]

    forms_under_consideration = []
    db_names = get_db_aliases_for_partitioned_query()
    for db_name in db_names:
        result = XFormInstanceSQL.objects.using(db_name).filter(
            domain=domain,
            form_id__in=form_ids,
            xmlns=GROWTH_MONITORING_XMLNS,
            state=XFormInstanceSQL.NORMAL,
        ).order_by('-received_on').first()
        if result:
            forms_under_consideration.append(result)

    if not forms_under_consideration:
        return None

    forms_under_consideration.sort(key=lambda form: form.received_on, reverse=True)
    return forms_under_consideration[0]

def handle(self, domain, **options):
    # `domain` is already bound as a named parameter; re-reading it from
    # **options (as the original did) would return None and clobber it
    debug = options.get('debug')
    case_ids = options.get('case_id')
    db = options.get('db')

    self.log_filename = 'undo_uuid_clash.{}.log'.format(datetime.utcnow().isoformat())
    print('\nWriting output to log file: {}\n'.format(self.log_filename))

    if case_ids:
        form_ids = set()
        for case in CaseAccessorSQL.get_cases(case_ids):
            assert not domain or case.domain == domain, \
                'Case "%s" not in domain "%s"' % (case.case_id, domain)
            form_ids.update(case.xform_ids)
        with self:
            check_and_process_forms(form_ids, self, debug)
    else:
        if domain:
            domains = [domain]
        else:
            domains = iter_domains()

        for domain in domains:
            print("Checking domain: %s" % domain)
            form_ids_to_check = set()
            dbs = [db] if db else get_db_aliases_for_partitioned_query()
            for dbname in dbs:
                form_ids_to_check.update(
                    XFormInstanceSQL.objects.using(dbname)
                    .filter(domain=domain, state=XFormInstanceSQL.DEPRECATED)
                    .values_list('orig_id', flat=True)
                )
            print('  Found %s forms to check' % len(form_ids_to_check))

            with self:
                for chunk in chunked(form_ids_to_check, 500):
                    check_and_process_forms(chunk, self, debug)

def delete_expired_blobs():
    run_again = False
    bytes_deleted = 0
    for dbname in get_db_aliases_for_partitioned_query():
        expired = list(BlobMeta.objects.using(dbname).filter(
            expires_on__isnull=False,
            expires_on__lt=_utcnow(),
        )[:1000])
        if not expired:
            continue
        if len(expired) == 1000:
            run_again = True
        get_blob_db().bulk_delete(metas=expired)
        log.info("deleted expired blobs: %r", [m.key for m in expired])
        shard_deleted = sum(m.content_length for m in expired)
        bytes_deleted += shard_deleted
        datadog_counter('commcare.temp_blobs.bytes_deleted', value=shard_deleted)

    legacy_exists, legacy_bytes = _delete_legacy_expired_blobs()
    if run_again or legacy_exists:
        delete_expired_blobs.delay()

    return bytes_deleted + legacy_bytes

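# Distilled pattern (illustrative, not from the original source): nearly every
# example above fans the same ORM query out over each shard database and chains
# the results. `for_each_shard` is a hypothetical helper name.
from corehq.sql_db.util import get_db_aliases_for_partitioned_query

def for_each_shard(model_class, **filters):
    """Yield matching rows of a partitioned model across all shard DBs."""
    for db_alias in get_db_aliases_for_partitioned_query():
        yield from model_class.objects.using(db_alias).filter(**filters)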