def get_case_schedule_instances_for_domain(self, domain):
    """Return every case-based schedule instance for ``domain``.

    Queries both the alert and the timed partitioned models and
    materializes the combined results into a single list.
    """
    alert_instances = paginate_query_across_partitioned_databases(
        CaseAlertScheduleInstance, Q(domain=domain))
    timed_instances = paginate_query_across_partitioned_databases(
        CaseTimedScheduleInstance, Q(domain=domain))
    return list(alert_instances) + list(timed_instances)
def get_timed_schedule_instances_for_schedule(schedule):
    """Iterate the TimedScheduleInstances belonging to ``schedule``.

    :param schedule: a TimedSchedule; anything else is rejected by
        ``_validate_class``
    :return: an iterator of TimedScheduleInstance rows from all partitions
    """
    from corehq.messaging.scheduling.models import TimedSchedule
    from corehq.messaging.scheduling.scheduling_partitioned.models import TimedScheduleInstance

    _validate_class(schedule, TimedSchedule)
    return paginate_query_across_partitioned_databases(
        TimedScheduleInstance,
        Q(timed_schedule_id=schedule.schedule_id),
        # Tag the query for load tracking, matching
        # get_alert_schedule_instances_for_schedule which already does this.
        load_source='schedule_instances_for_schedule',
    )
def get_active_schedule_instance_ids(cls, due_before, due_after=None):
    """Yield ``(domain, schedule_instance_id, next_event_due)`` tuples for
    active schedule instances of ``cls``.

    Only instances due on or before ``due_before`` are included; when
    ``due_after`` is given it must be earlier than ``due_before`` and
    restricts results to instances due strictly after it.

    :raises TypeError: if ``cls`` is not AlertScheduleInstance or
        TimedScheduleInstance
    :raises ValueError: if ``due_before <= due_after``
    """
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        AlertScheduleInstance,
        TimedScheduleInstance,
    )

    if cls not in (AlertScheduleInstance, TimedScheduleInstance):
        raise TypeError(
            "Expected AlertScheduleInstance or TimedScheduleInstance")

    query = Q(active=True, next_event_due__lte=due_before)
    if due_after:
        if due_before <= due_after:
            raise ValueError("Expected due_before > due_after")
        query &= Q(next_event_due__gt=due_after)

    yield from paginate_query_across_partitioned_databases(
        cls,
        query,
        values=['domain', 'schedule_instance_id', 'next_event_due'],
        load_source='get_schedule_instance_ids',
    )
def iter_patch_form_diffs(domain, *, kind=None, doc_ids=None, by_kind=None):
    """Yield doc diffs for patch forms in ``domain``.

    Exactly one selection mode applies:

    - ``kind`` + ``doc_ids``: diff the given form or case ids
      (``kind`` must be "forms" or "cases").
    - ``by_kind``: a dict mapping "forms"/"cases" to id lists.
    - neither: scan all patch forms in the domain.

    :raises ValueError: on conflicting or malformed selection arguments.
    """
    if kind:
        if by_kind:
            raise ValueError("cannot query 'kind' and 'by_kind' together")
        if kind not in ["forms", "cases"]:
            raise ValueError(f"kind must be 'forms' or 'cases'; got {kind}")
        if not doc_ids:
            raise ValueError(f"please specify doc ids: --select={kind}:id,...")
        by_kind = {kind: doc_ids}
    if by_kind:
        if by_kind.keys() - {"forms", "cases"}:
            kinds = list(by_kind)
            raise ValueError(f"valid kinds 'forms' and 'cases'; got {kinds}")
        # Copy: the original extended by_kind["forms"] in place, mutating
        # the caller's list (and doc_ids) when case form ids were appended.
        form_ids = list(by_kind.get("forms", []))
        case_ids = by_kind.get("cases", [])
        if case_ids:
            # may be inefficient for cases with many forms
            for case in get_sql_cases(case_ids):
                form_ids.extend(case.xform_ids)
        forms = (f for f in get_sql_forms(form_ids)
                 if f.xmlns == PatchForm.xmlns)
    else:
        # based on iter_form_ids_by_xmlns
        q_expr = Q(domain=domain, xmlns=PatchForm.xmlns)
        forms = paginate_query_across_partitioned_databases(
            XFormInstanceSQL, q_expr, load_source='couch_to_sql_migration')
    for form in forms:
        yield from iter_doc_diffs(form)
def paginated_case_ids(domain, case_type):
    """Yield the case_id of every non-deleted case of ``case_type`` in
    ``domain``, paging across all partitioned databases."""
    rows = paginate_query_across_partitioned_databases(
        CommCareCaseSQL,
        Q(domain=domain, type=case_type, deleted=False),
        values=['case_id'],
    )
    yield from (case_id for (case_id,) in rows)
def iter_form_ids_by_xmlns(self, domain, xmlns=None):
    """Yield form_ids of NORMAL-state forms in ``domain``, optionally
    restricted to a single ``xmlns``."""
    filters = Q(domain=domain) & Q(state=self.model.NORMAL)
    if xmlns:
        filters &= Q(xmlns=xmlns)
    rows = paginate_query_across_partitioned_databases(
        self.model,
        filters,
        values=['form_id'],
        load_source='formids_by_xmlns',
    )
    yield from (row[0] for row in rows)
def tearDown(self):
    """Remove the rules, case schedule instances, and timed schedules
    this test created for its domain."""
    for rule in AutomaticUpdateRule.objects.filter(domain=self.domain):
        rule.hard_delete()
    schedule_instances = paginate_query_across_partitioned_databases(
        CaseTimedScheduleInstance, Q(domain=self.domain))
    for schedule_instance in schedule_instances:
        delete_case_schedule_instance(schedule_instance)
    delete_timed_schedules(self.domain)
def get_alert_schedule_instances_for_schedule(schedule):
    """Iterate the AlertScheduleInstances belonging to ``schedule``.

    ``schedule`` must be an AlertSchedule (enforced by ``_validate_class``).
    """
    from corehq.messaging.scheduling.models import AlertSchedule
    from corehq.messaging.scheduling.scheduling_partitioned.models import AlertScheduleInstance

    _validate_class(schedule, AlertSchedule)
    query = Q(alert_schedule_id=schedule.schedule_id)
    return paginate_query_across_partitioned_databases(
        AlertScheduleInstance,
        query,
        load_source='schedule_instances_for_schedule',
    )
def iter_chunks(model_class, field, domain, chunk_size=5000):
    """Yield lists of up to ``chunk_size`` values of ``field`` for rows of
    ``model_class`` in ``domain``, with a progress bar driven by an
    estimated row count."""
    domain_filter = Q(domain=domain)
    estimated_total = estimate_partitioned_row_count(model_class, domain_filter)
    rows = paginate_query_across_partitioned_databases(
        model_class,
        domain_filter,
        values=[field],
        load_source='couch_to_sql_migration',
        query_size=chunk_size,
    )
    field_values = with_progress_bar(
        (row[0] for row in rows), estimated_total, oneline="concise")
    yield from chunked(field_values, chunk_size, list)
def iter_ids(model_class, field, domain, chunk_size=1000):
    """Yield values of ``field`` for rows of ``model_class`` in ``domain``,
    one at a time, behind a progress bar sized by an estimated row count."""
    domain_filter = Q(domain=domain)
    rows = paginate_query_across_partitioned_databases(
        model_class,
        domain_filter,
        values=[field],
        load_source='delete_domain',
        query_size=chunk_size,
    )
    estimated_total = estimate_partitioned_row_count(model_class, domain_filter)
    yield from with_progress_bar(
        (row[0] for row in rows),
        estimated_total,
        prefix="",
        oneline="concise",
        stream=silence_during_tests(),
    )
def iter_form_ids_by_last_modified(start_datetime, end_datetime):
    """Iterate form_ids of forms whose last-modified timestamp lies strictly
    between ``start_datetime`` and ``end_datetime``.

    ``last_modified`` is annotated as the greatest of received_on,
    edited_on, and deleted_on. NOTE(review): the ``state`` filter is
    satisfied only when every ``bitand`` term is zero, i.e. the form's
    state has none of the DELETED/DEPRECATED/DUPLICATE/ERROR/
    SUBMISSION_ERROR_LOG flags set — confirm that is the intent before
    relying on it.
    """
    from corehq.sql_db.util import paginate_query_across_partitioned_databases

    last_modified_annotation = {
        'last_modified': Greatest('received_on', 'edited_on', 'deleted_on'),
    }
    time_window = Q(
        last_modified__gt=start_datetime, last_modified__lt=end_datetime)
    state_filter = Q(
        state=F('state').bitand(XFormInstanceSQL.DELETED)
        + F('state').bitand(XFormInstanceSQL.DEPRECATED)
        + F('state').bitand(XFormInstanceSQL.DUPLICATE)
        + F('state').bitand(XFormInstanceSQL.ERROR)
        + F('state').bitand(XFormInstanceSQL.SUBMISSION_ERROR_LOG)
        + F('state'))
    return paginate_query_across_partitioned_databases(
        XFormInstanceSQL,
        time_window & state_filter,
        annotate=last_modified_annotation,
        values=['form_id'],
        load_source='find_sql_forms_not_in_es',
    )
def iter_forms_by_xmlns_received_on(
    domain: str,
    xmlns: str,
    start_datetime: datetime,
    end_datetime: datetime,
) -> Iterator[XFormInstanceSQL]:
    """
    Iterates NORMAL-state form submissions of ``xmlns`` in ``domain``
    received from ``start_datetime`` (inclusive) up to ``end_datetime``
    (exclusive).
    """
    # Half-open window [start, end): a form submitted exactly at midnight
    # counts toward the day that is starting, not the one ending — the
    # intuitive behavior.
    from corehq.sql_db.util import paginate_query_across_partitioned_databases

    filters = (
        Q(domain=domain)
        & Q(state=XFormInstanceSQL.NORMAL)
        & Q(xmlns=xmlns)
        & Q(received_on__gte=start_datetime, received_on__lt=end_datetime)
    )
    return paginate_query_across_partitioned_databases(
        XFormInstanceSQL,
        filters,
        load_source='forms_by_xmlns_received_on',
    )
def _iter_cases_from_postgres(cls, domain, case_type, boundary_date=None, db=None):
    """Iterate open, non-deleted cases of ``case_type`` in ``domain``.

    When ``boundary_date`` is given, only cases whose server_modified_on
    is at or before that date are included. When ``db`` is given, only
    that one partition is queried; otherwise all partitioned databases
    are paged.
    """
    filters = Q(domain=domain, type=case_type, closed=False, deleted=False)
    if boundary_date:
        filters &= Q(server_modified_on__lte=boundary_date)
    if db:
        return paginate_query(
            db, CommCareCaseSQL, filters, load_source='auto_update_rule')
    return paginate_query_across_partitioned_databases(
        CommCareCaseSQL, filters, load_source='auto_update_rule')
def _get_all_ledger_transactions(self, q_):
    """Materialize every LedgerTransaction matching ``q_`` across all
    partitioned databases into a list."""
    matches = paginate_query_across_partitioned_databases(
        LedgerTransaction, q_)
    return list(matches)