Ejemplo n.º 1
0
 def _get_cases_to_process(self):
     """Yield the ids of closed 'household' cases in ``domain``."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     for db_alias in get_db_aliases_for_partitioned_query():
         closed_households = CommCareCaseSQL.objects.using(db_alias).filter(
             domain=domain, type='household', closed=True)
         for household in closed_households:
             yield household.case_id
Ejemplo n.º 2
0
    def handle(self, domain, case_type, start_from_db=None, **options):
        """Queue a messaging-sync task for every case of ``case_type`` in ``domain``.

        :param start_from_db: optional DB alias; process only aliases that sort
            at or after it (lets an interrupted run be resumed).
        :raises CommandError: if ``start_from_db`` is not a known alias.
        """
        print("Resyncing messaging models for %s/%s ..." % (domain, case_type))

        db_aliases = get_db_aliases_for_partitioned_query()
        # Sort so the resume point below is deterministic across runs.
        db_aliases.sort()
        if start_from_db:
            if start_from_db not in db_aliases:
                raise CommandError("DB alias not recognized: %s" % start_from_db)

            # Resume: drop every alias that sorts before the starting point.
            index = db_aliases.index(start_from_db)
            db_aliases = db_aliases[index:]

        print("Iterating over databases: %s" % db_aliases)

        for db_alias in db_aliases:
            print("")
            print("Creating tasks for cases in %s ..." % db_alias)
            # Materialize the ids so with_progress_bar can show a total.
            case_ids = list(
                CommCareCaseSQL
                .objects
                .using(db_alias)
                .filter(domain=domain, type=case_type, deleted=False)
                .values_list('case_id', flat=True)
            )
            for case_id in with_progress_bar(case_ids):
                sync_case_for_messaging.delay(domain, case_id)
Ejemplo n.º 3
0
 def get_metas(self, model=BlobMeta):
     """Collect every row of ``model`` from all partitioned databases.

     For BlobMeta, legacy form-attachment metadata is included as well.
     """
     collected = []
     for db_alias in get_db_aliases_for_partitioned_query():
         collected.extend(model.objects.using(db_alias).all())
         if model is BlobMeta:
             collected.extend(get_form_attachment_blob_metas_by_key(None, db_alias))
     return collected
Ejemplo n.º 4
0
    def delete_old_images():
        """Delete ICDS form-image blobs older than 90 days, in bounded batches.

        Processes at most 1000 metas per shard DB per invocation, then
        re-queues itself if any shard still had matches, so deletion happens
        incrementally instead of in one huge pass.
        """
        start = datetime.utcnow()
        max_age = start - timedelta(days=90)
        db = get_blob_db()

        def _get_query(db_name, max_age=max_age):
            # max_age is bound as a default arg so every call shares the cutoff.
            return BlobMeta.objects.using(db_name).filter(
                content_type='image/jpeg',
                type_code=CODES.form_attachment,
                domain='icds-cas',
                created_on__lt=max_age
            )

        run_again = False
        for db_name in get_db_aliases_for_partitioned_query():
            bytes_deleted = 0
            # Cap each pass at 1000 metas per shard to keep batches small.
            metas = list(_get_query(db_name)[:1000])
            if metas:
                for meta in metas:
                    bytes_deleted += meta.content_length or 0
                db.bulk_delete(metas=metas)
                datadog_counter('commcare.icds_images.bytes_deleted', value=bytes_deleted)
                datadog_counter('commcare.icds_images.count_deleted', value=len(metas))
                run_again = True

        if run_again:
            # At least one shard hit its batch limit; schedule another pass.
            delete_old_images.delay()
Ejemplo n.º 5
0
 def _get_cases_to_process(self):
     """Yield all household and household_member cases for ``self.domain``."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     case_types = ('household', 'household_member')
     for db_alias in get_db_aliases_for_partitioned_query():
         for kind in case_types:
             yield from CommCareCaseSQL.objects.using(db_alias).filter(
                 domain=self.domain, type=kind)
Ejemplo n.º 6
0
 def delete_all_forms_and_blob_metadata(cls):
     """Hard-delete all form rows and blob metadata from every shard DB.

     Raw SQL is used, bypassing Django model signals and soft-delete logic.
     # NOTE(review): deletion order (blob meta, attachments, instances) looks
     # chosen to satisfy inter-table dependencies -- confirm against schema.
     """
     for db in get_db_aliases_for_partitioned_query():
         with connections[db].cursor() as cursor:
             cursor.execute("""
             DELETE FROM blobs_blobmeta;
             DELETE FROM form_processor_xformattachmentsql;
             DELETE FROM form_processor_xforminstancesql;
             """)
    def handle(self, **options):
        """Add a unique constraint on the case-index table for every shard DB.

        Per database: skip if the unique index already exists; otherwise add a
        temporary index, purge duplicate case indices (up to 3 passes), then
        attempt the unique constraint. Databases that still hold duplicates
        are reported and left with the temporary index in place so the
        command can be rerun.
        """
        verbose = options['verbosity'] >= 2
        if verbose:
            # Swap in the verbose SQL logger used by the module-level helpers.
            global log_sql
            log_sql = log_sql_verbose

        for db in get_db_aliases_for_partitioned_query():
            # NOTE(review): the constant is spelled UNIQIE_INDEX_NAME at its
            # definition site -- the typo is preserved here intentionally.
            if _index_exists(db, UNIQIE_INDEX_NAME):
                print(
                    self.style.SUCCESS(
                        'Unique index already exists on db: {}'.format(db)))
                continue

            _add_temp_index(db)

            # Bounded retry: concurrent writes can reintroduce duplicates
            # between passes.
            case_ids = _get_case_ids_with_dupe_indices(db)
            attempts = 0
            while case_ids and attempts < 3:
                attempts += 1
                print(
                    '{} cases found with duplicate indices. DB: {}, attempt: {}'
                    .format(len(case_ids), db, attempts))
                _delete_duplicate_indices(case_ids, db)
                case_ids = _get_case_ids_with_dupe_indices(db)

            if case_ids:
                # Still dirty after 3 passes: report the offending indices
                # instead of attempting the constraint (it would fail).
                print(
                    self.style.ERROR(
                        '{} cases still have duplicate '
                        'indices after 3 attempts for db: {}'.format(
                            len(case_ids), db)))
                # NOTE(review): groupby requires input sorted by the key, but
                # this queryset has no order_by(case_id) -- confirm rows
                # arrive grouped, or groups may split.
                grouped_indices = groupby(
                    CommCareCaseIndexSQL.objects.using(db).filter(
                        case_id__in=case_ids),
                    key=lambda c: c.case_id)
                for case_id, indices in grouped_indices:
                    print('--> Case: {}\n'.format(case_id))
                    # NOTE(review): ``unicode`` is a Python 2 builtin; a
                    # sibling version of this command uses six.text_type.
                    print('    {}'.format('\n    '.join(
                        unicode(i) for i in indices)))
                print('\n')
            else:
                print(
                    self.style.WARNING(
                        'Attempting to create unique index and constraint for db: {}'
                        .format(db)))
                try:
                    _add_unique_constraint_to_case_index_table(db)
                except Exception as e:
                    # Leave the temp index so a rerun can resume from here.
                    print(
                        self.style.ERROR(
                            'Failed to create unique constraint on DB {}: {}'.
                            format(db, e)))
                    print(self.style.WARNING('Temporary index left in place'))
                else:
                    print(
                        self.style.SUCCESS(
                            'Unique constraint added to db {}'.format(db)))
                    _drop_index(db, IDENTIFIER_INDEX_NAME)
Ejemplo n.º 8
0
def delete_schedule_instances_by_case_id(domain, case_id):
    """Delete every alert/timed schedule instance for ``case_id`` in ``domain``."""
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        CaseTimedScheduleInstance,
        CaseAlertScheduleInstance,
    )

    instance_classes = (CaseAlertScheduleInstance, CaseTimedScheduleInstance)
    for instance_cls in instance_classes:
        for db_name in get_db_aliases_for_partitioned_query():
            matching = instance_cls.objects.using(db_name).filter(
                domain=domain, case_id=case_id)
            matching.delete()
Ejemplo n.º 9
0
 def _delete_all_sql_sharded_models(model_class, domain=None):
     """Delete all rows of a sharded model, optionally limited to one domain.

     Bug fix: ``QuerySet.filter`` returns a *new* queryset; the original code
     discarded that return value, so the domain restriction was silently
     ignored and rows for every domain were deleted.

     :param model_class: a PartitionedModel subclass whose rows to delete
     :param domain: if given, delete only rows belonging to this domain
     """
     assert issubclass(model_class, PartitionedModel)
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     for db in get_db_aliases_for_partitioned_query():
         query = model_class.objects.using(db)
         if domain:
             # Reassign: filter() has no side effects on `query`.
             query = query.filter(domain=domain)
         query.delete()
Ejemplo n.º 10
0
 def sql_db_aliases(self):
     """DB aliases to query, honoring an optional ``limit_db_aliases`` subset."""
     if self.is_sharded():
         candidates = get_db_aliases_for_partitioned_query()
     else:
         candidates = [router.db_for_read(self.model_class)]
     if not self.limit_db_aliases:
         return candidates
     # Intersect with the requested subset; must be non-empty to proceed.
     selected = list(set(candidates) & set(self.limit_db_aliases))
     assert selected, 'Limited DBs not in expected list: {} {}'.format(
         candidates, self.limit_db_aliases)
     return selected
Ejemplo n.º 11
0
 def _get_cases_to_process(self, domain):
     """Generate case ids of closed household cases in ``domain``."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     for shard_db in get_db_aliases_for_partitioned_query():
         case_qs = CommCareCaseSQL.objects.using(shard_db).filter(
             domain=domain, type='household', closed=True)
         yield from (case.case_id for case in case_qs)
Ejemplo n.º 12
0
 def delete_all_sql_forms(cls, domain=None):
     """Delete SQL form blobs and XFormInstanceSQL rows, optionally per-domain."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     logger.debug("Deleting all SQL xforms for domain %s", domain)
     filters = {"type_code__in": [CODES.form_xml, CODES.form_attachment]}
     if domain:
         filters["domain"] = domain
     for db_alias in get_db_aliases_for_partitioned_query():
         BlobMeta.objects.using(db_alias).filter(**filters).delete()
     cls._delete_all_sql_sharded_models(XFormInstanceSQL, domain)
Ejemplo n.º 13
0
def delete_schedule_instances_by_case_id(domain, case_id):
    """Remove all schedule instances tied to one case across every shard DB."""
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        CaseTimedScheduleInstance,
        CaseAlertScheduleInstance,
    )

    for schedule_cls in (CaseAlertScheduleInstance, CaseTimedScheduleInstance):
        for alias in get_db_aliases_for_partitioned_query():
            schedule_cls.objects.using(alias).filter(
                domain=domain,
                case_id=case_id,
            ).delete()
Ejemplo n.º 14
0
 def _delete_all_sql_sharded_models(model_class, domain=None):
     """Delete every row of ``model_class`` across shards (optionally one domain).

     Bug fix: the original called ``query.filter(domain=domain)`` without
     using the returned queryset, so the domain filter never applied and the
     unfiltered ``query.delete()`` wiped all domains.

     :param model_class: PartitionedModel subclass to purge
     :param domain: optional domain name to restrict the deletion to
     """
     assert issubclass(model_class, PartitionedModel)
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     for db in get_db_aliases_for_partitioned_query():
         query = model_class.objects.using(db)
         if domain:
             # filter() returns a new queryset -- must rebind.
             query = query.filter(domain=domain)
         query.delete()
Ejemplo n.º 15
0
    def _get_sql_cases_modified_on_date(self, date):
        """Count cases modified within the one-month window starting at ``date``."""
        window_end = date + relativedelta(months=1)
        total = 0
        for db_alias in get_db_aliases_for_partitioned_query():
            total += CommCareCaseSQL.objects.using(db_alias).filter(
                server_modified_on__gte=date,
                server_modified_on__lt=window_end,
            ).count()
        return total
Ejemplo n.º 16
0
 def execute(self, domain_name):
     """Delete this model's rows for ``domain_name`` on every shard DB.

     No-op when the owning app is not installed.
     """
     if not self.is_app_installed():
         return
     model = self.get_model_class()
     domain_filter = {self.domain_filter_kwarg: domain_name}
     for db_name in get_db_aliases_for_partitioned_query():
         model.objects.using(db_name).filter(**domain_filter).delete()
Ejemplo n.º 17
0
 def _get_cases_to_process(self):
     """Yield every household and household_member case for the domain."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     for db_alias in get_db_aliases_for_partitioned_query():
         for case_type in ('household', 'household_member'):
             case_qs = CommCareCaseSQL.objects.using(db_alias).filter(
                 domain=self.domain, type=case_type)
             yield from case_qs
Ejemplo n.º 18
0
 def _get_case_id_batches(self):
     """Yield, one batch per shard DB, ids of open episode cases in the domain."""
     for db_alias in get_db_aliases_for_partitioned_query():
         yield (
             CommCareCaseSQL.objects
             .using(db_alias)
             .filter(
                 domain=self.domain,
                 type=CASE_TYPE_EPISODE,
                 closed=False,
                 deleted=False,
             )
             .values_list('case_id', flat=True)
         )
Ejemplo n.º 19
0
 def delete_all_sql_forms(cls, domain=None):
     """Drop form blob metadata then XFormInstanceSQL rows (per-domain optional)."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     logger.debug("Deleting all SQL xforms for domain %s", domain)
     params = {"type_code__in": [CODES.form_xml, CODES.form_attachment]}
     if domain:
         params["domain"] = domain
     db_aliases = get_db_aliases_for_partitioned_query()
     for db in db_aliases:
         blob_qs = BlobMeta.objects.using(db).filter(**params)
         blob_qs.delete()
     cls._delete_all_sql_sharded_models(XFormInstanceSQL, domain)
Ejemplo n.º 20
0
async def wipe_blobdb(commit=False):
    """
    Wipe shards in parallel; return the total number of bytes deleted.
    """
    tasks = []
    for dbname in get_db_aliases_for_partitioned_query():
        tasks.append(wipe_shard(dbname, commit))
    deleted_per_shard = await asyncio.gather(*tasks)
    return sum(deleted_per_shard)
 def handle(self, **options):
     """Report sharded-model rows found on unexpected shards, per database."""
     sharded_models = list(get_all_sharded_models())
     for database in get_db_aliases_for_partitioned_query():
         for model in sharded_models:
             mismatches = get_count_of_unmatched_models_by_shard(database, model)
             if not mismatches:
                 continue
             for shard_id, count in mismatches:
                 print('found {} unexpected {}s in {} (shard {}).'.format(
                     count, model.__name__, database, shard_id))
 def handle(self, **options):
     """Print counts of sharded-model docs located on the wrong shard."""
     models = list(get_all_sharded_models())
     databases = get_db_aliases_for_partitioned_query()
     for database in databases:
         for model in models:
             by_shard = get_count_of_unmatched_models_by_shard(database, model)
             if by_shard:
                 for shard_id, count in by_shard:
                     message = 'found {} unexpected {}s in {} (shard {}).'.format(
                         count, model.__name__, database, shard_id)
                     print(message)
Ejemplo n.º 23
0
 def get_deleted_form_ids_in_domain(self, domain):
     """Return ids of forms in ``domain`` whose state has the DELETED bit set."""
     deleted_ids = []
     for db_name in get_db_aliases_for_partitioned_query():
         # Annotate with the masked state bit, then match it exactly.
         query = (
             self.using(db_name)
             .annotate(state_deleted=F('state').bitand(XFormInstance.DELETED))
             .filter(domain=domain, state_deleted=XFormInstance.DELETED)
         )
         deleted_ids.extend(query.values_list('form_id', flat=True))
     return deleted_ids
Ejemplo n.º 24
0
 def setUpClass(cls):
     """Create a temp filesystem blob DB and drop the legacy-insert trigger.

     # NOTE(review): the trigger's name suggests it blocks inserts into the
     # legacy attachment table; dropping it presumably lets these tests insert
     # rows there directly -- confirm against the trigger definition.
     """
     super(TestRunSql, cls).setUpClass()
     cls.db = TemporaryFilesystemBlobDB()
     for db in get_db_aliases_for_partitioned_query():
         with connections[db].cursor() as cursor:
             cursor.execute("""
             DROP TRIGGER IF EXISTS legacy_xform_attachment_insert_not_allowed
                 ON form_processor_xformattachmentsql;
             """)
     # this test requires a clean slate (no forms or blob metadata)
     cls.delete_all_forms_and_blob_metadata()
Ejemplo n.º 25
0
    def _case_to_case_index_ratio(self):
        """Print the estimated number of case indices per case for the domain.

        Couch-backed domains are skipped: the estimation below is SQL-only.
        """
        if not should_use_sql_backend(self.domain):
            self.stdout.write('\nUnable to get case to index ratio of Couch domain\n')
            return

        db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
        case_query = CommCareCaseSQL.objects.using(db_name).filter(domain=self.domain)
        index_query = CommCareCaseIndexSQL.objects.using(db_name).filter(domain=self.domain)
        # NOTE(review): estimate_row_count presumably returns planner-based
        # approximations rather than COUNT(*) -- verify against the helper.
        case_count = estimate_row_count(case_query, db_name)
        case_index_count = estimate_row_count(index_query, db_name)
        # NOTE(review): raises ZeroDivisionError when the shard has no cases
        # for this domain -- confirm whether that is acceptable here.
        self._print_value('Ratio of cases to case indices: 1 : ', case_index_count / case_count)
Ejemplo n.º 26
0
 def _case_to_case_index_ratio(self):
     """Print the estimated ratio of case indices per case for the domain."""
     shard_db = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
     cases = CommCareCase.objects.using(shard_db).filter(domain=self.domain)
     indices = CommCareCaseIndex.objects.using(shard_db).filter(domain=self.domain)
     n_cases = estimate_row_count(cases, shard_db)
     n_indices = estimate_row_count(indices, shard_db)
     self._print_value('Ratio of cases to case indices: 1 : ',
                       n_indices / n_cases)
Ejemplo n.º 27
0
    def _get_sql_forms_received_on_date(self, date):
        """Count NORMAL forms (excluding device logs) received in the month of ``date``."""
        month_end = date + relativedelta(months=1)
        form_count = 0
        for db_alias in get_db_aliases_for_partitioned_query():
            form_count += (
                XFormInstanceSQL.objects.using(db_alias)
                .filter(received_on__gte=date, received_on__lt=month_end)
                .filter(state=XFormInstanceSQL.NORMAL)
                .exclude(xmlns=DEVICE_LOG_XMLNS)
                .count()
            )
        return form_count
Ejemplo n.º 28
0
    def tearDownClass(cls):
        """Close the temp blob DB and restore the trigger dropped in setUpClass."""
        cls.db.close()
        super(TestRunSql, cls).tearDownClass()

        # Re-create the guard trigger so later tests see the normal schema.
        for db in get_db_aliases_for_partitioned_query():
            with connections[db].cursor() as cursor:
                cursor.execute("""
                CREATE TRIGGER legacy_xform_attachment_insert_not_allowed
                    BEFORE INSERT ON form_processor_xformattachmentsql
                    EXECUTE PROCEDURE insert_not_allowed();
                """)
Ejemplo n.º 29
0
    def _get_cases(self):
        """Yield open ccs_record cases for the domain, loaded 100 ids at a time."""
        for db_alias in get_db_aliases_for_partitioned_query():
            id_qs = (
                CommCareCaseSQL.objects.using(db_alias)
                .filter(domain=self.domain, type='ccs_record', closed=False)
                .values_list('case_id', flat=True)
            )
            for id_chunk in chunked(id_qs, 100):
                yield from self.case_accessor.get_cases(list(id_chunk))
Ejemplo n.º 30
0
    def _get_sql_cases_modified_on_date(self, date):
        """Total cases whose server_modified_on falls in the month starting at ``date``."""
        per_db_counts = []
        for db in get_db_aliases_for_partitioned_query():
            qs = CommCareCaseSQL.objects.using(db).filter(
                server_modified_on__gte=date,
                server_modified_on__lt=date + relativedelta(months=1),
            )
            per_db_counts.append(qs.count())

        return sum(per_db_counts)
Ejemplo n.º 31
0
def _get_sql_cases_by_doc_type(domain, startdate=None, enddate=None):
    """Count live and deleted SQL cases for ``domain`` in an optional date window."""
    counter = Counter()
    for db_alias in get_db_aliases_for_partitioned_query():
        cases = CommCareCaseSQL.objects.using(db_alias).filter(domain=domain)
        if startdate is not None:
            cases = cases.filter(server_modified_on__gte=startdate)
        if enddate is not None:
            cases = cases.filter(server_modified_on__lt=enddate)
        counter.update({
            'CommCareCase': cases.filter(deleted=False).count(),
            'CommCareCase-Deleted': cases.filter(deleted=True).count(),
        })

    return counter
Ejemplo n.º 32
0
 def first_form_received_on(self):
     """Earliest received_on across all shards for the domain, or None if none."""
     sentinel = datetime(2200, 1, 1)  # far-future marker meaning "none seen yet"
     earliest = sentinel
     for db in get_db_aliases_for_partitioned_query():
         agg = XFormInstanceSQL.objects.using(db).filter(
             domain=self.domain).aggregate(Min('received_on'))
         received = agg.get('received_on__min')
         if received and received < earliest:
             earliest = received
     return None if earliest.year == 2200 else earliest
Ejemplo n.º 33
0
    def get_case_owner_ids(domain):
        """Distinct case owner_ids for ``domain``, unioned across shard DBs."""
        from corehq.sql_db.util import get_db_aliases_for_partitioned_query
        owner_ids = set()
        for alias in get_db_aliases_for_partitioned_query():
            distinct_ids = fast_distinct_in_domain(
                CommCareCaseSQL, 'owner_id', domain, using=alias)
            owner_ids.update(distinct_ids)
        return owner_ids
Ejemplo n.º 34
0
 def setup_days_records(cls, day):
     """Ensure a record exists for ``day`` per (doc type, table, filter, shard DB)."""
     for doc_type, ucr_table_mapping in UCR_MAPPING.items():
         for table_id, doc_type_filters in ucr_table_mapping.items():
             for doc_type_filter in doc_type_filters:
                 for db_alias in get_db_aliases_for_partitioned_query():
                     cls.objects.get_or_create(
                         day=day,
                         db_alias=db_alias,
                         doc_type=doc_type,
                         table_id=table_id,
                         doc_type_filter=doc_type_filter,
                     )
Ejemplo n.º 35
0
 def first_form_received_on(self):
     """Return the oldest received_on among this domain's forms (None if empty)."""
     FAR_FUTURE = datetime(2200, 1, 1)
     min_date = FAR_FUTURE
     for db_alias in get_db_aliases_for_partitioned_query():
         result = (XFormInstanceSQL.objects.using(db_alias)
                   .filter(domain=self.domain)
                   .aggregate(Min('received_on')))
         date = result.get('received_on__min')
         if date and date < min_date:
             min_date = date
     if min_date.year == 2200:
         return None
     return min_date
Ejemplo n.º 36
0
def iter_sql_cases_with_sorted_transactions(domain):
    """Yield distinct case ids having a SortTransactionsRebuild transaction."""
    sql = f"""
        SELECT cx.case_id
        FROM {CommCareCaseSQL._meta.db_table} cx
        INNER JOIN {CaseTransaction._meta.db_table} tx ON cx.case_id = tx.case_id
        WHERE cx.domain = %s AND tx.details LIKE %s
    """
    reason = f'%{SortTransactionsRebuild._REASON}%'
    for dbname in get_db_aliases_for_partitioned_query():
        with CommCareCaseSQL.get_cursor_for_partition_db(dbname) as cursor:
            cursor.execute(sql, [domain, reason])
            # Deduplicate per shard via a set comprehension before yielding.
            yield from {case_id for case_id, in cursor.fetchall()}
 def handle(self, **options):
     """Print shard-consistency info for each partitioned database."""
     verbose = options['verbose']
     csv_mode = options['csv']
     if csv_mode:
         print('shard_id,model_name,doc_count,valid/invalid')
     for database in get_db_aliases_for_partitioned_query():
         if verbose:
             print('Checking database {}...'.format(database))
         shard_info = get_database_shard_info_for_testing(database)
         print(shard_info.to_csv() if options['csv'] else shard_info)
Ejemplo n.º 38
0
def delete_timed_schedule_instances_for_schedule(cls, schedule_id):
    """Delete all instances of ``schedule_id`` for ``cls`` across shard DBs."""
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        TimedScheduleInstance,
        CaseTimedScheduleInstance,
    )

    allowed = (TimedScheduleInstance, CaseTimedScheduleInstance)
    if cls not in allowed:
        raise TypeError("Expected TimedScheduleInstance or CaseTimedScheduleInstance")

    _validate_uuid(schedule_id)

    for db_name in get_db_aliases_for_partitioned_query():
        matching = cls.objects.using(db_name).filter(timed_schedule_id=schedule_id)
        matching.delete()
 def handle(self, **options):
     """Dump shard info for every partitioned DB, optionally as CSV."""
     verbose = options['verbose']
     csv_mode = options['csv']
     if csv_mode:
         # CSV header row printed once, before any per-database output.
         print('shard_id,model_name,doc_count,valid/invalid')
     for database in get_db_aliases_for_partitioned_query():
         if verbose:
             print('Checking database {}...'.format(database))
         shard_info = get_database_shard_info_for_testing(database)
         if options['csv']:
             print(shard_info.to_csv())
         else:
             print(shard_info)
Ejemplo n.º 40
0
    def _case_to_case_index_ratio(self):
        """Print the average number of cases per case index for the domain.

        Uses EXPLAIN-based row-count estimates on a single shard DB.
        Bug fix: the original used floor division (``//``), which truncated
        the printed ratio to a whole number; true division preserves the
        fractional part.
        """
        if not should_use_sql_backend(self.domain):
            self.stdout.write('\nUnable to get case to index ratio of Couch domain\n')
            return

        db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
        case_count = _get_count_from_explain(
            db_name, CommCareCaseSQL.objects.using(db_name).filter(domain=self.domain)
        )
        case_index_count = _get_count_from_explain(
            db_name, CommCareCaseIndexSQL.objects.using(db_name).filter(domain=self.domain)
        )
        # float() keeps this a true division even under Python 2 semantics.
        self._print_value('Ratio of cases to case indices', case_count / float(case_index_count))
Ejemplo n.º 41
0
def delete_timed_schedule_instances_for_schedule(cls, schedule_id):
    """Remove every instance of the given timed schedule from all shard DBs."""
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        TimedScheduleInstance,
        CaseTimedScheduleInstance,
    )

    if cls is not TimedScheduleInstance and cls is not CaseTimedScheduleInstance:
        raise TypeError("Expected TimedScheduleInstance or CaseTimedScheduleInstance")

    _validate_uuid(schedule_id)

    for shard_db in get_db_aliases_for_partitioned_query():
        cls.objects.using(shard_db).filter(
            timed_schedule_id=schedule_id).delete()
Ejemplo n.º 42
0
 def _get_ids_to_process(self):
     """Yield doc ids from each shard DB, logging progress every 1000 docs."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     databases = get_db_aliases_for_partitioned_query()
     if self.database:
         # Optionally restrict the run to one explicitly requested database.
         databases = [d for d in databases if d == self.database]
     for database in databases:
         doc_ids = self._get_ids(database)
         total = len(doc_ids)
         print("processing %d docs from db %s" % (total, database))
         for index, doc_id in enumerate(doc_ids):
             yield doc_id
             if index % 1000 == 0:
                 print("processed %d / %d docs from db %s" % (index, total, database))
Ejemplo n.º 43
0
def run_messaging_rule(domain, rule_id):
    """Fan out one messaging rule over its case type as per-shard tasks."""
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return
    progress_helper = MessagingRuleProgressHelper(rule_id)
    total_cases_count = (
        CaseES().domain(domain).case_type(rule.case_type).count()
    )
    progress_helper.set_total_cases_to_be_processed(total_cases_count)

    shard_dbs = get_db_aliases_for_partitioned_query()
    progress_helper.set_initial_progress(shard_count=len(shard_dbs))
    for shard_db in shard_dbs:
        run_messaging_rule_for_shard.delay(domain, rule_id, shard_db)
Ejemplo n.º 44
0
 def _get_person_case_ids_to_process():
     """Yield all 'person' case ids in DOMAIN, shard by shard, with progress."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     for db in get_db_aliases_for_partitioned_query():
         case_ids = (CommCareCaseSQL.objects.using(db)
                     .filter(domain=DOMAIN, type="person")
                     .values_list('case_id', flat=True))
         total = len(case_ids)
         print("processing %d docs from db %s" % (total, db))
         for position, case_id in enumerate(case_ids):
             yield case_id
             if position % 1000 == 0:
                 print("processed %d / %d docs from db %s" %
                       (position, total, db))
Ejemplo n.º 45
0
    def _get_sql_forms_received_on_date(self, date):
        """Count NORMAL-state forms (excluding device logs) received that month."""
        cutoff = date + relativedelta(months=1)
        per_db_counts = []
        for db in get_db_aliases_for_partitioned_query():
            qs = (XFormInstanceSQL.objects.using(db)
                  .filter(received_on__gte=date, received_on__lt=cutoff)
                  .filter(state=XFormInstanceSQL.NORMAL)
                  .exclude(xmlns=DEVICE_LOG_XMLNS))
            per_db_counts.append(qs.count())

        return sum(per_db_counts)
Ejemplo n.º 46
0
 def _get_ids_to_process(self):
     """Generator over doc ids to process, optionally restricted to one DB."""
     from corehq.sql_db.util import get_db_aliases_for_partitioned_query
     dbs = get_db_aliases_for_partitioned_query()
     if self.database:
         dbs = [db for db in dbs if db == self.database]
     for db in dbs:
         ids_to_yield = self._get_ids(db)
         id_count = len(ids_to_yield)
         print("processing %d docs from db %s" % (id_count, db))
         for position, doc_id in enumerate(ids_to_yield):
             yield doc_id
             if position % 1000 == 0:
                 print("processed %d / %d docs from db %s" %
                       (position, id_count, db))
    def _get_cases(self):
        """Yield open ccs_record cases, loading them 100 ids at a time."""
        for db in get_db_aliases_for_partitioned_query():
            ccs_record_case_ids = (
                CommCareCaseSQL.objects.using(db)
                .filter(domain=self.domain, type='ccs_record', closed=False)
                .values_list('case_id', flat=True)
            )
            for batch in chunked(ccs_record_case_ids, 100):
                for case in self.case_accessor.get_cases(list(batch)):
                    yield case
Ejemplo n.º 48
0
def get_sql_case_ids(domain, doc_type, startdate, enddate):
    """Collect SQL case ids of one doc type for ``domain`` within a date window."""
    # 'CommCareCase-Deleted' selects soft-deleted cases; anything else, live ones.
    deleted = doc_type == 'CommCareCase-Deleted'
    sql_ids = set()
    for db_alias in get_db_aliases_for_partitioned_query():
        queryset = CommCareCaseSQL.objects.using(db_alias).filter(
            domain=domain, deleted=deleted)

        if startdate:
            queryset = queryset.filter(server_modified_on__gte=startdate)

        if enddate:
            queryset = queryset.filter(server_modified_on__lt=enddate)

        sql_ids.update(queryset.values_list('case_id', flat=True))
    return sql_ids
Ejemplo n.º 49
0
def _get_stale_data(run_config):
    """Yield (doc_id, doc_type, ucr_insert_date_iso, sql_modified_on_iso) for
    documents whose UCR row is missing or older than the primary SQL record.

    Bug fix: the staleness comparison was inverted. A record is stale when the
    primary row was modified *after* the UCR insert, i.e. when
    ``sql_modified_on - ucr_insert_date`` is positive beyond a one-second
    tolerance for clock drift between databases. The original ``<`` test
    flagged fresh records and skipped stale ones.
    """
    for db in get_db_aliases_for_partitioned_query():
        print(f"Starting db {db}")
        matching_records_for_db = _get_primary_data_for_db(db, run_config)
        chunk_size = 1000
        for chunk in chunked(matching_records_for_db, chunk_size):
            doc_ids = [val[0] for val in chunk]
            ucr_insertion_dates = _get_ucr_insertion_dates(run_config.domain, run_config.table_id, doc_ids)
            for doc_id, doc_type, sql_modified_on in chunk:
                ucr_insert_date = ucr_insertion_dates.get(doc_id)
                if (not ucr_insert_date
                        # Allow small time drift between databases
                        or (sql_modified_on - ucr_insert_date) > timedelta(seconds=1)):
                    ucr_date_string = ucr_insert_date.isoformat() if ucr_insert_date else ''
                    yield (doc_id, doc_type, ucr_date_string, sql_modified_on.isoformat())
Ejemplo n.º 50
0
def run_case_update_rules_for_domain(domain, now=None):
    """Kick off case-rule runs: one task per shard DB (SQL) or a single task (Couch)."""
    now = now or datetime.utcnow()

    run_record = DomainCaseRuleRun.objects.create(
        domain=domain,
        started_on=datetime.utcnow(),
        status=DomainCaseRuleRun.STATUS_RUNNING,
    )

    if not should_use_sql_backend(domain):
        # explicitly pass db=None so that the serial task decorator has access to db in the key generation
        run_case_update_rules_for_domain_and_db.delay(domain, now, run_record.pk, db=None)
        return

    for db in get_db_aliases_for_partitioned_query():
        run_case_update_rules_for_domain_and_db.delay(domain, now, run_record.pk, db=db)
Ejemplo n.º 51
0
    def handle(self, **options):
        """Add a unique constraint to the case-index table on each shard DB.

        Per database: skip when the unique index already exists; otherwise add
        a temporary index, delete duplicate case indices (up to 3 attempts),
        then try to create the unique constraint. On failure the temporary
        index is left in place so the command can be rerun.
        """
        verbose = options['verbosity'] >= 2
        if verbose:
            # Route module-level SQL logging through the verbose logger.
            global log_sql
            log_sql = log_sql_verbose

        for db in get_db_aliases_for_partitioned_query():
            # NOTE(review): the constant is spelled UNIQIE_INDEX_NAME at its
            # definition site -- typo preserved here intentionally.
            if _index_exists(db, UNIQIE_INDEX_NAME):
                print(self.style.SUCCESS('Unique index already exists on db: {}'.format(db)))
                continue

            _add_temp_index(db)

            # Bounded retry: concurrent writes may reintroduce duplicates.
            case_ids = _get_case_ids_with_dupe_indices(db)
            attempts = 0
            while case_ids and attempts < 3:
                attempts += 1
                print('{} cases found with duplicate indices. DB: {}, attempt: {}'.format(
                    len(case_ids), db, attempts)
                )
                _delete_duplicate_indices(case_ids, db)
                case_ids = _get_case_ids_with_dupe_indices(db)

            if case_ids:
                # Still dirty after 3 passes: report instead of attempting
                # the constraint (it would fail on the duplicates).
                print(self.style.ERROR(
                    '{} cases still have duplicate '
                    'indices after 3 attempts for db: {}'.format(len(case_ids), db))
                )
                # NOTE(review): groupby assumes rows are already ordered by
                # case_id; the queryset has no order_by -- confirm.
                grouped_indices = groupby(
                    CommCareCaseIndexSQL.objects.using(db)
                    .filter(case_id__in=case_ids), key=lambda c: c.case_id
                )
                for case_id, indices in grouped_indices:
                    print('--> Case: {}\n'.format(case_id))
                    print('    {}'.format('\n    '.join(six.text_type(i) for i in indices)))
                print('\n')
            else:
                print(self.style.WARNING('Attempting to create unique index and constraint for db: {}'.format(db)))
                try:
                    _add_unique_constraint_to_case_index_table(db)
                except Exception as e:
                    # Leave the temp index so a rerun can resume from here.
                    print(self.style.ERROR('Failed to create unique constraint on DB {}: {}'.format(db, e)))
                    print(self.style.WARNING('Temporary index left in place'))
                else:
                    print(self.style.SUCCESS('Unique constraint added to db {}'.format(db)))
                    _drop_index(db, IDENTIFIER_INDEX_NAME)
Ejemplo n.º 52
0
def check_db_tables(app_configs, **kwargs):
    """Django system check: verify every model's table is queryable.

    Runs a trivial ``exists()`` query against each installed model (on every
    partition DB for ``PartitionedModel`` subclasses) and collects an
    ``Error`` for each model whose query raises.
    """
    from corehq.sql_db.routers import ICDS_REPORTS_APP, ICDS_MODEL
    from corehq.sql_db.models import PartitionedModel
    from corehq.sql_db.util import get_db_aliases_for_partitioned_query

    # Apps that should only be checked on specific server environments.
    env_specific_apps = {
        ICDS_MODEL: settings.ICDS_ENVS,
        ICDS_REPORTS_APP: settings.ICDS_ENVS
    }

    # remove this once the warehouse tables are created
    skip = ('warehouse',)

    def _check_model(model_class, using=None):
        # Touch the table with the cheapest possible query; any exception
        # (missing table, bad connection, ...) is reported as a check Error.
        try:
            model_class._default_manager.using(using).all().exists()
        except Exception as e:
            return Error('Error querying model on database "{}": "{}.{}": {}.{}({})'.format(
                using or 'default',
                model_class._meta.app_label, model_class.__name__,
                e.__class__.__module__, e.__class__.__name__,
                e
            ))

    errors = []
    for model in apps.get_models():
        label = model._meta.app_label
        if label in skip:
            continue

        allowed_envs = env_specific_apps.get(label)
        if allowed_envs and settings.SERVER_ENVIRONMENT not in allowed_envs:
            continue

        # Partitioned models must be checked on every shard database;
        # everything else is checked once on the default database.
        if issubclass(model, PartitionedModel):
            databases = get_db_aliases_for_partitioned_query()
        else:
            databases = [None]

        for db in databases:
            err = _check_model(model, using=db)
            if err is not None:
                errors.append(err)
    return errors
Ejemplo n.º 53
0
    def sync_cases(self, domain):
        """Queue a ``sync_case_for_messaging`` task for every case in *domain*.

        Iterates case ids from the SQL backend (via ``CaseReindexAccessor``)
        or the Couch change feed, depending on the domain's backend, and
        prints progress at most every 10 seconds.

        Fix: removed ``db_aliases = get_db_aliases_for_partitioned_query()``
        and its ``sort()`` — the variable was never used (dead code).
        """
        if should_use_sql_backend(domain):
            case_accessor = CaseReindexAccessor(domain)
            case_ids = (case.case_id for case in iter_all_rows(case_accessor))
        else:
            changes = _get_case_iterator(domain).iter_all_changes()
            case_ids = (case.id for case in changes)

        # Throttle progress output to once every 10 seconds so long runs
        # show liveness without flooding the console.
        next_event = time.time() + 10
        for i, case_id in enumerate(case_ids):
            sync_case_for_messaging.delay(domain, case_id)

            if time.time() > next_event:
                print("Queued %d cases for domain %s" % (i + 1, domain))
                next_event = time.time() + 10
    def handle(self, db_name, **options):
        """Run the form update on one database, or on all of them in parallel.

        With an explicit ``db_name`` (or when there is only one partition DB)
        the update runs synchronously; otherwise the user is asked to confirm
        and one gevent greenlet is spawned per partition database.
        """
        all_dbs = get_db_aliases_for_partitioned_query()

        # Single-database fast path: run synchronously, no confirmation.
        if db_name or len(all_dbs) == 1:
            _update_forms_in_db(db_name or all_dbs[0])
            return

        if not confirm('Running without "db_name" will execute on ALL databases in parallel. Continue?'):
            raise CommandError('abort')

        jobs = [gevent.spawn(_update_forms_in_db, name) for name in all_dbs]
        gevent.joinall(jobs)
        # joinall() swallows greenlet exceptions; re-raise the first one
        # via get() so the traceback is printed.
        try:
            for job in jobs:
                job.get()
        except Exception:
            traceback.print_exc()
Ejemplo n.º 55
0
def get_count_of_active_schedule_instances_due(domain, due_before):
    """Count active schedule instances in *domain* due before *due_before*.

    Sums the counts over all four schedule-instance model classes across
    every partition database.
    """
    from corehq.messaging.scheduling.scheduling_partitioned.models import (
        AlertScheduleInstance,
        TimedScheduleInstance,
        CaseAlertScheduleInstance,
        CaseTimedScheduleInstance,
    )

    instance_classes = (
        AlertScheduleInstance,
        TimedScheduleInstance,
        CaseAlertScheduleInstance,
        CaseTimedScheduleInstance,
    )

    total = 0
    for db_alias in get_db_aliases_for_partitioned_query():
        total += sum(
            cls.objects.using(db_alias).filter(
                domain=domain,
                active=True,
                next_event_due__lt=due_before
            ).count()
            for cls in instance_classes
        )

    return total
Ejemplo n.º 56
0
def get_last_growth_monitoring_form(domain, case_id):
    """Return the most recently received growth-monitoring form for a case.

    Collects the form ids from the case's transaction history, then asks each
    partition database for its newest matching NORMAL-state form with the
    growth-monitoring xmlns, and returns the newest across all shards (or
    ``None`` if there are none).

    Improvements: early return when the case has no form ids (avoids issuing
    an empty ``__in`` query on every shard), and ``max(..., key=...)`` instead
    of sort-then-index to pick the newest form.
    """
    transactions = CaseAccessorSQL.get_transactions_for_case_rebuild(case_id)
    form_ids = [t.form_id for t in transactions if t.form_id]
    if not form_ids:
        # No forms touched this case; nothing could match below.
        return None

    candidates = []
    for db_name in get_db_aliases_for_partitioned_query():
        # Newest matching form on this shard, if any.
        result = XFormInstanceSQL.objects.using(db_name).filter(
            domain=domain,
            form_id__in=form_ids,
            xmlns=GROWTH_MONITORING_XMLNS,
            state=XFormInstanceSQL.NORMAL,
        ).order_by('-received_on').first()
        if result is not None:
            candidates.append(result)

    if not candidates:
        return None

    # Each shard contributed its newest form; pick the newest overall.
    return max(candidates, key=lambda form: form.received_on)
Ejemplo n.º 57
0
    def handle(self, domain, **options):
        """Undo UUID-clash damage for specific cases or whole domains.

        With ``case_id`` options: gathers the form ids from those cases and
        processes just those forms. Otherwise: for each domain (one, or all
        via ``iter_domains``), finds DEPRECATED forms and checks/processes
        their original forms in chunks of 500.
        """
        debug = options.get('debug')
        # NOTE(review): the positional ``domain`` argument is immediately
        # overwritten from options here — confirm the positional parameter
        # is intentional (Django normally passes everything via **options).
        domain = options.get('domain')
        case_ids = options.get('case_id')
        db = options.get('db')

        self.log_filename = 'undo_uuid_clash.{}.log'.format(datetime.utcnow().isoformat())
        print('\nWriting output to log file: {}\n'.format(self.log_filename))

        if case_ids:
            # Explicit case ids: collect every form that touched those cases.
            form_ids = set()
            for case in CaseAccessorSQL.get_cases(case_ids):
                assert not domain or case.domain == domain, 'Case "%s" not in domain "%s"' % (case.case_id, domain)
                form_ids.update(case.xform_ids)

            # ``with self``: the command object is used as a context manager,
            # presumably to open/close the log file — defined outside this view.
            with self:
                check_and_process_forms(form_ids, self, debug)
        else:
            if domain:
                domains = [domain]
            else:
                domains = iter_domains()

            for domain in domains:
                print("Checking domain: %s" % domain)
                form_ids_to_check = set()
                # Restrict to one shard DB if requested; otherwise scan all.
                dbs = [db] if db else get_db_aliases_for_partitioned_query()
                for dbname in dbs:
                    # DEPRECATED forms point at the original form they replaced.
                    form_ids_to_check.update(
                        XFormInstanceSQL.objects.using(dbname)
                        .filter(domain=domain, state=XFormInstanceSQL.DEPRECATED)
                        .values_list('orig_id', flat=True)
                    )

                print('  Found %s forms to check' % len(form_ids_to_check))
                with self:
                    # Process in chunks of 500 to bound memory/query size.
                    for chunk in chunked(form_ids_to_check, 500):
                        check_and_process_forms(chunk, self, debug)
Ejemplo n.º 58
0
def delete_expired_blobs():
    """Delete expired blobs from every shard, re-queueing if more remain.

    Removes up to 1000 expired ``BlobMeta`` rows (and their blobs) per
    partition database, then handles legacy expired blobs; if any shard hit
    its batch limit or legacy blobs remain, the task schedules itself again.
    Returns the total number of bytes deleted.
    """
    needs_rerun = False
    total_bytes = 0
    for db_alias in get_db_aliases_for_partitioned_query():
        batch = list(BlobMeta.objects.using(db_alias).filter(
            expires_on__isnull=False,
            expires_on__lt=_utcnow(),
        )[:1000])
        if not batch:
            continue
        if len(batch) == 1000:
            # A full batch means this shard may still hold more expired blobs.
            needs_rerun = True
        get_blob_db().bulk_delete(metas=batch)
        log.info("deleted expired blobs: %r", [m.key for m in batch])
        batch_bytes = sum(m.content_length for m in batch)
        total_bytes += batch_bytes
        datadog_counter('commcare.temp_blobs.bytes_deleted', value=batch_bytes)

    legacy_exists, legacy_bytes = _delete_legacy_expired_blobs()
    if needs_rerun or legacy_exists:
        delete_expired_blobs.delay()

    return total_bytes + legacy_bytes