コード例 #1
0
def _wrap_form(doc):
    """Wrap a raw form document in its model class.

    The class is chosen from ``doc['doc_type']``: doc types present in
    ``form_doc_types()`` use the class registered there, while
    "XFormInstance-Deleted" and "HQSubmission" documents are wrapped
    with ``XFormInstance``.

    :param doc: mapping with a ``'doc_type'`` key.
    :returns: the wrapped document, or ``None`` when the doc type is not
        one of the recognized form doc types.
    """
    doc_type = doc['doc_type']
    # Call form_doc_types() once instead of once for the membership test
    # and again for the lookup.
    known_types = form_doc_types()
    if doc_type in known_types:
        return known_types[doc_type].wrap(doc)
    if doc_type in ("XFormInstance-Deleted", "HQSubmission"):
        return XFormInstance.wrap(doc)
    # Not a form document: make the fall-through result explicit.
    return None
コード例 #2
0
class MissingIds:
    """Iterator of document ids found in Couch but not SQL

    Calling an instance with a doc type returns a generator of the Couch
    document ids of that type that have no row in the corresponding SQL
    table. The instance is also a context manager: entering it enters
    the doc counter, and exiting it after a complete iteration discards
    the saved iteration state so the scan can be done again later.

    Fields:
    - entity: ``FORM`` or ``CASE``; selects the doc types and SQL table.
    - statedb: state database; ``domain`` and ``unique_id`` are read
      from it and ``set_counter`` is called on it.
    - stopper: object exposing ``clean_break`` and ``live_migrate``; a
      ``stop_date`` attribute may be set on it while the context is
      active.
    - resume: when false, the saved iteration state and count for a doc
      type are discarded before that doc type is scanned.
    - tag: included in the datadog type, the count key and the resume
      key.
    - chunk_size: number of Couch ids checked against SQL per batch.
    """

    @classmethod
    def forms(cls, *args, **kw):
        """Create an instance for the form entity

        All arguments are passed through to the constructor after the
        entity.
        """
        return cls(cls.FORM, *args, **kw)

    # NOTE(review): the ``attr.ib`` fields and ``__attrs_post_init__``
    # rely on attrs generating ``__init__``; no ``@attr.s`` decorator is
    # visible on this class here -- confirm it is applied.
    entity = attr.ib()
    statedb = attr.ib()
    stopper = attr.ib()
    resume = attr.ib(default=True, kw_only=True)
    tag = attr.ib(default="missing", kw_only=True)
    chunk_size = attr.ib(default=5000, kw_only=True)

    # Anti-join template: the list of Couch ids is passed as the single
    # query parameter and unnested into rows; rows with no match in the
    # SQL table are returned.
    missing_docs_sql = """
        SELECT couch.{doc_id}
        FROM (SELECT unnest(%s) AS {doc_id}) AS couch
        LEFT JOIN {table} sql USING ({doc_id})
        WHERE sql.{doc_id} IS NULL
    """

    FORM = "form"
    CASE = "case"

    # Values substituted into ``missing_docs_sql`` for each entity.
    sql_params = {
        FORM: {
            "doc_id": "form_id",
            "table": "form_processor_xforminstancesql"
        },
        CASE: {
            "doc_id": "case_id",
            "table": "form_processor_commcarecasesql"
        },
    }

    # Doc types accepted by ``__call__`` for each entity.
    _doc_types = {
        FORM:
        list(form_doc_types()) + ["HQSubmission", "XFormInstance-Deleted"],
        CASE: ['CommCareCase', 'CommCareCase-Deleted'],
    }

    def __attrs_post_init__(self):
        """Derive per-instance state from the attrs fields"""
        self.domain = self.statedb.domain
        self.counter = DocCounter(self.statedb)
        self.doc_types = self._doc_types[self.entity]
        sql_params = self.sql_params[self.entity]
        self.sql = self.missing_docs_sql.format(**sql_params)
        # (doc_type, count_key, resume_key) of each completed scan;
        # consumed by ``__exit__``.
        self._iteration_keys = set()

    def __call__(self, doc_type):
        """Create a missing ids generator for the given doc type

        Yields nothing if the stopper already signals a clean break.
        Couch ids are read in chunks of ``self.chunk_size`` and each
        chunk is filtered with ``drop_sql_ids``. The scan is recorded
        for cleanup in ``__exit__`` only when the generator runs to the
        end.

        Default datadog tags (varies on `self.tag`):
        - type:find_missing_forms
        - type:find_missing_cases
        """
        if self.stopper.clean_break:
            return
        assert doc_type in self.doc_types, \
            f"'{doc_type}' is not a {self.entity} doc type"
        dd_type = f"find_{self.tag}_{self.entity}s"
        count_key = f"{doc_type}.id.{self.tag}"
        resume_key = f"{self.domain}.{count_key}.{self.statedb.unique_id}"
        if not self.resume:
            # start over: forget where the previous scan stopped
            self.discard_iteration_state(resume_key)
            self.counter.pop(count_key)
        couch_ids = _iter_docs(self.domain, f"{doc_type}.id", resume_key,
                               self.stopper)
        couch_ids = self.with_progress(doc_type, couch_ids, count_key)
        with self.counter(dd_type, count_key) as add_docs:
            for batch in chunked(couch_ids, self.chunk_size, list):
                yield from self.drop_sql_ids(batch)
                add_docs(len(batch))
        self._iteration_keys.add((doc_type, count_key, resume_key))

    def drop_sql_ids(self, couch_ids):
        """Filter the given couch ids, removing ids that are in SQL"""
        for dbname, form_ids in split_list_by_db_partition(couch_ids):
            with XFormInstanceSQL.get_cursor_for_partition_db(
                    dbname, readonly=True) as cursor:
                cursor.execute(self.sql, [form_ids])
                yield from (form_id for form_id, in cursor.fetchall())

    def with_progress(self, doc_type, iterable, count_key):
        """Wrap ``iterable`` in a progress bar for scanning ``doc_type``

        The bar length is the Couch count of ``doc_type`` documents in
        the domain; the offset is the count already recorded under
        ``count_key``.
        """
        couchdb = XFormInstance.get_db()
        return with_progress_bar(
            iterable,
            get_doc_count_in_domain_by_type(self.domain, doc_type, couchdb),
            prefix=f"Scanning {doc_type}",
            offset=self.counter.get(count_key),
            oneline="concise",
        )

    def __enter__(self):
        """Enter the doc counter context and return ``self``

        When live-migrating and the stopper has no ``stop_date`` yet,
        one is set from ``get_main_forms_iteration_stop_date``.
        """
        if self.stopper.live_migrate and not hasattr(self.stopper,
                                                     "stop_date"):
            self.stopper.stop_date = get_main_forms_iteration_stop_date(
                self.statedb)
        self.counter.__enter__()
        return self

    def __exit__(self, *exc_info):
        """Clean up iteration state and exit the doc counter context

        Iteration state and counts are reset only after a complete
        iteration (no clean break and no exception). The counter context
        entered in ``__enter__`` is exited on every path, including the
        incomplete-iteration path and when cleanup itself raises.
        """
        try:
            live_migrate = self.stopper.live_migrate
            if live_migrate and hasattr(self.stopper, "stop_date"):
                # remove stop date so main forms iteration may continue
                del self.stopper.stop_date
            if self.stopper.clean_break or exc_info[1] is not None:
                # incomplete iteration
                return
            # discard iteration state so it is possible to do again later
            for doc_type, count_key, resume_key in self._iteration_keys:
                self.discard_iteration_state(resume_key)
                self.reset_doc_count(doc_type, count_key)
        finally:
            # Always pair with counter.__enter__(); runs after the count
            # resets above on the complete-iteration path.
            self.counter.__exit__(*exc_info)

    @staticmethod
    def discard_iteration_state(resume_key):
        """Discard the saved resumable-iterator state for ``resume_key``"""
        ResumableFunctionIterator(resume_key, None, None, None).discard_state()

    def reset_doc_count(self, doc_type, count_key):
        """Move the count for ``count_key`` into the statedb counter

        The count is removed from the doc counter. Unless live
        migrating, it is replaced by the Couch count of ``doc_type``
        documents in the domain before being stored for ``doc_type``.
        """
        count = self.counter.pop(count_key)
        if not self.stopper.live_migrate:
            couchdb = XFormInstance.get_db()
            count = get_doc_count_in_domain_by_type(self.domain, doc_type,
                                                    couchdb)
        self.statedb.set_counter(doc_type, count)
コード例 #3
0
ファイル: missingdocs.py プロジェクト: fmagege/commcare-hq
class MissingIds:
    """Iterator of document ids found in Couch but not SQL"""
    @classmethod
    def forms(cls, statedb):
        """Build an instance that scans form documents for ``statedb``

        The stopper argument is passed as ``None``.
        """
        entity = cls.FORM
        return cls(entity, statedb, None)

    @classmethod
    def cases(cls, statedb):
        """Build an instance that scans case documents for ``statedb``

        The stopper argument is passed as ``None``.
        """
        entity = cls.CASE
        return cls(entity, statedb, None)

    entity = attr.ib()
    statedb = attr.ib()
    stopper = attr.ib()
    resume = attr.ib(default=True, kw_only=True)
    tag = attr.ib(default="missing", kw_only=True)
    chunk_size = attr.ib(default=5000, kw_only=True)

    missing_docs_sql = """
        SELECT couch.{doc_id}
        FROM (SELECT unnest(%s) AS {doc_id}) AS couch
        LEFT JOIN {table} sql USING ({doc_id})
        WHERE sql.{doc_id} IS NULL
    """

    FORM = "form"
    CASE = "case"

    sql_params = {
        FORM: {
            "doc_id": "form_id",
            "table": "form_processor_xforminstancesql"
        },
        CASE: {
            "doc_id": "case_id",
            "table": "form_processor_commcarecasesql"
        },
    }

    form_types = list(
        form_doc_types()) + ["HQSubmission", "XFormInstance-Deleted"]
    case_types = ['CommCareCase', 'CommCareCase-Deleted']
    _doc_types = {
        FORM: form_types,
        CASE: case_types,
    }

    def __attrs_post_init__(self):
        """Derive per-instance state once the attrs fields are set

        Reads the domain from the state db, creates the doc counter,
        selects the doc types for the configured entity, renders the
        missing-docs SQL for that entity, and starts with an empty set
        of count keys.
        """
        statedb = self.statedb
        entity = self.entity
        self.domain = statedb.domain
        self.counter = DocCounter(statedb)
        self.doc_types = self._doc_types[entity]
        self.sql = self.missing_docs_sql.format(**self.sql_params[entity])
        self._count_keys = set()

    def __call__(self, doc_type):
        """Generate ids of ``doc_type`` documents that are missing

        Yields nothing if the stopper already signals a clean break.
        Couch ids are read in chunks of ``self.chunk_size``; each chunk
        is filtered with ``drop_not_missing_case_ids`` for the case
        entity and with ``drop_sql_ids`` otherwise. The count key is
        recorded only when the generator runs to the end.

        Default datadog tags (varies on `self.tag`):
        - type:find_missing_forms
        - type:find_missing_cases
        """
        # NOTE(review): the `forms`/`cases` constructors pass None as the
        # stopper; this attribute access would raise AttributeError for
        # such an instance -- confirm the intended usage.
        if self.stopper.clean_break:
            return
        assert doc_type in self.doc_types, (
            f"'{doc_type}' is not a {self.entity} doc type")
        metric_type = f"find_{self.tag}_{self.entity}s"
        count_key = f"{doc_type}.id.{self.tag}"
        resume_key = f"{self.domain}.{count_key}.{self.statedb.unique_id}"
        if not self.resume:
            # start over: forget where the previous scan stopped
            self.discard_iteration_state(resume_key)
            self.counter.pop(count_key)
        ids = self.with_progress(
            doc_type,
            _iter_docs(self.domain, f"{doc_type}.id", resume_key,
                       self.stopper),
            count_key,
        )
        is_case = self.entity == self.CASE
        keep_missing = (
            self.drop_not_missing_case_ids if is_case else self.drop_sql_ids
        )
        with self.counter(metric_type, count_key) as add_docs:
            for id_batch in chunked(ids, self.chunk_size, list):
                yield from keep_missing(id_batch)
                add_docs(len(id_batch))
        self._count_keys.add((doc_type, count_key))

    def drop_sql_ids(self, couch_ids):
        """Yield the given couch ids that are not found in SQL

        The ids are grouped by database partition and ``self.sql`` is
        executed once per partition on a read-only cursor.
        """
        partitions = split_list_by_db_partition(couch_ids)
        for db_name, partition_ids in partitions:
            cursor_context = XFormInstanceSQL.get_cursor_for_partition_db(
                db_name, readonly=True)
            with cursor_context as cursor:
                cursor.execute(self.sql, [partition_ids])
                # each row is a 1-tuple holding the missing id
                for (missing_id,) in cursor.fetchall():
                    yield missing_id

    def drop_not_missing_case_ids(self, case_ids):
        def modified_since_stop_date(case_id):
            case = cases.get(case_id)
            if case is None or not case.actions:
                return False
            return any(a.server_date > stop_date for a in case.actions
                       if a.server_date)
コード例 #4
0
def _wrap_form(doc):
    """Wrap a raw form document in its model class.

    The class is chosen from ``doc['doc_type']``: doc types present in
    ``form_doc_types()`` use the class registered there, while
    "XFormInstance-Deleted" and "HQSubmission" documents are wrapped
    with ``XFormInstance``.

    :param doc: mapping with a ``'doc_type'`` key.
    :returns: the wrapped document, or ``None`` when the doc type is not
        one of the recognized form doc types.
    """
    doc_type = doc['doc_type']
    # Call form_doc_types() once instead of once for the membership test
    # and again for the lookup.
    known_types = form_doc_types()
    if doc_type in known_types:
        return known_types[doc_type].wrap(doc)
    if doc_type in ("XFormInstance-Deleted", "HQSubmission"):
        return XFormInstance.wrap(doc)
    # Not a form document: make the fall-through result explicit.
    return None