Ejemplo n.º 1
0
def _iter_missing_ids(db, min_tries, resume_key, view_name, view_params, repair):
    """Create a resumable iterator that checks view batches for missing ids.

    For each set of view arguments the inner data function returns either
    an empty list (the view produced no rows) or a one-item list holding
    the last view row of the batch, with a ``"missing_info"`` entry set to
    ``(missing, tries, repaired)``.

    :param db: database object; its ``view`` method is queried here and it
        is handed to ``repair_couch_docs`` when repairing.
    :param min_tries: forwarded to ``find_missing_ids`` and
        ``repair_couch_docs``.
    :param resume_key: key passed to ``ResumableFunctionIterator``.
    :param view_name: name of the view to query.
    :param view_params: initial view parameters, wrapped in a
        ``NoSkipArgsProvider``. The debug logging reads the ``startkey``
        entry (and ``endkey`` when a batch is empty) of the per-batch
        keyword arguments.
    :param repair: when true, ids reported missing are passed to
        ``repair_couch_docs``.
    :returns: a ``ResumableFunctionIterator``.
    """
    def data_function(**view_kwargs):
        """Check one batch of the view and summarize it in a single row."""
        @retry_on_couch_error
        def get_doc_ids():
            """Query the view and return the set of row ids."""
            results = list(db.view(view_name, **view_kwargs))
            if "limit" in view_kwargs and results:
                # Only reached while "limit" is still present, i.e. on the
                # first call that returns rows: remember the final row and
                # pin the query to an explicit end key so that any later
                # call of this function covers the same key range.
                nonlocal last_result
                last_result = results[-1]
                replace_limit_with_endkey(view_kwargs, last_result)
            return {r["id"] for r in results}

        def replace_limit_with_endkey(view_kwargs, last_result):
            """Swap ``limit`` for ``endkey``/``endkey_docid`` in place."""
            # an existing endkey_docid would be overwritten below
            assert "endkey_docid" not in view_kwargs, view_kwargs
            view_kwargs.pop("limit")
            view_kwargs["endkey"] = last_result["key"]
            view_kwargs["endkey_docid"] = last_result["id"]

        # set by get_doc_ids (via nonlocal) once a query returns rows
        last_result = None
        # NOTE(review): find_missing_ids presumably calls get_doc_ids
        # repeatedly (at least min_tries times) -- confirm against its
        # definition, which is not visible here.
        missing, tries = find_missing_ids(get_doc_ids, min_tries=min_tries)
        if last_result is None:
            # get_doc_ids never recorded a last row for this batch
            log.debug("no results %s - %s", view_kwargs['startkey'], view_kwargs['endkey'])
            assert not missing
            return []
        if missing and repair:
            missing, tries2, repaired = repair_couch_docs(db, missing, get_doc_ids, min_tries)
            tries += tries2
        else:
            repaired = 0
        log.debug(f"{len(missing)}/{tries} start={view_kwargs['startkey']} {missing or ''}")
        # attach the batch summary to the last row and return only that row
        last_result["missing_info"] = missing, tries, repaired
        return [last_result]

    args_provider = NoSkipArgsProvider(view_params)
    return ResumableFunctionIterator(resume_key, data_function, args_provider)
Ejemplo n.º 2
0
def _iter_docs(domain, doc_type, resume_key, stopper):
    """Iterate rows of ``by_domain_doc_type_date/view`` for one doc type.

    ``doc_type`` may be written as ``"<doc_type>.<row_key>"`` to yield that
    field of each view row; without a dot the ``"doc"`` field is yielded
    and the view is queried with ``include_docs`` enabled.

    :param domain: first element of the view start and end keys.
    :param doc_type: doc type, optionally suffixed with ``.<row_key>``.
    :param resume_key: key passed to ``ResumableFunctionIterator``.
    :param stopper: object with a ``clean_break`` attribute; when it is
        true an empty list is returned without querying anything.
    :returns: an empty list, or a generator over ``row[row_key]``.
    """
    row_key = "doc"
    if "." in doc_type:
        doc_type, row_key = doc_type.split(".")

    if stopper.clean_break:
        return []

    couch_db = XFormInstance.get_db()

    def fetch_rows(**view_kwargs):
        return couch_db.view('by_domain_doc_type_date/view', **view_kwargs)

    view_params = {
        'startkey': [domain, doc_type],
        'endkey': [domain, doc_type, {}],
        'limit': _iter_docs.chunk_size,
        'include_docs': row_key == "doc",
        'reduce': False,
    }
    view_rows = ResumableFunctionIterator(
        resume_key,
        fetch_rows,
        NoSkipArgsProvider(view_params),
        item_getter=None,
        event_handler=MigrationPaginationEventHandler(domain, stopper),
    )
    return (view_row[row_key] for view_row in view_rows)
Ejemplo n.º 3
0
def get_main_forms_iteration_stop_date(domain_name, migration_id):
    """Return the last element of the saved ``startkey`` of the forms iteration.

    The iteration state is looked up under the resume key
    ``"<domain_name>.XFormInstance.<migration_id>"``.

    :raises AssertionError: if the saved state has no keyword arguments.
    """
    resume_key = f"{domain_name!s}.XFormInstance.{migration_id!s}"
    iteration = ResumableFunctionIterator(resume_key, None, None, None)
    saved_kwargs = iteration.state.kwargs
    assert saved_kwargs, f"migration state not found: {resume_key}"
    # Tightly coupled to by_domain_doc_type_date/view in couch: the final
    # key element is expected to be a datetime.
    start_key = saved_kwargs["startkey"]
    return start_key[-1]
Ejemplo n.º 4
0
def get_main_forms_iteration_stop_date(statedb):
    """Return the last element of the saved ``startkey`` of the forms iteration.

    The resume key is built from ``statedb.domain`` and
    ``statedb.unique_id`` as ``"<domain>.XFormInstance.<unique_id>"``.

    :raises AssertionError: if the saved state has no keyword arguments.
    """
    domain, migration_id = statedb.domain, statedb.unique_id
    resume_key = f"{domain}.XFormInstance.{migration_id}"
    iteration = ResumableFunctionIterator(resume_key, None, None, None)
    saved_kwargs = iteration.state.kwargs
    assert saved_kwargs, f"migration state not found: {resume_key}"
    # Tightly coupled to by_domain_doc_type_date/view in couch: the final
    # key element is expected to be a datetime.
    start_key = saved_kwargs["startkey"]
    return start_key[-1]
Ejemplo n.º 5
0
def get_endkey_docid(domain, doc_type, migration_id):
    """Return ``(startkey, startkey_docid)`` from a saved iteration state.

    The state is looked up under ``"<domain>.<doc_type>.<migration_id>"``.

    :raises AssertionError: if the state has no truthy ``_rev``, if the
        iteration is marked complete, or if the saved positional args are
        not empty.
    """
    resume_key = f"{domain!s}.{doc_type!s}.{migration_id!s}"
    iteration = ResumableFunctionIterator(resume_key, None, None, None)
    state = iteration.state
    revision = getattr(state, '_rev', None)
    assert revision, "rebuild not necessary (no resume state)"
    assert not state.complete, "iteration is complete"
    saved = state.to_json()
    assert not saved['args']
    saved_kwargs = saved['kwargs']
    return saved_kwargs['startkey'], saved_kwargs['startkey_docid']
Ejemplo n.º 6
0
    def get_iterator(self, missing_items=None):
        """Build a ``ResumableFunctionIterator`` over ``self.batches``.

        ``missing_items`` is accepted but not used by this implementation.
        The iterator's ``couch_db`` is set to ``self.couch_db``.
        """
        def data_provider(batch_number):
            # an index past the end of the batch list yields an empty batch
            try:
                batch = self.batches[batch_number]
            except IndexError:
                batch = []
            return batch

        iterator = ResumableFunctionIterator(
            'test',
            data_provider,
            TestArgsProvider(),
        )
        iterator.couch_db = self.couch_db
        return iterator
Ejemplo n.º 7
0
def _iter_skipped_form_ids(domain, migration_id, stopper, with_progress):
    """Yield what ``_drop_sql_form_ids`` returns for each batch of Couch form ids.

    Ids come from ``_iter_docs`` for ``"XFormInstance.id"``, are wrapped by
    ``with_progress``, and are processed in lists of
    ``_iter_skipped_form_ids.chunk_size`` items. The saved iteration state
    is discarded at the end unless ``stopper.clean_break`` is set.
    """
    id_doc_type = "XFormInstance.id"
    resume_key = f"{domain!s}.{id_doc_type}.{migration_id!s}"
    raw_ids = _iter_docs(domain, id_doc_type, resume_key, stopper)
    tracked_ids = with_progress(
        ["XFormInstance"],
        raw_ids,
        "Scanning",
        offset_key=id_doc_type,
    )
    for id_batch in chunked(tracked_ids, _iter_skipped_form_ids.chunk_size, list):
        yield from _drop_sql_form_ids(id_batch, domain)
    if stopper.clean_break:
        # interrupted: keep the iteration state so the scan can resume
        return
    # Completed successfully: discard the iteration state so another
    # skipped forms iteration can be run later.
    finished = ResumableFunctionIterator(resume_key, None, None, None)
    finished.discard_state()
Ejemplo n.º 8
0
 def _get_paginated_iterable(data_function,
                             args_provider,
                             event_handler=None,
                             resumable_key=None):
     """Return a paginated iterable, resumable when a key is given.

     With a truthy ``resumable_key`` a ``ResumableFunctionIterator`` is
     returned; its fourth positional argument is a callable returning the
     ``id`` attribute of what it is given. Otherwise the result of
     ``paginate_function`` is returned.
     """
     if not resumable_key:
         return paginate_function(
             data_function,
             args_provider,
             event_handler=event_handler,
         )

     def get_id(item):
         return item.id

     return ResumableFunctionIterator(
         resumable_key,
         data_function,
         args_provider,
         get_id,
         event_handler=event_handler,
     )
Ejemplo n.º 9
0
 def __attrs_post_init__(self):
     """Attach the iterator and dispatch on the format of ``self.move_to``.

     ``self.itr`` is set to a ``ResumableFunctionIterator`` keyed on
     ``"<domain>.<doc_type>.<statedb.unique_id>"``. The first pattern that
     matches ``self.move_to`` selects the method to call with the match.

     :raises NotImplementedError: if ``self.move_to`` matches no pattern.
     """
     unique_id = self.statedb.unique_id
     resume_key = f"{self.domain!s}.{self.doc_type!s}.{unique_id!s}"
     self.itr = ResumableFunctionIterator(resume_key, None, None, None)
     handlers = (
         ("case_rewind", r"^case-(\d+)$"),
         ("resume_rewind", r"^resume-"),
     )
     for handler_name, pattern in handlers:
         found = re.search(pattern, self.move_to)
         if found is None:
             continue
         getattr(self, handler_name)(found)
         return
     raise NotImplementedError(self.move_to)
Ejemplo n.º 10
0
    def get_iterator(self, missing_items=None):
        """Build a ``ResumableFunctionIterator`` over ``self.batches``.

        The item getter returns ``None`` for ids contained in
        ``missing_items`` and ``int(item_id)`` for every other id. The
        iterator's ``couch_db`` is set to ``self.couch_db``.
        """
        def data_provider(batch_number):
            # an index past the end of the batch list yields an empty batch
            try:
                batch = self.batches[batch_number]
            except IndexError:
                batch = []
            return batch

        def item_getter(item_id):
            is_missing = bool(missing_items) and item_id in missing_items
            return None if is_missing else int(item_id)

        iterator = ResumableFunctionIterator(
            'test',
            data_provider,
            TestArgsProvider(),
            item_getter,
        )
        iterator.couch_db = self.couch_db
        return iterator
Ejemplo n.º 11
0
def _iter_docs(domain, doc_type, resume_key, stopper):
    """Yield one field of each ``by_domain_doc_type_date/view`` row.

    ``doc_type`` may be written as ``"<doc_type>.<row_key>"`` to yield that
    field of each view row; without a dot the ``"doc"`` field is yielded
    and the view is queried with ``include_docs`` enabled. Nothing is
    yielded when ``stopper.clean_break`` is already set.

    The resume state is logged when the iteration is a resume. When the
    generator finishes or is closed, the last row (without its ``"doc"``
    entry) and the final state are logged.

    :param domain: first element of the view keys; every returned row is
        asserted to have this value as its first key element.
    :param doc_type: doc type, optionally suffixed with ``.<row_key>``.
    :param resume_key: key passed to ``ResumableFunctionIterator``.
    :param stopper: object with a ``clean_break`` attribute.
    """
    @retry_on_couch_error
    def data_function(**view_kwargs):
        view_name = 'by_domain_doc_type_date/view'
        view_rows = list(couch_db.view(view_name, **view_kwargs))
        assert all(view_row['key'][0] == domain for view_row in view_rows), \
            _repr_bad_results(view_name, view_kwargs, view_rows, domain)
        return view_rows

    row_key = "doc"
    if "." in doc_type:
        doc_type, row_key = doc_type.split(".")

    if stopper.clean_break:
        return []

    couch_db = XFormInstance.get_db()
    view_params = {
        'startkey': [domain, doc_type],
        'endkey': [domain, doc_type, {}],
        'limit': _iter_docs.chunk_size,
        'include_docs': row_key == "doc",
        'reduce': False,
    }
    rows = ResumableFunctionIterator(
        resume_key,
        data_function,
        NoSkipArgsProvider(view_params),
        item_getter=None,
        event_handler=MigrationPaginationEventHandler(domain, stopper),
    )
    if rows.state.is_resume():
        log.info("iteration state: %r", rows.state.to_json())

    last_row = None
    try:
        for last_row in rows:
            yield last_row[row_key]
    finally:
        if last_row is not None:
            # log the row without its (potentially large) document body
            summary = dict(last_row)
            summary.pop("doc", None)
            log.info("last item: %r", summary)
        log.info("final iteration state: %r", rows.state.to_json())
Ejemplo n.º 12
0
 def discard_iteration_state(resume_key):
     """Discard the iteration state saved under ``resume_key``."""
     iteration = ResumableFunctionIterator(resume_key, None, None, None)
     iteration.discard_state()
Ejemplo n.º 13
0
 def __attrs_post_init__(self):
     """Set ``self.itr`` to the iterator keyed on domain, doc type and migration id."""
     unique_id = self.statedb.unique_id
     resume_key = f"{self.domain!s}.{self.doc_type!s}.{unique_id!s}"
     self.itr = ResumableFunctionIterator(resume_key, None, None, None)