def iter_id_chunks(domain, doc_type, migration_id, couch_db):
    def data_function(**view_kwargs):
        return couch_db.view('by_domain_doc_type_date/view', **view_kwargs)

    endkey, docid = get_endkey_docid(domain, doc_type, migration_id)
    args_provider = NoSkipArgsProvider({
        'startkey': [domain, doc_type],
        'endkey': endkey,
        'endkey_docid': docid,
        'inclusive_end': False,
        'limit': iter_id_chunks.chunk_size,
        'include_docs': False,
        'reduce': False,
    })
    args, kwargs = args_provider.get_initial_args()
    while True:
        results = list(data_function(*args, **kwargs))
        results = args_provider.adjust_results(results, args, kwargs)
        if not results:
            break
        yield [r["id"] for r in results]
        try:
            # the last row of this batch becomes the start of the next one
            args, kwargs = args_provider.get_next_args(results[-1], *args, **kwargs)
        except StopIteration:
            break
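
# A minimal usage sketch (not from the original code): the 'limit' argument
# above reads `chunk_size` off the function object, so callers or module
# setup are expected to assign it first. The domain, doc type, and migration
# id values here are hypothetical.
iter_id_chunks.chunk_size = 1000

for id_chunk in iter_id_chunks(
        'example-domain', 'XFormInstance', 'example-migration-id',
        XFormInstance.get_db()):
    print(len(id_chunk))  # each chunk is a list of up to chunk_size doc ids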
def __iter__(self):
    def data_function(**view_kwargs):
        # closure: couch_db is bound below, before the first call
        return couch_db.view('by_domain_doc_type_date/view', **view_kwargs)

    log.info("preparing to rewind: %s", self.move_to)
    state_json = self.itr.state.to_json()
    self._save_resume_state(state_json)
    couch_db = XFormInstance.get_db()
    args_provider = NoSkipArgsProvider({
        # resume from the saved iteration state, walking backward
        'startkey': state_json["kwargs"]["startkey"],
        'startkey_docid': state_json["kwargs"]["startkey_docid"],
        'endkey': [self.domain, self.doc_type],
        'descending': True,
        'limit': 1000,
        'include_docs': True,
        'reduce': False,
    })
    args, kwargs = args_provider.get_initial_args()
    while True:
        results = list(data_function(*args, **kwargs))
        results = args_provider.adjust_results(results, args, kwargs)
        if not results:
            break
        for result in results:
            yield get_received_on(result["doc"], self.stats)
        try:
            args, kwargs = args_provider.get_next_args(results[-1], *args, **kwargs)
        except StopIteration:
            break
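
# Illustration of the rewind trick above in terms of the raw CouchDB view
# API (all values hypothetical). With descending=True the view yields rows
# in reverse key order: it starts at the saved position (startkey plus
# startkey_docid to pin the exact row) and stops at the [domain, doc_type]
# prefix, which sorts before any [domain, doc_type, date] key.
rows = XFormInstance.get_db().view(
    'by_domain_doc_type_date/view',
    startkey=['example-domain', 'XFormInstance', '2020-01-15T00:00:00.000000Z'],
    startkey_docid='example-doc-id',
    endkey=['example-domain', 'XFormInstance'],
    descending=True,
    limit=1000,
    include_docs=True,
    reduce=False,
)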
def _iter_missing_ids(db, min_tries, resume_key, view_name, view_params, repair):
    def data_function(**view_kwargs):
        @retry_on_couch_error
        def get_doc_ids():
            results = list(db.view(view_name, **view_kwargs))
            if "limit" in view_kwargs and results:
                nonlocal last_result
                last_result = results[-1]
                replace_limit_with_endkey(view_kwargs, last_result)
            return {r["id"] for r in results}

        def replace_limit_with_endkey(view_kwargs, last_result):
            # pin the key range so retries query exactly the same window
            assert "endkey_docid" not in view_kwargs, view_kwargs
            view_kwargs.pop("limit")
            view_kwargs["endkey"] = last_result["key"]
            view_kwargs["endkey_docid"] = last_result["id"]

        last_result = None
        missing, tries = find_missing_ids(get_doc_ids, min_tries=min_tries)
        if last_result is None:
            log.debug("no results %s - %s",
                      view_kwargs['startkey'], view_kwargs['endkey'])
            assert not missing
            return []
        if missing and repair:
            missing, tries2, repaired = repair_couch_docs(
                db, missing, get_doc_ids, min_tries)
            tries += tries2
        else:
            repaired = 0
        log.debug(f"{len(missing)}/{tries} start={view_kwargs['startkey']} {missing or ''}")
        last_result["missing_info"] = missing, tries, repaired
        return [last_result]

    args_provider = NoSkipArgsProvider(view_params)
    return ResumableFunctionIterator(resume_key, data_function, args_provider)
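
# Sketch of how the returned iterator might be consumed (the call-site
# values are hypothetical, not taken from the real code): each yielded row
# is the last view row of one key range, annotated with a
# (missing, tries, repaired) tuple under "missing_info".
itr = _iter_missing_ids(
    db=XFormInstance.get_db(),
    min_tries=5,
    resume_key='example-resume-key',
    view_name='by_domain_doc_type_date/view',
    view_params={
        'startkey': ['example-domain', 'XFormInstance'],
        'endkey': ['example-domain', 'XFormInstance', {}],
        'limit': 1000,
        'include_docs': False,
        'reduce': False,
    },
    repair=False,
)
for row in itr:
    missing, tries, repaired = row["missing_info"]
    print(len(missing), tries, repaired)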
def _iter_docs(domain, doc_type, resume_key, stopper):
    def data_function(**view_kwargs):
        return couch_db.view('by_domain_doc_type_date/view', **view_kwargs)

    # "Type.field" selects a row field to yield instead of the full doc
    if "." in doc_type:
        doc_type, row_key = doc_type.split(".")
    else:
        row_key = "doc"
    if stopper.clean_break:
        return []
    couch_db = XFormInstance.get_db()
    args_provider = NoSkipArgsProvider({
        'startkey': [domain, doc_type],
        'endkey': [domain, doc_type, {}],
        'limit': _iter_docs.chunk_size,
        'include_docs': row_key == "doc",
        'reduce': False,
    })
    rows = ResumableFunctionIterator(
        resume_key,
        data_function,
        args_provider,
        item_getter=None,
        event_handler=MigrationPaginationEventHandler(domain, stopper),
    )
    return (row[row_key] for row in rows)
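
# The "Type.field" convention in action (hypothetical calls; domain,
# resume_key, and stopper are assumed to be in scope). Passing
# "XFormInstance.id" sets row_key to "id", so include_docs is False and the
# view returns only document ids, which is much cheaper than fetching docs.
form_ids = _iter_docs(domain, 'XFormInstance.id', resume_key, stopper)
form_docs = _iter_docs(domain, 'XFormInstance', resume_key, stopper)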
def _iter_docs(domain, doc_type, resume_key, stopper):
    @retry_on_couch_error
    def data_function(**view_kwargs):
        view_name = 'by_domain_doc_type_date/view'
        results = list(couch_db.view(view_name, **view_kwargs))
        assert all(r['key'][0] == domain for r in results), \
            _repr_bad_results(view_name, view_kwargs, results, domain)
        return results

    if "." in doc_type:
        doc_type, row_key = doc_type.split(".")
    else:
        row_key = "doc"
    if stopper.clean_break:
        return  # bare return: this function is a generator
    couch_db = XFormInstance.get_db()
    args_provider = NoSkipArgsProvider({
        'startkey': [domain, doc_type],
        'endkey': [domain, doc_type, {}],
        'limit': _iter_docs.chunk_size,
        'include_docs': row_key == "doc",
        'reduce': False,
    })
    rows = ResumableFunctionIterator(
        resume_key,
        data_function,
        args_provider,
        item_getter=None,
        event_handler=MigrationPaginationEventHandler(domain, stopper),
    )
    if rows.state.is_resume():
        log.info("iteration state: %r", rows.state.to_json())
    row = None
    try:
        for row in rows:
            yield row[row_key]
    finally:
        if row is not None:
            row_copy = dict(row)
            row_copy.pop("doc", None)  # keep the log line small
            log.info("last item: %r", row_copy)
        log.info("final iteration state: %r", rows.state.to_json())
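
# `retry_on_couch_error` comes from the surrounding codebase and is not
# shown here. A minimal sketch of what such a decorator could look like --
# the retry count, backoff schedule, and exception handling below are
# assumptions, not the real implementation:
import time
from functools import wraps

def retry_on_couch_error_sketch(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        for attempt in range(3):
            try:
                return func(*args, **kwargs)
            except Exception:  # the real decorator targets Couch request errors
                if attempt == 2:
                    raise
                time.sleep(2 ** attempt)  # simple exponential backoff
    return wrapper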