def _get_couch_form_chunks(run_config):
    """Yield lists of form records read from a Couch view.

    The ``by_domain_doc_type_date/view`` view is queried once per
    (domain, doc type) pair, bounded by ``run_config.start_date`` and
    ``run_config.end_date``. Rows are grouped with
    ``chunked(iterable, CHUNK_SIZE)`` and each group is yielded as a list of
    ``(form_id, doc_type, 'COUCH_XMLNS_NOT_SUPPORTED', received_on, domain)``
    tuples. Progress is reported to stderr after each yielded list.

    Nothing is yielded when no Couch domain matches ``run_config``.
    """
    db = XFormInstance.get_db()
    view_name = 'by_domain_doc_type_date/view'

    # Build one startkey/endkey range per (domain, doc type) combination.
    keys = []
    for couch_domain in _get_matching_couch_domains(run_config):
        for doc_type in ('XFormArchived', 'XFormInstance'):
            range_start = [
                couch_domain,
                doc_type,
                json_format_datetime(run_config.start_date),
            ]
            range_end = [
                couch_domain,
                doc_type,
                json_format_datetime(run_config.end_date),
            ]
            keys.append({'startkey': range_start, 'endkey': range_end})

    if not keys:
        # No matching domains, so there is nothing to iterate over.
        return

    def _to_record(row):
        # The view key is [domain, doc_type, received_on]; the row id is
        # the form id.
        form_id = row['id']
        domain, row_doc_type, received_raw = row['key']
        received_on = iso_string_to_datetime(received_raw)
        # Sanity check: the row must belong to the requested domain unless
        # the run targets ALL_DOMAINS.
        assert run_config.domain in (domain, ALL_DOMAINS)
        return (
            form_id,
            row_doc_type,
            'COUCH_XMLNS_NOT_SUPPORTED',
            received_on,
            domain,
        )

    iteration_key = f'couch_forms-{run_config.iteration_key}'
    iterable = resumable_view_iterator(
        XFormInstance.get_db(),
        iteration_key,
        view_name,
        keys,
        chunk_size=CHUNK_SIZE,
        full_row=True,
    )

    # Expected row count: add up the reduced view value of every key range;
    # ranges whose reduce query returns nothing contribute zero.
    expected_total = 0
    for key_range in keys:
        reduced = db.view(view_name, reduce=True, **key_range).one()
        if not reduced:
            continue
        expected_total += reduced['value']

    progress = ProgressManager(
        iterable,
        total=expected_total,
        reset=False,
        chunk_size=CHUNK_SIZE,
        logger=ProcessorProgressLogger('[Couch Forms] ', sys.stderr),
    )
    with progress:
        for chunk in chunked(iterable, CHUNK_SIZE):
            yield [_to_record(row) for row in chunk]
            # Only count the chunk once the consumer has asked for more.
            progress.add(len(chunk))
def _get_resumable_chunked_iterator(dbaccessor, iteration_key, log_prefix):
    """Yield chunks of documents from ``dbaccessor`` while logging progress.

    The progress total is the sum of
    ``dbaccessor.get_approximate_doc_count(alias)`` over every alias in
    ``dbaccessor.sql_db_aliases``. Documents come from
    ``resumable_sql_model_iterator`` keyed by ``iteration_key`` and are
    grouped with ``chunked(iterable, CHUNK_SIZE)``. Progress lines are
    written to stderr with ``log_prefix``.
    """
    expected_total = sum(
        dbaccessor.get_approximate_doc_count(alias)
        for alias in dbaccessor.sql_db_aliases
    )

    doc_stream = resumable_sql_model_iterator(
        iteration_key,
        dbaccessor,
        chunk_size=CHUNK_SIZE,
        # Identity transform: documents are passed through unchanged.
        transform=lambda doc: doc,
    )
    tracker = ProgressManager(
        doc_stream,
        total=expected_total,
        reset=False,
        chunk_size=CHUNK_SIZE,
        logger=ProcessorProgressLogger(log_prefix, sys.stderr),
    )
    with tracker:
        for batch in chunked(doc_stream, CHUNK_SIZE):
            yield batch
            # Only count the batch once the consumer has asked for more.
            tracker.add(len(batch))