def test_iterate_doc_ids_in_domain_by_type(self):
    """iterate_doc_ids_in_domain_by_type yields only the ids whose docs
    match both the requested domain and doc_type."""
    matching_ids = [uuid.uuid4().hex, uuid.uuid4().hex]
    non_matching_id = uuid.uuid4().hex
    docs = [
        {'_id': matching_ids[0], 'domain': 'match-domain', 'doc_type': 'match-type'},
        {'_id': matching_ids[1], 'domain': 'match-domain', 'doc_type': 'match-type'},
        {'_id': non_matching_id, 'domain': 'match-domain', 'doc_type': 'nomatch-type'},
    ]
    for doc in docs:
        self.db.save_doc(doc)
        self.addCleanup(self.db.delete_doc, doc)
    # chunk_size=1 forces multiple view fetches, exercising the pagination path.
    ids = list(iterate_doc_ids_in_domain_by_type(
        'match-domain', 'match-type', database=self.db, chunk_size=1))
    self.assertEqual(sorted(ids), sorted(matching_ids))
def _iter_couch_docs_for_domains(self, domains):
    """Yield every couch document of type ``self.couch_doc_type()`` found
    in each of *domains*, one domain at a time."""
    for domain in domains:
        print(f"Processing data for domain: {domain}")
        id_iterator = iterate_doc_ids_in_domain_by_type(
            domain, self.couch_doc_type(), database=self.couch_db())
        yield from iter_docs(self.couch_db(), id_iterator)
def rebuild_indicators(indicator_config_id):
    """Rebuild the indicator table for the given data source config from scratch.

    Drops and recreates the backing SQL table, then walks every doc id of
    the referenced doc type in the config's domain, recording the ids in a
    redis set and building indicators in CHUNK_SIZE batches.
    """
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)
    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()
        # NOTE(review): redis_key is recomputed after save() — presumably the
        # key depends on the saved document revision; confirm before removing
        # this seemingly redundant assignment.
        redis_key = _get_redis_key_for_config(config)
    adapter.rebuild_table()
    relevant_ids_chunk = []
    for relevant_id in iterate_doc_ids_in_domain_by_type(
            config.domain,
            config.referenced_doc_type,
            chunk_size=CHUNK_SIZE,
            database=couchdb):
        relevant_ids_chunk.append(relevant_id)
        if len(relevant_ids_chunk) >= CHUNK_SIZE:
            # Flush a full chunk: record the ids in redis, then build them.
            redis_client.sadd(redis_key, *relevant_ids_chunk)
            _build_indicators(indicator_config_id, relevant_ids_chunk)
            relevant_ids_chunk = []
    if relevant_ids_chunk:
        # Flush the final partial chunk.
        redis_client.sadd(redis_key, *relevant_ids_chunk)
        _build_indicators(indicator_config_id, relevant_ids_chunk)
def iter_document_ids(self):
    """Return an iterator over all doc ids of ``self.doc_type`` in
    ``self.domain``.

    Raises ValueError when either attribute is unset.
    """
    from corehq.apps.domain.dbaccessors import iterate_doc_ids_in_domain_by_type
    if not self.domain or not self.doc_type:
        raise ValueError(
            'This function requires a domain and doc_type set!')
    return iterate_doc_ids_in_domain_by_type(
        self.domain,
        self.doc_type,
        chunk_size=ID_CHUNK_SIZE,
        database=self._couch_db,
    )
def iter_document_ids(self, last_id=None):
    """Return an iterator over doc ids of ``self.doc_type`` in
    ``self.domain``, optionally resuming pagination after *last_id*.

    When resuming, the view start key is rebuilt from the last processed
    document; date-keyed doc types (those in ``_DATE_MAP``) include the
    doc's date component in the key.

    Raises ValueError when domain or doc_type is unset.
    """
    from corehq.apps.domain.dbaccessors import iterate_doc_ids_in_domain_by_type
    if not (self.domain and self.doc_type):
        raise ValueError(
            'This function requires a domain and doc_type set!')
    start_key = None
    if last_id:
        last_doc = self.get_document(last_id)
        start_key = [self.domain, self.doc_type]
        # Membership test directly on the dict — `.keys()` added nothing.
        if self.doc_type in _DATE_MAP:
            start_key.append(last_doc[_DATE_MAP[self.doc_type]])
    return iterate_doc_ids_in_domain_by_type(
        self.domain, self.doc_type,
        chunk_size=ID_CHUNK_SIZE,
        database=self._couch_db,
        startkey=start_key,
        startkey_docid=last_id)
def _iteratively_build_table(config, last_id=None):
    """Build the data source table for *config* in CHUNK_SIZE batches.

    Walks the doc ids of the config's referenced doc type in its domain,
    optionally resuming after *last_id*, queuing ids on a redis list and
    building indicators chunk by chunk.  For non-static configs the redis
    key is cleared at the end and the build is marked finished; a
    concurrent save is handled via ResourceConflict.
    """
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)
    indicator_config_id = config._id

    start_key = None
    if last_id:
        # Resume pagination from the last processed doc.  Date-keyed doc
        # types include the doc's (JSON-formatted) date in the view key.
        last_doc = _DOC_TYPE_MAPPING[config.referenced_doc_type].get(last_id)
        start_key = [config.domain, config.referenced_doc_type]
        # Test membership on the dict directly (was `in _DATE_MAP.keys()`).
        if config.referenced_doc_type in _DATE_MAP:
            date = json_format_datetime(last_doc[_DATE_MAP[config.referenced_doc_type]])
            start_key.append(date)

    relevant_ids = []
    for relevant_id in iterate_doc_ids_in_domain_by_type(
            config.domain,
            config.referenced_doc_type,
            chunk_size=CHUNK_SIZE,
            database=couchdb,
            startkey=start_key,
            startkey_docid=last_id):
        relevant_ids.append(relevant_id)
        if len(relevant_ids) >= CHUNK_SIZE:
            # Flush a full chunk: queue the ids in redis, then build them.
            redis_client.rpush(redis_key, *relevant_ids)
            _build_indicators(indicator_config_id, relevant_ids)
            relevant_ids = []
    if relevant_ids:
        # Flush the final partial chunk.
        redis_client.rpush(redis_key, *relevant_ids)
        _build_indicators(indicator_config_id, relevant_ids)

    if not is_static(indicator_config_id):
        redis_client.delete(redis_key)
        config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            current_config = DataSourceConfiguration.get(config._id)
            # Someone else saved the config since we loaded it; only mark
            # finished if a new build has not been initiated in the meantime.
            if config.meta.build.initiated == current_config.meta.build.initiated:
                current_config.meta.build.finished = True
                current_config.save()
def iter_document_ids(self, last_id=None):
    """Return an iterator over doc ids of ``self.doc_type`` in
    ``self.domain``, optionally resuming pagination after *last_id*.

    When resuming, the view start key is rebuilt from the last processed
    document; date-keyed doc types (those in ``_DATE_MAP``) include the
    doc's date component in the key.

    Raises ValueError when domain or doc_type is unset.
    """
    from corehq.apps.domain.dbaccessors import iterate_doc_ids_in_domain_by_type
    if not (self.domain and self.doc_type):
        raise ValueError('This function requires a domain and doc_type set!')
    start_key = None
    if last_id:
        last_doc = self.get_document(last_id)
        start_key = [self.domain, self.doc_type]
        # Membership test directly on the dict — `list(_DATE_MAP)` built a
        # throwaway list just to check containment.
        if self.doc_type in _DATE_MAP:
            start_key.append(last_doc[_DATE_MAP[self.doc_type]])
    return iterate_doc_ids_in_domain_by_type(
        self.domain, self.doc_type,
        chunk_size=ID_CHUNK_SIZE,
        database=self._couch_db,
        startkey=start_key,
        startkey_docid=last_id
    )