Ejemplo n.º 1
0
    def test_iterate_doc_ids_in_domain_by_type(self):
        """Only ids of docs with the requested doc_type are iterated.

        Three docs are saved in 'match-domain': two of type 'match-type' and
        one of type 'nomatch-type'. Iterating with ``chunk_size=1`` must
        return exactly the ids of the two matching docs (order-insensitive).
        """
        ids_and_types = [
            (uuid.uuid4().hex, 'match-type'),
            (uuid.uuid4().hex, 'match-type'),
            (uuid.uuid4().hex, 'nomatch-type'),
        ]
        docs = [
            {'_id': doc_id, 'domain': 'match-domain', 'doc_type': doc_type}
            for doc_id, doc_type in ids_and_types
        ]
        expected_ids = [
            doc_id for doc_id, doc_type in ids_and_types
            if doc_type == 'match-type'
        ]

        # Save everything first, then register the cleanups, in doc order.
        for doc in docs:
            self.db.save_doc(doc)
        for doc in docs:
            self.addCleanup(self.db.delete_doc, doc)

        found_ids = list(iterate_doc_ids_in_domain_by_type(
            'match-domain',
            'match-type',
            database=self.db,
            chunk_size=1,
        ))
        self.assertEqual(sorted(found_ids), sorted(expected_ids))
 def _iter_couch_docs_for_domains(self, domains):
     """Yield the couch docs of this object's doc type for each given domain.

     For every domain a progress line is printed, the matching doc ids are
     looked up via ``iterate_doc_ids_in_domain_by_type`` and the docs
     produced by ``iter_docs`` for those ids are yielded one by one.
     """
     for domain in domains:
         print(f"Processing data for domain: {domain}")
         doc_type = self.couch_doc_type()
         id_source_db = self.couch_db()
         doc_ids = iterate_doc_ids_in_domain_by_type(
             domain, doc_type, database=id_source_db)
         yield from iter_docs(self.couch_db(), doc_ids)
Ejemplo n.º 3
0
def rebuild_indicators(indicator_config_id):
    """Rebuild the indicator table for the config with the given id.

    The table is rebuilt through the SQL adapter, then the ids of all docs of
    the config's referenced doc type in the config's domain are walked in
    batches of ``CHUNK_SIZE``. Each batch is added to a redis set and handed
    to ``_build_indicators``.
    """
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Record the start time up front: if the rebuild dies midway, the
        # stale "initiated" timestamp with finished=False makes that visible.
        build_meta = config.meta.build
        build_meta.initiated = datetime.datetime.utcnow()
        build_meta.finished = False
        config.save()
        redis_key = _get_redis_key_for_config(config)

    def process_batch(doc_ids):
        # Track the ids in redis, then build indicators for them.
        redis_client.sadd(redis_key, *doc_ids)
        _build_indicators(indicator_config_id, doc_ids)

    adapter.rebuild_table()

    all_doc_ids = iterate_doc_ids_in_domain_by_type(
        config.domain,
        config.referenced_doc_type,
        chunk_size=CHUNK_SIZE,
        database=couchdb,
    )
    batch = []
    for doc_id in all_doc_ids:
        batch.append(doc_id)
        if len(batch) < CHUNK_SIZE:
            continue
        process_batch(batch)
        # Start a fresh list rather than clearing the one just handed off.
        batch = []

    # Whatever is left over is a final, partial batch.
    if batch:
        process_batch(batch)
Ejemplo n.º 4
0
    def iter_document_ids(self):
        """Return the doc-id iterator for ``self.domain`` / ``self.doc_type``.

        The ids are fetched from ``self._couch_db`` in chunks of
        ``ID_CHUNK_SIZE`` by ``iterate_doc_ids_in_domain_by_type``.

        :raises ValueError: if either ``self.domain`` or ``self.doc_type``
            is not set (falsy).
        """
        from corehq.apps.domain.dbaccessors import iterate_doc_ids_in_domain_by_type

        if not self.domain or not self.doc_type:
            message = 'This function requires a domain and doc_type set!'
            raise ValueError(message)

        id_iterator = iterate_doc_ids_in_domain_by_type(
            self.domain, self.doc_type,
            chunk_size=ID_CHUNK_SIZE, database=self._couch_db)
        return id_iterator
Ejemplo n.º 5
0
    def iter_document_ids(self, last_id=None):
        """Return the doc-id iterator for ``self.domain`` / ``self.doc_type``.

        :param last_id: optional id of a document to resume iteration from.
            When given, that document is loaded with ``self.get_document``
            and a start key of ``[domain, doc_type]`` is built; if the doc
            type has an entry in ``_DATE_MAP``, the value of the mapped field
            on that document is appended to the key. ``last_id`` is also
            passed through as ``startkey_docid``.
        :returns: the result of ``iterate_doc_ids_in_domain_by_type``, reading
            from ``self._couch_db`` in chunks of ``ID_CHUNK_SIZE``.
        :raises ValueError: if either ``self.domain`` or ``self.doc_type``
            is not set (falsy).
        """
        from corehq.apps.domain.dbaccessors import iterate_doc_ids_in_domain_by_type

        if not (self.domain and self.doc_type):
            raise ValueError(
                'This function requires a domain and doc_type set!')
        start_key = None
        if last_id:
            last_doc = self.get_document(last_id)
            start_key = [self.domain, self.doc_type]
            # Test membership on the mapping itself; ``.keys()`` is redundant.
            if self.doc_type in _DATE_MAP:
                start_key.append(last_doc[_DATE_MAP[self.doc_type]])

        return iterate_doc_ids_in_domain_by_type(
            self.domain,
            self.doc_type,
            chunk_size=ID_CHUNK_SIZE,
            database=self._couch_db,
            startkey=start_key,
            startkey_docid=last_id,
        )
Ejemplo n.º 6
0
def _iteratively_build_table(config, last_id=None):
    """Build indicators for every relevant doc of ``config``, in chunks.

    Doc ids of ``config.referenced_doc_type`` in ``config.domain`` are walked
    in batches of ``CHUNK_SIZE``; each batch is pushed onto a redis list and
    handed to ``_build_indicators``. For non-static configs the redis key is
    deleted afterwards and the config is marked as finished.

    :param config: the data source configuration to build.
    :param last_id: optional id of a document to resume from. When given, the
        document is loaded and a start key of ``[domain, doc_type]`` is built
        (plus the document's JSON-formatted date when the doc type has an
        entry in ``_DATE_MAP``); ``last_id`` is also passed through as
        ``startkey_docid``.
    """
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)
    indicator_config_id = config._id

    start_key = None
    if last_id:
        last_doc = _DOC_TYPE_MAPPING[config.referenced_doc_type].get(last_id)
        start_key = [config.domain, config.referenced_doc_type]
        # Test membership on the mapping itself; ``.keys()`` is redundant.
        if config.referenced_doc_type in _DATE_MAP:
            date = json_format_datetime(last_doc[_DATE_MAP[config.referenced_doc_type]])
            start_key.append(date)

    relevant_ids = []
    for relevant_id in iterate_doc_ids_in_domain_by_type(
            config.domain,
            config.referenced_doc_type,
            chunk_size=CHUNK_SIZE,
            database=couchdb,
            startkey=start_key,
            startkey_docid=last_id):
        relevant_ids.append(relevant_id)
        if len(relevant_ids) >= CHUNK_SIZE:
            redis_client.rpush(redis_key, *relevant_ids)
            _build_indicators(indicator_config_id, relevant_ids)
            # Start a fresh list rather than clearing the one just handed off.
            relevant_ids = []

    # Flush the final, partial batch (if any).
    if relevant_ids:
        redis_client.rpush(redis_key, *relevant_ids)
        _build_indicators(indicator_config_id, relevant_ids)

    if not is_static(indicator_config_id):
        redis_client.delete(redis_key)
        config.meta.build.finished = True
        try:
            config.save()
        except ResourceConflict:
            # The config changed underneath us: re-fetch it and only mark it
            # finished if no new build has been initiated in the meantime.
            current_config = DataSourceConfiguration.get(config._id)
            if config.meta.build.initiated == current_config.meta.build.initiated:
                current_config.meta.build.finished = True
                current_config.save()
Ejemplo n.º 7
0
    def iter_document_ids(self, last_id=None):
        """Return the doc-id iterator for ``self.domain`` / ``self.doc_type``.

        :param last_id: optional id of a document to resume iteration from.
            When given, that document is loaded with ``self.get_document``
            and a start key of ``[domain, doc_type]`` is built; if the doc
            type has an entry in ``_DATE_MAP``, the value of the mapped field
            on that document is appended to the key. ``last_id`` is also
            passed through as ``startkey_docid``.
        :returns: the result of ``iterate_doc_ids_in_domain_by_type``, reading
            from ``self._couch_db`` in chunks of ``ID_CHUNK_SIZE``.
        :raises ValueError: if either ``self.domain`` or ``self.doc_type``
            is not set (falsy).
        """
        from corehq.apps.domain.dbaccessors import iterate_doc_ids_in_domain_by_type

        if not (self.domain and self.doc_type):
            raise ValueError('This function requires a domain and doc_type set!')
        start_key = None
        if last_id:
            last_doc = self.get_document(last_id)
            start_key = [self.domain, self.doc_type]
            # Test membership on the mapping directly instead of copying its
            # keys into a throwaway list and scanning that.
            if self.doc_type in _DATE_MAP:
                start_key.append(last_doc[_DATE_MAP[self.doc_type]])

        return iterate_doc_ids_in_domain_by_type(
            self.domain,
            self.doc_type,
            chunk_size=ID_CHUNK_SIZE,
            database=self._couch_db,
            startkey=start_key,
            startkey_docid=last_id
        )