コード例 #1
0
    def assert_valid_state_migrations(self,
                                      content_store: ContentStore,
                                      migration_batch: int = 50):
        """Verify that the states' migration queries partition the content store.

        Returns immediately when ``self._if_all_items_have_state`` reports that
        every item already has a state. Otherwise two checks are run:

        1. The per-state ``migrate_q`` match counts must sum to at least the
           total document count; a smaller sum means some documents are
           matched by no state.
        2. Every document matched by a state's ``migrate_q`` is read in pages
           of ``migration_batch`` and must not already have been matched by a
           previously scanned state.

        Combined, the two checks mean every document in the content store
        belongs to one and only one state.

        Args:
            content_store: Store whose documents are about to be migrated.
            migration_batch: Page size used when scanning matched documents.

        Raises:
            ValueError: If ``migration_batch`` is smaller than 1.
            RuntimeError: If the state counts sum to less than the total
                count, or a document is matched by more than one state.
        """
        if migration_batch < 1:
            # A zero size would divide by zero below, and a negative size
            # would make the scan skip past every document and check nothing.
            raise ValueError(
                f"migration_batch must be a positive integer, got {migration_batch}"
            )

        if self._if_all_items_have_state(content_store):
            print(f"Pipeline {self.name} has migrated state for all items")
            return

        # Cheap count check first: if the matched counts sum to less than the
        # total, some documents are unmigrated and the setup must be invalid.
        sum_migrated_count = sum(
            content_store.count(state.migrate_q) for state in self.states)
        total_count = content_store.count({})
        if sum_migrated_count < total_count:
            print(
                f"Sampling 100 object IDs that will miss migration {self._object_ids_missing_migration(content_store)[: 100]}"
            )
            raise RuntimeError(
                f"Sum of all state docs does not match total count. sum={sum_migrated_count} < actual={total_count}"
            )

        # Maps each document id seen so far to the state that matched it.
        doc_id_to_state = {}  # type: Dict[str, PipelineState]

        def claim(docs, state):
            # Record `state` as the owner of every doc; a second owner is an error.
            for doc in docs:
                doc_id = doc["_id"]
                if doc_id in doc_id_to_state:
                    raise RuntimeError(
                        f"Document with id {doc_id} is going to have "
                        f"both {doc_id_to_state[doc_id].name} and {state.name} state"
                    )
                doc_id_to_state[doc_id] = state

        # NOTE(review): pages are fetched with skip/limit and no explicit sort;
        # confirm the store returns documents in a stable order, otherwise a
        # document could be seen twice or missed across pages.
        for state in self.states:
            state_name = state.name
            migrate_q = state.migrate_q
            migrated_count = content_store.count(migrate_q)
            migration_batches = migrated_count // migration_batch

            for i in range(migration_batches):
                print(
                    f"Checking pipeline {self.name} state {state_name} migration batch {i + 1}/{migration_batches}"
                )
                claim(
                    content_store.query(migrate_q,
                                        skip=i * migration_batch,
                                        limit=migration_batch),
                    state,
                )

            # The remainder (fewer than one full page) is read without a limit.
            print(
                f"Checking pipeline {self.name} state {state_name} last migration batch"
            )
            claim(
                content_store.query(migrate_q,
                                    skip=migration_batches * migration_batch),
                state,
            )
コード例 #2
0
ファイル: timeline.py プロジェクト: k-t-corp/broccoli-server
def query(
    content_store: ContentStore,
    query_params: Dict,
    projection: List[str],
    limit: int,
    additional_q: Optional[Dict] = None,
) -> Dict:
    """Fetch one page of documents, paginated by ``_id``, with prev/next flags.

    Page selection depends on ``query_params``:

    * ``"from"`` present: documents with ``_id`` below that id, sorted with
      ``{"_id": -1}``.
    * otherwise ``"to"`` present: documents with ``_id`` above that id,
      sorted with ``{"_id": 1}`` and then reversed.
    * neither: the first ``limit`` documents sorted with ``{"_id": -1}``.

    Args:
        content_store: Store that is queried and counted.
        query_params: Request parameters; only ``"from"`` and ``"to"`` are read.
        projection: Passed through to ``content_store.query``.
        limit: Maximum page size, passed through to ``content_store.query``.
        additional_q: Extra filter merged into every query and count; it is
            copied before an ``_id`` condition is added.

    Returns:
        A dict with ``has_prev``, ``has_next`` and ``results`` (each result
        passed through ``add_created_at``). When the page is non-empty it also
        carries ``prev_to`` and ``next_from``, the string forms of the first
        and last result ids.
    """
    base_filter = {} if additional_q is None else additional_q

    def with_id_bound(operator, object_id):
        # Work on a copy so the caller's filter is left untouched.
        scoped = base_filter.copy()
        scoped["_id"] = {operator: object_id}
        return scoped

    # Select the page.
    if "from" in query_params:
        page = content_store.query(
            q=with_id_bound("$lt", bson.ObjectId(query_params["from"])),
            projection=projection,
            limit=limit,
            sort={"_id": -1},
        )
    elif "to" in query_params:
        ascending = content_store.query(
            q=with_id_bound("$gt", bson.ObjectId(query_params["to"])),
            projection=projection,
            limit=limit,
            sort={"_id": 1},
        )
        # Flip so this page is ordered like the other two branches.
        page = list(reversed(ascending))
    else:
        page = content_store.query(
            q=base_filter, projection=projection, limit=limit, sort={"_id": -1}
        )

    if not page:
        return {
            "has_prev": False,
            "has_next": False,
            "results": [add_created_at(item) for item in page],
        }

    # Is there anything beyond the last item of this page?
    next_from_id = bson.ObjectId(page[-1]["_id"])
    has_next = content_store.count(with_id_bound("$lt", next_from_id)) != 0

    # Is there anything before the first item of this page?
    prev_to_id = bson.ObjectId(page[0]["_id"])
    has_prev = content_store.count(with_id_bound("$gt", prev_to_id)) != 0

    return {
        "has_prev": has_prev,
        "prev_to": str(prev_to_id),
        "has_next": has_next,
        "next_from": str(next_from_id),
        "results": [add_created_at(item) for item in page],
    }