def test_new_doc_and_save(self):
    """Check that a new Doc is persisted only after an explicit ``save``."""
    collection = store.get_collection()
    assert collection.count_documents({}) == 0

    genotype = store.Genotype()
    doc = store.Doc(genotype=genotype)

    # Building the object alone must leave the collection empty.
    assert collection.count_documents({}) == 0
    assert len(doc._update) == 2
    assert isinstance(doc.id, bson.ObjectId)
    assert doc.genotype is genotype
    assert doc.wins == 0
    assert doc.model is None

    # Reading ``llh`` on a new document is expected to fail.
    with pytest.raises(AttributeError) as exc_info:
        _ = doc.llh
    assert "object has no attribute 'llh'" in str(exc_info.value)

    assert doc.date is None
    assert doc.timer == 0
    assert doc.tickers is None

    doc.save()

    # After saving there is one stored document and no pending updates.
    assert collection.count_documents({}) == 1
    assert len(doc._update) == 0
def test_delete(self):
    """Check that deleting the single stored document empties the collection."""
    collection = store.get_collection()
    assert collection.count_documents({}) == 1

    stored = collection.find_one()
    store.Doc(id_=stored[store.ID]).delete()

    assert collection.count_documents({}) == 0
def _aggregate_oldest(limit: int, first_step: Optional[dict] = None):
    """Take the first documents in ascending ``_id`` order.

    The built-in stages project ``ir``, ``llh``, ``date`` and ``timer``, sort by
    ``_id`` ascending and keep at most ``limit`` documents. When ``first_step``
    is truthy, it is placed in front of those stages.
    """
    stages = []
    if first_step:
        stages.append(first_step)
    stages.extend(
        [
            {"$project": {"ir": True, "llh": True, "date": True, "timer": True}},
            {"$sort": {"_id": pymongo.ASCENDING}},
            {"$limit": limit},
        ],
    )
    return store.get_collection().aggregate(stages)
def find_weaker(self) -> "Organism":
    """Find the organism with the lowest llh.

    Only organisms with the same set of tickers and the same update date take
    part in the selection. The first lookup is additionally restricted to
    organisms whose timer is not smaller than this organism's timer; if that
    lookup returns this very organism, the lookup is repeated without the timer
    restriction. May find itself.
    """
    own = self._data
    collection = store.get_collection()

    def _lowest_llh(query: dict) -> "Organism":
        # Fetch only the id of the match with the smallest llh.
        found = collection.find_one(
            filter=query,
            projection=["_id"],
            sort=[("llh", pymongo.ASCENDING)],
        )
        return Organism(**found)

    candidate = _lowest_llh(
        {"timer": {"$gte": own.timer}, "tickers": own.tickers, "date": own.date},
    )
    if self.id == candidate.id:
        return _lowest_llh({"tickers": own.tickers, "date": own.date})
    return candidate
def _print_key_stats(key: str, view: str = None) -> None:
    """Log the minimum, median and maximum of the values stored under ``key``.

    Only documents that contain ``key`` are read. A value that is not a float
    is replaced by the median of its contents, and NaN values are skipped. When
    nothing is left, three dashes are logged instead of numbers. The log label
    is ``view`` when given, otherwise the upper-cased key.
    """
    collection = store.get_collection()
    cursor = collection.find(filter={key: {"$exists": True}}, projection=[key])

    amounts = []
    for doc in cursor:
        amount = doc[key]
        if not isinstance(amount, float):
            amount = np.median(np.array(amount))
        if not np.isnan(amount):
            amounts.append(amount)

    if amounts:
        rendered = [f"{level:.4f}" for level in np.quantile(amounts, [0, 0.5, 1.0])]
    else:
        rendered = ["-"] * 3

    quantiles = ", ".join(rendered)
    label = view or key.upper()
    LOGGER.info(f"{label} - ({quantiles})")  # noqa: WPS421
def test_load_doc_update_and_save(self):
    """Check that changed fields are tracked, saved and visible after a reload."""
    stored = store.get_collection().find_one()
    doc_id = stored[store.ID]

    doc = store.Doc(id_=doc_id)
    assert len(doc._update) == 0

    # Each assignment should register one pending update.
    doc.wins = 42
    doc.llh = 2.2
    doc.timer = 111
    assert len(doc._update) == 3

    doc.save()
    assert len(doc._update) == 0

    # A fresh load must see the saved values and have nothing pending.
    reloaded = store.Doc(id_=doc_id)
    assert len(reloaded._update) == 0
    assert reloaded.id == doc_id
    assert reloaded.genotype == stored["genotype"]
    assert reloaded.wins == 42
    assert reloaded.model is None
    assert reloaded.llh == 2.2
    assert reloaded.date is None
    assert reloaded.timer == 111
    assert reloaded.tickers is None
def _get_parents() -> tuple[Organism, Organism]:
    """Get a pair of parents.

    Two random documents are sampled from the collection. If the sample does
    not contain exactly two organisms, two organisms created with no arguments
    are returned instead.
    """
    id_only_stage = {"$project": {"_id": True}}
    sample_stage = {"$sample": {"size": 2}}
    cursor = store.get_collection().aggregate([id_only_stage, sample_stage])

    sampled = [Organism(**id_doc) for id_doc in cursor]
    if len(sampled) != 2:
        return Organism(), Organism()

    first, second = sampled
    return first, second
def _sample_organism(num: int) -> Iterable[Organism]:
    """Yield several randomly chosen organisms.

    Needed to implement reproduction and selection. Up to ``num`` documents are
    sampled, and only their ids are fetched to build the organisms.
    """
    sample_stage = {"$sample": {"size": num}}
    id_only_stage = {"$project": {"_id": True}}
    cursor = store.get_collection().aggregate([sample_stage, id_only_stage])
    for id_doc in cursor:
        yield Organism(**id_doc)
def get_all_organisms() -> Iterable[Organism]:
    """Yield all available organisms.

    Documents are read in ascending ``date`` order, then ascending ``llh``.
    A document whose organism cannot be built because ``store.IdError`` is
    raised is skipped.
    """
    collection = store.get_collection()
    id_dicts = collection.find(
        filter={},
        projection=["_id"],
        sort=[("date", pymongo.ASCENDING), ("llh", pymongo.ASCENDING)],
    )
    for id_dict in id_dicts:
        # Guard only the construction. With ``yield`` outside the ``try``, an
        # IdError thrown into the generator by its consumer is not swallowed.
        try:
            organism = Organism(**id_dict)
        except store.IdError:
            continue
        yield organism
def get_parent() -> Organism:
    """Get the best organism of the population.

    "Best" is the first document when sorting by ascending ``date`` and then
    by descending ``llh``.
    """
    collection = store.get_collection()
    ordering = [
        ("date", pymongo.ASCENDING),
        ("llh", pymongo.DESCENDING),
    ]
    best_id = collection.find_one(filter={}, projection=["_id"], sort=ordering)
    return Organism(**best_id)
def find_weaker(self) -> "Organism":
    """Find an organism with llh not greater than its own and the largest timer.

    Only organisms with the same tickers are considered. May find itself.
    """
    own = self._data
    collection = store.get_collection()
    query = {"llh": {"$lte": own.llh}, "tickers": own.tickers}
    longest_timer = collection.find_one(
        filter=query,
        projection=["_id"],
        sort=[("timer", pymongo.DESCENDING)],
    )
    return Organism(**longest_timer)
def _print_llh_stats() -> NoReturn: """Статистика по минимуму, медиане и максимуму llh.""" collection = store.get_collection() db_find = collection.find cursor = db_find(filter=dict(llh={"$exists": True}), projection=["llh"]) llhs = map(lambda x: x["llh"], cursor) llhs = tuple(llhs) if llhs: quantiles = np.quantile(tuple(llhs), [0.0, 0.5, 1.0]) quantiles = map(lambda x: f"{x:.4f}", quantiles) quantiles = tuple(quantiles) else: quantiles = ["-"] * 3 print(f"LLH - ({', '.join(tuple(quantiles))})")
def _print_llh_stats() -> None:
    """Print the minimum, median and maximum llh.

    Only documents that contain an ``llh`` field are read. When there are
    none, three dashes are printed in place of the values.
    """
    cursor = store.get_collection().find(
        filter={"llh": {"$exists": True}},
        projection=["llh"],
    )
    values = [found["llh"] for found in cursor]

    if not values:
        rendered = ["-", "-", "-"]
    else:
        levels = np.quantile(tuple(values), [0, 0.5, 1.0])
        rendered = [f"{level:.4f}" for level in levels]

    joined = ", ".join(rendered)
    print(f"LLH - ({joined})")
def _print_wins_stats() -> NoReturn: """Статистика по максимуму побед.""" collection = store.get_collection() db_find = collection.find params = { "filter": dict(wins={"$exists": True}), "projection": ["wins"], "sort": [("wins", pymongo.DESCENDING)], "limit": 1, } wins = list(db_find(**params)) max_wins = None if wins: max_wins, *_ = wins max_wins = max_wins["wins"] print(f"Максимум побед - {max_wins}")
def test_load_doc(self):
    """Check the field values of a Doc loaded from the stored document."""
    stored = store.get_collection().find_one()
    doc = store.Doc(id_=stored[store.ID])

    assert len(doc._update) == 0
    assert doc.id == stored[store.ID]
    assert doc.genotype == stored["genotype"]
    assert doc.wins == 0
    assert doc.model is None

    # Reading ``llh`` on this document is expected to fail.
    with pytest.raises(AttributeError) as exc_info:
        _ = doc.llh
    assert "object has no attribute 'llh'" in str(exc_info.value)

    assert doc.date is None
    assert doc.timer == 0
    assert doc.tickers is None
def _print_wins_stats() -> None:
    """Log the population size and the maximum ``wins`` value.

    Only documents that contain a ``wins`` field are considered. ``None`` is
    logged as the maximum when there are none.
    """
    collection = store.get_collection()
    # Sorting by wins descending with limit 1 yields at most the top document.
    top_docs = list(
        collection.find(
            filter={"wins": {"$exists": True}},
            projection=["wins"],
            sort=[("wins", pymongo.DESCENDING)],
            limit=1,
        ),
    )
    max_wins = top_docs[0]["wins"] if top_docs else None
    LOGGER.info(f"Организмов - {count()} / Максимум оценок - {max_wins}")
def min_max_date() -> tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
    """Return the minimum and maximum ``date`` in the population.

    Both values are ``None`` when the aggregation yields no document or the
    maximum date is ``None``.
    """
    group_stage = {
        "$group": {
            "_id": {},
            "min": {"$min": "$date"},
            "max": {"$max": "$date"},
        },
    }
    summary = next(store.get_collection().aggregate([group_stage]), {})
    if summary.get("max") is not None:
        return pd.Timestamp(summary["min"]), pd.Timestamp(summary["max"])
    return None, None
def count() -> int:
    """Return the number of organisms (documents) in the population."""
    return store.get_collection().count_documents({})
def test_get_collection():
    """Check that the store hands out a pymongo collection named ``test``."""
    handle = store.get_collection()
    expected_type = pymongo.collection.Collection
    assert isinstance(handle, expected_type)
    assert handle.name == "test"