def test_insert_retrieve_multiple_discussions(test_database):
    """Inserts two discussions on the same beatmapset and reads both back.

    The retrieved generator is expected to yield the discussions in the order
    they were inserted.
    """
    creator = User(1, name="test")
    mapset = Beatmapset(
        1, artist="123", title="456", creator=creator,
        modes=["osu", "taiko"], genre="genre", language="language")
    first = Discussion(
        1, beatmapset=mapset, user=creator,
        content="testing", tab="tab", difficulty="diff")
    second = Discussion(
        2, beatmapset=mapset, user=creator,
        content="real testing", tab="tab", difficulty="diff")

    for discussion in (first, second):
        test_database.insert_discussion(discussion)

    found = test_database.retrieve_discussions(
        where="beatmapset_id=%s",
        where_values=(mapset.id, ))

    assert next(found, None) == first
    assert next(found, None) == second
async def test_insert_retrieve_event_digit_properties(test_database):
    """Checks that digit-only text fields come back from the database as `str`."""
    author = User(1, "497")
    mapset = Beatmapset(3, artist="5", title="2", creator=author, allow_api=False)
    thread = Discussion(2, mapset, author, content="8", tab="tab", difficulty="diff")
    inserted = Event(
        _type="test", time=datetime.utcnow(), user=author,
        beatmapset=mapset, discussion=thread, content="4")

    test_database.insert_event(inserted)
    fetched = await test_database.retrieve_event(
        where="type=%s",
        where_values=("test", ))

    # Every field below only contains digits; retrieval must keep them as `str`
    # instead of reinterpreting them as `int`.
    assert fetched.content == "4"
    assert fetched.user.name == "497"
    assert fetched.beatmapset.artist == "5"
    assert fetched.beatmapset.title == "2"
    assert fetched.discussion.content == "8"
async def test_insert_retrieve_multiple_events(test_database):
    """Inserts two events sharing a beatmapset and retrieves both by beatmapset id.

    The events are expected to be yielded in insertion order.
    """
    now = datetime.utcnow()
    author = User(1, name="test")
    mapset = Beatmapset(1, creator=author, allow_api=False)
    thread = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")
    first = Event(
        _type="test", time=now, beatmapset=mapset, discussion=thread, user=author)
    second = Event(
        _type="123", time=now, beatmapset=mapset, discussion=thread, user=author)

    for event in (first, second):
        test_database.insert_event(event)

    found = test_database.retrieve_events(
        where="beatmapset_id=%s",
        where_values=(mapset.id, ))

    assert await anext(found, None) == first
    assert await anext(found, None) == second
async def test_insert_retrieve_event(test_database):
    """Round-trips a single event through the database.

    Each attribute is compared on its own before the whole-object comparison,
    so a mismatch points at the offending field.
    """
    now = datetime.utcnow()
    author = User(1, name="test")
    mapset = Beatmapset(
        1, artist="123", title="456", creator=author,
        modes=["osu", "taiko"], genre="genre", language="language")
    thread = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")
    inserted = Event(
        _type="test", time=now, beatmapset=mapset, discussion=thread, user=author)

    test_database.insert_event(inserted)
    fetched = await test_database.retrieve_event("type=%s", ("test", ))

    assert fetched.type == inserted.type
    assert fetched.time == inserted.time
    assert fetched.beatmapset == inserted.beatmapset
    assert fetched.discussion == inserted.discussion
    assert fetched.user == inserted.user
    assert fetched.content == inserted.content
    assert fetched == inserted
async def test_insert_retrieve_multiple_events(test_database):
    """Inserts two events sharing a beatmapset and retrieves both by beatmapset id.

    The events are expected to be yielded in insertion order.
    """
    now = datetime.utcnow()
    author = User(1, name="test")
    mapset = Beatmapset(
        1, artist="123", title="456", creator=author,
        modes=["osu", "taiko"], genre="genre", language="language")
    thread = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")
    first = Event(
        _type="test", time=now, beatmapset=mapset, discussion=thread, user=author)
    second = Event(
        _type="123", time=now, beatmapset=mapset, discussion=thread, user=author)

    for event in (first, second):
        test_database.insert_event(event)

    found = test_database.retrieve_events(
        where="beatmapset_id=%s",
        where_values=(mapset.id, ))

    assert await anext(found, None) == first
    assert await anext(found, None) == second
async def test_insert_retrieve_event(test_database):
    """Round-trips a single event through the database.

    Each attribute is compared on its own before the whole-object comparison,
    so a mismatch points at the offending field.
    """
    now = datetime.utcnow()
    author = User(1, allow_api=False)
    mapset = Beatmapset(1, creator=author, allow_api=False)
    thread = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")
    inserted = Event(
        _type="test", time=now, beatmapset=mapset, discussion=thread, user=author)

    test_database.insert_event(inserted)
    fetched = await test_database.retrieve_event("type=%s", ("test", ))

    assert fetched.type == inserted.type
    assert fetched.time == inserted.time
    assert fetched.beatmapset == inserted.beatmapset
    assert fetched.discussion == inserted.discussion
    assert fetched.user == inserted.user
    assert fetched.content == inserted.content
    assert fetched == inserted
def test_old_discussion():
    """Builds a Discussion on a beatmapset created from the old-beatmap mock json."""
    discussion_id = 1234956
    mapset = Beatmapset(41823, beatmapset_json=mock_old_beatmap.JSON)

    # No discussion with this id exists, but constructing one should still work.
    thread = Discussion(discussion_id, mapset)

    assert thread.id == 1234956
    assert thread.beatmapset == mapset
def test_insert_retrieve_discussion(test_database):
    """Round-trips a single discussion through the database.

    Each attribute is compared on its own before the whole-object comparison,
    so a mismatch points at the offending field.
    """
    author = User(1, name="test")
    mapset = Beatmapset(
        1, artist="123", title="456", creator=author,
        modes=["osu", "taiko"], genre="genre", language="language")
    inserted = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")

    test_database.insert_discussion(inserted)
    fetched = test_database.retrieve_discussion(
        where="id=%s",
        where_values=(1, ))

    assert fetched.id == inserted.id
    assert fetched.beatmapset == inserted.beatmapset
    assert fetched.user == inserted.user
    assert fetched.content == inserted.content
    assert fetched.tab == inserted.tab
    assert fetched.difficulty == inserted.difficulty
    assert fetched == inserted
def retrieve_discussions(
        self,
        where: str,
        where_values: tuple = None,
        group_by: str = None,
        order_by: str = None,
        limit: int = None,
        beatmapset: Beatmapset = None
) -> Generator[Discussion, None, None]:
    """Returns a generator of all discussions from the database matching the given WHERE clause.

    If `beatmapset` is supplied, it is attached to every yielded discussion.
    Otherwise the beatmapset of each row is retrieved from the database using
    that row's own beatmapset id.

    Args:
        where: The WHERE clause, with `%s` placeholders for values.
        where_values: The values substituted into the WHERE clause.
        group_by: Optional GROUP BY clause.
        order_by: Optional ORDER BY clause.
        limit: Optional maximum number of rows.
        beatmapset: Optional beatmapset to use for all discussions instead of
            looking one up per row.

    Yields:
        One Discussion per fetched row.
    """
    fetched_rows = self.retrieve_table_data(
        table="discussions",
        where=where,
        where_values=where_values,
        selection="id, beatmapset_id, user_id, content, tab, difficulty",
        group_by=group_by,
        order_by=order_by,
        limit=limit)

    # Beatmapsets already looked up during this call, keyed by id, so rows on the
    # same beatmapset do not each trigger another retrieval.
    beatmapsets_by_id = {}

    for _id, beatmapset_id, user_id, content, tab, difficulty in (fetched_rows or []):
        if beatmapset:
            row_beatmapset = beatmapset
        else:
            # Resolve per row. Assigning to the `beatmapset` argument here would make
            # every later row reuse the first row's beatmapset, even when that row
            # belongs to a different beatmapset id.
            if beatmapset_id not in beatmapsets_by_id:
                beatmapsets_by_id[beatmapset_id] = self.retrieve_beatmapset(
                    "id=%s", (beatmapset_id, ))
            row_beatmapset = beatmapsets_by_id[beatmapset_id]

        user = self.retrieve_user("id=%s", (user_id, ))
        yield Discussion(_id, row_beatmapset, user, content, tab, difficulty)
def test_discussion_int_content():
    """Checks that integer content given to a Discussion is exposed as a string."""
    discussion_id = 1234956
    mapset = Beatmapset(1001546, beatmapset_json=mock_beatmap.JSON)

    thread = Discussion(discussion_id, mapset, content=4)

    assert thread.id == 1234956
    assert thread.content == "4"
    assert thread.beatmapset == mapset
def test_insert_incomplete_discussion(test_database):
    """Expects inserting a discussion that only has an id and beatmapset to raise ValueError."""
    creator = User(1, name="test")
    mapset = Beatmapset(1, creator=creator, allow_api=False)
    incomplete = Discussion(1, beatmapset=mapset)

    with pytest.raises(ValueError) as raised:
        test_database.insert_discussion(incomplete)

    message = str(raised.value)
    assert "missing from discussion" in message
def parse_event_json(self, event_json: object, user_jsons: object = None) -> Event:
    """Returns an Event reflecting the given discussion event json object.

    Returns None if the json is empty or if the event's context is incomplete
    (a DeletedContextError was raised while reconstructing it, e.g. deleted beatmaps).
    The user name is looked up from `user_jsons` (the user jsons of the discussion page)
    when supplied, otherwise requested from the api.
    """
    if not event_json:
        # Seems to happen when the respective beatmapset has been deleted. For some reason
        # the event is still present when viewing the page source manually, regardless of
        # login status.
        log_err(
            "WARNING | An event is missing; the beatmapset was probably deleted."
        )
        return None

    try:
        # Scrape object data
        _type = event_json["message_type"]
        time = timestamp.from_string(event_json["created_at"])
        beatmapset_id = event_json["beatmapset_id"]
        starting_post = event_json["starting_post"]
        discussion_id = starting_post["beatmap_discussion_id"]
        user_id = event_json["user_id"]

        # The user name is either provided by a user json from the discussion page,
        # or queried through the api.
        user_json = self.__lookup_user_json(user_id, user_jsons)
        user_name = None
        if user_json:
            user_name = user_json["username"]

        content = starting_post["message"]

        difficulty = None
        if "beatmap" in event_json and "version" in event_json["beatmap"]:
            difficulty = event_json["beatmap"]["version"]

        # A timestamp puts the discussion on the timeline; otherwise it belongs to a
        # general tab, depending on whether a difficulty is attached.
        if event_json["timestamp"] is not None:
            tab = "timeline"
        else:
            tab = "general" if difficulty else "generalAll"

        # Reconstruct objects
        beatmapset = Beatmapset(beatmapset_id)
        user = None if user_id is None else User(user_id, user_name)

        # TODO: This portion is missing handling for replies, see the other method.
        # Still unclear which message_type replies use; will need to find out if/when
        # replies get json formats.
        discussion = None
        if discussion_id is not None:
            discussion = Discussion(
                discussion_id, beatmapset, user, content, tab, difficulty)
    except DeletedContextError as err:
        log_err(err)
        return None

    return Event(
        _type=_type,
        time=time,
        beatmapset=beatmapset,
        discussion=discussion,
        user=user,
        content=content)
def __complete_discussion_context(discussion: Discussion, db_name: str = SCRAPER_DB_NAME) -> bool:
    """Fills in the discussion's context from a prior database entry with the same id.

    The user, content, tab and difficulty are copied over only if the stored
    discussion has a truthy value for all four. Returns True if the context was
    copied, otherwise False (no stored discussion, or an incomplete one).
    """
    context_fields = ("user", "content", "tab", "difficulty")

    cached = Database(db_name).retrieve_discussion("id=%s", (discussion.id, ))
    if not cached:
        return False

    # Lazily evaluated, so checking stops at the first missing field.
    if not all(getattr(cached, field) for field in context_fields):
        return False

    for field in context_fields:
        setattr(discussion, field, getattr(cached, field))
    return True
def parse_event(self, event: Tag) -> Event:
    """Returns an Event reflecting the given event html Tag object.

    Returns None if the event's context is incomplete, i.e. a DeletedContextError
    was raised while parsing it (e.g. deleted beatmaps).
    """
    try:
        # Scrape object data
        _type = self.parse_event_type(event)
        time = self.parse_event_time(event)
        link = self.parse_event_link(event)
        beatmapset_id = self.parse_id_from_beatmapset_link(link)
        discussion_id = self.parse_id_from_discussion_link(link)
        user_id = self.parse_event_author_id(event)
        user_name = self.parse_event_author_name(event)
        content = self.parse_discussion_message(event)

        # Reconstruct objects
        beatmapset = Beatmapset(beatmapset_id)
        user = None if user_id is None else User(user_id, user_name)

        has_discussion = discussion_id is not None
        if _type == "reply":
            # Replies only reference the discussion they are posted on; its details
            # should be looked up separately.
            discussion = Discussion(discussion_id, beatmapset) if has_discussion else None
        else:
            tab = self.parse_discussion_tab(event)
            difficulty = self.parse_discussion_diff(event)
            if has_discussion:
                discussion = Discussion(
                    discussion_id, beatmapset, user, content, tab, difficulty)
            else:
                discussion = None
    except DeletedContextError as err:
        log_err(err)
        return None

    return Event(
        _type=_type,
        time=time,
        beatmapset=beatmapset,
        discussion=discussion,
        user=user,
        content=content)
def test_insert_retrieve_discussion_and_replies(test_database):
    """Inserts a problem event and two replies on one discussion, then retrieves each.

    The replies are told apart by the user who posted them.
    """
    now = datetime.utcnow()
    poster = User(1, name="one")
    responder = User(2, name="two")
    mapset = Beatmapset(
        1, artist="123", title="456", creator=responder,
        modes=["osu", "taiko"], genre="genre", language="language")
    thread = Discussion(
        1, beatmapset=mapset, user=poster,
        content="ping", tab="tab", difficulty="diff")

    opening = Event(
        _type="problem", time=now, beatmapset=mapset,
        discussion=thread, user=poster, content="ping")
    first_reply = Event(
        _type="reply", time=now, beatmapset=mapset,
        discussion=thread, user=responder, content="pong")
    second_reply = Event(
        _type="reply", time=now, beatmapset=mapset,
        discussion=thread, user=poster, content="miss")

    for event in (opening, first_reply, second_reply):
        test_database.insert_event(event)

    # NOTE(review): `retrieve_event` is called without `await` here. If it is a coroutine
    # function in this code base, the returned coroutine objects are always truthy and
    # the assertions below verify nothing - confirm against the Database implementation.
    found_opening = test_database.retrieve_event(
        where="type=%s",
        where_values=("problem", ))
    found_first_reply = test_database.retrieve_event(
        where="type=%s AND user_id=%s",
        where_values=("reply", responder.id))
    found_second_reply = test_database.retrieve_event(
        where="type=%s AND user_id=%s",
        where_values=("reply", poster.id))

    assert found_opening
    assert found_first_reply
    assert found_second_reply
def test_insert_incomplete_discussion(test_database):
    """Expects inserting a discussion that only has an id and beatmapset to raise ValueError."""
    creator = User(1, name="test")
    mapset = Beatmapset(
        1, artist="123", title="456", creator=creator,
        modes=["osu", "taiko"], genre="genre", language="language")
    incomplete = Discussion(1, beatmapset=mapset)

    with pytest.raises(ValueError) as raised:
        test_database.insert_discussion(incomplete)

    message = str(raised.value)
    assert "missing from discussion" in message
def test_insert_retrieve_multiple_discussions(test_database):
    """Inserts two discussions on the same beatmapset and reads both back.

    The retrieved generator is expected to yield the discussions in the order
    they were inserted.
    """
    creator = User(1, name="test")
    mapset = Beatmapset(1, creator=creator, allow_api=False)
    first = Discussion(
        1, beatmapset=mapset, user=creator,
        content="testing", tab="tab", difficulty="diff")
    second = Discussion(
        2, beatmapset=mapset, user=creator,
        content="real testing", tab="tab", difficulty="diff")

    for discussion in (first, second):
        test_database.insert_discussion(discussion)

    found = test_database.retrieve_discussions(
        where="beatmapset_id=%s",
        where_values=(mapset.id, ))

    assert next(found, None) == first
    assert next(found, None) == second
def test_insert_retrieve_obv_sev(test_database):
    """Stores obv/sev values for a discussion and reads them back by discussion id."""
    author = User(1, name="test")
    mapset = Beatmapset(1, creator=author, allow_api=False)
    thread = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")

    test_database.insert_obv_sev(thread, obv=2, sev=0)
    stored = test_database.retrieve_obv_sev(discussion_id=1)

    obv, sev = stored
    assert obv == 2
    assert sev == 0
async def test_insert_retrieve_event_cached(cached_database):
    """Retrieves the same 100 events twice and compares the two passes.

    The second retrieval is expected to be faster than the first and to yield
    the same events.
    """
    time = datetime.utcnow()
    user = User(1, name="test")
    beatmapset = Beatmapset(
        1, artist="123", title="456", creator=user,
        modes=["osu", "taiko"], genre="genre", language="language")
    discussion = Discussion(
        1, beatmapset=beatmapset, user=user,
        content="testing", tab="tab", difficulty="diff")

    for i in range(100):
        event = Event(
            _type=f"{i}", time=time, beatmapset=beatmapset,
            discussion=discussion, user=user)
        cached_database.insert_event(event)

    # First pass; nothing has been retrieved through this database yet.
    start_time = datetime.utcnow()
    events_uncached = []
    retrieved_events_uncached = cached_database.retrieve_events(
        where="beatmapset_id=%s",
        where_values=(beatmapset.id, ))
    async for event in retrieved_events_uncached:
        assert event.beatmapset == beatmapset
        events_uncached.append(event)
    delta_time_uncached = datetime.utcnow() - start_time

    # Second pass with the identical query.
    start_time = datetime.utcnow()
    events_cached = []
    retrieved_events_cached = cached_database.retrieve_events(
        where="beatmapset_id=%s",
        where_values=(beatmapset.id, ))
    async for event in retrieved_events_cached:
        assert event.beatmapset == beatmapset
        events_cached.append(event)
    delta_time_cached = datetime.utcnow() - start_time

    # Both generators are exhausted by the loops above, so fetching their next item
    # would only ever compare None with None. Compare the events collected during
    # iteration instead, and make sure there was something to compare.
    assert events_uncached
    assert events_uncached == events_cached
    assert delta_time_uncached > delta_time_cached
def test_insert_retrieve_obv_sev_event(test_database):
    """Inserts an SEV event with content "2/0" and expects obv 2 and sev 0 for its discussion."""
    creator = User(1, name="test")
    mapset = Beatmapset(1, creator=creator, allow_api=False)
    thread = Discussion(
        1, beatmapset=mapset, user=creator,
        content="testing", tab="tab", difficulty="diff")
    sev_event = Event(
        _type=types.SEV,
        time=from_string("2020-01-01 00:00:00"),
        beatmapset=mapset,
        discussion=thread,
        content="2/0")

    test_database.insert_obv_sev_event(sev_event)
    stored = test_database.retrieve_obv_sev(discussion_id=1)

    obv, sev = stored
    assert obv == 2
    assert sev == 0
def test_insert_retrieve_discussion(test_database):
    """Round-trips a single discussion through the database.

    Each attribute is compared on its own before the whole-object comparison,
    so a mismatch points at the offending field.
    """
    author = User(1, name="test")
    mapset = Beatmapset(1, creator=author, allow_api=False)
    inserted = Discussion(
        1, beatmapset=mapset, user=author,
        content="testing", tab="tab", difficulty="diff")

    test_database.insert_discussion(inserted)
    fetched = test_database.retrieve_discussion(
        where="id=%s",
        where_values=(1, ))

    assert fetched.id == inserted.id
    assert fetched.beatmapset == inserted.beatmapset
    assert fetched.user == inserted.user
    assert fetched.content == inserted.content
    assert fetched.tab == inserted.tab
    assert fetched.difficulty == inserted.difficulty
    assert fetched == inserted
def test_discussion():
    """Checks that a Discussion keeps the id and beatmapset it was constructed with."""
    discussion_id = 1234956
    mapset = Beatmapset(1001546, beatmapset_json=mock_beatmap.JSON)

    thread = Discussion(discussion_id, mapset)

    assert thread.id == 1234956
    assert thread.beatmapset == mapset
def parse_event_json(self, event_json: object, user_jsons: object = None) -> Event:
    """Returns an Event reflecting the given event json object.

    Returns None if the json is empty or if the event's context is incomplete
    (e.g. the json has no beatmapset, which raises a DeletedContextError internally).
    The user name is looked up from `user_jsons` when supplied, otherwise requested
    from the api.
    """
    if not event_json:
        # Seems to occur when the respective beatmapset has been deleted.
        log_err(
            "WARNING | An event is missing; the beatmapset was probably deleted."
        )
        return None

    try:
        # Scrape object data
        _type = event_json["type"]
        time = timestamp.from_string(event_json["created_at"])

        has_beatmapset = "beatmapset" in event_json and event_json["beatmapset"]
        if not has_beatmapset:
            raise DeletedContextError(
                "No beatmapset was found in this event. It was likely deleted."
            )
        beatmapset_id = event_json["beatmapset"]["id"]

        discussion_id = None
        if "discussion" in event_json and event_json["discussion"]:
            discussion_id = event_json["discussion"]["id"]

        user_id = None
        if "user_id" in event_json:
            user_id = event_json["user_id"]

        user_json = self.__lookup_user_json(user_id, user_jsons)
        user_name = None
        if user_json:
            user_name = user_json["username"]

        content = None
        if _type in [types.LANGUAGE_EDIT, types.GENRE_EDIT]:
            # Language/genre edits always have "old" and "new" fields, which no other type has.
            comment = event_json["comment"]
            old = comment["old"]
            new = comment["new"]
            content = f"{old} -> {new}"
        if _type in [types.UNLOVE]:
            # E.g. "Mapper has asked for it to be removed from Loved".
            content = event_json["comment"]["reason"]

        # Reconstruct objects
        beatmapset = Beatmapset(beatmapset_id)
        user = None if user_id is None else User(user_id, user_name)
        discussion = None
        if discussion_id is not None:
            discussion = Discussion(discussion_id, beatmapset)
    except DeletedContextError as err:
        log_err(err)
        return None

    return Event(
        _type=_type,
        time=time,
        beatmapset=beatmapset,
        discussion=discussion,
        user=user,
        content=content)