Exemple #1
0
def test_parse_fetch_unchanged():
    with mock.patch("scraper.parsers.sev_parser.SCRAPER_DB_NAME",
                    SCRAPER_TEST_DB_NAME):
        author = User(_id=1, allow_api=False)
        mapset = Beatmapset(_id=2, creator=author, allow_api=False)
        disc = Discussion(_id=4,
                          beatmapset=mapset,
                          user=author,
                          content="123")
        Database(SCRAPER_TEST_DB_NAME).insert_discussion(disc)
        Database(SCRAPER_TEST_DB_NAME).insert_obv_sev(disc, obv=1, sev=2)

        # Obviousness drops from the cached 1 to 0 while severity is fetched
        # from the cache, i.e. 1/2 -> 0/2.
        event = sev_parser.parse(discussion_id=4,
                                 obv=0,
                                 sev=None,
                                 time=from_string("2020-07-22T21:00:00+00:00"))

    expected = Event(_type="sev",
                     time=from_string("2020-07-22T21:00:00+00:00"),
                     beatmapset=mapset,
                     discussion=disc,
                     content="0/2")

    # Field-by-field assertions give clearer failure output than the final
    # whole-object comparison alone.
    assert event.type == expected.type
    assert event.time == expected.time
    assert event.beatmapset == expected.beatmapset
    assert event.discussion == expected.discussion
    assert event.content == expected.content
    assert event == expected
def test_parse_timing():
    # Test both additions and removals.
    for user_id, name in ((1, "one"), (2, "two"), (3, "three"),
                          (4, "four"), (5, "five")):
        Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7),
                                                         user=User(user_id, name))

    start_time = datetime.utcnow()

    with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME",
                    SCRAPER_TEST_DB_NAME):
        events = list(group_parser.parse(
            group_id=7,
            group_page=mock_groups.soup,
            last_checked_at=from_string("2020-07-22T21:00:00+00:00")))

    end_time = datetime.utcnow()
    # We should not be using the api to fill in user names and such, as this data is available within the users json.
    assert (end_time - start_time).total_seconds() < 3
Exemple #3
0
def test_incomplete_context_from_db():
    mapset = Beatmapset(1001546, beatmapset_json=mock_beatmap.JSON)
    # The cached entry is missing both tab and difficulty, so the context
    # cannot be completed from it.
    cached = Discussion(99, mapset, user=User(1, "someone"), content="hello there")
    Database(SCRAPER_TEST_DB_NAME).insert_discussion(cached)

    partial = Discussion(99, mapset)
    assert not __complete_discussion_context(partial, db_name=SCRAPER_TEST_DB_NAME)
Exemple #4
0
def test_complete_context():
    mapset = Beatmapset(1001546, beatmapset_json=mock_beatmap.JSON)
    # The cached entry has every context field set, so completion succeeds.
    cached = Discussion(99, mapset, user=User(1, "someone"),
                        content="hello there", tab="tab", difficulty="diff")
    Database(SCRAPER_TEST_DB_NAME).insert_discussion(cached)

    partial = Discussion(99, mapset)
    assert __complete_discussion_context(partial, db_name=SCRAPER_TEST_DB_NAME)
    assert partial.user
    assert partial.content
    assert partial.tab
    assert partial.difficulty
Exemple #5
0
def test_parse_both_unchanged():
    with mock.patch("scraper.parsers.sev_parser.SCRAPER_DB_NAME",
                    SCRAPER_TEST_DB_NAME):
        author = User(_id=1, allow_api=False)
        mapset = Beatmapset(_id=2, creator=author, allow_api=False)
        disc = Discussion(_id=4,
                          beatmapset=mapset,
                          user=author,
                          content="123")
        Database(SCRAPER_TEST_DB_NAME).insert_discussion(disc)
        Database(SCRAPER_TEST_DB_NAME).insert_obv_sev(disc, obv=1, sev=2)

        # Clearing both values at once should be rejected instead of
        # producing an event.
        with pytest.raises(DeletedContextError) as err:
            sev_parser.parse(discussion_id=4,
                             obv=None,
                             sev=None,
                             time=from_string("2020-07-22T21:00:00+00:00"))
        assert "neither severity nor obviousness have been set" in str(
            err).lower()
Exemple #6
0
def test_get_group_events():
    Database(SCRAPER_TEST_DB_NAME).clear_table_data("group_users")

    with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME", SCRAPER_TEST_DB_NAME):
        count = 0
        for event in get_group_events(_from=datetime.utcnow()):
            # With no remembered members, every parsed event is an addition.
            assert event.type == types.ADD
            assert event.user
            assert event.group
            count += 1

    assert count > 100
Exemple #7
0
def __complete_discussion_context(discussion: Discussion,
                                  db_name: str = SCRAPER_DB_NAME) -> bool:
    """Completes the context of the discussion from prior database entries, if present. Returns true if succeeded."""
    cached = Database(db_name).retrieve_discussion("id=%s", (discussion.id, ))
    if not cached:
        return False

    # Only a fully populated cache entry can complete the context.
    if not all((cached.user, cached.content, cached.tab, cached.difficulty)):
        return False

    discussion.user = cached.user
    discussion.content = cached.content
    discussion.tab = cached.tab
    discussion.difficulty = cached.difficulty
    return True
def test_parse_removals():
    Database(SCRAPER_TEST_DB_NAME).insert_group_user(group=Usergroup(7),
                                                     user=User(1, "someone"))

    with mock.patch("scraper.parsers.group_parser.SCRAPER_DB_NAME",
                    SCRAPER_TEST_DB_NAME):
        events = list(group_parser.parse(
            group_id=7,
            group_page=mock_groups.soup,
            last_checked_at=from_string("2020-07-22T21:00:00+00:00")))

    assert len(events) == 18
    # The remembered user is not on the mocked page, so a removal is parsed.
    assert events[0] == Event(_type="remove",
                              time=from_string("2020-07-22T21:00:00+00:00"),
                              group=Usergroup(7),
                              user=User(_id=1, name="someone"))
Exemple #9
0
def test_correct_setup():
    # setup_function should have wiped both tables before this test ran.
    database = Database(SCRAPER_TEST_DB_NAME)
    for table in ("events", "discussions"):
        assert not database.retrieve_table_data(table)
Exemple #10
0
def setup_function():
    """Wipes event and discussion tables so each test starts from a clean state."""
    database = Database(SCRAPER_TEST_DB_NAME)
    for table in ("events", "discussions"):
        database.clear_table_data(table)
Exemple #11
0
def get_group_user(group_id: int, user_id: int) -> User:
    """Returns the last remembered user belonging to the given group id with the given user id.

    The retrieved relation is a (group, user) pair; only the user half is returned.
    """
    # NOTE(review): the previous `-> List[int]` annotation did not match the
    # body, which indexes out the user element of a (group, user) relation
    # (cf. get_group_user_ids); confirm User is the actual element type.
    return Database(SCRAPER_DB_NAME).retrieve_group_user("group_id=%s AND user_id=%s", (group_id, user_id))[1]
Exemple #12
0
def get_group_user_ids(group_id: int) -> List[int]:
    """Returns the last remembered user ids belonging to the given group id."""
    # Each relation is a (group, user) pair; only the user ids are of interest.
    group_user_relations = Database(SCRAPER_DB_NAME).retrieve_group_users("group_id=%s", (group_id,))
    return [user.id for _group, user in group_user_relations]
Exemple #13
0
 def __init__(self, reader_id: str, db_name: str):
     """Sets up the reader's database handle and initial run state.

     `reader_id` distinguishes this reader's persisted timestamps from
     those of other readers."""
     self.reader_id = reader_id
     self.database = Database(db_name)
     # Whether the read loop has been started; prevents running it twice.
     self.running = False
     # Time of the newest event seen so far; None until the first event.
     self.latest_event_time = None
Exemple #14
0
    return "".join([
        f"{fmt(event.type, colors.EVENT)}",
        f" ({fmt(event.user, colors.AUTHOR)})" if event.user else "",
        f" on {fmt(event.beatmapset, colors.CONTEXT)}" if event.beatmapset else "",
        f" to/from {fmt(event.group, colors.CONTEXT)}" if event.group else "",
        f" \"{event.content}\"" if event.content else ""
    ])

def insert_db(events) -> None:
    """Sorts the given event list chronologically in place and inserts each
    event into the database, oldest first."""
    if not events:
        return

    events.sort(key=lambda e: e.time)

    log(f"--- Inserting {len(events)} Events into the Database ---")
    for e in events:
        # One dot per inserted event, as a lightweight progress indicator.
        log(".", newline=False)
        database.insert_event(e)
    log()

def last_updated(current_time: datetime, _id: str) -> None:
    """Persists `current_time` as the last-updated time for the given id."""
    log(f"--- Last Updated [{_id}] {current_time} ---")
    timestamp.set_last(current_time, _id)

# Module entry point: initialize logging and the shared database handle,
# then run the gather loop on the event loop until it completes.
logger.init()
database = Database(SCRAPER_DB_NAME)

# NOTE(review): asyncio.get_event_loop() outside a running loop is deprecated
# since Python 3.10; asyncio.run(gather_loop()) is the modern equivalent —
# confirm the target Python version before changing.
loop = asyncio.get_event_loop()
loop.run_until_complete(gather_loop())
Exemple #15
0
def test_database():
    database = Database(SCRAPER_TEST_DB_NAME)
    # Reset every scraper table to the state before any tests ran.
    for table in (
        "events",
        "discussions",
        "discussion_obv_sev",
        "beatmapsets",
        "beatmaps",
        "beatmapset_status",
        "status_nominators",
        "beatmapset_modes",
        "newsposts",
        "group_users",
        "users",
    ):
        database.clear_table_data(table)

    return database
Exemple #16
0
class Reader():
    """This has an async method `run`, which starts a loop that reads Aiess events every 10 seconds.

    If an event is found that is after an internal timestamp (initially current time on first run),
    then `on_event` is called with this; basically called for every new event.

    For each of these reads, `on_event_batch` is called, regardless of if any new events were found.
    
    Use this by creating a class inheriting Reader, and override above methods with custom functionality."""
    def __init__(self, reader_id: str, db_name: str):
        # `reader_id` namespaces the persisted timestamps (see `__time_id`) so
        # multiple readers can track their progress independently.
        self.reader_id = reader_id
        self.database = Database(db_name)
        # Whether the read loop has been started; prevents running it twice.
        self.running = False
        # Time of the newest event pushed so far; None until the first event.
        self.latest_event_time = None

    async def run(self) -> None:
        """A blocking method which initiates a loop looking through events in the database.
        This is from where on_event is called, for each new event found.
        
        Being a blocking call, any statement after calling this method will not be executed,
        so place this after any setup code."""
        if self.running:
            raise ValueError("Reader is already running.")

        self.running = True
        while True:
            await self.__push_all_new_events()
            # Poll interval between database reads.
            await asyncio.sleep(10)

    async def __push_all_new_events(self) -> None:
        """Triggers the on_event method for each new event since the last stored datetime for each scope."""
        news_target = f"type=\"{types.NEWS}\""
        groups_target = f"type=\"{types.ADD}\" OR type=\"{types.REMOVE}\""

        # "mapset" is the catch-all scope: everything that is neither a
        # newspost nor a group membership change.
        await self.__push_new_events(
            Scope("mapset",
                  sql_target=f"NOT ({news_target}) AND NOT ({groups_target})"))
        await self.__push_new_events(Scope("news", sql_target=news_target))
        await self.__push_new_events(Scope("groups", sql_target=groups_target))

    async def __push_new_events(self, scope: Scope) -> None:
        """Triggers the on_event method for each new event since the last stored datetime for the given scope."""
        last_time = timestamp.get_last(self.__time_id(scope))
        await self.__push_events_between(last_time, datetime.utcnow(), scope)

    async def __push_events_between(self, last_time: datetime,
                                    current_time: datetime,
                                    scope: Scope) -> datetime:
        """Triggers the on_event method for each event between the two datetimes.
        Updates the last stored datetime after each on_event call."""
        await self.on_event_batch()
        async for event in await self.events_between(last_time, current_time,
                                                     scope.sql_target):
            await self.on_event(event)
            # Persist progress only after on_event succeeds, so an event that
            # raised is re-delivered on the next pass rather than skipped.
            timestamp.set_last(event.time, self.__time_id(scope))

            if self.latest_event_time is None or event.time > self.latest_event_time:
                self.latest_event_time = event.time

    def __time_id(self, scope: Scope) -> str:
        """Returns the identifier of the file the reader creates to keep track of the last time for this scope.
        This is based on the identifier supplied to the reader on initialization."""
        return f"reader-{self.reader_id}-{scope.name}"

    async def events_between(
            self,
            _from: datetime,
            to: datetime,
            sql_target: str = "TRUE") -> Generator[Event, None, None]:
        """Yields each event found in the database, from (excluding) the later time to (including) the earlier time.
        Optionally only retrieves events matching the `sql_target` WHERE clause."""
        # NOTE(review): `sql_target` is interpolated directly into the WHERE
        # clause — callers must pass trusted SQL fragments only. Also, the
        # result is consumed with `async for`, so the `Generator` annotation
        # may be inaccurate (presumably an async iterable) — confirm against
        # retrieve_events.
        return self.database.retrieve_events(
            where=
            f"({sql_target}) AND time > %s AND time <= %s ORDER BY time ASC",
            where_values=(_from, to))

    async def on_event_batch(self) -> None:
        """Called for each new event batch found in the running loop of the reader.
        This happens before on_event is called for each event."""

    async def on_event(self, event: Event) -> None:
        """Called for each new event found in the running loop of the reader."""
def setup_function():
    """Clears remembered group members before each test in this module."""
    database = Database(SCRAPER_TEST_DB_NAME)
    database.clear_table_data("group_users")