Ejemplo n.º 1
0
def main() -> None:
    """Subscribe to the comments channel and print the size of every message.

    Builds a ``CommentsProcessor`` from the Redis settings in ``config``,
    subscribes it, then prints ``len()`` of each item yielded by
    ``processor.listen()``.
    """
    redis_settings = (
        config.REDIS_HOST,
        config.REDIS_PORT,
        config.REDIS_PASSWORD,
        config.REDIS_CHANNEL,
    )
    processor = CommentsProcessor(*redis_settings, L)
    processor.subscribe()
    for message in processor.listen():
        print(len(message))
Ejemplo n.º 2
0
def update_post(session: sqlalchemy.orm.Session, state: SyncState,
                processor: CommentsProcessor) -> None:
    """Re-download one post, merge it into the database and publish changes.

    Steps, in order:

    1. GET ``GK_URL/<state.post_id>``; on a non-200 response record
       ``'HTTP error <code>'`` through ``update_state`` and stop.
    2. Pass the raw body to ``dump_post`` and parse it with ``parse_post``;
       on any parsing exception log it, record ``'Parse error'`` and stop.
    3. Merge the post, its users and its comments into ``session``. A comment
       with no stored text row counts as new; one whose stored text differs
       from the merged text counts as updated.
    4. Record ``'OK'``, flush, and hand both lists to
       ``processor.on_comments_update``.
    5. Commit, then compare ``state.last_comment_id`` with the highest
       comment id that was parsed and store the parsed value in ``state``.

    Args:
        session: SQLAlchemy session used for all queries and merges.
        state: Sync state row of the post being refreshed.
        processor: Receiver of the new/updated comment lists.
    """
    L.info("Updating post %d...", state.post_id)

    url = GK_URL + "/" + str(state.post_id)
    response = requests.get(url, headers=config.DEFAULT_HEADERS, timeout=30)
    if response.status_code != 200:
        update_state(state, f'HTTP error {response.status_code}')
        return

    page = response.content
    dump_post(page)

    try:
        post, users, comments = parse_post(page)
    except Exception as e:
        L.exception(e)
        update_state(state, 'Parse error')
        return

    session.merge(post)
    for user in users:
        session.merge(user)

    newest_id = None
    changed: List[Comment] = []
    added: List[Comment] = []

    for comment in comments:
        # Read the stored text *before* merging so the comparison below
        # sees the previous value rather than the freshly merged one.
        stored_text_row = (session.query(Comment.text)
                           .filter(Comment.comment_id == comment.comment_id)
                           .first())
        merged = session.merge(comment)
        if stored_text_row is None:
            added.append(merged)
        elif stored_text_row[0] != merged.text:
            changed.append(merged)
        # Track the highest comment id seen on the page.
        if newest_id is None or comment.comment_id > newest_id:
            newest_id = comment.comment_id

    update_state(state, 'OK')
    session.flush()
    processor.on_comments_update(added, changed)

    # Workaround for https://govnokod.ru/26440#comment527494
    # TODO: Make an appropriate fix
    session.commit()
    state_moved_ahead = (newest_id is not None
                         and state.last_comment_id is not None
                         and state.last_comment_id > newest_id)
    if state_moved_ahead:
        # scan_comments.py changed state while we were parsing the post
        L.warning(f'state.last_comment_id changed during post {post.post_id} '
                  f'parsing: {state.last_comment_id} > {newest_id}')
        state.pending = True
        session.flush()
    # NOTE(review): this assignment is unconditional, so a page without
    # comments stores None here -- confirm that is intended.
    state.last_comment_id = newest_id
Ejemplo n.º 3
0
def update_xyz_states(comments: Sequence[parser_xyz.CommentXyz],
                      processor: CommentsProcessor) -> None:
    """Persist the ru-id -> xyz-id mapping for the given comments.

    Comments lacking either ``id_xyz`` or ``id_ru`` are ignored. For the
    rest, the matching ``CommentIdStorage`` row is created or corrected and,
    when the ``Comment`` row already exists, linked to it. Comments whose
    mapping was linked or changed are reported through
    ``processor.on_comments_update([], ...)`` after a commit.

    Args:
        comments: Parsed xyz comments to process.
        processor: Receiver of the list of comments whose mapping changed.
    """
    with ScopedSession() as session:
        linked_comments = []
        stored_pairs = []

        for comment in comments:
            if comment.id_xyz is None or comment.id_ru is None:
                continue

            comment_db = session.query(Comment).filter(
                Comment.comment_id == comment.id_ru).first()
            has_storage = (comment_db is not None
                           and comment_db.comment_id_storage is not None)

            if has_storage:
                # Comment already linked: only fix up a stale xyz id.
                storage = comment_db.comment_id_storage
                if storage.comment_id_xyz != comment.id_xyz:
                    storage.comment_id_xyz = comment.id_xyz
                    session.merge(storage)
                    linked_comments.append(comment_db)
                continue

            # No linked storage yet: find or create the mapping row by ru id.
            id_storage = session.query(CommentIdStorage).filter(
                CommentIdStorage.comment_id_ru == comment.id_ru).first()
            pair = (comment.id_ru, comment.id_xyz)
            if id_storage is None:
                id_storage = CommentIdStorage()
                id_storage.comment_id_ru = comment.id_ru
                id_storage.comment_id_xyz = comment.id_xyz
                session.add(id_storage)
                stored_pairs.append(pair)
            elif id_storage.comment_id_xyz != comment.id_xyz:
                id_storage.comment_id_xyz = comment.id_xyz
                stored_pairs.append(pair)

            if comment_db is not None:
                comment_db.comment_id_storage = id_storage
                linked_comments.append(comment_db)

        if stored_pairs:
            session.flush()
            # Log at most five pairs, followed by an ellipsis marker.
            preview: list = stored_pairs[:5]
            if len(stored_pairs) > 5:
                preview.append('...')
            shown = ", ".join(map(str, preview))
            L.info(
                f'Prefetched xyz ids for {len(stored_pairs)} comments: [{shown}]'
            )

        if linked_comments:
            session.commit()
            preview = [(c.comment_id, c.comment_id_storage.comment_id_xyz)
                       for c in linked_comments[:5]]
            if len(linked_comments) > 5:
                preview.append('...')
            shown = ", ".join(map(str, preview))
            L.info(
                f'Fetched xyz ids for {len(linked_comments)} comments: [{shown}]'
            )
            processor.on_comments_update([], linked_comments)
Ejemplo n.º 4
0
def worker_xyz(thread_exited_event: threading.Event) -> None:
    """Poll the xyz comment feed until ``exit_event`` is set.

    Each iteration fetches the latest xyz comments and passes them to
    ``update_xyz_states``. When the id of the first fetched comment differs
    from the one seen previously, the next ``FAST_TO_SLOW_STEPS`` iterations
    wait ``FAST_DELAY``; otherwise the loop waits ``SLOW_DELAY``. Any
    exception raised while fetching or updating is logged and drops the loop
    back to the slow delay.

    Args:
        thread_exited_event: Cleared on entry and set when the worker
            leaves, including when it leaves because of an uncaught error.
    """
    thread_exited_event.clear()
    try:
        processor = CommentsProcessor(config.REDIS_HOST, config.REDIS_PORT,
                                      config.REDIS_PASSWORD,
                                      config.REDIS_CHANNEL, L)
        L.info("=== xyz worker started ===")
        fast_requests = 0
        last_xyz_id: Optional[int] = -1
        while True:
            try:
                comments = fetch_latest_comments_xyz()
                update_xyz_states(comments, processor)

                # An empty fetch has no first comment to compare against;
                # treat it as "nothing new" instead of raising IndexError
                # and logging a traceback on every poll.
                if comments and comments[0].id_xyz != last_xyz_id:
                    fast_requests = FAST_TO_SLOW_STEPS
                    last_xyz_id = comments[0].id_xyz
            except Exception as e:
                L.exception(e)
                fast_requests = 0

            if fast_requests > 0:
                delay = FAST_DELAY
                fast_requests -= 1
            else:
                delay = SLOW_DELAY

            # wait() returns True as soon as exit_event is set.
            if exit_event.wait(delay):
                break
    finally:
        # Always signal exit so a waiter is not left blocked if the worker
        # dies outside the inner try (e.g. processor construction fails).
        thread_exited_event.set()
Ejemplo n.º 5
0
def worker_ru(thread_exited_event: threading.Event) -> None:
    """Poll the ru comment feed until ``exit_event`` is set.

    Each iteration fetches the latest comments and passes them to
    ``update_sync_states``. When that call reports updates, the next
    ``FAST_TO_SLOW_STEPS`` iterations wait ``FAST_DELAY``; otherwise the
    loop waits ``SLOW_DELAY``. Any exception raised while fetching or
    updating is logged and drops the loop back to the slow delay.

    Args:
        thread_exited_event: Cleared on entry and set when the worker
            leaves, including when it leaves because of an uncaught error.
    """
    thread_exited_event.clear()
    try:
        processor = CommentsProcessor(config.REDIS_HOST, config.REDIS_PORT,
                                      config.REDIS_PASSWORD,
                                      config.REDIS_CHANNEL, L)
        L.info("=== ru worker started ===")
        fast_requests = 0
        while True:
            try:
                comments = fetch_latest_comments()
                has_updates = update_sync_states(comments, processor)
                if has_updates:
                    fast_requests = FAST_TO_SLOW_STEPS
            except Exception as e:
                L.exception(e)
                fast_requests = 0

            if fast_requests > 0:
                delay = FAST_DELAY
                fast_requests -= 1
            else:
                delay = SLOW_DELAY

            # wait() returns True as soon as exit_event is set.
            if exit_event.wait(delay):
                break
    finally:
        # Always signal exit so a waiter is not left blocked if the worker
        # dies outside the inner try (e.g. processor construction fails).
        thread_exited_event.set()
Ejemplo n.º 6
0
def main() -> None:
    """Run the post updater forever.

    Creates one ``CommentsProcessor`` from the Redis settings in ``config``
    and then calls ``update_next_post`` with it in an endless loop.
    """
    L.info("=== started ===")
    redis_settings = (
        config.REDIS_HOST,
        config.REDIS_PORT,
        config.REDIS_PASSWORD,
        config.REDIS_CHANNEL,
    )
    processor = CommentsProcessor(*redis_settings, L)
    while True:
        update_next_post(processor)
Ejemplo n.º 7
0
def update_sync_states(comments: Sequence[Tuple[int, int, str]],
                       processor: CommentsProcessor) -> bool:
    """Apply freshly fetched comments to stored comments and sync states.

    For every ``(post_id, comment_id, comment_text)`` triple:

    * if the comment is already stored with a different text, the stored
      text is replaced and the comment is collected as updated;
    * if the post has no ``SyncState`` yet, a pending one is created;
    * otherwise, if the comment id is newer than the recorded one (or none
      is recorded), the state is bumped and marked pending.

    When any stored comment text changed, the session is committed and the
    changed comments are sent to ``processor.on_comments_update``.

    Args:
        comments: ``(post_id, comment_id, comment_text)`` triples.
        processor: Receiver of the list of comments whose text changed.

    Returns:
        True if at least one sync state was created or advanced.
    """
    found_new = False
    edited: List[Comment] = []

    with ScopedSession() as session:
        for post_id, comment_id, comment_text in comments:
            stored = (session.query(Comment)
                      .filter(Comment.comment_id == comment_id)
                      .first())
            if stored is not None and stored.text != comment_text:
                stored.text = comment_text
                edited.append(stored)

            state = (session.query(SyncState)
                     .filter(SyncState.post_id == post_id)
                     .one_or_none())
            if not state:
                L.info("Got new comment %d for new post %d", comment_id,
                       post_id)
                found_new = True
                session.add(
                    SyncState(post_id=post_id,
                              last_comment_id=comment_id,
                              pending=True,
                              priority=SyncState.PRIORITY_HAS_COMMENTS))
            elif (state.last_comment_id is None
                  or comment_id > state.last_comment_id):
                L.info("Got new comment %d for post %d", comment_id, post_id)
                found_new = True
                state.last_comment_id = comment_id
                state.pending = True
                state.priority = SyncState.PRIORITY_HAS_COMMENTS

        if edited:
            count = len(edited)
            plural = "s" if count > 1 else ""
            ids = [c.comment_id for c in edited]
            L.info(f'Fast-fetched {count} updated comment{plural}: {ids}')
            session.commit()
            processor.on_comments_update([], edited)

    return found_new
Ejemplo n.º 8
0
def comments_listener(comments_processor: CommentsProcessor) -> None:
    """Relay comment update events to every known Socket.IO room.

    Iterates over ``comments_processor.listen()``. Each message is decoded as
    JSON and must contain the keys ``'new'`` and ``'updated'``; their
    concatenation is emitted as a ``'new_comments'`` event to each room in
    ``rooms``. A failure while handling one message is logged and does not
    stop the loop.

    Args:
        comments_processor: Source of the serialized update events.
    """
    L.debug('IO: CommentsListenerTask started')
    for message in comments_processor.listen():
        try:
            # json.loads() no longer accepts an ``encoding`` keyword
            # (removed in Python 3.9); str and UTF-8 bytes decode without it.
            update_event = json.loads(message)
            new_comments = update_event['new']
            updated_comments = update_event['updated']
            L.debug(
                f'IO: CommentsListenerTask: Got {len(new_comments)} new comments and {len(updated_comments)} updated comments'
            )
            to_send = new_comments + updated_comments
            # Iterate a snapshot of (room, max_id) pairs so that a room
            # removed while we are emitting cannot raise KeyError and abort
            # delivery to the remaining rooms.
            # NOTE(review): assumes ``rooms`` is a dict-like mapping -- confirm.
            for room, max_id in rooms.copy().items():
                L.debug(
                    f'IO: CommentsListenerTask: Room {room}, max_id={max_id}, to_send -> {len(to_send)}'
                )
                if len(to_send) > 0:
                    io.emit('new_comments',
                            to_send,
                            namespace=IO_NAMESPACE,
                            room=room)
        except Exception:
            # Keep listening after a bad message, but let SystemExit,
            # KeyboardInterrupt and similar BaseExceptions propagate.
            L.exception('IO: CommentsListenerTask: exception')
    L.warning('IO: CommentsListenerTask: exiting')
Ejemplo n.º 9
0
            update_event = json.loads(message, encoding='utf-8')
            new_comments = update_event['new']
            updated_comments = update_event['updated']
            L.debug(
                f'IO: CommentsListenerTask: Got {len(new_comments)} new comments and {len(updated_comments)} updated comments'
            )
            to_send = new_comments + updated_comments
            for room in rooms.copy():
                max_id = rooms[room]
                L.debug(
                    f'IO: CommentsListenerTask: Room {room}, max_id={max_id}, to_send -> {len(to_send)}'
                )
                if len(to_send) > 0:
                    io.emit('new_comments',
                            to_send,
                            namespace=IO_NAMESPACE,
                            room=room)
        except:
            L.exception('IO: CommentsListenerTask: exception')
    L.warning('IO: CommentsListenerTask: exiting')


# Module-level wiring: create the Redis-backed processor, subscribe it, and
# run comments_listener as a Socket.IO background task.
comments_processor = CommentsProcessor(
    config.REDIS_HOST,
    config.REDIS_PORT,
    config.REDIS_PASSWORD,
    config.REDIS_CHANNEL,
    L,
)
comments_processor.subscribe()

L.debug('IO: starting CommentsListenerTask')
listener_thread = io.start_background_task(comments_listener, comments_processor)
L.debug(f'IO: started a listener_thread: {listener_thread!s}')