Example #1
def on_new_icon_found(message: PickleMessage, ctx: missive.HandlingContext):
    """Crawl and index a newly found icon, or link an existing one."""
    event = cast(NewIconFound, message.get_obj())
    session = get_session(ctx)
    http_client = get_http_client(ctx)

    icon_url = get_url_by_url_uuid(session, event.icon_url_uuid)
    if icon_url is None:
        raise RuntimeError("icon url not in db")

    if event.page_url_uuid is not None:
        page_url = get_url_by_url_uuid(session, event.page_url_uuid)
    else:
        page_url = None

    existing_icon_uuid = icon_at_url(session, icon_url)
    if existing_icon_uuid is not None:
        # icon already stored: just link it to the page (if one was given)
        log.info("already have icon at %s", icon_url)
        if page_url is not None:
            upsert_icon_for_url(session, page_url, existing_icon_uuid)
    else:
        # icon not stored yet: crawl it and index the response body
        blake2b_hash, response = crawler.crawl_icon(
            session, http_client, Request(verb=HTTPVerb.GET, url=icon_url))
        body = cast(RewindingIO, response.body)
        with body as wind:
            indexing.index_icon(session,
                                icon_url,
                                wind,
                                blake2b_hash,
                                page_url=page_url)
    session.commit()

    ctx.ack()
Example #2
def on_discussion_crawl_requested(message: PickleMessage,
                                  ctx: missive.HandlingContext):
    """Fetch discussions for a url from one source and record them."""
    event = cast(FetchDiscussionsCommand, message.get_obj())
    session = get_session(ctx)
    http_client = get_http_client(ctx)
    url = get_url_by_url_uuid(session, event.url_uuid)
    if url is None:
        # FIXME: improve this...
        raise RuntimeError("url does not exist!")
    log.info("fetching discussions for %s from %s", url, event.source)
    client: Union[discussion_clients.HNAlgoliaClient,
                  discussion_clients.RedditDiscussionClient]
    if event.source == DiscussionSource.HN:
        client = discussion_clients.HNAlgoliaClient(http_client)
    else:
        client = get_reddit_client(ctx)

    try:
        upsert_discussions(session, client.discussions_for_url(url))
        record_discussion_fetch(session, url, event.source)
    except discussion_clients.DiscussionAPIError as e:
        log.error(
            "got bad response (%s) from %s: %s",
            e.response_status(),
            e.source,
            e.response_text(),
        )
        session.rollback()
    else:
        session.commit()
    ctx.ack()
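The handler above branches on event.source, but DiscussionSource itself does not appear in these examples. A minimal sketch of what it could look like, assuming only the two sources the handlers actually reference (Hacker News via HNAlgoliaClient and Reddit via RedditDiscussionClient), is:

import enum


class DiscussionSource(enum.Enum):
    # assumed members, inferred from the clients used in the handler above;
    # the real enum may use different values or carry extra sources
    HN = 1
    REDDIT = 2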
Example #3
def on_bookmark_created(message: PickleMessage, ctx: missive.HandlingContext):
    """When a new bookmark is created, we want to:

    - crawl it, if it's not yet crawled
    - (tbc) other things

    """
    event = cast(BookmarkCreated, message.get_obj())
    session = get_session(ctx)
    url = get_url_by_url_uuid(session, event.url_uuid)
    if url is None:
        raise RuntimeError("url requested to crawl does not exist in the db")
    if not is_crawled(session, url):
        publish_message(
            CrawlRequested(crawl_request=CrawlRequest(
                request=Request(verb=HTTPVerb.GET, url=url),
                reason=BookmarkCrawlReason(),
            )),
            environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
        )
    for source in DiscussionSource:
        publish_message(
            FetchDiscussionsCommand(url_uuid=url.url_uuid, source=source),
            environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
        )

    ctx.ack()
Example #4
def print_hellos(message: PickleMessage, ctx: missive.HandlingContext):
    event: HelloEvent = cast(HelloEvent, message.get_obj())
    time_taken_ms = (datetime.now(timezone.utc) -
                     event.created).total_seconds() * 1000
    log.info(
        "got hello event (in %.3fms), message: '%s'",
        round(time_taken_ms, 3),
        event.message,
    )
    ctx.ack()
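HelloEvent is not defined in these examples either; print_hellos only reads event.created (a timezone-aware datetime) and event.message. A hypothetical sketch of such an event class, under those assumptions, might be:

from dataclasses import dataclass, field
from datetime import datetime, timezone


@dataclass(frozen=True)
class HelloEvent:
    # hypothetical shape, inferred from the fields print_hellos reads;
    # the default timestamp is an assumption, not part of the original code
    message: str
    created: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc))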
Example #5
def on_index_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Index a crawl and, if needed, publish a follow-up icon message."""
    event = cast(IndexRequested, message.get_obj())
    session = get_session(ctx)
    metadata = indexing.index(session, event.crawl_uuid)
    if metadata:
        icon_message = icon_message_if_necessary(session, metadata)
    else:
        icon_message = None
    session.commit()
    ctx.ack()
    # the icon message (if any) is only published after commit and ack
    if icon_message:
        publish_message(icon_message, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
Example #6
def on_bookmark_crawl_requested(message: PickleMessage,
                                ctx: missive.HandlingContext):
    """Crawl the requested url, then ask for the result to be indexed."""
    event = cast(CrawlRequested, message.get_obj())
    session = get_session(ctx)
    http_client = get_http_client(ctx)
    crawl_result = crawler.crawl(session, http_client,
                                 event.crawl_request.request)
    session.commit()
    publish_message(
        IndexRequested(crawl_uuid=crawl_result.crawl_uuid),
        environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
    )
    ctx.ack()
Example #7
def __call__(self, message: PickleMessage) -> bool:
    return isinstance(message.get_obj(), self.required_class)
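Example #7 shows only a __call__ method; the class around it is not included. A minimal sketch of the enclosing matcher, assuming it merely stores the class it checks against (the name ObjTypeMatcher is hypothetical), would be:

class ObjTypeMatcher:
    # hypothetical class name; only __call__ appears in the example above.
    # PickleMessage is the same message type used by the handlers in these examples.
    def __init__(self, required_class: type) -> None:
        self.required_class = required_class

    def __call__(self, message: PickleMessage) -> bool:
        return isinstance(message.get_obj(), self.required_class)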