def on_new_icon_found(message: PickleMessage, ctx: missive.HandlingContext):
    """Handle a NewIconFound event.

    If the icon at that url is already in the db, just associate it with
    the page (when a page is given); otherwise crawl the icon and index
    it.  Commits the session and acks the message on success.

    Raises RuntimeError when the icon url is not present in the db.
    """
    event = cast(NewIconFound, message.get_obj())
    db_session = get_session(ctx)
    client = get_http_client(ctx)

    icon_url = get_url_by_url_uuid(db_session, event.icon_url_uuid)
    if icon_url is None:
        raise RuntimeError("icon url not in db")

    page_url = (
        get_url_by_url_uuid(db_session, event.page_url_uuid)
        if event.page_url_uuid is not None
        else None
    )

    known_icon_uuid = icon_at_url(db_session, icon_url)
    if known_icon_uuid is not None:
        # Icon already known: no crawl needed, just link it to the page.
        log.info("already have icon at %s", icon_url)
        if page_url is not None:
            upsert_icon_for_url(db_session, page_url, known_icon_uuid)
    else:
        digest, response = crawler.crawl_icon(
            db_session, client, Request(verb=HTTPVerb.GET, url=icon_url)
        )
        stream = cast(RewindingIO, response.body)
        with stream as wound:
            indexing.index_icon(
                db_session, icon_url, wound, digest, page_url=page_url
            )
    db_session.commit()
    ctx.ack()
def on_discussion_crawl_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Fetch discussions for a url from one source (HN or Reddit).

    On API failure the session is rolled back and the error logged; on
    success the upserted discussions and fetch record are committed.
    The message is acked either way.

    Raises RuntimeError when the url is not present in the db.
    """
    event = cast(FetchDiscussionsCommand, message.get_obj())
    db_session = get_session(ctx)
    http_client = get_http_client(ctx)

    url = get_url_by_url_uuid(db_session, event.url_uuid)
    if url is None:
        # FIXME: improve this...
        raise RuntimeError("url does not exist!")
    log.info("fetching discussions for %s from %s", url, event.source)

    api_client: Union[
        discussion_clients.HNAlgoliaClient,
        discussion_clients.RedditDiscussionClient,
    ]
    if event.source == DiscussionSource.HN:
        api_client = discussion_clients.HNAlgoliaClient(http_client)
    else:
        api_client = get_reddit_client(ctx)

    try:
        upsert_discussions(db_session, api_client.discussions_for_url(url))
        record_discussion_fetch(db_session, url, event.source)
    except discussion_clients.DiscussionAPIError as exc:
        log.error(
            "got bad response (%s) from %s: %s",
            exc.response_status(),
            exc.source,
            exc.response_text(),
        )
        db_session.rollback()
    else:
        db_session.commit()
    ctx.ack()
def on_bookmark_created(message: PickleMessage, ctx: missive.HandlingContext):
    """When a new bookmark is created, we want to:

    - crawl it, if it's not yet crawled
    - request discussion fetches for every discussion source
    - (tbc) other things

    Raises RuntimeError when the url is not present in the db.
    """
    event = cast(BookmarkCreated, message.get_obj())
    db_session = get_session(ctx)

    url = get_url_by_url_uuid(db_session, event.url_uuid)
    if url is None:
        raise RuntimeError("url requested to crawl does not exist in the db")

    topic = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]

    if not is_crawled(db_session, url):
        crawl_command = CrawlRequested(
            crawl_request=CrawlRequest(
                request=Request(verb=HTTPVerb.GET, url=url),
                reason=BookmarkCrawlReason(),
            )
        )
        publish_message(crawl_command, topic)

    # One fetch command per discussion source (e.g. HN, Reddit).
    for source in DiscussionSource:
        publish_message(
            FetchDiscussionsCommand(url_uuid=url.url_uuid, source=source),
            topic,
        )
    ctx.ack()
def print_hellos(message: PickleMessage, ctx: missive.HandlingContext):
    """Log a HelloEvent, including how long it took to arrive."""
    event: HelloEvent = cast(HelloEvent, message.get_obj())
    # Delivery latency: now minus the event's (timezone-aware) creation time.
    elapsed = datetime.now(timezone.utc) - event.created
    latency_ms = elapsed.total_seconds() * 1000
    log.info(
        "got hello event (in %.3fms), message: '%s'",
        round(latency_ms, 3),
        event.message,
    )
    ctx.ack()
def on_index_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Index a completed crawl, then publish a follow-up icon message
    if the indexer produced metadata that warrants one.

    Note the ordering: the session is committed and the message acked
    BEFORE the follow-up publish.
    """
    event = cast(IndexRequested, message.get_obj())
    db_session = get_session(ctx)

    page_metadata = indexing.index(db_session, event.crawl_uuid)
    follow_up = (
        icon_message_if_necessary(db_session, page_metadata)
        if page_metadata
        else None
    )
    db_session.commit()
    ctx.ack()
    if follow_up:
        publish_message(follow_up, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
def on_bookmark_crawl_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Perform the requested crawl, commit it, then request indexing.

    NOTE(review): here the publish happens before the ack, whereas
    on_index_requested acks before publishing — confirm which ordering
    is intended.
    """
    event = cast(CrawlRequested, message.get_obj())
    db_session = get_session(ctx)
    http_client = get_http_client(ctx)

    result = crawler.crawl(db_session, http_client, event.crawl_request.request)
    db_session.commit()
    publish_message(
        IndexRequested(crawl_uuid=result.crawl_uuid),
        environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
    )
    ctx.ack()
def __call__(self, message: PickleMessage) -> bool:
    """Return True when the message's payload is an instance of the
    class this matcher was configured with (``self.required_class``)."""
    payload = message.get_obj()
    return isinstance(payload, self.required_class)