Ejemplo n.º 1
0
def on_bookmark_created(message: PickleMessage, ctx: missive.HandlingContext):
    """Handle a ``BookmarkCreated`` event.

    Steps, in order:

    - look up the bookmarked url by the event's ``url_uuid``
    - publish a ``CrawlRequested`` for it, unless ``is_crawled`` says it is
      already crawled
    - publish one ``FetchDiscussionsCommand`` per ``DiscussionSource`` member
    - ack the message

    Raises:
        RuntimeError: if no url exists in the db for the event's ``url_uuid``.
    """
    created = cast(BookmarkCreated, message.get_obj())
    db_session = get_session(ctx)

    url = get_url_by_url_uuid(db_session, created.url_uuid)
    if url is None:
        raise RuntimeError("url requested to crawl does not exist in the db")

    already_crawled = is_crawled(db_session, url)
    if not already_crawled:
        crawl_request = CrawlRequest(
            request=Request(verb=HTTPVerb.GET, url=url),
            reason=BookmarkCrawlReason(),
        )
        crawl_event = CrawlRequested(crawl_request=crawl_request)
        publish_message(crawl_event, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])

    # Discussions are fetched from every known source, whether or not a crawl
    # was requested above.
    for discussion_source in DiscussionSource:
        fetch_command = FetchDiscussionsCommand(
            url_uuid=url.url_uuid, source=discussion_source
        )
        publish_message(fetch_command, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])

    ctx.ack()
Ejemplo n.º 2
0
def test_publish_message(test_queue):
    """A published message comes back off the queue equal to what was sent."""
    expected = BookmarkCreated(uuid4(), uuid4())
    publish_message(expected, routing_key=test_queue.name)

    # The queue body is the pickled message; unpickle it to compare.
    raw_body = test_queue.get().body
    actual = pickle.loads(raw_body)
    assert actual == expected
Ejemplo n.º 3
0
def reindex_url(url: str, log_level: str):
    """Request a (re)index of the most recent crawl of a single url.

    Args:
        url: The url, as a string, parsed with ``URL.from_string``.
        log_level: Level handed to ``configure_logging`` so that the
            confirmation logged at the end is emitted as configured.
    """
    # Previously ``log_level`` was accepted but silently ignored.
    configure_logging(log_level)
    url_obj = URL.from_string(url)
    Session = get_session_cls()
    # The session is only needed for the lookup; close it before publishing.
    with contextlib.closing(Session()) as session:
        crawl_uuid = get_most_recent_crawl(session, url_obj)
    publish_message(IndexRequested(crawl_uuid),
                    environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
    log.info("requested index of %s (crawl_uuid: %s)", url_obj, crawl_uuid)
Ejemplo n.º 4
0
def request_indexes_for_unindexed_urls(session: Session) -> None:
    """Publish an ``IndexRequested`` for each pair from ``get_unindexed_urls``.

    Each request is logged individually, and the total number of requests is
    logged once at the end (0 when there was nothing to index).
    """
    requested = 0
    for url, crawl_uuid in get_unindexed_urls(session):
        requested += 1
        index_request = IndexRequested(crawl_uuid)
        publish_message(index_request, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
        log.info("requested index: %s", url.to_string())
    log.info("requested %d indexes", requested)
Ejemplo n.º 5
0
def fetch_frontier(limit: Optional[int]):
    """Publish a ``FetchDiscussionsCommand`` for each discussion frontier entry.

    Args:
        limit: Passed straight through to ``DiscussionFrontier.iter``.

    The number of commands published is logged once the session is closed.
    """
    session_cls = get_session_cls()
    requested = 0
    with contextlib.closing(session_cls()) as session:
        entries = DiscussionFrontier(session).iter(limit)
        # ``requested`` keeps its initial 0 when the frontier yields nothing.
        for requested, (url_uuid, source) in enumerate(entries, start=1):
            command = FetchDiscussionsCommand(url_uuid, source)
            publish_message(
                command,
                routing_key=environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
            )
    log.info("requested %d fetches", requested)
Ejemplo n.º 6
0
def reindex_bookmarks(log_level: str):
    """Request an (re)index of the most recent crawl for each bookmark.

    Args:
        log_level: Level handed to ``configure_logging``.

    Publishes one ``IndexRequested`` per crawl uuid yielded by
    ``most_recent_successful_bookmark_crawls`` and then logs how many were
    requested.
    """
    configure_logging(log_level)
    log.warning("requesting reindex of all bookmarks")
    Session = get_session_cls()
    # Stays 0 when there are no crawls to reindex.
    index = 0
    with contextlib.closing(Session()) as session:
        # Count from 1 so that, after the loop, ``index`` is the number of
        # requests made rather than the zero-based position of the last one.
        for index, crawl_uuid in enumerate(
                most_recent_successful_bookmark_crawls(session), start=1):
            publish_message(IndexRequested(crawl_uuid),
                            environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
        log.warning("requested %d indexings", index)
Ejemplo n.º 7
0
def on_index_requested(message: PickleMessage, ctx: missive.HandlingContext):
    """Handle an ``IndexRequested`` event.

    Indexes the crawl named by the event, commits the session and acks the
    message. If indexing produced metadata and ``icon_message_if_necessary``
    returned a message for it, that message is published afterwards.
    """
    request = cast(IndexRequested, message.get_obj())
    db_session = get_session(ctx)
    metadata = indexing.index(db_session, request.crawl_uuid)
    follow_up = (
        icon_message_if_necessary(db_session, metadata) if metadata else None
    )
    db_session.commit()
    ctx.ack()
    # The follow-up is published only after the commit and the ack.
    if not follow_up:
        return
    publish_message(follow_up, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
Ejemplo n.º 8
0
def send_hello(message, loop):
    """Publish a ``HelloEvent`` carrying ``message``.

    Args:
        message: Payload passed to ``HelloEvent``.
        loop: When truthy, keep publishing fresh events forever; otherwise
            publish exactly one.
    """
    routing_key: str = environ["QM_RABBITMQ_BG_WORKER_TOPIC"]

    # Called purely for its side effects: make sure everything is set up
    # before the first publish so that timing numbers are accurate.
    get_producer()

    while True:
        publish_message(HelloEvent(message), routing_key=routing_key)
        if not loop:
            break
Ejemplo n.º 9
0
def on_bookmark_crawl_requested(message: PickleMessage,
                                ctx: missive.HandlingContext):
    """Handle a ``CrawlRequested`` event.

    Crawls the request carried by the event, commits the session, publishes
    an ``IndexRequested`` for the resulting crawl and finally acks the
    message.
    """
    crawl_event = cast(CrawlRequested, message.get_obj())
    db_session = get_session(ctx)
    client = get_http_client(ctx)

    request = crawl_event.crawl_request.request
    result = crawler.crawl(db_session, client, request)
    db_session.commit()

    index_request = IndexRequested(crawl_uuid=result.crawl_uuid)
    publish_message(index_request, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])
    ctx.ack()
Ejemplo n.º 10
0
def create_bookmark(username: str) -> flask.Response:
    """Create a bookmark for ``username`` from the submitted form.

    Reads ``url``, ``title``, ``description`` and the optional ``unread`` flag
    (plus tag triples) from ``flask.request.form``, stores the bookmark,
    commits, publishes a ``BookmarkCreated`` message and flashes a
    confirmation.

    Args:
        username: Name of the user who will own the bookmark.

    Returns:
        A 303 response whose ``Location`` header points at the edit form for
        the new bookmark.

    Aborts with HTTP 400 when the submitted url has a disallowed scheme.
    """
    owner = get_user_or_fail(db.session, username)
    # FIXME: sort out optional url_uuid
    require_access_or_fail(
        UserBookmarksAccessObject(user_uuid=owner.user_uuid),
        Access.WRITE,
    )
    form = flask.request.form
    # Timezone-aware "now" in UTC; avoids the deprecated naive
    # ``datetime.utcnow()`` followed by attaching the tzinfo by hand.
    creation_time = datetime.now(timezone.utc)
    tag_triples = tag_triples_from_form(form)

    url_str = form["url"]
    try:
        # As it's a user entering this url, help them along with getting a
        # sufficiently canonicalised url
        url = URL.from_string(url_str, coerce_canonicalisation=True)
    except DisallowedSchemeException:
        log.warning("user tried to create url: %s (disallowed scheme)",
                    url_str)
        # flask.abort raises, so ``url`` is always bound below.
        flask.abort(400, "invalid url (disallowed scheme)")

    bookmark = Bookmark(
        url=url,
        title=form["title"],
        description=form["description"],
        unread="unread" in form,
        deleted=False,
        updated=creation_time,
        created=creation_time,
        tag_triples=tag_triples,
    )
    url_uuid = set_bookmark(db.session, get_cache(), owner.user_uuid, bookmark)
    db.session.commit()
    # Published only after the commit.
    publish_message(
        message_lib.BookmarkCreated(user_uuid=owner.user_uuid,
                                    url_uuid=url.url_uuid),
        environ["QM_RABBITMQ_BG_WORKER_TOPIC"],
    )
    flask.flash("Bookmarked: %s" % bookmark.title)
    response = flask.make_response("Redirecting...", 303)
    response.headers["Location"] = flask.url_for(
        "quarchive.edit_bookmark_form",
        url_uuid=url_uuid,
        username=owner.username,
    )
    return response
Ejemplo n.º 11
0
def sync(current_user: User) -> Tuple[flask.Response, int]:
    """Merge the bookmarks sent by a client and reply with bookmarks.

    The request body is either a single JSON object with a ``bookmarks`` list
    (``Content-Type: application/json``, deprecated) or one JSON document per
    line (any other content type). The reply uses the same format as the
    request.

    After a successful merge and commit, a ``BookmarkCreated`` message is
    published for every bookmark the merge reports as added. When ``full`` is
    present in the query string the reply contains all of the user's
    bookmarks; otherwise only those the merge reports as changed.

    Aborts with HTTP 400 (after rolling back) when the merge raises
    ``BadCanonicalisationException``.
    """
    start_time = datetime.utcnow()
    request_headers = flask.request.headers
    extension_version = request_headers.get(
        "Quarchive-Extension-Version", "unknown"
    )
    log.debug("extension version: %s", extension_version)
    user_uuid = current_user.user_uuid

    single_json_object = request_headers["Content-Type"] == "application/json"
    if single_json_object:
        log.warning("sync request using deprecated single json object")
        payload = flask.request.json
        incoming = (Bookmark.from_json(item) for item in payload["bookmarks"])
    else:
        log.info("sync request using jsonlines")
        incoming = (
            Bookmark.from_json(json.loads(line))
            for line in flask.request.stream.readlines()
        )

    try:
        merge_result = merge_bookmarks(db.session, user_uuid, incoming)
    except BadCanonicalisationException as e:
        log.error(
            "bad canonicalised url ('%s') from version %s, user %s",
            e.url_string,
            extension_version,
            current_user,
        )
        db.session.rollback()
        flask.abort(400, "bad canonicalisation on url: %s" % e.url_string)
    db.session.commit()

    # Announce new bookmarks only once they have been committed.
    for new_bookmark in merge_result.added:
        created_event = message_lib.BookmarkCreated(
            user_uuid=user_uuid, url_uuid=new_bookmark.url.url_uuid
        )
        publish_message(created_event, environ["QM_RABBITMQ_BG_WORKER_TOPIC"])

    is_full_sync = "full" in flask.request.args
    response_bookmarks = (
        all_bookmarks(db.session, current_user.user_uuid)
        if is_full_sync
        else merge_result.changed
    )

    # If we got JSON, send json back
    if single_json_object:
        # NOTE(review): this branch returns a bare response, not the
        # (response, status) tuple that the return annotation promises.
        return flask.json.jsonify(
            {"bookmarks": [b.to_json() for b in response_bookmarks]}
        )

    def ndjson_lines():
        # One JSON document per line; the timing log for a full sync is only
        # emitted once the whole stream has been generated.
        for bookmark in response_bookmarks:
            yield json.dumps(bookmark.to_json())
            yield "\n"
        if is_full_sync:
            duration = datetime.utcnow() - start_time
            log.info(
                "completed full sync for %s in %ds",
                current_user.username,
                duration.total_seconds(),
            )

    streamed = flask.Response(
        flask.stream_with_context(ndjson_lines()),
        mimetype="application/x-ndjson",
    )
    return (streamed, 200)
Ejemplo n.º 12
0
def fetch(url: str):
    """Publish discussion fetch commands for ``url``.

    One ``FetchDiscussionsCommand`` is published for the HN source and then
    one for the REDDIT source, in that order.
    """
    url_obj = URL.from_string(url)
    for source in (DiscussionSource.HN, DiscussionSource.REDDIT):
        command = FetchDiscussionsCommand(url_obj.url_uuid, source)
        publish_message(
            command, routing_key=environ["QM_RABBITMQ_BG_WORKER_TOPIC"]
        )