# Example #1
    def fetch_iri(self, iri: str) -> ap.ObjectType:
        """Resolve an IRI to its ActivityPub object.

        The local profile (``ME``) and the in-memory actor cache are
        consulted first; on a cache miss the IRI is fetched remotely,
        and any actor document is persisted to the ``DB.actors``
        collection and the in-memory cache before being returned.
        """
        # Our own profile never needs a network round-trip.
        if iri == ME["id"]:
            return ME

        # Fast path: actor already in the in-memory cache.
        if iri in ACTORS_CACHE:
            logger.info(f"{iri} found in cache")
            return ACTORS_CACHE[iri]

        data = self._fetch_iri(iri)
        logger.debug(f"_fetch_iri({iri!r}) == {data!r}")

        # Only actor documents get cached; everything else is returned as-is.
        if not ap._has_type(data["type"], ap.ACTOR_TYPES):
            return data

        logger.debug(f"caching actor {iri}")
        # Upsert into the DB-backed cache keyed by the remote IRI.
        DB.actors.update_one(
            {"remote_id": iri},
            {"$set": {"remote_id": iri, "data": data}},
            upsert=True,
        )
        ACTORS_CACHE[iri] = data
        return data
# Example #2
def _add_answers_to_question(raw_doc: Dict[str, Any]) -> None:
    """Inline per-choice answer counts into a Question activity (in place).

    For a Create activity wrapping a Question, attach a ``replies``
    collection (with ``totalItems`` taken from the stored
    ``meta.question_answers`` counters) to each poll choice, and mark
    the poll ``closed`` once its end time has passed.
    """
    activity = raw_doc["activity"]
    if not (ap._has_type(activity["type"], ap.ActivityType.CREATE)
            and "object" in activity
            and ap._has_type(activity["object"]["type"],
                             ap.ActivityType.QUESTION)):
        return

    obj = activity["object"]
    answers = raw_doc.get("meta", {}).get("question_answers", {})
    # A Question carries either `oneOf` (single-choice) or `anyOf`
    # (multiple-choice); `or []` guards against both being absent.
    for choice in obj.get("oneOf", obj.get("anyOf")) or []:
        choice["replies"] = {
            "type": ap.ActivityType.COLLECTION.value,
            "totalItems": answers.get(_answer_key(choice["name"]), 0),
        }

    # `endTime` is optional per ActivityStreams; only close timed polls.
    # NOTE: the comparison relies on formatted datetimes sorting
    # lexicographically (ISO 8601-style), as in the original.
    end_time = obj.get("endTime")
    if end_time and format_datetime(datetime.now(timezone.utc)) >= end_time:
        obj["closed"] = end_time
# Example #3
def add_extra_collection(raw_doc: Dict[str, Any]) -> Dict[str, Any]:
    """Embed replies/likes/shares collections into a Create activity.

    For non-Create activities the document is returned untouched.
    Each embedded collection advertises the counter stored in the
    document's ``meta`` and points at ``<remote_id>/<collection>``.
    Mutates ``raw_doc`` in place and returns it.
    """
    if not ap._has_type(raw_doc["activity"]["type"], ap.ActivityType.CREATE.value):
        return raw_doc

    obj = raw_doc["activity"]["object"]
    meta = raw_doc.get("meta", {})
    # One stanza per collection instead of three copy-pasted blocks;
    # each collection name is paired with the meta counter backing it.
    for col_name, counter in (
        ("replies", MetaKey.COUNT_REPLY),
        ("likes", MetaKey.COUNT_LIKE),
        ("shares", MetaKey.COUNT_BOOST),
    ):
        obj[col_name] = embed_collection(
            meta.get(counter.value, 0),
            f'{raw_doc["remote_id"]}/{col_name}',
        )

    return raw_doc
# Example #4
    def cache_attachment(self, attachment: Dict[str, Any], remote_id: str) -> None:
        """Download an activity attachment and store it gzipped in GridFS.

        Skips attachments already cached for this ``remote_id``. Images
        (detected by URL, mediaType, or ActivityPub type) are stored
        both at original size and as a 720px thumbnail; if image
        processing fails, the attachment falls through to the generic
        streaming-download path below.
        """
        url = attachment["url"]

        # Ensure it's not already there
        if self.fs.find_one(
            {"url": url, "kind": Kind.ATTACHMENT.value, "remote_id": remote_id}
        ):
            return

        # If it's an image, make some thumbnails
        if (
            _is_img(url)
            or attachment.get("mediaType", "").startswith("image/")
            or ap._has_type(attachment.get("type"), ap.ActivityType.IMAGE)
        ):
            try:
                i = load(url, self.user_agent)
                # Save the original attachment (gzipped)
                with BytesIO() as buf:
                    f1 = GzipFile(mode="wb", fileobj=buf)
                    i.save(f1, format=i.format)
                    # Close the gzip member explicitly so the trailer is
                    # flushed into `buf` before it is re-read.
                    f1.close()
                    buf.seek(0)
                    self.fs.put(
                        buf,
                        url=url,
                        size=None,
                        content_type=i.get_format_mimetype(),
                        kind=Kind.ATTACHMENT.value,
                        remote_id=remote_id,
                    )
                # Save a thumbnail (gzipped)
                i.thumbnail((720, 720))
                with BytesIO() as buf:
                    with GzipFile(mode="wb", fileobj=buf) as f1:
                        i.save(f1, format=i.format)
                    buf.seek(0)
                    self.fs.put(
                        buf,
                        url=url,
                        size=720,
                        content_type=i.get_format_mimetype(),
                        kind=Kind.ATTACHMENT.value,
                        remote_id=remote_id,
                    )
                return
            except Exception:
                # FIXME(tsileo): logging
                # Deliberate best-effort: a broken/huge image falls back
                # to the raw download path below instead of failing.
                pass

        # The attachment is not an image, download and save it anyway
        with requests.get(
            url, stream=True, headers={"User-Agent": self.user_agent}
        ) as resp:
            resp.raise_for_status()
            with BytesIO() as buf:
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    # Stream in 2 MiB chunks; skip keep-alive empty chunks.
                    # (Removed a leftover debug print of the chunk size.)
                    for chunk in resp.iter_content(chunk_size=2 << 20):
                        if chunk:
                            f1.write(chunk)
                buf.seek(0)
                self.fs.put(
                    buf,
                    url=url,
                    size=None,
                    content_type=mimetypes.guess_type(url)[0],
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )
# Example #5
def inbox():
    """ActivityPub inbox endpoint (GET and POST).

    GET (API-authenticated only) returns the ordered collection of
    non-deleted inbox activities. POST accepts an incoming activity:
    it is dropped early if blacklisted, verified via HTTP signatures
    (with a fallback that re-fetches the payload by its IRI), parsed,
    and handed off to ``post_to_inbox``. Unverifiable or unparseable
    payloads are stored in ``DB.trash`` for later analysis.

    Returns 201 for accepted/deliberately-dropped activities and 422
    (JSON body with ``request_id``) for rejected ones.
    """
    # GET /inbox
    if request.method == "GET":
        if not is_api_request():
            abort(404)
        try:
            _api_required()
        except BadSignature:
            # Hide the endpoint's existence from unauthenticated callers.
            abort(404)

        return activitypubify(**activitypub.build_ordered_collection(
            DB.activities,
            q={
                "meta.deleted": False,
                "box": Box.INBOX.value
            },
            cursor=request.args.get("cursor"),
            map_func=lambda doc: remove_context(doc["activity"]),
            col_name="inbox",
        ))

    # POST /inbox
    try:
        data = request.get_json(force=True)
        if not isinstance(data, dict):
            raise ValueError("not a dict")
    except Exception:
        return Response(
            status=422,
            headers={"Content-Type": "application/json"},
            response=json.dumps({
                "error": "failed to decode request body as JSON",
                "request_id": g.request_id,
            }),
        )

    # Check the blacklist now to see if we can return super early
    if is_blacklisted(data):
        logger.info(f"dropping activity from blacklisted host: {data['id']}")
        # 201 on purpose: don't reveal to blacklisted hosts that they
        # are being dropped.
        return Response(status=201)

    logger.info(f"request_id={g.request_id} req_headers={request.headers!r}")
    logger.info(f"request_id={g.request_id} raw_data={data}")
    try:
        req_verified, actor_id = verify_request(request.method, request.path,
                                                request.headers, request.data)
        if not req_verified:
            raise Exception("failed to verify request")
        logger.info(f"request_id={g.request_id} signed by {actor_id}")
    except Exception:
        logger.exception(
            f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote"
        )
        try:
            remote_data = get_backend().fetch_iri(data["id"])
        except ActivityGoneError:
            # XXX Mastodon sends Delete activities that are not dereferencable, it's the actor url with #delete
            # appended, so an `ActivityGoneError` kind of ensure it's "legit"
            if data["type"] == ActivityType.DELETE.value and data[
                    "id"].startswith(data["object"]):
                # If we're here, this means the key is not saved, so we cannot verify the object
                logger.info(
                    f"received a Delete for an unknown actor {data!r}, drop it"
                )

                return Response(status=201)
        except Exception:
            logger.exception(f"failed to fetch remote for payload {data!r}")

            if "type" in data:
                # Friendica does not returns a 410, but a 302 that redirect to an HTML page
                if ap._has_type(data["type"], ActivityType.DELETE):
                    logger.info(
                        f"received a Delete for an unknown actor {data!r}, drop it"
                    )
                    return Response(status=201)

            if "id" in data:
                if DB.trash.find_one({"activity.id": data["id"]}):
                    # It's already stored in trash, returns early
                    return Response(
                        status=422,
                        headers={"Content-Type": "application/json"},
                        response=json.dumps({
                            "error":
                            "failed to verify request (using HTTP signatures or fetching the IRI)",
                            "request_id": g.request_id,
                        }),
                    )

            # Now we can store this activity in the trash for later analysis

            # Track/store the payload for analysis
            ip, geoip = _get_ip()

            DB.trash.insert({
                "activity": data,
                "meta": {
                    "ts": datetime.now().timestamp(),
                    "ip_address": ip,
                    "geoip": geoip,
                    "tb": traceback.format_exc(),
                    "headers": dict(request.headers),
                    "request_id": g.request_id,
                },
            })

            return Response(
                status=422,
                headers={"Content-Type": "application/json"},
                response=json.dumps({
                    "error":
                    "failed to verify request (using HTTP signatures or fetching the IRI)",
                    "request_id": g.request_id,
                }),
            )

        # We fetched the remote data successfully
        data = remote_data
    try:
        activity = ap.parse_activity(data)
    except ValueError:
        # BUGFIX: this log message was missing the f-string prefix and
        # logged the literal braces instead of the request id/payload.
        logger.exception(
            f"failed to parse activity for req {g.request_id}: {data!r}")

        # Track/store the payload for analysis
        ip, geoip = _get_ip()

        DB.trash.insert({
            "activity": data,
            "meta": {
                "ts": datetime.now().timestamp(),
                "ip_address": ip,
                "geoip": geoip,
                "tb": traceback.format_exc(),
                "headers": dict(request.headers),
                "request_id": g.request_id,
            },
        })

        # 201 so the sender does not retry an unparseable payload.
        return Response(status=201)

    logger.debug(f"inbox activity={g.request_id}/{activity}/{data}")

    post_to_inbox(activity)

    return Response(status=201)