def fetch_iri(self, iri: str) -> ap.ObjectType:
    """Resolve an ActivityPub IRI to its object.

    Short-circuits for the local actor and for actors already held in the
    in-memory cache; freshly fetched actors are persisted to the DB and
    cached before being returned.
    """
    # Our own profile is always served straight from memory.
    if iri == ME["id"]:
        return ME

    # In-memory actor cache hit?
    if iri in ACTORS_CACHE:
        logger.info(f"{iri} found in cache")
        return ACTORS_CACHE[iri]

    fetched = self._fetch_iri(iri)
    logger.debug(f"_fetch_iri({iri!r}) == {fetched!r}")

    # Only actors get persisted and cached; other objects pass through.
    if ap._has_type(fetched["type"], ap.ACTOR_TYPES):
        logger.debug(f"caching actor {iri}")
        update = {"$set": {"remote_id": iri, "data": fetched}}
        DB.actors.update_one({"remote_id": iri}, update, upsert=True)
        ACTORS_CACHE[iri] = fetched

    return fetched
def _add_answers_to_question(raw_doc: Dict[str, Any]) -> None:
    """Inline cached poll tallies into a Create{Question} activity, in place.

    For each poll choice, attach a ``replies`` collection whose
    ``totalItems`` comes from the counters stored under
    ``meta.question_answers``.  Also mark the poll as ``closed`` once its
    ``endTime`` has passed.

    No-op for documents that are not a Create wrapping a Question.
    """
    activity = raw_doc["activity"]
    if not (
        ap._has_type(activity["type"], ap.ActivityType.CREATE)
        and "object" in activity
        and ap._has_type(activity["object"]["type"], ap.ActivityType.QUESTION)
    ):
        return

    obj = activity["object"]
    # Use .get for meta, consistent with add_extra_collection, so a doc
    # without a meta sub-document doesn't raise KeyError.
    answers = raw_doc.get("meta", {}).get("question_answers", {})

    # A Question carries `oneOf` (single choice) or `anyOf` (multi choice);
    # fall back to an empty list so a malformed poll with neither key
    # doesn't raise "NoneType is not iterable".
    for choice in obj.get("oneOf", obj.get("anyOf")) or []:
        choice["replies"] = {
            "type": ap.ActivityType.COLLECTION.value,
            "totalItems": answers.get(_answer_key(choice["name"]), 0),
        }

    # Polls without an explicit endTime never close.
    end_time = obj.get("endTime")
    if end_time:
        now = datetime.now(timezone.utc)
        # Both sides are ISO 8601 timestamps in the same format, so string
        # comparison matches chronological order here.
        if format_datetime(now) >= end_time:
            obj["closed"] = end_time
def add_extra_collection(raw_doc: Dict[str, Any]) -> Dict[str, Any]:
    """Embed the replies/likes/shares counter collections into the object
    of a Create activity document; other activity types pass through
    untouched."""
    if not ap._has_type(raw_doc["activity"]["type"], ap.ActivityType.CREATE.value):
        return raw_doc

    obj = raw_doc["activity"]["object"]
    meta = raw_doc.get("meta", {})
    remote_id = raw_doc["remote_id"]

    # (object field, counter key in meta, collection URL suffix)
    for field, count_key, suffix in (
        ("replies", MetaKey.COUNT_REPLY, "replies"),
        ("likes", MetaKey.COUNT_LIKE, "likes"),
        ("shares", MetaKey.COUNT_BOOST, "shares"),
    ):
        obj[field] = embed_collection(
            meta.get(count_key.value, 0),
            f"{remote_id}/{suffix}",
        )

    return raw_doc
def cache_attachment(self, attachment: Dict[str, Any], remote_id: str) -> None:
    """Download an activity attachment and store it (gzipped) in GridFS.

    Images additionally get a 720px thumbnail; anything else is stored
    verbatim.  Idempotent: returns early if this (url, remote_id) pair is
    already cached.
    """
    url = attachment["url"]

    # Ensure it's not already there
    if self.fs.find_one(
        {"url": url, "kind": Kind.ATTACHMENT.value, "remote_id": remote_id}
    ):
        return

    # If it's an image, make some thumbnails
    if (
        _is_img(url)
        or attachment.get("mediaType", "").startswith("image/")
        or ap._has_type(attachment.get("type"), ap.ActivityType.IMAGE)
    ):
        try:
            i = load(url, self.user_agent)
            # Save the original attachment (gzipped)
            with BytesIO() as buf:
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    i.save(f1, format=i.format)
                buf.seek(0)
                self.fs.put(
                    buf,
                    url=url,
                    size=None,
                    content_type=i.get_format_mimetype(),
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )
            # Save a thumbnail (gzipped)
            i.thumbnail((720, 720))
            with BytesIO() as buf:
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    i.save(f1, format=i.format)
                buf.seek(0)
                self.fs.put(
                    buf,
                    url=url,
                    size=720,
                    content_type=i.get_format_mimetype(),
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )
            return
        except Exception:
            # FIXME(tsileo): logging
            # Deliberate best-effort: if image processing fails, fall
            # through and store the raw bytes below instead of crashing.
            pass

    # The attachment is not an image (or image handling failed above):
    # download and save the raw bytes anyway, gzipped.
    with requests.get(
        url, stream=True, headers={"User-Agent": self.user_agent}
    ) as resp:
        resp.raise_for_status()
        with BytesIO() as buf:
            with GzipFile(mode="wb", fileobj=buf) as f1:
                for chunk in resp.iter_content(chunk_size=2 << 20):
                    if chunk:
                        # (removed a leftover debug print of the chunk size
                        # that wrote to stdout on every chunk)
                        f1.write(chunk)
            buf.seek(0)
            self.fs.put(
                buf,
                url=url,
                size=None,
                content_type=mimetypes.guess_type(url)[0],
                kind=Kind.ATTACHMENT.value,
                remote_id=remote_id,
            )
def inbox():
    """Flask handler for the ActivityPub inbox.

    GET  — return the inbox as an OrderedCollection (API-authenticated only).
    POST — accept an incoming activity: verify the HTTP signature (or, as a
    fallback, re-fetch the payload by its IRI), then hand it to
    post_to_inbox().  Unverifiable payloads are stored in DB.trash for
    later analysis.
    """
    # GET /inbox
    if request.method == "GET":
        if not is_api_request():
            abort(404)
        try:
            _api_required()
        except BadSignature:
            abort(404)

        return activitypubify(
            **activitypub.build_ordered_collection(
                DB.activities,
                q={"meta.deleted": False, "box": Box.INBOX.value},
                cursor=request.args.get("cursor"),
                map_func=lambda doc: remove_context(doc["activity"]),
                col_name="inbox",
            )
        )

    # POST/ inbox
    try:
        data = request.get_json(force=True)
        if not isinstance(data, dict):
            raise ValueError("not a dict")
    except Exception:
        return Response(
            status=422,
            headers={"Content-Type": "application/json"},
            response=json.dumps(
                {
                    "error": "failed to decode request body as JSON",
                    "request_id": g.request_id,
                }
            ),
        )

    # Check the blacklist now to see if we can return super early
    if is_blacklisted(data):
        logger.info(f"dropping activity from blacklisted host: {data['id']}")
        return Response(status=201)

    logger.info(f"request_id={g.request_id} req_headers={request.headers!r}")
    logger.info(f"request_id={g.request_id} raw_data={data}")
    try:
        req_verified, actor_id = verify_request(
            request.method, request.path, request.headers, request.data
        )
        if not req_verified:
            raise Exception("failed to verify request")
        logger.info(f"request_id={g.request_id} signed by {actor_id}")
    except Exception:
        # HTTP signature verification failed; fall back to dereferencing
        # the activity by its IRI to prove it is legit.
        logger.exception(
            f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote"
        )
        try:
            remote_data = get_backend().fetch_iri(data["id"])
        except ActivityGoneError:
            # XXX Mastodon sends Delete activities that are not dereferencable, it's the actor url with #delete
            # appended, so an `ActivityGoneError` kind of ensure it's "legit"
            if data["type"] == ActivityType.DELETE.value and data["id"].startswith(
                data["object"]
            ):
                # If we're here, this means the key is not saved, so we cannot verify the object
                logger.info(f"received a Delete for an unknown actor {data!r}, drop it")
                return Response(status=201)
            # NOTE(review): if the Delete condition above does NOT match,
            # control falls through to `data = remote_data` below with
            # `remote_data` unbound (UnboundLocalError -> HTTP 500).
            # Intent unclear; left as-is — confirm and handle explicitly.
        except Exception:
            logger.exception(f"failed to fetch remote for payload {data!r}")

            if "type" in data:
                # Friendica does not returns a 410, but a 302 that redirect to an HTML page
                if ap._has_type(data["type"], ActivityType.DELETE):
                    logger.info(
                        f"received a Delete for an unknown actor {data!r}, drop it"
                    )
                    return Response(status=201)

            if "id" in data:
                if DB.trash.find_one({"activity.id": data["id"]}):
                    # It's already stored in trash, returns early
                    return Response(
                        status=422,
                        headers={"Content-Type": "application/json"},
                        response=json.dumps(
                            {
                                "error": "failed to verify request (using HTTP signatures or fetching the IRI)",
                                "request_id": g.request_id,
                            }
                        ),
                    )

            # Now we can store this activity in the trash for later analysis
            # Track/store the payload for analysis
            ip, geoip = _get_ip()
            DB.trash.insert(
                {
                    "activity": data,
                    "meta": {
                        "ts": datetime.now().timestamp(),
                        "ip_address": ip,
                        "geoip": geoip,
                        "tb": traceback.format_exc(),
                        "headers": dict(request.headers),
                        "request_id": g.request_id,
                    },
                }
            )
            return Response(
                status=422,
                headers={"Content-Type": "application/json"},
                response=json.dumps(
                    {
                        "error": "failed to verify request (using HTTP signatures or fetching the IRI)",
                        "request_id": g.request_id,
                    }
                ),
            )

        # We fetched the remote data successfully
        data = remote_data

    try:
        activity = ap.parse_activity(data)
    except ValueError:
        # BUGFIX: the original message was missing its f-string prefix and
        # logged the literal text "{g.request_id}: {data!r}".
        logger.exception(f"failed to parse activity for req {g.request_id}: {data!r}")

        # Track/store the payload for analysis
        ip, geoip = _get_ip()
        DB.trash.insert(
            {
                "activity": data,
                "meta": {
                    "ts": datetime.now().timestamp(),
                    "ip_address": ip,
                    "geoip": geoip,
                    "tb": traceback.format_exc(),
                    "headers": dict(request.headers),
                    "request_id": g.request_id,
                },
            }
        )
        return Response(status=201)

    logger.debug(f"inbox activity={g.request_id}/{activity}/{data}")
    post_to_inbox(activity)

    return Response(status=201)