def pagespeed_report(_: utils.LambdaEvent) -> List[Dict]:
    """Build a report from the Google Pagespeed scores stored in DynamoDB.

    Scans the table named by ``env["PAGESPEED_TABLE"]`` and converts every
    item into a mapping with the keys ``url``, ``latest_score_value`` (float)
    and ``latest_score_timestamp``.

    Returns:
        The list of converted items when every score lies in ``(0.95, 1]``.

    Raises:
        utils.HandledError: with status 500 when the scan reports zero items;
            with status 400 when at least one score is out of range. In the
            latter case the message is the full item list, where offending
            items carry ``"error": True``.
    """
    scan_result = aws.scan_dynamodb_table(env["PAGESPEED_TABLE"])

    if scan_result["Count"] == 0:
        raise utils.HandledError(message="Unexpected DynamoDB response: empty table",
                                 status_code=500)

    report = []
    for row in scan_result["Items"]:
        # Attribute values arrive wrapped in DynamoDB type descriptors ('S', 'N').
        report.append({
            "url": row['url']['S'],
            "latest_score_value": float(row['latest_score_value']['N']),
            "latest_score_timestamp": row['latest_score_timestamp']['S'],
        })

    utils.Log.debug("Items: %s", report)

    # Flag every entry whose score falls outside the accepted range.
    out_of_range = [entry for entry in report
                    if not 0.95 < entry["latest_score_value"] <= 1]
    for entry in out_of_range:
        entry["error"] = True

    if out_of_range:
        raise utils.HandledError(message=report, status_code=400)

    return report
def put_record_to_logstream(event: utils.LambdaEvent) -> str:
    """Write a record of a source Lambda execution to a log stream.

    The event is expected to carry ``requestPayload`` (an SNS notification
    whose ``Message`` is a JSON document with ``url`` and ``title``) and
    ``responsePayload`` (with ``statusCode`` and a JSON ``body`` holding
    ``name`` and ``timestamp``).

    The record is sent to the log group named by
    ``env["REPORT_LOG_GROUP_NAME"]``, using the source Lambda name as the
    log stream name.

    Returns:
        Whatever ``aws.send_event_to_logstream`` returns.

    Raises:
        utils.HandledError: when the response body is not valid JSON, or when
            the source Lambda did not answer with status code 200.
    """
    log_group_name = env["REPORT_LOG_GROUP_NAME"]

    utils.Log.info("Fetching requestPayload and responsePayload")
    request_payload = event["requestPayload"]
    response_payload = event["responsePayload"]

    utils.Log.info("Fetching requestPayload content")
    sns_payload = request_payload["Records"][0]["Sns"]
    message_id = sns_payload["MessageId"]
    sns_message = json.loads(sns_payload["Message"])
    url = sns_message["url"]
    title = sns_message["title"]

    try:
        response_body = json.loads(response_payload["body"])
    except json.JSONDecodeError as error:
        raise utils.HandledError("Failed decoding payload: %s" % error)

    # Name and timestamp are read before the status check because the name is
    # needed for the failure message below.
    name = response_body["name"]
    timestamp = response_body["timestamp"]

    status_code = response_payload["statusCode"]
    if status_code != 200:
        raise utils.HandledError(
            "Source lambda '%s' failed with status code %d, "
            "ignoring report" % (name, status_code))

    record = {
        "url": url,
        "MessageId": message_id,
        "title": title,
        "timestamp": timestamp,
    }
    return aws.send_event_to_logstream(log_group=log_group_name,
                                       log_stream=name,
                                       message=record)
def _validate_backup_keys(keys: List[Dict],
                          first_expected: date,
                          regexp: str = None,
                          tolerance: int = None):
    """Check a sequence of backup keys, one expected per consecutive day.

    Each item of ``keys`` must provide ``key``, ``last_modified`` and
    ``size``. Starting from ``first_expected`` and advancing one day per
    item, the following is verified:

    - the key name matches ``regexp`` (only when ``regexp`` is truthy);
    - the ISO date of ``last_modified`` equals the expected day;
    - the size is not zero;
    - from the second item on, and only when ``tolerance`` is truthy, the
      size variation relative to the previous item (as a rounded percentage
      of the current size) does not exceed ``tolerance``.

    Raises:
        utils.HandledError: on the first item failing any check.
    """
    expected_day = first_expected
    last_size = 0

    for position, backup in enumerate(keys, start=1):
        key_name = backup["key"]

        # Key name
        if regexp:
            utils.Log.debug("Validating key name %s against regexp %s",
                            key_name, regexp)
            if not match(regexp, key_name):
                raise utils.HandledError("Key %s doesn't match regexp %s" %
                                         (key_name, regexp))

        # LastModified date: only the YYYY-MM-DD prefix is compared
        utils.Log.debug("Validating LastModified value for key %s", key_name)
        expected_date = expected_day.isoformat()[:10]
        actual_date = backup["last_modified"].isoformat()[:10]
        if expected_date != actual_date:
            raise utils.HandledError("Wrong key %s: "
                                     "expected date %s, got %s" %
                                     (key_name, expected_date, actual_date))

        # Size must be non-zero
        current_size = backup["size"]
        if current_size == 0:
            raise utils.HandledError("%s key is empty" % key_name)

        # Size delta against the previous backup (skipped for the first item)
        if tolerance and position > 1:
            size_diff = current_size - last_size
            variation = round(size_diff / current_size * 100)
            utils.Log.debug(
                "Found %d bytes variation (%d percent) for key %s "
                "compared to previous backup", size_diff, variation, key_name)
            # NOTE(review): the variation is signed, so only growth can exceed
            # the threshold; a shrinking backup never fails — confirm intent.
            if variation > tolerance:
                raise utils.HandledError(
                    "Size difference compared to previous backup for key %s "
                    "above threshold of %d percent" % (key_name, tolerance))

        last_size = current_size
        expected_day += timedelta(days=1)
def _send_attachment_to_kindle(key: str,
                               bucket: str,
                               item_id: int = None) -> utils.Response:
    """Ask the notifications Lambda to mail an S3 document as attachment.

    The Lambda named by ``env["LAMBDA_NOTIFICATIONS"]`` is invoked with
    ``invoke_type="Event"`` and a payload addressed to
    ``env["KINDLE_EMAIL"]``.

    Args:
        key: object key of the document; must end with ``.mobi`` or ``.html``.
        bucket: bucket holding the document.
        item_id: when truthy, the attachment is named
            ``pocket-<item_id>.<ext>``; otherwise a random UUID is used.

    Returns:
        The value returned by ``aws.invoke_lambda``.

    Raises:
        utils.HandledError: with status 401 when the key has neither of the
            supported extensions.
    """
    utils.Log.info("Send attachment to %s via %s email notification service",
                   env["KINDLE_EMAIL"], env["LAMBDA_NOTIFICATIONS"])

    # https://www.iana.org/assignments/media-types/
    if key.endswith(".mobi"):
        extension = "mobi"
        content_type = "application/vnd.amazon.mobi8-ebook"
    elif key.endswith(".html"):
        extension = "html"
        content_type = "text/html"
    else:
        raise utils.HandledError(
            message=
            "Invalid document extension: must be either '.mobi' or '.html'",
            status_code=401)

    if item_id:
        filename = f"pocket-{item_id}.{extension}"
    else:
        filename = f"{uuid4()}.{extension}"

    attachment = {
        "ContentType": content_type,
        "Key": key,
        "Bucket": bucket,
        "Filename": filename,
    }

    return aws.invoke_lambda(
        name=env["LAMBDA_NOTIFICATIONS"],
        payload={
            "mail_to": env["KINDLE_EMAIL"],
            "attachments": [attachment],
        },
        invoke_type="Event")
def create_doc(event: utils.LambdaEvent) -> str:
    """Build clean HTML file from URL source and store it to S3.

    Downloads ``event["url"]``, runs the response text through
    ``readability.Document`` and uploads the resulting summary, UTF-8
    encoded, to the bucket named by ``env["DOCUMENT_BUCKET"]``. The object
    key is ``YYYY/MM/DD/<name>.html`` (UTC date), where ``<name>`` is
    ``pocket-<item_id>`` when the event has an ``item_id`` key and a random
    UUID otherwise.

    Returns:
        ``"success: s3://<bucket>/<key>"``.

    Raises:
        utils.HandledError: when the download does not answer with HTTP 200;
            the error carries the received status code.
    """
    utils.Log.info("Fetch content from %s", event["url"])
    requests = helpers.import_non_stdlib_module("requests")
    response = requests.get(url=event["url"])

    if response.status_code != 200:
        # The original looked up event["ur"] here, so a failed download
        # surfaced as a KeyError instead of the intended HandledError.
        raise utils.HandledError("Error downloading %s: "
                                 "HTTP status code %d" %
                                 (event["url"], response.status_code),
                                 status_code=response.status_code)

    utils.Log.info("Create readability-clean HTML text from %s source",
                   event["url"])
    readability = helpers.import_non_stdlib_module("readability")
    doc = readability.Document(response.text)

    # Compute the cleaned content once; it is both logged and uploaded.
    summary = doc.summary()

    utils.Log.debug("Document title:\n%s", doc.title())
    utils.Log.debug("Document readability-cleaned content:\n%s", summary)

    now = datetime.utcnow()
    file_name = f"pocket-{event['item_id']}" if "item_id" in event else uuid4()
    # Only the date goes through strftime, so a '%' in the file name can never
    # be misread as a format directive.
    key_name = f"{now.strftime('%Y/%m/%d')}/{file_name}.html"

    aws.put_object_to_s3_bucket(key=key_name,
                                bucket=env["DOCUMENT_BUCKET"],
                                body=bytes(summary, encoding="utf-8"))

    file_url = f"s3://{env['DOCUMENT_BUCKET']}/{key_name}"
    utils.Log.info("File %s created successfully", file_url)

    return f"success: {file_url}"
def publish(event: dict) -> str:
    """
    SNS message producer

    Requires a 'url' key in the event. The URL is validated with
    `helpers.validate_url` and the message content is produced by
    `build_message(url, disable=...)`, where `disable` is taken from the
    optional 'disable' event key (default: empty list).

    The content is delivered to the SNS topic named by
    `environ["SNS_TOPIC"]` with subject `publish_to_social`. SNS consumers
    are supposed to subscribe to the topic and publish the content via
    social medias' public APIs.

    Raises `utils.HandledError` when the 'url' key is missing.
    """
    if "url" not in event:
        raise utils.HandledError("Missing 'url' key in payload")

    source_url = event["url"]
    utils.Log.debug("Found 'url' key in client input, ignoring other keys")
    helpers.validate_url(source_url)

    disabled = event.get("disable", [])
    content = build_message(source_url, disable=disabled)

    sns_response = aws.publish_to_sns_topic(sns_topic=environ["SNS_TOPIC"],
                                            subject="publish_to_social",
                                            content=content)
    message_id = sns_response.text

    return (f"messageId '{message_id}' with content scraped "
            f"from source {event['url']} delivered successfully")
def _get_expected_values(
        bucket_name: str,
        retention_days: int,
        start_day_isoformat: Optional[str] = None) -> Tuple[int, date]:
    """Validate input, return expected backup list size and first backup date.

    Args:
        bucket_name: name of the bucket being validated (used in messages).
        retention_days: number of daily backups that are retained; must
            compare greater than zero.
        start_day_isoformat: optional ISO date (``YYYY-MM-DD``) of the first
            backup ever taken. When omitted, ``TODAY - retention_days`` is
            used.

    Returns:
        A tuple ``(expected_backups, first_expected)``: how many backups
        should exist, capped at ``retention_days``, and the date of the
        oldest one expected.

    Raises:
        utils.HandledError: with status 500 when ``bucket_name`` is not a
            string, ``retention_days`` is not a positive number,
            ``start_day_isoformat`` cannot be parsed, or the start day lies
            in the future.
    """
    # Explicit checks instead of `assert`: assertions are stripped when Python
    # runs with -O, which would silently disable this validation.
    if not isinstance(bucket_name, str):
        # Wrap in a tuple so %-formatting also works for tuple arguments.
        raise utils.HandledError("Invalid bucket_name argument: %s" %
                                 (bucket_name, ),
                                 status_code=500)

    try:
        retention_is_positive = retention_days > 0
    except TypeError:
        # Not comparable with an integer (e.g. None or a string).
        retention_is_positive = False

    if not retention_is_positive:
        # Report the offending value; the assertion-based version ended this
        # message with an empty string.
        raise utils.HandledError(
            f"Invalid retention_days: expected positive integer, "
            f"got {retention_days!r}",
            status_code=500)

    utils.Log.debug("Validate backups in bucket %s", bucket_name)

    if start_day_isoformat is None:
        start_day = TODAY - timedelta(days=retention_days)
    else:
        try:
            start_day = date.fromisoformat(start_day_isoformat)
        except (TypeError, ValueError) as error:
            raise utils.HandledError(
                f"{bucket_name}: Invalid `start_day_isoformat` argument: {error}",
                status_code=500) from error

    if start_day > TODAY:
        raise utils.HandledError(
            f"{bucket_name}: Wrong start day: {start_day} is in the future",
            status_code=500)

    # Until the retention window is full there is one backup per day since
    # start_day (inclusive); afterwards exactly retention_days backups exist.
    expected_backups = min(retention_days, (TODAY - start_day).days + 1)

    if expected_backups < retention_days:
        first_expected = start_day
    else:
        first_expected = TODAY - timedelta(days=(retention_days - 1))

    utils.Log.debug("%s: Expected %d backup(s) starting from %s", bucket_name,
                    expected_backups, first_expected)

    return expected_backups, first_expected
def contact(event: utils.LambdaEvent) -> str:
    """
    Forward a contact form submission to the Notifications lambda.

    ``event["body"]`` must be a JSON object providing these keys:

    - source
    - name
    - email
    - description

    Returns the ``text`` attribute of the ``aws.invoke_lambda`` result.

    Raises ``utils.HandledError`` when the body is not valid JSON (or not a
    JSON object) or when one of the keys above is missing.
    """
    # NOTE(review): reproduced exactly as it appears in the source; confirm
    # whether the fields were meant to be separated by line breaks.
    template = """Source: {source} Name: {name} Mail: {email} Desc: {description} """

    lambda_notifications = env["LAMBDA_NOTIFICATIONS"]
    body = event["body"]

    utils.Log.debug("Processing body payload: %s", body)

    try:
        utils.Log.debug("Loading JSON content from body")
        utils.Log.info("json.loads should be safe to use: "
                       "https://stackoverflow.com/a/45483187/2274124")
        fields = json.loads(body)
        msg = template.format(**fields)
    except (TypeError, json.JSONDecodeError) as error:
        raise utils.HandledError("JSON body is malformatted: %s" % error)
    except KeyError as error:
        raise utils.HandledError("Missing JSON key: %s" % error)

    for line in ("### Message content below ###",
                 msg,
                 "#############################"):
        utils.Log.debug(line)

    notification = {
        "title": "New /contact submission received",
        "payload": msg,
    }
    response = aws.invoke_lambda(name=lambda_notifications,
                                 payload=notification)
    return response.text
def send(event: utils.LambdaEvent) -> str:
    """Send payload as message to Pushover API.

    Reads ``title`` and ``payload`` from the event and the credentials from
    ``env["PUSHOVER_TOKEN"]`` / ``env["PUSHOVER_USERKEY"]``, then posts them
    to ``PUSHOVER_API_ENDPOINT`` through ``helpers.send_http_request``.

    Returns:
        A confirmation string that includes the Pushover request id.

    Raises:
        utils.HandledError: with status 500 when a credential is not a
            30-character alphanumeric string, or when the response lacks the
            expected fields or does not report ``status == 1``.
    """
    utils.Log.info("Delivering message via Pushover")

    title = event["title"]
    payload = event["payload"]
    token = env["PUSHOVER_TOKEN"]
    user = env["PUSHOVER_USERKEY"]

    for string in token, user:
        # Explicit check instead of `assert` (assertions vanish under -O).
        # Pushover documents its keys as 30 characters from [A-Za-z0-9]. The
        # previous pattern only tested a lowercase/digit prefix, so it both
        # rejected keys starting with an uppercase letter and accepted junk
        # after the first character.
        if not (isinstance(string, str)
                and match(r"[A-Za-z0-9]{30}\Z", string)):
            raise utils.HandledError(
                f"Pushover string token malformed: {string}", status_code=500)

    data = {
        "token": token,
        "user": user,
        "message": payload,
        "title": title,
    }

    # NOTE(review): assumes `.text` is an already-decoded mapping with
    # "status" and "request" keys — confirm against helpers.send_http_request.
    resp = helpers.send_http_request(url=PUSHOVER_API_ENDPOINT, data=data).text

    try:
        status = resp["status"]
        req_id = resp["request"]
    except (LookupError, TypeError) as error:
        raise utils.HandledError("Unexpected response from Pushover: %s" %
                                 error,
                                 status_code=500) from error

    if status != 1:
        # Include the response itself so the failure is diagnosable.
        raise utils.HandledError("Unexpected response from Pushover: %s" %
                                 (resp, ),
                                 status_code=500)

    return "Message sent to Pushover successful (request '%s')" % req_id
def _check_bucket_validity(bucket_name: str,
                           retention_days: int,
                           regexp: str = None,
                           start_day_isoformat: str = None,
                           tolerance: int = None) -> str:
    """Validate the backups listed in a bucket against the expected schedule.

    The expected number of backups and the date of the first one come from
    ``_get_expected_values``. Up to two surplus keys at the head of the
    listing are tolerated and dropped before the remaining keys are handed
    to ``_validate_backup_keys``.

    Returns:
        ``"<bucket_name>: OK"`` when every check passes.

    Raises:
        utils.HandledError: when the bucket lists fewer keys than expected or
            more than two keys above the expected number. Errors raised by
            the two helpers are not caught here.
    """
    expected_backups, first_expected = _get_expected_values(
        bucket_name, retention_days, start_day_isoformat)

    content = []
    for entry in aws.list_bucket(bucket_name=bucket_name):
        content.append({
            "last_modified": entry["LastModified"],
            "size": entry["Size"],
            "key": entry["Key"],
        })

    found = len(content)
    surplus = found - expected_backups

    if surplus < 0:
        raise utils.HandledError(
            "%s: Invalid backups number. Expected at least %d, got %d" %
            (bucket_name, expected_backups, found))

    if surplus > 0:
        if surplus > 2:
            raise utils.HandledError(
                "%s: Invalid backups number. Expected at most %d, got %d" %
                (bucket_name, expected_backups + 2, found))

        # Drop the leading surplus keys, which the log message calls the oldest.
        utils.Log.info("%s: Ignoring oldest %d key(s): %s", bucket_name,
                       surplus, content[:surplus])
        content = content[surplus:]

    _validate_backup_keys(keys=content,
                          first_expected=first_expected,
                          regexp=regexp,
                          tolerance=tolerance)

    return f"{bucket_name}: OK"
def send(event: utils.LambdaEvent) -> str:
    """Send the document stored under ``event["keyName"]`` to Kindle.

    The document is looked up in the bucket named by
    ``env["DOCUMENT_SRC_BUCKET"]``. When the key contains ``pocket-<digits>``
    the digits are taken as the Pocket item id, and the item is archived via
    ``_archive_pocket_item`` once the notification service has answered
    with status 200.

    Raises:
        utils.HandledError: when the notification service answers with a
            status code other than 200; the error carries that status code.

    NOTE(review): no value is returned explicitly, so the result is ``None``
    despite the ``str`` annotation — confirm what callers expect.
    """
    key_name = event["keyName"]

    # A "pocket-<id>" key means we are processing a Pocket item, which gets
    # archived at the end if everything goes as expected.
    pocket_match = search(r'pocket-(\d+)', key_name)
    item_id = int(pocket_match.group(1)) if pocket_match else None

    response = _send_attachment_to_kindle(key=key_name,
                                          bucket=env["DOCUMENT_SRC_BUCKET"],
                                          item_id=item_id)

    if response.status_code != 200:
        raise utils.HandledError(
            message=
            f"Unexpected response from notifications service: {response.text}",
            status_code=response.status_code)

    if item_id:
        _archive_pocket_item(item_id)
def _message_from_event(event: utils.LambdaEvent,
                        default_from: str = "",
                        default_to: str = "") -> Dict:
    """Return message dictionary compatible with MAILJET_API_ENDPOINT API argument.

    Event keys read: ``text``, ``custom_id``, ``mail_from``, ``mail_to``,
    ``mail_cc``, ``mail_bcc`` and ``subject``. Addresses are parsed with
    ``helpers.parsed_email_address``, which yields a ``(name, email)`` pair.

    Side effect: when ``mail_from`` / ``mail_to`` are absent from the event,
    they are set on it from ``default_from`` / ``default_to``.

    Raises:
        utils.HandledError: when no sender or no recipient email address can
            be determined.
    """
    message = {
        "From": {},
        "TextPart": event.get("text", "no content"),
        "To": [{}],
        "Cc": [],
        "Bcc": [],
        "CustomID": event.get("custom_id", "api-l3x-in"),
    }

    # --- Sender ---
    if "mail_from" not in event:
        if not default_from:
            raise utils.HandledError("Missing MAIL_FROM")
        utils.Log.debug("Using default MAIL_FROM address: %s", default_from)
        event["mail_from"] = default_from
    else:
        utils.Log.debug("Found `mail_from` in event, parsing content: %s",
                        event["mail_from"])

    sender_name, sender_email = helpers.parsed_email_address(
        event["mail_from"])
    if not sender_email:
        raise utils.HandledError("Missing MAIL_FROM")
    message["From"]["Email"] = sender_email
    if sender_name:
        message["From"]["Name"] = sender_name

    # --- Main recipient ---
    if "mail_to" not in event:
        utils.Log.debug("Using default MAIL_TO address: %s", default_to)
        event["mail_to"] = default_to
    else:
        utils.Log.debug("Found `mail_to` in event, parsing content: %s",
                        event["mail_to"])

    recipient_name, recipient_email = helpers.parsed_email_address(
        event["mail_to"])
    if not recipient_email:
        raise utils.HandledError("Missing MAIL_TO")
    message["To"][0]["Email"] = recipient_email
    if recipient_name:
        message["To"][0]["Name"] = recipient_name

    # --- Carbon copies: entries without a parsable email are skipped ---
    for source_key, header in (("mail_cc", "Cc"), ("mail_bcc", "Bcc")):
        if source_key not in event:
            continue

        utils.Log.debug("Found `%s` in event, parsing content: %s",
                        source_key, event[source_key])

        for raw_address in event[source_key]:
            copy_name, copy_email = helpers.parsed_email_address(raw_address)
            if not copy_email:
                continue

            utils.Log.debug("Adding %s address: %s", header, copy_email)
            recipient = {"Email": copy_email}
            message[header].append(recipient)
            if copy_name:
                recipient["Name"] = copy_name

    if "subject" in event:
        message["Subject"] = event["subject"]

    utils.Log.debug("Message content: %s", message)

    return message
def create_epub(event: utils.LambdaEvent) -> str:
    """Build EPUB file from URL source and store it to S3.

    Downloads ``event["url"]``, converts the response text to Markdown with
    ``html2text`` (links ignored), feeds it to ``pandoc`` to produce an EPUB
    titled ``event["title"]`` and uploads the result to the bucket named by
    ``env["EPUB_BUCKET"]``. The object key is ``YYYY/MM/DD/<name>.epub``
    (UTC date), where ``<name>`` is ``pocket-<item_id>`` when the event has
    an ``item_id`` key and a random UUID otherwise.

    Returns:
        ``"success: s3://<bucket>/<key>"``.

    Raises:
        utils.HandledError: when the download does not answer with HTTP 200
            (carrying the received status code), or with status 500 when
            pandoc exits with an error or exceeds the timeout.
    """
    utils.Log.info("Fetch content from %s", event["url"])
    requests = helpers.import_non_stdlib_module("requests")
    response = requests.get(url=event["url"])

    if response.status_code != 200:
        # The original looked up event["ur"] here, so a failed download
        # surfaced as a KeyError instead of the intended HandledError.
        raise utils.HandledError("Error downloading %s: "
                                 "HTTP status code %d" %
                                 (event["url"], response.status_code),
                                 status_code=response.status_code)

    utils.Log.info("Create Markdown text from %s source", event["url"])
    html2text = helpers.import_non_stdlib_module("html2text")
    markdown_maker = html2text.HTML2Text()
    markdown_maker.ignore_links = True
    markdown = markdown_maker.handle(response.text)
    utils.Log.debug("Markdown content:\n%s", markdown)

    utils.Log.info("Create temporary file to store epub content")

    # The context manager closes (and thereby removes) the temporary file on
    # every exit path instead of leaving it to garbage collection.
    with NamedTemporaryFile(suffix=".epub") as epub:
        utils.Log.debug("tempfile created: %s", epub.name)

        timeout = 200

        try:
            # Fails early with CalledProcessError if pandoc is not usable.
            completed = run(["pandoc", "--version"],
                            check=True,
                            capture_output=True,
                            text=True)
            utils.Log.debug(completed.stdout)

            pandoc_cmd = [
                "pandoc",
                "--quiet",
                "--from=markdown",
                "--to=epub",
                f"--metadata=title:'{event['title']}'",
                f"--output={epub.name}",
            ]

            # NOTE(review): `join` is presumably shlex.join (it receives the
            # argument list as a single parameter) — confirm the import.
            utils.Log.info("Executing %s", join(pandoc_cmd))
            run(pandoc_cmd,
                input=bytes(markdown, encoding="utf-8"),
                check=True,
                timeout=timeout)
            utils.Log.info("EPUB creation completed (%d bytes)",
                           stat(epub.name).st_size)

        except TimeoutExpired as error:
            raise utils.HandledError(
                "Error: pandoc execution exceeded timeout of %d seconds" %
                timeout,
                status_code=500) from error

        except CalledProcessError as error:
            raise utils.HandledError("Error: %s" % error,
                                     status_code=500) from error

        now = datetime.utcnow()
        file_name = f"pocket-{event['item_id']}" if "item_id" in event else uuid4()
        # Only the date goes through strftime, so a '%' in the file name can
        # never be misread as a format directive.
        key_name = f"{now.strftime('%Y/%m/%d')}/{file_name}.epub"

        # The upload must happen while the temporary file is still open.
        aws.put_object_to_s3_bucket(key=key_name,
                                    bucket=env["EPUB_BUCKET"],
                                    body=epub)

    file_url = f"s3://{env['EPUB_BUCKET']}/{key_name}"
    utils.Log.info("File %s created successfully", file_url)

    return f"success: {file_url}"