Exemple #1
0
def pagespeed_report(_: utils.LambdaEvent) -> List[Dict]:
    """Build a report from the Google Pagespeed scores stored in DynamoDB.

    The table named by ``env["PAGESPEED_TABLE"]`` is scanned and every item
    is flattened into a dictionary holding its URL, latest score and the
    timestamp of that score. The event argument is ignored.

    Returns:
        One dictionary per scanned item.

    Raises:
        utils.HandledError: with status 500 when the scan reports zero items,
            or with status 400 (the whole report being the message) when at
            least one score falls outside the ``(0.95, 1]`` interval. The
            offending entries carry an extra ``"error": True`` key.
    """
    scan = aws.scan_dynamodb_table(env["PAGESPEED_TABLE"])

    if scan["Count"] == 0:
        raise utils.HandledError(
            message="Unexpected DynamoDB response: empty table",
            status_code=500)

    report = []
    for record in scan["Items"]:
        # Attributes are read from their typed wrappers ("S" / "N" keys)
        report.append({
            "url": record['url']['S'],
            "latest_score_value": float(record['latest_score_value']['N']),
            "latest_score_timestamp": record['latest_score_timestamp']['S'],
        })

    utils.Log.debug("Items: %s", report)

    out_of_range = [entry for entry in report
                    if not 0.95 < entry["latest_score_value"] <= 1]

    for entry in out_of_range:
        entry["error"] = True

    if out_of_range:
        raise utils.HandledError(message=report, status_code=400)

    return report
Exemple #2
0
def put_record_to_logstream(event: utils.LambdaEvent) -> str:
    """Record the outcome of a source Lambda execution in a log stream.

    The event is expected to carry a ``requestPayload`` (whose first record
    is an SNS notification with a JSON ``Message`` holding ``url`` and
    ``title``) and a ``responsePayload`` (with ``statusCode`` and a JSON
    ``body`` holding ``name`` and ``timestamp``).

    Returns:
        Whatever ``aws.send_event_to_logstream`` returns. The log group comes
        from ``env["REPORT_LOG_GROUP_NAME"]`` and the stream is named after
        the source lambda.

    Raises:
        utils.HandledError: when the response body is not valid JSON, or when
            the source lambda answered with a status code other than 200.
    """
    target_group = env["REPORT_LOG_GROUP_NAME"]

    utils.Log.info("Fetching requestPayload and responsePayload")
    request = event["requestPayload"]
    response = event["responsePayload"]

    utils.Log.info("Fetching requestPayload content")
    notification = request["Records"][0]["Sns"]

    message_id = notification["MessageId"]
    published = json.loads(notification["Message"])
    url = published["url"]
    title = published["title"]

    try:
        source_result = json.loads(response["body"])
    except json.JSONDecodeError as error:
        raise utils.HandledError("Failed decoding payload: %s" % error)

    name = source_result["name"]
    timestamp = source_result["timestamp"]

    status = response["statusCode"]
    if status != 200:
        raise utils.HandledError(
            "Source lambda '%s' failed with status code %d, "
            "ignoring report" % (name, status))

    record = {
        "url": url,
        "MessageId": message_id,
        "title": title,
        "timestamp": timestamp,
    }

    return aws.send_event_to_logstream(log_group=target_group,
                                       log_stream=name,
                                       message=record)
Exemple #3
0
def _validate_backup_keys(keys: List[Dict],
                          first_expected: date,
                          regexp: str = None,
                          tolerance: int = None):
    previous_file_size = 0
    check_day = first_expected

    for iteration, item in enumerate(keys, start=1):
        # Validate name
        if regexp:
            utils.Log.debug("Validating key name %s against regexp %s",
                            item["key"], regexp)
            if not match(regexp, item["key"]):
                raise utils.HandledError("Key %s doesn't match regexp %s" %
                                         (item["key"], regexp))

        # Validate LastModified timestamp
        utils.Log.debug("Validating LastModified value for key %s",
                        item["key"])
        test, actual = check_day.isoformat(
        )[:10], item["last_modified"].isoformat()[:10]
        if not test == actual:
            raise utils.HandledError("Wrong key %s: "
                                     "expected date %s, got %s" %
                                     (item["key"], test, actual))

        # Validate file size
        if item["size"] == 0:
            raise utils.HandledError("%s key is empty" % item["key"])

        # Validate file size delta
        if tolerance:

            if iteration > 1:
                size_diff = item["size"] - previous_file_size
                variation = round(size_diff / item["size"] * 100)

                utils.Log.debug(
                    "Found %d bytes variation (%d percent) for key %s "
                    "compared to previous backup", size_diff, variation,
                    item["key"])

                if variation > tolerance:
                    raise utils.HandledError(
                        "Size difference compared to previous backup for key %s "
                        "above threshold of %d percent" %
                        (item["key"], tolerance))

        previous_file_size = item["size"]
        check_day += timedelta(days=1)
Exemple #4
0
def _send_attachment_to_kindle(key: str,
                               bucket: str,
                               item_id: int = None) -> utils.Response:
    """Ask the notifications lambda to mail a stored document to the Kindle.

    Args:
        key: object key of the document; must end in ``.mobi`` or ``.html``.
        bucket: bucket holding the document.
        item_id: optional identifier; when truthy the attachment is named
            ``pocket-<item_id>.<ext>``, otherwise a random UUID is used.

    Returns:
        The result of ``aws.invoke_lambda``, invoked with ``"Event"`` type.

    Raises:
        utils.HandledError: status 401 when the key has another extension.
    """
    recipient = env["KINDLE_EMAIL"]
    notifier = env["LAMBDA_NOTIFICATIONS"]

    utils.Log.info("Send attachment to %s via %s email notification service",
                   recipient, notifier)

    # https://www.iana.org/assignments/media-types/
    content_types = {
        "mobi": "application/vnd.amazon.mobi8-ebook",
        "html": "text/html",
    }

    extension = next(
        (ext for ext in content_types if key.endswith(f".{ext}")), None)

    if extension is None:
        raise utils.HandledError(
            message=
            "Invalid document extension: must be either '.mobi' or '.html'",
            status_code=401)

    if item_id:
        filename = f"pocket-{item_id}.{extension}"
    else:
        filename = f"{uuid4()}.{extension}"

    attachment = {
        "ContentType": content_types[extension],
        "Key": key,
        "Bucket": bucket,
        "Filename": filename,
    }

    return aws.invoke_lambda(name=notifier,
                             payload={
                                 "mail_to": recipient,
                                 "attachments": [attachment],
                             },
                             invoke_type="Event")
Exemple #5
0
def create_doc(event: utils.LambdaEvent) -> str:
    """Build clean HTML file from URL source and store it to S3.

    The page at ``event["url"]`` is downloaded, reduced to its readable
    content with ``readability`` and uploaded to ``env["DOCUMENT_BUCKET"]``
    under a ``YYYY/MM/DD/<name>.html`` key. ``<name>`` is
    ``pocket-<item_id>`` when the event has an ``item_id`` key, a random UUID
    otherwise.

    Returns:
        ``"success: s3://<bucket>/<key>"``.

    Raises:
        utils.HandledError: when the download does not answer HTTP 200; the
            error carries the HTTP status code received.
    """
    utils.Log.info("Fetch content from %s", event["url"])
    requests = helpers.import_non_stdlib_module("requests")
    response = requests.get(url=event["url"])

    if response.status_code != 200:
        # The key used to be misspelled ("ur"), which turned every download
        # failure into a KeyError instead of the intended HandledError.
        raise utils.HandledError("Error downloading %s: "
                                 "HTTP status code %d" %
                                 (event["url"], response.status_code),
                                 status_code=response.status_code)

    utils.Log.info("Create readability-clean HTML text from %s source",
                   event["url"])
    readability = helpers.import_non_stdlib_module("readability")

    doc = readability.Document(response.text)

    utils.Log.debug("Document title:\n%s", doc.title())

    # Computed once and reused for both the log line and the upload
    summary = doc.summary()
    utils.Log.debug("Document readability-cleaned content:\n%s", summary)

    now = datetime.utcnow()
    file_name = f"pocket-{event['item_id']}" if "item_id" in event else uuid4()
    key_name = now.strftime(f"%Y/%m/%d/{file_name}.html")

    aws.put_object_to_s3_bucket(key=key_name,
                                bucket=env["DOCUMENT_BUCKET"],
                                body=bytes(summary, encoding="utf-8"))

    file_url = f"s3://{env['DOCUMENT_BUCKET']}/{key_name}"

    utils.Log.info("File %s created successfully", file_url)

    return f"success: {file_url}"
def publish(event: dict) -> str:
    """
    SNS message producer

    The event must contain a 'url' key: the URL is validated and handed to
    `build_message` (together with the optional 'disable' event key, which
    defaults to an empty list) to build the content to deliver. Any other
    event key is ignored.

    The content is published to the SNS topic named by the `SNS_TOPIC`
    environment variable, using `publish_to_social` as subject.

    Returns a confirmation string holding the SNS message id and the source
    URL. Raises `utils.HandledError` when the 'url' key is missing.
    """
    if "url" not in event:
        raise utils.HandledError("Missing 'url' key in payload")

    utils.Log.debug("Found 'url' key in client input, ignoring other keys")
    source = event["url"]
    helpers.validate_url(source)
    content = build_message(source, disable=event.get("disable", []))

    response = aws.publish_to_sns_topic(sns_topic=environ["SNS_TOPIC"],
                                        subject="publish_to_social",
                                        content=content)
    message_id = response.text

    return (f"messageId '{message_id}' with content scraped "
            f"from source {source} delivered successfully")
def _get_expected_values(
        bucket_name: str,
        retention_days: int,
        start_day_isoformat: Optional[str] = None) -> Tuple[int, date]:
    """Validate input, return expected backup list size and first backup date.

    Args:
        bucket_name: name of the bucket being validated (used in messages).
        retention_days: number of days backups are kept; must be positive.
        start_day_isoformat: optional ISO date (``YYYY-MM-DD``) of the first
            backup ever taken. When omitted, the start day is assumed to be
            ``retention_days`` before ``TODAY``.

    Returns:
        A ``(expected_backups, first_expected)`` tuple: how many backups
        should exist, capped at ``retention_days``, and the date of the
        oldest one expected.

    Raises:
        utils.HandledError: status 500 on an invalid argument or when the
            start day is in the future.
    """
    # Explicit checks rather than `assert`: assertions are removed when Python
    # runs with -O, which would silently disable the validation.
    if not isinstance(bucket_name, str):
        # One-element tuple so that a tuple argument cannot break formatting
        raise utils.HandledError("Invalid bucket_name argument: %s" %
                                 (bucket_name, ),
                                 status_code=500)

    invalid_retention = (
        f"Invalid retention_days: expected positive integer, "
        f"got {retention_days!r}")
    try:
        is_positive = retention_days > 0
    except TypeError as error:
        raise utils.HandledError(invalid_retention,
                                 status_code=500) from error
    if not is_positive:
        raise utils.HandledError(invalid_retention, status_code=500)

    utils.Log.debug("Validate backups in bucket %s", bucket_name)

    if start_day_isoformat is None:
        start_day = TODAY - timedelta(days=retention_days)

    else:
        try:
            start_day = date.fromisoformat(start_day_isoformat)

        except (TypeError, ValueError) as error:
            raise utils.HandledError(
                f"{bucket_name}: Invalid `start_day_isoformat` argument: {error}",
                status_code=500) from error

    if start_day > TODAY:
        raise utils.HandledError(
            f"{bucket_name}: Wrong start day: {start_day} is in the future",
            status_code=500)

    # Fewer backups than the retention window are expected only while the
    # first backup is more recent than the window itself.
    expected_backups = min(retention_days, (TODAY - start_day).days + 1)
    first_expected = start_day if expected_backups < retention_days \
                               else TODAY - timedelta(days=(retention_days - 1))

    utils.Log.debug("%s: Expected %d backup(s) starting from %s", bucket_name,
                    expected_backups, first_expected)

    return expected_backups, first_expected
Exemple #8
0
def contact(event: utils.LambdaEvent) -> str:
    """
    Forward a contact form submission to the Notifications lambda.

    ``event["body"]`` must be a JSON object providing these keys:

    - source
    - name
    - email
    - description

    Returns the ``text`` attribute of the lambda invocation result.

    Raises ``utils.HandledError`` when the body cannot be decoded or when one
    of the keys above is missing.
    """
    notifier = env["LAMBDA_NOTIFICATIONS"]

    raw_body = event["body"]

    utils.Log.debug("Processing body payload: %s", raw_body)

    template = ("Source: {source}\n"
                "Name: {name}\n"
                "Mail: {email}\n"
                "Desc: {description}\n")

    try:
        utils.Log.debug("Loading JSON content from body")
        utils.Log.info("json.loads should be safe to use: "
                       "https://stackoverflow.com/a/45483187/2274124")

        fields = json.loads(raw_body)
        text = template.format(**fields)

    except (TypeError, json.JSONDecodeError) as error:
        raise utils.HandledError("JSON body is malformatted: %s" % error)

    except KeyError as error:
        raise utils.HandledError("Missing JSON key: %s" % error)

    utils.Log.debug("### Message content below ###")
    utils.Log.debug(text)
    utils.Log.debug("#############################")

    notification = {
        "title": "New /contact submission received",
        "payload": text,
    }

    return aws.invoke_lambda(name=notifier, payload=notification).text
Exemple #9
0
def send(event: utils.LambdaEvent) -> str:
    """Send payload as message to Pushover API.

    Reads ``title`` and ``payload`` from the event, and the application token
    and user key from ``env["PUSHOVER_TOKEN"]`` / ``env["PUSHOVER_USERKEY"]``.

    Returns:
        A confirmation string holding the Pushover request identifier.

    Raises:
        utils.HandledError: status 500 when a credential is malformed or when
            the Pushover response is not the expected one.
    """
    utils.Log.info("Delivering message via Pushover")

    title = event["title"]
    payload = event["payload"]
    token = env["PUSHOVER_TOKEN"]
    user = env["PUSHOVER_USERKEY"]

    for string in token, user:
        # Explicit check instead of `assert` (stripped under -O). The pattern
        # is anchored so that the whole string is validated, not only its
        # first character, and accepts the documented [A-Za-z0-9] alphabet.
        if not match(r"[A-Za-z0-9]{30}\Z", string):
            # NOTE(review): this message echoes the credential - consider
            # redacting it if errors are surfaced to clients.
            raise utils.HandledError(
                f"Pushover string token malformed: {string}",
                status_code=500)

    data = {
        "token": token,
        "user": user,
        "message": payload,
        "title": title,
    }

    # NOTE(review): `.text` is indexed by key below, so it is assumed to be
    # the already-decoded JSON mapping - confirm against the helper.
    resp = helpers.send_http_request(url=PUSHOVER_API_ENDPOINT, data=data).text

    try:
        status = resp["status"]
        if status != 1:
            raise ValueError(f"status is {status!r}, expected 1")
        req_id = resp["request"]

    except (LookupError, TypeError, ValueError) as error:
        raise utils.HandledError("Unexpected response from Pushover: %s" %
                                 error,
                                 status_code=500) from error

    return "Message sent to Pushover successful (request '%s')" % req_id
Exemple #10
0
def _check_bucket_validity(bucket_name: str,
                           retention_days: int,
                           regexp: str = None,
                           start_day_isoformat: str = None,
                           tolerance: int = None) -> str:
    """Validate the backups found in a bucket against the expected schedule.

    The expected number of backups and the date of the first one come from
    ``_get_expected_values``. The bucket listing may hold up to two keys more
    than expected: the surplus keys at the head of the listing are ignored.
    The remaining keys are then checked by ``_validate_backup_keys``.

    Returns:
        ``"<bucket_name>: OK"`` when every check passes.

    Raises:
        utils.HandledError: when the bucket holds fewer keys than expected or
            more than two keys in excess (besides whatever the two helpers
            raise).
    """
    expected_backups, first_expected = _get_expected_values(
        bucket_name, retention_days, start_day_isoformat)

    backups = []
    for obj in aws.list_bucket(bucket_name=bucket_name):
        backups.append({
            "last_modified": obj["LastModified"],
            "size": obj["Size"],
            "key": obj["Key"],
        })

    found = len(backups)
    surplus = found - expected_backups

    if surplus < 0:
        raise utils.HandledError(
            "%s: Invalid backups number. Expected at least %d, got %d" %
            (bucket_name, expected_backups, found))

    if surplus > 2:
        raise utils.HandledError(
            "%s: Invalid backups number. Expected at most %d, got %d" %
            (bucket_name, expected_backups + 2, found))

    if surplus > 0:
        utils.Log.info("%s: Ignoring oldest %d key(s): %s", bucket_name,
                       surplus, backups[:surplus])
        backups = backups[surplus:]

    _validate_backup_keys(keys=backups,
                          first_expected=first_expected,
                          regexp=regexp,
                          tolerance=tolerance)

    return f"{bucket_name}: OK"
Exemple #11
0
def send(event: utils.LambdaEvent) -> str:
    """Send the document stored under ``event["keyName"]`` to Kindle.

    A key name containing ``pocket-<digits>`` identifies a Pocket item: its
    numeric id is passed along with the attachment and, once the
    notifications service has answered with status 200, the item is archived
    (only when the id is truthy).

    NOTE(review): no value is returned explicitly, so the function yields
    ``None`` despite its ``str`` annotation - confirm what callers expect.

    Raises:
        utils.HandledError: when the notifications service answers with a
            status code other than 200; the error carries that status code.
    """
    key_name = event["keyName"]

    pocket_ref = search(r'pocket-(\d+)', key_name)
    item_id = int(pocket_ref.group(1)) if pocket_ref else None

    response = _send_attachment_to_kindle(key=key_name,
                                          bucket=env["DOCUMENT_SRC_BUCKET"],
                                          item_id=item_id)

    if response.status_code != 200:
        raise utils.HandledError(
            message=
            f"Unexpected response from notifications service: {response.text}",
            status_code=response.status_code)

    if item_id:
        _archive_pocket_item(item_id)
Exemple #12
0
def _message_from_event(event: utils.LambdaEvent,
                        default_from: str = "",
                        default_to: str = "") -> Dict:
    """Return message dictionary compatible with MAILJET_API_ENDPOINT API argument.

    Event keys read: ``text``, ``custom_id``, ``mail_from``, ``mail_to``,
    ``mail_cc``, ``mail_bcc`` and ``subject``. When ``mail_from`` or
    ``mail_to`` is missing, the matching default is used and also written
    back into ``event``. Cc/Bcc strings that yield no email address are
    skipped.

    Raises:
        utils.HandledError: when no sender or no recipient address can be
            determined.
    """
    msg = {
        "From": {},
        "TextPart": event.get("text", "no content"),
        "To": [{}],
        "Cc": [],
        "Bcc": [],
        "CustomID": event.get("custom_id", "api-l3x-in"),
    }

    # Sender
    if "mail_from" not in event:
        if not default_from:
            raise utils.HandledError("Missing MAIL_FROM")
        utils.Log.debug("Using default MAIL_FROM address: %s", default_from)
        event["mail_from"] = default_from
    else:
        utils.Log.debug("Found `mail_from` in event, parsing content: %s",
                        event["mail_from"])

    sender_name, sender_email = helpers.parsed_email_address(
        event["mail_from"])

    if not sender_email:
        raise utils.HandledError("Missing MAIL_FROM")

    sender = msg["From"]
    sender["Email"] = sender_email
    if sender_name:
        sender["Name"] = sender_name

    # Main recipient
    if "mail_to" not in event:
        utils.Log.debug("Using default MAIL_TO address: %s", default_to)
        event["mail_to"] = default_to
    else:
        utils.Log.debug("Found `mail_to` in event, parsing content: %s",
                        event["mail_to"])

    recipient_name, recipient_email = helpers.parsed_email_address(
        event["mail_to"])

    if not recipient_email:
        raise utils.HandledError("Missing MAIL_TO")

    recipient = msg["To"][0]
    recipient["Email"] = recipient_email
    if recipient_name:
        recipient["Name"] = recipient_name

    # Copy recipients
    for source_key, header in (("mail_cc", "Cc"), ("mail_bcc", "Bcc")):
        if source_key not in event:
            continue

        utils.Log.debug("Found `%s` in event, parsing content: %s",
                        source_key, event[source_key])

        for address in event[source_key]:
            copy_name, copy_email = helpers.parsed_email_address(address)
            if not copy_email:
                continue

            utils.Log.debug("Adding %s address: %s", header, copy_email)
            entry = {"Email": copy_email}
            if copy_name:
                entry["Name"] = copy_name
            msg[header].append(entry)

    if "subject" in event:
        msg["Subject"] = event["subject"]

    utils.Log.debug("Message content: %s", msg)
    return msg
Exemple #13
0
def create_epub(event: utils.LambdaEvent) -> str:
    """Build EPUB file from URL source and store it to S3.

    The page at ``event["url"]`` is downloaded, converted to Markdown with
    ``html2text`` and then to EPUB by the ``pandoc`` executable, using
    ``event["title"]`` as title metadata. The result is uploaded to
    ``env["EPUB_BUCKET"]`` under a ``YYYY/MM/DD/<name>.epub`` key. ``<name>``
    is ``pocket-<item_id>`` when the event has an ``item_id`` key, a random
    UUID otherwise.

    Returns:
        ``"success: s3://<bucket>/<key>"``.

    Raises:
        utils.HandledError: when the download does not answer HTTP 200 (the
            error carries that status code), or with status 500 when pandoc
            fails or exceeds its timeout.
    """
    utils.Log.info("Fetch content from %s", event["url"])
    requests = helpers.import_non_stdlib_module("requests")
    response = requests.get(url=event["url"])

    if response.status_code != 200:
        # The key used to be misspelled ("ur"), which turned every download
        # failure into a KeyError instead of the intended HandledError.
        raise utils.HandledError("Error downloading %s: "
                                 "HTTP status code %d" %
                                 (event["url"], response.status_code),
                                 status_code=response.status_code)

    utils.Log.info("Create Markdown text from %s source", event["url"])
    html2text = helpers.import_non_stdlib_module("html2text")
    markdown_maker = html2text.HTML2Text()
    markdown_maker.ignore_links = True
    markdown = markdown_maker.handle(response.text)
    utils.Log.debug("Markdown content:\n%s", markdown)

    # Bound before the `try` so the timeout handler can always read it
    timeout = 200

    utils.Log.info("Create temporary file to store epub content")
    # Context manager: the temporary file is closed (and removed) as soon as
    # the upload is done, also when an error is raised.
    with NamedTemporaryFile(suffix=".epub") as epub:
        utils.Log.debug("tempfile created: %s", epub.name)

        try:
            completed = run(["pandoc", "--version"],
                            check=True,
                            capture_output=True,
                            text=True)
            utils.Log.debug(completed.stdout)

            # NOTE(review): no shell is involved, so the single quotes around
            # the title reach pandoc literally - confirm this is intended.
            pandoc_cmd = [
                "pandoc",
                "--quiet",
                "--from=markdown",
                "--to=epub",
                f"--metadata=title:'{event['title']}'",
                f"--output={epub.name}",
            ]
            utils.Log.info("Executing %s", join(pandoc_cmd))
            run(pandoc_cmd,
                input=bytes(markdown, encoding="utf-8"),
                check=True,
                timeout=timeout)
            utils.Log.info("EPUB creation completed (%d bytes)",
                           stat(epub.name).st_size)

        except TimeoutExpired as error:
            raise utils.HandledError(
                "Error: pandoc execution exceeded timeout of %d seconds" %
                timeout,
                status_code=500) from error

        except CalledProcessError as error:
            raise utils.HandledError("Error: %s" % error,
                                     status_code=500) from error

        now = datetime.utcnow()
        file_name = f"pocket-{event['item_id']}" if "item_id" in event else uuid4()
        key_name = now.strftime(f"%Y/%m/%d/{file_name}.epub")

        aws.put_object_to_s3_bucket(key=key_name,
                                    bucket=env["EPUB_BUCKET"],
                                    body=epub)

    file_url = f"s3://{env['EPUB_BUCKET']}/{key_name}"

    utils.Log.info("File %s created successfully", file_url)

    return f"success: {file_url}"