Ejemplo n.º 1
0
def deploy_if_feed_updates(_: utils.LambdaEvent):
    '''Trigger a Netlify build when any watched feed has a new entry.

    Feed URLs are read from the comma-separated DEPLOYER_FEED_URLS setting.
    Each feed is checked by a worker submitted through
    `helpers.exec_in_thread_and_wait`; a feed whose latest timestamp differs
    from the stored one (or that has no stored one) gets its stored timestamp
    overwritten and counts as updated. A single POST is sent to the Netlify
    build hook as soon as one updated feed is seen.

    The event argument is ignored.
    '''
    hook_id = env["NETLIFY_HOOK"]
    netlify_url = f'https://api.netlify.com/build_hooks/{hook_id}'
    table_name = env['DYNAMODB_TABLE']
    raw_urls = env['DEPLOYER_FEED_URLS']
    urls = raw_urls.replace(' ', '').split(',')

    utils.Log.info("Checking updates for feeds %s", urls)

    stored = _get_stored_timestamp()

    def refresh(feed_url: str) -> bool:
        '''Persist the feed's latest timestamp; return False when unchanged.'''
        latest = _get_last_update(feed_url)

        # A feed missing from storage (e.g. empty table) counts as a miss,
        # so it falls straight through to the write below.
        if feed_url in stored:
            previous = datetime.fromisoformat(stored[feed_url])

            utils.Log.debug(
                "%s: checking equality between stored(%s) and last(%s)",
                feed_url, previous, latest)
            if previous == latest:
                utils.Log.info('%s: no database update needed', feed_url)
                return False

        timestamp_update = {
            'Value': {'S': latest.isoformat()},
            'Action': 'PUT',
        }
        aws.update_dynamo_item(
            table_name=table_name,
            key={"url": {"S": feed_url}},
            att_updates={'latest_update_timestamp': timestamp_update},
        )

        return True

    futures = helpers.exec_in_thread_and_wait(
        (refresh, (feed_url, )) for feed_url in urls)

    # Stop at the first updated feed: one deploy is enough.
    for finished in futures.done:
        if finished.result():
            utils.Log.info("Found updates, triggering Netlify deploy")
            helpers.send_http_request(url=netlify_url, method="POST", data={})
            break

    utils.Log.info("All done")
Ejemplo n.º 2
0
def scrape_page(url: str) -> dict:
    """Scrape title, categories and description from webpage at `url`.

    :param url: address of the page to fetch with a GET request.
    :return: dict with keys "url", "title", "categories" (list of strings,
      empty when the page has no usable categories meta tag) and
      "description".

    The title and description lookups are not guarded: a page missing the
    <title> tag or the description meta tag still raises, as before.
    """
    utils.Log.info("Scraping %s in search of title and description", url)
    output = {"url": url}

    utils.Log.debug("Fetching content from %s", url)
    page = helpers.send_http_request(url=url, method="GET").text

    utils.Log.debug("Parsing content with BeautifulSoup4")
    bs4 = helpers.import_non_stdlib_module("bs4")
    soup = bs4.BeautifulSoup(page, "html.parser")

    # Bound before the try block so the failure path below still leaves a
    # usable value for the output dict.
    categories = []
    try:
        utils.Log.debug("Searching for categories meta tag")
        found = soup.find("meta", {"name": "categories"})
        if found:
            # Strip first, then drop blanks, so entries made only of spaces
            # (e.g. "a, ,b") do not survive as empty strings.
            stripped = (cat.strip(" ") for cat in found["content"].split(","))
            categories = [cat for cat in stripped if cat]

    except (TypeError, KeyError) as error:
        # KeyError: the meta tag exists but has no "content" attribute.
        utils.Log.warning("Could not find any categories meta tag: %s", error)

    output.update({
        "title": soup.find("title").contents[0],
        "categories": categories,
        "description": soup.find("meta", {"name": "description"})["content"],
    })

    utils.Log.debug("Parsing done. Output: %s", output)
    utils.Log.info("Scraping completed successfully")

    return output
Ejemplo n.º 3
0
def deliver_to_mailjet(event: utils.LambdaEvent) -> str:
    """Build a message from `event` and post it to the Mailjet send API.

    :param event: every key is optional.
      - "mail_from": sender, 'Some Name <user@example.com>' (name optional);
        MAILJET_FROM_ADDRESS from env is passed as the default.
      - "mail_to": recipient in the same form; MAILJET_DEFAULT_TO_ADDRESS
        from env is passed as the default.
      - "mail_cc" / "mail_bcc": lists of CC / BCC addresses, same form.
      - "custom_id": identifier for internal Mailjet use.
      - "subject": email subject.
      - "text": plain-text content.
      - "attachments": list of attachment metadata dicts, e.g.
        {
            # https://www.iana.org/assignments/media-types/application/vnd.amazon.mobi8-ebook
            "ContentType": "application/vnd.amazon.mobi8-ebook",
            "Key": <s3_key>,
            "Bucket": <s3_bucket>,
            "Filename": "article.mobi",
        }
        Each dict is handed to `_add_content_to_attachment` before being
        attached (per the original notes, the content comes from S3).
        Attachment format:
        https://dev.mailjet.com/email/guides/send-api-v31/#send-with-attached-files

    :return: the `text` attribute of the HTTP response.

    API docs: https://dev.mailjet.com/email/guides/send-api-v31/
    """
    utils.Log.info("Sending email message via %s", MAILJET_API_ENDPOINT)

    message = _message_from_event(
        event=event,
        default_from=env["MAILJET_FROM_ADDRESS"],
        default_to=env["MAILJET_DEFAULT_TO_ADDRESS"],
    )

    if "attachments" in event:
        message["Attachments"] = []

        utils.Log.debug("Adding %d attachments", len(event["attachments"]))
        for attachment in event["attachments"]:
            _add_content_to_attachment(attachment)
            utils.Log.debug("Adding %s, content-type %s",
                            attachment["Filename"],
                            attachment["ContentType"])
            message["Attachments"].append(attachment)

    # The API takes a JSON document with a list of messages; we send one.
    body = json.dumps({"Messages": [message]}).encode("utf-8")
    credentials = {
        "user": env["MAILJET_API_KEY"],
        "pass": env["MAILJET_API_SECRET"],
    }

    response = helpers.send_http_request(
        url=MAILJET_API_ENDPOINT,
        data=body,
        headers={"Content-Type": "application/json"},
        auth=credentials,
    )
    return response.text
Ejemplo n.º 4
0
def retrieve() -> Tuple:
    """Fetch Pocket articles, resuming from the last stored 'since' value.

    The most recent event of the configured log stream, when there is one,
    is decoded as JSON and its "since" value is forwarded to the API.

    :return: `(articles, since)` where `articles` is a tuple of dicts with
      keys "item_id", "title", "url" and "tags" that passed
      `_validate_article`, and `since` is taken from the API response.

    Docs: https://getpocket.com/developer/docs/v3/retrieve
    """
    request_data = {
        "access_token": env["POCKET_SECRET_TOKEN"],
        "consumer_key": env["POCKET_CONSUMER_KEY"],
        "detailType": "complete",
    }

    utils.Log.info("Fetch 'since' from storage")
    # NOTE(review): the log stream name is read from SINCE_LOG_GROUP too --
    # confirm this is intentional and not a missing stream setting.
    log_events = aws.read_log_stream(log_group=env["SINCE_LOG_GROUP"],
                                     log_stream=env["SINCE_LOG_GROUP"])

    if log_events:
        newest = log_events[-1]

        utils.Log.debug("Deserializing CloudWatch Logs message content")
        checkpoint = json.loads(newest["message"])

        request_data["since"] = checkpoint["since"]
        utils.Log.debug("Found 'since': %d", request_data["since"])

    utils.Log.info("Retrieve new articles from getpocket.com APIs")
    response = helpers.send_http_request(
        url=POCKET_API_ENDPOINT,
        data=request_data,
    ).text

    since = response["since"]
    pocket_items = response["list"]

    if not pocket_items:
        return (), since

    # status 0: we filter out archived/to-be-deleted items
    unread = ({
        "item_id": item_id,
        "title": item["resolved_title"],
        "url": item["resolved_url"],
        "tags": item.get("tags", {}),
    } for item_id, item in pocket_items.items() if int(item["status"]) == 0)

    articles = tuple(
        article for article in unread if _validate_article(article))

    return articles, since
Ejemplo n.º 5
0
def _archive_pocket_item(item_id: int):
    """Archive one Pocket item via the modify endpoint.

    :param item_id: Pocket item identifier. It is always sent as a string;
      ids given as strings (such as JSON object keys) are accepted as well
      as integers.
    :return: whatever `helpers.send_http_request` returns for the call.

    https://getpocket.com/developer/docs/v3/modify#action_archive
    """
    # %s instead of %d: the id may arrive as a string, and %d cannot format
    # a string. Output for integer ids is unchanged.
    utils.Log.info("Archive Pocket item %s", item_id)

    actions = [{
        "action": "archive",
        # NOTE: API docs are inconsistent, they say `item_id` is an integer
        # but examples show string usage
        "item_id": str(item_id),
    }]

    return helpers.send_http_request(
        url=POCKET_API_ENDPOINT,
        data={
            "access_token": env["POCKET_SECRET_TOKEN"],
            "consumer_key": env["POCKET_CONSUMER_KEY"],
            "actions": json.dumps(actions),
        })
Ejemplo n.º 6
0
def _domain_is_expiring(domain: str,
                        days: int = DEFAULT_EXPIRY_DAYS
                        ) -> Tuple[str, bool, str]:
    """Report whether `domain` expires within the next `days` days.

    :param domain: domain name to look up through the WHOIS XML API.
    :param days: size of the warning window, in days.
    :return: `(domain, is_expiring, expires_date_str)`; `is_expiring` is True
      when the registry expiry date is less than `days` days away (already
      expired domains included), `expires_date_str` is the raw date string
      from the response.
    """
    # Imported here because the module-level imports are not visible from
    # this block.
    from datetime import timezone
    from urllib.parse import urlencode

    now = datetime.now(timezone.utc)
    delta = timedelta(days=days)

    # Encode the query string so special characters in the key or the domain
    # cannot break or alter the request.
    query = urlencode({
        "apiKey": env["WHOISXMLAPI_KEY"],
        "domainName": domain,
        "outputFormat": "JSON",
    })
    url = f"{WHOISXMLAPI_ENDPOINT}?{query}"

    response = helpers.send_http_request(
        url=url,
        method="GET",
    )
    response = json.loads(response.body)
    expires_date_str = response["message"]["WhoisRecord"]["registryData"][
        "expiresDate"]

    # A trailing "Z" is dropped before parsing; the naive result is then
    # treated as UTC. Timestamps that carry an explicit offset parse as
    # aware values and are compared as they are.
    expires_date = datetime.fromisoformat(expires_date_str.rstrip("Z"))
    if expires_date.tzinfo is None:
        expires_date = expires_date.replace(tzinfo=timezone.utc)

    return domain, (expires_date - now) < delta, expires_date_str
Ejemplo n.º 7
0
def send(event: utils.LambdaEvent) -> str:
    """Send payload as message to Pushover API.

    :param event: must have "title" and "payload" keys, used as the
      message title and body.
    :return: confirmation string containing the Pushover request id.
    :raises utils.HandledError: (status_code 500) when PUSHOVER_TOKEN or
      PUSHOVER_USERKEY is not 30 alphanumeric characters, or when the
      response is missing fields or does not report status 1.
    """
    utils.Log.info("Delivering message via Pushover")

    title = event["title"]
    payload = event["payload"]
    token = env["PUSHOVER_TOKEN"]
    user = env["PUSHOVER_USERKEY"]

    # Explicit checks instead of `assert`, which is stripped under `-O`.
    # The pattern is anchored and case-insensitive: Pushover keys are
    # 30 characters from [A-Za-z0-9].
    credentials = (("PUSHOVER_TOKEN", token), ("PUSHOVER_USERKEY", user))
    for setting, value in credentials:
        if not match(r"[A-Za-z0-9]{30}\Z", value):
            # Name the setting rather than echoing the secret value.
            raise utils.HandledError(
                f"Pushover string token malformed: {setting}",
                status_code=500)

    data = {
        "token": token,
        "user": user,
        "message": payload,
        "title": title,
    }

    resp = helpers.send_http_request(url=PUSHOVER_API_ENDPOINT, data=data).text

    try:
        status = resp["status"]
        # The request id is only needed for the success message.
        req_id = resp["request"] if status == 1 else None
    except (LookupError, TypeError) as error:
        raise utils.HandledError("Unexpected response from Pushover: %s" %
                                 error,
                                 status_code=500) from error

    if status != 1:
        # Include the response so the failure reason is not lost.
        raise utils.HandledError("Unexpected response from Pushover: %s" %
                                 (resp, ),
                                 status_code=500)

    return "Message sent to Pushover successful (request '%s')" % req_id