def deploy_if_feed_updates(_: utils.LambdaEvent):
    """Trigger a Netlify deploy when any of the configured feeds has changed.

    The incoming event is ignored. Feed URLs are read from the
    comma-separated DEPLOYER_FEED_URLS environment variable (spaces are
    removed before splitting). Each feed is checked via `update_feed`,
    dispatched through `helpers.exec_in_thread_and_wait`; if at least one
    check reports a change, an HTTP POST is sent to the Netlify build hook.
    """
    hook_url = f'https://api.netlify.com/build_hooks/{env["NETLIFY_HOOK"]}'
    dynamo_table = env['DYNAMODB_TABLE']
    feed_urls = env['DEPLOYER_FEED_URLS'].replace(' ', '').split(',')

    utils.Log.info("Checking updates for feeds %s", feed_urls)
    known_timestamps = _get_stored_timestamp()

    def update_feed(feed_url: str) -> bool:
        """Persist the feed's latest timestamp; return True if it changed."""
        latest = _get_last_update(feed_url)

        # A feed that is missing from storage (e.g. empty table) is treated
        # as changed, so it falls through to the write below.
        if feed_url in known_timestamps:
            previous = datetime.fromisoformat(known_timestamps[feed_url])
            utils.Log.debug(
                "%s: checking equality between stored(%s) and last(%s)",
                feed_url, previous, latest)
            if previous == latest:
                utils.Log.info('%s: no database update needed', feed_url)
                return False

        timestamp_update = {
            'Value': {'S': latest.isoformat()},
            'Action': 'PUT',
        }
        aws.update_dynamo_item(
            table_name=dynamo_table,
            key={"url": {"S": feed_url}},
            att_updates={'latest_update_timestamp': timestamp_update})
        return True

    jobs = helpers.exec_in_thread_and_wait(
        (update_feed, (feed_url, )) for feed_url in feed_urls)

    # any() stops at the first truthy result, exactly one deploy is triggered
    has_updates = any(job.result() for job in jobs.done)
    if has_updates:
        utils.Log.info("Found updates, triggering Netlify deploy")
        helpers.send_http_request(url=hook_url, method="POST", data={})

    utils.Log.info("All done")
def scrape_page(url: str) -> dict:
    """Scrape title, description and categories from the webpage at `url`.

    :param url: address of the page to fetch (HTTP GET)
    :return: dict with keys "url", "title", "categories" (list of
        non-empty, space-stripped strings taken from the comma-separated
        `<meta name="categories">` content; empty list when the tag is
        missing or unusable) and "description" (content of
        `<meta name="description">`)

    A page without a `<title>` or without a description meta tag still
    raises, as before: only the categories lookup is best-effort.
    """
    utils.Log.info("Scraping %s in search of title and description", url)
    output = {"url": url}

    utils.Log.debug("Fetching content from %s", url)
    page = helpers.send_http_request(url=url, method="GET").text

    utils.Log.debug("Parsing content with BeautifulSoup4")
    bs4 = helpers.import_non_stdlib_module("bs4")
    soup = bs4.BeautifulSoup(page, "html.parser")

    # Bound up-front so the warning path below degrades to "no categories"
    # instead of failing later with an unbound local variable.
    categories = []
    try:
        utils.Log.debug("Searching for categories meta tag")
        found = soup.find("meta", {"name": "categories"})
        raw_categories = found["content"].split(",") if found else []
        # Strip first, then drop empties, so entries such as " " do not
        # survive as empty strings.
        stripped = (raw.strip(' ') for raw in raw_categories)
        categories = [category for category in stripped if category]
    except (KeyError, TypeError) as error:
        # KeyError: the meta tag exists but carries no "content" attribute
        utils.Log.warning("Could not find any categories meta tag: %s", error)

    output.update({
        "title": soup.find("title").contents[0],
        "categories": categories,
        "description": soup.find("meta", {"name": "description"})["content"],
    })

    utils.Log.debug("Parsing done. Output: %s", output)
    utils.Log.info("Scraping completed successfully")
    return output
def deliver_to_mailjet(event: utils.LambdaEvent) -> str:
    """Send an email message through the Mailjet Send API.

    :param event: every key is optional
      - "mail_from": sender address string, 'Some Name <user@example.com>'
        ('Some Name' optional); MAILJET_FROM_ADDRESS from env is the default
      - "mail_to": recipient address string in the same form;
        MAILJET_DEFAULT_TO_ADDRESS from env is the default
      - "mail_cc": list of CC: addresses in the same form
      - "mail_bcc": list of BCC: addresses in the same form
      - "custom_id": identifier for internal Mailjet use
      - "subject": email subject
      - "text": text content
      - "attachments": list of attachment metadata dicts. E.g:

        {
          # https://www.iana.org/assignments/media-types/application/vnd.amazon.mobi8-ebook
          "ContentType": "application/vnd.amazon.mobi8-ebook",
          "Key": <s3_key>,
          "Bucket": <s3_bucket>,
          "Filename": "article.mobi",
        }

        Actual content is downloaded from the S3 bucket.

    :return: the `.text` of the Mailjet HTTP response

    Attachment format:
    https://dev.mailjet.com/email/guides/send-api-v31/#send-with-attached-files

    API docs: https://dev.mailjet.com/email/guides/send-api-v31/
    """
    utils.Log.info("Sending email message via %s", MAILJET_API_ENDPOINT)

    message = _message_from_event(
        event=event,
        default_from=env["MAILJET_FROM_ADDRESS"],
        default_to=env["MAILJET_DEFAULT_TO_ADDRESS"],
    )

    if "attachments" in event:
        message["Attachments"] = []
        utils.Log.debug("Adding %d attachments", len(event["attachments"]))

        for attachment in event["attachments"]:
            # Fills the metadata dict in place before it is attached
            _add_content_to_attachment(attachment)
            utils.Log.debug("Adding %s, content-type %s",
                            attachment["Filename"],
                            attachment["ContentType"])
            message["Attachments"].append(attachment)

    payload = json.dumps({"Messages": [message]}).encode("utf-8")
    credentials = {
        "user": env["MAILJET_API_KEY"],
        "pass": env["MAILJET_API_SECRET"],
    }

    response = helpers.send_http_request(
        url=MAILJET_API_ENDPOINT,
        data=payload,
        headers={"Content-Type": "application/json"},
        auth=credentials,
    )
    return response.text
def retrieve() -> Tuple:
    """Fetch unarchived articles from Pocket, newer than the stored 'since'.

    Docs: https://getpocket.com/developer/docs/v3/retrieve

    The last event of the CloudWatch log stream, when there is one, is
    decoded as JSON and its "since" value is forwarded to the API.

    NOTE(review): both the log group and the log stream names are read from
    the SINCE_LOG_GROUP environment variable - confirm this is intended.

    :return: `(articles, since)`: a tuple of article dicts (keys "item_id",
        "title", "url", "tags") accepted by `_validate_article`, and the
        "since" value found in the API response
    """
    request_data = {
        "access_token": env["POCKET_SECRET_TOKEN"],
        "consumer_key": env["POCKET_CONSUMER_KEY"],
        "detailType": "complete",
    }

    utils.Log.info("Fetch 'since' from storage")
    log_events = aws.read_log_stream(log_group=env["SINCE_LOG_GROUP"],
                                     log_stream=env["SINCE_LOG_GROUP"])
    if log_events:
        newest_event = log_events[-1]
        utils.Log.debug("Deserializing CloudWatch Logs message content")
        stored = json.loads(newest_event["message"])
        request_data["since"] = stored["since"]
        utils.Log.debug("Found 'since': %d", request_data["since"])

    utils.Log.info("Retrieve new articles from getpocket.com APIs")
    response = helpers.send_http_request(
        url=POCKET_API_ENDPOINT,
        data=request_data,
    ).text

    since = response["since"]
    pocket_items = response["list"]
    if not pocket_items:
        return tuple(), since

    # status 0: we filter out archived/to-be-deleted items
    candidates = (
        {
            "item_id": item_id,
            "title": item["resolved_title"],
            "url": item["resolved_url"],
            "tags": item.get("tags", {}),
        }
        for item_id, item in pocket_items.items()
        if int(item["status"]) == 0
    )
    articles = tuple(
        article for article in candidates if _validate_article(article))
    return articles, since
def _archive_pocket_item(item_id: int):
    """Archive the Pocket item identified by `item_id`.

    https://getpocket.com/developer/docs/v3/modify#action_archive

    :return: whatever `helpers.send_http_request` returns for the call
    """
    utils.Log.info("Archive Pocket item %d", item_id)

    payload = {
        "access_token": env["POCKET_SECRET_TOKEN"],
        "consumer_key": env["POCKET_CONSUMER_KEY"],
    }
    archive_action = {
        "action": "archive",
        # NOTE: API docs are inconsistent, they say `item_id` is an integer
        # but examples show string usage
        "item_id": str(item_id),
    }
    payload["actions"] = json.dumps([archive_action])

    return helpers.send_http_request(url=POCKET_API_ENDPOINT, data=payload)
def _domain_is_expiring(domain: str,
                        days: int = DEFAULT_EXPIRY_DAYS
                        ) -> Tuple[str, bool, str]:
    """Check through the WhoisXML API whether `domain` is about to expire.

    :param domain: domain name to look up
    :param days: size of the warning window, in days
    :return: tuple `(domain, expiring, expires_date_str)` where `expiring`
        is True when the registry expiry date is less than `days` days away
        (already expired domains included) and `expires_date_str` is the raw
        "expiresDate" value found in the registry data
    """
    # Function-scope imports keep this block self-contained: these names are
    # not otherwise needed by the code visible in this module.
    from datetime import timezone
    from urllib.parse import urlencode

    # Timezone-aware "now": a naive utcnow() cannot be subtracted from an
    # expiry date that carries a UTC offset (TypeError).
    now = datetime.now(timezone.utc)
    delta = timedelta(days=days)

    # urlencode escapes reserved characters instead of interpolating the
    # values verbatim into the query string.
    query = urlencode({
        "apiKey": env["WHOISXMLAPI_KEY"],
        "domainName": domain,
        "outputFormat": "JSON",
    })
    response = helpers.send_http_request(
        url=f"{WHOISXMLAPI_ENDPOINT}?{query}",
        method="GET",
    )

    payload = json.loads(response.body)
    registry_data = payload["message"]["WhoisRecord"]["registryData"]
    expires_date_str = registry_data["expiresDate"]

    # A trailing "Z" is removed before parsing; the resulting naive value is
    # then interpreted as UTC, matching the previous utcnow() comparison.
    expires_date = datetime.fromisoformat(expires_date_str.rstrip("Z"))
    if expires_date.tzinfo is None:
        expires_date = expires_date.replace(tzinfo=timezone.utc)

    return domain, (expires_date - now) < delta, expires_date_str
def send(event: utils.LambdaEvent) -> str:
    """Send payload as message to Pushover API.

    :param event: must provide "title" (message title) and "payload"
        (message body) keys
    :return: confirmation string including the Pushover request id
    :raises utils.HandledError: (status code 500) when PUSHOVER_TOKEN or
        PUSHOVER_USERKEY is malformed, or when the API response does not
        report `status == 1` together with a "request" id
    """
    utils.Log.info("Delivering message via Pushover")

    title = event["title"]
    payload = event["payload"]
    token = env["PUSHOVER_TOKEN"]
    user = env["PUSHOVER_USERKEY"]

    for string in token, user:
        # Explicit check instead of `assert`, which is stripped under -O.
        # Pushover keys are 30 characters from [A-Za-z0-9]; the pattern is
        # anchored so the whole value is validated, not only its first
        # character.
        if not match(r"[A-Za-z0-9]{30}\Z", string):
            # NOTE(review): the message embeds the credential value; it is
            # left as-is for compatibility - consider redacting it.
            raise utils.HandledError(
                f"Pushover string token malformed: {string}",
                status_code=500)

    data = {
        "token": token,
        "user": user,
        "message": payload,
        "title": title,
    }
    resp = helpers.send_http_request(url=PUSHOVER_API_ENDPOINT,
                                     data=data).text

    # Deliberately broad: any problem reading the response is reported to
    # the caller as a HandledError.
    try:
        status = resp["status"]
        if status != 1:
            raise ValueError(f"status is {status!r}, expected 1")
        req_id = resp["request"]
    except Exception as error:
        raise utils.HandledError(
            "Unexpected response from Pushover: %s" % error,
            status_code=500) from error

    return "Message sent to Pushover successful (request '%s')" % req_id