def save_website(url, file_name):
    """Print a web page to a PDF file by running the configured puppeteer command.

    The command line is built from the ``puppeteer_template`` config entry, a
    ``str.format`` template that receives ``url`` as its first argument and the
    output file path as its second. The output path is ``file_name`` inside the
    directory named by the ``pdf_target_path`` config entry; that directory is
    created when it does not exist yet.

    :param url: address of the page to print
    :param file_name: name of the PDF file inside the target directory
    :returns: whatever ``call`` returns for the command (the process exit code
        if ``call`` is ``subprocess.call`` -- confirm against the file's
        imports), or ``None`` when anything raised; errors are logged, not
        propagated
    """
    try:
        LOGGER.info("puppeteer print for {}".format(url))
        settings = config.get_config()
        target_path = settings['pdf_target_path']
        os.makedirs(target_path, exist_ok=True)
        file_path = os.path.join(target_path, file_name)
        LOGGER.info("generating PDF file with name {}".format(file_name))

        # Split the template into argv entries BEFORE the real values go in.
        # Formatting first and splitting afterwards would tear a URL or path
        # that contains a space into several arguments. The placeholders hold
        # no space and use NUL bytes, which cannot occur in a real argument.
        url_mark, path_mark = "\x00url\x00", "\x00path\x00"
        skeleton = settings['puppeteer_template'].format(url_mark, path_mark)
        cmd = [
            part.replace(url_mark, str(url)).replace(path_mark, str(file_path))
            for part in skeleton.split(' ')
        ]

        LOGGER.info("calling puppeteer script as -- {}".format(cmd))
        return call(cmd)
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        LOGGER.error(e)
        return None
def save_pdf(html, file_name):
    """Render an HTML string to a PDF file inside the configured PDF directory.

    The file is written as ``file_name`` inside the directory named by the
    ``pdf_target_path`` config entry; that directory is created when it does
    not exist yet.

    :param html: HTML source handed to ``pdfkit.from_string``
    :param file_name: name of the PDF file inside the target directory
    :returns: ``None``; errors are logged, not propagated
    """
    try:
        target_path = config.get_config()['pdf_target_path']
        os.makedirs(target_path, exist_ok=True)
        file_path = os.path.join(target_path, file_name)
        LOGGER.info("generating PDF file with name {}".format(file_name))
        # Pass the full path: using the bare file name would drop the PDF into
        # the current working directory instead of the target directory.
        pdfkit.from_string(html, file_path)
    except Exception as e:
        LOGGER.error(e)
def save_html(html: str, file_name: str):
    """Write the given HTML text unchanged to a file in the HTML directory.

    The file is written as ``file_name`` inside the directory named by the
    ``html_target_path`` config entry; that directory is created when it does
    not exist yet. An existing file of the same name is overwritten.

    :param html: text to write
    :param file_name: name of the file inside the target directory
    :returns: ``None``; errors are logged, not propagated
    """
    try:
        target_path = config.get_config()['html_target_path']
        os.makedirs(target_path, exist_ok=True)
        file_path = os.path.join(target_path, file_name)
        LOGGER.info("saving html file with name {}".format(file_name))
        # Pin the encoding: the platform default is not UTF-8 everywhere, and
        # pages routinely contain characters outside the local code page.
        with open(file_path, mode='w', encoding='utf-8') as f:
            f.write(html)
    except Exception as e:
        LOGGER.error(e)
def save_pdf_directly(response):
    """Write the body of a response to a PDF file in the configured PDF directory.

    The file name is taken from ``get_fn_from_header(response)``; when that
    yields nothing usable, it is derived from the response URL with every
    non-word character removed, plus a ``.pdf`` suffix. The URL's host is
    prepended so that equal names from different hosts do not collide. Nothing
    is written when the resulting file already exists.

    :param response: object exposing ``url`` (str) and ``body`` (bytes-like)
    :returns: ``None``
    """
    target_path = config.get_config()['pdf_target_path']
    os.makedirs(target_path, exist_ok=True)

    header_name = get_fn_from_header(response)
    # The header value comes from the remote server; keep only its last path
    # component so it cannot point outside the target directory.
    file_name = os.path.basename(header_name) if header_name else ""
    if not file_name:
        title = re.sub(r"[\W]", "", response.url.strip())
        file_name = "{}.pdf".format(title)

    # making it host specific
    host = urlparse(response.url).netloc
    file_path = os.path.join(target_path, host + file_name)

    if os.path.exists(file_path):
        return

    print('Saving PDF {}'.format(file_path))
    with open(file_path, 'wb') as f:
        f.write(response.body)
def try_parse_url(url):
    """Fetch the parser service's JSON summary for a URL.

    The request address is ``url_template`` formatted with the URL-quoted
    ``url``; the ``mercury`` config entry is sent as the ``x-api-key`` header.

    :param url: address of the page to summarise
    :returns: the decoded JSON response, or ``None`` when anything raised. On
        failure the error is logged and the call blocks for 120 seconds before
        returning, as a crude back-off.
    """
    try:
        req_url = url_template.format(parse.quote_plus(url))
        LOGGER.info("mercury outgoing request -- {}".format(req_url))
        req = request.Request(req_url)
        req.add_header('x-api-key', config.get_config()['mercury'])
        req.add_header('Content-Type', 'application/json')
        # Close the connection as soon as the payload is read instead of
        # leaving it to the garbage collector.
        with request.urlopen(req) as response:
            raw_content = response.read()
        LOGGER.info("mercury parsing complete for {}".format(url))
        return json.loads(raw_content.decode('utf-8'))
    except Exception as e:
        LOGGER.error(e)
        LOGGER.warning("might have reached a timeout. waiting a bit")
        LOGGER.warning(url)
        time.sleep(120)
        return None
import json
import logging

from components import telegram_conn, mercury, website, pdf_maker, config

# Root logger setup: everything from DEBUG upwards, with timestamp, logger
# name and level in front of each message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)


def startup():
    """Start polling on the telegram connection; blocks until disconnect."""
    telegram_conn.start_polling()


if __name__ == "__main__":
    # Hand the 'telegram' config entry to the connection, then start polling.
    telegram_settings = config.get_config()['telegram']
    telegram_conn.init(telegram_settings)
    startup()
from urllib import request, parse
import json
import logging
from components import config, website, mercury

LOGGER = logging.getLogger(__name__)

# Read once at import time from the 'pocket' config entry.
consumer_key = config.get_config()['pocket']

# Pocket API endpoints used by this module.
urls = {
    "auth_post": "https://getpocket.com/v3/oauth/request",
    "get_articles": "https://getpocket.com/v3/get",
    "get_access_token": "https://getpocket.com/v3/oauth/authorize"
}

# Form-encoded request body, JSON response.
headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF8",
    "X-Accept": "application/json"
}


def get_request_token(redirect_uri: str = 'https://pascalbrokmeier.de') -> str:
    """POST the consumer key to the ``auth_post`` endpoint and return the code.

    :param redirect_uri: value sent as ``redirect_uri``; defaults to the
        address this module has always used
    :returns: the ``code`` field of the JSON response
    :raises urllib.error.URLError: when the request fails
    :raises KeyError: when the response carries no ``code`` field
    """
    params = [('consumer_key', consumer_key),
              ('redirect_uri', redirect_uri)]
    params_bytes = parse.urlencode(params).encode()
    req = request.Request(urls['auth_post'],
                          method="POST",
                          data=params_bytes,
                          headers=headers)
    # Close the connection as soon as the payload is read.
    with request.urlopen(req) as response:
        json_string = response.read().decode('utf-8')
    return json.loads(json_string)['code']
import json import os from datetime import date from components import config web_root = config.get_config()['website_root'] import logging LOGGER = logging.getLogger(__name__) def add_json_summary(summary): """takes a json summary from mercury and adds it to the websites list of read things """ summary = process_summary(summary) if summary is None: return _add_summary(summary) def process_summary(summary: dict): """TODO: Docstring for process_summary. :summary: dict: TODO :returns: TODO """ try: del summary['content'] summary['date_read'] = date.today().isoformat() except Exception as e: LOGGER.error(e) LOGGER.error(summary)