def fetch_sites_metadata(
    self, opml_filename: str
) -> List[Tuple[str, Optional[str], Optional[str]]]:
    """Parse an OPML file and return one (feed_url, title, category) per RSS feed.

    The OPML file is looked up inside ``self.settings.base_output_path``.
    Feeds whose ``xmlUrl`` starts with any prefix in ``self.settings.skip_urls``
    are excluded from the result. ``title`` and ``category`` may be ``None``
    when the corresponding attribute is absent.

    Exits the process via ``Log.error_and_exit`` if the file does not exist.
    """
    opml_filepath = os.path.join(self.settings.base_output_path, opml_filename)

    def site_outline(element: Any) -> bool:
        # A feed entry is an <outline> node with an xmlUrl and type="rss".
        # get() with a default already handles a missing "type" attribute,
        # so no separate has_attr("type") check is needed.
        return (
            element.name == "outline"
            and element.has_attr("xmlUrl")
            and element.get("type", "") == "rss"
        )

    def site_category(element: Any) -> Optional[str]:
        # The category is the title of the enclosing <outline> grouping
        # node, when present; top-level feeds have no category.
        parent = element.find_parent()
        if parent.name == "outline" and parent.has_attr("title"):
            return parent.get("title")
        return None

    if not os.path.exists(opml_filepath):
        Log.error_and_exit(
            "OPML file '{}' not found".format(opml_filepath))

    with open(opml_filepath, encoding="utf-8") as opml_file_handle:
        xml_contents = opml_file_handle.read()

    soup = BeautifulSoup(xml_contents, "xml")
    sites = soup.opml.body.find_all(site_outline)

    # str.startswith accepts a tuple of prefixes; with an empty tuple it
    # returns False, so an empty skip list keeps every feed (same behavior
    # as the previous any([...]) construction).
    skip_prefixes = tuple(self.settings.skip_urls)
    return [
        (site["xmlUrl"], site.get("title"), site_category(site))
        for site in sites
        if not site["xmlUrl"].startswith(skip_prefixes)
    ]
def save(self) -> None:
    """Persist the current fetch timestamp and skip-URL list as JSON.

    Writes ``{KEY_LAST_FETCH, KEY_SKIP_URLS}`` to ``SETTINGS_FILENAME``
    inside ``self.base_output_path`` and logs the new fetch mark.

    Exits the process via ``Log.error_and_exit`` if the output directory
    does not exist.
    """
    fetch_mark = datetime.now()
    file_path = os.path.join(self.base_output_path, SETTINGS_FILENAME)
    if not os.path.exists(self.base_output_path):
        Log.error_and_exit(
            "Output path '{}' not found".format(self.base_output_path))
    data = {
        KEY_LAST_FETCH: fetch_mark.timestamp(),
        KEY_SKIP_URLS: self.skip_urls,
    }
    # Explicit UTF-8 avoids depending on the platform default encoding and
    # matches how the settings/OPML files are read elsewhere.
    with open(file_path, "w", encoding="utf-8") as file_handle:
        json.dump(data, file_handle, indent=None)
    Log.info("> Fetch mark set to: {}".format(fetch_mark))