예제 #1
0
    def fetch_sites_metadata(
            self, opml_filename: str
    ) -> List[Tuple[str, Optional[str], Optional[str]]]:
        """Collect the RSS feed entries listed in an OPML file.

        The file is looked up inside ``self.settings.base_output_path`` and
        parsed as XML. Every ``<outline>`` element below ``<opml><body>``
        that has an ``xmlUrl`` attribute and ``type="rss"`` is treated as a
        site. Sites whose ``xmlUrl`` starts with any entry of
        ``self.settings.skip_urls`` are left out.

        Args:
            opml_filename: Name of the OPML file, relative to the base
                output path.

        Returns:
            One ``(xml_url, title, category)`` tuple per retained site.
            ``title`` is the outline's ``title`` attribute, or ``None`` when
            absent. ``category`` is the ``title`` of the directly enclosing
            ``<outline>`` element, or ``None`` when the parent is not an
            outline or has no title.
        """
        opml_filepath = os.path.join(self.settings.base_output_path,
                                     opml_filename)

        def site_outline(element: Any) -> bool:
            # get() with a default already yields "" for a missing "type"
            # attribute, so a separate has_attr("type") check is not needed.
            return (element.name == "outline"
                    and element.has_attr("xmlUrl")
                    and element.get("type", "") == "rss")

        def site_category(element: Any) -> Optional[str]:
            # Only the immediate parent is inspected, not further ancestors.
            parent = element.find_parent()
            if parent.name == "outline" and parent.has_attr("title"):
                return parent.get("title")
            return None

        if not os.path.exists(opml_filepath):
            # NOTE(review): presumably error_and_exit terminates the process;
            # confirm, otherwise the open() below fails on the missing file.
            Log.error_and_exit(
                "OPML file '{}' not found".format(opml_filepath))

        with open(opml_filepath, encoding="utf-8") as opml_file_handle:
            xml_contents = opml_file_handle.read()
        soup = BeautifulSoup(xml_contents, "xml")
        sites = soup.opml.body.find_all(site_outline)

        # str.startswith accepts a tuple of prefixes; an empty tuple never
        # matches, so an empty skip list keeps every site.
        skip_prefixes = tuple(self.settings.skip_urls)

        return [(site["xmlUrl"], site.get("title"), site_category(site))
                for site in sites
                if not site["xmlUrl"].startswith(skip_prefixes)]
예제 #2
0
파일: settings.py 프로젝트: davidfq/pbrr
    def save(self) -> None:
        """Persist the current fetch mark and the skip-URL list as JSON.

        The settings file is written to ``SETTINGS_FILENAME`` inside
        ``self.base_output_path``. The fetch mark is the current local time,
        stored as a POSIX timestamp, and is logged once the file is written.
        """
        now = datetime.now()
        output_dir = self.base_output_path
        settings_path = os.path.join(output_dir, SETTINGS_FILENAME)

        if not os.path.exists(output_dir):
            Log.error_and_exit("Output path '{}' not found".format(output_dir))

        payload = {}
        payload[KEY_LAST_FETCH] = now.timestamp()
        payload[KEY_SKIP_URLS] = self.skip_urls

        with open(settings_path, "w") as settings_file:
            # Compact single-line output (json.dump's default when no indent
            # is given).
            json.dump(payload, settings_file)

        Log.info("> Fetch mark set to: {}".format(now))