async def resolve_url_to_content(self, url):
    """Return the cleaned article text for ``url``, or ``None``.

    ``None`` is returned when ``url`` does not start with ``self.url`` or
    when the cleaned text is shorter than 100 characters. Otherwise the
    ``text`` of the article produced by ``url_to_n3karticle`` is passed
    through ``clean_html_text`` and returned.
    """
    if url.startswith(self.url):
        article = url_to_n3karticle(url)
        cleaned = clean_html_text(article.text)
        # Very short results are discarded rather than returned.
        if len(cleaned) >= 100:
            return cleaned
    return None
async def resolve_url_to_content(self, url):
    """Return the cleaned article text for ``url`` without "Photo:" lines.

    Returns ``None`` when ``url`` does not start with ``self.url``.
    Otherwise the ``text`` of the article produced by ``url_to_n3karticle``
    is passed through ``clean_html_text``, split on newlines, and every
    line beginning with ``"Photo:"`` is dropped before re-joining.
    """
    if not url.startswith(self.url):
        return None

    article = url_to_n3karticle(url)
    kept_lines = []
    for line in clean_html_text(article.text).split("\n"):
        if line.startswith("Photo:"):
            continue
        kept_lines.append(line)
    return "\n".join(kept_lines)
async def resolve_url_to_content(self, url):
    """Return the cleaned article text for ``url`` minus ignored lines.

    Returns ``None`` when ``url`` does not start with ``self.url``.
    Otherwise the ``text`` of the article produced by ``url_to_n3karticle``
    is passed through ``clean_html_text``, split on newlines, and every
    line for which ``string_contains(line, IGNORE_ITEMS)`` is truthy is
    dropped before re-joining.
    """
    if url.startswith(self.url):
        article = url_to_n3karticle(url)
        body = clean_html_text(article.text)
        return "\n".join(
            row
            for row in body.split("\n")
            if not string_contains(row, IGNORE_ITEMS)
        )
    return None
async def resolve_url_to_content(self, url):
    """Return the cleaned article text for ``url``, cut at any trim token.

    The ``text`` of the article produced by ``url_to_n3karticle`` is passed
    through ``clean_html_text``. If the result is shorter than 100
    characters, ``None`` is returned. Otherwise, for each token in
    ``TRIM_AT`` (in order), the text is truncated at the token's first
    occurrence and stripped of surrounding whitespace.

    Note that the length check happens before trimming, so the returned
    text may itself be shorter than 100 characters.
    """
    art = url_to_n3karticle(url)
    text = clean_html_text(art.text)
    if len(text) < 100:
        return None

    for trim_token in TRIM_AT:
        # ``find`` reports a missing token as -1, so no exception handler is
        # needed; unrelated errors (e.g. a non-string token) now surface
        # instead of being swallowed by a bare ``except``.
        idx = text.find(trim_token)
        if idx == -1:
            continue
        text = text[:idx].strip()
    return text
async def resolve_url_to_content(self, url):
    """Fetch ``url`` through ``self._get`` and return its cleaned text.

    The base URL ``self.url`` is removed from ``url`` before the request,
    so ``self._get`` receives the remainder (presumably a site-relative
    path -- confirm against ``_get``). The fetched HTML is handed to
    ``url_to_n3karticle`` as ``input_html`` and the resulting article's
    ``text`` is passed through ``clean_html_text``.
    """
    # Remove only the first occurrence of the base URL. An unbounded
    # ``replace`` would also delete later copies, e.g. one embedded in a
    # redirect query parameter, and corrupt the requested path.
    relative_url = url.replace(self.url, "", 1)
    html = await self._get(relative_url)
    art = url_to_n3karticle(url, input_html=html)
    return clean_html_text(art.text)