def index() -> Results:
    """Yield visits for Hacker News items provided by ``my.hackernews.dogsheep``.

    Exceptions produced by the data source are passed through unchanged.
    Every other item yields a visit for its permalink and, when the item
    carries an external URL, a second visit for that URL.
    """
    from . import hpi
    from my.hackernews import dogsheep

    for entry in dogsheep.items():
        # Errors are forwarded to the consumer instead of being raised here.
        if isinstance(entry, Exception):
            yield entry
            continue

        story_url = entry.permalink

        # Prefer the title, then the HTML body, then a fixed placeholder.
        raw_title = entry.title or entry.text_html or "hackernews"
        short_title = textwrap.shorten(
            raw_title,
            width=79,
            placeholder="…",
            break_long_words=True,
        )

        # The locator always targets the HN permalink, so a visit emitted for
        # an external link still leads back to the HN story it came from.
        locator = Loc.make(title=short_title, href=story_url)

        external = entry.url
        targets = [story_url] if external is None else [story_url, external]

        yield from (
            Visit(
                url=target,
                dt=entry.created,
                locator=locator,
                context=short_title,
            )
            for target in targets
        )
def index(*, render_markdown: bool = False) -> Results:
    """Yield a visit for every URL found in the content of each Discord message.

    Args:
        render_markdown: when true, the message content is rendered to HTML
            with promnesia's markdown ``TextParser`` and used as the visit
            context; otherwise the raw message content is used.

    Yields:
        One ``Visit`` per URL that ``iter_urls`` extracts from the message
        content, located at the message's permalink.
    """
    from my.discord import messages

    # The markdown renderer is only needed when rendering was requested.
    # Importing it lazily keeps the default (render_markdown=False) path
    # working on installs that lack promnesia's markdown dependencies
    # (e.g. not installed via pip3 install '.[all]').
    if render_markdown:
        from promnesia.sources.markdown import TextParser

    for m in messages():
        # hmm - extract URLs from attachments?
        # Probably not very useful unless info is extracted from them with
        # url_metadata or something similar.
        context: str = m.content

        # If the render_markdown flag is enabled, render the text as markdown (HTML).
        if render_markdown:
            context = TextParser(m.content)._doc_ashtml()

        # Permalink back to this Discord message.
        loc = Loc.make(title=m.channel.description, href=m.link)

        for u in iter_urls(m.content):
            yield Visit(
                url=u,
                dt=m.timestamp,
                context=context,
                locator=loc,
            )
def err_ex():
    """Yield three items: two visits with an ``ExtractionError`` in between.

    The items are, in order: a ``Visit`` for ``http://test0``, an
    ``ExtractionError`` instance, and a ``Visit`` for ``http://test2``.
    """
    # Naive Unix-epoch timestamp. Same value that datetime.utcfromtimestamp(0)
    # returned, without calling the API deprecated since Python 3.12.
    epoch = datetime(1970, 1, 1)

    for i in range(3):
        if i == 1:
            # The error is yielded as a value, not raised.
            yield ExtractionError()
        else:
            yield Visit(
                url=f'http://test{i}',
                dt=epoch,
                locator=Loc.make('whatever'),
            )
def index() -> Results:
    """Yield one visit per (game, shelf) pair from the Grouvee export.

    Each visit points at the game's URL, is timestamped with the moment the
    game was added to the shelf, and carries ``"<game> (<shelf>)"`` as context.
    """
    from my.grouvee import games
    # NOTE(review): Game is not referenced in this body; the import is kept
    # as-is so import-time behaviour stays unchanged.
    from grouvee_export.dal import Game

    for game in games():
        href = game.url
        # One locator per game, shared by all of its shelf visits.
        locator = Loc.make(title=f"Grouvee {game.name}", href=href)
        yield from (
            Visit(
                url=href,
                dt=shelf.added,
                locator=locator,
                context=f"{game.name} ({shelf.name})",
            )
            for shelf in game.shelves
        )
def index() -> Results:
    """Yield a visit for every ``pageview`` event in the Twitch GDPR export.

    Events of any other type are skipped. The event's context, converted to a
    string, is used as both the visit URL and the locator target.
    """
    from my.twitch.gdpr import events

    for event in events():
        # Only page views are turned into visits.
        if event.event_type != "pageview":
            continue

        page_url = str(event.context)
        locator = Loc.make(title=f"Twitch {event.context}", href=page_url)
        yield Visit(
            url=page_url,
            dt=event.dt,
            locator=locator,
        )