def pipe_fetchsitefeed(context=None, _INPUT=None, conf=None, **kwargs):
    """A source that fetches and parses the feeds found on one or more sites.
    Loopable.

    For every input item, each configured URL is resolved against that item,
    the page is searched for feed links with ``autorss.getRSSLink``, every
    link found is downloaded and parsed with ``speedparser``, and the
    resulting entries are yielded one at a time.

    Parameters
    ----------
    context : pipe2py.Context object
        Only ``context.verbose`` is read here; when truthy, each URL is
        printed before it is loaded.
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : mapping with a ``'URL'`` key
        URL -- url definition(s); the value is passed through
        ``utils.listize`` and each element is resolved with
        ``utils.get_value``.
    **kwargs
        Forwarded unchanged to ``utils.get_value``.

    Yields
    ------
    _OUTPUT : items
        Whatever ``utils.gen_entries`` produces for each parsed feed.
    """
    conf = DotDict(conf)
    urls = utils.listize(conf['URL'])

    for item in _INPUT:
        for item_url in urls:
            url = utils.get_value(DotDict(item_url), DotDict(item), **kwargs)
            url = utils.get_abspath(url)

            if context and context.verbose:
                # Single-argument form prints the same text whether `print`
                # is a statement (Python 2) or a function (Python 3).
                print("pipe_fetchsitefeed loading: %s" % (url,))

            for link in autorss.getRSSLink(url.encode('utf-8')):
                # Read the whole feed, then release the connection even if
                # reading fails; the response was previously never closed.
                response = urlopen(link)
                try:
                    content = response.read()
                finally:
                    response.close()

                parsed = speedparser.parse(content)

                for entry in utils.gen_entries(parsed):
                    yield entry

        if item.get('forever'):
            # _INPUT is pipeforever and not a loop,
            # so we just yield our item once
            break
# NOTE(review): another definition of pipe_feedautodiscovery appears later in
# this file; if both stay at module level, the later one replaces this one.
def pipe_feedautodiscovery(context=None, _INPUT=None, conf=None, **kwargs):
    """A source that searches for and returns feed links found in a page.
    Loopable.

    For every input item, each configured URL is resolved against that item
    and handed to ``autorss.getRSSLink``; one item is yielded per link
    returned.

    Parameters
    ----------
    context : pipe2py.Context object
        Only ``context.verbose`` is read here; when truthy, each URL is
        printed before it is loaded.
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : mapping with a ``'URL'`` key
        URL -- url definition(s); the value is passed through
        ``utils.listize`` and each element is resolved with
        ``utils.get_value``.
    **kwargs
        Forwarded unchanged to ``utils.get_value``.

    Yields
    ------
    _OUTPUT : items
        Dicts of the form ``{'link': <feed link>}``.
    """
    conf = DotDict(conf)
    urls = utils.listize(conf['URL'])

    for item in _INPUT:
        for item_url in urls:
            url = utils.get_value(DotDict(item_url), DotDict(item), **kwargs)
            url = utils.get_abspath(url)

            if context and context.verbose:
                # Single-argument form prints the same text whether `print`
                # is a statement (Python 2) or a function (Python 3).
                print("pipe_feedautodiscovery loading: %s" % (url,))

            for entry in autorss.getRSSLink(url.encode('utf-8')):
                yield {'link': entry}
                # todo: add rel, type, title

        if item.get('forever'):
            # _INPUT is pipeforever and not a loop,
            # so we just yield our item once
            break
# NOTE(review): this redefines pipe_feedautodiscovery, which already appears
# earlier in this file. The earlier definition (and pipe_fetchsitefeed) refer
# to the helper module as `utils`, while this one uses `util` -- confirm which
# name is actually imported and whether both definitions are meant to exist.
def pipe_feedautodiscovery(context=None, _INPUT=None, conf=None, **kwargs):
    """This source searches for feed links in a page.

    Keyword arguments:
    context -- pipeline context; only ``context.verbose`` is read here, and
        when truthy each URL is printed before it is loaded
    _INPUT -- iterable of items; every configured URL is resolved against
        each item, and iteration stops after the first item whose
        ``'forever'`` entry is truthy
    conf:
        URL -- url definition(s); passed through ``util.listize`` and each
            element is resolved with ``util.get_value``
    kwargs -- forwarded unchanged to ``util.get_value``

    Yields (_OUTPUT):
    dicts of the form ``{'link': <feed link>}``, one per link returned by
    ``autorss.getRSSLink``
    """
    conf = DotDict(conf)
    urls = util.listize(conf['URL'])

    for item in _INPUT:
        for item_url in urls:
            url = util.get_value(DotDict(item_url), DotDict(item), **kwargs)
            url = util.get_abspath(url)

            if context and context.verbose:
                # Single-argument form prints the same text whether `print`
                # is a statement (Python 2) or a function (Python 3).
                print("pipe_feedautodiscovery loading: %s" % (url,))

            for entry in autorss.getRSSLink(url.encode('utf-8')):
                yield {'link': entry}
                # todo: add rel, type, title

        if item.get('forever'):
            # _INPUT is pipeforever and not a loop,
            # so we just yield our item once
            break