def get_rss(u):
    """Fetch and parse the RSS feed at URL *u*.

    Returns a list of (link, title) tuples, with titles ASCII-encoded
    (non-ASCII characters replaced).  On any parse/fetch failure the
    function is best-effort and returns an empty list.
    """
    o = []
    try:
        result = rssparser.parse(u)
        for item in result['items']:
            # Missing fields fall back to a visible placeholder.
            title = item.get('title', u"(none)")
            url = item.get('link', u"(none)")
            o.append((url, title.encode('ascii', 'replace')))
        return o
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort contract but narrow it.
        return []
def pingback(request, source, target):
    """XML-RPC pingback handler: register *source* as linking to *target*.

    Fetches the source page, verifies it actually links to *target*, then
    stores the pingback as a comment on the target entry.  Raises Fault
    0x0010 if the source cannot be fetched and 0x0011 if the source page
    does not link to the target.  Returns a success message string.
    """
    source_file = urllib.urlopen(source.split('#')[0])
    if source_file.headers.get('error', '') == '404':
        raise Fault(0x0010, "Target %s not exists" % target)
    source_page = parser()
    source_page.feed(source_file.read())
    source_file.close()
    if source_page.title == "":
        # Fall back to the URL when the page has no <title>.
        source_page.title = source
    if target in source_page.hrefs:
        target_entry = fileFor(request, target)
        body = ''
        try:
            # Best effort: look for a feed on the source site and pull a
            # richer title/excerpt from the matching feed item.
            from rssfinder import getFeeds
            from rssparser import parse
            # Hoisted, raw-string regexes (were non-raw '\s+' literals).
            tag_re = re.compile(r'<.*?>', re.S)
            ws_re = re.compile(r'\s+')
            baseurl = source.split("#")[0]
            for feed in getFeeds(baseurl):
                for item in parse(feed)['items']:
                    if item['link'] == source:
                        if 'title' in item:
                            source_page.title = item['title']
                        if 'content_encoded' in item:
                            body = item['content_encoded'].strip()
                        if 'description' in item:
                            # Prefer description; keep content_encoded if
                            # description is empty.
                            body = item['description'].strip() or body
                        body = tag_re.sub('', body)
                        body = ws_re.sub(' ', body)
                        # Truncate on a word boundary within 250 chars.
                        body = body[:body.rfind(' ', 0, 250)][:250] + " ...<br />"
        except Exception:
            # Feed discovery is optional enrichment; ignore its failures
            # (was a bare `except:`).
            pass
        cmt = {'title': source_page.title,
               'author': 'Pingback from %s' % source_page.title,
               'pubDate': str(time.time()),
               'link': source,
               'source': '',
               'description': body}
        from comments import writeComment
        config = request.getConfiguration()
        data = request.getData()
        data['entry_list'] = [target_entry]
        # TODO: Check if comment from the URL exists
        writeComment(config, data, cmt)
        return "success pinging %s from %s\n" % (source, target)
    else:
        raise Fault(0x0011, "%s does not point to %s" % (target, source))
def main(): import sys if sys.argv[1:]: urls = sys.argv[1:] else: urls = URLS from pprint import pprint urls = tv.aggregator.db.services.getsubscriptions() random.shuffle(urls) for url in urls: print url print "->", sys.stdout.flush() service, feedconfig = tv.aggregator.db.services.getserviceinfoandconfig(url) # pprint((service, feedconfig)) if mx.DateTime.now() < service.get("TVlastfetched", mx.DateTime.DateTime(0)) + mx.DateTime.TimeDelta( minutes=feedconfig.get("fetchhowoften", 60) ): print "will wait until at least %s for next fetch (%d minutes after last fetch)" % ( service.get("TVlastfetched") + mx.DateTime.TimeDelta(minutes=feedconfig.get("fetchhowoften", 60)), feedconfig.get("fetchhowoften", 60), ) continue if "TVmodified" in service: modified = service["TVmodified"].tuple() else: modified = None result = rssparser.parse(url, etag=service.get("TVetag"), modified=modified) service.update(result["channel"]) fixService(service, result.get("etag"), result.get("modified")) for x in result["items"]: fixItem(x, url, result["channel"]) if tv.aggregator.db.items.checkdupe(x): print "-", sys.stdout.flush() else: service["TVitemsfetched"] = service.get("TVitemsfetched", 0) + 1 service["TVlastnewitem"] = mx.DateTime.now() try: tv.aggregator.db.items.saveitem(x) except: print "DUPERROR", print "*", # pprint(x) print # pprint(service) tv.aggregator.db.services.savefeedinfo(service)
def pingback(request, source, target):
    """XML-RPC pingback handler: register *source* as linking to *target*.

    Fetches the source page, verifies it actually links to *target*, then
    stores the pingback as a comment on the target entry.  Raises Fault
    0x0010 if the source cannot be fetched and 0x0011 if the source page
    does not link to the target.  Returns a success message string.
    """
    source_file = urllib.urlopen(source.split('#')[0])
    if source_file.headers.get('error', '') == '404':
        raise Fault(0x0010, "Target %s not exists" % target)
    source_page = parser()
    source_page.feed(source_file.read())
    source_file.close()
    if source_page.title == "":
        # Fall back to the URL when the page has no <title>.
        source_page.title = source
    if target in source_page.hrefs:
        target_entry = fileFor(request, target)
        body = ''
        try:
            # Best effort: discover a feed on the source site and pull a
            # richer title/excerpt from the matching feed item.
            from rssfinder import getFeeds
            from rssparser import parse
            # Hoisted, raw-string regexes (were non-raw '\s+' literals).
            tag_re = re.compile(r'<.*?>', re.S)
            ws_re = re.compile(r'\s+')
            baseurl = source.split("#")[0]
            for feed in getFeeds(baseurl):
                for item in parse(feed)['items']:
                    if item['link'] == source:
                        if 'title' in item:
                            source_page.title = item['title']
                        if 'content_encoded' in item:
                            body = item['content_encoded'].strip()
                        if 'description' in item:
                            # Prefer description; keep content_encoded if
                            # description is empty.
                            body = item['description'].strip() or body
                        body = tag_re.sub('', body)
                        body = ws_re.sub(' ', body)
                        # Truncate on a word boundary within 250 chars.
                        body = body[:body.rfind(' ', 0, 250)][:250] + " ...<br />"
        except Exception:
            # Feed discovery is optional enrichment; ignore its failures
            # (was a bare `except:`).
            pass
        cmt = {'title': source_page.title,
               'author': 'Pingback from %s' % source_page.title,
               'pubDate': str(time.time()),
               'link': source,
               'source': '',
               'description': body}
        from comments import writeComment
        config = request.getConfiguration()
        data = request.getData()
        data['entry_list'] = [target_entry]
        # TODO: Check if comment from the URL exists
        writeComment(config, data, cmt)
        return "success pinging %s from %s\n" % (source, target)
    else:
        raise Fault(0x0011, "%s does not point to %s" % (target, source))
def _rss(self, irc, text):
    """Extract the Wunderground RSS feed URL from *text*, fetch it, and
    reply with the feed's entry summaries (plus any severe-weather alert,
    bolded, appended at the end)."""
    alert = ''
    severe_match = self._rsswunderSevere.search(text)
    if severe_match:
        alert = ircutils.bold(severe_match.group(1))
    feed_match = self._rsswunderfeed.search(text)
    if not feed_match:
        # No feed link in the page -> treat as unknown location.
        Weather._noLocation()
    feed_url = feed_match.group(1)
    raw = utils.web.getUrl(feed_url, headers=Weather.headers)
    parsed = rssparser.parse(raw)
    summaries = [entry['summary'].encode('utf-8') for entry in parsed['entries']]
    summaries.append(alert)
    irc.reply(utils.web.htmlToText('; '.join(summaries)))
def pingback(request, source, target):
    """XML-RPC pingback handler with anti-spam filtering.

    Fetches the source page, verifies it links to *target*, runs the
    registered "trackback_reject" callbacks, and stores the pingback as a
    comment.  Raises Fault 0x0010 when the source cannot be fetched,
    0x0031 when a spam plugin rejects it, and 0x0011 when the source page
    does not link to the target.  Returns a success message string.
    """
    logger = tools.getLogger()
    logger.info("pingback started")
    source_file = urllib.urlopen(source.split('#')[0])
    if source_file.headers.get('error', '') == '404':
        raise Fault(0x0010, "Target %s not exists" % target)
    source_page = parser()
    source_page.feed(source_file.read())
    source_file.close()
    if source_page.title == "":
        # Fall back to the URL when the page has no <title>.
        source_page.title = source
    if target in source_page.hrefs:
        target_entry = fileFor(request, target)
        body = ''
        try:
            # Best effort: discover a feed on the source site and pull a
            # richer title/excerpt from the matching feed item.
            from rssfinder import getFeeds
            from rssparser import parse
            # Hoisted, raw-string regexes (were non-raw '\s+' literals).
            tag_re = re.compile(r'<.*?>', re.S)
            ws_re = re.compile(r'\s+')
            baseurl = source.split("#")[0]
            for feed in getFeeds(baseurl):
                for item in parse(feed)['items']:
                    if item['link'] == source:
                        if 'title' in item:
                            source_page.title = item['title']
                        if 'content_encoded' in item:
                            body = item['content_encoded'].strip()
                        if 'description' in item:
                            # Prefer description; keep content_encoded if
                            # description is empty.
                            body = item['description'].strip() or body
                        body = tag_re.sub('', body)
                        body = ws_re.sub(' ', body)
                        # Truncate on a word boundary within 250 chars.
                        body = body[:body.rfind(' ', 0, 250)][:250] + " ...<br />"
        except Exception:
            # Feed discovery is optional enrichment; ignore its failures
            # (was a bare `except:`).
            pass
        cmt = {'title': source_page.title,
               'author': 'Pingback from %s' % source_page.title,
               'pubDate': str(time.time()),
               'link': source,
               'source': '',
               'description': body}
        # run anti-spam plugins
        argdict = {"request": request, "comment": cmt}
        reject = tools.run_callback("trackback_reject",
                                    argdict,
                                    donefunc=lambda x: x != 0)
        # Callbacks may return either a code or a (code, message) pair.
        if isinstance(reject, (tuple, list)) and len(reject) == 2:
            reject_code, reject_message = reject
        else:
            reject_code, reject_message = reject, "Pingback rejected."
        if reject_code == 1:
            raise Fault(0x0031, reject_message)
        from comments import writeComment
        config = request.getConfiguration()
        data = request.getData()
        data['entry_list'] = [target_entry]
        # TODO: Check if comment from the URL exists
        writeComment(request, config, data, cmt, config['blog_encoding'])
        return "success pinging %s from %s\n" % (target, source)
    else:
        raise Fault(0x0011, "%s does not point to %s" % (source, target))
def fetch_feed(self, url):
    """Fetch and parse the feed at *url*, returning the rssparser result."""
    return rssparser.parse(url)
def stats(self, irc, msg, args, project):
    """[<project>]

    Returns the current statistics for <project>.  <project> is not
    needed if there is a default project set.
    """
    url = 'http://sourceforge.net/' \
          'export/rss2_projsummary.php?project=' + project
    results = rssparser.parse(url)
    if not results['items']:
        irc.errorInvalid('SourceForge project name', project)

    # Accumulator for the parsed statistics fields.
    class x:
        pass

    def get(r, s):
        # First full match of r in s, or bail out with an error reply.
        m = r.search(s)
        if m is not None:
            return m.group(0)
        else:
            irc.error('Sourceforge gave me a bad RSS feed.', Raise=True)

    def gets(r, s):
        # All group-1 captures of r in s.
        return [m.group(1) for m in r.finditer(s)]

    def afterColon(s):
        return s.split(': ', 1)[-1]

    try:
        # Each RSS item's title identifies which statistic it carries.
        for item in results['items']:
            title = item['title']
            description = item['description']
            if 'Project name' in title:
                x.project = afterColon(title)
            elif 'Developers on project' in title:
                x.devs = get(self._intRe, title)
            elif 'Activity percentile' in title:
                x.activity = get(self._percentRe, title)
                x.ranking = get(self._intRe, afterColon(description))
            elif 'Downloadable files' in title:
                x.downloads = get(self._intRe, title)
                x.downloadsToday = afterColon(description)
            elif 'Tracker: Bugs' in title:
                (x.bugsOpen, x.bugsTotal) = gets(self._intRe, title)
            elif 'Tracker: Patches' in title:
                (x.patchesOpen, x.patchesTotal) = gets(self._intRe, title)
            elif 'Tracker: Feature' in title:
                (x.rfesOpen, x.rfesTotal) = gets(self._intRe, title)
    except AttributeError:
        # A missing x.<field> means the feed lacked an expected item.
        irc.error('Unable to parse stats RSS.', Raise=True)
    irc.reply(
        format('%s has %n, '
               'is %s active (ranked %i), '
               'has had %n (%s today), '
               'has %n (out of %i), '
               'has %n (out of %i), '
               'and has %n (out of %i).',
               x.project, (int(x.devs), 'developer'),
               x.activity, x.ranking,
               (int(x.downloads), 'download'), x.downloadsToday,
               (int(x.bugsOpen), 'open', 'bug'), x.bugsTotal,
               (int(x.rfesOpen), 'open', 'rfe'), x.rfesTotal,
               (int(x.patchesOpen), 'open', 'patch'), x.patchesTotal))