Python parseの例、rssparser.parse Pythonの例

コード例 #1

0

ファイルを表示

def get_rss(u):
    """Parse the feed at ``u`` and return its items as ``(url, title)`` pairs.

    Titles are encoded to ASCII with unencodable characters replaced.  A
    missing link or title falls back to ``u"(none)"``.  Any error while
    fetching or parsing yields an empty list, never a partial result.
    """
    try:
        result = rssparser.parse(u)
        return [
            (item.get('link', u"(none)"),
             item.get('title', u"(none)").encode('ascii', 'replace'))
            for item in result['items']
        ]
    except Exception:
        # Best-effort: a broken feed must not take the caller down, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return []

コード例 #2

0

ファイルを表示

def pingback(request, source, target):
    """Record a pingback comment on ``target`` if ``source`` links to it.

    The page at ``source`` (fragment stripped) is fetched and fed to
    ``parser``.  When ``target`` is among the page's hrefs, the feeds
    returned by ``getFeeds`` for the source URL are searched for the item
    whose link equals ``source``; its title and a short tag-stripped
    excerpt are used for the comment passed to ``writeComment``.

    Returns a success message string.

    Raises:
        Fault(0x0010): the fetched response carries an ``error`` header
            equal to ``'404'``.
        Fault(0x0011): the source page does not link to ``target``.
    """
    source_file = urllib.urlopen(source.split('#')[0])
    try:
        if source_file.headers.get('error', '') == '404':
            # It is the *source* fetch that failed, so report the source.
            raise Fault(0x0010, "Source %s does not exist" % source)
        source_page = parser()
        source_page.feed(source_file.read())
    finally:
        # Close the connection even when the fault or a parse error fires.
        source_file.close()

    if source_page.title == "":
        source_page.title = source

    if target not in source_page.hrefs:
        raise Fault(0x0011, "%s does not point to %s" % (source, target))

    target_entry = fileFor(request, target)

    body = ''
    try:
        from rssfinder import getFeeds
        from rssparser import parse

        tag_re = re.compile('<.*?>', re.S)
        baseurl = source.split("#")[0]
        for feed in getFeeds(baseurl):
            for item in parse(feed)['items']:
                # .get(): an item without a link must not abort the search.
                if item.get('link') != source:
                    continue
                if 'title' in item:
                    source_page.title = item['title']
                if 'content_encoded' in item:
                    body = item['content_encoded'].strip()
                if 'description' in item:
                    body = item['description'].strip() or body
                body = tag_re.sub('', body)
                body = re.sub(r'\s+', ' ', body)
                # Cut at the last space before 250 chars; with no space at
                # all, hard-cut at 250 instead of dropping the last char.
                cut = body.rfind(' ', 0, 250)
                if cut == -1:
                    cut = 250
                body = body[:cut] + " ...<br />"
    except Exception:
        # Feed lookup only enriches the comment; fall back to whatever
        # title/body we have if discovery or parsing fails.
        pass

    cmt = {'title': source_page.title,
           'author': 'Pingback from %s' % source_page.title,
           'pubDate': str(time.time()),
           'link': source,
           'source': '',
           'description': body}

    from comments import writeComment
    config = request.getConfiguration()
    data = request.getData()
    data['entry_list'] = [target_entry]

    # TODO: Check if comment from the URL exists
    writeComment(config, data, cmt)

    return "success pinging %s from %s\n" % (target, source)

コード例 #3

0

ファイルを表示

ファイル: rssfetch.py プロジェクト: mdornseif/TvLuserland

def main():
    """Fetch every subscribed feed and store the items not seen before.

    For each subscription URL (in shuffled order) the feed is skipped when
    its last fetch is more recent than its "fetchhowoften" setting
    (default 60 minutes); otherwise it is parsed with rssparser, the
    service record is updated and saved, and each non-duplicate item is
    saved.  Progress is printed to stdout: "-" for a duplicate item, "*"
    for a new one.
    """
    import sys

    # NOTE(review): this argv/URLS selection has no effect -- ``urls`` is
    # reassigned from the subscription list a few lines below.  Confirm
    # which source of URLs is actually intended.
    if sys.argv[1:]:
        urls = sys.argv[1:]
    else:
        urls = URLS
    # pprint is only referenced by the commented-out debug calls below.
    from pprint import pprint

    urls = tv.aggregator.db.services.getsubscriptions()
    random.shuffle(urls)

    for url in urls:
        print url
        print "->",
        sys.stdout.flush()
        service, feedconfig = tv.aggregator.db.services.getserviceinfoandconfig(url)
        # pprint((service, feedconfig))
        # Skip feeds whose minimum re-fetch interval has not elapsed yet.
        if mx.DateTime.now() < service.get("TVlastfetched", mx.DateTime.DateTime(0)) + mx.DateTime.TimeDelta(
            minutes=feedconfig.get("fetchhowoften", 60)
        ):
            print "will wait until at least %s for next fetch (%d minutes after last fetch)" % (
                service.get("TVlastfetched") + mx.DateTime.TimeDelta(minutes=feedconfig.get("fetchhowoften", 60)),
                feedconfig.get("fetchhowoften", 60),
            )
            continue
        # Pass the stored etag / modified values along to the parser.
        if "TVmodified" in service:
            modified = service["TVmodified"].tuple()
        else:
            modified = None
        result = rssparser.parse(url, etag=service.get("TVetag"), modified=modified)
        service.update(result["channel"])
        fixService(service, result.get("etag"), result.get("modified"))
        for x in result["items"]:
            fixItem(x, url, result["channel"])
            if tv.aggregator.db.items.checkdupe(x):
                print "-",
                sys.stdout.flush()
            else:
                service["TVitemsfetched"] = service.get("TVitemsfetched", 0) + 1
                service["TVlastnewitem"] = mx.DateTime.now()
                # Any failure while saving is reported as "DUPERROR" and the
                # loop carries on; the "*" marker is printed either way.
                try:
                    tv.aggregator.db.items.saveitem(x)
                except:
                    print "DUPERROR",
                print "*",
            # pprint(x)
        print
        # pprint(service)
        tv.aggregator.db.services.savefeedinfo(service)

コード例 #4

0

ファイルを表示

ファイル: xmlrpc_pingback.py プロジェクト: BSierakowski/personal_code

def pingback(request, source, target):
    """Record a pingback comment on ``target`` if ``source`` links to it.

    Fetches the page at ``source`` (without its fragment) and parses it
    with ``parser``.  If ``target`` appears in the page's hrefs, the feeds
    that ``getFeeds`` returns for the source URL are scanned for the item
    linking to ``source`` so the comment can use its title and a short,
    tag-stripped excerpt.  The comment is stored via ``writeComment``.

    Returns a success message string.

    Raises:
        Fault(0x0010): the fetched response carries an ``error`` header
            equal to ``'404'``.
        Fault(0x0011): the source page does not link to ``target``.
    """
    source_file = urllib.urlopen(source.split('#')[0])
    try:
        if source_file.headers.get('error', '') == '404':
            # The failed fetch was for the source URL, so name the source.
            raise Fault(0x0010, "Source %s does not exist" % source)
        source_page = parser()
        source_page.feed(source_file.read())
    finally:
        # Release the connection on every path, including the fault above.
        source_file.close()

    if source_page.title == "":
        source_page.title = source

    if target not in source_page.hrefs:
        raise Fault(0x0011, "%s does not point to %s" % (source, target))

    target_entry = fileFor(request, target)

    body = ''
    try:
        from rssfinder import getFeeds
        from rssparser import parse

        strip_tags = re.compile('<.*?>', re.S)
        baseurl = source.split("#")[0]
        for feed in getFeeds(baseurl):
            for item in parse(feed)['items']:
                # Items lacking a link are skipped instead of raising
                # KeyError and ending the whole search.
                if item.get('link') != source:
                    continue
                if 'title' in item:
                    source_page.title = item['title']
                if 'content_encoded' in item:
                    body = item['content_encoded'].strip()
                if 'description' in item:
                    body = item['description'].strip() or body
                body = strip_tags.sub('', body)
                body = re.sub(r'\s+', ' ', body)
                # Truncate at the last space before 250 chars; when there
                # is no space, cut at 250 rather than at index -1.
                cut = body.rfind(' ', 0, 250)
                if cut == -1:
                    cut = 250
                body = body[:cut] + " ...<br />"
    except Exception:
        # The feed lookup is optional enrichment; keep going without it.
        pass

    cmt = {'title': source_page.title,
           'author': 'Pingback from %s' % source_page.title,
           'pubDate': str(time.time()),
           'link': source,
           'source': '',
           'description': body}

    from comments import writeComment
    config = request.getConfiguration()
    data = request.getData()
    data['entry_list'] = [target_entry]

    # TODO: Check if comment from the URL exists
    writeComment(config, data, cmt)

    return "success pinging %s from %s\n" % (target, source)

コード例 #5

0

ファイルを表示

ファイル: plugin.py プロジェクト: nixon/boombot

 def _rss(self, irc, text):
     """Reply with the entry summaries of the feed URL found in ``text``.

     The text captured by ``_rsswunderSevere`` (bolded, or an empty string
     when there is no match) is appended after the summaries; the pieces
     are joined with '; ' and passed through ``htmlToText`` before replying.
     """
     alert = self._rsswunderSevere.search(text)
     severe = ircutils.bold(alert.group(1)) if alert else ''
     located = self._rsswunderfeed.search(text)
     if not located:
         Weather._noLocation()
     page = utils.web.getUrl(located.group(1), headers=Weather.headers)
     parsed = rssparser.parse(page)
     summaries = [entry['summary'] for entry in parsed['entries']]
     pieces = [summary.encode('utf-8') for summary in summaries]
     pieces.append(severe)
     irc.reply(utils.web.htmlToText('; '.join(pieces)))

コード例 #6

0

ファイルを表示

ファイル: plugin.py プロジェクト: yo-bj/supybot-plugins

 def _rss(self, irc, text):
     """Send the summaries from the RSS feed referenced in ``text`` to ``irc``.

     Summaries are UTF-8 encoded and joined with '; ', followed by the
     bolded ``_rsswunderSevere`` capture (empty when nothing matched); the
     result is converted with ``htmlToText`` for the reply.
     """
     severe_match = self._rsswunderSevere.search(text)
     if severe_match:
         severe = ircutils.bold(severe_match.group(1))
     else:
         severe = ''
     feed_match = self._rsswunderfeed.search(text)
     if not feed_match:
         Weather._noLocation()
     feed_url = feed_match.group(1)
     document = utils.web.getUrl(feed_url, headers=Weather.headers)
     entries = rssparser.parse(document)['entries']
     raw_summaries = [entry['summary'] for entry in entries]
     parts = [text_part.encode('utf-8') for text_part in raw_summaries]
     parts.append(severe)
     joined = '; '.join(parts)
     irc.reply(utils.web.htmlToText(joined))

コード例 #7

0

ファイルを表示

ファイル: xmlrpc_pingback.py プロジェクト: hylom/fusuma

def pingback(request, source, target):
    """Record a pingback comment on ``target`` if ``source`` links to it.

    The page at ``source`` (fragment stripped) is fetched and fed to
    ``parser``.  When ``target`` is among the page's hrefs, the feeds
    returned by ``getFeeds`` for the source URL are searched for the item
    whose link equals ``source`` to obtain a title and a short excerpt.
    The resulting comment is run through the "trackback_reject" callbacks
    and, unless rejected, stored via ``writeComment``.

    Returns a success message string.

    Raises:
        Fault(0x0010): the fetched response carries an ``error`` header
            equal to ``'404'``.
        Fault(0x0011): the source page does not link to ``target``.
        Fault(0x0031): a reject callback returned code 1.
    """
    logger = tools.getLogger()
    logger.info("pingback started")
    source_file = urllib.urlopen(source.split('#')[0])
    try:
        if source_file.headers.get('error', '') == '404':
            # It is the *source* fetch that failed, so report the source.
            raise Fault(0x0010, "Source %s does not exist" % source)
        source_page = parser()
        source_page.feed(source_file.read())
    finally:
        # Close the connection even when the fault or a parse error fires.
        source_file.close()

    if source_page.title == "":
        source_page.title = source

    if target not in source_page.hrefs:
        raise Fault(0x0011, "%s does not point to %s" % (source, target))

    target_entry = fileFor(request, target)

    body = ''
    try:
        from rssfinder import getFeeds
        from rssparser import parse

        tag_re = re.compile('<.*?>', re.S)
        baseurl = source.split("#")[0]
        for feed in getFeeds(baseurl):
            for item in parse(feed)['items']:
                # .get(): an item without a link must not abort the search.
                if item.get('link') != source:
                    continue
                if 'title' in item:
                    source_page.title = item['title']
                if 'content_encoded' in item:
                    body = item['content_encoded'].strip()
                if 'description' in item:
                    body = item['description'].strip() or body
                body = tag_re.sub('', body)
                body = re.sub(r'\s+', ' ', body)
                # Cut at the last space before 250 chars; with no space at
                # all, hard-cut at 250 instead of dropping the last char.
                cut = body.rfind(' ', 0, 250)
                if cut == -1:
                    cut = 250
                body = body[:cut] + " ...<br />"
    except Exception:
        # Feed lookup only enriches the comment; continue without it.
        pass

    cmt = {'title': source_page.title,
           'author': 'Pingback from %s' % source_page.title,
           'pubDate': str(time.time()),
           'link': source,
           'source': '',
           'description': body}

    # run anti-spam plugins
    argdict = {"request": request, "comment": cmt}
    reject = tools.run_callback("trackback_reject",
                                argdict,
                                donefunc=lambda x: x != 0)
    # A callback may return either a bare code or a (code, message) pair.
    if isinstance(reject, (tuple, list)) and len(reject) == 2:
        reject_code, reject_message = reject
    else:
        reject_code, reject_message = reject, "Pingback rejected."
    if reject_code == 1:
        raise Fault(0x0031, reject_message)

    from comments import writeComment
    config = request.getConfiguration()
    data = request.getData()
    data['entry_list'] = [target_entry]

    # TODO: Check if comment from the URL exists
    writeComment(request, config, data, cmt, config['blog_encoding'])

    return "success pinging %s from %s\n" % (target, source)

コード例 #8

0

ファイルを表示

def pingback(request, source, target):
    """Record a pingback comment on ``target`` if ``source`` links to it.

    Fetches the page at ``source`` (without its fragment) and parses it
    with ``parser``.  If ``target`` appears in the page's hrefs, the feeds
    that ``getFeeds`` returns for the source URL are scanned for the item
    linking to ``source`` so the comment can use its title and a short,
    tag-stripped excerpt.  The comment is then offered to the
    "trackback_reject" callbacks and, if not rejected, written with
    ``writeComment``.

    Returns a success message string.

    Raises:
        Fault(0x0010): the fetched response carries an ``error`` header
            equal to ``'404'``.
        Fault(0x0011): the source page does not link to ``target``.
        Fault(0x0031): a reject callback returned code 1.
    """
    logger = tools.getLogger()
    logger.info("pingback started")
    source_file = urllib.urlopen(source.split('#')[0])
    try:
        if source_file.headers.get('error', '') == '404':
            # The failed fetch was for the source URL, so name the source.
            raise Fault(0x0010, "Source %s does not exist" % source)
        source_page = parser()
        source_page.feed(source_file.read())
    finally:
        # Release the connection on every path, including the fault above.
        source_file.close()

    if source_page.title == "":
        source_page.title = source

    if target not in source_page.hrefs:
        raise Fault(0x0011, "%s does not point to %s" % (source, target))

    target_entry = fileFor(request, target)

    body = ''
    try:
        from rssfinder import getFeeds
        from rssparser import parse

        strip_tags = re.compile('<.*?>', re.S)
        baseurl = source.split("#")[0]
        for feed in getFeeds(baseurl):
            for item in parse(feed)['items']:
                # Items lacking a link are skipped instead of raising
                # KeyError and ending the whole search.
                if item.get('link') != source:
                    continue
                if 'title' in item:
                    source_page.title = item['title']
                if 'content_encoded' in item:
                    body = item['content_encoded'].strip()
                if 'description' in item:
                    body = item['description'].strip() or body
                body = strip_tags.sub('', body)
                body = re.sub(r'\s+', ' ', body)
                # Truncate at the last space before 250 chars; when there
                # is no space, cut at 250 rather than at index -1.
                cut = body.rfind(' ', 0, 250)
                if cut == -1:
                    cut = 250
                body = body[:cut] + " ...<br />"
    except Exception:
        # The feed lookup is optional enrichment; keep going without it.
        pass

    cmt = {'title': source_page.title,
           'author': 'Pingback from %s' % source_page.title,
           'pubDate': str(time.time()),
           'link': source,
           'source': '',
           'description': body}

    # run anti-spam plugins
    argdict = {"request": request, "comment": cmt}
    reject = tools.run_callback("trackback_reject",
                                argdict,
                                donefunc=lambda x: x != 0)
    # Callbacks may answer with a bare code or a (code, message) pair.
    if isinstance(reject, (tuple, list)) and len(reject) == 2:
        reject_code, reject_message = reject
    else:
        reject_code, reject_message = reject, "Pingback rejected."
    if reject_code == 1:
        raise Fault(0x0031, reject_message)

    from comments import writeComment
    config = request.getConfiguration()
    data = request.getData()
    data['entry_list'] = [target_entry]

    # TODO: Check if comment from the URL exists
    writeComment(request, config, data, cmt, config['blog_encoding'])

    return "success pinging %s from %s\n" % (target, source)

コード例 #9

0

ファイルを表示

ファイル: rss_manager.py プロジェクト: renansfs/Plone_SP

 def fetch_feed(self,url):
     """Parse the feed at ``url`` with rssparser and return the result."""
     parsed_feed = rssparser.parse(url)
     return parsed_feed

コード例 #10

0

ファイルを表示

    def stats(self, irc, msg, args, project):
        """[<project>]

        Returns the current statistics for <project>.  <project> is not needed
        if there is a default project set.
        """
        # NOTE(review): the docstring above is left verbatim -- it looks like
        # user-facing command help; confirm before rewording it.
        url = 'http://sourceforge.net/' \
              'export/rss2_projsummary.php?project=' + project
        results = rssparser.parse(url)
        if not results['items']:
            irc.errorInvalid('SourceForge project name', project)

        # Plain attribute bag collecting the values scraped from the feed.
        class x:
            pass

        def get(r, s):
            # Whole text of the first match of r in s, or an error reply.
            m = r.search(s)
            if m is not None:
                return m.group(0)
            else:
                irc.error('Sourceforge gave me a bad RSS feed.', Raise=True)

        def gets(r, s):
            # Group 1 of every match of r in s.
            return [m.group(1) for m in r.finditer(s)]

        def afterColon(s):
            return s.split(': ', 1)[-1]

        try:
            for item in results['items']:
                title = item['title']
                description = item['description']
                if 'Project name' in title:
                    x.project = afterColon(title)
                elif 'Developers on project' in title:
                    x.devs = get(self._intRe, title)
                elif 'Activity percentile' in title:
                    x.activity = get(self._percentRe, title)
                    x.ranking = get(self._intRe, afterColon(description))
                elif 'Downloadable files' in title:
                    x.downloads = get(self._intRe, title)
                    x.downloadsToday = afterColon(description)
                elif 'Tracker: Bugs' in title:
                    (x.bugsOpen, x.bugsTotal) = gets(self._intRe, title)
                elif 'Tracker: Patches' in title:
                    (x.patchesOpen, x.patchesTotal) = gets(self._intRe, title)
                elif 'Tracker: Feature' in title:
                    (x.rfesOpen, x.rfesTotal) = gets(self._intRe, title)
            # Build the reply inside the try: a feed that lacks one of the
            # expected items only shows up here, as a missing attribute on x.
            message = format(
                '%s has %n, '
                'is %s active (ranked %i), '
                'has had %n (%s today), '
                'has %n (out of %i), '
                'has %n (out of %i), '
                'and has %n (out of %i).',
                x.project, (int(x.devs), 'developer'),
                x.activity, x.ranking,
                (int(x.downloads), 'download'), x.downloadsToday,
                (int(x.bugsOpen), 'open', 'bug'), x.bugsTotal,
                (int(x.rfesOpen), 'open', 'rfe'), x.rfesTotal,
                (int(x.patchesOpen), 'open', 'patch'), x.patchesTotal)
        except (AttributeError, KeyError, ValueError):
            # AttributeError: an expected item never appeared in the feed.
            # KeyError: an item has no title/description.
            # ValueError: a tracker title did not yield exactly two numbers,
            # or a captured value was not an integer.
            irc.error('Unable to parse stats RSS.', Raise=True)
        irc.reply(message)