Example #1
    def get_feed(self, url, request_headers=None, **kwargs):

        if not self._check_auth_cookie():
            return

        session = None
        if self.provider and hasattr(self.provider, 'session'):
            session = self.provider.session

        response = helpers.getURL(url, headers=request_headers, session=session,
                                  hooks=dict(response=self.cb_response), **kwargs)
        if not response:
            return

        try:
            feed = feedparser.parse(response)
            feed['rq_response'] = self.response
            if feed and 'entries' in feed:
                return feed

            if feed and 'error' in feed.feed:
                err_code = feed.feed['error']['code']
                err_desc = feed.feed['error']['description']
                logger.log(u'RSS ERROR:[%s] CODE:[%s]' % (err_desc, err_code), logger.DEBUG)
            else:
                logger.log(u'RSS error loading url: ' + url, logger.DEBUG)

        except Exception as e:
            logger.log(u'RSS error: ' + ex(e), logger.DEBUG)
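
The snippet above relies on a cb_response requests hook and a self.response attribute that are not part of the excerpt. A minimal sketch of what such a hook could look like (only the hook name and the self.response attribute come from the code above; the body is an assumption, not the original implementation):

    def cb_response(self, response, **kwargs):
        # Assumed sketch: keep the raw requests.Response so get_feed() can
        # attach it to the parsed feed as 'rq_response'.
        self.response = response
        return response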
Example #2
def get_news(request, slug, first=0, nresults=3):
    inst = get_object_or_404(Institution, slug__exact=slug)

    if not inst.feed_url:
        raise Http404('There is no feed url for this institution.')

    feed = feedparser.parse(inst.feed_url)

    for news in feed.entries:
        news.updated   = datetime.fromtimestamp(mktime(news.updated_parsed))
        news.main_link = news.links[0].href
        match = re.search(r'<\s*img[^>]+>', news.summary_detail.value)
        if match:
            news.main_image = match.group(0)
            news.summary_detail.value = re.sub(r'<\s*img[^>]+>', '', news.summary_detail.value)

    first    = int(first)
    nresults = int(nresults)

    ctx = {
        'entries': feed.entries[first : nresults + first],
    }

    if (first + nresults) <= len(feed.entries):
        ctx['next_url'] = reverse('portal.institutions.views.get_news', args=[slug, int(first + nresults), int(nresults),])
    if (first - nresults) >= 0:
        ctx['prev_url'] = reverse('portal.institutions.views.get_news', args=[slug, int(first - nresults), int(nresults),])

    return direct_to_template(request, 'news/list.html', ctx)
Example #3
def get_news(request, id, first=0, nresults=3):
    disc = get_object_or_404(Discipline, pk=id)

    if not disc.feed_url:
        raise Http404("The is no feed url for this course.")

    feed = feedparser.parse(disc.feed_url)

    for news in feed.entries:
        news.updated = datetime.fromtimestamp(mktime(news.updated_parsed))
        news.main_link = news.links[0].href
        match = re.search(r"<\s*img[^>]+>", news.summary_detail.value)
        if match:
            news.main_image = match.group(0)
            news.summary_detail.value = re.sub(r"<\s*img[^>]+>", "", news.summary_detail.value)

    first = int(first)
    nresults = int(nresults)

    ctx = {"entries": feed.entries[first : nresults + first]}

    if (first + nresults) <= len(feed.entries):
        ctx["next_url"] = reverse("portal.disciplines.views.get_news", args=[id, int(first + nresults), int(nresults)])
    if (first - nresults) >= 0:
        ctx["prev_url"] = reverse("portal.disciplines.views.get_news", args=[id, int(first - nresults), int(nresults)])

    return direct_to_template(request, "news/list.html", ctx)
Example #4
    def get_feed(self, url, request_headers=None):

        if not self._check_auth_cookie():
            return

        session = None
        if self.provider and hasattr(self.provider, 'session'):
            session = self.provider.session

        response = helpers.getURL(url, headers=request_headers, session=session)
        if not response:
            return

        try:
            feed = feedparser.parse(response)
            if feed and 'entries' in feed:
                return feed

            if feed and 'error' in feed.feed:
                err_code = feed.feed['error']['code']
                err_desc = feed.feed['error']['description']
                logger.log(u'RSS ERROR:[%s] CODE:[%s]' % (err_desc, err_code), logger.DEBUG)
            else:
                logger.log(u'RSS error loading url: ' + url, logger.DEBUG)

        except Exception as e:
            logger.log(u'RSS error: ' + ex(e), logger.DEBUG)
Example #5
    def _parse_and_write_entry(self, raw_rss):
        rss = feedparser.parse(raw_rss)
        
        for entry in rss.entries:
            content = entry.content[0]["value"]
            published = datetime.fromtimestamp(mktime(entry.updated_parsed))
            title = entry.title
            if title is None or title == "":
                title = entry.link

            em = EntryModel.get_or_insert_by_link(
                entry.link,
                link=entry.link, 
                content=content,
                description=entry.description,
                published=published,
                title=title
                )
            # update model
            em.link = entry.link
            em.content = content
            em.description = entry.description
            em.published = published
            em.title = title
            em.put()
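
The snippet above depends on an EntryModel.get_or_insert_by_link helper that is not shown. A hedged sketch of what such a model might look like, assuming an App Engine ndb-style datastore model (property names mirror the keyword arguments used above; the implementation is illustrative, not the original):

from google.appengine.ext import ndb

class EntryModel(ndb.Model):
    link = ndb.StringProperty()
    content = ndb.TextProperty()
    description = ndb.TextProperty()
    published = ndb.DateTimeProperty()
    title = ndb.StringProperty()

    @classmethod
    def get_or_insert_by_link(cls, link, **defaults):
        # Look the entry up by its link; create it with the supplied
        # defaults when no matching entity exists yet.
        entity = cls.query(cls.link == link).get()
        if entity is None:
            entity = cls(**defaults)
            entity.put()
        return entity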
Example #6
    def rpc_fetch():
        q = Feed.query()
        results = ndb.get_multi(q.fetch(keys_only=True))

        rpcs = []
        for f in results:
            rpc = urlfetch.create_rpc()
            urlfetch.make_fetch_call(rpc, f.url)
            rpcs.append(rpc)

        for rpc in rpcs:
            rpc.wait()
            result = rpc.get_result()
            d = feedparser.parse(result.content)
            for e in d['entries']:
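                # Parse the entry's published date and keep the entry only if it
                # falls inside the collection window: the last COLLECT_DAYS days
                # plus COLLECT_HOURS hours.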
                dt = parser.parse(e["published"]).replace(tzinfo=None)
                dy = (datetime.datetime.utcnow() - datetime.timedelta(days=COLLECT_DAYS, seconds=COLLECT_HOURS*3600)).replace(tzinfo=None)
                if dt > dy:
                    obj = EntryCollect.get_or_insert(e["id"])
                    if obj.published and obj.published >= dt:
                        pass
                    else:
                        logging.info("new entry : %s" % e["id"])
                        obj.published = dt
                        obj.title = e["title"]
                        obj.link = e["link"]
                        obj.summary = clean_html(e["summary"])
                        obj.feed = d['feed']['title']
                        obj.need_collect_word = True
                        obj.need_notice = True
                        obj.put()
Example #7
def extract_videourl_for_each_category(rss_category_links):
    category_page_video_url = {}
    for each_rss_path in rss_category_links:
        resp = feedparser.parse('http://www.pyvideo.org' + each_rss_path)
        page_urls, video_urls, page_video_urls = get_page_video_urls(resp)
        py_category = extract_category_from_rsslinks(each_rss_path)
        category_page_video_url[py_category] = [page_urls, video_urls]
    return category_page_video_url
Example #8
def extract_videourl_for_each_category(rss_category_links):
    category_page_video_url = {}
    for each_rss_path in rss_category_links:
        resp = feedparser.parse('http://www.pyvideo.org' + each_rss_path)
        page_urls, video_urls, page_video_urls = get_page_video_urls(resp)
        py_category = extract_category_from_rsslinks(each_rss_path)
        category_page_video_url[py_category] = [page_urls, video_urls]
    return category_page_video_url
Example #9
 def action(self):
     feed = feedparser.parse("http://s1.dlnws.com/dealnews/rss/editors-choice.xml")
     list_of_products_brands = shared.watch_list.splitlines()
     matches_from_feed = set()
     for i in range(0, len(feed['entries'])):
         for keyword in list_of_products_brands:
             if keyword in feed['entries'][i].title:
                 matches_from_feed.add(feed['entries'][i].title)
     shared.deal_list = '\n'.join(matches_from_feed)
Example #10
    def handle_single_source(self, src):
        url = src.url

        try:
            data = urlfetch.fetch(url)
        except:
            logging.log(logging.WARN, "Failed to fetch url %s" % url)
            return
        feed = feedparser.parse(data.content)

        current_title = None
        try:
            current_title = src.title
        except:
            pass
        if hasattr(feed.feed, 'title'):
            if feed.feed.title != current_title:
                src.title = feed.feed.title
                ndb.put_multi([src])

        maxpublished = datetime.datetime.fromtimestamp(0)
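        # maxpublished tracks the newest published timestamp seen across the
        # feed's entries (it starts at the Unix epoch).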
        logging.log(logging.INFO, "#entries=%s" % len(feed.entries))
        for entry in feed.entries:
            try:
                entry.published_parsed = datetime.datetime(
                    *entry.published_parsed[:6])
                if maxpublished is None:
                    maxpublished = entry.published_parsed
                else:
                    maxpublished = max(maxpublished, entry.published_parsed)
            except:
                entry.published_parsed = None

            if hasattr(entry, 'pkw_tags'):
                entry.pkw_tags = set(entry.pkw_tags.split(','))
            else:
                entry.pkw_tags = None

            if hasattr(entry, 'pkw_score'):
                entry.pkw_score = float(entry.pkw_score)
            else:
                entry.pkw_score = 1
        logging.log(logging.INFO, "#maxpublished=%r" % maxpublished)

        if maxpublished is None:
            logging.log(logging.WARN,
                        "Could not get published date for feed %s" % url)
            return

        now = datetime.datetime.now()
        subscriptions = Subscription.query(Subscription.next_poll < now,
                                           Subscription.source == src.key)

        for subscription in subscriptions:
            logging.log(logging.DEBUG, "subscription=%r" % subscription)
            self.send_mail(subscription, feed, maxpublished)
Example #11
    def handle_single_source(self,src):
        url = src.url

        try:
            data = urlfetch.fetch(url)
        except:
            logging.log(logging.WARN, "Failed to fetch url %s" % url)
            return
        feed = feedparser.parse(data.content)

        current_title = None
        try:
            current_title = src.title
        except:
            pass
        if hasattr(feed.feed,'title'):
            if feed.feed.title != current_title:
                src.title = feed.feed.title
                ndb.put_multi([src])

        maxpublished = datetime.datetime.fromtimestamp(0)
        logging.log(logging.INFO, "#entries=%s" % len(feed.entries))
        for entry in feed.entries:
            try:
                entry.published_parsed = datetime.datetime(*entry.published_parsed[:6])
                if maxpublished is None:
                    maxpublished = entry.published_parsed
                else:
                    maxpublished = max(maxpublished,entry.published_parsed)
            except:
                entry.published_parsed = None

            if hasattr(entry,'pkw_tags'):
                entry.pkw_tags = set(entry.pkw_tags.split(','))
            else:
                entry.pkw_tags = None

            if hasattr(entry,'pkw_score'):
                entry.pkw_score = float(entry.pkw_score)
            else:
                entry.pkw_score = 1
        logging.log(logging.INFO, "#maxpublished=%r" % maxpublished)

        if maxpublished is None:
            logging.log(logging.WARN, "Could not get published date for feed %s" % url)
            return

        now = datetime.datetime.now()
        subscriptions = Subscription.query( Subscription.next_poll < now,
                                            Subscription.source == src.key )

        for subscription in subscriptions:
            logging.log(logging.DEBUG, "subscription=%r" % subscription)
            self.send_mail( subscription, feed, maxpublished )
Example #12
    def fetchSource(self,source):
        feedUrl = source.rssUrl
        feed = None
        try:
            feed = feedparser.parse( feedUrl )
            #logging.debug("loaded the source from " + feedUrl)
        except Exception as ex:
            logging.error( "Error calling " + feedUrl )
            logging.error( ex )
            return None

        return self.parseFeed(feed,source.quality)
Example #13
    def fetchSource(self, source):
        feedUrl = source.rssUrl
        feed = None
        try:
            feed = feedparser.parse(feedUrl)
            #logging.debug("loaded the source from " + feedUrl)
        except Exception as ex:
            logging.error("Error calling " + feedUrl)
            logging.error(ex)
            return None

        return self.parseFeed(feed, source.quality)
Example #14
	def refresh(self):
		for name, url in self.feedlist:
			try:
				if not self.feeds.has_key(name):
					# just download the feed
					self.feeds[name] = feedparser.parse(url)
				else:
					of = self.feeds[name] # back up old feed
					if hasattr(of, 'etag'):
						# download new feed
						nf = feedparser.parse(url, etag=of.etag)
					elif hasattr(of, 'modified'):
						nf = feedparser.parse(url, modified=of.modified)
					else:
						nf = feedparser.parse(url)
					# keep the new feed only if the server reports a change (HTTP 304 means not modified)
					if nf.status != 304:
						self.feeds[name] = nf
			except Exception as e:
				print 'feeds.py: Warning: unable to download feed "%s":' % name, e

		# generate new picture list
		pics_new = {}
		for name, feed in self.feeds.iteritems():
			pic_list = self.extract(feed)
			for p in pic_list:
				pics_new[p['url']] = p
		
		# delete obsolete pics
		for url in self.pics.keys():
			if not url in pics_new:
				self.delete_pic(url)
		
		# move new pics
		for url in pics_new.keys():
			if not url in self.pics:
				self.pics[url] = pics_new[url]
		self.save()
Example #15
    def getRss(self):
        # get rss feed
        feed = feedparser.parse(self.rss)

        # if new feed not equal to last feed
        if self.lastFeedTitle != feed.entries[0].title:
            self.lastFeedTitle = feed.entries[0].title
            # compose message
            outgoingMessage = feed.entries[0].title + "\n" + feed.entries[0].link
            self.sendMessageToChat(outgoingMessage)

        t = Timer(10.0, self.getRss)
        t.start()
        return
Example #16
def loadSport(sport_id):
    mc.LogDebug("Loading sport %s" % sport_id)
    mc.ShowDialogWait()

    label = mc.GetActiveWindow().GetLabel(9010)
    sport = None
    for SPORT in SPORTS:
        if SPORT['id'] == sport_id:
            sport = SPORT
            break
    else:
        mc.LogError("Sport %s not found." % sport_id)
        mc.ShowDialogOk("Error", "Sport %s not found." % sport_id)
        mc.CloseWindow()
        return
    label.SetLabel(sport['title'])

    gamesList = mc.GetActiveWindow().GetList(9000)
    gamesItems = mc.ListItems()

    content = feedparser.parse(urllib.urlopen(FEED % sport_id))
    for item in content['entries']:
        title = str(item.title)
        match = sport['regex_title'].match(title)
        if match:
            day_date = date(int(match.group('year')), MONTHS.index(match.group('month').lower()), int(match.group('day')))
            if day_date < date.today():
                continue

            for game in sport['regex_game'].finditer(str(item.content)):
                game_time = time.strptime('%s %s EST' % (day_date, game.group('est_time')), '%Y-%m-%d %I:%M %p %Z')
                #Skip games that occurred more than 5 hours ago
                if time.mktime(game_time) + 5 * 60 * 60 < time.time():
                    continue
                
                name = '%s @ %s' % (game.group('team_away').strip(), game.group('team_home').strip())

                streams = []
                for stream in sport['regex_stream'].finditer(game.group('streams')):
                    mc.LogDebug('MYP2P::: "%s" - Stream "%s" (%s)' % (name, stream.group('name'), stream.group('url')))
                    streams.append(stream.groupdict())
                stream_string = '\n'.join(['%s\t%s' % (stream['name'], stream['url']) for stream in streams])

                gameItem = mc.ListItem(mc.ListItem.MEDIA_UNKNOWN)
                gameItem.SetLabel(name)
                gameItem.SetProperty('streams', stream_string)
                gamesItems.append(gameItem)

    gamesList.SetItems(gamesItems)
    mc.HideDialogWait()
Example #17
    def parse_feed(self, key):
    
        ok = True; status = 0; feedcount = 0
        url = FeedInfo().geturl(key.id)
        rss = feedparser.parse(url)
        
        if self.verbose:
            print 'parse feed from %s' % url
        
        try:
            status = rss.status
        except:
            raise PluginConnectionError(
                    'Connection Problem',
                    'Plugin could not connect',
                    'No data received. Abort directory listing..'
                )
                    
        if status not in range(200,300):
            raise PluginHTTPError(url, status)
        else:
            if self.verbose:
                print 'HTTP response code for rss: %d' % status
            
            
        for feed in rss.entries:
        
            label = feed.title; url = feed.links[1].href
            path = os.path.dirname(url)
            base = os.path.basename(url)
            image = self.IMG_TPL % ( path, base, self.IMG_TAIL_TPL )

            li = self.entry( label, thumbnailImage=image )
            
            ok = self.add( 
                    self.hndl,
                    url = url,
                    listitem = li, 
                    isFolder = False 
                )
            feedcount += 1
        
        if self.verbose:
            print 'added %d entries. success: %s' % ( feedcount, repr(ok) )
        
        if ok: #tell end of directory
            self.eod( self.hndl )
Example #18
    def get(self):
        for src in NotificationSource.query():
            logging.log(logging.INFO, "src=%s" % src.url)
            url = src.url

            try:
                data = urlfetch.fetch(url)
            except:
                logging.log(logging.WARN, "Failed to fetch url %s" % url)
                continue
            feed = feedparser.parse(data.content)

            if feed.feed.title != src.title:
                src.title = feed.feed.title
                ndb.put_multi([src])

            maxpublished = datetime.datetime.fromtimestamp(0)
            logging.log(logging.INFO, "#entries=%s" % len(feed.entries))
            for entry in feed.entries:
                try:
                    entry.published_parsed = datetime.datetime(*entry.published_parsed[:6])
                    if maxpublished is None:
                        maxpublished = entry.published_parsed
                    else:
                        maxpublished = max(maxpublished,entry.published_parsed)
                except:
                    entry.published_parsed = None

                if hasattr(entry,'pkw_tags'):
                    entry.pkw_tags = set(entry.pkw_tags.split(','))
                else:
                    entry.pkw_tags = None
            logging.log(logging.INFO, "#maxpublished=%r" % maxpublished)

            if maxpublished is None:
                logging.log(logging.WARN, "Could not get published date for feed %s" % url)
                continue

            now = datetime.datetime.now()
            subscriptions = Subscription.query( Subscription.next_poll < now,
                                                Subscription.source == src.key )

            for subscription in subscriptions:
                logging.log(logging.DEBUG, "subscription=%r" % subscription)
                self.send_mail( subscription, feed, maxpublished )
Example #19
 def parseFeed(self, feed):
     # Grab the gmail atom feed
     mail = {}
     atom = feedparser.parse(feed)
     
     if atom.bozo != 0:
         if self.debug: print "[gmail-plasmoid] Error parsing feed:", atom.bozo_exception
         mail = None
     else:
         mail["fullcount"] = int(atom.feed.fullcount)
         mail["url"] = atom.feed.link
         
         mail["entries"] = []
         for i in xrange(len(atom.entries)):
             entry = {}
             
             if atom.entries[i].has_key("title"):
                 entry["subject"] = atom.entries[i].title
             else:
                 entry["subject"] = ""
             
             if atom.entries[i].has_key("author_detail"):
                 if atom.entries[i].author_detail.has_key("name"):
                     entry["authorname"] = atom.entries[i].author_detail.name
                 else:
                     entry["authorname"] = ""
                 
                 if atom.entries[i].author_detail.has_key("email"):
                     entry["authoremail"] = atom.entries[i].author_detail.email
                 else:
                     entry["authoremail"] = ""
             else:
                 entry["authorname"] = ""
                 entry["authoremail"] = ""
                 
             #if atom.entries[i].has_key("link"):
                 #entry["link"] = atom.entries[i].link
             #else:
                 #entry["link"] = ""
                 
             mail["entries"].append(entry)
         
     return mail
Example #20
    def parse_feed(self, key):

        ok = True
        status = 0
        feedcount = 0
        url = FeedInfo().geturl(key.id)
        rss = feedparser.parse(url)

        if self.verbose:
            print 'parse feed from %s' % url

        try:
            status = rss.status
        except:
            raise PluginConnectionError(
                'Connection Problem', 'Plugin could not connect',
                'No data received. Abort directory listing..')

        if status not in range(200, 300):
            raise PluginHTTPError(url, status)
        else:
            if self.verbose:
                print 'HTTP response code for rss: %d' % status

        for feed in rss.entries:

            label = feed.title
            url = feed.links[1].href
            path = os.path.dirname(url)
            base = os.path.basename(url)
            image = self.IMG_TPL % (path, base, self.IMG_TAIL_TPL)

            li = self.entry(label, thumbnailImage=image)

            ok = self.add(self.hndl, url=url, listitem=li, isFolder=False)
            feedcount += 1

        if self.verbose:
            print 'added %d entries. success: %s' % (feedcount, repr(ok))

        if ok:  #tell end of directory
            self.eod(self.hndl)
Example #21
 def _retrieveFeed(self):
     """do the actual work and try to retrieve the feed"""
     url = self.url
     if url != '':
         self._last_update_time_in_minutes = time.time() / 60
         self._last_update_time = DateTime()
         d = feedparser.parse(url)
         if getattr(d, 'bozo', 0) == 1 and not isinstance(
                 d.get('bozo_exception'), ACCEPTED_FEEDPARSER_EXCEPTIONS):
             self._loaded = True  # we tried at least but have a failed load
             self._failed = True
             return False
         self._title = d.feed.title
         self._siteurl = d.feed.link
         self._items = []
         for item in d['items']:
             try:
                 link = item.links[0]['href']
                 itemdict = {
                     'title': item.title,
                     'url': link,
                     'summary': item.get('description', ''),
                 }
                 if hasattr(item, "updated"):
                     try:
                         itemdict['updated'] = DateTime(item.updated)
                     except DateTimeError:
                         # It's okay to drop it because in the
                         # template, this is checked with
                         # ``exists:``
                         pass
             except AttributeError:
                 continue
             self._items.append(itemdict)
         self._loaded = True
         self._failed = False
         return True
     self._loaded = True
     self._failed = True  # no url set means failed
     return False  # no url set, although that actually should not really happen
Example #22
 def parse_feed(self, url, image):
     
     if self.verbose:
         print 'parse feed from %s' % url
         
     feedcount = 0
     
     #parse feed
     rss = feedparser.parse(url)
     
     for f in rss.entries:
         #iterate and add to directory
         
         link  = f.enclosures[0].href
         size  = f.enclosures[0].length
         label = f.title.encode('utf-8', 'replace')
         
         listitem = self.entry( label, thumbnailImage=image, path=link )
         
         #add to directory
         ok = self.add(
                 self.hndl, 
                 url = link, 
                 listitem = listitem, 
                 isFolder = False
             )
         
         feedcount += 1
     
     if self.verbose:
         print 'added %d feed entries' % feedcount
         print 'success: %s' % repr(feedcount > 0)
     
     #Tell end of directory listing
     if feedcount > 0:
         self.eod( self.hndl, ok, False, False ) 
     else:
          exec "xbmcgui.Dialog().ok('No podcasts received from NRK.no', '')"
Example #23
    def parse_feed(self, url, image):

        if self.verbose:
            print 'parse feed from %s' % url

        feedcount = 0

        #parse feed
        rss = feedparser.parse(url)

        for f in rss.entries:
            #iterate and add to directory

            link = f.enclosures[0].href
            size = f.enclosures[0].length
            label = f.title.encode('utf-8', 'replace')

            listitem = self.entry(label, thumbnailImage=image, path=link)

            #add to directory
            ok = self.add(self.hndl,
                          url=link,
                          listitem=listitem,
                          isFolder=False)

            feedcount += 1

        if self.verbose:
            print 'added %d feed entries' % feedcount
            print 'success: %s' % repr(feedcount > 0)

        #Tell end of directory listing
        if feedcount > 0:
            self.eod(self.hndl, ok, False, False)
        else:
            exec "xbmcgui.Dialog().ok('No podcasts received from NRK.no', '')"
Example #24
def loadSport(sport_id):
    mc.LogDebug("Loading sport %s" % sport_id)
    mc.ShowDialogWait()

    label = mc.GetActiveWindow().GetLabel(9010)
    sport = None
    for SPORT in SPORTS:
        if SPORT['id'] == sport_id:
            sport = SPORT
            break
    else:
        mc.LogError("Sport %s not found." % sport_id)
        mc.ShowDialogOk("Error", "Sport %s not found." % sport_id)
        mc.CloseWindow()
        return
    label.SetLabel(sport['title'])

    gamesList = mc.GetActiveWindow().GetList(9000)
    gamesItems = mc.ListItems()

    content = feedparser.parse(urllib.urlopen(FEED % sport_id))
    for item in content['entries']:
        title = str(item.title)
        match = sport['regex_title'].match(title)
        if match:
            day_date = date(int(match.group('year')),
                            MONTHS.index(match.group('month').lower()),
                            int(match.group('day')))
            if day_date < date.today():
                continue

            for game in sport['regex_game'].finditer(str(item.content)):
                game_time = time.strptime(
                    '%s %s EST' % (day_date, game.group('est_time')),
                    '%Y-%m-%d %I:%M %p %Z')
                #Skip games that occurred more than 5 hours ago
                if time.mktime(game_time) + 5 * 60 * 60 < time.time():
                    continue

                name = '%s @ %s' % (game.group('team_away').strip(),
                                    game.group('team_home').strip())

                streams = []
                for stream in sport['regex_stream'].finditer(
                        game.group('streams')):
                    mc.LogDebug(
                        'MYP2P::: "%s" - Stream "%s" (%s)' %
                        (name, stream.group('name'), stream.group('url')))
                    streams.append(stream.groupdict())
                stream_string = '\n'.join([
                    '%s\t%s' % (stream['name'], stream['url'])
                    for stream in streams
                ])

                gameItem = mc.ListItem(mc.ListItem.MEDIA_UNKNOWN)
                gameItem.SetLabel(name)
                gameItem.SetProperty('streams', stream_string)
                gamesItems.append(gameItem)

    gamesList.SetItems(gamesItems)
    mc.HideDialogWait()
Example #25
    def fetch(self,
              url,
              force_update=False,
              offline=False,
              request_headers=None,
              referrer=None):
        """Return the feed at url.

        url - The URL of the feed.

        force_update=False - When True, update the cache whether the
                                           current contents have
                                           exceeded their time-to-live
                                           or not.

        offline=False - When True, only return data from the local
                                 cache and never access the remote
                                 URL.

        request_headers=None - Add additional request headers to the request

        referrer=None - Add a referrer to the request

        If there is data for that feed in the cache already, check
        the expiration date before accessing the server.  If the
        cached data has not expired, return it without accessing the
        server.

        In cases where the server is accessed, check for updates
        before deciding what to return.  If the server reports a
        status of 304, the previously cached content is returned.

        The cache is only updated if the server returns a status of
        200, to avoid holding redirected data in the cache.
        """
        logger.debug('url="%s"' % url)

        # Convert the URL to a value we can use
        # as a key for the storage backend.
        key = url
        if isinstance(key, unicode):
            key = key.encode('utf-8')

        modified = None
        etag = None
        now = time.time()

        cached_time, cached_content = self.storage.get(key, (None, None))

        # Offline mode support (no networked requests)
        # so return whatever we found in the storage.
        # If there is nothing in the storage, we'll be returning None.
        if offline:
            logger.debug('offline mode')
            return cached_content

        # Does the storage contain a version of the data
        # which is older than the time-to-live?
        logger.debug('cache modified time: %s' % str(cached_time))
        if cached_time is not None and not force_update:
            if self.time_to_live:
                age = now - cached_time
                if age <= self.time_to_live:
                    logger.debug('cache contents still valid')
                    return cached_content
                else:
                    logger.debug('cache contents older than TTL')
            else:
                logger.debug('no TTL value')

            # The cache is out of date, but we have
            # something.  Try to use the etag and modified_time
            # values from the cached content.
            etag = cached_content.get('etag')
            modified = cached_content.get('modified')
            logger.debug('cached etag=%s' % etag)
            logger.debug('cached modified=%s' % str(modified))
        else:
            logger.debug('nothing in the cache, or forcing update')

        # We know we need to fetch, so go ahead and do it.
        logger.debug('fetching...')
        parsed_result = feedparser.parse(url,
                                         agent=self.user_agent,
                                         modified=modified,
                                         etag=etag,
                                         referrer=referrer,
                                         request_headers=request_headers)

        status = parsed_result.get('status', None)
        logger.debug('HTTP status=%s' % status)
        if status == 304:
            # No new data, based on the etag or modified values.
            # We need to update the modified time in the
            # storage, though, so we know that what we have
            # stored is up to date.
            self.storage[key] = (now, cached_content)

            # Return the data from the cache, since
            # the parsed data will be empty.
            parsed_result = cached_content
        elif status == 200:
            # There is new content, so store it unless there was an error.
            error = parsed_result.get('bozo_exception')
            if not error:
                logger.debug('Updating stored data for %s' % url)
                self.storage[key] = (now, parsed_result)
            else:
                logger.warning('Not storing data with exception: %s', error)
        else:
            logger.warning('Not updating cache with HTTP status %s', status)

        return parsed_result
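
The fetch() above assumes surrounding state (self.storage, self.time_to_live, self.user_agent) that the excerpt does not define. A hypothetical driver, assuming the method lives on a small cache class with roughly that constructor shape (the class name and constructor below are illustrative, not the original API):

import shelve

storage = shelve.open('feedcache.db')     # any dict-like mapping works
cache = FeedCache(storage, time_to_live=300, user_agent='my-feed-reader/1.0')
parsed = cache.fetch('http://example.com/atom.xml')
for entry in parsed.entries:
    print(entry.title)
storage.close()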
Example #26
 def load(self, data):
     
     parsed = feedparser.parse(data)
     Newsfeed.__init__(self, parsed, self._none)
Example #27
__author__ = 'Tramel Jones'
"""
2014
RSS Reader created with the intention of viewing Valve Software blog posts and other dev blogs with RSS.
"""

import feedparser
d = feedparser.parse("http://blogs.valvesoftware.com/feed/")

for item in d.entries:
    print(item.link)
Example #28
    def load(self, data):

        parsed = feedparser.parse(data)
        Newsfeed.__init__(self, parsed, self._none)
Example #29
    def fetch(self, url, force_update=False, offline=False, request_headers=None, referrer=None, handlers=[]):
        """Return the feed at url.

        url - The URL of the feed.

        force_update=False - When True, update the cache whether the
                                           current contents have
                                           exceeded their time-to-live
                                           or not.

        offline=False - When True, only return data from the local
                                 cache and never access the remote
                                 URL.

        request_headers=None - Add additional request headers to the request

        referrer=None - Add a referrer to the request

        handlers=[] - urllib2 handlers to pass to feedparser

        If there is data for that feed in the cache already, check
        the expiration date before accessing the server.  If the
        cached data has not expired, return it without accessing the
        server.

        In cases where the server is accessed, check for updates
        before deciding what to return.  If the server reports a
        status of 304, the previously cached content is returned.

        The cache is only updated if the server returns a status of
        200, to avoid holding redirected data in the cache.
        """
        logger.debug('url="%s"' % url)

        # Convert the URL to a value we can use
        # as a key for the storage backend.
        key = url
        if isinstance(key, unicode):
            key = key.encode('utf-8')

        modified = None
        etag = None
        now = time.time()

        cached_time, cached_content = self.storage.get(key, (None, None))

        # Offline mode support (no networked requests)
        # so return whatever we found in the storage.
        # If there is nothing in the storage, we'll be returning None.
        if offline:
            logger.debug('offline mode')
            return cached_content

        # Does the storage contain a version of the data
        # which is older than the time-to-live?
        logger.debug('cache modified time: %s' % str(cached_time))
        if cached_time is not None and not force_update:
            if self.time_to_live:
                age = now - cached_time
                if age <= self.time_to_live:
                    logger.debug('cache contents still valid')
                    return cached_content
                else:
                    logger.debug('cache contents older than TTL')
            else:
                logger.debug('no TTL value')

            # The cache is out of date, but we have
            # something.  Try to use the etag and modified_time
            # values from the cached content.
            etag = cached_content.get('etag')
            modified = cached_content.get('modified')
            logger.debug('cached etag=%s' % etag)
            logger.debug('cached modified=%s' % str(modified))
        else:
            logger.debug('nothing in the cache, or forcing update')

        # We know we need to fetch, so go ahead and do it.
        logger.debug('fetching...')
        parsed_result = feedparser.parse(url,
                                         agent=self.user_agent,
                                         modified=modified,
                                         etag=etag,
                                         referrer=referrer,
                                         request_headers=request_headers,
                                         handlers = handlers)

        status = parsed_result.get('status', None)
        logger.debug('HTTP status=%s' % status)
        if status == 304:
            # No new data, based on the etag or modified values.
            # We need to update the modified time in the
            # storage, though, so we know that what we have
            # stored is up to date.
            self.storage[key] = (now, cached_content)

            # Return the data from the cache, since
            # the parsed data will be empty.
            parsed_result = cached_content
        elif status == 200:
            # There is new content, so store it unless there was an error.
            error = parsed_result.get('bozo_exception')
            if not error:
                logger.debug('Updating stored data for %s' % url)
                self.storage[key] = (now, parsed_result)
            else:
                logger.warning('Not storing data with exception: %s',
                               error)
        else:
            logger.warning('Not updating cache with HTTP status %s', status)

        return parsed_result
Example #30
def getArticleSource(article):
	return cleanUpText(article['summary_detail']['base'])

def cleanUpText(text):
	"""
	Function handles encoding of xml file

	XML is usually passed in as UTF-8. We want to return
	an ASCII version for Python.
	"""
	return text.encode('ascii','ignore')

db = Database()
db.connect()
for feed in rssFeeds:
	rss = feedparser.parse(feed) #RSS object

	numFound += len(rss['entries'])
	print("Found a total of "+str(numFound)+" articles from rss feed: "+str(feed))
	for entry in rss['entries']:
		outputDir = relativePath + buildArticlePath(entry)
		
		if not os.path.exists(outputDir):
			os.makedirs(outputDir)
		# Download the article
		link = entry['link']
		originalTitle = cleanUpText(entry['title'])
		
		formattedTitle = originalTitle.replace(' ','_').replace('/','-').replace("'",'')
		
		relFilePath = buildArticlePath(entry) + formattedTitle+".html"