def produce_entries(self):
    """
    Filter entries from a feed using the regex map, use the feed
    normalizer to produce FeedEntryDict objects.

    Returns:
        list of normalized entries whose fields all match the
        corresponding regexes in self.filter_re.
    """
    # Use the cache to get the feed for filtering.
    feed_data = feedparser.parse(self.feed_uri)

    # Build the output feed's normalized metadata.
    self.FEED_META = normalize_feed_meta(feed_data, self.date_fmt)

    # Keep an entry only when every (field, regex) pair in the filter
    # map matches; a missing field counts as a failed match.
    # NOTE: dict.has_key() was removed in Python 3 -- `k in entry` is
    # the equivalent test and short-circuits the same way.
    entries_filtered = [
        entry for entry in feed_data.entries
        if all(k in entry and r.match(entry[k])
               for k, r in self.filter_re.items())
    ]

    # Normalize all the filtered entries and stamp each with the
    # configured date format.
    entries = normalize_entries(entries_filtered)
    for entry in entries:
        entry.date_fmt = self.date_fmt

    return entries
def produce_entries(self):
    """
    Get a feed, attempt to parse out hCalendar content and add
    mod_event metadata based on it.
    """
    # Fetch and parse the source feed, then normalize its metadata.
    feed = feedparser.parse(self.main_feed)
    self.FEED_META = normalize_feed_meta(feed, self.date_fmt)

    # Scan each normalized entry's summary for hCalendar events.
    parser = HCalendarParser()
    entries = normalize_entries(feed.entries)
    for entry in entries:
        found = parser.parse(entry.data['summary'])
        if not found:
            continue

        # Only the first event found supplies the mod_event metadata.
        event = found[0]
        for cal_key, entry_key in (('dtstart', 'ev_startdate'),
                                   ('dtend', 'ev_enddate')):
            if cal_key in event:
                stamp = event.decoded(cal_key)
                entry.data[entry_key] = \
                    stamp.strftime('%Y-%m-%dT%H:%M:%SZ')

    return entries
def produce_entries(self):
    """
    Filter entries from a feed using the regex map, use the feed
    normalizer to produce FeedEntryDict objects.
    """
    # Lazily run the aggregator filter the first time through.
    if not self.entries_filtered:
        self.filter_aggregator_entries()

    # Normalize the filtered entries, stamping the date format on each.
    normalized = normalize_entries(self.entries_filtered)
    for entry in normalized:
        entry.date_fmt = self.date_fmt
    return normalized
def produce_entries(self):
    """
    Use FeedNormalizer to get feed entries, then merge the lists
    together.
    """
    merged = []

    # Fetch, parse, and normalize each subscribed feed in turn.
    for feed_uri in self.feed_uris:
        feed_data = feedparser.parse(HTTPCache(feed_uri).content())
        normalized = normalize_entries(feed_data.entries)

        # Optionally prefix each entry title with its feed's title.
        # (INCLUDE_TITLE is constant, so the check is hoisted out of
        # the per-entry loop.)
        if self.INCLUDE_TITLE:
            for entry in normalized:
                entry['title'] = ("[" + feed_data.feed.title + "] " +
                                  entry.data['title'])

        merged.extend(normalized)

    return merged
def produce_entries(self):
    """
    Use FeedNormalizer to get feed entries, then merge the lists
    together.
    """
    # Fetch and parse the source feed.
    feed = feedparser.parse(HTTPCache(self.main_feed).content())

    # Normalize the feed metadata, flagging the title as annotated.
    self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
    self.FEED_META['feed.title'] += ' (with related links)'

    entries = normalize_entries(feed.entries)

    # Annotate every entry with Technorati search results.
    for entry in entries:
        # Search on the entry title and pull out the result items.
        found = self.technorati_search(entry['title'])
        result_items = [node for node in found if node._name == 'item']

        # Render each result item through the item template.
        rendered = [
            self.INSERT_ITEM_TMPL % {
                'weblog.name': item.weblog.name,
                'weblog.url': item.weblog.url,
                'title': item.title,
                'permalink': item.permalink,
            }
            for item in result_items
        ]
        insert_out = self.INSERT_TMPL % '\n'.join(rendered)

        # Append the rendered search results onto the entry summary.
        entry.data['summary'] += insert_out.decode('utf-8', 'ignore')

    return entries
def produce_entries(self):
    """
    Use FeedNormalizer to get feed entries, then merge the lists
    together.
    """
    # Fetch and parse the source feed.
    feed = feedparser.parse(HTTPCache(self.main_feed).content())

    # Normalize the feed metadata, flagging the title as annotated.
    self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
    self.FEED_META['feed.title'] += ' (with Amazon items)'

    entries = normalize_entries(feed.entries)

    # Annotate every entry with Amazon search results.
    for entry in entries:
        # Search on the entry summary and pull out the result items.
        found = self.amazon_search(entry['summary'])
        result_items = [node for node in found.Items
                        if 'Item' in node._name]

        # Render up to MAX_ITEMS result items through the template.
        rendered = [
            self.INSERT_ITEM_TMPL % {
                'title': item.ItemAttributes.Title,
                'url': item.DetailPageURL,
                'img': item.SmallImage.URL,
            }
            for item in result_items[:self.MAX_ITEMS]
        ]
        insert_out = self.INSERT_TMPL % '\n'.join(rendered)

        # Append the rendered search results onto the entry summary.
        entry.data['summary'] += insert_out.decode('utf-8', 'ignore')

    return entries
def produce_entries(self):
    """
    Normalize the source feed, insert del.icio.us daily link recaps.

    Builds one synthetic entry per past day (up to NUM_DAYS) from the
    del.icio.us posts API and appends it to the normalized source
    entries.
    """
    # Grab and parse the feed.
    feed = feedparser.parse(HTTPCache(self.main_feed).content())

    # Normalize feed metadata, noting the del.icio.us annotation.
    self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
    self.FEED_META['feed.title'] += ' (with del.icio.us links)'

    # Normalize entries from the feed.
    entries = normalize_entries(feed.entries)

    # Prepare Basic Authentication for the del.icio.us API once,
    # outside the loop -- the credentials are loop-invariant, so
    # rebuilding and re-installing the opener each day is wasted work.
    auth = urllib2.HTTPBasicAuthHandler()
    auth.add_password('del.icio.us API', 'del.icio.us',
                      self.DEL_USER, self.DEL_PASSWD)
    urllib2.install_opener(urllib2.build_opener(auth))

    # Iterate through a number of past days' links.
    for n in range(self.NUM_DAYS):
        # Calculate and format the date for this query.
        post_secs = time.time() - ((n + 1) * 24 * 60 * 60)
        post_time = time.localtime(post_secs)
        post_dt = time.strftime('%Y-%m-%d', post_time)

        # Build the del API URL, execute the query, parse the response.
        url = self.DEL_API_URL % post_dt
        data = HTTPCache(url).content()
        doc = xmltramp.parse(data)

        # Skip this day if no posts resulted from the query.
        if not len(doc) > 0:
            continue

        # Iterate through all posts retrieved, build entry content.
        post_out = []
        for post in doc:
            # Run through post tags, render links with the template.
            tags_out = [
                self.DEL_TAG_TMPL % {
                    'tag': t,
                    'href': 'http://del.icio.us/%s/%s' %
                            (self.DEL_USER, t)
                }
                for t in post("tag").split()
            ]

            # The 'extended' attribute is optional on a post; fall
            # back to an empty string when it is absent.  (Was a bare
            # `except:`, which also trapped KeyboardInterrupt and
            # SystemExit -- narrowed so those still propagate.)
            try:
                extended = post('extended')
            except Exception:
                extended = ''

            # Build content for this link posting using the template.
            post_out.append(self.DEL_LINK_TMPL % {
                'href': post('href'),
                'description': post('description'),
                'extended': extended,
                'tags': ''.join(tags_out)
            })

        # Construct and append a new feed entry from the day's links.
        new_entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={
            'title': 'del.icio.us links on %s' % post_dt,
            'issued': post_secs,
            'modified': post_secs,
            'link': 'http://del.icio.us/%s#%s' %
                    (self.DEL_USER, post_dt),
            'summary': self.DEL_ENTRY_TMPL % "\n".join(post_out)
        })
        entries.append(new_entry)

        # Pause, because http://del.icio.us/doc/api says so.
        time.sleep(1)

    # Return the list of entries built.
    return entries
def produce_entries(self):
    """
    Normalize the source feed, insert del.icio.us daily link recaps.

    Builds one synthetic entry per past day (up to NUM_DAYS) from the
    del.icio.us posts API and appends it to the normalized source
    entries.
    """
    # Grab and parse the feed.
    feed = feedparser.parse(HTTPCache(self.main_feed).content())

    # Normalize feed metadata, noting the del.icio.us annotation.
    self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
    self.FEED_META['feed.title'] += ' (with del.icio.us links)'

    # Normalize entries from the feed.
    entries = normalize_entries(feed.entries)

    # Prepare Basic Authentication for the del.icio.us API once,
    # outside the loop -- the credentials are loop-invariant, so
    # rebuilding and re-installing the opener each day is wasted work.
    auth = urllib2.HTTPBasicAuthHandler()
    auth.add_password('del.icio.us API', 'del.icio.us',
                      self.DEL_USER, self.DEL_PASSWD)
    urllib2.install_opener(urllib2.build_opener(auth))

    # Iterate through a number of past days' links.
    for n in range(self.NUM_DAYS):
        # Calculate and format the date for this query.
        post_secs = time.time() - ((n + 1) * 24 * 60 * 60)
        post_time = time.localtime(post_secs)
        post_dt = time.strftime('%Y-%m-%d', post_time)

        # Build the del API URL, execute the query, parse the response.
        url = self.DEL_API_URL % post_dt
        data = HTTPCache(url).content()
        doc = xmltramp.parse(data)

        # Skip this day if no posts resulted from the query.
        if not len(doc) > 0:
            continue

        # Iterate through all posts retrieved, build entry content.
        post_out = []
        for post in doc:
            # Run through post tags, render links with the template.
            tags_out = [
                self.DEL_TAG_TMPL % {
                    'tag': t,
                    'href': 'http://del.icio.us/%s/%s' %
                            (self.DEL_USER, t)
                }
                for t in post("tag").split()
            ]

            # The 'extended' attribute is optional on a post; fall
            # back to an empty string when it is absent.  (Was a bare
            # `except:`, which also trapped KeyboardInterrupt and
            # SystemExit -- narrowed so those still propagate.)
            try:
                extended = post('extended')
            except Exception:
                extended = ''

            # Build content for this link posting using the template.
            post_out.append(self.DEL_LINK_TMPL % {
                'href': post('href'),
                'description': post('description'),
                'extended': extended,
                'tags': ''.join(tags_out)
            })

        # Construct and append a new feed entry from the day's links.
        new_entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={
            'title': 'del.icio.us links on %s' % post_dt,
            'issued': post_secs,
            'modified': post_secs,
            'link': 'http://del.icio.us/%s#%s' %
                    (self.DEL_USER, post_dt),
            'summary': self.DEL_ENTRY_TMPL % "\n".join(post_out)
        })
        entries.append(new_entry)

        # Pause, because http://del.icio.us/doc/api says so.
        time.sleep(1)

    # Return the list of entries built.
    return entries