def addRSS(self):
    """Parse the event's RSS feed and append matching items to self.results.

    The feed is windowed around self.eventdate (3 days before to 7 days
    after).  An item matches when its title or description contains any
    whitespace-separated term of self.query (case-insensitive).  At most
    self.rpp results are collected.
    """
    # TrackingChannel indexes RSS data by item URL.
    tc = TrackingChannel()
    StartDate = self.eventdate - datetime.timedelta(days=3)
    EndDate = self.eventdate + datetime.timedelta(days=7)
    # parse() returns the RSSParser instance used, which can usually be ignored.
    tc.parse(self.createURL(StartDate, EndDate))
    RSS10_TITLE = (ns.rss10, 'title')
    RSS10_DESC = (ns.rss10, 'description')
    # You can also use tc.keys()
    for item in tc.listItems():
        # Each item is a (url, order_index) tuple.
        url = item[0]
        # All the data for the item as a Python dictionary.
        item_data = tc.getItem(item)
        title = item_data.get(RSS10_TITLE, "(none)")
        # NOTE(review): the final .replace(" ", " ") is a no-op; it was
        # probably meant to collapse "&nbsp;" or double spaces — confirm
        # intent before changing it.
        desc = (item_data.get(RSS10_DESC, "(none)")
                .replace("<br/>", "")
                .replace("\n", "")
                .replace("\r", "")
                .replace(" ", " "))
        for q in self.query.split():
            needle = q.lower()
            # BUG FIX: the original tested `desc.lower().find(q.lower())`
            # without `>= 0`; str.find returns -1 on a miss, which is
            # truthy, so every item matched.  Use substring membership.
            if needle in title.lower() or needle in desc.lower():
                # BUG FIX: was `<= self.rpp`, which allowed rpp+1 results.
                if len(self.results) < self.rpp:
                    # .decode("utf-8") assumes Python 2 byte strings from
                    # the RSS parser — TODO confirm under the running
                    # interpreter.
                    self.results.append(SearchResult(title.decode("utf-8"),
                                                     url.decode("utf-8"),
                                                     desc.decode("utf-8")))
                break
def get_hackaday_feed():
    """Fetch the Hackaday RSS feed and return a list of entry dicts.

    Each returned dict has 'title', 'description' and 'link' keys; unless
    DEBUG is set it also has an 'img' key holding a generated thumbnail.
    Entries whose image cannot be found or thumbnailed are skipped.
    """
    valid = []
    tc = TrackingChannel()
    tc.parse("http://hackaday.com/feed/")
    # Iterate item keys directly instead of range(len(...)).
    for key in tc.listItems():
        item = tc.getItem(key)
        entry = {
            'title': cut_title(item.get((ns.rss10, "title"))),
            'description': item.get((ns.rss10, "description")),
            'link': item.get((ns.rss10, "link")),
        }
        # Full post HTML lives in the content:encoded extension element.
        encoded = item.get((u'http://purl.org/rss/1.0/modules/content/', u'encoded'))
        try:
            img_tags = images_from_html(encoded)
            # BUG FIX: img_tags may be empty; the bare [0] raised an
            # uncaught IndexError for image-less posts, so IndexError is
            # now handled alongside IOError (both mean "no usable image").
            img_src = img_tags[0]['src']
            if not DEBUG:
                thumb = download_and_thumbnail(img_src)
                if thumb is None:  # was `== None`
                    continue
                entry['img'] = thumb
            valid.append(entry)
        except (IOError, IndexError):
            # Don't care about links whose picture we can't thumbnail.
            continue
    return valid
def getRSSData(stationCode=10326):
    """Acquire RSS data from wunderground and parse it for current
    weather conditions at stationCode (stationCode for Graz: 11240)."""
    from RSS import ns, CollectionChannel, TrackingChannel
    # A tracking channel is a data structure that indexes RSS data by
    # item URL; parse() returns the RSSParser used, which we ignore.
    channel = TrackingChannel()
    channel.parse(
        "http://rss.wunderground.com/auto/rss_full/global/stations/%s.xml"
        % stationCode)
    # Take the first item (a (url, order_index) tuple) from the feed.
    first = channel.listItems()[0]
    first_url = first[0]
    # All data for that item as a plain dictionary.
    fields = channel.getItem(first)
    title = fields.get((ns.rss10, 'title'), "(none)")
    description = fields.get((ns.rss10, 'description'), "(none)")
    # Only the "Current Conditions" item carries the values we want;
    # any other title yields an implicit None return.
    if "Current Conditions" in title:
        return parseCurrentCond(description)
def getRSSData(stationCode=10326):
    """acquires rss data from wunderground and parses it for current weather conditions at stationCode"""
    """stationCode for Graz: 11240"""
    # NOTE(review): this is a byte-for-byte duplicate of an identical
    # getRSSData defined earlier in the file; this later definition
    # shadows the earlier one.  Consider deleting one of the two.
    from RSS import ns, CollectionChannel, TrackingChannel
    # Create a tracking channel, which is a data structure that
    # indexes RSS data by item URL.
    tc = TrackingChannel()
    # Returns the RSSParser instance used, which can usually be ignored.
    tc.parse(
        "http://rss.wunderground.com/auto/rss_full/global/stations/%s.xml"
        % stationCode)
    RSS10_TITLE = (ns.rss10, 'title')
    RSS10_DESC = (ns.rss10, 'description')
    # You can also use tc.keys()
    items = tc.listItems()
    # Only the first feed item is inspected.
    item = items[0]
    # Each item is a (url, order_index) tuple.
    url = item[0]
    # Get all the data for the item as a Python dictionary.
    item_data = tc.getItem(item)
    title = item_data.get(RSS10_TITLE, "(none)")
    description = item_data.get(RSS10_DESC, "(none)")
    # print "Title:", title
    # print "Description:", item_data.get(RSS10_DESC, "(none)")
    # Returns the parsed value dict only for the "Current Conditions"
    # item; any other title falls through to an implicit None return.
    if title.find("Current Conditions") >= 0:
        valueDict = parseCurrentCond(description)
        return valueDict
#twitter for t in twitter: created = t['created_at'] title = MySQLdb.escape_string(t['text']) summary = '' link = '' image = '' source_id = str(t['id']) sql = "INSERT INTO streams (type, source_id, title, image, link, summary, created) VALUES ('tweet', '"+source_id+"', '"+title+"', '"+image+"', '"+link+"', '"+summary+"', '"+created+"')" cur.execute(sql) for item in items: #Each item is a (url, order_index) tuple link = MySQLdb.escape_string(item[0]) #Get all the data for the item as a Python dictionary item_data = tc.getItem(item) title = MySQLdb.escape_string(item_data.get(RSS10_TITLE) ) #process summary summary = item_data.get(RSS10_DESC) parser = PotatoHTMLParser() parser.feed(summary) summary = [] for s in parser.summary: summary.append(s) if (s[-1] == "."): break summary = MySQLdb.escape_string(" ".join(summary) ) created = item_data.get(RSS10_PUBDATE) #process image image = []