def __call__(self):
    # Get the user's most recent Diggs
    user_url = 'http://services.digg.com/user/%s/diggs/?appkey=%s&count=%s' % (
        self.username,
        self.api_key,
        self.count
    )
    user_xml = utils.getxml(user_url)
    # Parse out the story id and datetime of each digg
    diggs = [(i.get('story'), i.get('date')) for i in user_xml.getchildren()]
    # A list we'll ultimately pass out
    stories = []
    # Now loop through the diggs
    for story, date in diggs:
        # And pull information about the stories
        story_url = 'http://services.digg.com/story/%s/?appkey=%s' % (
            str(story),
            self.api_key
        )
        story_xml = utils.getxml(story_url)
        # A dict to stuff all the good stuff in
        story_dict = {
            # Since the digg date is expressed in epoch seconds,
            # we can start like so...
            'date': dateutil.parser.parse(time.ctime(float(date))),
        }
        # Loop through the story node
        for ele in story_xml.getiterator('story'):
            # Get the link
            link = smart_unicode(ele.get('link'))
            story_dict['url'] = link
            # Get the title
            title_node = ele.find('title')
            story_dict['title'] = smart_unicode(title_node.text)
            # Get the description
            description_node = ele.find('description')
            story_dict['description'] = smart_unicode(description_node.text)
            # Get the topic
            topic_node = ele.find('topic')
            story_dict['topic'] = smart_unicode(topic_node.get('name'))
        # Pass the dict out to our list
        stories.append(story_dict)
    return stories
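# utils.getxml is used throughout these snippets but isn't defined here. A
# minimal sketch of the kind of helper it's assumed to be -- fetch a URL
# (optionally with HTTP basic auth, matching the username/password keyword
# arguments used by the del.icio.us client below) and parse the body into an
# ElementTree element. The HttpLib2Error handling in one of the tag helpers
# below suggests the real implementation is built on httplib2; this sketch
# uses urllib2 only to stay self-contained, so treat it as an assumption,
# not the original utility.
from xml.etree import ElementTree
import urllib2

def getxml(url, username=None, password=None):
    # Attach optional HTTP basic auth credentials
    if username and password:
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(password_mgr))
    else:
        opener = urllib2.build_opener()
    # Fetch the response body and parse it into an Element
    body = opener.open(url).read()
    return ElementTree.fromstring(body)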
def get_latest_data(self):
    self.xml = utils.getxml(self.feed_url)
    self.commit_list = []
    self.entry_list = self.xml.getiterator('{http://www.w3.org/2005/Atom}entry')
    for entry in self.entry_list:
        title = entry.find('{http://www.w3.org/2005/Atom}title').text
        match = self.GITHUB_TITLE_REGEX.search(title)
        # If it doesn't match, it's one of the less important entries,
        # like when you start watching somebody's repo.
        if not match:
            # And we can just skip those
            continue
        pub_date = self._extract_entry_pubdate(entry)
        html = entry.find('{http://www.w3.org/2005/Atom}content').text
        soup = BeautifulSoup(html)
        commits_html = soup.find('div', attrs={'class': 'commits'}).findAll('li')
        for commit_html in commits_html:
            # Create a dict to stuff the goodies in
            entry_dict = {
                'pub_date': pub_date,
                'branch': smart_unicode(match.group('branch')),
                'repository': smart_unicode(match.group('repository')),
                'message': self._extract_commit_message(commit_html),
                'url': self._extract_commit_url(commit_html),
            }
            # Add the dict to the entry list
            self.commit_list.append(entry_dict)
    # Pass out the commit list
    return self.commit_list
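# The helpers referenced above (GITHUB_TITLE_REGEX, _extract_entry_pubdate,
# _extract_commit_message, _extract_commit_url) aren't defined in this
# snippet. A minimal sketch of what they might look like, modeled on the
# inline logic of the standalone GitHub callable further down; the regex
# pattern, method names and return types here are assumptions, not the
# original implementation. smart_unicode is Django's
# django.utils.encoding.smart_unicode.
GITHUB_TITLE_REGEX = re.compile(r"pushed to (?P<branch>.*) at (?P<repository>.*)")

def _extract_entry_pubdate(self, entry):
    # Parse the Atom <published> timestamp into a datetime
    text = entry.find('{http://www.w3.org/2005/Atom}published').text
    return dateutil.parser.parse(text)

def _extract_commit_url(self, commit_html):
    # The commit link is the first anchor inside the <li>
    return smart_unicode(commit_html.find('a')['href'])

def _extract_commit_message(self, commit_html):
    # The commit message sits in a <blockquote> inside the <li>
    return smart_unicode(commit_html.find('blockquote').string.strip())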
def get_latest_data(self):
    # Get the user's most recent Diggs
    self.base_url = 'http://services.digg.com/1.0/endpoint?method=user.getDiggs&username=%s&count=%s'
    self.url = self.base_url % (self.username, self.count)
    self.xml = utils.getxml(self.url)
    # Parse out the story id and datetime of each digg
    self.diggs = [(i.get('story'), i.get('date')) for i in self.xml.getchildren()]
    # A list we'll ultimately pass out
    self.link_list = []
    # Now loop through the diggs
    for story, date in self.diggs:
        # And pull information about the stories
        story_url = 'http://services.digg.com/2.0/story.getInfo?story_ids=%s' % str(story)
        story_json = utils.getjson(story_url)
        story_obj = story_json['stories'][0]
        # A dict to stuff all the good stuff in
        story_dict = {
            # Since the digg date is expressed in epoch seconds,
            # we can start like so...
            'date': utils.parsedate(time.ctime(float(date))),
        }
        # Get the link
        story_dict['url'] = smart_unicode(story_obj.get('url'))
        # Get the title
        story_dict['title'] = smart_unicode(story_obj.get('title'))
        # Get the description
        story_dict['description'] = smart_unicode(story_obj.get('description'))
        # Get the topic
        story_dict['topic'] = smart_unicode(story_obj.get('topic').get('name'))
        # Pass the dict out to our list
        self.link_list.append(story_dict)
    return self.link_list
def get_latest_data(self):
    # Fetch the XML via web request
    self.url = 'http://www.flixster.com/api/v1/users/%s/ratings.rss' % self.username
    self.xml = utils.getxml(self.url)
    # Parse the XML down to the item entries
    self.channel = self.xml.find('channel')
    self.items = self.channel.findall('item')
    # Make a list to stuff all the cleaned data into
    self.movies = []
    # Loop through all the entries
    for item in self.items:
        # Dictionary where we'll stuff all the goodies
        movie_dict = {}
        # Get the name of the movie
        title = item.find('title').text
        movie_dict['title'] = smart_unicode(title)
        # Get the URL to the review
        url = item.find('link').text
        movie_dict['url'] = smart_unicode(url)
        # Get the star rating and translate it to a float
        rating = item.find('rating').text
        movie_dict['rating'] = self._prep_rating(rating)
        # Get the pubdate
        pub_date = item.find('pubDate').text
        movie_dict['pub_date'] = utils.parsedate(pub_date)
        # Add it to the list
        self.movies.append(movie_dict)
    return self.movies
def _tags_for_url(self, url):
    tags = set()
    xml = utils.getxml(url)
    for t in xml.getiterator("tag"):
        count = utils.safeint(t.find("count").text)
        if count >= self.tag_usage_threshold:
            tag = slugify(smart_unicode(t.find("name").text))
            tags.add(tag[:50])
    return tags
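# utils.safeint isn't shown in these snippets. A minimal sketch of the sort
# of helper the call above assumes -- coerce a value to int, falling back to
# a default when the text is missing or malformed (an assumption for
# illustration, not the original code):
def safeint(value, default=0):
    try:
        return int(value)
    except (TypeError, ValueError):
        return default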
def _tags_for_url(self, url):
    tags = set()
    try:
        xml = utils.getxml(url)
    except HttpLib2Error, e:
        # A request timeout just means no tags this time around;
        # anything else is a real error.
        if e.code == 408:
            return tags
        else:
            raise
    for t in xml.getiterator("tag"):
        count = utils.safeint(t.find("count").text)
        if count >= self.tag_usage_threshold:
            tag = slugify(smart_unicode(t.find("name").text))
            tags.add(tag[:50])
    return tags
def __call__(self, **params):
    # Enforce Yahoo's "no calls quicker than every 1 second" rule,
    # leaving a 2-second gap between requests to stay well clear of it.
    delta = time.time() - DeliciousClient.lastcall
    if delta < 2:
        time.sleep(2 - delta)
    DeliciousClient.lastcall = time.time()
    url = ("https://api.del.icio.us/%s?" % self.method) + urllib.urlencode(params)
    xml = utils.getxml(url, username=self.username, password=self.password)
    return xml
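# A minimal usage sketch of the throttled client above. How a DeliciousClient
# instance gets its username, password and method is outside this snippet, so
# the constructor call below is a hypothetical stand-in; the point is only
# that repeated calls to the instance are spaced at least two seconds apart.
client = DeliciousClient('someuser', 'secret', method='posts/recent')  # hypothetical signature
recent = client(count=15)   # first call goes straight through
more = client(count=15)     # second call sleeps until 2 seconds have passed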
def __call__(self):
    # Fetch the XML via web request
    url = "http://github.com/%s.atom" % self.username
    xml = utils.getxml(url)
    commits = []
    GITHUB_TITLE_REGEX = re.compile(
        r"Schwanksta pushed to (?P<branch>(.*)) at (?P<repository>(.*))"
    )
    # Loop through all the entries
    entries = list(xml.getiterator("{http://www.w3.org/2005/Atom}entry"))
    for entry in entries:
        # Grab the date
        pub_date = entry.find("{http://www.w3.org/2005/Atom}published").text
        # Grab the title
        title = entry.find("{http://www.w3.org/2005/Atom}title").text
        # Test it against our regex
        match = GITHUB_TITLE_REGEX.search(title)
        # If it doesn't match, it's one of the less important entries,
        # like when you start watching somebody's repo.
        if not match:
            # And we can just skip those
            continue
        # Grab the HTML with the commits
        html = entry.find("{http://www.w3.org/2005/Atom}content").text
        soup = BeautifulSoup(html)
        commits_html = soup.find("div", attrs={"class": "commits"}).findAll("li")
        # Loop through the one-to-many commits
        for commit_html in commits_html:
            # Create a dict to stuff the goodies in
            entry_dict = {}
            entry_dict["pub_date"] = dateutil.parser.parse(pub_date).strftime("%Y-%m-%d %H:%M:%S")
            # Add the matches to our dictionary
            entry_dict["branch"] = smart_unicode(match.group("branch"))
            entry_dict["repository"] = smart_unicode(match.group("repository"))
            # Add the others
            entry_dict["url"] = smart_unicode(commit_html.find("a")["href"])
            entry_dict["message"] = smart_unicode(commit_html.find("blockquote").string.strip())
            # Add the dict to the entry list
            commits.append(entry_dict)
    return commits
def sync(self):
    last_update_date = Shout.sync.get_last_update()
    logger.debug("Last update date: %s", last_update_date)
    xml = utils.getxml(RECENT_STATUSES_URL % self.username)
    for status in xml.getiterator("item"):
        message = status.find('title')
        message_text = smart_unicode(message.text)
        url = smart_unicode(status.find('link').text)
        # pubDate delivered as UTC
        timestamp = utils.parsedate(str(status.find('pubDate').text))
        if not self._status_exists(url):
            self._handle_status(message_text, url, timestamp)
def get_latest_data(self):
    self.xml = utils.getxml(self.feed_url)
    commit_list = []
    for link in self.xml.getiterator("{http://www.w3.org/2005/Atom}entry"):
        entry_dict = dict(
            pub_date=utils.parsedate(link.find('{http://www.w3.org/2005/Atom}published').text),
            message=self.prep_message(link.find('{http://www.w3.org/2005/Atom}title').text),
            branch='',
            repository='openstreetmap',
            url=smart_unicode(link.find('{http://www.w3.org/2005/Atom}id').text),
        )
        commit_list.append(entry_dict)
    return commit_list
def sync(self):
    last_update_date = Track.sync.get_last_update()
    xml = utils.getxml(RECENT_TRACKS_URL % self.username)
    for track in xml.getiterator("track"):
        artist = track.find('artist')
        artist_name = smart_unicode(artist.text)
        artist_mbid = artist.get('mbid')
        track_name = smart_unicode(track.find('name').text)
        track_mbid = smart_unicode(track.find('mbid').text)
        url = smart_unicode(track.find('url').text)
        timestamp = datetime.datetime.fromtimestamp(int(track.find('date').get('uts')))
        if timestamp > last_update_date:
            tags = self._tags_for_track(artist_name, track_name)
            self._handle_track(artist_name, artist_mbid, track_name, track_mbid,
                               url, timestamp, tags)
def get_latest_data(self):
    self.link_list = []
    self.xml = utils.getxml(self.url)
    for link in self.xml.getiterator("{http://www.w3.org/2005/Atom}entry"):
        title = smart_unicode(link.find('{http://www.w3.org/2005/Atom}title').text)
        url = smart_unicode(link.find('{http://www.w3.org/2005/Atom}link').get('href'))
        date = link.find('{http://www.w3.org/2005/Atom}updated').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            date=date,
            url=url,
        )
        self.link_list.append(d)
    return self.link_list
def update():
    last_update_date = Shout.sync.get_last_update()
    log.debug("Last update date: %s", last_update_date)
    xml = utils.getxml(RECENT_STATUSES_URL % settings.TWITTER_USER)
    for status in xml.getiterator("item"):
        message = status.find('title')
        message_text = smart_unicode(message.text)
        url = smart_unicode(status.find('link').text)
        # pubDate delivered as UTC
        timestamp = utils.parsedate(str(status.find('pubDate').text))
        if not _status_exists(message_text, url, timestamp):
            _handle_status(message_text, url, timestamp)
def update(): last_update_date = Track.sync.get_last_update() log.debug("Last update date: %s", last_update_date) xml = utils.getxml(RECENT_TRACKS_URL % settings.LASTFM_USER) for track in xml.getiterator("track"): artist = track.find('artist') artist_name = smart_unicode(artist.text) artist_mbid = artist.get('mbid') track_name = smart_unicode(track.find('name').text) track_mbid = smart_unicode(track.find('mbid').text) url = smart_unicode(track.find('url').text) timestamp = datetime.datetime.fromtimestamp(int(track.find('date').get('uts'))) if timestamp > last_update_date: log.debug("Handling track: %r - %r", artist_name, track_name) tags = _tags_for_track(artist_name, track_name) _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags)
def get_latest_data(self):
    self.location_list = []
    self.xml = utils.getxml(self.url)
    for checkin in self.xml.getiterator("item"):
        title = smart_unicode(checkin.find('title').text)
        description = smart_unicode(checkin.find('description').text)
        url = smart_unicode(checkin.find('link').text)
        date = checkin.find('pubDate').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            description=description,
            date=date,
            url=url,
        )
        self.location_list.append(d)
    return self.location_list
def get_latest_data(self):
    self.link_list = []
    self.xml = utils.getxml("http://delicious.com/v2/rss/palewire")
    for link in self.xml.getiterator("item"):
        title = smart_unicode(link.find('title').text)
        description = smart_unicode(link.find('description').text)
        url = smart_unicode(link.find('link').text)
        date = link.find('pubDate').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            description=description,
            date=date,
            url=url,
        )
        self.link_list.append(d)
    return self.link_list
def __call__(self):
    # Fetch the XML via web request
    url = 'http://readernaut.com/api/v1/xml/%s/books/' % self.username
    xml = utils.getxml(url)
    books = []
    for book in xml.getchildren():
        # Dictionary where we'll stuff all the goodies
        book_dict = {}
        # Get the date
        date = book.find('created').text
        book_dict['date'] = dateutil.parser.parse(date)
        # Step down the XML
        edition = book.find('book_edition')
        # Get the title
        title = edition.find('title').text
        book_dict['title'] = smart_unicode(title)
        # Get the ISBN
        isbn = edition.find('isbn').text
        book_dict['isbn'] = smart_unicode(isbn)
        # Get the authors as a text list
        authors = []
        for author in edition.getiterator('authors'):
            name = getattr(author.find('author'), 'text', None)
            if name:
                authors.append(smart_unicode(name))
        book_dict['authors'] = get_text_list(authors, 'and')
        # Get the link
        url = edition.find('permalink').text
        book_dict['url'] = smart_unicode(url)
        books.append(book_dict)
    return books
def update(): last_update_date = Track.sync.get_last_update() log.debug("Last update date: %s", last_update_date) xml = utils.getxml(BOOKMARKED_TRACKS_URL % settings.PANDORA_USER) for track in xml.getiterator("item"): artist = track.find("{http://musicbrainz.org/mm/mm-2.1#}Artist").find("{http://purl.org/dc/elements/1.1/}title") artist_name = smart_unicode(artist.text) track_name = smart_unicode( track.find("{http://musicbrainz.org/mm/mm-2.1#}Track").find("{http://purl.org/dc/elements/1.1/}title").text ) album_name = smart_unicode( track.find("{http://musicbrainz.org/mm/mm-2.1#}Album").find("{http://purl.org/dc/elements/1.1/}title").text ) url = smart_unicode(track.find("link").text) pdate = track.find("pubDate").text.split("-")[0].strip() timestamp = datetime.datetime.strptime(pdate, "%a, %d %b %Y %H:%M:%S") if timestamp > last_update_date: log.debug("Handling track: %r - %r", artist_name, track_name) _handle_track(artist_name, track_name, album_name, url, timestamp)
def get_latest_data(self):
    # Fetch the XML via web request
    url = 'https://www.goodreads.com/review/list_rss/%s?key=%s&shelf=read' % (
        self.user_id,
        self.api_key
    )
    xml = utils.getxml(url)
    books = []
    for book in xml.getiterator("item"):
        # Dictionary where we'll stuff all the goodies
        book_dict = {}
        # Get the date
        date = book.find('pubDate').text
        book_dict['date'] = dateutil.parser.parse(date)
        # Get the title
        title = book.find('title').text
        book_dict['title'] = smart_unicode(title)
        # Get the ISBN
        isbn = book.find('isbn').text
        # If no ISBN, substitute the GoodReads id
        if not isbn:
            isbn = 'goodreads:%s' % book.find('book_id').text
        book_dict['isbn'] = smart_unicode(isbn)
        # Get the authors
        author = book.find('author_name').text
        book_dict['authors'] = smart_unicode(author)
        # Get the link
        url = book.find('guid').text
        book_dict['url'] = smart_unicode(url)
        books.append(book_dict)
    return books