def update():
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses
    licenses = flickr.photos.licenses.getInfo()
    licenses = dict((l["id"], smart_unicode(l["url"])) for l in licenses["licenses"]["license"])

    # Handle update by pages until we see photos we've already handled
    last_update_date = Photo.sync.get_last_update()
    page = 1
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(
            user_id=settings.FLICKR_USER_ID,
            extras="license,date_taken",
            per_page="500",
            page=str(page),
        )
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            break
        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                # A bare break here would only exit the inner loop and keep
                # paging, so return to stop the whole update.
                log.debug("Hit an old photo (taken %s; last update was %s); stopping.", timestamp, last_update_date)
                return
            photo_id = utils.safeint(photodict["id"])
            license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            server = smart_unicode(photodict["server"])
            _handle_photo(flickr, photo_id, secret, license, timestamp)
        page += 1

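# A minimal sketch of the _handle_photo helper called above, assuming a Photo
# model keyed on photo_id; the getInfo lookup and the field names here are
# illustrative assumptions, not the actual schema.
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    info = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]
    photo, created = Photo.objects.get_or_create(
        photo_id=str(photo_id),
        defaults=dict(
            secret=secret,
            license=license,
            title=smart_unicode(info["title"]["_content"]),
            pub_date=timestamp,
        ),
    )
    if created:
        log.debug("Added photo %s", photo_id)
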
def get_latest_data(self):
    # Fetch the XML via web request
    self.url = 'http://www.flixster.com/api/v1/users/%s/ratings.rss' % self.username
    self.xml = utils.getxml(self.url)
    # Parse the XML down to the item entries
    self.channel = self.xml.find('channel')
    self.items = self.channel.findall('item')
    # Make a list to stuff all the cleaned data into
    self.movies = []
    # Loop through all the entries
    for item in self.items:
        # Dictionary where we'll stuff all the goodies
        movie_dict = {}
        # Get the name of the movie
        title = item.find('title').text
        movie_dict['title'] = smart_unicode(title)
        # Get the URL to the review
        url = item.find('link').text
        movie_dict['url'] = smart_unicode(url)
        # Get the star rating and translate it to a float
        rating = item.find('rating').text
        movie_dict['rating'] = self._prep_rating(rating)
        # Get the pubdate
        pub_date = item.find('pubDate').text
        movie_dict['pub_date'] = utils.parsedate(pub_date)
        # Add it to the list
        self.movies.append(movie_dict)
    return self.movies

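# A possible implementation of the _prep_rating helper used above, assuming
# Flixster delivers the star rating as plain text like "3.5"; unparseable
# values fall back to None rather than raising.
def _prep_rating(self, rating):
    try:
        return float(rating)
    except (TypeError, ValueError):
        return None
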
def get_latest_data(self):
    # Get the user's most recent diggs
    self.base_url = 'http://services.digg.com/1.0/endpoint?method=user.getDiggs&username=%s&count=%s'
    self.url = self.base_url % (self.username, self.count)
    self.xml = utils.getxml(self.url)
    # Parse out the story_id and datetime
    self.diggs = [(i.get('story'), i.get('date')) for i in self.xml.getchildren()]
    # A list we'll ultimately pass out
    self.link_list = []
    # Now loop through the diggs
    for story, date in self.diggs:
        # And pull information about the stories
        story_url = 'http://services.digg.com/2.0/story.getInfo?story_ids=%s' % str(story)
        story_json = utils.getjson(story_url)
        story_obj = story_json['stories'][0]
        # A dict to stuff all the good stuff in
        story_dict = {
            # Since the digg date is expressed in epoch seconds,
            # we can start like so...
            'date': utils.parsedate(time.ctime(float(date))),
        }
        # Get the link
        story_dict['url'] = smart_unicode(story_obj.get('url'))
        # Get the title and description
        story_dict['title'] = smart_unicode(story_obj.get('title'))
        story_dict['description'] = smart_unicode(story_obj.get('description'))
        # Get the topic
        story_dict['topic'] = smart_unicode(story_obj.get("topic").get('name'))
        # Pass the dict out to our list
        self.link_list.append(story_dict)
    return self.link_list

def sync(self):
    last_update_date = Shout.sync.get_last_update()
    logger.debug("Last update date: %s", last_update_date)
    xml = utils.getxml(RECENT_STATUSES_URL % self.username)
    for status in xml.getiterator("item"):
        message = status.find('title')
        message_text = smart_unicode(message.text)
        url = smart_unicode(status.find('link').text)
        # pubDate delivered as UTC
        timestamp = utils.parsedate(str(status.find('pubDate').text))
        if not self._status_exists(url):
            self._handle_status(message_text, url, timestamp)

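# A minimal sketch of the _status_exists guard used above, assuming the Shout
# model stores each status's permalink in a url field; the field name is an
# assumption.
def _status_exists(self, url):
    return Shout.objects.filter(url=url).exists()
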
def get_latest_data(self):
    self.json = utils.getjson(self.url)
    self.beer_list = []
    # Loop through the checkins and pull out the fields we want
    for b in self.json['response']['checkins']['items']:
        d = dict(
            title=b['beer']['beer_name'],
            brewery=b['brewery']['brewery_name'],
            pub_date=utils.parsedate(b['created_at']),
            url='https://untappd.com/user/palewire/checkin/%s' % b['checkin_id'],
        )
        self.beer_list.append(d)
    return self.beer_list

def get_latest_data(self):
    self.xml = utils.getxml(self.feed_url)
    commit_list = []
    for link in self.xml.getiterator("{http://www.w3.org/2005/Atom}entry"):
        entry_dict = dict(
            pub_date=utils.parsedate(link.find('{http://www.w3.org/2005/Atom}published').text),
            message=self.prep_message(link.find('{http://www.w3.org/2005/Atom}title').text),
            branch='',
            repository='openstreetmap',
            url=smart_unicode(link.find('{http://www.w3.org/2005/Atom}id').text),
        )
        commit_list.append(entry_dict)
    return commit_list

def get_latest_data(self):
    self.json = utils.getjson(self.url)
    self.beer_list = []
    # Loop through the checkins and pull out the fields we want
    for b in self.json['results']:
        d = dict(
            title=b['beer_name'],
            brewery=b['brewery_name'],
            pub_date=utils.parsedate(b['created_at']),
            url=b['checkin_link'],
        )
        self.beer_list.append(d)
    return self.beer_list

def get_latest_data(self):
    self.link_list = []
    self.xml = utils.getxml(self.url)
    for link in self.xml.getiterator("{http://www.w3.org/2005/Atom}entry"):
        title = smart_unicode(link.find('{http://www.w3.org/2005/Atom}title').text)
        url = smart_unicode(link.find('{http://www.w3.org/2005/Atom}link').get('href'))
        date = link.find('{http://www.w3.org/2005/Atom}updated').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            date=date,
            url=url,
        )
        self.link_list.append(d)
    return self.link_list

def update():
    last_update_date = Shout.sync.get_last_update()
    log.debug("Last update date: %s", last_update_date)
    xml = utils.getxml(RECENT_STATUSES_URL % settings.TWITTER_USER)
    for status in xml.getiterator("item"):
        message = status.find('title')
        message_text = smart_unicode(message.text)
        url = smart_unicode(status.find('link').text)
        # pubDate delivered as UTC
        timestamp = utils.parsedate(str(status.find('pubDate').text))
        if not _status_exists(message_text, url, timestamp):
            _handle_status(message_text, url, timestamp)

def update(): """ Updates Delicious data and syncs it to the Link model. """ delicious = DeliciousClient(settings.DELICIOUS_USER, settings.DELICIOUS_PASSWORD) # Check to see if we need an update. last_update_date = Link.sync.get_last_update() for datenode in reversed(list(delicious.posts.dates().getiterator('date'))): dt = utils.parsedate(datenode.get("date")) # If the date in the record is the same or newer than the date of the last update.. print dt.date(), last_update_date.date() if dt.date() >= last_update_date.date(): # ... pass the data along. _update_bookmarks_from_date(delicious, dt)
def fetch(self, cat):
    params = dict(
        count=100,
        cat=cat,
    )
    data = utils.getjson(self.URL % params)
    # Clean up each article, stripping the "(Source)" suffix from the title
    headline_list = [
        dict(
            title=i['title'].replace("(%s)" % i['source'], "").strip(),
            link=i['url'],
            description=i['summary'],
            pub_date=utils.parsedate(i['publish_date']),
            source=i['source'],
        )
        for i in data['articles']
    ]
    # Drop anything with an empty title, then keep only headlines that
    # end with a question mark
    headline_list = [i for i in headline_list if len(i['title']) > 1]
    return [i for i in headline_list if i['title'][-1] == '?']

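# Example of how fetch might be called, assuming self.URL is a format string
# with %(count)s and %(cat)s placeholders defined elsewhere on the class
# (the category name here is hypothetical):
#
#   questions = client.fetch(cat="politics")
#
# Only headlines whose cleaned titles end in a question mark come back.
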
def get_latest_data(self):
    self.link_list = []
    self.xml = utils.getxml("http://delicious.com/v2/rss/palewire")
    for link in self.xml.getiterator("item"):
        title = smart_unicode(link.find('title').text)
        description = smart_unicode(link.find('description').text)
        url = smart_unicode(link.find('link').text)
        date = link.find('pubDate').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            description=description,
            date=date,
            url=url,
        )
        self.link_list.append(d)
    return self.link_list

def get_latest_data(self):
    self.location_list = []
    self.xml = utils.getxml(self.url)
    for checkin in self.xml.getiterator("item"):
        title = smart_unicode(checkin.find('title').text)
        description = smart_unicode(checkin.find('description').text)
        url = smart_unicode(checkin.find('link').text)
        date = checkin.find('pubDate').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            description=description,
            date=date,
            url=url,
        )
        self.location_list.append(d)
    return self.location_list

def sync(self):
    last_update_date = Shout.sync.get_last_update()
    logger.debug("Last update date: %s", last_update_date)
    api = twitter.Api(
        consumer_key=settings.TWITTER_CONSUMER_KEY,
        consumer_secret=settings.TWITTER_CONSUMER_SECRET,
        access_token_key=settings.TWITTER_ACCESS_TOKEN_KEY,
        access_token_secret=settings.TWITTER_ACCESS_TOKEN_SECRET
    )
    for status in api.GetUserTimeline(settings.TWITTER_USER):
        message_text = smart_unicode(status.text)
        url = smart_unicode('https://twitter.com/%s/status/%s' % (
            settings.TWITTER_USER,
            status.id
        ))
        # created_at is delivered as UTC
        timestamp = utils.parsedate(status.created_at)
        if not self._status_exists(url):
            self._handle_status(message_text, url, timestamp)

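# A plausible shape for the _handle_status helper used by the sync methods
# above; the Shout field names are assumptions.
def _handle_status(self, message, url, timestamp):
    shout = Shout(message=message, url=url, pub_date=timestamp)
    shout.save()
    logger.debug("Added shout: %s", url)
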
def _handle_bookmark(info):
    """
    Accepts a data dictionary drawn from the Delicious API and syncs it
    to the database.
    """
    try:
        # Just test the URL in case it's already been logged by another
        # bookmarking service.
        l = Link.objects.get(url=info['href'])
        # And just quit out silently if it already exists.
        log.debug("Link already exists for %s" % info["description"])
    except Link.DoesNotExist:
        # If it doesn't exist, add it fresh.
        log.debug("Adding link to %s" % info["description"])
        l = Link(
            url=info['href'],
            title=info['description'],
            description=info.get('extended', ''),
            pub_date=utils.parsedate(str(info['time'])),
            tags=info['tag'],
        )
        l.save()