# Imports assumed by this listing (module paths may vary; FlickrClient,
# Photo, and the utils helpers come from the surrounding project):
import datetime
import logging

from django.conf import settings
from django.utils.encoding import smart_unicode

log = logging.getLogger(__name__)


def update():
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses, keyed by license id.
    licenses = flickr.photos.licenses.getInfo()
    licenses = dict((l["id"], smart_unicode(l["url"]))
                    for l in licenses["licenses"]["license"])

    # Handle the update page by page until we see photos we've already handled.
    last_update_date = Photo.sync.get_last_update()
    page = 1
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(user_id=settings.FLICKR_USER_ID,
                                             extras="license,date_taken",
                                             per_page="500", page=str(page))
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            break

        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                log.debug("Hit an old photo (taken %s; last update was %s); stopping.",
                          timestamp, last_update_date)
                # Return rather than break: a bare break would only exit this
                # inner loop, and the while loop would fetch the next page anyway.
                return
            photo_id = utils.safeint(photodict["id"])
            license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, license, timestamp)
        page += 1
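# The parsedate() and safeint() helpers used above live in the project's
# utils module and aren't shown in this listing. A minimal sketch of what
# they might look like, assuming python-dateutil for parsing Flickr's
# "YYYY-MM-DD HH:MM:SS" strings; the real helpers may differ:
from dateutil import parser as dateutil_parser

def parsedate(s):
    """Parse a date/time string into a naive datetime.datetime."""
    return dateutil_parser.parse(s)

def safeint(s, default=0):
    """Coerce s to an int, falling back to a default instead of raising."""
    try:
        return int(s)
    except (ValueError, TypeError):
        return default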
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    info = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]
    photo_id = str(photo_id)
    taken_by = smart_unicode(info["owner"]["username"])
    url = "http://www.flickr.com/photos/%s/%s/" % (taken_by, photo_id)
    title = smart_unicode(info["title"]["_content"])
    description = smart_unicode(info["description"]["_content"])
    date_uploaded = datetime.datetime.fromtimestamp(
        utils.safeint(info["dates"]["posted"]))
    tags = _convert_tags(info["tags"])

    log.debug("Handling photo: %r (taken %s)", title, timestamp)
    try:
        # Update the existing photo if we've seen this URL before...
        photo = Photo.objects.get(url=url)
        photo.title = title
        photo.description = description
        photo.pub_date = date_uploaded
        photo.tags = tags
        photo.save()
    except Photo.DoesNotExist:
        # ... otherwise create a new one.
        photo = Photo.objects.create(
            title = title,
            url = url,
            description = description,
            pub_date = date_uploaded,
            tags = tags,
        )
        log.debug("Added %s", photo)
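# _convert_tags() is called above but not shown in this listing. A plausible
# sketch, assuming the tag structure flickr.photos.getInfo returns
# ({"tag": [{"_content": ...}, ...]}) and a space-separated tag string on the
# Photo model; the real helper may differ:
from django.utils.encoding import smart_unicode

def _convert_tags(tags):
    """Flatten Flickr's tag dicts into a space-separated tag string."""
    return " ".join(set(smart_unicode(t["_content"]) for t in tags.get("tag", [])))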
    def _tags_for_url(self, url):
        tags = set()
        xml = utils.getxml(url)
        for t in xml.getiterator("tag"):
            count = utils.safeint(t.find("count").text)
            if count >= self.tag_usage_threshold:
                tag = slugify(smart_unicode(t.find("name").text))
                tags.add(tag[:50])
        return tags
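# utils.getxml() is assumed in both versions of _tags_for_url(). A minimal
# sketch using httplib2 and ElementTree: ElementTree's parse errors surface
# as SyntaxError (ParseError subclasses it), which is what the hardened
# version below catches. That version also expects timeouts to arrive as an
# HttpLib2Error carrying a 408 status code, which implies the real helper
# inspects the response status; that part is assumed rather than shown.
import httplib2
from xml.etree import ElementTree

def getxml(url):
    """Fetch a URL and parse the response body as XML."""
    response, content = httplib2.Http(timeout=15).request(url)
    return ElementTree.fromstring(content)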
        tags.update(self._tags_for_url(url))
        return tags

    def _tags_for_url(self, url):
        tags = set()
        try:
            xml = utils.getxml(url)
        except HttpLib2Error, e:
            if e.code == 408:
                # The request timed out; skip tags this time around rather
                # than aborting the whole update. Return an empty set so the
                # return type matches the success case.
                return set()
            else:
                raise
        except SyntaxError:
            # Malformed XML from the API; again, better no tags than a crash.
            return set()
        for t in xml.getiterator("tag"):
            count = utils.safeint(t.find("count").text)
            if count >= self.tag_usage_threshold:
                tag = slugify(smart_unicode(t.find("name").text))
                tags.add(tag[:50])
        return tags

    # Memoize tags to avoid unnecessary API calls. The cache is keyed on the
    # wrapped function's first two positional arguments -- (self, url) --
    # since keying on the first alone would cache a single result per client
    # instance regardless of URL.
    _tag_cache = {}
    _tags_for_url = memoize(_tags_for_url, _tag_cache, 2)

    def _handle_track(self, artist_name, artist_mbid, track_name, track_mbid,
                      url, timestamp, tags):
        # Fields that aren't part of the lookup go in defaults so
        # get_or_create doesn't filter on them.
        t, created = Track.objects.get_or_create(
            artist_name = artist_name,
            track_name = track_name,
            pub_date = timestamp,
            defaults = dict(
                artist_mbid = artist_mbid,
                track_mbid = track_mbid,
                url = url,
                tags = tags,
            ),
        )
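# For reference, a minimal sketch of the memoize() helper assumed above. It
# mirrors the signature of Django's old django.utils.functional.memoize
# (func, cache, num_args): results are stored in the supplied dict, keyed on
# the first num_args positional arguments. The real helper may differ.
def memoize(func, cache, num_args):
    def wrapper(*args):
        key = args[:num_args]
        if key not in cache:
            cache[key] = func(*args)
        return cache[key]
    return wrapper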