def update():
    """
    Fetch newly taken public Flickr photos and hand each one to _handle_photo.

    Requests the public photo listing of settings.FLICKR_USER_ID page by page
    (500 photos per page). Processing stops completely as soon as either the
    page number exceeds the page count reported by Flickr, or a photo is
    found whose "datetaken" is older than the last recorded Photo update.
    """
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses, keyed by license id -> license URL.
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict(
        (entry["id"], smart_unicode(entry["url"]))
        for entry in license_info["licenses"]["license"]
    )

    # Handle update by pages until we see photos we've already handled
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    page = 1
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(
            user_id=settings.FLICKR_USER_ID,
            extras="license,date_taken",
            per_page="500",
            page=str(page),
        )
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            break

        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                log.debug("Hit an old photo (taken %s; last update was %s); stopping.", timestamp, last_update_date)
                # Leave the whole function, not just this page: a plain
                # `break` would only exit the inner loop and the outer loop
                # would go on to request every remaining page.
                # NOTE(review): presumes the listing is ordered newest-first,
                # so everything after this photo is older too -- confirm.
                return

            photo_id = utils.safeint(photodict["id"])
            cc_license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, cc_license, timestamp)

        page += 1
def _tags_for_track(artist_name, track_name):
    """
    Get the top tags for a track. Also fetches tags for the artist.

    Only includes tags that break a certain threshold of usage, defined by
    settings.LASTFM_TAG_USAGE_THRESHOLD (which defaults to 15).

    Returns the set of slugified tag names collected from both the artist
    and the track tag feeds. If either request fails with an error whose
    ``code`` is 408, an empty string is returned instead; any other
    HttpLib2Error is re-raised.
    """
    urls = [
        ARTIST_TAGS_URL % (urllib.quote(artist_name)),
        TRACK_TAGS_URL % (urllib.quote(artist_name), urllib.quote(track_name)),
    ]
    # The threshold does not change while we iterate, so look it up once.
    threshold = getattr(settings, "LASTFM_TAG_USAGE_THRESHOLD", 15)

    tags = set()
    for url in urls:
        log.debug("Fetching tags from %r", url)
        try:
            xml = utils.getxml(url)
        except HttpLib2Error as e:
            # Not every HttpLib2Error necessarily carries a `code`; reading
            # it defensively keeps the original error from being masked by
            # an AttributeError.
            if getattr(e, "code", None) == 408:
                return ""
            raise
        for t in xml.getiterator("tag"):
            count = utils.safeint(t.find("count").text)
            if count >= threshold:
                tags.add(slugify(smart_unicode(t.find("name").text)))

    # Without this the collected tags were discarded and callers got None.
    return tags
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    """
    Create or refresh the Photo record for one Flickr photo and its Item.

    Fetches the photo's details via flickr.photos.getInfo, then gets or
    creates the Photo with that ``photo_id``. A newly created Photo also has
    its EXIF data fetched (flickr.photos.getExif) and stored; an existing
    Photo has its stored fields overwritten with the freshly fetched values.
    The Photo is saved in both cases.

    Returns the result of Item.objects.create_or_update for the photo, using
    ``timestamp`` as the item timestamp and the photo's converted tags.
    """
    info = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]

    # Single source of truth for the values written to the Photo, used both
    # as creation defaults and for updating an existing row.
    fields = dict(
        server_id=utils.safeint(info["server"]),
        secret=secret,
        taken_by=smart_unicode(info["owner"]["username"]),
        cc_license=license,
        title=smart_unicode(info["title"]["_content"]),
        description=smart_unicode(info["description"]["_content"]),
        comment_count=utils.safeint(info["comments"]["_content"]),
        date_uploaded=datetime.datetime.fromtimestamp(utils.safeint(info["dates"]["posted"])),
        date_updated=datetime.datetime.fromtimestamp(utils.safeint(info["dates"]["lastupdate"])),
    )

    # Pass arguments instead of pre-formatting so the message is only built
    # when debug logging is actually enabled.
    log.debug("Handling photo: %r (taken %s)", fields["title"], timestamp)

    photo, created = Photo.objects.get_or_create(
        photo_id=str(photo_id),
        defaults=fields,
    )
    if created:
        # EXIF is only requested the first time a photo is seen.
        photo.exif = _convert_exif(flickr.photos.getExif(photo_id=photo_id, secret=secret))
    else:
        for field_name, value in fields.items():
            setattr(photo, field_name, value)
    photo.save()

    return Item.objects.create_or_update(
        instance=photo,
        timestamp=timestamp,
        tags=_convert_tags(info["tags"]),
        source=__name__,
    )