def dump_albums(data):
    path = create_folder(BACKUP_DIR, 'facebook_app', 'albums')
    photo_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, photo_queue)
    workers.start()
    for album_id, album in data['albums'].iteritems():
        album_path = create_folder(path, album['id'])
        downloaded_photos = os.listdir(album_path)
        for photo_id, photo in album['photos'].iteritems():
            source_url = urlparse.urlparse(photo['source'])
            ext = re.match(r"^.*/([^/]+?(\.[^/]+)?)$", source_url.path).groups()[1]
            if not ext:
                ext = ''
            if photo['id'] not in downloaded_photos:
                photo_path = create_folder(album_path, photo['id'])
                photo_queue.put((dump_photo_set, (photo_path, photo, ext,), {}))
            else:
                # either thumbnail or photo is missing: redownload the set
                if not photo_exists(album_path, photo):
                    photo_path = create_folder(album_path, photo['id'])
                    photo_queue.put((dump_photo_set, (photo_path, photo, ext,), {}))
                else:
                    pass  # photo is already in the filesystem
    workers.stop()
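# NOTE: Worker and WorkerPool are defined elsewhere in this project and are not
# shown in this section. The sketch below is only an assumption of the contract
# the dump functions rely on: each queued item is a (callable, args, kwargs)
# tuple, and stop() drains the queue before the workers go away. The pool size
# and the 1-second poll timeout are illustrative, not the project's values.
import threading


class Worker(threading.Thread):
    """Consumes (func, args, kwargs) jobs from a shared queue."""

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.daemon = True
        self._running = True

    def run(self):
        while self._running:
            try:
                func, args, kwargs = self.queue.get(timeout=1)
            except Queue.Empty:
                continue
            try:
                func(*args, **kwargs)
            except Exception, e:
                logger.debug('worker job failed: %r', e)
            finally:
                self.queue.task_done()

    def halt(self):
        self._running = False


class WorkerPool(object):
    """Starts a fixed number of workers over one job queue."""

    def __init__(self, worker_cls, queue, size=10):
        self.queue = queue
        self.workers = [worker_cls(queue) for _ in range(size)]

    def start(self):
        for w in self.workers:
            w.start()

    def stop(self):
        self.queue.join()  # block until every queued job has been processed
        for w in self.workers:
            w.halt()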
def dump_photos(data):
    path = create_folder(BACKUP_DIR, 'facebook_app', 'photos')
    photo_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, photo_queue)
    workers.start()
    downloaded_photos = os.listdir(path)
    for photo in data['photos']:
        photo_path = create_folder(path, photo['id'])
        if photo['id'] not in downloaded_photos:
            photo_queue.put((dump_photo_set, (photo_path, photo, ".jpg",), {}))
        else:
            if not photo_exists(path, photo):
                photo_queue.put((dump_photo_set, (photo_path, photo, ".jpg",), {}))
            else:
                pass  # photo is already in the filesystem
    workers.stop()
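# NOTE: dump_photo_set is not shown in this section; the callers only tell us
# it takes (photo_path, photo, ext) and that a "set" includes a thumbnail as
# well as the full-size image. The sketch below is an assumed implementation
# for the Facebook path: the 'picture' thumbnail field, the '_thumbnail'
# filename suffix, and the _fetch helper are all hypothetical, and the
# PicasaWeb entries (queued without an ext) may need their own handling.
def dump_photo_set(photo_path, photo, ext=''):
    # full-size image
    source = photo.get('source')
    if source:
        _fetch(source, os.path.join(photo_path, "%s%s" % (photo['id'], ext)))
    # thumbnail, if the API exposed one
    thumb = photo.get('picture')
    if thumb:
        _fetch(thumb, os.path.join(photo_path, "%s_thumbnail%s" % (photo['id'], ext)))


def _fetch(url, filename):
    try:
        response = urllib2.urlopen(url)
        with open(filename, 'wb') as f:
            f.write(response.read())
    except Exception, e:
        logger.debug('failed to download %s: %r', url, e)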
def dump_albums(data):
    path = create_folder(BACKUP_DIR, 'picasaweb_app', 'albums')
    photo_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, photo_queue)
    workers.start()
    for album_id, album in data['albums'].iteritems():
        album_path = create_folder(path, album['id'])
        downloaded_photos = os.listdir(album_path)
        for photo_id, photo in album['photos'].iteritems():
            if photo_id not in downloaded_photos:
                photo_path = create_folder(album_path, photo_id)
                photo_queue.put((dump_photo_set, (photo_path, photo,), {}))
            else:
                if not photo_exists(album_path, photo):
                    photo_path = create_folder(album_path, photo['id'])
                    photo_queue.put((dump_photo_set, (photo_path, photo,), {}))
                else:
                    pass
    workers.stop()
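# NOTE: photo_exists is defined elsewhere; this sketch only captures the check
# the callers imply ("either thumbnail or photo is missing: redownload the
# set"): the photo's folder must contain both the full-size image and its
# thumbnail before the set counts as complete. The filename pattern matches
# the dump_photo_set sketch above and is an assumption, not the real scheme.
def photo_exists(parent_path, photo):
    photo_path = os.path.join(parent_path, photo['id'])
    if not os.path.isdir(photo_path):
        return False
    files = os.listdir(photo_path)
    has_photo = any(f.startswith(photo['id']) and 'thumbnail' not in f for f in files)
    has_thumbnail = any('thumbnail' in f for f in files)
    return has_photo and has_thumbnail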
            GOOGLE_OAUTH_CONSUMER_SECRET,
            str(access_token['access_token']),
            str(access_token['access_token_secret']))
    except Exception, e:
        logger.debug('PicasaWeb auth: %r', e)
        raise e
    logger.debug('we got auth')
    try:
        profile = auth.get_profile()
    except Exception, e:
        logger.debug('error getting profile %r', e)
        raise e
    logger.debug('we got profile: %r', profile)
    data = {'profile': profile}
    picasa_job_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, picasa_job_queue)
    workers.start()
    logger.debug('we got auth')
    progress_monitor.update(10)
    logger.debug('getting albums')
    albums = auth.google_api('/data/feed/api/user/default',
                             {'kind': 'album'},
                             photos.UserFeedFromString)
    logger.debug('albums xml: %r', albums.ToString())
    data['albums'] = {}

    def func(hash_key, result_dict, client, album):
        album_photos = client.PhotoFeedFromString(
            urllib2.urlopen(album.GetPhotosUri()).read())
def run_backup(preference):
    logger.debug("running backup routine for facebook_app")
    logger.debug("preference: %r", preference)
    progress_monitor = get_progress_monitor("facebook_app")
    graph = facebook.GraphAPI(preference['tokens'])
    data = {'profile': graph.get_object('me')}
    facebook_job_queue = Queue.Queue(1500)
    logger.debug("starting workers")
    workers = WorkerPool(Worker, facebook_job_queue)
    workers.start()
    for k, v in preference['settings'].iteritems():
        progress_monitor.update(10)
        logger.debug("processing %s", k)
        if v:
            if k == 'albums':
                data[k] = {}

                def func(hash_key, result_dict, client, album):
                    logger.debug("fetching data for album %s", album['id'])
                    photos = {}
                    album_photos = [photo for photo in
                                    client.get_connections(album['id'], 'photos', limit=1000000)['data']]
                    album_photos = sorted(album_photos, key=itemgetter('created_time'), reverse=True)
                    for photo in album_photos:
                        logger.debug("fetching comments for photo %s in album %s", photo['id'], album['id'])
                        comments = [comment for comment in
                                    client.get_connections(photo['id'], 'comments', limit=1000000)['data']]
                        photo['comments'] = sorted(comments, key=itemgetter('created_time'))
                        photos[photo['id']] = photo
                    album['photos'] = photos
                    result_dict[hash_key][album['id']] = album

                for obj in graph.get_connections('me', 'albums', limit=1000000)['data']:
                    logger.debug("queueing album %s", obj['id'])
                    facebook_job_queue.put((func, (k, data, graph, obj,), {}))
            elif k == 'photos':
                data[k] = []
                logger.debug("fetching data for tagged photos")
                photos = graph.get_connections('me', k, limit=1000000)['data']

                def func(hash_key, result_dict, client, obj):
                    logger.debug("fetching comments for tagged photo %s", obj['id'])
                    comments = [comment for comment in
                                client.get_connections(obj['id'], 'comments', limit=1000000)['data']]
                    obj['comments'] = comments
                    result_dict[hash_key].append(obj)

                for photo in photos:
                    logger.debug("queueing comments for tagged photo %s", photo['id'])
                    facebook_job_queue.put((func, (k, data, graph, photo), {}))
            else:
                data[k] = []

                def func(hash_key, result_dict, client, id):
                    logger.debug("fetching data for %s %s", hash_key, id)
                    obj = client.get_object(id)
                    if hash_key in ['statuses', 'events']:
                        logger.debug("fetching comments for %s %s", hash_key, id)
                        comments = [comment for comment in
                                    client.get_connections(id, 'comments', limit=1000000)['data']]
                        obj['comments'] = sorted(comments, key=itemgetter('created_time'))
                    result_dict[hash_key].append(obj)

                for obj in graph.get_connections('me', k, limit=1000000)['data']:
                    logger.debug("queueing %s %s", k, obj['id'])
                    facebook_job_queue.put((func, (k, data, graph, obj['id'],), {}))
    progress_monitor.update(10)
    workers.stop()
    if 'albums' in data:
        dump_albums(data)
    if 'photos' in data:
        dump_photos(data)
    dump_profile_pics(data)
    dump_json(data, progress_monitor.start_time)
    filename = "%s.json" % progress_monitor.start_time.strftime("%Y%m%d%H%M%S")
    add_to_archive("facebook_app", filename, progress_monitor.start_time)
    progress_monitor.complete()
    logger.debug("facebook_app backup done")
    return progress_monitor.start_time
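# NOTE: illustrative only. The shape of the preference dict is inferred from
# how run_backup reads it ('tokens' for the Graph API access token, 'settings'
# mapping connection names to booleans); the token value is a placeholder and
# the exact keys your scheduler passes in may differ.
if __name__ == '__main__':
    preference = {
        'tokens': 'FACEBOOK_ACCESS_TOKEN',
        'settings': {
            'albums': True,     # per-album photo sets, with comments
            'photos': True,     # photos the user is tagged in, with comments
            'statuses': True,   # statuses, with their comments
            'events': True,     # events, with their comments
        },
    }
    started_at = run_backup(preference)
    print "backup started at %s" % started_at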