photo_album['count'] = int(album.numphotos.text) logger.debug('album: %r', photo_album) data['albums'][album.gphoto_id.text] = photo_album picasa_job_queue.put((func, (photo_album['id'], data, photos, album,), {})) progress_monitor.update(10) workers.stop() dump_albums(data) dump_json(data, progress_monitor.start_time) filename = "%s.json" % progress_monitor.start_time.strftime("%Y%m%d%H%M%S") add_to_archive("picasaweb_app", filename, progress_monitor.start_time) progress_monitor.complete() logger.debug("picasaweb_app backup done") return progress_monitor.start_time def dump_json(data, backup_time): path = create_folder(BACKUP_DIR, 'picasaweb_app') fd = open(os.path.join(path, '%s.json' % backup_time.strftime('%Y%m%d%H%M%S')), 'w') fd.write(json.dumps(data)) fd.close() def dump_albums(data):
# Value passed as ``limit`` to every Graph API connection request made here
# (presumably large enough to get everything in one response -- confirm).
_FETCH_LIMIT = 1000000


def _fetch_comments(client, object_id):
    """Return the ``data`` list of the ``comments`` connection of *object_id*."""
    return client.get_connections(object_id, 'comments', limit=_FETCH_LIMIT)['data']


def _backup_album(hash_key, result_dict, client, album):
    """Worker job: attach photos (with comments) to *album* and store it.

    The album ends up in ``result_dict[hash_key]`` keyed by its ``id``; its
    ``photos`` entry is a dict keyed by photo ``id``.
    """
    logger.debug("fetching data for album %s", album['id'])
    album_photos = sorted(
        client.get_connections(album['id'], 'photos', limit=_FETCH_LIMIT)['data'],
        key=itemgetter('created_time'),
        reverse=True)
    photos = {}
    for photo in album_photos:
        logger.debug("fetching comments for photo %s in album %s", photo['id'], album['id'])
        photo['comments'] = sorted(_fetch_comments(client, photo['id']),
                                   key=itemgetter('created_time'))
        photos[photo['id']] = photo
    album['photos'] = photos
    result_dict[hash_key][album['id']] = album


def _backup_tagged_photo(hash_key, result_dict, client, obj):
    """Worker job: attach comments (in API order) to a tagged photo and store it."""
    logger.debug("fetching comments for tagged photo %s", obj['id'])
    obj['comments'] = list(_fetch_comments(client, obj['id']))
    result_dict[hash_key].append(obj)


def _backup_object(hash_key, result_dict, client, object_id):
    """Worker job: fetch one object by id and append it to ``result_dict[hash_key]``.

    For ``statuses`` and ``events`` the object's comments, sorted by
    ``created_time``, are attached as well.
    """
    logger.debug("fetching data for %s %s", hash_key, object_id)
    obj = client.get_object(object_id)
    if hash_key in ('statuses', 'events'):
        logger.debug("fetching comments for %s %s", hash_key, object_id)
        # NOTE(review): stored under 'comment' (singular) while photos use
        # 'comments'; kept as-is because it is part of the dumped JSON --
        # confirm whether any reader relies on this key.
        obj['comment'] = sorted(_fetch_comments(client, object_id),
                                key=itemgetter('created_time'))
    result_dict[hash_key].append(obj)


def run_backup(preference):
    """Run the ``facebook_app`` backup and return its start time.

    For every enabled entry of ``preference['settings']`` the matching
    objects are listed through the Graph API and one job per object is put on
    a bounded queue served by a ``WorkerPool``.  After the pool is stopped the
    collected data is dumped and registered with ``add_to_archive``.

    :param preference: mapping with ``'tokens'`` (passed to
        ``facebook.GraphAPI``) and ``'settings'`` (connection name -> truthy
        flag enabling that connection).
    :returns: ``progress_monitor.start_time`` of this run.
    """
    logger.debug("running backup routine for facebook_app")
    logger.debug("preference: %r", preference)
    progress_monitor = get_progress_monitor("facebook_app")
    graph = facebook.GraphAPI(preference['tokens'])
    data = {'profile': graph.get_object('me')}
    facebook_job_queue = Queue.Queue(1500)

    logger.debug("starting workers")
    workers = WorkerPool(Worker, facebook_job_queue)
    workers.start()
    try:
        for k, v in preference['settings'].iteritems():
            progress_monitor.update(10)
            logger.debug("processing %s", k)
            if not v:
                continue
            if k == 'albums':
                data[k] = {}
                for obj in graph.get_connections('me', 'albums', limit=_FETCH_LIMIT)['data']:
                    logger.debug("queueing album %s", obj['id'])
                    facebook_job_queue.put((_backup_album, (k, data, graph, obj), {}))
            elif k == 'photos':
                data[k] = []
                logger.debug("fetching data for tagged photos")
                photos = graph.get_connections('me', k, limit=_FETCH_LIMIT)['data']
                for photo in photos:
                    logger.debug("queueing comments for tagged photo %s", photo['id'])
                    facebook_job_queue.put((_backup_tagged_photo, (k, data, graph, photo), {}))
            else:
                data[k] = []
                for obj in graph.get_connections('me', k, limit=_FETCH_LIMIT)['data']:
                    logger.debug("queueing %s %s", k, obj['id'])
                    facebook_job_queue.put((_backup_object, (k, data, graph, obj['id']), {}))
        # NOTE(review): nesting was reconstructed from a whitespace-collapsed
        # source; this update is assumed to run once after the loop -- confirm.
        progress_monitor.update(10)
    finally:
        # Stop the pool even when listing/queueing raised, so worker threads
        # are not left running after a failed backup.
        workers.stop()

    if 'albums' in data:
        dump_albums(data)
    if 'photos' in data:
        dump_photos(data)
    # NOTE(review): assumed unconditional (not nested under the 'photos'
    # check) -- confirm against the original layout.
    dump_profile_pics(data)
    dump_json(data, progress_monitor.start_time)
    filename = "%s.json" % progress_monitor.start_time.strftime("%Y%m%d%H%M%S")
    add_to_archive("facebook_app", filename, progress_monitor.start_time)
    progress_monitor.complete()
    logger.debug("facebook_app backup done")
    return progress_monitor.start_time