Example #1
import os
import Queue
import urlparse

# module-level helpers assumed from the surrounding module: BACKUP_DIR,
# create_folder, WorkerPool, Worker, dump_photo_set, photo_exists
def dump_albums(data):
    path = create_folder(BACKUP_DIR, 'facebook_app', 'albums')

    photo_queue = Queue.Queue(1500)

    workers = WorkerPool(Worker, photo_queue)
    workers.start()

    for album_id, album in data['albums'].iteritems():
        album_path = create_folder(path, album['id'])

        downloaded_photos = os.listdir(album_path)
        for photo_id, photo in album['photos'].iteritems():
            source_url = urlparse.urlparse(photo['source'])
            # derive the file extension (with leading dot) from the URL path;
            # splitext returns '' when there is none, and, unlike the original
            # regex, never raises on paths that end in '/'
            ext = os.path.splitext(source_url.path)[1]

            if (photo['id'] not in downloaded_photos
                    or not photo_exists(album_path, photo)):
                # never downloaded, or the set is incomplete (thumbnail or
                # full-size image missing), so (re)download it
                photo_path = create_folder(album_path, photo['id'])
                photo_queue.put((dump_photo_set, (photo_path, photo, ext), {}))

    workers.stop()
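Every example on this page enqueues jobs as (callable, args, kwargs) tuples and relies on a WorkerPool and Worker that the page does not show. A minimal sketch of what that pair could look like, assuming a sentinel-based shutdown (the class names match the examples; everything else is a guess, not the original implementation):

import threading

class Worker(threading.Thread):
    """Pulls (func, args, kwargs) tuples off the queue until it sees None."""
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            job = self.queue.get()
            if job is None:  # sentinel: time to shut down
                break
            func, args, kwargs = job
            try:
                func(*args, **kwargs)
            except Exception:
                pass  # a real pool would log the failure

class WorkerPool(object):
    """Starts a fixed number of workers and stops them with sentinels."""
    def __init__(self, worker_cls, queue, size=4):
        self.queue = queue
        self.workers = [worker_cls(queue) for _ in range(size)]

    def start(self):
        for worker in self.workers:
            worker.start()

    def stop(self):
        for _ in self.workers:
            self.queue.put(None)  # one sentinel per worker
        for worker in self.workers:
            worker.join()

Because Queue is FIFO, calling stop() right after the enqueue loops (as the examples do) lets the workers drain all pending jobs before they hit a sentinel.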
Example #2
import os
import Queue

# assumes the same module-level helpers as Example #1
def dump_photos(data):
    path = create_folder(BACKUP_DIR, 'facebook_app', 'photos')
    photo_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, photo_queue)
    workers.start()

    downloaded_photos = os.listdir(path)

    for photo in data['photos']:
        if photo['id'] not in downloaded_photos or not photo_exists(path, photo):
            # never downloaded, or the set is incomplete, so (re)download it;
            # unlike the original, the folder is only created when needed,
            # matching Examples #1 and #3
            photo_path = create_folder(path, photo['id'])
            photo_queue.put((dump_photo_set, (photo_path, photo, ".jpg"), {}))

    workers.stop()
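The comment in Example #1 ("either thumbnail or photo is missing") suggests that photo_exists verifies a complete set is already on disk. The real helper is not shown on this page; a hypothetical sketch, assuming dump_photo_set writes a full-size file and a thumbnail into the photo's folder:

import os

def photo_exists(base_path, photo):
    # hypothetical: a set counts as downloaded only when both the
    # full-size image and the thumbnail are present in its folder
    photo_dir = os.path.join(base_path, photo['id'])
    if not os.path.isdir(photo_dir):
        return False
    files = os.listdir(photo_dir)
    has_photo = any(name.startswith('photo') for name in files)
    has_thumb = any(name.startswith('thumbnail') for name in files)
    return has_photo and has_thumb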
Example #3
import os
import Queue

# assumes the same module-level helpers as Example #1
def dump_albums(data):
    path = create_folder(BACKUP_DIR, 'picasaweb_app', 'albums')

    photo_queue = Queue.Queue(1500)

    workers = WorkerPool(Worker, photo_queue)
    workers.start()

    for album_id, album in data['albums'].iteritems():
        album_path = create_folder(path, album['id'])

        downloaded_photos = os.listdir(album_path)

        for photo_id, photo in album['photos'].iteritems():
            if photo_id not in downloaded_photos or not photo_exists(album_path, photo):
                # never downloaded, or the set is incomplete, so (re)download
                # it; photo_id is used consistently here (the original mixed
                # photo_id and photo['id'] between the two branches)
                photo_path = create_folder(album_path, photo_id)
                photo_queue.put((dump_photo_set, (photo_path, photo), {}))

    workers.stop()
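dump_photo_set is likewise not shown. The Facebook examples pass an extension and this PicasaWeb example does not, so that argument is presumably optional. An illustrative sketch, assuming photo['source'] is the full-size URL and photo['picture'] (when present) the thumbnail:

import os
import urllib2

def dump_photo_set(photo_path, photo, ext=''):
    # illustrative only: fetch each URL of the set into photo_path
    targets = [('photo', photo.get('source'))]
    if photo.get('picture'):
        targets.append(('thumbnail', photo['picture']))
    for name, url in targets:
        if not url:
            continue
        data = urllib2.urlopen(url).read()
        out = open(os.path.join(photo_path, name + ext), 'wb')
        try:
            out.write(data)
        finally:
            out.close()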
Example #4
                GOOGLE_OAUTH_CONSUMER_SECRET,
                str(access_token['access_token']),
                str(access_token['access_token_secret']))
    except Exception, e:
        logger.debug('PicasaWeb auth: %r', e)
        raise  # bare raise preserves the original traceback
    logger.debug('we got auth')
    try:
        profile = auth.get_profile()
    except Exception, e:
        logger.debug('error getting profile %r', e)
        raise
    logger.debug('we got profile: %r', profile)    
    data = {'profile': profile}
    picasa_job_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, picasa_job_queue)
    workers.start()

    logger.debug('workers started')
    progress_monitor.update(10)

    logger.debug('getting albums')
    albums = auth.google_api('/data/feed/api/user/default',
            {'kind': 'album'}, photos.UserFeedFromString)
   
    logger.debug('albums xml: %r', albums.ToString())
    data['albums'] = {}

    def func(hash_key, result_dict, client, album):
        album_photos = client.PhotoFeedFromString(
                urllib2.urlopen(album.GetPhotosUri()).read())
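The snippet above is cut off, but by analogy with Example #5 the nested func would presumably be queued once per album, along these lines (a hypothetical continuation, assuming the gdata feed lists its albums in albums.entry; not the original code):

    for album in albums.entry:
        picasa_job_queue.put((func, ('albums', data, auth, album), {}))

    workers.stop()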
Example #5
import Queue
from operator import itemgetter

import facebook

# module-level helpers assumed: logger, get_progress_monitor, WorkerPool,
# Worker, dump_albums, dump_photos, dump_profile_pics, dump_json,
# add_to_archive
def run_backup(preference):
    logger.debug("running backup routine for facebook_app")
    logger.debug("preference: %r", preference)
    progress_monitor = get_progress_monitor("facebook_app")
    
    graph = facebook.GraphAPI(preference['tokens'])
    data = {'profile': graph.get_object('me')}
    facebook_job_queue = Queue.Queue(1500)

    logger.debug("starting workers")

    workers = WorkerPool(Worker, facebook_job_queue)
    workers.start()

    for k, v in preference['settings'].iteritems():
        progress_monitor.update(10)
        logger.debug("processing %s", k)
        if v:
            if k == 'albums':
                data[k] = {}

                def func(hash_key, result_dict, client, album):
                    logger.debug("fetching data for album %s", album['id'])
                    photos = {}
                    # 'data' is already a list; the comprehension was redundant
                    album_photos = client.get_connections(album['id'], 'photos', limit=1000000)['data']
                    album_photos = sorted(album_photos, key=itemgetter('created_time'), reverse=True)
                    
                    for photo in album_photos:
                        logger.debug("fetching comments for photo %s in album %s",
                                photo['id'], album['id'])
                        comments = client.get_connections(photo['id'], 'comments', limit=1000000)['data']
                        photo['comments'] = sorted(comments, key=itemgetter('created_time'))
                        photos[photo['id']] = photo

                    album['photos'] = photos
                    result_dict[hash_key][album['id']] = album

                for obj in graph.get_connections('me', 'albums', limit=1000000)['data']:
                    logger.debug("queueing album %s", obj['id'])
                    facebook_job_queue.put((func, (k, data, graph, obj,), {}))

            elif k == 'photos':
                data[k] = []
                logger.debug("fetching data for tagged photos")
                photos = graph.get_connections('me', k, limit=1000000)['data']

                def func(hash_key, result_dict, client, obj):
                    logger.debug("fetching comments for tagged photo %s", obj['id'])
                    comments = client.get_connections(obj['id'], 'comments', limit=1000000)['data']
                    obj['comments'] = comments
                    result_dict[hash_key].append(obj)
                    
                for photo in photos:
                    logger.debug("queueing comments for tagged photo %s", photo['id'])
                    facebook_job_queue.put((func, (k, data, graph, photo), {}))


            else:
                data[k] = []
                
                def func(hash_key, result_dict, client, obj_id):
                    logger.debug("fetching data for %s %s", hash_key, obj_id)

                    obj = client.get_object(obj_id)

                    if hash_key in ['statuses', 'events']:
                        logger.debug("fetching comments for %s %s", hash_key, obj_id)
                        comments = client.get_connections(obj_id, 'comments', limit=1000000)['data']
                        # stored under 'comments' (the original wrote 'comment',
                        # inconsistent with the other branches)
                        obj['comments'] = sorted(comments, key=itemgetter('created_time'))

                    result_dict[hash_key].append(obj)

                for obj in graph.get_connections('me', k, limit=1000000)['data']:
                    logger.debug("queueing %s %s", k, obj['id'])
                    facebook_job_queue.put((func, (k, data, graph, obj['id'],), {}))

    progress_monitor.update(10)

    workers.stop()

    if 'albums' in data:
        dump_albums(data)

    if 'photos' in data:
        dump_photos(data)

    dump_profile_pics(data)
    dump_json(data, progress_monitor.start_time)

    filename = "%s.json" % progress_monitor.start_time.strftime("%Y%m%d%H%M%S")
    add_to_archive("facebook_app", filename, progress_monitor.start_time)

    progress_monitor.complete()
    logger.debug("facebook_app backup done")
    return progress_monitor.start_time
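For reference, run_backup reads only preference['tokens'] (passed straight to facebook.GraphAPI) and preference['settings'], where each flag enables one section of the backup. A hedged example of calling it; the token value is a placeholder:

preference = {
    'tokens': '<facebook-oauth-access-token>',  # placeholder
    'settings': {
        'albums': True,    # back up albums and their photos
        'photos': True,    # back up tagged photos
        'statuses': True,  # statuses/events also get their comments fetched
        'events': False,   # False skips the section entirely
    },
}
start_time = run_backup(preference)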