Example #1
import os
import re
import urlparse
import Queue


def dump_albums(data):
    path = create_folder(BACKUP_DIR, 'facebook_app', 'albums')

    photo_queue = Queue.Queue(1500)

    workers = WorkerPool(Worker, photo_queue)
    workers.start()

    for album_id, album in data['albums'].iteritems():
        album_path = create_folder(path, album['id'])

        downloaded_photos = os.listdir(album_path)
        for photo_id, photo in album['photos'].iteritems():
            source_url = urlparse.urlparse(photo['source'])
            # pull the file extension (including its dot) out of the URL path
            match = re.match(r"^.*/([^/]+?(\.[^/]+)?)$", source_url.path)
            ext = match.group(2) or ''

            if photo['id'] not in downloaded_photos:
                photo_path = create_folder(album_path, photo['id'])
                photo_queue.put((dump_photo_set, (photo_path, photo, ext,), {}))
            elif not photo_exists(album_path, photo):
                # either the thumbnail or the photo is missing: re-download the set
                photo_path = create_folder(album_path, photo['id'])
                photo_queue.put((dump_photo_set, (photo_path, photo, ext,), {}))
            # otherwise the photo set is already in the filesystem

    workers.stop()
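
Every example here hands (func, args, kwargs) job tuples to a WorkerPool draining a bounded Queue. The pool classes are not shown in these snippets; a minimal sketch of what they might look like, assuming each Worker is a thread that executes job tuples until it sees a None sentinel (the class shape and the sentinel protocol are assumptions, not the original implementation):

import threading

class Worker(threading.Thread):
    """Runs (func, args, kwargs) job tuples until a None sentinel arrives."""
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.daemon = True

    def run(self):
        while True:
            job = self.queue.get()
            if job is None:  # sentinel: shut this worker down
                break
            func, args, kwargs = job
            func(*args, **kwargs)

class WorkerPool(object):
    """A fixed set of Worker threads sharing one job queue."""
    def __init__(self, worker_cls, queue, size=8):
        self.queue = queue
        self.workers = [worker_cls(queue) for _ in range(size)]

    def start(self):
        for worker in self.workers:
            worker.start()

    def stop(self):
        for _ in self.workers:
            self.queue.put(None)  # one sentinel per worker
        for worker in self.workers:
            worker.join()

The queues are bounded at 1500 entries, so put() blocks once the download threads fall behind, which keeps the producer's memory use flat.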
Example #2
import os
import Queue


def dump_photos(data):
    path = create_folder(BACKUP_DIR, 'facebook_app', 'photos')
    photo_queue = Queue.Queue(1500)
    workers = WorkerPool(Worker, photo_queue)
    workers.start()

    downloaded_photos = os.listdir(path)

    for photo in data['photos']:
        photo_path = create_folder(path, photo['id'])

        if photo['id'] not in downloaded_photos:
            photo_queue.put((dump_photo_set, (photo_path, photo, ".jpg",), {}))
        elif not photo_exists(path, photo):
            # thumbnail or photo missing: re-download the set
            photo_queue.put((dump_photo_set, (photo_path, photo, ".jpg",), {}))
        # otherwise the photo is already in the filesystem

    workers.stop()
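
Both dump functions skip photo sets that are already on disk. create_folder and photo_exists are not defined in these snippets; a plausible sketch, assuming create_folder joins and creates the path and photo_exists treats a non-empty photo folder as complete (that completeness rule is an assumption):

import os

def create_folder(*parts):
    # join the path segments and create the directory if it is missing
    path = os.path.join(*parts)
    if not os.path.isdir(path):
        os.makedirs(path)
    return path

def photo_exists(parent_path, photo):
    # treat the set as complete when the photo's folder is non-empty
    photo_path = os.path.join(parent_path, photo['id'])
    return os.path.isdir(photo_path) and bool(os.listdir(photo_path))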
Example #3
import os
import Queue


def dump_albums(data):
    path = create_folder(BACKUP_DIR, 'picasaweb_app', 'albums')

    photo_queue = Queue.Queue(1500)

    workers = WorkerPool(Worker, photo_queue)
    workers.start()

    for album_id, album in data['albums'].iteritems():
        album_path = create_folder(path, album['id'])

        downloaded_photos = os.listdir(album_path)

        for photo_id, photo in album['photos'].iteritems():
            if photo_id not in downloaded_photos:
                photo_path = create_folder(album_path, photo_id)
                photo_queue.put((dump_photo_set, (photo_path, photo,), {}))
            elif not photo_exists(album_path, photo):
                # thumbnail or photo missing: re-download the set
                photo_path = create_folder(album_path, photo_id)
                photo_queue.put((dump_photo_set, (photo_path, photo,), {}))
            # otherwise the photo is already in the filesystem
    workers.stop()
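
The queued callable, dump_photo_set, runs on a worker thread and fetches the image files. Its body is not included either; a minimal Python 2 sketch, assuming photo['source'] holds the full-size image URL and that the extension parsed in Examples #1 and #2 names the output file (the file naming is an assumption):

import os
import urllib2

def dump_photo_set(photo_path, photo, ext=''):
    # fetch the full-size image and write it into the photo's folder
    image = urllib2.urlopen(photo['source']).read()
    with open(os.path.join(photo_path, photo['id'] + ext), 'wb') as fh:
        fh.write(image)

The Picasa variant in Example #3 calls dump_photo_set without the extension argument, which the default parameter absorbs.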
Example #4
        photo_album = {
            'id': album.gphoto_id.text,
            'title': album.title.text,
            'name': album.name.text,
            'timestamp': album.timestamp.text,
            'updated': album.updated.text,
            'published': album.published.text,
            'count': int(album.numphotos.text),
        }
        logger.debug('album: %r', photo_album)

        data['albums'][album.gphoto_id.text] = photo_album

        picasa_job_queue.put((func, (photo_album['id'], data, photos, album,), {}))

    progress_monitor.update(10)

    workers.stop()

    dump_albums(data)
    dump_json(data, progress_monitor.start_time)

    filename = "%s.json" % progress_monitor.start_time.strftime("%Y%m%d%H%M%S")
    add_to_archive("picasaweb_app", filename, progress_monitor.start_time)

    progress_monitor.complete()
    logger.debug("picasaweb_app backup done")
    return progress_monitor.start_time


def dump_json(data, backup_time):
    path = create_folder(BACKUP_DIR, 'picasaweb_app')
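
The snippet cuts off just inside dump_json. A hedged sketch of how such a function might continue, assuming the standard json module and the timestamped file name that the add_to_archive call above expects; this is not the original body:

import json
import os

def dump_json(data, backup_time):
    path = create_folder(BACKUP_DIR, 'picasaweb_app')
    filename = "%s.json" % backup_time.strftime("%Y%m%d%H%M%S")
    # write the collected album/photo metadata next to the downloaded images
    with open(os.path.join(path, filename), 'w') as fh:
        json.dump(data, fh)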
Example #5
import Queue
from operator import itemgetter

import facebook


def run_backup(preference):
    logger.debug("running backup routine for facebook_app")
    logger.debug("preference: %r", preference)
    progress_monitor = get_progress_monitor("facebook_app")
    
    graph = facebook.GraphAPI(preference['tokens'])
    data = {'profile': graph.get_object('me')}
    facebook_job_queue = Queue.Queue(1500)

    logger.debug("starting workers")

    workers = WorkerPool(Worker, facebook_job_queue)
    workers.start()

    for k, v in preference['settings'].iteritems():
        progress_monitor.update(10)
        logger.debug("processing %s", k)
        if v:
            if k == 'albums':
                data[k] = {}

                def func(hash_key, result_dict, client, album):
                    logger.debug("fetching data for album %s", album['id'])
                    photos = {}
                    album_photos = [photo for photo in client.get_connections(album['id'], 'photos', limit=1000000)['data']]
                    album_photos = sorted(album_photos, key=itemgetter('created_time'), reverse=True)
                    
                    for photo in album_photos:
                        logger.debug("fetching comments for photo %s in album %s",
                                photo['id'], album['id'])
                        comments = [comment for comment in client.get_connections(photo['id'], 'comments', limit=1000000)['data']]
                        photo['comments'] = sorted(comments, key=itemgetter('created_time'))
                        photos[photo['id']] = photo

                    album['photos'] = photos
                    result_dict[hash_key][album['id']] = album

                for obj in graph.get_connections('me', 'albums', limit=1000000)['data']:
                    logger.debug("queueing album %s", obj['id'])
                    facebook_job_queue.put((func, (k, data, graph, obj,), {}))

            elif k == 'photos':
                data[k] = []
                logger.debug("fetching data for tagged photos")
                photos = graph.get_connections('me', k, limit=1000000)['data']

                def func(hash_key, result_dict, client, obj):
                    logger.debug("fetching comments for tagged photo %s", obj['id'])
                    comments = [comment for comment in client.get_connections(obj['id'], 'comments', limit=1000000)['data']]
                    obj['comments'] = comments
                    result_dict[hash_key].append(obj)
                    
                for photo in photos:
                    logger.debug("queueing comments for tagged photo %s", photo['id'])
                    facebook_job_queue.put((func, (k, data, graph, photo), {}))

            else:
                data[k] = []
                
                def func(hash_key, result_dict, client, obj_id):
                    logger.debug("fetching data for %s %s", hash_key, obj_id)

                    obj = client.get_object(obj_id)

                    if hash_key in ['statuses', 'events']:
                        logger.debug("fetching comments for %s %s", hash_key, obj_id)
                        comments = [comment for comment in client.get_connections(obj_id, 'comments', limit=1000000)['data']]
                        obj['comments'] = sorted(comments, key=itemgetter('created_time'))

                    result_dict[hash_key].append(obj)

                for obj in graph.get_connections('me', k, limit=1000000)['data']:
                    logger.debug("queueing %s %s", k, obj['id'])
                    facebook_job_queue.put((func, (k, data, graph, obj['id'],), {}))

    progress_monitor.update(10)

    workers.stop()

    if 'albums' in data:
        dump_albums(data)

    if 'photos' in data:
        dump_photos(data)

    dump_profile_pics(data)
    dump_json(data, progress_monitor.start_time)

    filename = "%s.json" % progress_monitor.start_time.strftime("%Y%m%d%H%M%S")
    add_to_archive("facebook_app", filename, progress_monitor.start_time)

    progress_monitor.complete()
    logger.debug("facebook_app backup done")
    return progress_monitor.start_time
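
run_backup expects a preference mapping carrying the OAuth token and per-section toggles. A hypothetical invocation (the token value and the exact setting keys are placeholders, not values from the original project):

preference = {
    'tokens': 'EAAB...',      # placeholder OAuth access token
    'settings': {
        'albums': True,       # albums plus per-photo comments
        'photos': True,       # photos the user is tagged in
        'statuses': True,     # status updates plus comments
    },
}
backup_time = run_backup(preference)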