Example #1
0
def get_update_pictures(year='2012'):
    """Download missing person pictures, thumbnail them, upload to gdrive.

    Iterates the mandates of ``year``. For each one whose person does not
    yet have a ``<slug>.jpg`` entry in the gdrive picture folder, the image
    at ``mandate.picture_url`` is downloaded into a temporary directory,
    resized by the external ``convert`` command (presumably ImageMagick)
    and the resulting thumbnail is uploaded under that filename.

    Mandates without a ``picture_url`` are skipped with a warning.

    :param year: mandate year to process; converted with ``int()``.
    :raises AssertionError: if a picture URL does not end in ``.jpg``, or
        the download does not answer ``200`` with ``image/jpeg`` content.
    :raises subprocess.CalledProcessError: if ``convert`` exits non-zero.
    """
    import subprocess
    from sqlalchemy.orm import joinedload
    from mptracker.scraper.gdrive import PictureFolder

    gdrive = PictureFolder(PICTURES_FOLDER_KEY)
    existing = {p.filename for p in gdrive.list()}

    query = (
        models.Mandate.query
        .filter_by(year=int(year))
        .options(joinedload('person'))
    )

    for mandate in query:
        person = mandate.person
        filename = '%s.jpg' % person.slug
        if filename in existing:
            continue

        if mandate.picture_url is None:
            logger.warning(
                "No picture available for %r", person.name_first_last)
            continue

        # The checks below are raised explicitly instead of using ``assert``
        # so they still run under ``python -O``; the exception type callers
        # may see (AssertionError) is unchanged.
        if not mandate.picture_url.endswith('.jpg'):
            raise AssertionError(
                "Picture URL is not a .jpg: %r" % mandate.picture_url)

        logger.info("Downloading %r", filename)

        with temp_dir() as tmp:
            orig_path = tmp / 'orig.jpg'
            thumb_path = tmp / 'thumb.jpg'

            resp = requests.get(mandate.picture_url, stream=True)
            # Everything that touches the streamed response lives inside the
            # try block so the connection is released even when a check
            # fails before any byte of the body has been read.
            try:
                if resp.status_code != 200:
                    raise AssertionError(
                        "Unexpected status %r for %r"
                        % (resp.status_code, mandate.picture_url))

                content_type = resp.headers.get('Content-Type')
                if content_type != 'image/jpeg':
                    raise AssertionError(
                        "Unexpected content type %r for %r"
                        % (content_type, mandate.picture_url))

                with orig_path.open('wb') as f:
                    for chunk in resp.iter_content(65536):
                        f.write(chunk)
            finally:
                resp.close()

            logger.info("Converting to thumbnail")
            subprocess.check_call([
                'convert',
                orig_path,
                '-geometry', '300x300^',
                '-quality', '85',
                thumb_path,
            ])

            logger.info("Uploading to gdrive")

            with thumb_path.open('rb') as f:
                data = f.read()

            file_id = gdrive.upload(filename, data)
            logger.info("Upload successful: %r", file_id)

            # Remember the upload so another mandate that resolves to the
            # same person slug in this run does not upload it a second time.
            existing.add(filename)
Example #2
0
def get_pictures(year='2012'):
    """Copy the gdrive picture folder into the app's static directory.

    Files are written to ``<static_folder>/pictures/<year>``, which is
    created if missing. A remote picture is skipped when a local file of
    the same name already exists and its MD5 equals ``picture.md5``;
    otherwise it is downloaded to a temporary file in the same directory
    and then renamed over the final path, so the final path never holds a
    partially written file.

    :param year: name of the local sub-directory to populate (used as a
        path component). The remote folder is always the one identified by
        ``PICTURES_FOLDER_KEY``.
    """
    from mptracker.scraper.gdrive import PictureFolder

    pictures_dir = path(flask.current_app.static_folder) / 'pictures' / year
    pictures_dir.mkdir_p()

    gdrive = PictureFolder(PICTURES_FOLDER_KEY)

    for picture in gdrive.list():
        fs_path = pictures_dir / picture.filename
        if fs_path.exists():
            with fs_path.open('rb') as f:
                md5 = calculate_md5(iter_file(f))

            if md5 == picture.md5:
                logger.info("Already up to date %r", picture.filename)
                continue

        logger.info("Downloading %r", picture.filename)
        tmp_path = None
        try:
            # delete=False because the file must outlive the ``with`` block
            # in order to be renamed into place afterwards.
            with tempfile.NamedTemporaryFile(
                    dir=pictures_dir, delete=False) as f:
                tmp_path = path(f.name)
                for chunk in gdrive.download(picture):
                    f.write(chunk)

            tmp_path.rename(fs_path)
        except BaseException:
            # A failed download or rename would otherwise leave a stray,
            # partially written temp file inside the pictures directory.
            if tmp_path is not None and tmp_path.exists():
                tmp_path.remove()
            raise