def get_update_pictures(year='2012'):
    """Upload thumbnails for mandates whose picture is missing from gdrive.

    For every ``Mandate`` of the given year, the expected gdrive filename is
    ``<person.slug>.jpg``.  If that name is not already listed in the gdrive
    pictures folder, ``mandate.picture_url`` is downloaded, converted to a
    thumbnail with the external ``convert`` command (presumably ImageMagick)
    and uploaded under that filename.  Mandates without a picture URL are
    skipped with a warning.

    :param year: mandate year to process; passed through ``int()`` for the
        query filter, so both ``'2012'`` and ``2012`` are accepted.
    :raises AssertionError: if the picture URL does not end in ``.jpg``, or
        the download is not a ``200`` response with an ``image/jpeg``
        content type.
    :raises subprocess.CalledProcessError: if ``convert`` exits non-zero.
    """
    import subprocess
    from sqlalchemy.orm import joinedload
    from mptracker.scraper.gdrive import PictureFolder

    gdrive = PictureFolder(PICTURES_FOLDER_KEY)
    # Names already present in the folder; used to skip finished uploads.
    existing = set(p.filename for p in gdrive.list())

    query = (
        models.Mandate.query
        .filter_by(year=int(year))
        .options(joinedload('person'))
    )

    for mandate in query:
        person = mandate.person
        filename = '%s.jpg' % person.slug
        if filename in existing:
            continue

        if mandate.picture_url is None:
            logger.warning("No picure available for %r",
                           person.name_first_last)
            continue

        assert mandate.picture_url.endswith('.jpg')
        logger.info("Downloading %r", filename)

        with temp_dir() as tmp:
            orig_path = tmp / 'orig.jpg'
            thumb_path = tmp / 'thumb.jpg'

            resp = requests.get(mandate.picture_url, stream=True)
            try:
                # Validate inside the try block: with stream=True the
                # connection stays open until the response is closed, so a
                # failed check must still reach resp.close().
                assert resp.status_code == 200
                assert resp.headers['Content-Type'] == 'image/jpeg'
                with orig_path.open('wb') as f:
                    for chunk in resp.iter_content(65536):
                        f.write(chunk)
            finally:
                resp.close()

            logger.info("Converting to thumbnail")
            subprocess.check_call([
                'convert',
                orig_path,
                '-geometry', '300x300^',
                '-quality', '85',
                thumb_path,
            ])

            logger.info("Uploading to gdrive")
            with thumb_path.open('rb') as f:
                data = f.read()
            file_id = gdrive.upload(filename, data)
            logger.info("Upload successful: %r", file_id)
def get_pictures(year='2012'):
    """Download pictures from the gdrive folder into the static directory.

    Files are stored under ``<static_folder>/pictures/<year>``.  A picture
    whose local copy already has the same MD5 as the gdrive entry is left
    untouched; every other picture is downloaded to a temporary file in the
    target directory and then renamed over the final path.

    :param year: name of the sub-directory to store pictures in; converted
        with ``str()`` so an integer year is accepted as well.
    """
    import os
    from mptracker.scraper.gdrive import PictureFolder

    pictures_dir = (
        path(flask.current_app.static_folder) / 'pictures' / str(year)
    )
    pictures_dir.mkdir_p()

    gdrive = PictureFolder(PICTURES_FOLDER_KEY)
    for picture in gdrive.list():
        fs_path = pictures_dir / picture.filename
        if fs_path.exists():
            with fs_path.open('rb') as f:
                md5 = calculate_md5(iter_file(f))

            if md5 == picture.md5:
                logger.info("Already up to date %r", picture.filename)
                continue

        logger.info("Downloading %r", picture.filename)
        # The temporary file lives in the target directory so the final
        # rename does not have to cross directories.
        tmp_file = tempfile.NamedTemporaryFile(dir=pictures_dir, delete=False)
        try:
            with tmp_file:
                for chunk in gdrive.download(picture):
                    tmp_file.write(chunk)
            path(tmp_file.name).rename(fs_path)
        except BaseException:
            # delete=False means nothing cleans up for us: drop the partial
            # download so failures do not litter the pictures directory.
            try:
                os.remove(tmp_file.name)
            except OSError:
                # Best effort only; the original error is what matters.
                pass
            raise