def find_zipped_screenshottable_graphics():
    """
    Return a list of ProductionLink objects that link to archive files which we can
    plausibly expect to extract screenshots from, for productions that don't have
    screenshots already. At most one link is returned per production.

    Side effect: a link that has a directory listing but no stored candidate file is
    saved with an updated file_for_screenshot / is_unresolved_for_screenshotting.
    """
    # prods of supertype=graphics that have download links but no screenshots
    prods = Production.objects.annotate(screenshot_count=Count('screenshots')).filter(
        supertype='graphics', screenshot_count=0, links__is_download_link=True
    ).prefetch_related('links', 'types')

    text_art_types = ('ascii', 'ascii-collection', 'ansi')

    prod_links = []
    for prod in prods:
        # skip ASCII/ANSI prods. This inspects prod.types.all() rather than calling
        # prod.types.filter(...), so that the prefetched types are reused instead of
        # running a fresh query for every production
        if any(prod_type.internal_name in text_art_types for prod_type in prod.types.all()):
            continue

        for link in prod.links.all():
            if not (link.is_download_link and link.is_zip_file()):
                continue
            if link.has_bad_image or not link.is_believed_downloadable():
                continue

            file_for_screenshot = None
            # see if we've already got a best candidate archive member to take the image from
            if link.file_for_screenshot:
                file_for_screenshot = link.file_for_screenshot
            else:
                # failing that, see if we already have a directory listing for this download
                # and can derive a candidate from that
                archive_members = link.archive_members()
                if archive_members:
                    file_for_screenshot = select_screenshot_file(archive_members)
                    if file_for_screenshot:
                        # we've found a candidate (which probably means we've improved
                        # select_screenshot_file since it was last run on this archive) -
                        # might as well store it against the ProductionLink, so it doesn't
                        # show up as something to be manually resolved
                        link.file_for_screenshot = file_for_screenshot
                        link.is_unresolved_for_screenshotting = False
                        link.save()
                    else:
                        # we have a directory listing but no clear candidate, so give up
                        # on this link
                        link.is_unresolved_for_screenshotting = True
                        link.save()
                        continue

            if file_for_screenshot:
                # we know in advance which file we'd like to extract from the archive -
                # better make sure it's a format we can actually handle, then.
                extension = file_for_screenshot.split('.')[-1].lower()
                if extension not in USABLE_IMAGE_FILE_EXTENSIONS:
                    continue

            # no candidate and no directory listing yet is still acceptable: the link
            # is queued and the candidate gets chosen once the archive has been fetched
            prod_links.append(link)
            break  # success, so ignore any remaining links for this prod

    return prod_links
def create_screenshot_from_production_link(production_link_id):
    """
    Fetch the file behind the given ProductionLink and create a Screenshot for its
    production from it - either from the file itself, or (for zip files) from the
    archive member recorded in / selected for file_for_screenshot.

    Does nothing if the link no longer exists, the production already has a screenshot,
    or the link is already flagged with has_bad_image. If the archive or image cannot be
    processed, the link is flagged with has_bad_image and no screenshot is created.
    """
    # NOTE(review): this function name is defined more than once in this file; Python
    # keeps only the last definition it executes - confirm which copy is meant to be live.
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        # don't create a screenshot if a previous attempt has failed during image processing
        return

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        if not prod_link.file_for_screenshot:
            archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

            if not file_for_screenshot:
                # no candidate could be picked, so there is nothing to extract; bail out
                # here rather than calling .split() on an empty / missing filename
                return

        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension not in USABLE_IMAGE_FILE_EXTENSIONS:
            # image is not a usable format
            return

        archive = None
        try:
            archive = blob.as_zipfile()
            # decode the filename as stored in the db
            filename = unpack_db_zip_filename(prod_link.file_for_screenshot)
            member_buf = io.BytesIO(archive.read(filename))
        except zipfile.BadZipfile:
            prod_link.has_bad_image = True
            prod_link.save()
            return
        finally:
            # always release the archive handle, whichever way we leave the block
            if archive is not None:
                archive.close()

        try:
            img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    else:
        try:
            img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    basename = (
        sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
    )
    try:
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
        # leave original until last, because if it's already a websafe format it'll just return
        # the original file handle, and the storage backend might close the file after uploading
        # which screws with PIL's ability to create resized versions...
        upload_original(img, screenshot, basename)
    except IOError:  # pragma: no cover
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
def create_screenshot_from_production_link(production_link_id):
    """
    Fetch the file behind the given ProductionLink and create a Screenshot for its
    production from it - either from the file itself, or (for zip files) from the
    archive member recorded in / selected for file_for_screenshot.

    Does nothing if the link no longer exists, the production already has a screenshot,
    or the link is already flagged with has_bad_image. If the archive or image cannot be
    processed, the link is flagged with has_bad_image and no screenshot is created.
    """
    # NOTE(review): this function name is defined more than once in this file; Python
    # keeps only the last definition it executes - confirm which copy is meant to be live.

    # imported locally so that this block does not rely on a module-level import;
    # io.BytesIO is used in place of the Python-2-only cStringIO
    import io

    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        # don't create a screenshot if a previous attempt has failed during image processing
        return

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        if not prod_link.file_for_screenshot:
            archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

            if not file_for_screenshot:
                # no candidate could be picked, so there is nothing to extract; bail out
                # here rather than calling .split() on an empty / missing filename
                return

        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension not in USABLE_IMAGE_FILE_EXTENSIONS:
            # image is not a usable format
            return

        archive = None
        try:
            # opening the archive happens inside the try block, so that a corrupt zip
            # gets flagged as a bad image instead of raising out of this function
            archive = blob.as_zipfile()
            # we encode the filename as iso-8859-1 before retrieving it, because we
            # decoded it that way on insertion into the database to ensure that it had
            # a valid unicode string representation - see mirror/models.py
            # NOTE(review): Python 3's zipfile looks members up by str name - confirm that
            # passing the encoded name is still what is wanted here
            member_buf = io.BytesIO(
                archive.read(prod_link.file_for_screenshot.encode('iso-8859-1')))
        except zipfile.BadZipfile:
            prod_link.has_bad_image = True
            prod_link.save()
            return
        finally:
            # always release the archive handle, whichever way we leave the block
            if archive is not None:
                archive.close()

        try:
            img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    else:
        try:
            img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    basename = (
        sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
    )
    try:
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
        # the original goes last: if it's already a websafe format the upload may hand over
        # the original file handle, and a storage backend that closes it after uploading
        # would leave PIL unable to create the resized versions
        upload_original(img, screenshot, basename, reduced_redundancy=True)
    except IOError:
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
def create_screenshot_from_production_link(production_link_id):
    """
    Create a Screenshot for the production that the given ProductionLink belongs to,
    using the file the link points to. For zip files the image is read from the archive
    member named by file_for_screenshot (chosen here if not already set); otherwise the
    downloaded file itself is used as the image.

    Returns without creating anything if the link has been deleted, the production
    already has a screenshot, or the link is flagged with has_bad_image. Failures while
    reading the archive or processing the image flag the link with has_bad_image.
    """
    # NOTE(review): this function name is defined more than once in this file; Python
    # keeps only the last definition it executes - confirm which copy is meant to be live.

    # imported locally so that this block does not rely on a module-level import;
    # io.BytesIO is used in place of the Python-2-only cStringIO
    import io

    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        # don't create a screenshot if a previous attempt has failed during image processing
        return

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if not prod_link.is_zip_file():
        # plain file: treat the download itself as the image
        try:
            img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    else:
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        if not prod_link.file_for_screenshot:
            archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

            if not file_for_screenshot:
                # nothing suitable in the archive; stop here rather than calling
                # .split() on an empty / missing filename
                return

        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension not in USABLE_IMAGE_FILE_EXTENSIONS:
            # image is not a usable format
            return

        archive = None
        try:
            # the archive is opened inside the try block, so that a corrupt zip is
            # recorded as a bad image instead of raising out of this function
            archive = blob.as_zipfile()
            # we encode the filename as iso-8859-1 before retrieving it, because we
            # decoded it that way on insertion into the database to ensure that it had
            # a valid unicode string representation - see mirror/models.py
            # NOTE(review): Python 3's zipfile looks members up by str name - confirm that
            # passing the encoded name is still what is wanted here
            member_buf = io.BytesIO(
                archive.read(prod_link.file_for_screenshot.encode('iso-8859-1'))
            )
        except zipfile.BadZipfile:
            prod_link.has_bad_image = True
            prod_link.save()
            return
        finally:
            # close the archive on every exit path
            if archive is not None:
                archive.close()

        try:
            img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    basename = (
        sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
    )
    try:
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
        # the original goes last: if it's already a websafe format the upload may hand over
        # the original file handle, and a storage backend that closes it after uploading
        # would leave PIL unable to create the resized versions
        upload_original(img, screenshot, basename, reduced_redundancy=True)
    except IOError:
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()