Example #1
0
def find_zipped_screenshottable_graphics():
    """Find archive download links we can plausibly extract screenshots from.

    Considers productions of supertype 'graphics' that have at least one
    download link and no screenshots yet, skipping ASCII/ANSI productions.
    At most one link is picked per production.

    Side effect: when a link has no stored candidate file but does have an
    archive listing, the outcome of select_screenshot_file is saved back onto
    the link (either the chosen file, or the "unresolved" flag).

    Returns:
        A list of ProductionLink objects.
    """
    # type names identifying ASCII/ANSI prods, which are skipped
    skipped_type_names = ('ascii', 'ascii-collection', 'ansi')

    # prods of supertype=graphics that have download links but no screenshots
    prods = (
        Production.objects
        .annotate(screenshot_count=Count('screenshots'))
        .filter(supertype='graphics', screenshot_count=0, links__is_download_link=True)
        .prefetch_related('links', 'types')
    )

    prod_links = []
    for prod in prods:
        # Check the prefetched types in Python: calling .filter() on the
        # relation here would issue a fresh query per production and ignore
        # the prefetch_related('types') cache.
        if any(prod_type.internal_name in skipped_type_names for prod_type in prod.types.all()):
            continue

        for link in prod.links.all():
            if not (link.is_download_link and link.is_zip_file()):
                continue

            if link.has_bad_image or not link.is_believed_downloadable():
                continue

            # see if we've already got a best candidate archive member to take the image from
            file_for_screenshot = link.file_for_screenshot
            if not file_for_screenshot:
                # failing that, see if we already have a directory listing for this download
                # and can derive a candidate from that
                archive_members = link.archive_members()
                if archive_members:
                    file_for_screenshot = select_screenshot_file(archive_members)
                    if not file_for_screenshot:
                        # we have a directory listing but no clear candidate, so give up on this link
                        link.is_unresolved_for_screenshotting = True
                        link.save()
                        continue

                    # we've found a candidate (which probably means we've improved
                    # select_screenshot_file since it was last run on this archive) - might as
                    # well store it against the ProductionLink, so it doesn't show up as
                    # something to be manually resolved
                    link.file_for_screenshot = file_for_screenshot
                    link.is_unresolved_for_screenshotting = False
                    link.save()

            if file_for_screenshot:
                # we know in advance which file we'd like to extract from the archive -
                # better make sure it's a format we can actually handle, then.
                extension = file_for_screenshot.split('.')[-1].lower()
                if extension not in USABLE_IMAGE_FILE_EXTENSIONS:
                    continue

            # No candidate and no listing yet is still acceptable: the link is
            # kept so the choice can be made once the archive has been fetched.
            prod_links.append(link)
            break  # success, so ignore any remaining links for this prod

    return prod_links
Example #2
0
def find_zipped_screenshottable_graphics():
	"""Find archive download links we can plausibly extract screenshots from.

	Considers productions of supertype 'graphics' that have at least one
	download link and no screenshots yet, skipping ASCII/ANSI productions.
	At most one link is picked per production.

	Side effect: when a link has no stored candidate file but does have an
	archive listing, the outcome of select_screenshot_file is saved back onto
	the link (either the chosen file, or the "unresolved" flag).

	Returns:
		A list of ProductionLink objects.
	"""
	# type names identifying ASCII/ANSI prods, which are skipped
	skipped_type_names = ('ascii', 'ascii-collection', 'ansi')

	# prods of supertype=graphics that have download links but no screenshots
	prods = (
		Production.objects
		.annotate(screenshot_count=Count('screenshots'))
		.filter(supertype='graphics', screenshot_count=0, links__is_download_link=True)
		.prefetch_related('links', 'types')
	)

	prod_links = []
	for prod in prods:
		# Check the prefetched types in Python: calling .filter() on the
		# relation here would issue a fresh query per production and ignore
		# the prefetch_related('types') cache.
		if any(prod_type.internal_name in skipped_type_names for prod_type in prod.types.all()):
			continue

		for link in prod.links.all():
			if not (link.is_download_link and link.is_zip_file()):
				continue

			if link.has_bad_image or not link.is_believed_downloadable():
				continue

			# see if we've already got a best candidate archive member to take the image from
			file_for_screenshot = link.file_for_screenshot
			if not file_for_screenshot:
				# failing that, see if we already have a directory listing for this download
				# and can derive a candidate from that
				archive_members = link.archive_members()
				if archive_members:
					file_for_screenshot = select_screenshot_file(archive_members)
					if not file_for_screenshot:
						# we have a directory listing but no clear candidate, so give up on this link
						link.is_unresolved_for_screenshotting = True
						link.save()
						continue

					# we've found a candidate (which probably means we've improved
					# select_screenshot_file since it was last run on this archive) - might as
					# well store it against the ProductionLink, so it doesn't show up as
					# something to be manually resolved
					link.file_for_screenshot = file_for_screenshot
					link.is_unresolved_for_screenshotting = False
					link.save()

			if file_for_screenshot:
				# we know in advance which file we'd like to extract from the archive -
				# better make sure it's a format we can actually handle, then.
				extension = file_for_screenshot.split('.')[-1].lower()
				if extension not in USABLE_IMAGE_FILE_EXTENSIONS:
					continue

			# No candidate and no listing yet is still acceptable: the link is
			# kept so the choice can be made once the archive has been fetched.
			prod_links.append(link)
			break  # success, so ignore any remaining links for this prod

	return prod_links
Example #3
0
def create_screenshot_from_production_link(production_link_id):
    """Create a screenshot for a production from the file behind one of its links.

    Returns without doing anything if the link no longer exists, if the
    production already has a screenshot (clearing the link's "unresolved"
    flag if it was set), or if the link is flagged as having a bad image.

    Otherwise the linked file is fetched. For zip links an archive member is
    chosen (the stored candidate, or one picked by select_screenshot_file and
    saved on the link) and read from the archive; for other links the fetched
    file itself is treated as the image. Standard, thumbnail and original
    versions are uploaded and the Screenshot is saved.

    A corrupt archive, an unreadable image, or an IOError during upload sets
    has_bad_image on the link so the attempt is not repeated.

    Args:
        production_link_id: primary key of the ProductionLink to process.

    Returns:
        None.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    # exists() rather than count(): we only care whether there is at least one
    if prod_link.production.screenshots.exists():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        if not prod_link.file_for_screenshot:
            archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

        member_name = prod_link.file_for_screenshot
        if not member_name:
            # no candidate could be chosen (the link has just been flagged as
            # unresolved above); bail out explicitly rather than calling
            # .split on an empty / missing name
            return

        image_extension = member_name.split('.')[-1].lower()
        if image_extension not in USABLE_IMAGE_FILE_EXTENSIONS:
            return  # image is not a usable format

        z = None
        try:
            z = blob.as_zipfile()
            # decode the filename as stored in the db
            filename = unpack_db_zip_filename(member_name)
            member_buf = io.BytesIO(z.read(filename))
        except zipfile.BadZipfile:
            prod_link.has_bad_image = True
            prod_link.save()
            return
        finally:
            # close the archive on every path, including unexpected exceptions
            if z is not None:
                z.close()

        try:
            img = PILConvertibleImage(member_buf, name_hint=member_name)
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    else:
        try:
            img = PILConvertibleImage(blob.as_io_buffer(),
                                      name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(
        production_link_id) + '.'
    try:
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
        # leave original until last, because if it's already a websafe format it'll just return
        # the original file handle, and the storage backend might close the file after uploading
        # which screws with PIL's ability to create resized versions...
        upload_original(img, screenshot, basename)
    except IOError:  # pragma: no cover
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
Example #4
0
def create_screenshot_from_production_link(production_link_id):
    """Create a screenshot for a production from the file behind one of its links.

    Returns without doing anything if the link no longer exists, if the
    production already has a screenshot (clearing the link's "unresolved"
    flag if it was set), or if the link is flagged as having a bad image.

    Otherwise the linked file is fetched. For zip links an archive member is
    chosen (the stored candidate, or one picked by select_screenshot_file and
    saved on the link) and read from the archive; for other links the fetched
    file itself is treated as the image. Original, standard and thumbnail
    versions are uploaded and the Screenshot is saved.

    A corrupt archive, an unreadable image, or an IOError during upload sets
    has_bad_image on the link so the attempt is not repeated.

    Args:
        production_link_id: primary key of the ProductionLink to process.

    Returns:
        None.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    # exists() rather than count(): we only care whether there is at least one
    if prod_link.production.screenshots.exists():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        if not prod_link.file_for_screenshot:
            archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

        member_name = prod_link.file_for_screenshot
        if not member_name:
            # no candidate could be chosen (the link has just been flagged as
            # unresolved above); bail out explicitly rather than calling
            # .split on an empty / missing name
            return

        image_extension = member_name.split('.')[-1].lower()
        if image_extension not in USABLE_IMAGE_FILE_EXTENSIONS:
            return  # image is not a usable format

        z = None
        try:
            # open the archive inside the try block: a corrupt file raises
            # BadZipfile as soon as it is opened, not only when a member is read
            z = blob.as_zipfile()
            # we encode the filename as iso-8859-1 before retrieving it, because we
            # decoded it that way on insertion into the database to ensure that it had
            # a valid unicode string representation - see mirror/models.py
            member_buf = cStringIO.StringIO(
                z.read(member_name.encode('iso-8859-1')))
        except zipfile.BadZipfile:
            prod_link.has_bad_image = True
            prod_link.save()
            return
        finally:
            # close the archive on every path, including unexpected exceptions
            if z is not None:
                z.close()

        try:
            img = PILConvertibleImage(member_buf, name_hint=member_name)
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    else:
        try:
            img = PILConvertibleImage(blob.as_io_buffer(),
                                      name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(
        production_link_id) + '.'
    try:
        upload_original(img, screenshot, basename, reduced_redundancy=True)
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
    except IOError:
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
Example #5
0
def create_screenshot_from_production_link(production_link_id):
	"""Create a screenshot for a production from the file behind one of its links.

	Returns without doing anything if the link no longer exists, if the
	production already has a screenshot (clearing the link's "unresolved"
	flag if it was set), or if the link is flagged as having a bad image.

	Otherwise the linked file is fetched. For zip links an archive member is
	chosen (the stored candidate, or one picked by select_screenshot_file and
	saved on the link) and read from the archive; for other links the fetched
	file itself is treated as the image. Original, standard and thumbnail
	versions are uploaded and the Screenshot is saved.

	A corrupt archive, an unreadable image, or an IOError during upload sets
	has_bad_image on the link so the attempt is not repeated.

	Args:
		production_link_id: primary key of the ProductionLink to process.

	Returns:
		None.
	"""
	try:
		prod_link = ProductionLink.objects.get(id=production_link_id)
	except ProductionLink.DoesNotExist:
		# guess it was deleted in the meantime, then.
		return

	# exists() rather than count(): we only care whether there is at least one
	if prod_link.production.screenshots.exists():
		# don't create a screenshot if there's one already
		if prod_link.is_unresolved_for_screenshotting:
			prod_link.is_unresolved_for_screenshotting = False
			prod_link.save()
		return

	if prod_link.has_bad_image:
		return  # don't create a screenshot if a previous attempt has failed during image processing

	production_id = prod_link.production_id
	url = prod_link.download_url
	blob = fetch_link(prod_link)
	sha1 = blob.sha1

	if prod_link.is_zip_file():
		# select the archive member to extract a screenshot from, if we don't have
		# a candidate already
		if not prod_link.file_for_screenshot:
			archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
			file_for_screenshot = select_screenshot_file(archive_members)
			if file_for_screenshot:
				prod_link.file_for_screenshot = file_for_screenshot
				prod_link.is_unresolved_for_screenshotting = False
			else:
				prod_link.is_unresolved_for_screenshotting = True
			prod_link.save()

		member_name = prod_link.file_for_screenshot
		if not member_name:
			# no candidate could be chosen (the link has just been flagged as
			# unresolved above); bail out explicitly rather than calling
			# .split on an empty / missing name
			return

		image_extension = member_name.split('.')[-1].lower()
		if image_extension not in USABLE_IMAGE_FILE_EXTENSIONS:
			return  # image is not a usable format

		z = None
		try:
			# open the archive inside the try block: a corrupt file raises
			# BadZipfile as soon as it is opened, not only when a member is read
			z = blob.as_zipfile()
			# we encode the filename as iso-8859-1 before retrieving it, because we
			# decoded it that way on insertion into the database to ensure that it had
			# a valid unicode string representation - see mirror/models.py
			member_buf = cStringIO.StringIO(
				z.read(member_name.encode('iso-8859-1'))
			)
		except zipfile.BadZipfile:
			prod_link.has_bad_image = True
			prod_link.save()
			return
		finally:
			# close the archive on every path, including unexpected exceptions
			if z is not None:
				z.close()

		try:
			img = PILConvertibleImage(member_buf, name_hint=member_name)
		except IOError:
			prod_link.has_bad_image = True
			prod_link.save()
			return
	else:
		try:
			img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
		except IOError:
			prod_link.has_bad_image = True
			prod_link.save()
			return

	screenshot = Screenshot(production_id=production_id)
	basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
	try:
		upload_original(img, screenshot, basename, reduced_redundancy=True)
		upload_standard(img, screenshot, basename)
		upload_thumb(img, screenshot, basename)
	except IOError:
		prod_link.has_bad_image = True
		prod_link.save()
		return
	screenshot.save()