Example #1
0
    def test_fetch_actual_length_too_big(self):
        """A download whose real size exceeds the limit raises FileTooBig
        and records the failure as a Download row with that error type."""
        url = 'http://example.com/real-big-file.txt'
        link = self.pondlife.links.create(
            link_class='BaseUrl', parameter=url, is_download_link=True
        )

        with self.assertRaises(FileTooBig):
            fetch_link(link)

        # the failed attempt should have been logged against the URL
        failures = Download.objects.filter(parameter=url, error_type='FileTooBig')
        self.assertTrue(failures.exists())
Example #2
0
    def test_fetch_from_mirror(self, Session):
        """When a mirrored copy already exists, fetch_link serves the file
        from the S3 mirror (using the configured credentials) rather than
        re-downloading it from the original URL."""
        url = 'http://example.com/pondlife.txt'
        link = self.pondlife.links.create(
            link_class='BaseUrl', parameter=url, is_download_link=True
        )
        Download.objects.create(
            link_class='BaseUrl',
            parameter=url,
            downloaded_at=datetime.datetime(2020, 1, 1, 12, 0, 0),
            mirror_s3_key='1/2/pondlife.123.txt',
        )

        bucket = Session.return_value.resource.return_value.Bucket.return_value

        # stand in for boto3's download_fileobj: write fixed content into
        # the file object fetch_link supplies
        def fake_download_fileobj(key, fileobj):
            fileobj.write(b'hello from pondlife.txt')

        bucket.download_fileobj = fake_download_fileobj

        blob = fetch_link(link)

        Session.assert_called_once_with(
            aws_access_key_id='AWS_K3Y', aws_secret_access_key='AWS_S3CR3T'
        )
        self.assertEqual(blob.filename, 'pondlife.123.txt')
        self.assertEqual(blob.md5, 'ebceeba7ff0d18701e1952cd3865ef22')
        self.assertEqual(blob.sha1, '31a1dd3aa79730732bf32f4c8f1e3e4f9ca1aa50')
        self.assertEqual(blob.file_size, 23)
Example #3
0
    def test_upload_zipfile(self, Session):
        """Fetching a zip link uploads it to the mirror bucket and records
        the archive's member listing on the Download entry."""
        link = self.pondlife.links.create(
            link_class='BaseUrl', parameter='http://example.com/rubber.zip',
            is_download_link=True
        )
        bucket = Session.return_value.resource.return_value.Bucket.return_value

        fetch_link(link)

        bucket.put_object.assert_called_once()
        Session.assert_called_once()

        download = Download.objects.get(
            link_class='BaseUrl', parameter='http://example.com/rubber.zip'
        )
        members = download.get_archive_members()
        self.assertEqual(members.count(), 2)
        self.assertEqual(members.first().filename, '16Kb-RUBBER.txt')
Example #4
0
def create_ansi_from_production_link(production_link_id):
	"""Fetch the file behind a production link and attach it to the
	production as an Ansi record, unless one already exists."""
	try:
		prod_link = ProductionLink.objects.get(id=production_link_id)
	except ProductionLink.DoesNotExist:
		# the link was deleted before this task ran; nothing to do
		return

	# skip productions that already have an ANSI attached
	if prod_link.production.ansis.count():
		return

	blob = fetch_link(prod_link)
	sha1 = blob.sha1
	file_ext = prod_link.download_file_extension()

	# sha1-sharded key: ansi/<2>/<2>/<4>.pl<link id>.<ext>
	filename = 'ansi/%s/%s/%s.pl%s.%s' % (
		sha1[0:2], sha1[2:4], sha1[4:8], production_link_id, file_ext
	)
	url = upload_to_s3(blob.as_io_buffer(), filename, file_ext, reduced_redundancy=True)

	Ansi.objects.create(production_id=prod_link.production_id, url=url)
Example #5
0
    def test_upload_to_mirror(self, Session):
        """A freshly fetched file is pushed to the mirror bucket under its
        sha1-derived key, and that key is stored on the Download record."""
        url = 'http://example.com/pondlife2.txt'
        link = self.pondlife.links.create(
            link_class='BaseUrl', parameter=url, is_download_link=True
        )
        bucket = Session.return_value.resource.return_value.Bucket.return_value

        blob = fetch_link(link)

        bucket.put_object.assert_called_once_with(
            Key='8d/f5/211e169bdda5/pondlife2.txt', Body=b"hello from pondlife2.txt"
        )
        Session.assert_called_once()
        self.assertEqual(blob.filename, 'pondlife2.txt')
        self.assertEqual(blob.file_size, 24)

        download = Download.objects.get(link_class='BaseUrl', parameter=url)
        self.assertEqual(download.mirror_s3_key, '8d/f5/211e169bdda5/pondlife2.txt')
Example #6
0
    def test_fetch_duplicate_of_existing_file(self):
        """If the fetched file's sha1 matches an already-mirrored download,
        a new Download row is created that reuses the existing S3 key."""
        link = self.pondlife.links.create(
            link_class='BaseUrl', parameter='http://example.com/pondlife2.txt',
            is_download_link=True
        )
        existing = Download.objects.create(
            link_class='BaseUrl', parameter='http://example.com/pondlife3.txt',
            downloaded_at=datetime.datetime(2020, 1, 1, 12, 0, 0),
            mirror_s3_key='1/2/pondlife.123.txt',
            sha1="8df5211e169bdda53f2a4bad98483bd973c3e801"
        )

        blob = fetch_link(link)
        self.assertEqual(blob.filename, 'pondlife2.txt')
        self.assertEqual(blob.sha1, '8df5211e169bdda53f2a4bad98483bd973c3e801')

        # the duplicate gets its own Download row, but shares the mirror key
        duplicate = Download.objects.get(
            link_class='BaseUrl', parameter='http://example.com/pondlife2.txt'
        )
        self.assertEqual(duplicate.mirror_s3_key, '1/2/pondlife.123.txt')
        self.assertNotEqual(duplicate.pk, existing.pk)
Example #7
0
def create_ansi_from_production_link(production_link_id):
    """Download the file behind a production link and store it as an Ansi
    attachment on the production, skipping productions that already have one."""
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # deleted before this task ran; nothing to do
        return

    if prod_link.production.ansis.count():
        # an ANSI already exists for this production
        return

    blob = fetch_link(prod_link)
    sha1 = blob.sha1
    file_ext = prod_link.download_file_extension()

    # sha1-sharded S3 key incorporating the link id and file extension
    leaf = sha1[4:8] + '.pl' + str(production_link_id) + '.' + file_ext
    filename = 'ansi/' + '/'.join([sha1[0:2], sha1[2:4], leaf])
    url = upload_to_s3(
        blob.as_io_buffer(), filename, file_ext, reduced_redundancy=True
    )

    Ansi.objects.create(production_id=prod_link.production_id, url=url)
Example #8
0
def create_screenshot_from_production_link(production_link_id):
    """Generate screenshot images (original, standard, thumbnail) for the
    production behind the given ProductionLink.

    Returns early if the link is gone, the production already has
    screenshots, or a previous attempt marked the link's image as bad.
    For zip downloads, selects (or reuses) an archive member to take the
    screenshot from; image-processing failures set has_bad_image so the
    link is not retried.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
        if not prod_link.file_for_screenshot:
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                # no suitable member found; flag for manual resolution
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension in USABLE_IMAGE_FILE_EXTENSIONS:
            z = None
            try:
                z = blob.as_zipfile()
                # decode the filename as stored in the db
                filename = unpack_db_zip_filename(
                    prod_link.file_for_screenshot)

                member_buf = io.BytesIO(z.read(filename))
            except zipfile.BadZipfile:
                # corrupt archive: mark the link bad and clean up the handle
                prod_link.has_bad_image = True
                prod_link.save()
                if z:  # pragma: no cover
                    z.close()
                return

            z.close()
            try:
                img = PILConvertibleImage(
                    member_buf, name_hint=prod_link.file_for_screenshot)
            except IOError:
                prod_link.has_bad_image = True
                prod_link.save()
                return
        else:  # image is not a usable format
            return
    else:
        # non-zip download: treat the fetched blob itself as the image
        try:
            img = PILConvertibleImage(blob.as_io_buffer(),
                                      name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    # sha1-sharded base path; the upload helpers append per-rendition suffixes
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(
        production_link_id) + '.'
    try:
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
        # leave original until last, because if it's already a websafe format it'll just return
        # the original file handle, and the storage backend might close the file after uploading
        # which screws with PIL's ability to create resized versions...
        upload_original(img, screenshot, basename)
    except IOError:  # pragma: no cover
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
Example #9
0
    def handle(self, *args, **kwargs):
        """Create jsspeccy EmulatorConfig entries for ZX Spectrum productions.

        Scans productions that have no EmulatorConfig yet and no
        files.zxdemo.org download link, and for each one tries its download
        links in turn:

        * a direct link to a loadable image (.sna/.tzx/.tap/.z80/.szx) is
          fetched and re-uploaded to S3;
        * a .zip link is accepted only if it contains exactly one loadable
          file (ignoring __MACOSX metadata entries).

        On success an EmulatorConfig (emulator='jsspeccy') pointing at the
        mirrored URL is created and the production's remaining links are
        skipped.  Progress and failures are reported via print.
        """
        # candidates: ZX Spectrum prods with no emulator config and no
        # zxdemo.org mirror link
        prods = Production.objects.filter(
            platforms__name='ZX Spectrum', supertype='production').exclude(
                id__in=EmulatorConfig.objects.values_list('production_id',
                                                          flat=True)
            ).exclude(id__in=ProductionLink.objects.filter(
                link_class='BaseUrl',
                parameter__startswith='https://files.zxdemo.org/').values_list(
                    'production_id', flat=True)).prefetch_related('links')

        for prod in prods:
            success = False

            for prod_link in prod.links.all():
                if not prod_link.is_download_link:
                    continue

                url = urlparse(prod_link.download_url)
                basename, ext = splitext(url.path)
                ext = ext.lower()
                if ext in ('.sna', '.tzx', '.tap', '.z80', '.szx'):
                    # yay, we can use this directly
                    print("direct link for %s: %s" %
                          (prod.title, prod_link.download_url))
                    try:
                        download = fetch_link(prod_link)
                    except (URLError, FileTooBig, timeout, BadZipFile):
                        print("- broken link :-(")
                    else:
                        sha1 = download.sha1
                        basename, file_ext = splitext(download.filename)
                        # mirror under a sha1-sharded key with a slugified name
                        filename = 'emulation/' + sha1[0:2] + '/' + sha1[
                            2:4] + '/' + slugify(basename) + file_ext
                        new_url = upload_to_s3(download.as_io_buffer(),
                                               filename)
                        EmulatorConfig.objects.create(production_id=prod.id,
                                                      launch_url=new_url,
                                                      emulator='jsspeccy',
                                                      configuration='{}')
                        print("- successfully mirrored at %s" % new_url)
                        success = True
                    # throttle requests to the remote host
                    sleep(1)
                elif ext == '.zip':
                    print("zip file for %s: %s" %
                          (prod.title, prod_link.download_url))
                    try:
                        download = fetch_link(prod_link)
                    except (URLError, FileTooBig, timeout, BadZipFile):
                        print("- broken link :-(")
                    else:
                        try:
                            zip = download.as_zipfile()
                        except BadZipFile:  # pragma: no cover
                            print("- bad zip :-(")
                        else:
                            # count loadable members; only a zip with exactly
                            # one is unambiguous enough to use
                            loadable_file_count = 0
                            for filename in zip.namelist():
                                if filename.startswith('__MACOSX'):
                                    continue
                                ext = filename.split('.')[-1].lower()
                                if ext in ('tap', 'tzx', 'sna', 'z80', 'szx'):
                                    loadable_file_count += 1
                            if loadable_file_count == 1:
                                sha1 = download.sha1
                                basename, file_ext = splitext(
                                    download.filename)
                                filename = ('emulation/' + sha1[0:2] + '/' +
                                            sha1[2:4] + '/' +
                                            slugify(basename) + file_ext)
                                new_url = upload_to_s3(download.as_io_buffer(),
                                                       filename)
                                EmulatorConfig.objects.create(
                                    production_id=prod.id,
                                    launch_url=new_url,
                                    emulator='jsspeccy',
                                    configuration='{}')
                                print("- successfully mirrored at %s" %
                                      new_url)
                                success = True
                            elif loadable_file_count == 0:
                                print("- no loadable files :-(")
                            else:
                                print("- multiple loadable files :-/")
                    sleep(1)

                if success:
                    break
Example #10
0
def create_screenshot_from_production_link(production_link_id):
    """Generate screenshot images (original, standard, thumbnail) for the
    production behind the given ProductionLink.

    Returns early if the link is gone, the production already has
    screenshots, or a previous attempt marked the link's image as bad.
    For zip downloads, selects (or reuses) an archive member to take the
    screenshot from; image-processing failures set has_bad_image so the
    link is not retried.

    NOTE(review): uses cStringIO and str.encode('iso-8859-1') on zip member
    names — this variant targets Python 2.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return

    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing

    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1

    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
        if not prod_link.file_for_screenshot:
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                # no suitable member found; flag for manual resolution
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()

        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension in USABLE_IMAGE_FILE_EXTENSIONS:
            z = blob.as_zipfile()
            # we encode the filename as iso-8859-1 before retrieving it, because we
            # decoded it that way on insertion into the database to ensure that it had
            # a valid unicode string representation - see mirror/models.py
            try:
                member_buf = cStringIO.StringIO(
                    z.read(prod_link.file_for_screenshot.encode('iso-8859-1')))
            except zipfile.BadZipfile:
                # corrupt archive: mark the link bad and clean up the handle
                prod_link.has_bad_image = True
                prod_link.save()
                z.close()
                return

            z.close()
            try:
                img = PILConvertibleImage(
                    member_buf, name_hint=prod_link.file_for_screenshot)
            except IOError:
                prod_link.has_bad_image = True
                prod_link.save()
                return
        else:  # image is not a usable format
            return
    else:
        # non-zip download: treat the fetched blob itself as the image
        try:
            img = PILConvertibleImage(blob.as_io_buffer(),
                                      name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return

    screenshot = Screenshot(production_id=production_id)
    # sha1-sharded base path; the upload helpers append per-rendition suffixes
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(
        production_link_id) + '.'
    try:
        upload_original(img, screenshot, basename, reduced_redundancy=True)
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
    except IOError:
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
Example #11
0
def create_screenshot_from_production_link(production_link_id):
	"""Generate screenshot images (original, standard, thumbnail) for the
	production behind the given ProductionLink.

	Returns early if the link is gone, the production already has
	screenshots, or a previous attempt marked the link's image as bad.
	For zip downloads, selects (or reuses) an archive member to take the
	screenshot from; image-processing failures set has_bad_image so the
	link is not retried.

	NOTE(review): uses cStringIO and str.encode('iso-8859-1') on zip member
	names — this variant targets Python 2.
	"""
	try:
		prod_link = ProductionLink.objects.get(id=production_link_id)
	except ProductionLink.DoesNotExist:
		# guess it was deleted in the meantime, then.
		return

	if prod_link.production.screenshots.count():
		# don't create a screenshot if there's one already
		if prod_link.is_unresolved_for_screenshotting:
			prod_link.is_unresolved_for_screenshotting = False
			prod_link.save()
		return

	if prod_link.has_bad_image:
		return  # don't create a screenshot if a previous attempt has failed during image processing

	production_id = prod_link.production_id
	url = prod_link.download_url
	blob = fetch_link(prod_link)
	sha1 = blob.sha1

	if prod_link.is_zip_file():
		# select the archive member to extract a screenshot from, if we don't have
		# a candidate already
		archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
		if not prod_link.file_for_screenshot:
			file_for_screenshot = select_screenshot_file(archive_members)
			if file_for_screenshot:
				prod_link.file_for_screenshot = file_for_screenshot
				prod_link.is_unresolved_for_screenshotting = False
			else:
				# no suitable member found; flag for manual resolution
				prod_link.is_unresolved_for_screenshotting = True
			prod_link.save()

		image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
		if image_extension in USABLE_IMAGE_FILE_EXTENSIONS:
			z = blob.as_zipfile()
			# we encode the filename as iso-8859-1 before retrieving it, because we
			# decoded it that way on insertion into the database to ensure that it had
			# a valid unicode string representation - see mirror/models.py
			try:
				member_buf = cStringIO.StringIO(
					z.read(prod_link.file_for_screenshot.encode('iso-8859-1'))
				)
			except zipfile.BadZipfile:
				# corrupt archive: mark the link bad and clean up the handle
				prod_link.has_bad_image = True
				prod_link.save()
				z.close()
				return

			z.close()
			try:
				img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
			except IOError:
				prod_link.has_bad_image = True
				prod_link.save()
				return
		else:  # image is not a usable format
			return
	else:
		# non-zip download: treat the fetched blob itself as the image
		try:
			img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
		except IOError:
			prod_link.has_bad_image = True
			prod_link.save()
			return

	screenshot = Screenshot(production_id=production_id)
	# sha1-sharded base path; the upload helpers append per-rendition suffixes
	basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
	try:
		upload_original(img, screenshot, basename, reduced_redundancy=True)
		upload_standard(img, screenshot, basename)
		upload_thumb(img, screenshot, basename)
	except IOError:
		prod_link.has_bad_image = True
		prod_link.save()
		return
	screenshot.save()