def test_fetch_actual_length_too_big(self):
    """A download whose actual body is over the size limit raises FileTooBig
    and records the failed attempt as a Download row."""
    too_big_url = 'http://example.com/real-big-file.txt'
    link = self.pondlife.links.create(
        link_class='BaseUrl',
        parameter=too_big_url,
        is_download_link=True,
    )

    with self.assertRaises(FileTooBig):
        fetch_link(link)

    # The failure should have been logged with its error type.
    failed_downloads = Download.objects.filter(
        parameter=too_big_url, error_type='FileTooBig'
    )
    self.assertTrue(failed_downloads.exists())
def test_fetch_from_mirror(self, Session):
    """When a mirrored copy already exists, fetch_link should pull the file
    from the S3 mirror rather than re-downloading the original URL."""
    mirror_url = 'http://example.com/pondlife.txt'
    link = self.pondlife.links.create(
        link_class='BaseUrl', parameter=mirror_url, is_download_link=True,
    )
    Download.objects.create(
        link_class='BaseUrl',
        parameter=mirror_url,
        downloaded_at=datetime.datetime(2020, 1, 1, 12, 0, 0),
        mirror_s3_key='1/2/pondlife.123.txt',
    )

    # Stub boto3 so that "downloading" from the bucket writes known bytes.
    mock_bucket = Session.return_value.resource.return_value.Bucket.return_value

    def fake_download_fileobj(filename, f):
        f.write(b'hello from pondlife.txt')

    mock_bucket.download_fileobj = fake_download_fileobj

    blob = fetch_link(link)

    Session.assert_called_once_with(
        aws_access_key_id='AWS_K3Y', aws_secret_access_key='AWS_S3CR3T'
    )
    self.assertEqual(blob.filename, 'pondlife.123.txt')
    self.assertEqual(blob.md5, 'ebceeba7ff0d18701e1952cd3865ef22')
    self.assertEqual(blob.sha1, '31a1dd3aa79730732bf32f4c8f1e3e4f9ca1aa50')
    self.assertEqual(blob.file_size, 23)
def test_upload_zipfile(self, Session):
    """Fetching a zip download uploads it to the mirror and indexes its
    archive members."""
    zip_url = 'http://example.com/rubber.zip'
    link = self.pondlife.links.create(
        link_class='BaseUrl', parameter=zip_url, is_download_link=True,
    )
    mock_bucket = Session.return_value.resource.return_value.Bucket.return_value

    fetch_link(link)

    # Exactly one upload via exactly one boto session.
    mock_bucket.put_object.assert_called_once()
    Session.assert_called_once()

    download = Download.objects.get(link_class='BaseUrl', parameter=zip_url)
    members = download.get_archive_members()
    self.assertEqual(members.count(), 2)
    self.assertEqual(members.first().filename, '16Kb-RUBBER.txt')
def create_ansi_from_production_link(production_link_id):
    """Fetch the file behind a ProductionLink and attach it to the production
    as an Ansi record, uploading a copy to S3.

    No-op if the link was deleted in the meantime, or if the production
    already has at least one ANSI attached.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    # .exists() instead of .count(): we only need truthiness, and EXISTS is
    # cheaper than COUNT on the database side.
    if prod_link.production.ansis.exists():
        return  # don't create an ANSI if there's one already

    blob = fetch_link(prod_link)
    sha1 = blob.sha1
    file_ext = prod_link.download_file_extension()
    # Shard the S3 key by sha1 prefix; include the link id so the same content
    # reached via different links still gets a distinct key.
    filename = 'ansi/{}/{}/{}.pl{}.{}'.format(
        sha1[0:2], sha1[2:4], sha1[4:8], production_link_id, file_ext
    )
    url = upload_to_s3(blob.as_io_buffer(), filename, file_ext, reduced_redundancy=True)
    Ansi.objects.create(production_id=prod_link.production_id, url=url)
def test_upload_to_mirror(self, Session):
    """A freshly fetched file should be pushed to the mirror bucket under a
    sha1-derived key, and that key recorded on the Download row."""
    file_url = 'http://example.com/pondlife2.txt'
    link = self.pondlife.links.create(
        link_class='BaseUrl', parameter=file_url, is_download_link=True,
    )
    mock_bucket = Session.return_value.resource.return_value.Bucket.return_value

    blob = fetch_link(link)

    mock_bucket.put_object.assert_called_once_with(
        Key='8d/f5/211e169bdda5/pondlife2.txt',
        Body=b"hello from pondlife2.txt",
    )
    Session.assert_called_once()
    self.assertEqual(blob.filename, 'pondlife2.txt')
    self.assertEqual(blob.file_size, 24)

    download = Download.objects.get(link_class='BaseUrl', parameter=file_url)
    self.assertEqual(download.mirror_s3_key, '8d/f5/211e169bdda5/pondlife2.txt')
def test_fetch_duplicate_of_existing_file(self):
    """If a fetched file's sha1 matches an already-mirrored download, the new
    Download row should reuse the existing mirror key instead of re-uploading."""
    known_sha1 = "8df5211e169bdda53f2a4bad98483bd973c3e801"
    link = self.pondlife.links.create(
        link_class='BaseUrl',
        parameter='http://example.com/pondlife2.txt',
        is_download_link=True,
    )
    existing_download = Download.objects.create(
        link_class='BaseUrl',
        parameter='http://example.com/pondlife3.txt',
        downloaded_at=datetime.datetime(2020, 1, 1, 12, 0, 0),
        mirror_s3_key='1/2/pondlife.123.txt',
        sha1=known_sha1,
    )

    blob = fetch_link(link)
    self.assertEqual(blob.filename, 'pondlife2.txt')
    self.assertEqual(blob.sha1, known_sha1)

    # a new Download record pointing to the same mirror entry as pondlife3 should have been created
    new_download = Download.objects.get(
        link_class='BaseUrl', parameter='http://example.com/pondlife2.txt'
    )
    self.assertEqual(new_download.mirror_s3_key, '1/2/pondlife.123.txt')
    self.assertNotEqual(new_download.pk, existing_download.pk)
def create_ansi_from_production_link(production_link_id):
    """Fetch the file behind a ProductionLink and attach it to the production
    as an Ansi record, uploading a copy to S3.

    No-op if the link was deleted in the meantime, or if the production
    already has at least one ANSI attached.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return

    # .exists() instead of .count(): only truthiness is needed, and EXISTS is
    # cheaper than COUNT on the database side.
    if prod_link.production.ansis.exists():
        return  # don't create an ANSI if there's one already

    blob = fetch_link(prod_link)
    sha1 = blob.sha1
    file_ext = prod_link.download_file_extension()
    # sha1-sharded S3 key; the link id keeps keys distinct even when the same
    # content is reached via different links.
    filename = 'ansi/{}/{}/{}.pl{}.{}'.format(
        sha1[0:2], sha1[2:4], sha1[4:8], production_link_id, file_ext
    )
    url = upload_to_s3(blob.as_io_buffer(), filename, file_ext, reduced_redundancy=True)
    Ansi.objects.create(production_id=prod_link.production_id, url=url)
def create_screenshot_from_production_link(production_link_id):
    """Fetch the file behind a ProductionLink and generate screenshot images
    (original / standard / thumbnail) for its production.

    Early-outs: link deleted, production already has screenshots (also clears
    the is_unresolved_for_screenshotting flag), or a previous attempt marked
    the link as has_bad_image. For zip downloads, a member file is selected
    (and remembered on the link) before rendering.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return
    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            # screenshots exist now, so this link no longer needs resolving
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return
    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing
    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1
    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
        if not prod_link.file_for_screenshot:
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                # no usable member found; flag the link for manual resolution
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()
        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension in USABLE_IMAGE_FILE_EXTENSIONS:
            # z is pre-declared so the except branch can tell whether the
            # zipfile was opened before the failure occurred
            z = None
            try:
                z = blob.as_zipfile()
                # decode the filename as stored in the db
                filename = unpack_db_zip_filename(prod_link.file_for_screenshot)
                member_buf = io.BytesIO(z.read(filename))
            except zipfile.BadZipfile:
                prod_link.has_bad_image = True
                prod_link.save()
                if z:  # pragma: no cover
                    z.close()
                return
            z.close()
            try:
                img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
            except IOError:
                # unreadable image data; remember the failure so we don't retry
                prod_link.has_bad_image = True
                prod_link.save()
                return
        else:  # image is not a usable format
            return
    else:
        # non-zip download: treat the whole blob as the image
        try:
            img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    screenshot = Screenshot(production_id=production_id)
    # sha1-sharded key prefix; the link id keeps keys unique per link
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
    try:
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
        # leave original until last, because if it's already a websafe format it'll just return
        # the original file handle, and the storage backend might close the file after uploading
        # which screws with PIL's ability to create resized versions...
        upload_original(img, screenshot, basename)
    except IOError:  # pragma: no cover
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
def handle(self, *args, **kwargs):
    """Backfill jsspeccy EmulatorConfigs for ZX Spectrum productions.

    Scans ZX Spectrum productions with neither an emulator config nor a
    zxdemo.org mirror link; for each, finds a download link that is either a
    loadable tape/snapshot file or a zip containing exactly one such file,
    mirrors the download to S3 and creates an EmulatorConfig pointing at it.
    Sleeps one second between fetch attempts to be polite to remote hosts.
    """
    # Extensions jsspeccy can load directly (with leading dot, lowercased).
    loadable_exts = ('.sna', '.tzx', '.tap', '.z80', '.szx')

    def mirror_and_register(prod, download):
        # Upload the fetched file under a sha1-sharded key and create the
        # EmulatorConfig for it; returns the mirrored URL. (Extracted to
        # remove the copy-paste between the direct-file and zip branches.)
        sha1 = download.sha1
        base, file_ext = splitext(download.filename)
        s3_key = 'emulation/' + sha1[0:2] + '/' + sha1[2:4] + '/' + slugify(base) + file_ext
        mirrored_url = upload_to_s3(download.as_io_buffer(), s3_key)
        EmulatorConfig.objects.create(
            production_id=prod.id,
            launch_url=mirrored_url,
            emulator='jsspeccy',
            configuration='{}',
        )
        return mirrored_url

    prods = Production.objects.filter(
        platforms__name='ZX Spectrum', supertype='production'
    ).exclude(
        # skip productions that already have an emulator config...
        id__in=EmulatorConfig.objects.values_list('production_id', flat=True)
    ).exclude(
        # ...or that already have a zxdemo.org mirror link
        id__in=ProductionLink.objects.filter(
            link_class='BaseUrl',
            parameter__startswith='https://files.zxdemo.org/'
        ).values_list('production_id', flat=True)
    ).prefetch_related('links')

    for prod in prods:
        success = False
        for prod_link in prod.links.all():
            if not prod_link.is_download_link:
                continue
            url = urlparse(prod_link.download_url)
            ext = splitext(url.path)[1].lower()
            if ext in loadable_exts:
                # yay, we can use this directly
                print("direct link for %s: %s" % (prod.title, prod_link.download_url))
                try:
                    download = fetch_link(prod_link)
                except (URLError, FileTooBig, timeout, BadZipFile):
                    print("- broken link :-(")
                else:
                    new_url = mirror_and_register(prod, download)
                    print("- successfully mirrored at %s" % new_url)
                    success = True
                sleep(1)
            elif ext == '.zip':
                print("zip file for %s: %s" % (prod.title, prod_link.download_url))
                try:
                    download = fetch_link(prod_link)
                except (URLError, FileTooBig, timeout, BadZipFile):
                    print("- broken link :-(")
                else:
                    try:
                        # renamed from `zip`, which shadowed the builtin
                        archive = download.as_zipfile()
                    except BadZipFile:  # pragma: no cover
                        print("- bad zip :-(")
                    else:
                        # Only mirror archives with exactly one loadable member;
                        # anything else is ambiguous or useless.
                        loadable_file_count = 0
                        for member_name in archive.namelist():
                            if member_name.startswith('__MACOSX'):
                                continue
                            member_ext = member_name.split('.')[-1].lower()
                            if member_ext in ('tap', 'tzx', 'sna', 'z80', 'szx'):
                                loadable_file_count += 1
                        if loadable_file_count == 1:
                            new_url = mirror_and_register(prod, download)
                            print("- successfully mirrored at %s" % new_url)
                            success = True
                        elif loadable_file_count == 0:
                            print("- no loadable files :-(")
                        else:
                            print("- multiple loadable files :-/")
                sleep(1)
            if success:
                break
def create_screenshot_from_production_link(production_link_id):
    """Fetch the file behind a ProductionLink and generate screenshot images
    (original / standard / thumbnail) for its production.

    Early-outs: link deleted, production already has screenshots (also clears
    the is_unresolved_for_screenshotting flag), or a previous attempt marked
    the link as has_bad_image. For zip downloads, a member file is selected
    (and remembered on the link) before rendering.

    NOTE(review): cStringIO and the iso-8859-1 encode round-trip are
    Python 2-era constructs — presumably this is a legacy variant of the task.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return
    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            # screenshots exist now, so this link no longer needs resolving
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return
    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing
    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1
    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
        if not prod_link.file_for_screenshot:
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                # no usable member found; flag the link for manual resolution
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()
        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension in USABLE_IMAGE_FILE_EXTENSIONS:
            z = blob.as_zipfile()
            # we encode the filename as iso-8859-1 before retrieving it, because we
            # decoded it that way on insertion into the database to ensure that it had
            # a valid unicode string representation - see mirror/models.py
            try:
                member_buf = cStringIO.StringIO(
                    z.read(prod_link.file_for_screenshot.encode('iso-8859-1'))
                )
            except zipfile.BadZipfile:
                prod_link.has_bad_image = True
                prod_link.save()
                z.close()
                return
            z.close()
            try:
                img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
            except IOError:
                # unreadable image data; remember the failure so we don't retry
                prod_link.has_bad_image = True
                prod_link.save()
                return
        else:  # image is not a usable format
            return
    else:
        # non-zip download: treat the whole blob as the image
        try:
            img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    screenshot = Screenshot(production_id=production_id)
    # sha1-sharded key prefix; the link id keeps keys unique per link
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
    try:
        upload_original(img, screenshot, basename, reduced_redundancy=True)
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
    except IOError:
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()
def create_screenshot_from_production_link(production_link_id):
    """Fetch the file behind a ProductionLink and generate screenshot images
    (original / standard / thumbnail) for its production.

    Early-outs: link deleted, production already has screenshots (also clears
    the is_unresolved_for_screenshotting flag), or a previous attempt marked
    the link as has_bad_image. For zip downloads, a member file is selected
    (and remembered on the link) before rendering.

    NOTE(review): cStringIO and the iso-8859-1 encode round-trip are
    Python 2-era constructs — presumably this is a legacy variant of the task.
    """
    try:
        prod_link = ProductionLink.objects.get(id=production_link_id)
    except ProductionLink.DoesNotExist:
        # guess it was deleted in the meantime, then.
        return
    if prod_link.production.screenshots.count():
        # don't create a screenshot if there's one already
        if prod_link.is_unresolved_for_screenshotting:
            # screenshots exist now, so this link no longer needs resolving
            prod_link.is_unresolved_for_screenshotting = False
            prod_link.save()
        return
    if prod_link.has_bad_image:
        return  # don't create a screenshot if a previous attempt has failed during image processing
    production_id = prod_link.production_id
    url = prod_link.download_url
    blob = fetch_link(prod_link)
    sha1 = blob.sha1
    if prod_link.is_zip_file():
        # select the archive member to extract a screenshot from, if we don't have
        # a candidate already
        archive_members = ArchiveMember.objects.filter(archive_sha1=sha1)
        if not prod_link.file_for_screenshot:
            file_for_screenshot = select_screenshot_file(archive_members)
            if file_for_screenshot:
                prod_link.file_for_screenshot = file_for_screenshot
                prod_link.is_unresolved_for_screenshotting = False
            else:
                # no usable member found; flag the link for manual resolution
                prod_link.is_unresolved_for_screenshotting = True
            prod_link.save()
        image_extension = prod_link.file_for_screenshot.split('.')[-1].lower()
        if image_extension in USABLE_IMAGE_FILE_EXTENSIONS:
            z = blob.as_zipfile()
            # we encode the filename as iso-8859-1 before retrieving it, because we
            # decoded it that way on insertion into the database to ensure that it had
            # a valid unicode string representation - see mirror/models.py
            try:
                member_buf = cStringIO.StringIO(
                    z.read(prod_link.file_for_screenshot.encode('iso-8859-1'))
                )
            except zipfile.BadZipfile:
                prod_link.has_bad_image = True
                prod_link.save()
                z.close()
                return
            z.close()
            try:
                img = PILConvertibleImage(member_buf, name_hint=prod_link.file_for_screenshot)
            except IOError:
                # unreadable image data; remember the failure so we don't retry
                prod_link.has_bad_image = True
                prod_link.save()
                return
        else:  # image is not a usable format
            return
    else:
        # non-zip download: treat the whole blob as the image
        try:
            img = PILConvertibleImage(blob.as_io_buffer(), name_hint=url.split('/')[-1])
        except IOError:
            prod_link.has_bad_image = True
            prod_link.save()
            return
    screenshot = Screenshot(production_id=production_id)
    # sha1-sharded key prefix; the link id keeps keys unique per link
    basename = sha1[0:2] + '/' + sha1[2:4] + '/' + sha1[4:8] + '.pl' + str(production_link_id) + '.'
    try:
        upload_original(img, screenshot, basename, reduced_redundancy=True)
        upload_standard(img, screenshot, basename)
        upload_thumb(img, screenshot, basename)
    except IOError:
        prod_link.has_bad_image = True
        prod_link.save()
        return
    screenshot.save()