def find_zipped_screenshottable_graphics():
    """
    Return a list of ProductionLink objects that link to archive files
    that we can plausibly expect to extract screenshots from, for
    graphics productions that don't have screenshots already.
    """
    from django.db.models import Count

    # prods of supertype=graphics that have download links but no screenshots
    prods = Production.objects.annotate(screenshot_count=Count('screenshots')).filter(
        supertype='graphics', screenshot_count=0, links__is_download_link=True
    ).prefetch_related('links', 'platforms', 'types')

    prod_links = []
    for prod in prods:
        # These two exclusion checks depend only on the production, not on the
        # individual link, so run them once per prod rather than once per link —
        # each .filter()/.exclude() on a related manager issues a fresh query,
        # even with prefetch_related.
        # skip ASCII and executable graphics
        if prod.types.filter(internal_name__in=[
            'ascii', 'ascii-collection', 'ansi', 'exe-graphics', '4k-exe-graphics'
        ]):
            continue
        # skip prods for a specific platform other than DOS/Windows
        if prod.platforms.exclude(name__in=['MS-Dos', 'Windows']):
            continue

        for link in prod.links.all():
            if not (link.is_download_link and link.is_zip_file()):
                continue

            file_for_screenshot = None
            # see if we've already got a best candidate archive member to take the image from
            if link.file_for_screenshot:
                file_for_screenshot = link.file_for_screenshot
            else:
                # failing that, see if we already have a directory listing for this download
                # and can derive a candidate from that
                download = Download.last_mirrored_download_for_url(link.download_url)
                if download and download.archive_members.count():
                    file_for_screenshot = download.select_screenshot_file()
                    if file_for_screenshot:
                        # we've found a candidate (which probably means we've improved
                        # select_screenshot_file since it was last run on this archive) -
                        # might as well store it against the ProductionLink, so it doesn't
                        # show up as something to be manually resolved
                        link.file_for_screenshot = file_for_screenshot
                        link.is_unresolved_for_screenshotting = False
                        link.save()
                    else:
                        # we have a directory listing but no clear candidate, so give up on this link
                        link.is_unresolved_for_screenshotting = True
                        link.save()
                        continue

            if file_for_screenshot:
                # we know in advance which file we'd like to extract from the archive -
                # better make sure it's a format we can actually handle, then.
                extension = link.file_for_screenshot.split('.')[-1].lower()
                if extension not in USABLE_IMAGE_FILE_EXTENSIONS:
                    continue

            prod_links.append(link)
            break  # success, so ignore any remaining links for this prod

    return prod_links
def find_zipped_screenshottable_graphics():
    """Collect ProductionLink objects pointing at archives we could screenshot.

    Targets graphics productions that have downloadable archive links but no
    screenshots yet; yields at most one link per production.
    """
    from django.db.models import Count

    # graphics prods with at least one download link and zero screenshots
    candidates = (
        Production.objects
        .annotate(screenshot_count=Count('screenshots'))
        .filter(supertype='graphics', screenshot_count=0, links__is_download_link=True)
        .prefetch_related('links', 'platforms', 'types')
    )

    excluded_types = ['ascii', 'ascii-collection', 'ansi', 'exe-graphics', '4k-exe-graphics']
    results = []

    for production in candidates:
        for link in production.links.all():
            if not (link.is_download_link and link.is_zip_file()):
                continue
            # ASCII / executable graphics can't be screenshotted from an image file
            if production.types.filter(internal_name__in=excluded_types):
                continue
            # platform-specific prods other than DOS/Windows are out of scope
            if production.platforms.exclude(name__in=['MS-Dos', 'Windows']):
                continue

            chosen_file = None
            if link.file_for_screenshot:
                # a best-candidate archive member has already been recorded
                chosen_file = link.file_for_screenshot
            else:
                # try to derive a candidate from an existing directory listing
                # of this download, if we have one mirrored
                download = Download.last_mirrored_download_for_url(link.download_url)
                if download and download.archive_members.count():
                    chosen_file = download.select_screenshot_file()
                    if chosen_file:
                        # store the candidate (select_screenshot_file has probably
                        # improved since this archive was last examined) so the link
                        # no longer shows up as needing manual resolution
                        link.file_for_screenshot = chosen_file
                        link.is_unresolved_for_screenshotting = False
                        link.save()
                    else:
                        # a listing exists but offers no clear candidate; flag and move on
                        link.is_unresolved_for_screenshotting = True
                        link.save()
                        continue

            if chosen_file:
                # the target file is known up front - make sure it's a format we handle
                extension = link.file_for_screenshot.split('.')[-1].lower()
                if extension not in USABLE_IMAGE_FILE_EXTENSIONS:
                    continue

            results.append(link)
            break  # one usable link per production is enough

    return results
def unresolved_screenshots(request):
    """Report view listing download links flagged as unresolved for screenshotting."""
    links = ProductionLink.objects.filter(
        is_unresolved_for_screenshotting=True
    ).select_related('production')

    entries = []
    # display at most the first 100 links; the total is reported separately
    for link in links[:100]:
        download = Download.last_mirrored_download_for_url(link.download_url)
        if download:
            entries.append((link, download, download.archive_members.all()))

    context = {
        'title': 'Unresolved screenshots',
        'link_count': links.count(),
        'entries': entries,
        'report_name': 'unresolved_screenshots',
    }
    return render(request, 'maintenance/unresolved_screenshots.html', context)
def unresolved_screenshots(request):
    """Render the 'unresolved screenshots' maintenance report."""
    unresolved = ProductionLink.objects.filter(
        is_unresolved_for_screenshotting=True
    ).select_related('production')

    rows = []
    for prod_link in unresolved[:100]:  # cap the listing at 100 entries
        mirrored = Download.last_mirrored_download_for_url(prod_link.download_url)
        if not mirrored:
            continue
        rows.append((prod_link, mirrored, mirrored.archive_members.all()))

    return render(
        request, 'maintenance/unresolved_screenshots.html', {
            'title': 'Unresolved screenshots',
            'link_count': unresolved.count(),  # total count, not just the displayed subset
            'entries': rows,
            'report_name': 'unresolved_screenshots',
        })
def fetch_url(url):
    """Fetch the mirrored copy of *url* if one exists; otherwise mirror the origin.

    Returns a (Download, file_content) tuple. A failed origin fetch is
    recorded as an errored Download row before the exception propagates.
    """
    download = Download.last_mirrored_download_for_url(url)
    if download:
        # we already hold a mirrored copy; serve its contents from S3
        return download, download.fetch_from_s3()

    # no mirrored copy exists - fetch the origin file and mirror it
    try:
        remote_filename, file_content = fetch_origin_url(url)
    except (urllib2.URLError, FileTooBig) as ex:
        # record the failure mode so broken URLs aren't retried silently forever
        Download.objects.create(
            url=url,
            downloaded_at=datetime.datetime.now(),
            error_type=ex.__class__.__name__,
        )
        raise

    download = upload_to_mirror(url, remote_filename, file_content)
    return download, file_content
def fetch_url(url):
    """Return (Download, content) for *url*, preferring our mirrored copy.

    Falls back to fetching the origin file and mirroring it; on a failed
    origin fetch an errored Download is created and the exception re-raised.
    """
    existing = Download.last_mirrored_download_for_url(url)
    if existing:
        # a mirrored copy is available - pull its contents from S3
        content = existing.fetch_from_s3()
        return existing, content
    else:
        try:
            remote_filename, content = fetch_origin_url(url)
        except (urllib2.URLError, FileTooBig) as ex:
            # persist the failure against the URL, then let callers handle it
            Download.objects.create(
                url=url,
                downloaded_at=datetime.datetime.now(),
                error_type=ex.__class__.__name__,
            )
            raise
        new_download = upload_to_mirror(url, remote_filename, content)
        return new_download, content