def create_perseus_zip(ccnode, exercise_data, write_to_path):
    with zipfile.ZipFile(write_to_path, "w") as zf:
        try:
            exercise_context = {
                'exercise': json.dumps(exercise_data, sort_keys=True, indent=4)
            }
            exercise_result = render_to_string('perseus/exercise.json', exercise_context)
            write_to_zipfile("exercise.json", exercise_result, zf)

            for question in ccnode.assessment_items.prefetch_related('files').all().order_by('order'):
                try:
                    for image in question.files.filter(preset_id=format_presets.EXERCISE_IMAGE).order_by('checksum'):
                        image_name = "images/{}.{}".format(image.checksum, image.file_format_id)
                        if image_name not in zf.namelist():
                            with storage.open(ccmodels.generate_object_storage_name(image.checksum, str(image)), 'rb') as content:
                                write_to_zipfile(image_name, content.read(), zf)

                    for image in question.files.filter(preset_id=format_presets.EXERCISE_GRAPHIE).order_by('checksum'):
                        svg_name = "images/{0}.svg".format(image.original_filename)
                        json_name = "images/{0}-data.json".format(image.original_filename)
                        if svg_name not in zf.namelist() or json_name not in zf.namelist():
                            with storage.open(ccmodels.generate_object_storage_name(image.checksum, str(image)), 'rb') as content:
                                content = content.read()
                                content = content.split(exercises.GRAPHIE_DELIMITER)
                                write_to_zipfile(svg_name, content[0], zf)
                                write_to_zipfile(json_name, content[1], zf)

                    write_assessment_item(question, zf)
                except Exception as e:
                    logging.error("Publishing error: {}".format(str(e)))
        finally:
            zf.close()
def create_perseus_zip(ccnode, exercise_data, write_to_path):
    with zipfile.ZipFile(write_to_path, "w") as zf:
        try:
            exercise_context = {
                'exercise': json.dumps(exercise_data, sort_keys=True, indent=4)
            }
            exercise_result = render_to_string('perseus/exercise.json', exercise_context)
            write_to_zipfile("exercise.json", exercise_result, zf)

            for question in ccnode.assessment_items.prefetch_related('files').all().order_by('order'):
                try:
                    for image in question.files.filter(preset_id=format_presets.EXERCISE_IMAGE).order_by('checksum'):
                        image_name = "images/{}.{}".format(image.checksum, image.file_format_id)
                        if image_name not in zf.namelist():
                            with storage.open(ccmodels.generate_object_storage_name(image.checksum, str(image)), 'rb') as content:
                                write_to_zipfile(image_name, content.read(), zf)

                    for image in question.files.filter(preset_id=format_presets.EXERCISE_GRAPHIE).order_by('checksum'):
                        svg_name = "images/{0}.svg".format(image.original_filename)
                        json_name = "images/{0}-data.json".format(image.original_filename)
                        if svg_name not in zf.namelist() or json_name not in zf.namelist():
                            with storage.open(ccmodels.generate_object_storage_name(image.checksum, str(image)), 'rb') as content:
                                content = content.read()
                                # in Python 3, the delimiter needs to be in bytes format
                                content = content.split(exercises.GRAPHIE_DELIMITER.encode('ascii'))
                                write_to_zipfile(svg_name, content[0], zf)
                                write_to_zipfile(json_name, content[1], zf)

                    write_assessment_item(question, zf)
                except Exception as e:
                    logging.error("Publishing error: {}".format(str(e)))
                    logging.error(traceback.format_exc())
                    # In production, these errors have historically been handled
                    # silently. Retain that behavior for now, but raise an error
                    # locally so we can better understand the cases in which this
                    # might happen.
                    report_exception(e)
                    if os.environ.get('BRANCH_ENVIRONMENT', '') != "master":
                        raise
        finally:
            zf.close()
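# A minimal sketch of the Graphie split performed above, to make the bytes-vs-str
# delimiter issue concrete. Assumption: the delimiter value below is a placeholder;
# the real one is exercises.GRAPHIE_DELIMITER from le_utils.
import json

GRAPHIE_DELIMITER = "\\=xxx\\="  # placeholder value, for illustration only


def split_graphie_blob(blob):
    """Split a stored Graphie blob (bytes) into its SVG and JSON halves."""
    # In Python 3 a bytes blob must be split with a bytes delimiter,
    # which is exactly the .encode('ascii') fix applied above.
    svg, web_data = blob.split(GRAPHIE_DELIMITER.encode('ascii'))
    return svg, json.loads(web_data)


svg, data = split_graphie_blob(b'<svg/>' + GRAPHIE_DELIMITER.encode('ascii') + b'{"version": 1}')
assert data == {"version": 1}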
def test_can_upload_file_to_presigned_url(self):
    """
    Test that we can get a 200 OK when we upload a file to the URL
    returned by get_presigned_upload_url.
    """
    file_contents = b"blahfilecontents"
    file = BytesIO(file_contents)

    # S3 expects a base64-encoded MD5 checksum
    md5 = hashlib.md5(file_contents)
    md5_checksum = md5.hexdigest()
    md5_checksum_base64 = codecs.encode(codecs.decode(md5_checksum, "hex"), "base64").decode()

    filename = "blahfile.jpg"
    filepath = generate_object_storage_name(md5_checksum, filename)

    ret = get_presigned_upload_url(filepath, md5_checksum_base64, 1000, len(file_contents))
    url = ret["uploadURL"]
    content_type = ret["mimetype"]

    resp = requests.put(
        url,
        data=file,
        headers={
            "Content-Type": content_type,
        },
    )
    resp.raise_for_status()
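# The codecs round-trip above (hexdigest -> hex-decode -> base64-encode) can be
# written more directly with the stdlib base64 module; a minimal equivalent sketch:
import base64
import codecs
import hashlib


def md5_checksum_base64(contents):
    """Base64-encoded MD5 digest, the form S3 expects in the Content-MD5 header."""
    return base64.b64encode(hashlib.md5(contents).digest()).decode()


# Same value the test computes, except codecs' base64 codec appends a trailing newline.
assert md5_checksum_base64(b"blahfilecontents") + "\n" == codecs.encode(
    codecs.decode(hashlib.md5(b"blahfilecontents").hexdigest(), "hex"), "base64").decode()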
def map_files_to_assessment_item(user, assessment_item, data):
    """
    Generate files referenced in the given assessment item (a.k.a. question).
    """
    if settings.DEBUG:
        # assert that our parameters match expected values
        assert isinstance(user, User)
        assert isinstance(assessment_item, AssessmentItem)
        assert isinstance(data, list)

    # filter out files that are empty
    valid_data = filter_out_nones(data)

    for file_data in valid_data:
        filename = file_data["filename"]
        checksum, ext = filename.split(".")

        file_path = generate_object_storage_name(checksum, filename)
        storage = default_storage
        if not storage.exists(file_path):
            raise IOError('{} not found'.format(file_path))

        resource_obj = File(
            checksum=checksum,
            assessment_item=assessment_item,
            file_format_id=ext,
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size=file_data['size'],
            preset_id=file_data["preset"],  # assessment_item files always have a preset
            uploaded_by=user,
        )
        resource_obj.file_on_disk.name = file_path
        resource_obj.save()
def map_files_to_slideshow_slide_item(user, node, slides, files):
    """
    Generate files referenced in the given slideshow slides.
    """
    for file_data in files:
        filename = file_data["filename"]
        checksum, ext = filename.split(".")

        matching_slide = next(
            (slide for slide in slides if slide.metadata["checksum"] == checksum),
            None
        )

        if not matching_slide:
            # TODO(Jacob) Determine proper error type... raise it.
            print("NO MATCH")

        file_path = generate_object_storage_name(checksum, filename)
        storage = default_storage
        if not storage.exists(file_path):
            raise IOError('{} not found'.format(file_path))

        file_obj = File(
            slideshow_slide=matching_slide,
            checksum=checksum,
            file_format_id=ext,
            original_filename=file_data.get("original_filename") or "file",
            source_url=file_data.get("source_url"),
            file_size=file_data["size"],
            preset_id=file_data["preset"],
            uploaded_by=user,
        )
        file_obj.file_on_disk.name = file_path
        file_obj.save()
def update(self, instance, validated_data):
    ret = []
    nodes_to_check = []
    with transaction.atomic():
        # Get files that have the same contentnode, preset, and language as
        # the files that are now attached to this node
        for item in validated_data:
            file_obj = File.objects.get(pk=item['id'])
            file_obj.language_id = item.get('language') and item['language']['id']
            file_obj.contentnode = item['contentnode']
            if item['contentnode'] not in nodes_to_check:
                nodes_to_check.append(item['contentnode'])

            # Make sure the file exists
            file_path = generate_object_storage_name(file_obj.checksum, str(file_obj))
            if not default_storage.exists(file_path):
                raise OSError("Error: file {} was not found".format(str(file_obj)))

            # Replace existing files
            files_to_replace = item['contentnode'].files.exclude(pk=file_obj.pk)\
                .filter(preset_id=file_obj.preset_id, language_id=file_obj.language_id)
            files_to_replace.delete()

            file_obj.save()
            ret.append(file_obj)

        # Remove items that are not in the validated data (file has been removed)
        for node in nodes_to_check:
            file_ids = [f['id'] for f in validated_data if f['contentnode'].pk == node.pk]
            node.files.exclude(pk__in=file_ids).delete()

    return ret
def process_image_strings(content, zf):
    image_list = []
    content = content.replace(exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR)
    for match in re.finditer(ur'!\[(?:[^\]]*)]\(([^\)]+)\)', content):
        img_match = re.search(ur'(.+/images/[^\s]+)(?:\s=([0-9\.]+)x([0-9\.]+))*', match.group(1))
        if img_match:
            # Add any image files that haven't been written to the zipfile
            filename = img_match.group(1).split('/')[-1]
            checksum, ext = os.path.splitext(filename)
            image_name = "images/{}.{}".format(checksum, ext[1:])
            if image_name not in zf.namelist():
                with storage.open(ccmodels.generate_object_storage_name(checksum, filename), 'rb') as imgfile:
                    write_to_zipfile(image_name, imgfile.read(), zf)

            # Add resizing data
            if img_match.group(2) and img_match.group(3):
                image_data = {'name': img_match.group(1)}
                image_data.update({'width': float(img_match.group(2))})
                image_data.update({'height': float(img_match.group(3))})
                image_list.append(image_data)

            content = content.replace(match.group(1), img_match.group(1))

    # Hand back the rewritten content and the collected image metadata
    return content, image_list
def write_file_to_storage(fobj, check_valid=False, name=None):
    fobj.seek(0)  # Make sure we read the file from the beginning

    # Compute the MD5 checksum of the file contents
    checksum = hashlib.md5()
    for chunk in iter(lambda: fobj.read(4096), b""):
        checksum.update(chunk)

    name = name or fobj._name or ""
    filename, ext = os.path.splitext(name)
    hashed_filename = checksum.hexdigest()
    full_filename = "{}{}".format(hashed_filename, ext.lower())
    fobj.seek(0)

    # Check that the hash is valid
    if check_valid and hashed_filename != filename:
        raise SuspiciousOperation("Failed to upload file {0}: hash is invalid".format(name))

    # Get location of file
    file_path = models.generate_object_storage_name(hashed_filename, full_filename)

    # Write file
    storage = default_storage
    if storage.exists(file_path):
        logging.info("{} exists in Google Cloud Storage, so it's not saved again.".format(file_path))
    else:
        storage.save(file_path, fobj)
    return full_filename
def convert_channel_thumbnail(channel):
    """
    convert_channel_thumbnail: gets base64 encoding of the channel's thumbnail
    Args:
        channel (models.Channel): channel whose thumbnail will be encoded
    Returns: base64 encoding of thumbnail
    """
    encoding = None
    if not channel.thumbnail or channel.thumbnail == '' or 'static' in channel.thumbnail:
        return ""

    if channel.thumbnail_encoding:
        thumbnail_data = ast.literal_eval(channel.thumbnail_encoding)
        if thumbnail_data.get("base64"):
            return thumbnail_data["base64"]

    checksum, ext = os.path.splitext(channel.thumbnail)
    with storage.open(ccmodels.generate_object_storage_name(checksum, channel.thumbnail), 'rb') as file_obj:
        with Image.open(file_obj) as image, tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tempf:
            cover = resizeimage.resize_cover(image, [THUMBNAIL_DIMENSION, THUMBNAIL_DIMENSION])
            cover.save(tempf.name, image.format)
            encoding = base64.b64encode(tempf.read()).decode('utf-8')
            tempname = tempf.name

    os.unlink(tempname)
    return "data:image/png;base64," + encoding
def setUp(self):
    super(FileThumbnailTestCase, self).setUp()
    self.thumbnail_fobj = create_thumbnail_from_base64(base64encoding())
    filepath = generate_object_storage_name(self.thumbnail_fobj.checksum, str(self.thumbnail_fobj))
    with default_storage.open(filepath, 'rb') as fobj:
        self.thumbnail_contents = fobj.read()
def download_file(filename, download_url=None, contentnode=None, assessment_item=None,
                  preset=None, file_size=None, lang_id=None):
    checksum, extension = os.path.splitext(filename)
    extension = extension.lstrip('.')
    filepath = models.generate_object_storage_name(checksum, filename)

    # Download the file if it hasn't already been downloaded
    if download_url and not default_storage.exists(filepath):
        buffer = BytesIO()
        response = requests.get('{}/content/storage/{}/{}/{}'.format(
            download_url, filename[0], filename[1], filename))
        for chunk in response:
            buffer.write(chunk)
        checksum, _, filepath = write_raw_content_to_storage(buffer.getvalue(), ext=extension)
        buffer.close()

    # Save values to a new file object
    file_obj = models.File(
        file_format_id=extension,
        file_size=file_size or default_storage.size(filepath),
        contentnode=contentnode,
        assessment_item=assessment_item,
        language_id=lang_id,
        preset_id=preset or "",
    )
    file_obj.file_on_disk.name = filepath
    file_obj.save()
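# Hypothetical example of the download URL built above: the path fans out on the
# first two characters of the (checksum-derived) filename, mirroring the object
# storage layout. The host below is made up for illustration.
filename = "abcd1234.mp4"
url = '{}/content/storage/{}/{}/{}'.format(
    "https://studio.example.org", filename[0], filename[1], filename)
assert url == "https://studio.example.org/content/storage/a/b/abcd1234.mp4"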
def fileobj_video(contents=None):
    """
    Create an "mp4" video file on storage and then create a File model
    pointing to it.

    If `contents` is given and is a string, write those contents to the file.
    If not given, a random string is generated and set as the contents of the file.
    """
    if contents:
        filecontents = contents
    else:
        filecontents = "".join(random.sample(string.printable, 20))

    fileobj = StringIO(filecontents)
    digest = md5.new(filecontents).hexdigest()
    filename = "{}.mp4".format(digest)
    storage_file_path = cc.generate_object_storage_name(digest, filename)

    # Write out the file bytes to object storage, under the checksum-derived path
    default_storage.save(storage_file_path, fileobj)

    # then create a File object pointing to it
    db_file_obj = mixer.blend(cc.File, file_format=fileformat_mp4(), preset=preset_video(),
                              file_on_disk=storage_file_path)

    yield db_file_obj
def create_temp_file(filebytes, kind='text', ext='txt', mimetype='text/plain'):
    """
    Create a file and store it in Django's object db temporarily for tests.

    :param filebytes: The data to be stored in the file, as a series of bytes
    :param kind: String identifying the kind of file
    :param ext: File extension, omitting the initial period
    :param mimetype: Mimetype of the file
    :return: A dict containing the keys name (filename), data (actual bytes),
        file (StringIO obj) and db_file (File object in db) of the temp file.
    """
    fileobj = StringIO(filebytes)
    checksum = hashlib.md5(filebytes)
    digest = checksum.hexdigest()
    filename = "{}.{}".format(digest, ext)
    storage_file_path = cc.generate_object_storage_name(digest, filename)

    # Write out the file bytes to object storage, under the checksum-derived path
    default_storage.save(storage_file_path, fileobj)
    assert default_storage.exists(storage_file_path)

    file_kind = mixer.blend(cc.ContentKind, kind=kind)
    file_format = mixer.blend(cc.FileFormat, extension=ext, mimetype=mimetype)
    preset = mixer.blend(cc.FormatPreset, id=ext, kind=file_kind)

    # then create a File object pointing to it
    db_file_obj = mixer.blend(cc.File, file_format=file_format, preset=preset,
                              file_on_disk=storage_file_path)

    return {'name': os.path.basename(storage_file_path), 'data': filebytes,
            'file': fileobj, 'db_file': db_file_obj}
def create_test_file(filebytes, ext='pdf'):
    """
    Create a temporary file with contents of `filebytes` for use in tests.

    :param filebytes: The data to be stored in the file (as bytes)
    :param ext: File extension, omitting the initial period

    Returns a dict containing the following:
    - checksum (str): md5 hash of file contents
    - name (str): the filename within the content storage system (= checksum + . + ext)
    - storagepath (str): the relative storage path for this file, storage/c/h/checksum.ext
    - data (bytes): file content (echo of `filebytes`)
    - file (file): an instance of TemporaryFile object that you can read/write
    """
    hash = hashlib.md5(filebytes)
    checksum = hash.hexdigest()
    filename = "{}.{}".format(checksum, ext)
    storage_file_path = cc.generate_object_storage_name(checksum, filename)

    fileobj = TemporaryFile()
    fileobj.write(filebytes)
    fileobj.seek(0)

    return {
        'checksum': checksum,
        'name': os.path.basename(storage_file_path),
        'storagepath': storage_file_path,
        'data': filebytes,
        'file': fileobj
    }
def create_temp_file(filebytes, preset='document', ext='pdf', original_filename=None):
    """
    Create a file and store it in Django's object db temporarily for tests.

    :param filebytes: The data to be stored in the file (as bytes)
    :param preset: String identifying the format preset (defaults to ``document``)
    :param ext: File extension, omitting the initial period
    :param original_filename: Original filename (needed for exercise_images)
    :return: A dict containing the keys name (filename), data (actual bytes),
        file (StringIO obj) and db_file (File object in db) of the temp file.
    """
    fileobj = StringIO(filebytes)
    hash = hashlib.md5(filebytes)
    checksum = hash.hexdigest()
    filename = "{}.{}".format(checksum, ext)
    storage_file_path = cc.generate_object_storage_name(checksum, filename)

    # 1. Write out the file bytes on to object storage
    default_storage.save(storage_file_path, fileobj)
    assert default_storage.exists(storage_file_path)

    # 2. Get the minimum required Studio meta fields for a File object
    preset = cc.FormatPreset.objects.get(id=preset)
    file_format = cc.FileFormat.objects.get(extension=ext)
    if original_filename is None:
        original_filename = 'somefile.' + ext

    # 3. Create a File object
    db_file_obj = mixer.blend(cc.File, checksum=checksum, file_format=file_format,
                              preset=preset, original_filename=original_filename,
                              file_on_disk=storage_file_path)

    return {'name': os.path.basename(storage_file_path), 'data': filebytes,
            'file': fileobj, 'db_file': db_file_obj}
def setUp(self):
    super(GetFileDiffTestCase, self).setUp()
    storage = default_storage

    # Upload some pieces of content, as our test data
    self.existing_content = "dowereallyexist.jpg"
    self.existing_content_path = generate_object_storage_name("dowereallyexist", self.existing_content)
    storage.save(self.existing_content_path, StringIO("maybe"))
def check_file_url(f):
    # Note: `session` and `ret` come from the enclosing scope; filenames whose
    # storage objects are missing are accumulated into `ret`.
    filepath = generate_object_storage_name(os.path.splitext(f)[0], f)
    url = "/".join([settings.AWS_S3_ENDPOINT_URL, settings.AWS_S3_BUCKET_NAME, filepath])
    resp = session.head(url)
    if resp.status_code != 200:
        ret.append(f)
def map_files_to_node(user, node, data):
    """
    Generate files that reference the content node.
    """
    if settings.DEBUG:
        # assert that our parameters match expected values
        assert isinstance(user, User)
        assert isinstance(node, ContentNode)
        assert isinstance(data, list)

    # filter out files that are empty
    valid_data = filter_out_nones(data)

    for file_data in valid_data:
        filename = file_data["filename"]
        checksum, ext1 = os.path.splitext(filename)
        ext = ext1.lstrip(".")

        # Determine a preset if none is given
        kind_preset = FormatPreset.get_preset(file_data["preset"]) or FormatPreset.guess_format_preset(filename)

        file_path = generate_object_storage_name(checksum, filename)
        storage = default_storage
        if not storage.exists(file_path):
            raise IOError('{} not found'.format(file_path))

        try:
            if file_data.get('language'):
                # TODO: Remove DB call per file?
                file_data['language'] = Language.objects.get(pk=file_data['language'])
        except ObjectDoesNotExist:
            invalid_lang = file_data.get('language')
            logging.warning("file_data with language {} does not exist.".format(invalid_lang))
            raise ValidationError("file_data given was invalid; expected string, got {}".format(invalid_lang))

        resource_obj = File(
            checksum=checksum,
            contentnode=node,
            file_format_id=ext,
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size=file_data['size'],
            preset=kind_preset,
            language_id=file_data.get('language'),
            uploaded_by=user,
        )
        resource_obj.file_on_disk.name = file_path
        resource_obj.save()

        # Handle thumbnail
        if resource_obj.preset and resource_obj.preset.thumbnail:
            node.thumbnail_encoding = json.dumps({
                'base64': get_thumbnail_encoding(str(resource_obj)),
                'points': [],
                'zoom': 0
            })
            node.save()
def create_studio_file(filebytes, preset='document', ext='pdf', original_filename=None):
    """
    Create a file with contents of `filebytes` and the associated cc.File object for it.

    :param filebytes: The data to be stored in the file (as bytes)
    :param preset: String identifying the format preset (defaults to ``document``)
    :param ext: File extension, omitting the initial period
    :param original_filename: Original filename (needed for exercise_images)

    Returns a dict containing the following:
    - name (str): the filename within the content storage system (= md5 hash of the contents + .ext)
    - data (bytes): file content (echo of `filebytes`)
    - file (file): a basic BytesIO file-like object that you can read/write
    - db_file (cc.File): a Studio File object saved in DB
    """
    try:
        filebytes = filebytes.encode('utf-8')
    except:  # noqa
        pass

    fileobj = BytesIO(filebytes)
    # Reading from or writing to the BytesIO object moves its position, so we
    # reset the seek position before each read; otherwise reads would start
    # from the end of the file.
    fileobj.seek(0)
    hash = hashlib.md5(filebytes)
    checksum = hash.hexdigest()
    filename = "{}.{}".format(checksum, ext)
    storage_file_path = cc.generate_object_storage_name(checksum, filename)

    # 1. Write out the file bytes on to object storage
    fileobj.seek(0)
    default_storage.save(storage_file_path, fileobj)
    fileobj.seek(0)
    assert default_storage.exists(storage_file_path)

    # 2. Get the minimum required Studio meta fields for a File object
    preset = cc.FormatPreset.objects.get(id=preset)
    file_format = cc.FileFormat.objects.get(extension=ext)
    if original_filename is None:
        original_filename = 'somefile.' + ext

    # 3. Create a File object
    db_file_obj = mixer.blend(cc.File, checksum=checksum, file_format=file_format,
                              preset=preset, original_filename=original_filename,
                              file_on_disk=storage_file_path)

    return {
        'name': os.path.basename(storage_file_path),
        'data': filebytes,
        'file': fileobj,
        'db_file': db_file_obj
    }
def test_generate_object_storage_name(object_storage_name_tests):
    for filename, checksum, file_format_id, expected_name in object_storage_name_tests:
        default_ext = ''
        if file_format_id:
            default_ext = '.{}'.format(file_format_id)

        actual_name = generate_object_storage_name(checksum, filename, default_ext)
        assert actual_name == expected_name, \
            "Storage names don't match: Expected: '{}' Actual '{}'".format(expected_name, actual_name)
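# Nearly every snippet in this section routes paths through
# generate_object_storage_name. A minimal sketch of the convention it appears
# to implement, based on the "storage/c/h/checksum.ext" layout documented in
# create_test_file above; the real implementation may differ in details:
import os


def generate_object_storage_name_sketch(checksum, filename, default_ext=''):
    """Map a checksum-named file to storage/<char 1>/<char 2>/<checksum><.ext>."""
    actual_ext = os.path.splitext(filename)[1]
    ext = actual_ext if actual_ext else default_ext
    return "/".join(["storage", checksum[0], checksum[1], checksum + ext.lower()])


assert generate_object_storage_name_sketch("abcd1234", "abcd1234.pdf") == "storage/a/b/abcd1234.pdf"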
def debug_serve_file(request, path):
    # There's a problem with loading exercise images, so use this endpoint
    # to serve the image files to the /content/storage url
    filename = os.path.basename(path)
    checksum, _ext = os.path.splitext(filename)
    filepath = generate_object_storage_name(checksum, filename)

    if not default_storage.exists(filepath):
        raise Http404("The object requested does not exist.")
    with default_storage.open(filepath, 'rb') as fobj:
        response = HttpResponse(FileWrapper(fobj), content_type="application/octet-stream")
        return response
def map_files_to_node(user, node, data):
    """
    Generate files that reference the content node.
    """
    # filter for file data that's not empty
    valid_data = (d for d in data if d)

    for file_data in valid_data:
        file_name_parts = file_data['filename'].split(".")

        # Determine a preset if none is given
        kind_preset = None
        if file_data['preset'] is None:
            kind_preset = FormatPreset.objects.filter(
                kind=node.kind,
                allowed_formats__extension__contains=file_name_parts[1],
                display=True).first()
        else:
            kind_preset = FormatPreset.objects.get(id=file_data['preset'])

        file_path = generate_object_storage_name(file_name_parts[0], file_data['filename'])
        storage = default_storage
        if not storage.exists(file_path):
            raise IOError('{} not found'.format(file_path))

        try:
            if file_data.get('language'):
                # TODO: Remove DB call per file?
                file_data['language'] = Language.objects.get(pk=file_data['language'])
        except ObjectDoesNotExist:
            invalid_lang = file_data.get('language')
            logging.warning("file_data with language {} does not exist.".format(invalid_lang))
            raise ValidationError("file_data given was invalid; expected string, got {}".format(invalid_lang))

        resource_obj = File(
            checksum=file_name_parts[0],
            contentnode=node,
            file_format_id=file_name_parts[1],
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size=file_data['size'],
            file_on_disk=DjFile(storage.open(file_path, 'rb')),
            preset=kind_preset,
            language_id=file_data.get('language'),
            uploaded_by=user,
        )
        resource_obj.file_on_disk.name = file_path
        resource_obj.save()
def create_files(cursor, contentnode, indent=0, download_url=None):
    """
    create_files: Create file models for the files referencing the content node
    Args:
        cursor (sqlite3.Connection): connection to export database
        contentnode (models.ContentNode): node that the files reference
        indent (int): How far to indent print statements
    Returns: None
    """
    # Parse database for files referencing content node and make file models
    sql_command = 'SELECT checksum, extension, file_size, contentnode_id, '\
                  'lang_id, preset FROM {table} WHERE contentnode_id=\'{id}\';'\
                  .format(table=FILE_TABLE, id=contentnode.node_id)
    query = cursor.execute(sql_command).fetchall()

    for checksum, extension, file_size, contentnode_id, lang_id, preset in query:
        filename = "{}.{}".format(checksum, extension)
        print("{indent} * FILE {filename}...".format(indent=" |" * indent, filename=filename))
        try:
            filepath = models.generate_object_storage_name(checksum, filename)

            # Download the file first
            if download_url and not default_storage.exists(filepath):
                buffer = StringIO()
                response = requests.get('{}/content/storage/{}/{}/{}'.format(
                    download_url, filename[0], filename[1], filename))
                for chunk in response:
                    buffer.write(chunk)
                create_file_from_contents(buffer.getvalue(), ext=extension, node=contentnode, preset_id=preset or "")
                buffer.close()
            else:
                # Save values to a new or existing file object
                file_obj = models.File(
                    file_format_id=extension,
                    file_size=file_size,
                    contentnode=contentnode,
                    language_id=lang_id,
                    preset_id=preset or "",
                )
                file_obj.file_on_disk.name = filepath
                file_obj.save()
        except IOError as e:
            logging.warning("\b FAILED (check logs for more details)")
            sys.stderr.write("Restoration Process Error: Failed to save file object {}: {}".format(
                filename, os.strerror(e.errno)))
            continue
def get_thumbnail_encoding(filename, dimension=THUMBNAIL_DIMENSION):
    """
    Generates a base64 encoding for a thumbnail
    Args:
        filename (str): thumbnail to generate encoding from (must be in storage already)
        dimension (int): how big the resized image should be
    Returns base64 encoding of the resized thumbnail
    """
    if filename.startswith("data:image"):
        return filename

    checksum, ext = os.path.splitext(filename.split("?")[0])
    inbuffer = StringIO.StringIO()
    outbuffer = StringIO.StringIO()
    try:
        if not filename.startswith(settings.STATIC_ROOT):
            filename = generate_object_storage_name(checksum, filename)
            with default_storage.open(filename) as localtempf:
                inbuffer.write(localtempf.read())
        else:
            with open(filename, 'rb') as fobj:
                inbuffer.write(fobj.read())

        with Image.open(inbuffer) as image:
            image_format = image.format
            width, height = image.size
            dimension = min([dimension, width, height])
            size = [dimension, dimension]
            ratio = Fraction(*size)

            # Crop the image if the aspect ratio is different
            if width > ratio * height:
                x, y = (width - ratio * height) // 2, 0
            else:
                x, y = 0, (height - width / ratio) // 2
            image = image.crop((x, y, width - x, height - y))

            if image.size > size:
                image.thumbnail(size, Image.ANTIALIAS)
            else:
                image.thumbnail((dimension, dimension), Image.ANTIALIAS)

            image.save(outbuffer, image_format)
            return "data:image/{};base64,{}".format(ext[1:], base64.b64encode(outbuffer.getvalue()))
    finally:
        inbuffer.close()
        outbuffer.close()
def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH):
    """
    Generates a base64 encoding for a thumbnail
    Args:
        filename (str): thumbnail to generate encoding from (must be in storage already)
        dimension (int, optional): desired width of thumbnail. Defaults to 400.
    Returns base64 encoding of the resized thumbnail
    """
    if filename.startswith("data:image"):
        return filename

    checksum, ext = os.path.splitext(filename.split("?")[0])
    outbuffer = BytesIO()

    # make sure the aspect ratio between width and height is 16:9
    thumbnail_size = [dimension, round(dimension / 1.77)]
    try:
        if not filename.startswith(settings.STATIC_ROOT):
            filename = generate_object_storage_name(checksum, filename)
            inbuffer = default_storage.open(filename, 'rb')
        else:
            inbuffer = open(filename, 'rb')
        assert inbuffer

        with Image.open(inbuffer) as image:
            image_format = image.format
            # Note: Image.thumbnail ensures that the image will fit in the
            # specified thumbnail size, but it retains the original image's
            # aspect ratio. So a square image will remain square rather
            # than being distorted to a 16:9 aspect ratio. This removes
            # the need to make any changes like cropping the image.
            image.thumbnail(thumbnail_size, Image.ANTIALIAS)
            image.save(outbuffer, image_format)
            return "data:image/{};base64,{}".format(ext[1:], base64.b64encode(outbuffer.getvalue()).decode('utf-8'))
    finally:
        # Try to close the inbuffer if it has been created
        try:
            inbuffer.close()
        except UnboundLocalError:
            pass
        outbuffer.close()
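# Hypothetical usage of get_thumbnail_encoding above: the filename must already
# exist in storage, and the return value is a data URI ready to embed in a
# node's thumbnail_encoding field.
encoding = get_thumbnail_encoding("abcd1234.png")
assert encoding.startswith("data:image/png;base64,")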
def write_raw_content_to_storage(contents, ext=None):
    # Compute the MD5 checksum of the contents
    checksum = hashlib.md5()
    checksum.update(contents)
    hashed_filename = checksum.hexdigest()
    full_filename = "{}.{}".format(hashed_filename, ext.lower())

    # Get location of file
    file_path = models.generate_object_storage_name(hashed_filename, full_filename)

    # Write file
    storage = default_storage
    storage.save(file_path, StringIO(contents))

    return hashed_filename, full_filename, file_path
def get_file_diff(files):
    """
    Given a list of filenames as strings, return the filenames that
    aren't in our storage.
    """
    # Try to be storage-system agnostic, in case we're using either
    # object storage or FileSystemStorage
    storage = default_storage
    ret = []
    for f in files:
        filepath = generate_object_storage_name(os.path.splitext(f)[0], f)
        if not storage.exists(filepath) or storage.size(filepath) == 0:
            ret.append(f)
    return ret
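# Hypothetical usage of get_file_diff above, e.g. during a sync/upload flow:
# the input names are checksum-derived filenames, and the result is whatever
# still needs to be uploaded.
missing = get_file_diff(["abcd1234.mp4", "ffff0000.pdf"])
for name in missing:
    print("needs upload:", name)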
def upload_url(self, request):
    try:
        size = request.data["size"]
        checksum = request.data["checksum"]
        filename = request.data["name"]
        file_format = request.data["file_format"]
        preset = request.data["preset"]
    except KeyError:
        # HttpResponse objects aren't exceptions, so return (not raise) the 400
        return HttpResponseBadRequest(reason="Must specify: size, checksum, name, file_format, and preset")

    try:
        request.user.check_space(float(size), checksum)
    except PermissionDenied as e:
        return HttpResponseBadRequest(reason=str(e), status=418)

    might_skip = File.objects.filter(checksum=checksum).exists()

    filepath = generate_object_storage_name(checksum, filename)
    checksum_base64 = codecs.encode(codecs.decode(checksum, "hex"), "base64").decode()
    retval = get_presigned_upload_url(filepath, checksum_base64, 600, content_length=size)

    file = File(
        file_size=size,
        checksum=checksum,
        original_filename=filename,
        file_on_disk=filepath,
        file_format_id=file_format,
        preset_id=preset,
        uploaded_by=request.user,
    )
    # Avoid using our file_on_disk attribute for checks
    file.save(set_by_file_on_disk=False)

    retval.update({
        "might_skip": might_skip,
        "file": self.serialize_object(id=file.id)
    })

    return Response(retval)
def write_raw_content_to_storage(contents, ext=None):
    # Compute the MD5 checksum of the contents
    checksum = hashlib.md5()
    checksum.update(contents)
    hashed_filename = checksum.hexdigest()
    full_filename = "{}.{}".format(hashed_filename, ext.lower())

    # Get location of file
    file_path = models.generate_object_storage_name(hashed_filename, full_filename)

    # Write file, skipping the save if an object with this checksum already exists
    storage = default_storage
    if storage.exists(file_path):
        logging.info("{} exists in Google Cloud Storage, so it's not saved again.".format(file_path))
    else:
        storage.save(file_path, StringIO(contents))

    return hashed_filename, full_filename, file_path
def get_node_data_from_file(file_name):
    file_path = generate_object_storage_name(file_name.split('.')[0], file_name)
    if not default_storage.exists(file_path):
        raise IOError('{} not found.'.format(file_path))

    with default_storage.open(file_path, 'rb') as file_obj:
        node_data = json.loads(file_obj.read().decode('utf-8'))

    if node_data is None:
        raise IOError('{} is empty or could not be read.'.format(file_path))

    # Make sure the license is valid
    license = None
    license_name = node_data['license']
    if license_name is not None:
        try:
            license = License.objects.get(license_name__iexact=license_name)
        except ObjectDoesNotExist:
            raise ObjectDoesNotExist('Invalid license found')

    node_data['license'] = license
    return node_data
def map_files_to_assessment_item(user, question, data):
    """
    Generate files that reference the content node's assessment items.
    """
    for file_data in data:
        file_name_parts = file_data['filename'].split(".")
        file_path = generate_object_storage_name(file_name_parts[0], file_data['filename'])
        if not os.path.isfile(file_path):
            raise IOError('{} not found'.format(file_path))

        resource_obj = File(
            checksum=file_name_parts[0],
            assessment_item=question,
            file_format_id=file_name_parts[1],
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size=file_data['size'],
            file_on_disk=DjFile(open(file_path, 'rb')),
            preset_id=file_data['preset'],
            uploaded_by=user,
        )
        resource_obj.file_on_disk.name = file_path
        resource_obj.save()
def fileobj_video(contents=None):
    """
    Create an "mp4" video file on storage and then create a File model
    pointing to it.

    If `contents` is given and is a string, write those contents to the file.
    If not given, a random string is generated and set as the contents of the file.
    """
    if contents:
        filecontents = contents
    else:
        filecontents = "".join(random.sample(string.printable, 20))

    fileobj = StringIO(filecontents)
    digest = md5.new(filecontents).hexdigest()
    filename = "{}.mp4".format(digest)
    storage_file_path = cc.generate_object_storage_name(digest, filename)

    # Write out the file bytes to object storage, under the checksum-derived path
    default_storage.save(storage_file_path, fileobj)

    # then create a File object pointing to it
    db_file_obj = mixer.blend(cc.File, file_format=fileformat_mp4(), preset=preset_video(),
                              file_on_disk=storage_file_path)

    return db_file_obj
def get(self, request, zipped_filename, embedded_filepath):
    """
    Handles GET requests and serves a static file from within the zip file.
    """
    if not VALID_STORAGE_FILENAME.match(zipped_filename):
        return HttpResponseNotFound("'{}' is not a valid URL for this zip file".format(zipped_filename))

    storage = default_storage

    # calculate the local file path to the zip file
    filename, ext = os.path.splitext(zipped_filename)
    zipped_path = generate_object_storage_name(filename, zipped_filename)

    # file size
    file_size = 0

    # if the zipfile does not exist on disk, return a 404
    if not storage.exists(zipped_path):
        return HttpResponseNotFound('"%(filename)s" does not exist in storage' % {'filename': zipped_path})

    # if client has a cached version, use that (we can safely assume nothing has changed, due to MD5)
    if request.META.get('HTTP_IF_MODIFIED_SINCE'):
        return HttpResponseNotModified()

    zf_obj = storage.open(zipped_path)

    try:
        with zipfile.ZipFile(zf_obj) as zf:
            # if no path, or a directory, is being referenced, look for an index.html file
            if not embedded_filepath or embedded_filepath.endswith("/"):
                embedded_filepath += "index.html"

            # get the details about the embedded file, and ensure it exists
            try:
                info = zf.getinfo(embedded_filepath)
            except KeyError:
                return HttpResponseNotFound('"{}" does not exist inside "{}"'.format(embedded_filepath, zipped_filename))

            # try to guess the MIME type of the embedded file being referenced
            content_type = mimetypes.guess_type(embedded_filepath)[0] or 'application/octet-stream'

            if not os.path.splitext(embedded_filepath)[1] == '.json':
                # generate a streaming response object, pulling data from within the zip file
                response = FileResponse(zf.open(info), content_type=content_type)
                file_size = info.file_size
            else:
                # load the stream from the json file into memory, and replace the path placeholder
                content = zf.open(info).read()
                str_to_be_replaced = ('$' + exercises.IMG_PLACEHOLDER).encode()
                zipcontent = ('/' + request.resolver_match.url_name + "/" + zipped_filename).encode()
                content_with_path = content.replace(str_to_be_replaced, zipcontent)
                response = HttpResponse(content_with_path, content_type=content_type)
                file_size = len(content_with_path)
    except zipfile.BadZipfile:
        just_downloaded = getattr(zf_obj, 'just_downloaded', "Unknown (Most likely local file)")
        client.captureMessage("Unable to open zip file. File info: name={}, size={}, mode={}, just_downloaded={}".format(
            zf_obj.name, zf_obj.size, zf_obj.mode, just_downloaded))
        return HttpResponseServerError("Attempt to open zip file failed. Please try again, and if you continue "
                                       "to receive this message, please check that the zip file is valid.")

    # set the last-modified header to the date marked on the embedded file
    if info.date_time:
        response["Last-Modified"] = http_date(time.mktime(datetime.datetime(*info.date_time).timetuple()))

    # cache these resources forever; this is safe due to the MD5-naming used on content files
    response["Expires"] = "Sun, 17-Jan-2038 19:14:07 GMT"

    # set the content-length header to the size of the embedded file
    if file_size:
        response["Content-Length"] = file_size

    # ensure the browser knows not to try byte-range requests, as we don't support them here
    response["Accept-Ranges"] = "none"

    _add_access_control_headers(request, response)

    # restrict CSP to only allow resources to be loaded from the Studio host, to prevent info leakage
    # (e.g. via passing user info out as GET parameters to an attacker's server), or inadvertent data usage
    host = request.build_absolute_uri('/').strip("/")
    response["Content-Security-Policy"] = "default-src 'self' 'unsafe-inline' 'unsafe-eval' data: " + host

    return response