def create_perseus_zip(ccnode, exercise_data, write_to_path):
    """Write a Perseus-format exercise zip for ``ccnode`` to ``write_to_path``.

    The archive contains:
      * ``exercise.json`` rendered from ``exercise_data``;
      * every image / graphie file referenced by the node's assessment
        items (deduplicated by archive name);
      * one entry per assessment item (via ``write_assessment_item``).
    """
    # The ``with`` statement already closes the zipfile on exit; the
    # original's extra ``try/finally: zf.close()`` was redundant and has
    # been removed.
    with zipfile.ZipFile(write_to_path, "w") as zf:
        exercise_context = {
            'exercise': json.dumps(exercise_data, sort_keys=True, indent=4)
        }
        exercise_result = render_to_string('perseus/exercise.json', exercise_context)
        write_to_zipfile("exercise.json", exercise_result, zf)

        for question in ccnode.assessment_items.prefetch_related('files').all().order_by('order'):
            # Plain exercise images are stored as images/<checksum>.<ext>.
            for image in question.files.filter(preset_id=format_presets.EXERCISE_IMAGE).order_by('checksum'):
                image_name = "images/{}.{}".format(image.checksum, image.file_format_id)
                if image_name not in zf.namelist():
                    with open(ccmodels.generate_file_on_disk_name(image.checksum, str(image)), 'rb') as content:
                        write_to_zipfile(image_name, content.read(), zf)

            # A graphie file on disk holds SVG and JSON data in one blob,
            # separated by GRAPHIE_DELIMITER; split it into two zip entries.
            for image in question.files.filter(preset_id=format_presets.EXERCISE_GRAPHIE).order_by('checksum'):
                svg_name = "images/{0}.svg".format(image.original_filename)
                json_name = "images/{0}-data.json".format(image.original_filename)
                if svg_name not in zf.namelist() or json_name not in zf.namelist():
                    with open(ccmodels.generate_file_on_disk_name(image.checksum, str(image)), 'rb') as content:
                        raw = content.read()
                    parts = raw.split(exercises.GRAPHIE_DELIMITER)
                    write_to_zipfile(svg_name, parts[0], zf)
                    write_to_zipfile(json_name, parts[1], zf)

        for item in ccnode.assessment_items.all().order_by('order'):
            write_assessment_item(item, zf)
def api_file_upload(request):
    """Upload a file into content storage.

    The upload's filename (minus extension) must equal the MD5 hex digest
    of its contents; mismatches and missing files are rejected with
    SuspiciousOperation.  Responds with a JSON success payload.
    """
    try:
        fobj = request.FILES["file"]

        # Recompute the MD5 of the payload in 4 KB chunks.
        digest = hashlib.md5()
        chunk = fobj.read(4096)
        while chunk:
            digest.update(chunk)
            chunk = fobj.read(4096)

        claimed_hash = os.path.splitext(fobj._name)[0]
        fobj.seek(0)
        if digest.hexdigest() != claimed_hash:
            raise SuspiciousOperation("Failed to upload file {0}: hash is invalid".format(fobj._name))

        # Storage is content-addressed, so an existing file is identical —
        # only copy when the path is not already present.
        file_path = generate_file_on_disk_name(claimed_hash, fobj._name)
        if not os.path.isfile(file_path):
            with open(file_path, 'wb') as destf:
                shutil.copyfileobj(fobj, destf)

        return HttpResponse(json.dumps({"success": True}))
    except KeyError:
        raise SuspiciousOperation("Invalid file upload request")
def map_files_to_node(node, data):
    """ Generate files that reference the content node

    Each entry in ``data`` is a dict with at least a ``filename`` of the
    form "<md5 checksum>.<extension>" plus ``preset`` and ``size`` keys;
    a File row is created for each one.  Raises IOError when the file is
    not present in storage.
    """
    for file_data in data:
        # file_hash[0] is the checksum, file_hash[1] the extension.
        file_hash = file_data['filename'].split(".")

        # Determine a preset if none is given
        kind_preset = None
        if file_data['preset'] is None:
            # Infer from the node kind plus the file extension.
            kind_preset = FormatPreset.objects.filter(kind=node.kind, allowed_formats__extension__contains=file_hash[1], display=True).first()
        else:
            kind_preset = FormatPreset.objects.get(id=file_data['preset'])

        language = None
        if file_data.get('language'):
            language = Language.objects.get(pk=file_data['language'])

        file_path=generate_file_on_disk_name(file_hash[0], file_data['filename'])
        if not os.path.isfile(file_path):
            raise IOError('{} not found'.format(file_path))

        # NOTE(review): the handle opened here is never explicitly closed —
        # presumably Django's File wrapper takes ownership; confirm.
        file_obj = File(
            checksum=file_hash[0],
            contentnode=node,
            file_format_id=file_hash[1],
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size = file_data['size'],
            file_on_disk=DjFile(open(file_path, 'rb')),
            preset=kind_preset,
            language=language,
        )
        file_obj.save()
def process_image_strings(content, zf):
    """Scan markdown ``content`` for image references, copy the referenced
    image files into the zipfile ``zf``, and collect any resize metadata.

    NOTE(review): ``image_list`` and the rewritten ``content`` are built up
    but never returned from the visible body — confirm whether the caller
    expects a return value or whether this function continues elsewhere.
    """
    image_list = []
    # Rewrite the storage placeholder into the Perseus image directory.
    content = content.replace(exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR)
    # Markdown image syntax: ![alt](path), optionally suffixed with " =WxH".
    for match in re.finditer(ur'!\[(?:[^\]]*)]\(([^\)]+)\)', content):
        img_match = re.search(ur'(.+/images/[^\s]+)(?:\s=([0-9\.]+)x([0-9\.]+))*', match.group(1))
        if img_match:
            # Add any image files that haven't been written to the zipfile
            filename = img_match.group(1).split('/')[-1]
            # Image filenames are content-addressed: "<checksum>.<ext>".
            checksum, ext = os.path.splitext(filename)
            image_name = "images/{}.{}".format(checksum, ext[1:])
            if image_name not in zf.namelist():
                with open(ccmodels.generate_file_on_disk_name(checksum, filename), 'rb') as imgfile:
                    write_to_zipfile(image_name, imgfile.read(), zf)

            # Add resizing data (only when both width and height matched)
            if img_match.group(2) and img_match.group(3):
                image_data = {'name': img_match.group(1)}
                image_data.update({'width': float(img_match.group(2))})
                image_data.update({'height': float(img_match.group(3))})
                image_list.append(image_data)

            # Strip the " =WxH" suffix from the reference in the content.
            content = content.replace(match.group(1), img_match.group(1))
def map_files_to_node(user, node, data):
    """Generate File rows that reference the content node.

    Args:
        user: the uploading user, recorded as ``uploaded_by``.
        node: the ContentNode the files belong to.
        data: iterable of dicts with ``filename`` ("<md5>.<ext>"),
            ``preset``, ``size`` and optional ``language`` /
            ``original_filename`` / ``source_url`` keys.

    Raises:
        IOError: when a referenced file is missing from storage.
        ValidationError: when a given language code does not exist.
    """
    # filter for file data that's not empty
    valid_data = (d for d in data if d)
    for file_data in valid_data:
        file_name_parts = file_data['filename'].split(".")

        # Determine a preset if none is given
        if file_data['preset'] is None:
            kind_preset = FormatPreset.objects.filter(
                kind=node.kind,
                allowed_formats__extension__contains=file_name_parts[1],
                display=True).first()
        else:
            kind_preset = FormatPreset.objects.get(id=file_data['preset'])

        file_path = generate_file_on_disk_name(file_name_parts[0], file_data['filename'])
        if not os.path.isfile(file_path):
            # BUG FIX: this error was previously *returned*, not raised, so
            # missing files were silently ignored by callers.
            raise IOError('{} not found'.format(file_path))

        language = None
        try:
            if file_data.get('language'):
                # TODO: Remove DB call per file?
                language = Language.objects.get(pk=file_data['language'])
        except ObjectDoesNotExist:
            invalid_lang = file_data.get('language')
            logging.warning(
                "file_data with language {} does not exist.".format(invalid_lang))
            # BUG FIX: raise (not return) so invalid input is actually reported.
            raise ValidationError(
                "file_data given was invalid; expected string, got {}".format(invalid_lang))

        resource_obj = File(
            checksum=file_name_parts[0],
            contentnode=node,
            file_format_id=file_name_parts[1],
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size=file_data['size'],
            file_on_disk=DjFile(open(file_path, 'rb')),
            preset=kind_preset,
            # BUG FIX: a Language *instance* was previously passed to the
            # ``language_id`` attribute; pass it via ``language`` instead.
            language=language,
            uploaded_by=user,
        )
        resource_obj.file_on_disk.name = file_path
        resource_obj.save()
def convert_channel_thumbnail(thumbnail):
    """Return the channel thumbnail as a base64 data URI.

    Args:
        thumbnail (str): file path or url to channel's thumbnail
    Returns:
        "" for a missing/empty/static thumbnail, otherwise a
        "data:image/png;base64,..." string.
    """
    if not thumbnail or 'static' in thumbnail:
        return ""
    checksum = thumbnail.split('.')[0]
    with open(ccmodels.generate_file_on_disk_name(checksum, thumbnail), 'rb') as fobj:
        payload = base64.b64encode(fobj.read()).decode('utf-8')
    return "data:image/png;base64," + payload
def convert_channel_thumbnail(thumbnail):
    """
    encode_thumbnail: gets base64 encoding of thumbnail
        Args:
            thumbnail (str): file path or url to channel's thumbnail
        Returns: base64 encoding of thumbnail, or "" when the thumbnail
            is missing, empty, or a bundled static asset
    """
    encoding = None
    # Static assets ship with the app and need no inline encoding.
    if thumbnail is None or thumbnail == '' or 'static' in thumbnail:
        return ""

    # Thumbnail filenames are content-addressed ("<checksum>.<ext>");
    # resolve the on-disk location from the checksum portion.
    with open(
            ccmodels.generate_file_on_disk_name(
                thumbnail.split('.')[0], thumbnail), 'rb') as file_obj:
        encoding = base64.b64encode(file_obj.read()).decode('utf-8')
    # NOTE(review): the "image/png" MIME type is hard-coded regardless of
    # the actual file extension — confirm thumbnails are always PNG.
    return "data:image/png;base64," + encoding
def write_raw_content_to_storage(contents, ext=None):
    """Store raw bytes in content storage under their MD5 content hash.

    Args:
        contents (bytes): raw payload to store.
        ext (str): file extension (without the dot); lower-cased.

    Returns:
        tuple: (hash, "<hash>.<ext>", absolute storage path).
    """
    # Content-address the payload by its MD5 digest.
    digest = hashlib.md5()
    digest.update(contents)
    filename = digest.hexdigest()
    full_filename = "{}.{}".format(filename, ext.lower())

    file_path = models.generate_file_on_disk_name(filename, full_filename)
    with open(file_path, 'wb') as destf:
        destf.write(contents)

    return filename, full_filename, file_path
def compress_video_wrapper(file_object):
    """Create and save a low-resolution MP4 File derived from ``file_object``.

    Returns the newly saved low-res File model instance.
    """
    with tempfile.NamedTemporaryFile(suffix=".{}".format(file_formats.MP4)) as tempf:
        # Close immediately so compress_video can (re)open/overwrite the
        # path; the temp file is still deleted when the ``with`` exits.
        tempf.close()
        compress_video(str(file_object.file_on_disk), tempf.name, overwrite=True)
        # Copy the compressed output into content-addressed storage.
        filename = write_file_to_storage(open(tempf.name, 'rb'), name=tempf.name)
        checksum, ext = os.path.splitext(filename)
        file_location = generate_file_on_disk_name(checksum, filename)
        low_res_object = File(
            file_on_disk=DjFile(open(file_location, 'rb')),
            file_format_id=file_formats.MP4,
            original_filename=file_object.original_filename,
            contentnode=file_object.contentnode,
            file_size=os.path.getsize(file_location),
            preset_id=format_presets.VIDEO_LOW_RES,
        )
        low_res_object.save()
        return low_res_object
def compress_video_wrapper(file_object):
    """Compress ``file_object``'s video into a low-res MP4 and save it as a
    new File row with the VIDEO_LOW_RES preset.  Returns the new File."""
    suffix = ".{}".format(file_formats.MP4)
    with tempfile.NamedTemporaryFile(suffix=suffix) as scratch:
        # Close right away so the compressor can write to the path.
        scratch.close()
        compress_video(str(file_object.file_on_disk), scratch.name, overwrite=True)

        stored_name = write_file_to_storage(open(scratch.name, 'rb'), name=scratch.name)
        file_checksum = os.path.splitext(stored_name)[0]
        stored_path = generate_file_on_disk_name(file_checksum, stored_name)

        compressed = File(
            file_on_disk=DjFile(open(stored_path, 'rb')),
            file_format_id=file_formats.MP4,
            original_filename=file_object.original_filename,
            contentnode=file_object.contentnode,
            file_size=os.path.getsize(stored_path),
            preset_id=format_presets.VIDEO_LOW_RES,
        )
        compressed.save()
        return compressed
def map_files_to_assessment_item(question, data):
    """Generate File rows that reference an assessment item.

    Each entry in ``data`` is a dict with ``filename`` ("<md5>.<ext>"),
    ``preset`` and ``size`` keys.

    Raises:
        IOError: when a referenced file is missing from storage.
    """
    for file_data in data:
        file_hash = file_data['filename'].split(".")
        kind_preset = FormatPreset.objects.get(id=file_data['preset'])

        file_path = generate_file_on_disk_name(file_hash[0], file_data['filename'])
        # FIX: fail early with a clear message when the file is missing
        # from storage, instead of an opaque open() error below.
        if not os.path.isfile(file_path):
            raise IOError('{} not found'.format(file_path))

        file_obj = File(
            checksum=file_hash[0],
            assessment_item=question,
            file_format_id=file_hash[1],
            original_filename=file_data.get('original_filename') or 'file',
            source_url=file_data.get('source_url'),
            file_size=file_data['size'],
            file_on_disk=DjFile(open(file_path, 'rb')),
            preset=kind_preset,
        )
        file_obj.save()
def file_diff(request):
    """Return, as JSON, the subset of requested filenames missing on the server."""
    logging.debug("Entering the file_diff endpoint")
    data = json.loads(request.body)

    # Narrow the candidate set via the database first — cheaper than
    # stat'ing every requested filename on disk.
    in_db_list = File.objects.annotate(
        filename=Concat('checksum', Value('.'), 'file_format')
    ).filter(filename__in=data).values_list('filename', flat=True)

    missing = []
    for name in set(data) - set(in_db_list):
        disk_path = generate_file_on_disk_name(os.path.splitext(name)[0], name)
        if not os.path.isfile(disk_path):
            missing.append(name)

    return HttpResponse(json.dumps(missing))
def create_files(cursor, contentnode, indent=0):
    """
    create_files: create File models for files referencing a content node
        Args:
            cursor (sqlite3.Connection): connection to export database
            contentnode (models.ContentNode): node file references
            indent (int): How far to indent print statements
        Returns: None
    """
    # Parse database for files referencing content node and make file models
    sql_command = 'SELECT checksum, extension, file_size, contentnode_id, '\
        'lang_id, preset FROM {table} WHERE contentnode_id=\'{id}\';'\
        .format(table=FILE_TABLE, id=contentnode.node_id)

    query = cursor.execute(sql_command).fetchall()
    for checksum, extension, file_size, contentnode_id, lang_id, preset in query:
        filename = "{}.{}".format(checksum, extension)
        # BUG FIX: the message previously hard-coded "(unknown)" while the
        # ``filename`` kwarg was passed but never interpolated.
        logging.info("{indent} * FILE {filename}...".format(indent=" |" * indent, filename=filename))
        file_path = models.generate_file_on_disk_name(checksum, filename)
        try:
            # Save values to new or existing file object
            with open(file_path, 'rb') as fobj:
                models.File.objects.create(
                    file_on_disk=DJFile(fobj),
                    file_format_id=extension,
                    file_size=file_size,
                    contentnode=contentnode,
                    lang_id=lang_id,
                    preset_id=preset or "",
                )

            # Update file stat
            global FILE_COUNT
            FILE_COUNT += 1
        except IOError as e:
            # Best-effort restore: log the failure and move on to the next file.
            logging.warning("\b FAILED (check logs for more details)")
            sys.stderr.write(
                "Restoration Process Error: Failed to save file object {}: {}".
                format(filename, os.strerror(e.errno)))
            continue
def file_diff(request):
    """Return, as JSON, the requested filenames that are missing (or empty)
    in server storage."""
    logging.debug("Entering the file_diff endpoint")
    data = json.loads(request.body)

    # Possible optimization: pre-filter via the File table (annotate
    # checksum + '.' + file_format and filter filename__in=data) once the
    # assumption that DB rows imply files on disk holds.
    missing = []
    for name in data:
        disk_path = generate_file_on_disk_name(os.path.splitext(name)[0], name)
        # A zero-byte file counts as missing too.
        if not os.path.isfile(disk_path) or os.path.getsize(disk_path) == 0:
            missing.append(name)

    return HttpResponse(json.dumps(missing))
def file_diff(request):
    """ Determine which files don't exist on server

    Expects a JSON request body listing filenames ("<checksum>.<ext>");
    responds with the JSON list of those not present (or empty) on disk.
    """
    logging.debug("Entering the file_diff endpoint")
    data = json.loads(request.body)
    to_return = []

    # Might want to use this once assumption that file exists is true (save on performance)
    # in_db_list = File.objects.annotate(filename=Concat('checksum', Value('.'), 'file_format')).filter(filename__in=data).values_list('filename', flat=True)
    # for f in list(set(data) - set(in_db_list)):

    # Add files that don't exist in storage
    for f in data:
        file_path = generate_file_on_disk_name(os.path.splitext(f)[0], f)
        # Add file if it doesn't already exist — a zero-byte file on disk
        # is treated as missing as well.
        if not os.path.isfile(file_path) or os.path.getsize(file_path) == 0:
            to_return.append(f)

    return HttpResponse(json.dumps(to_return))
def convert_channel_thumbnail(channel):
    """Return a base64 data-URI for ``channel``'s thumbnail.

    Prefers the cached encoding stored in ``channel.thumbnail_encoding``;
    otherwise reads the thumbnail file from content storage and encodes
    it.  Returns "" when the channel has no usable thumbnail.
    """
    thumbnail = channel.thumbnail
    if not thumbnail or 'static' in thumbnail:
        return ""

    if channel.thumbnail_encoding:
        # thumbnail_encoding is a repr'd dict, e.g. "{'base64': '...'}".
        cached = ast.literal_eval(channel.thumbnail_encoding)
        if cached.get("base64"):
            return cached["base64"]

    path = ccmodels.generate_file_on_disk_name(thumbnail.split('.')[0], thumbnail)
    with open(path, 'rb') as fobj:
        encoded = base64.b64encode(fobj.read()).decode('utf-8')
    return "data:image/png;base64," + encoded
def map_files_to_assessment_item(question, data):
    """Create a File row for each file dict in ``data``, attached to the
    assessment item ``question``.  Raises IOError when a referenced file
    is missing from storage."""
    for entry in data:
        name_parts = entry['filename'].split(".")
        checksum = name_parts[0]

        disk_path = generate_file_on_disk_name(checksum, entry['filename'])
        if not os.path.isfile(disk_path):
            raise IOError('{} not found'.format(disk_path))

        File(
            checksum=checksum,
            assessment_item=question,
            file_format_id=name_parts[1],
            original_filename=entry.get('original_filename') or 'file',
            source_url=entry.get('source_url'),
            file_size=entry['size'],
            file_on_disk=DjFile(open(disk_path, 'rb')),
            preset_id=entry['preset'],
        ).save()
def create_files(cursor, contentnode, indent=0):
    """
    create_files: create File models for files referencing a content node
        Args:
            cursor (sqlite3.Connection): connection to export database
            contentnode (models.ContentNode): node file references
            indent (int): How far to indent print statements
        Returns: None
    """
    # Parse database for files referencing content node and make file models
    sql_command = 'SELECT checksum, extension, file_size, contentnode_id, '\
        'lang_id, preset FROM {table} WHERE contentnode_id=\'{id}\';'\
        .format(table=FILE_TABLE, id=contentnode.node_id)

    query = cursor.execute(sql_command).fetchall()
    for checksum, extension, file_size, contentnode_id, lang_id, preset in query:
        filename = "{}.{}".format(checksum, extension)
        # BUG FIX: the message previously hard-coded "(unknown)" while the
        # ``filename`` kwarg was passed but never interpolated.
        logging.info("{indent} * FILE {filename}...".format(indent=" |" * indent, filename=filename))
        file_path = models.generate_file_on_disk_name(checksum, filename)
        try:
            # Save values to new or existing file object
            with open(file_path, 'rb') as fobj:
                models.File.objects.create(
                    file_on_disk=DJFile(fobj),
                    file_format_id=extension,
                    file_size=file_size,
                    contentnode=contentnode,
                    lang_id=lang_id,
                    preset_id=preset or "",
                )

            # Update file stat
            global FILE_COUNT
            FILE_COUNT += 1
        except IOError as e:
            # Best-effort restore: log the failure and continue with the rest.
            logging.warning("\b FAILED (check logs for more details)")
            sys.stderr.write("Restoration Process Error: Failed to save file object {}: {}".format(filename, os.strerror(e.errno)))
            continue
def encode_base64(value):
    """Return ``value`` as a base64 thumbnail data-URI.

    Passes ``value`` through untouched when it already is a data URI.
    Otherwise loads the content-addressed image, scales it down to at
    most THUMBNAIL_DIMENSION on a side, and encodes it; on any read
    error, falls back to the bundled Kolibri placeholder image.
    """
    if value.startswith("data:image"):
        return value

    try:
        checksum, ext = os.path.splitext(value)
        filepath = generate_file_on_disk_name(checksum, value)
        out = cStringIO.StringIO()
        with Image.open(filepath) as image:
            width, height = image.size
            # Never upscale: clamp to the smaller of the image's sides.
            side = min([THUMBNAIL_DIMENSION, width, height])
            image.thumbnail((side, side), Image.ANTIALIAS)
            image.save(out, image.format)
        return "data:image/{};base64,{}".format(ext[1:], base64.b64encode(out.getvalue()))
    except IOError:
        placeholder = os.path.join(settings.STATIC_ROOT, 'img', 'kolibri_placeholder.png')
        with open(placeholder, 'rb') as image_file:
            _, ext = os.path.splitext(value)
            return "data:image/{};base64,{}".format(ext[1:], base64.b64encode(image_file.read()))
def write_file_to_storage(fobj, check_valid=False, name=None):
    """Copy ``fobj`` into content storage under its MD5 content hash.

    When ``check_valid`` is True, the supplied name (minus extension)
    must equal the computed hash or SuspiciousOperation is raised.
    Returns the stored "<hash><ext>" filename.
    """
    # Hash the payload in 4 KB chunks.
    digest = hashlib.md5()
    chunk = fobj.read(4096)
    while chunk:
        digest.update(chunk)
        chunk = fobj.read(4096)

    name = name or fobj._name or ""
    claimed_hash, ext = os.path.splitext(name)
    hashed_filename = digest.hexdigest()
    full_filename = "{}{}".format(hashed_filename, ext.lower())
    fobj.seek(0)  # rewind for the copy below

    if check_valid and hashed_filename != claimed_hash:
        raise SuspiciousOperation("Failed to upload file {0}: hash is invalid".format(name))

    file_path = models.generate_file_on_disk_name(hashed_filename, full_filename)
    with open(file_path, 'wb') as destf:
        shutil.copyfileobj(fobj, destf)

    return full_filename
def write_file_to_storage(fobj, check_valid=False, name=None):
    """Write an uploaded file-like object into content storage.

    The destination filename is "<md5 of contents><original extension>".

    Args:
        fobj: readable, seekable file-like object; must expose ``_name``
            when ``name`` is not supplied.
        check_valid (bool): when True, require the supplied name (minus
            extension) to equal the computed MD5 hex digest.
        name (str): optional original filename.

    Returns:
        str: the "<hash><ext>" filename used in storage.

    Raises:
        SuspiciousOperation: on hash mismatch with ``check_valid``.
    """
    # Check that hash is valid
    checksum = hashlib.md5()
    for chunk in iter(lambda: fobj.read(4096), b""):
        checksum.update(chunk)
    name = name or fobj._name or ""
    filename, ext = os.path.splitext(name)
    hashed_filename = checksum.hexdigest()
    full_filename = "{}{}".format(hashed_filename, ext.lower())
    # Rewind so the copy below starts from the beginning of the stream.
    fobj.seek(0)
    if check_valid and hashed_filename != filename:
        raise SuspiciousOperation("Failed to upload file {0}: hash is invalid".format(name))

    # Get location of file
    file_path = models.generate_file_on_disk_name(hashed_filename, full_filename)

    # Write file
    with open(file_path, 'wb') as destf:
        shutil.copyfileobj(fobj, destf)

    return full_filename
def generate_thumbnail(channel):
    """Return a thumbnail encoding for ``channel``.

    Preference order: the pre-computed ``icon_encoding``, the cached
    ``thumbnail_encoding`` dict (stored as a Python literal string), then
    a freshly generated base64 data-URI scaled to at most 200px per side.
    Implicitly returns None when none of these succeeds.
    """
    THUMBNAIL_DIMENSION = 200
    if channel.icon_encoding:
        return channel.icon_encoding
    if channel.thumbnail_encoding:
        return ast.literal_eval(channel.thumbnail_encoding).get('base64')
    if channel.thumbnail:
        try:
            checksum, ext = os.path.splitext(channel.thumbnail)
            filepath = generate_file_on_disk_name(checksum, channel.thumbnail)
            out = StringIO.StringIO()
            with Image.open(filepath) as image:
                width, height = image.size
                # Never upscale: clamp to the smaller of the image's sides.
                side = min([THUMBNAIL_DIMENSION, width, height])
                image.thumbnail((side, side), Image.ANTIALIAS)
                image.save(out, image.format)
            return "data:image/{};base64,{}".format(ext[1:], base64.b64encode(out.getvalue()))
        except IOError:
            pass
def generate_thumbnail(channel):
    """Return a thumbnail encoding for ``channel``.

    Tries, in order: the pre-computed ``icon_encoding``, the cached
    ``thumbnail_encoding`` dict (stored as a Python literal string), and
    finally generating a base64 data-URI from the thumbnail file on disk.
    Implicitly returns None when all of these fail.
    """
    THUMBNAIL_DIMENSION = 200  # max width/height in pixels
    if channel.icon_encoding:
        return channel.icon_encoding
    elif channel.thumbnail_encoding:
        # thumbnail_encoding is a repr'd dict, e.g. "{'base64': '...'}"
        return ast.literal_eval(channel.thumbnail_encoding).get('base64')
    elif channel.thumbnail:
        try:
            checksum, ext = os.path.splitext(channel.thumbnail)
            filepath = generate_file_on_disk_name(checksum, channel.thumbnail)
            buffer = StringIO.StringIO()
            with Image.open(filepath) as image:
                width, height = image.size
                # never upscale: clamp to the smaller of the image's sides
                dimension = min([THUMBNAIL_DIMENSION, width, height])
                image.thumbnail((dimension, dimension), Image.ANTIALIAS)
                image.save(buffer, image.format)
            return "data:image/{};base64,{}".format(ext[1:], base64.b64encode(buffer.getvalue()))
        except IOError:
            # Best effort: report the failure and fall through to None.
            # NOTE(review): ``filepath`` is unbound here if the exception
            # was raised before its assignment — confirm that can't happen.
            client.captureMessage("Failed to generate thumbnail for channel id={}, filepath={}".format(
                channel.id, filepath))
            pass
def get_node_data_from_file(file_name):
    """Load node metadata from a JSON file in content storage.

    Validates that the file exists, parses it, and replaces the license
    name in the data with the matching License object.

    Raises:
        IOError: when the file is missing or parses to nothing.
        ObjectDoesNotExist: when the named license is unknown.
    """
    file_path = generate_file_on_disk_name(file_name.split('.')[0], file_name)
    if not os.path.isfile(file_path):
        raise IOError('{} not found.'.format(file_path))

    with open(file_path, 'rb') as file_obj:
        node_data = json.loads(file_obj.read().decode('utf-8'))

    if node_data is None:
        raise IOError('{} is empty or could not be read.'.format(file_path))

    # Swap the license name for its model instance (None stays None).
    license_name = node_data['license']
    if license_name is None:
        node_data['license'] = None
    else:
        try:
            node_data['license'] = License.objects.get(license_name__iexact=license_name)
        except ObjectDoesNotExist:
            raise ObjectDoesNotExist('Invalid license found')

    return node_data
def get(self, request, zipped_filename, embedded_filepath):
    """
    Handles GET requests and serves a static file from within the zip file.

    ``zipped_filename`` is the content-addressed name of a zip in storage;
    ``embedded_filepath`` is the path of the entry to serve from inside it.
    """
    # NOTE(review): ``assert`` is stripped under ``python -O``; confirm this
    # validation is acceptable as an assert rather than an explicit check.
    assert VALID_STORAGE_FILENAME.match(
        zipped_filename
    ), "'{}' is not a valid content storage filename".format(
        zipped_filename)

    # calculate the local file path to the zip file
    filename, ext = os.path.splitext(zipped_filename)
    zipped_path = generate_file_on_disk_name(filename, zipped_filename)

    # file size
    file_size = 0

    # if the zipfile does not exist on disk, return a 404
    if not os.path.exists(zipped_path):
        raise Http404('"%(filename)s" does not exist locally' % {'filename': zipped_path})

    # if client has a cached version, use that (we can safely assume nothing has changed, due to MD5)
    if request.META.get('HTTP_IF_MODIFIED_SINCE'):
        return HttpResponseNotModified()

    with zipfile.ZipFile(zipped_path) as zf:

        # if no path, or a directory, is being referenced, look for an index.html file
        if not embedded_filepath or embedded_filepath.endswith("/"):
            embedded_filepath += "index.html"

        # get the details about the embedded file, and ensure it exists
        try:
            info = zf.getinfo(embedded_filepath)
        except KeyError:
            raise Http404('"{}" does not exist inside "{}"'.format(
                embedded_filepath, zipped_filename))

        # try to guess the MIME type of the embedded file being referenced
        content_type = mimetypes.guess_type(
            embedded_filepath)[0] or 'application/octet-stream'

        if not os.path.splitext(embedded_filepath)[1] == '.json':
            # generate a streaming response object, pulling data from within the zip file
            response = FileResponse(zf.open(info), content_type=content_type)
            file_size = info.file_size
        else:
            # load the stream from json file into memory, replace the path_place_holder.
            content = zf.open(info).read()
            str_to_be_replaced = ('$' + exercises.IMG_PLACEHOLDER).encode()
            zipcontent = ('/' + request.resolver_match.url_name + "/" + zipped_filename).encode()
            content_with_path = content.replace(str_to_be_replaced, zipcontent)
            response = HttpResponse(content_with_path, content_type=content_type)
            file_size = len(content_with_path)

        # set the last-modified header to the date marked on the embedded file
        if info.date_time:
            response["Last-Modified"] = http_date(
                time.mktime(datetime.datetime(*info.date_time).timetuple()))

        # cache these resources forever; this is safe due to the MD5-naming used on content files
        response["Expires"] = "Sun, 17-Jan-2038 19:14:07 GMT"

        # set the content-length header to the size of the embedded file
        if file_size:
            response["Content-Length"] = file_size

        # ensure the browser knows not to try byte-range requests, as we don't support them here
        response["Accept-Ranges"] = "none"

        # allow all origins so that content can be read from within zips within sandboxed iframes
        response["Access-Control-Allow-Origin"] = "*"

        return response
def get(self, request, zipped_filename, embedded_filepath):
    """
    Handles GET requests and serves a static file from within the zip file.

    ``zipped_filename`` names a content-addressed zip in local storage;
    ``embedded_filepath`` selects the entry to serve from inside it.
    JSON entries are rewritten in memory to fix embedded image paths.
    """
    # NOTE(review): ``assert`` is stripped under ``python -O``; confirm this
    # validation is acceptable as an assert rather than an explicit check.
    assert VALID_STORAGE_FILENAME.match(zipped_filename), "'{}' is not a valid content storage filename".format(zipped_filename)

    # calculate the local file path to the zip file
    filename, ext = os.path.splitext(zipped_filename)
    zipped_path = generate_file_on_disk_name(filename, zipped_filename)

    # file size
    file_size = 0

    # if the zipfile does not exist on disk, return a 404
    if not os.path.exists(zipped_path):
        raise Http404('"%(filename)s" does not exist locally' % {'filename': zipped_path})

    # if client has a cached version, use that (we can safely assume nothing has changed, due to MD5)
    if request.META.get('HTTP_IF_MODIFIED_SINCE'):
        return HttpResponseNotModified()

    with zipfile.ZipFile(zipped_path) as zf:

        # if no path, or a directory, is being referenced, look for an index.html file
        if not embedded_filepath or embedded_filepath.endswith("/"):
            embedded_filepath += "index.html"

        # get the details about the embedded file, and ensure it exists
        try:
            info = zf.getinfo(embedded_filepath)
        except KeyError:
            raise Http404('"{}" does not exist inside "{}"'.format(embedded_filepath, zipped_filename))

        # try to guess the MIME type of the embedded file being referenced
        content_type = mimetypes.guess_type(embedded_filepath)[0] or 'application/octet-stream'

        if not os.path.splitext(embedded_filepath)[1] == '.json':
            # generate a streaming response object, pulling data from within the zip file
            response = FileResponse(zf.open(info), content_type=content_type)
            file_size = info.file_size
        else:
            # load the stream from json file into memory, replace the path_place_holder.
            content = zf.open(info).read()
            str_to_be_replaced = ('$' + exercises.IMG_PLACEHOLDER).encode()
            zipcontent = ('/' + request.resolver_match.url_name + "/" + zipped_filename).encode()
            content_with_path = content.replace(str_to_be_replaced, zipcontent)
            response = HttpResponse(content_with_path, content_type=content_type)
            file_size = len(content_with_path)

        # set the last-modified header to the date marked on the embedded file
        if info.date_time:
            response["Last-Modified"] = http_date(time.mktime(datetime.datetime(*info.date_time).timetuple()))

        # cache these resources forever; this is safe due to the MD5-naming used on content files
        response["Expires"] = "Sun, 17-Jan-2038 19:14:07 GMT"

        # set the content-length header to the size of the embedded file
        if file_size:
            response["Content-Length"] = file_size

        # ensure the browser knows not to try byte-range requests, as we don't support them here
        response["Accept-Ranges"] = "none"

        # allow all origins so that content can be read from within zips within sandboxed iframes
        response["Access-Control-Allow-Origin"] = "*"

        return response