async def get_mimetype(self):
    """Return the mimetype for the file.

    A previously stored ``self._mimetype`` is returned as-is. Otherwise the
    file bytes are sniffed with ``puremagic``.

    Returns:
        The detected mime type string, or ``""`` when the content is empty,
        cannot be identified, or none of the best matches carries a mime
        type.

    """
    if self._mimetype:
        return self._mimetype

    file_bytes = await self.get_file_bytes()
    try:
        results = puremagic.magic_string(file_bytes)
    except (puremagic.PureError, ValueError):
        # PureError: nothing matched. ValueError: puremagic rejects empty
        # input. Either way there is no mimetype to report.
        return ""

    # If for some reason we get a len 0 list
    if not results:  # pragma: nocover
        return ""

    # If we only have one result use it.
    if len(results) == 1:  # pragma: nocover
        return results[0].mime_type

    # Several matches: among those sharing the first result's confidence,
    # pick the first one that actually has a mime_type. Fall back to ""
    # instead of indexing an empty list when none of them has one.
    top_confidence = results[0].confidence
    return next(
        (
            match.mime_type
            for match in results
            if match.confidence == top_confidence and match.mime_type
        ),
        "",
    )
def test_magic_string_with_filename_hint(self):
    """String identification: magic_string with hint|"""
    # Read the sample workbook as raw bytes; its path doubles as the
    # filename hint handed to puremagic.
    xlsx_path = os.path.join(OFFICE_DIR, "test.xlsx")
    with open(xlsx_path, "rb") as handle:
        payload = handle.read()

    matches = puremagic.magic_string(payload, filename=xlsx_path)

    # The first match must report the .xlsx extension.
    best_match = matches[0]
    self.assertEqual(".xlsx", best_match.extension)
def test_magic_string_with_filename_hint(self):
    """String identification: magic_string with hint|"""
    # Read the sample workbook as raw bytes; its path doubles as the
    # filename hint handed to puremagic.
    xlsx_path = os.path.join(OFFICE_DIR, "test.xlsx")
    with open(xlsx_path, "rb") as handle:
        payload = handle.read()

    matches = puremagic.magic_string(payload, filename=xlsx_path)

    # The first field of the first match is compared with ".xlsx".
    best_match = matches[0]
    self.assertEqual(".xlsx", best_match[0])
def mime_type(self):
    """Return the MIME type to use for this object.

    Resolution order:

    1. An explicitly set ``self._mime_type`` always wins.
    2. If ``self.auto_detect_mime_type`` is falsy, ``self.default_mime_type``
       is returned without any detection.
    3. If ``self.path`` names a readable regular file, the type is detected
       from that file.
    4. If no path is set but ``self.body`` is non-empty, the type is
       detected from the body.

    Whenever detection is impossible or inconclusive a warning is logged and
    ``self.default_mime_type`` is returned.
    """
    if self._mime_type:
        return self._mime_type

    if not self.auto_detect_mime_type:
        return self.default_mime_type

    if self.path and os.path.isfile(self.path) \
            and os.access(self.path, os.R_OK):
        try:
            matches = puremagic.magic_file(self.path)
        except (PureError, ValueError):
            # puremagic rejects empty input with ValueError and signals "no
            # match" with PureError; neither should escape a best-effort
            # detection, so treat both as "not detected".
            m = None
        else:
            m = find_mime_type_recursive(matches)
        if m:
            return m
        logger.warning(
            "Can't detect mime type of file '%s'. Using default mime type: %s"
            % (self.path, self.default_mime_type))
        return self.default_mime_type

    if not self.path and self.body:
        try:
            m = find_mime_type_recursive(
                puremagic.magic_string(self.body))
        except PureError:
            m = None
        if m:
            return m
        logger.warning(
            "Can't detect mime type of body. Using default mime type: %s"
            % self.default_mime_type)
        return self.default_mime_type

    # Reached when a path is set but is not a readable regular file, or when
    # there is neither a path nor a body.
    logger.warning(
        "File '%s' isn't readable. Skipping mime type auto detection, using default mime type: %s"
        % (self.path, self.default_mime_type))
    return self.default_mime_type
def test_string_with_confidence(self):
    """String identification: magic_string |"""
    matches = puremagic.magic_string(bytes(self.mp4magic))

    # The first field of the first match must equal the expected extension.
    first_match = matches[0]
    self.assertEqual(self.expect_ext, first_match[0])

    # Empty input must be rejected with ValueError.
    with self.assertRaises(ValueError):
        puremagic.magic_string("")
def test_string_with_confidence(self):
    """String identification: magic_string |"""
    matches = puremagic.magic_string(bytes(self.mp4magic))

    # The first match must report the expected extension.
    top_match = matches[0]
    self.assertEqual(self.expect_ext, top_match.extension)

    # Empty input must be rejected with ValueError.
    with self.assertRaises(ValueError):
        puremagic.magic_string("")
def test_string_with_confidence(self):
    """String identification: magic_string |"""
    result = puremagic.magic_string(bytes(self.mp4magic))

    # The second element of the result holds the candidate list; the first
    # field of its first entry is compared with the expected extension.
    candidates = result[1]
    first_candidate = candidates[0]
    self.assertEqual(self.expect_ext, first_candidate[0])
def fetch_remote_file_to_storage(remote_url, upload_to='', allowed_mime_types=()):
    """
    Fetches a remote url (or decodes a base64 data URI), and stores it in
    DefaultStorage.

    The mime type is sniffed from the content with puremagic; content that
    looks like an SVG document is accepted as a fallback. SVG content is
    passed through ``scrubSvgElementTree`` before it is stored. The stored
    name is ``<upload_to>/cached/<sha256 of stored bytes><ext>``; an already
    existing file of that name is not rewritten.

    :param remote_url: URL to download, or a base64 ``data:`` URI
    :param upload_to: storage path prefix for the cached file
    :param allowed_mime_types: non-empty collection of acceptable mime types
    :return: (status_code, new_storage_name). ``new_storage_name`` is None
        when nothing was stored; ``status_code`` is None when the download
        did not answer with HTTP 200.
    :raises SuspiciousFileOperation: if no allowed mime types are given, a
        data URI is not base64 encoded, the detected mime type is not
        allowed, or no file extension could be determined.
    """
    SVG_MIME_TYPE = 'image/svg+xml'

    if not allowed_mime_types:
        raise SuspiciousFileOperation("allowed mime types must be passed in")

    magic_strings = None
    content = None
    status_code = None

    if _is_data_uri(remote_url):
        # data:[<MIME-type>][;charset=<encoding>][;base64],<data>
        # The payload starts one character after the 'base64' marker, which
        # skips the separating comma as well.
        base64_marker = re.search('base64', remote_url)
        if base64_marker is None:
            # Without the marker there is nothing we know how to decode;
            # reject it instead of failing with an AttributeError.
            raise SuspiciousFileOperation("data URI is not base64 encoded")
        content = base64.b64decode(remote_url[base64_marker.end() + 1:])
        magic_strings = puremagic.magic_string(content)
        status_code = 200

    store = DefaultStorage()

    if magic_strings is None:
        r = requests.get(remote_url, stream=True)
        try:
            if r.status_code == 200:
                content = r.content
                magic_strings = puremagic.magic_string(content)
                status_code = r.status_code
        finally:
            # With stream=True the connection is only released once the body
            # is consumed or the response is closed; a non-200 response is
            # never read, so close explicitly.
            r.close()

    if not (magic_strings and content):
        return status_code, None

    derived_mime_type = None
    derived_ext = None
    stripped_svg_string = None

    # Take the first sniffed candidate whose mime type is allowed.
    for candidate in magic_strings:
        candidate_mime_type = getattr(candidate, 'mime_type', None)
        if candidate_mime_type in allowed_mime_types:
            derived_mime_type = candidate_mime_type
            derived_ext = getattr(candidate, 'extension', None)
            break

    # Fallback: treat content with an <svg tag near the start and a closing
    # </svg> at the very end as SVG.
    if not derived_mime_type and re.search(
            b'<svg', content[:1024]) and content.strip()[-6:] == b'</svg>':
        derived_mime_type = SVG_MIME_TYPE
        derived_ext = '.svg'

    if derived_mime_type == SVG_MIME_TYPE:
        # NOTE(security): content is untrusted and the standard library XML
        # parsers are not hardened against maliciously constructed
        # documents; consider a hardened parser here.
        stripped_svg_element = ET.fromstring(content)
        scrubSvgElementTree(stripped_svg_element)
        stripped_svg_string = ET.tostring(stripped_svg_element)

    if derived_mime_type not in allowed_mime_types:
        raise SuspiciousFileOperation(
            "{} is not an allowed mime type for upload".format(
                derived_mime_type))

    if not derived_ext:
        raise SuspiciousFileOperation(
            "could not determine a file extension")

    string_to_write_to_file = stripped_svg_string or content

    storage_name = '{upload_to}/cached/{filename}{ext}'.format(
        upload_to=upload_to,
        filename=hashlib.sha256(string_to_write_to_file).hexdigest(),
        ext=derived_ext)

    # The name is derived from the content hash, so an existing file already
    # holds the same bytes.
    if not store.exists(storage_name):
        buf = io.BytesIO(string_to_write_to_file)
        store.save(storage_name, buf)
    return status_code, storage_name