def __call__(self, file):
    """Validate that a file's extension agrees with its content type(s).

    The extension of ``file.name`` must be one of the extensions that
    ``mimetypes`` associates with the content type returned by
    ``detect_content_type(file)`` and, when the file carries a
    ``content_type`` attribute, with that declared type as well.

    Legacy Word documents are a special case: they may be detected as the
    generic ``application/CDFV2-unknown`` container, so a ``.doc`` file with
    that detected type is accepted (provided the declared type, if any, is
    the one ``mimetypes`` maps ``.doc`` to).

    Raises:
        ValidationError: with ``self.message`` / ``self.code`` when the
            extension does not match.
    """
    __, ext = os.path.splitext(file.name)
    detected_content_type = detect_content_type(file)
    logger.debug('detected content type: %s | file: %s', detected_content_type, file)

    # ``splitext`` keeps the leading dot, so compare against '.doc'.
    is_cdfv2_doc = (
        detected_content_type == 'application/CDFV2-unknown'
        and ext.lower() == '.doc'
    )

    file_content_type = getattr(file, 'content_type', None)
    if file_content_type is not None:
        logger.debug('file content type: %s | file: %s', file_content_type, file)
        exts_for_detected_content_type = mimetypes.guess_all_extensions(detected_content_type)
        exts_for_file_content_type = mimetypes.guess_all_extensions(file_content_type)
        logger.debug(
            'file content type: %s | exts for detected content type: %s | '
            'exts_for_file_content_type: %s | file: %s',
            file_content_type,
            exts_for_detected_content_type,
            exts_for_file_content_type,
            file
        )
        # ``guess_type`` returns a (type, encoding) tuple and needs a real
        # file name to look at, hence 'file.doc' and the ``[0]``.
        doc_content_type = mimetypes.guess_type('file.doc')[0]
        is_valid_content_type = bool(
            (
                ext in exts_for_detected_content_type
                and ext in exts_for_file_content_type
            ) or (
                is_cdfv2_doc
                and file_content_type == doc_content_type
            )
        )
    else:
        is_valid_content_type = bool(
            ext in mimetypes.guess_all_extensions(detected_content_type)
            or is_cdfv2_doc
        )

    if not is_valid_content_type:
        raise ValidationError(
            self.message,
            code=self.code,
            params={
                'extension': ext,
                'content_type': file_content_type,
                'detected_content_type': detected_content_type
            }
        )
async def _download_helper(path, url, session):
    """Download the media at ``url`` and return the path it was saved to.

    Args:
        path (str): destination path including the file name but without an
            extension; the extension is derived from the response's
            ``content-type`` header.
        url (str): URL of the item to download.
        session: aiohttp ClientSession used to issue the GET request.

    Raises:
        GenericError: if no usable extension can be derived.
        aiohttp.ClientError: logged, reported and re-raised.
    """
    try:
        async with session.get(url) as response:
            # from https://stackoverflow.com/questions/29674905/convert-content-type-header-into-file-extension
            header_value = response.headers['content-type']
            content_type = header_value.partition(';')[0].strip()
            main_type = content_type.partition("/")[0]

            if main_type == "image":
                restricted = True
                allowed = valid_image_extensions
            elif main_type == "audio":
                restricted = True
                allowed = valid_audio_extensions
            else:
                restricted = False
                allowed = None

            if restricted:
                bare_extensions = set(
                    candidate[1:]
                    for candidate in guess_all_extensions(content_type))
                try:
                    # pop() on an empty intersection raises KeyError
                    ext = "." + bare_extensions.intersection(allowed).pop()
                except KeyError:
                    raise GenericError(
                        f"No valid extensions found. Extensions: {guess_all_extensions(content_type)}"
                    )
            else:
                ext = guess_extension(content_type)
                if ext is None:
                    raise GenericError("No extensions found.")

            filename = f"{path}{ext}"
            # from https://stackoverflow.com/questions/38358521/alternative-of-urllib-urlretrieve-in-python-3-5
            chunk_size = 1024 * 8
            with open(filename, 'wb') as destination:
                while True:
                    chunk = await response.content.read(chunk_size)  # pylint: disable=no-member
                    if not chunk:
                        break
                    destination.write(chunk)
            return filename
    except aiohttp.ClientError as e:
        logger.info(f"Client Error with url {url} and path {path}")
        capture_exception(e)
        raise
def get_file_extension(file_content):
    """Return the extension (with leading dot) to use for ``file_content``.

    The MIME type is guessed from ``file_content.name``. If the name's own
    extension is one of the extensions known for that type it is returned
    unchanged; otherwise the last extension ``mimetypes`` lists for the type
    is returned.

    Raises:
        Exception: with ``{'error': 'Unable to fetch file extension'}`` when
            the extension cannot be determined (e.g. unknown type). The
            underlying error is attached as ``__cause__``.
    """
    try:
        mimetypes.add_type('text/csv', '.csv', strict=True)
        # NOTE(review): this registers the literal extension '.*' for the
        # type 'image'; kept because callers may rely on it.
        mimetypes.add_type('image', '.*', strict=True)
        mime_type = mimetypes.guess_type(file_content.name)[0]
        known_extensions = mimetypes.guess_all_extensions(mime_type)
        current_extension = os.path.splitext(file_content.name)[1]
        if current_extension in known_extensions:
            return current_extension
        return known_extensions[-1]
    except Exception as e:
        # Chain the cause so the real failure is not lost.
        raise Exception({'error': 'Unable to fetch file extension'}) from e
def __init__(self, fname=None):
    """Initialise filter state and load logs from ``fname`` (or stdin).

    Args:
        fname: path of the log source. ``None`` reads standard input.
            Files whose name maps to ``application/zip`` or
            ``application/x-tar`` are opened as archives and every member is
            passed to the log transformer; anything else is read as a plain
            file through ``fileinput``.
    """
    # NOTE(review): this only builds a super() proxy and never calls
    # ``__init__`` on it. Left untouched because the base class is not
    # visible from here -- confirm whether base initialisation is needed.
    super(LogProvider, self)
    self.minimum_occurrences = 250
    self.percentage = 10
    self.logs = list()
    self.filters = collections.defaultdict(list)
    self.negative_filters = collections.defaultdict(list)
    self.filters_regexp = collections.defaultdict(list)
    self.negative_filters_regexp = collections.defaultdict(list)
    self.fname = fname

    if self.fname is None:
        self.__transform_logs(fileinput.input("-"))
        return

    # guess_type() maps a file name to a MIME type; guess_all_extensions()
    # goes the other way and never yields a MIME type.
    mime_type = mimetypes.guess_type(self.fname)[0]
    if mime_type == 'application/zip':
        with zipfile.ZipFile(self.fname) as archive:
            for name in archive.namelist():
                self.__transform_logs(archive.read(name))
    elif mime_type == 'application/x-tar':
        with tarfile.open(self.fname) as archive:
            for member in archive.getmembers():
                extracted = archive.extractfile(member)
                if extracted is None:
                    # directories and other non-file members
                    continue
                self.__transform_logs(extracted.read())
    else:
        # unknown or non-archive type: treat as a plain log file
        self.__transform_logs(fileinput.input(self.fname))
def get_filedialog_kwargs():
    """Build keyword arguments for ``tkinter.filedialog`` functions.

    The returned dictionary makes the dialog list every filetype returned by
    ``get_all_filetypes()`` together with its filename patterns and the
    extensions ``mimetypes`` knows for its MIME types.

    Example::

        from tkinter import filedialog
        from porcupine.filetypes import get_filedialog_kwargs

        filenames = filedialog.askopenfilenames(**get_filedialog_kwargs())
        for filename in filenames:
            print("Opening", filename)
    """
    without_mimetype_extensions = {'Plain Text', 'Porcupine filetypes.ini'}

    entries = [("All files", "*")]
    for filetype in get_all_filetypes():
        globs = list(filetype.filename_patterns)
        if filetype.name not in without_mimetype_extensions:
            for mimetype in filetype.mimetypes:
                globs += mimetypes.guess_all_extensions(mimetype)
        entries.append((filetype.name, tuple(globs)))

    main_window = porcupine.get_main_window()    # any widget would do
    only_catch_all = len(entries) == 1
    if only_catch_all and main_window.tk.call('tk', 'windowingsystem') == 'aqua':
        # On osx a list holding nothing but ("All files", "*") crashes
        # python with a huge error about an empty parameter list, so pass
        # no filetypes at all in that case.
        return {}
    return {'filetypes': entries}
def clean_url(self):
    """Download the file behind the ``url`` field and wrap it as an upload.

    Returns:
        ``''`` when no URL was supplied, otherwise a ``SimpleUploadedFile``
        holding the downloaded bytes and the response's Content-Type.

    Raises:
        forms.ValidationError: if the response has no Content-Type or
            ``mimetypes`` knows no extension for it.
    """
    url = self.cleaned_data.get('url')
    if not url:
        return ''
    filename, headers = urllib.urlretrieve(url)
    content_type = headers.get('Content-Type')
    # A missing header is rejected here; mimetypes cannot look up None.
    if not content_type or not mimetypes.guess_all_extensions(content_type):
        # Interpolate after translating, and report the actual content type
        # (the builtin ``type`` was being formatted into the message).
        raise forms.ValidationError(
            _('The file type is invalid: %s') % content_type)
    with open(filename, 'rb') as downloaded:
        payload = downloaded.read()
    return SimpleUploadedFile(filename, payload, content_type=content_type)
def uploads_endpoint(request):
    """Endpoint for file uploads.

    Only ``POST`` is handled: the request body is wrapped in a
    ``FileStorage`` named ``unknown<ext>`` (extension guessed from the
    Content-Type header) and handed to ``api_upload_request``. A user may
    only post to their own endpoint. Any other method yields a 501 error.
    """
    username = request.matchdict["username"]
    requested_user = LocalUser.query.filter(
        LocalUser.username == username).first()

    if requested_user is None:
        return json_error("No such 'user' with id '{0}'".format(username), 404)

    if request.method == "POST":
        # Ensure that the user is only able to upload to their own
        # upload endpoint.
        if requested_user.id != request.user.id:
            return json_error("Not able to post to another users feed.",
                              status=403)

        # Wrap the data in the werkzeug file wrapper
        if "Content-Type" not in request.headers:
            return json_error(
                "Must supply 'Content-Type' header to upload media.")

        mimetype = request.headers["Content-Type"]
        # mimetypes does not understand parameters ("; charset=..."), so
        # look up the bare type only.
        bare_type = mimetype.partition(";")[0].strip()
        known_extensions = mimetypes.guess_all_extensions(bare_type)
        # Always produce a string; previously an empty list was passed as
        # the filename when no extension was known.
        filename = 'unknown' + (known_extensions[0] if known_extensions else '')
        file_data = FileStorage(stream=io.BytesIO(request.data),
                                filename=filename,
                                content_type=mimetype)

        # Find media manager
        entry = new_upload_entry(request.user)
        entry.media_type = IMAGE_MEDIA_TYPE
        return api_upload_request(request, file_data, entry)

    return json_error("Not yet implemented", 501)
def guess_all_extensions(self, content_type):
    """Return the extensions known for ``content_type``.

    The list from ``mimetypes.guess_all_extensions`` is extended with the
    entry for ``content_type`` in the module-level ``extensions`` mapping,
    when there is one.
    """
    known = mimetypes.guess_all_extensions(content_type)
    try:
        known.append(extensions[content_type])
    except KeyError:
        # no extra extension registered for this content type
        pass
    return known
def __guess_extension(mime, forward, filename):
    """Pick the most plausible extension for a file of type ``mime``.

    Candidates are the extensions ``mimetypes`` knows for ``mime``. They are
    tried in this order:

    1. the closest match to the extension of ``filename``;
    2. the closest match to ``forward`` (a preferred extension, given
       without the leading dot);
    3. an exact match with ``forward`` or ``'.' + forward``;
    4. the longest known extension.

    Returns:
        The chosen extension including its leading dot, or ``None`` when
        ``mime`` has no known extensions.
    """
    forward_ext = '.' + forward
    extensions = mimetypes.guess_all_extensions(type=mime)
    if not extensions:
        return None

    o_ext = os.path.splitext(filename)[-1]
    for wanted in (o_ext, forward):
        close = difflib.get_close_matches(word=wanted,
                                          possibilities=extensions, n=1)
        if close:
            return close[0]

    for ext in extensions:
        # The dotted form has to be compared with the candidate; comparing
        # it with ``forward`` itself could never succeed.
        if ext == forward or ext == forward_ext:
            return ext

    # max() keeps the first of several equally long extensions
    return max(extensions, key=len)
def do_POST(self):
    """Handle an upload form: encrypt and store the file, then reply.

    An empty upload renders ``error.html`` with a "File empty" message.
    Otherwise the file is encrypted with the submitted passphrase, written
    together with a ``.meta`` companion, and the page with the resulting
    URL is sent back.
    """
    # retrieve post data
    form = cgi.FieldStorage(fp=self.rfile,
                            headers=self.headers,
                            environ={'REQUEST_METHOD': 'POST'})

    if not len(form['fileToUpload'].value) > 0:
        page = self.load_assets("error.html")
        page = page.replace('<%error_msg%>', "File empty !!")
        self.send_html(page)
        return

    # create encode file
    ofile = myfile()
    upload_content_type = form['fileToUpload'].headers['Content-Type']
    guessed_extensions = mimetypes.guess_all_extensions(upload_content_type)
    ofile.set_mime(guessed_extensions[0])
    encrypted = ofile.encrypt(form['passphrase'].value,
                              form['fileToUpload'].value)
    fname = ofile.get_file_name()
    ofile.write_("ab", fname, encrypted)

    # create encode filemeta
    mf = metafile()
    if 'burnafterreading' in form:
        mf.set_burnafterreading(1)
    if 'expiration' in form:
        mf.set_expiration(form['expiration'].value)
    ofile.write_("a", fname + ".meta", mf.get_json_metafile())

    # send response
    context = {'url': self.make_url(ofile.get_file_name(),
                                    ofile.get_file_key())}
    self.send_html(self.content(context))
def _save_audio(response, data: StepData, config: dict):
    """Write the audio carried by ``response`` to a temp resource file.

    When ``config["generate"]["type"]`` starts with ``request_multiple`` the
    audio response is selected with the configured ``audio_idx``. JSON
    responses carry the audio base64-encoded under the configured
    ``audio_key``. For other responses without a configured
    ``file_extension`` the extension is derived from the content type, which
    must start with ``audio``.

    Returns:
        The path the audio was written to.

    Raises:
        InvalidContentTypeError: for a non-JSON, non-audio response when no
            file extension is configured.
    """
    post_generate = config.get("post_generate", {})
    extension = None
    if post_generate.get("file_extension", None) is not None:
        extension = data.format(post_generate["file_extension"])

    # if multiple requests were used, get only the request with the audio file
    generate_config = config["generate"]
    if generate_config["type"].startswith("request_multiple"):
        response = response[data.format(generate_config["audio_idx"])]

    content_type = response["headers"]["content-type"]
    audio = response["content"]

    if content_type.startswith("application/json"):
        # the JSON document holds the audio as a base64 string
        encoded = data_get_pattern(data.format(post_generate["audio_key"]), audio)
        audio = base64.b64decode(encoded)
    elif extension is None:
        # only audio content types can provide the extension
        if not content_type.startswith("audio"):
            raise InvalidContentTypeError(None, content_type, "'audio/*'")
        first_guess = mimetypes.guess_all_extensions(content_type)[0]
        extension = first_guess.replace(".", "")

    audio_path = resources.new_temp_resource_path(data.data["_pipe_id"], extension)
    with open(audio_path, "wb") as audio_file:
        audio_file.write(audio)
    return audio_path
def url2path(self):
    """Compute the cache path for this url and its consumed tasks.

    The path is the url followed by the consumed task names (joined with
    ``..``). Once no steps are left and at least one was consumed, a file
    extension is appended: the requested ``target_ext`` if it fits the
    produced mimetype, else the extension guessed from the mimetype.
    The containing folder below ``CACHE_PATH`` is created when missing.

    Raises:
        TypeError: if ``target_ext`` does not match the produced mimetype.
    """
    lock_id = get_lock_id(url=self.url, pipeline=self.consumed)
    fn = md5(lock_id).hexdigest()  # no longer used

    local_path = self.url
    if self.consumed:
        local_path += u"..%s" % '..'.join(self.consumed)

    # For the final filename, we want the requested extension,
    # so that it gets saved on a predictable location
    if not self.to_go and self.consumed:
        if self.target_ext:
            ext = self.target_ext
            # Still verify that the mimetype of the produced result fits
            # the requested extension.
            matching = mimetypes.guess_all_extensions(self.mime, strict=False)
            if ext.lower() not in matching:
                raise TypeError
        else:
            ext = mimetypes.guess_extension(self.mime, strict=False)
        local_path += ext

    print("%d steps to go in pipeline" % len(self.to_go))

    local_folder, local_filename = os.path.split(local_path)
    cache_folder = os.path.join(CACHE_PATH, local_folder)
    if not os.path.exists(cache_folder):
        os.makedirs(os.path.join(CACHE_PATH, local_folder))
    return os.path.join(CACHE_PATH, local_path)
def guess_extention(bin_data):
    """Guess a file extension for the given binary data.

    The MIME type is sniffed from the data and mapped to its known
    extensions. ``'.txt'`` is preferred when it is among them; otherwise
    the first known extension is used, and ``'.dat'`` when there is none.

    :param bin_data: The binary data as read from a file,
                     e.g. ``open("example.unknown").read()``
    :returns: The guessed extension, including the leading ``'.'``
    """
    mime_type = magic.from_buffer(bin_data, mime=True)
    current_app.logger.debug('Guessed Mime is {}'.format(mime_type))

    ext_list = mimetypes.guess_all_extensions(mime_type)
    current_app.logger.debug('Possible extentions are {}'.format(ext_list))

    if not ext_list:
        # nothing known for this type: fall back to '.dat'
        current_app.logger.debug('No extentions, returning .dat')
        return '.dat'

    # more common extensions win when available
    for common_ext in ('.txt',):
        if common_ext in ext_list:
            current_app.logger.debug('Using prefered extentions '
                                     '{}'.format(common_ext))
            return common_ext

    first_ext = ext_list[0]
    current_app.logger.debug('Returning {}'.format(first_ext))
    return first_ext
def save(self, force_insert=False, force_update=False, using=None):
    """Save the demot, fetching the image and rendering the result first.

    If no image is set but ``image_url`` is, the image is downloaded and
    stored as an uploaded file. After the model is saved, the final image
    and default thumbnail are generated once (followed by a second save),
    and the activity is created/updated.

    Raises:
        ValidationError: if the downloaded file has no Content-Type or one
            for which ``mimetypes`` knows no extension.
    """
    if not self.image and self.image_url:
        filename, headers = urllib.urlretrieve(
            self.image_url.encode('utf-8'))
        content_type = headers.get('Content-Type')
        if not content_type or not mimetypes.guess_all_extensions(content_type):
            raise ValidationError('Broken image')
        # Images are binary: read in 'rb' mode and close the handle.
        with open(filename, 'rb') as downloaded:
            image_bytes = downloaded.read()
        self.image = SimpleUploadedFile(filename, image_bytes,
                                        content_type=content_type)
        self.image_url = None

    super(Demot, self).save(force_insert, force_update, using)

    # generate demot
    if self.image and not self.final_image:
        demotivator = Demotivator(self.image, self.line1, self.line2,
                                  get_address())
        demotivator.create(self.demot_path['path'])
        # generate default thumbnail
        self.get_thumbnail()
        self.final_image = self.demot_path['url']
        # final_image is now set, so this nested save does not recurse again
        self.save()

    # create/update activity
    self.save_activity()
def save( self, force_insert=False, force_update=False, using=None ):
    """Save the demot, fetching the image and rendering the result first.

    If no image is set but ``image_url`` is, the image is downloaded and
    stored as an uploaded file. After the model is saved, the final image
    and default thumbnail are generated once (followed by a second save),
    and the activity is created/updated.

    Raises:
        ValidationError: if the downloaded file has no Content-Type or one
            for which ``mimetypes`` knows no extension.
    """
    if not self.image and self.image_url:
        filename, headers = urllib.urlretrieve( self.image_url.encode( 'utf-8' ) )
        content_type = headers.get( 'Content-Type' )
        if not content_type or not mimetypes.guess_all_extensions( content_type ):
            raise ValidationError( 'Broken image' )
        # Images are binary: read in 'rb' mode and close the handle.
        with open( filename, 'rb' ) as downloaded:
            image_bytes = downloaded.read()
        self.image = SimpleUploadedFile( filename, image_bytes, content_type = content_type )
        self.image_url = None

    super( Demot, self ).save( force_insert, force_update, using )

    # generate demot
    if self.image and not self.final_image:
        demotivator = Demotivator( self.image, self.line1, self.line2, get_address() )
        demotivator.create( self.demot_path['path'] )
        # generate default thumbnail
        self.get_thumbnail()
        self.final_image = self.demot_path['url']
        # final_image is now set, so this nested save does not recurse again
        self.save()

    # create/update activity
    self.save_activity()
def update_extension(self):
    """Return the first allowed extension registered for ``self.MIMETYPE``.

    Only strictly registered MIME types are consulted.

    Raises:
        ValueError: if none of the type's extensions is in
            ``self.ALLOWED_EXTENSIONS``.
    """
    candidates = mimetypes.guess_all_extensions(self.MIMETYPE, strict=True)
    allowed = [ext for ext in candidates if ext in self.ALLOWED_EXTENSIONS]
    if not allowed:
        raise ValueError('Undefined extension')
    return allowed[0]
def get_proper_ext(expected_ext, full_path):
    """Return an extension that matches the actual content of ``full_path``.

    The MIME type is sniffed from the file's bytes with the magic library.
    ``expected_ext`` is returned unchanged when no type is found, when it is
    already valid for the type, when the type has no known extensions, or
    for the two special cases below. Otherwise an extension from the
    module-level ``extensions`` set is preferred, falling back to the first
    known extension (lower-cased).
    """
    mime_type = magic.from_file(full_path, mime=True)
    if not mime_type:
        return expected_ext

    # DNGs are technically tiff files (magic does not report
    # 'image/x-adobe-dng'), and AAEs are XML files whose .aae extension we
    # want to keep.
    if mime_type == 'image/tiff' and expected_ext == '.dng':
        return expected_ext
    if mime_type == 'text/xml' and expected_ext == '.aae':
        return expected_ext

    valid_ext_list = guess_all_extensions(mime_type)
    if expected_ext in valid_ext_list or len(valid_ext_list) == 0:
        return expected_ext

    # Prefer an extension we are familiar with ...
    familiar_exts = extensions.intersection(set(valid_ext_list))
    if familiar_exts:
        return familiar_exts.pop()
    # ... otherwise just take the first one offered
    return valid_ext_list[0].lower()
def extract_response_meta(response, guess_encoding=True, guess_extension=True):
    """Collect mime type, extension and encoding hints for a response.

    The mime type is guessed from ``response.geturl()`` and defaults to
    ``text/html``. With ``guess_extension`` the keys ``mime`` and ``ext``
    are set (``.html`` is preferred, else the longest known extension).
    With ``guess_encoding`` the key ``encoding`` is set from
    ``guess_response_encoding``.
    """
    meta = {}

    # Guessing mime type
    mimetype = mimetypes.guess_type(response.geturl())[0]
    if mimetype is None:
        mimetype = 'text/html'

    # Guessing extension
    # TODO: maybe move to utils
    if guess_extension:
        candidates = mimetypes.guess_all_extensions(mimetype)
        if candidates and '.html' not in candidates:
            chosen = max(candidates, key=len)
        else:
            chosen = '.html'
        meta['mime'] = mimetype
        meta['ext'] = chosen

    # Guessing encoding
    if guess_encoding:
        meta['encoding'] = guess_response_encoding(response, is_xml=True, use_chardet=True)

    return meta
def post(req):
    """Wrap the request body as a file named ``file<ext>`` and post it.

    The extension is the first one known for the request's ``Content-type``
    header, or empty when none is known.
    """
    known_extensions = guess_all_extensions(req.headers['Content-type'])
    suffix = known_extensions[0] if known_extensions else ""
    return postImgRespondUrl('file' + suffix, BytesIO(req.data))
def format_matches_extension(self, distribution, attribute):
    """Check whether a file extension could correspond to a given format.

    Returns ``True`` when the distribution lacks ``attribute`` or
    ``format``, when a ``downloadURL`` has no extension, when the extension
    is listed in ``EXTENSIONS_EXCEPTIONS``, or when it is one of the
    extensions possible for the declared format; ``False`` otherwise.
    """
    if attribute not in distribution or "format" not in distribution:
        return True

    declared_format = distribution['format']
    if "/" in declared_format:
        # looks like a MIME type
        possible_format_extensions = mimetypes.guess_all_extensions(
            declared_format)
    else:
        possible_format_extensions = ['.' + declared_format.lower()]

    url_path = urlparse(distribution[attribute]).path
    extension = os.path.splitext(url_path)[-1].lower()

    if attribute == 'downloadURL' and not extension:
        return True

    # some extensions are exempted because they mask other formats
    if extension.lower().replace(".", "") in EXTENSIONS_EXCEPTIONS:
        return True

    return extension in possible_format_extensions
def generate_filename(self, instance, filename):
    """Build a random, uuid-based upload path for the instance's file.

    Without ``self.random_filename`` the parent implementation is used.
    Otherwise the result is
    ``<upload_to>/<uuid[:2]>/<uuid[2:4]>/<uuid><ext>`` where ``ext`` is the
    original extension if it fits the content type, else the first
    extension known for that type, else ``.bin``.
    """
    if not self.random_filename:
        return super(WebDAVMixin, self).generate_filename(instance, filename)

    uuid_string = unicode(uuid.uuid4())
    file = getattr(instance, self.attname)

    declared_is_valid = (hasattr(file._file, 'content_type')
                         and file._file.content_type in self.valid_content_types)
    if declared_is_valid:
        content_type = file._file.content_type
    else:
        # sniff the type from the first KiB of the file
        try:
            file._file.seek(0)
            if self.custom_magic_file:
                sniffer = magic.Magic(mime=True, magic_file=self.custom_magic_file)
                content_type = sniffer.from_buffer(file._file.read(1024))
            else:
                content_type = magic.from_buffer(file._file.read(1024), mime=True)
        except TypeError as e:
            content_type = 'application/x-unknown'

    # All extensions for the type, to check the file's own one against
    extensions = mimetypes.guess_all_extensions(content_type)
    suffix_match = re.search(r'\.[^.]+$', filename)
    file_ext = suffix_match.group(0) if suffix_match else None

    if file_ext in extensions:
        ext = file_ext
    elif extensions:
        ext = extensions[0]
    else:
        ext = '.bin'

    return os.path.join(self.upload_to, uuid_string[:2], uuid_string[2:4],
                        '%s%s' % (uuid_string, ext))
def extract_response_meta(response, guess_encoding=True, guess_extension=True):
    """Collect mime type, extension and encoding hints for a response.

    With ``guess_extension`` the keys ``mime`` and ``ext`` are set. The mime
    type is the ``Content-Type`` header when present, else the type guessed
    from ``response.geturl()``, else ``text/html``. ``ext`` is ``.html``
    when that is a known extension for the type (or nothing is known),
    otherwise the longest known extension.

    With ``guess_encoding`` the key ``encoding`` is set from
    ``guess_response_encoding``.
    """
    meta = {}

    # Guessing extension
    if guess_extension:
        # Guessing mime type
        mimetype, _ = mimetypes.guess_type(response.geturl())
        if mimetype is None:
            mimetype = 'text/html'
        if 'Content-Type' in response.headers:
            mimetype = response.headers['Content-Type']

        # The header may carry parameters ("text/html; charset=utf-8") that
        # mimetypes does not understand; look up the bare type only. The
        # reported 'mime' value is left exactly as received.
        bare_type = mimetype.partition(';')[0].strip()
        exts = mimetypes.guess_all_extensions(bare_type)

        if not exts or '.html' in exts:
            ext = '.html'
        else:
            ext = max(exts, key=len)

        meta['mime'] = mimetype
        meta['ext'] = ext

    # Guessing encoding
    if guess_encoding:
        meta['encoding'] = guess_response_encoding(response, is_xml=True, use_chardet=True)

    return meta
def __call__(self, file):
    """Validate a file's declared content type.

    The file is accepted when any of the following holds:

    * its extension is known for the declared ``file.content_type``;
    * it was detected as the generic ``application/CDFV2-unknown``
      container and is declared with the type ``mimetypes`` maps ``.doc``
      to (legacy Word documents);
    * the detected and declared content types are equal.

    Raises:
        ValidationError: with ``self.message`` / ``self.code`` otherwise.
    """
    __, ext = os.path.splitext(file.name)
    detected_content_type = detect_content_type(file)

    # ``guess_type`` returns a (type, encoding) tuple and needs a real file
    # name to look at, hence 'file.doc' and the ``[0]``.
    doc_content_type = mimetypes.guess_type('file.doc')[0]

    is_valid_content_type = bool(
        (
            ext in mimetypes.guess_all_extensions(file.content_type)
        ) or (
            detected_content_type == 'application/CDFV2-unknown'
            and file.content_type == doc_content_type
        ) or (
            detected_content_type == file.content_type
        )
    )

    if not is_valid_content_type:
        raise ValidationError(
            self.message,
            code=self.code,
            params={
                'extension': ext,
                'content_type': file.content_type,
                'detected_content_type': detected_content_type
            }
        )
def test_as_message_attachments(self):
    """A stored multipart email is rendered back with its HTML part as an
    attachment."""
    incoming = Message()
    incoming["From"] = "*****@*****.**"
    incoming["Message-ID"] = "<msg>"
    incoming.attach(MIMEText("Dummy message"))
    incoming.attach(MIMEText("<html><body>Dummy message</body></html>",
                             _subtype="html"))
    add_to_list("*****@*****.**", incoming)

    stored = Email.objects.get(message_id="msg")
    rendered = stored.as_message()

    self.assertEqual(rendered["From"], "dummy at example.com")
    self.assertEqual(rendered["Message-ID"], "<msg>")
    self.assertTrue(rendered.is_multipart())

    parts = rendered.get_payload()
    self.assertEqual(len(parts), 2)
    text_part, html_part = parts[0], parts[1]
    self.assertEqual(
        text_part.get_payload(decode=True).strip(), "Dummy message")

    # The extension picked for a content type is somewhat random (it
    # depends on PYTHONHASHSEED), so compute the expected one the same way.
    expected_ext = guess_all_extensions("text/html", strict=False)[0]
    self.assertEqual(html_part.get_content_type(), "text/html")
    self.assertEqual(html_part["Content-Disposition"],
                     'attachment; filename="attachment%s"' % expected_ext)
    self.assertEqual(
        html_part.get_payload(decode=True),
        "<html><body>Dummy message</body></html>")
async def _download_helper(path, url, session):
    """Download the image at ``url`` to ``path`` plus a fitting extension.

    Only ``image/*`` responses are accepted; the extension is one of
    ``VALID_IMAGE_EXTENSIONS`` known for the response's content type.

    Returns:
        The file name the image was written to.

    Raises:
        InvalidExtensionError: for non-image content or when no valid
            extension exists for the content type.
    """
    async with session.get(url) as response:
        # from https://stackoverflow.com/q/29674905/7941251
        header_value = response.headers["content-type"]
        content_type = header_value.partition(";")[0].strip()

        if content_type.partition("/")[0] != "image":
            raise InvalidExtensionError("No extensions found.")

        bare_extensions = set(
            candidate[1:] for candidate in guess_all_extensions(content_type))
        try:
            # pop() on an empty intersection raises KeyError
            ext = "." + bare_extensions.intersection(VALID_IMAGE_EXTENSIONS).pop()
        except KeyError:
            raise InvalidExtensionError(
                f"No valid extensions found. Extensions: {guess_all_extensions(content_type)}"
            )

        filename = f"{path}{ext}"
        # from https://stackoverflow.com/q/38358521/7941251
        chunk_size = 1024 * 8
        async with aiofiles.open(filename, "wb") as destination:
            while True:
                chunk = await response.content.read(chunk_size)  # pylint: disable=no-member
                if not chunk:
                    break
                await destination.write(chunk)
        return filename
def getAllowedConversionFormatList(source_mimetype):
    """Return the (content_type, title) pairs the enabled handlers support.

    Example result::

      [('application/vnd.oasis.opendocument.text', 'ODF Text Document'),
       ('application/pdf', 'PDF - Portable Document Format'),
       ...
      ]

    ``source_mimetype`` may be a mimetype (optionally with parameters) or,
    when it contains no "/", a bare extension.
    """
    # XXX please never guess extension from mimetype
    if "/" in source_mimetype:
        # `guess_all_extensions` never handles mimetype parameters (even the
        # standard `text/plain;charset=UTF-8`), so strip them first
        bare_mimetype = parseContentType(source_mimetype).gettype()
        source_extension_list = mimetypes.guess_all_extensions(bare_mimetype)  # XXX never guess
    else:
        source_extension_list = [source_mimetype]

    output_set = set()
    for source_ext in source_extension_list:
        allowed_list = mimemapper.getAllowedExtensionList(
            extension=source_ext.replace(".", ""))
        for target_ext, title in allowed_list:
            if target_ext in ("fodt", ".fodt"):  # BBB
                output_set.add(("application/vnd.oasis.opendocument.text-flat-xml", title))
            elif target_ext:
                target_mimetype = mimetypes.guess_type("a." + target_ext)[0]  # XXX never guess
                if target_mimetype:
                    output_set.add((target_mimetype, title))
    return list(output_set)
def uploads_endpoint(request):
    """Endpoint for file uploads.

    Only ``POST`` is handled: the request body is wrapped in a
    ``FileStorage`` named ``unknown<ext>`` (extension guessed from the
    Content-Type header) and handed to ``api_upload_request``. A user may
    only post to their own endpoint. Any other method yields a 501 error.
    """
    username = request.matchdict["username"]
    requested_user = User.query.filter_by(username=username).first()

    if requested_user is None:
        return json_error("No such 'user' with id '{0}'".format(username), 404)

    if request.method == "POST":
        # Ensure that the user is only able to upload to their own
        # upload endpoint.
        if requested_user.id != request.user.id:
            return json_error("Not able to post to another users feed.",
                              status=403)

        # Wrap the data in the werkzeug file wrapper
        if "Content-Type" not in request.headers:
            return json_error("Must supply 'Content-Type' header to upload media.")

        mimetype = request.headers["Content-Type"]
        # mimetypes does not understand parameters ("; charset=..."), so
        # look up the bare type only.
        bare_type = mimetype.partition(";")[0].strip()
        known_extensions = mimetypes.guess_all_extensions(bare_type)
        # Always produce a string; previously an empty list was passed as
        # the filename when no extension was known.
        filename = "unknown" + (known_extensions[0] if known_extensions else "")
        file_data = FileStorage(stream=io.BytesIO(request.data),
                                filename=filename,
                                content_type=mimetype)

        # Find media manager
        entry = new_upload_entry(request.user)
        entry.media_type = IMAGE_MEDIA_TYPE
        return api_upload_request(request, file_data, entry)

    return json_error("Not yet implemented", 501)
def valid_mime_extensions(request):
    """Return the valid mime-types mapped to their file extension.

    Entries are read from the ``lokp.file_mime_extensions`` setting, one
    "<mime> <extension>" pair per row; an empty dict is returned when the
    setting is absent. Pairs are kept only if ``mimetypes`` knows the
    extension and lists it for that mime type.
    """
    settings = request.registry.settings
    if 'lokp.file_mime_extensions' not in settings:
        # file extensions can be added to registry by changing development.ini
        return {}

    vfme = {}
    for row in aslist(settings['lokp.file_mime_extensions'], flatten=False):
        mime, extension = row.split(' ')
        # The extension must be known to mimetypes at all ...
        if extension not in mimetypes.types_map:
            continue
        # ... and be valid for the mime type it is configured for.
        if extension in mimetypes.guess_all_extensions(mime):
            vfme[mime] = extension

    # Add special types by Internet Explorer
    # http://msdn.microsoft.com/en-us/library/ms775147%28v=vs.85%29.
    # aspx#_replace
    if 'image/jpeg' in vfme:
        vfme['image/pjpeg'] = '.jpg'
    if 'image/png' in vfme:
        vfme['image/x-png'] = '.png'
    return vfme
def __init__(self, file_obj, orig_filename):
    '''
    Init file object, set the mimetype.

    After the base initialisation the file is marked dangerous when it has
    no mimetype, no extension, an extension listed in ``mal_ext``, or a
    known extension whose expected mimetype differs from the actual one.
    A mismatch between the actual mimetype and its known extensions is only
    recorded in ``log_details``.
    '''
    super(File, self).__init__(file_obj, orig_filename)

    self.is_recursive = False
    if not self.has_mimetype():
        # No mimetype, should not happen.
        self.make_dangerous()

    if not self.has_extension():
        self.make_dangerous()

    if self.extension in mal_ext:
        self.log_details.update({'malicious_extension': self.extension})
        self.make_dangerous()

    # Nothing more to check once the file is already flagged.
    if self.is_dangerous():
        return

    self.log_details.update({
        'maintype': self.main_type,
        'subtype': self.sub_type,
        'extension': self.extension
    })

    # Check correlation known extension => actual mime type
    # ``propertype`` takes precedence over the mimetypes table.
    if propertype.get(self.extension) is not None:
        expected_mimetype = propertype.get(self.extension)
    else:
        # mimetypes.guess_type is *SUPER* basic and kindof unreliable (.eml.xz => mail)
        expected_mimetype = mimetypes.types_map.get(self.extension)
        # Replace the looked-up type by its alias when one is configured.
        if aliases.get(expected_mimetype) is not None:
            expected_mimetype = aliases.get(expected_mimetype)

    is_known_extension = self.extension in mimetypes.types_map.keys()
    if is_known_extension and expected_mimetype != self.mimetype:
        self.log_details.update({'expected_mimetype': expected_mimetype})
        self.make_dangerous()

    # check correlation actual mime type => known extensions
    if aliases.get(self.mimetype) is not None:
        mimetype = aliases.get(self.mimetype)
    else:
        mimetype = self.mimetype
    expected_extensions = set(
        mimetypes.guess_all_extensions(mimetype, strict=False))
    if expected_extensions:
        # Also accept the aliases configured for the expected extensions.
        extra_ext = [
            aliases_ext.get(ext) for ext in expected_extensions
            if aliases_ext.get(ext, None)
        ]
        expected_extensions.update(extra_ext)
        if len(self.extension
               ) > 0 and self.extension not in expected_extensions:
            # Only logged: flagging the file here is disabled below.
            self.log_details.update(
                {'expected_extensions': list(expected_extensions)})
            # self.make_dangerous()
    else:
        # there are no known extensions associated to this mimetype.
        pass
def validate_attachment(self, upload):
    """Validate an upload against the allowed extensions and contents.

    Returns:
        ``''`` when the upload is acceptable (or no extensions are
        configured), otherwise an error message for the user.
    """
    if not self.allowed_file_extensions:
        return ''

    # Normalise the configured extensions so that each starts with a dot
    allowed_exts = []
    for configured in self.allowed_file_extensions.split():
        if configured.startswith('.'):
            allowed_exts.append(configured)
        else:
            allowed_exts.append('.{}'.format(configured))

    ext = os.path.splitext(upload.file_name)[1]
    if ext not in allowed_exts:
        return "{} - Error: Unsupported file format. Supported file formats are: {}".format(
            upload.file_name, ', '.join(allowed_exts))

    # Files with data must also have contents matching an allowed
    # extension, so that renamed files of other types are rejected.
    if upload.file_size == 0:
        return ''

    file_mime = magic.from_file(upload.file_path, mime=True)
    content_exts = set(mimetypes.guess_all_extensions(file_mime))
    if not content_exts.isdisjoint(set(allowed_exts)):
        return ''

    # The extension check on the contents failed; the file may still pass
    # if its file type (not mimetype) is white-listed.
    allowed_types = self.allowed_file_types.split('\n')
    file_type = magic.from_file(upload.file_path, mime=False)
    if file_type in allowed_types:
        return ''

    return "{} - Error: The extension for this file is valid, but the content is not. Please verify the file content has been updated and save it again before attempting upload.".format(
        upload.file_name)
def getAllowedExtensionList(self, request_dict={}):
    """List the types that can be generated from the given type.

    ``request_dict`` may contain:
      - ``mimetype``: a source mimetype
      - ``extension``: a filename extension
      - ``document_type``: 'text', 'spreadsheet', 'presentation' or
        'drawing'

    e.g. self.getAllowedExtensionList(dict(document_type="text"))

    Returns the extension list; ``[('', '')]`` when none of the keys is
    given.
    """
    mimetype = request_dict.get('mimetype')
    extension = request_dict.get('extension')
    document_type = request_dict.get('document_type')

    if mimetype:
        # merge the results for every extension of the mimetype, keeping
        # first-seen order and dropping duplicates
        merged_list = []
        for dotted_ext in guess_all_extensions(mimetype):
            per_extension = mimemapper.getAllowedExtensionList(
                extension=dotted_ext.replace('.', ''),
                document_type=document_type)
            for entry in per_extension:
                if entry not in merged_list:
                    merged_list.append(entry)
        return merged_list

    if extension:
        return mimemapper.getAllowedExtensionList(
            extension=extension.replace('.', ''),
            document_type=document_type)

    if document_type:
        return mimemapper.getAllowedExtensionList(document_type=document_type)

    return [('', '')]
def _setupFileTypes(defines):
    """Turn ``mimetypes``/``file_types`` defines into manifest entries.

    Extensions for every entry of ``defines["mimetypes"]`` are merged into
    ``defines["file_types"]`` (minus the reserved ``.bat``, ``.com`` and
    ``.exe``) and the ``mimetypes`` key is removed. ``file_types`` is then
    rendered as ``<uap:FileType>`` lines; ``file_types`` and ``extensions``
    default to empty strings when no file types are defined.

    Raises:
        Exception: if a mime type has no known extension.
    """
    if "mimetypes" in defines:
        collected = set(defines.get("file_types", set()))
        defines["file_types"] = collected
        mimetypes.init()
        for mime in defines["mimetypes"]:
            found = set(mimetypes.guess_all_extensions(mime))
            if not found:
                raise Exception(f"Unsupported mime type {mime}")
            collected.update(found)
        # remove reserved associations (packaging would fail)
        collected.difference_update({".bat", ".com", ".exe"})
        del defines["mimetypes"]

    if "file_types" not in defines:
        defines.setdefault("file_types", "")
        defines.setdefault("extensions", "")
        return

    CraftCore.log.info(
        "The package will support the following file types:")
    CraftCore.log.info(defines["file_types"])
    rendered = [
        f"""<uap:FileType>{file_type}</uap:FileType>"""
        for file_type in set(defines["file_types"])
    ]
    defines["file_types"] = "\n".join(rendered)
    defines.setdefault("extensions", AppxPackager.Extensions)
def get_type_extension(content_type):
    """Return the alphabetically first usable extension for a content type.

    Extensions listed in ``AUTOCALCULATED_FILENAME_EXTENSION_BLACKLIST`` are
    ignored; ``None`` is returned when nothing is left.
    """
    usable = set(mimetypes.guess_all_extensions(content_type))
    usable -= AUTOCALCULATED_FILENAME_EXTENSION_BLACKLIST
    if not usable:
        return None
    return min(usable)
def mirror_entity_image(self, tweet, entity_index, url):
    """Download a tweet's image, re-upload it and record the new URL.

    The upload is named ``<tweet id>-<entity index><extension>``, with the
    extension taken from the response's content type. Nothing is recorded
    when the download fails or the upload returns no URL.
    """
    response = requests.get(url)
    if response.status_code != httplib.OK:
        log.warn("Failed to download image {0}", url)
        return

    content_type = response.headers.get('content-type')

    # NOTE(review): the extension found in the URL path is computed and then
    # discarded, so the content type always decides -- confirm this is
    # intended.
    _base, _url_extension = os.path.splitext(urlparse.urlparse(url).path)

    extensions = []
    for candidate in mimetypes.guess_all_extensions(content_type):
        if candidate != '.jpe':
            extensions.append(candidate)
    extension = extensions[0] if extensions else ''
    log.debug("Possible mime types: {0}, chose {1}", extensions, extension)

    filename = "{tweet}-{index}{extension}".format(
        tweet=tweet.get('id'), index=entity_index, extension=extension)

    with NamedTemporaryFile(mode='wb', prefix='twoops', delete=True) as fil:
        fil.write(response.content)
        fil.flush()
        new_url = self.upload_image(fil.name, filename, content_type)
        if new_url:
            self.record_tweet_image(tweet, new_url)
def get_extensions(self, strict_flag=False, all_ext_flag=False):
    """
    Function to get the extension(s) of the requested url.

    The extensions are derived from the ``Content-Type`` header of the
    stored response; header parameters such as ``; charset=utf-8`` are
    ignored for the lookup.

    Args:
        strict_flag (bool): passed to ``mimetypes`` as ``strict``. With
            True only IANA-registered MIME types are considered; with False
            some additional non-standard but commonly used types as well.
        all_ext_flag (bool): return every known extension instead of only
            the preferred one.

    Returns:
        list: the extension(s), empty when the header is missing, nothing
        is known for the type, or an error occurred (which is printed).
    """
    extension_list = list()
    try:
        # Getting the content-type from the response object
        content_type = self.__response.headers.get('Content-Type', None)
        if content_type is not None:
            # mimetypes only knows bare types, not "type; param=value"
            bare_type = content_type.partition(';')[0].strip()
            if all_ext_flag:
                extension_list = mimetypes.guess_all_extensions(
                    bare_type, strict_flag)
            else:
                preferred = mimetypes.guess_extension(bare_type, strict_flag)
                # guess_extension() returns None for unknown types
                if preferred is not None:
                    extension_list = preferred.split()
    except Exception as e:
        print("Error: In getting the extension(s) of url path: ",
              self.__url_file_path, "\nException: ", e)
    return extension_list
def extension_correct_for_mimetype(extension, mimetype):
    """
    Tell whether a filename extension (e.g. ".ogg") is a possible extension
    for a mimetype (e.g. "application/ogg").

    Besides the extensions known to ``mimetypes``, the mimetype's subtype
    is accepted as an extension.

    >>> extension_correct_for_mimetype('.ogg', 'application/ogg')
    True
    >>> extension_correct_for_mimetype('.ogv', 'video/ogg')
    True
    >>> extension_correct_for_mimetype('.ogg', 'audio/mpeg')
    False
    >>> extension_correct_for_mimetype('mp3', 'audio/mpeg')
    Traceback (most recent call last):
      ...
    ValueError: "mp3" is not an extension (missing .)
    >>> extension_correct_for_mimetype('.mp3', 'audio mpeg')
    Traceback (most recent call last):
      ...
    ValueError: "audio mpeg" is not a mimetype (missing /)
    """
    if '/' not in mimetype:
        raise ValueError('"%s" is not a mimetype (missing /)' % mimetype)
    if not extension.startswith('.'):
        raise ValueError('"%s" is not an extension (missing .)' % extension)

    # The "default" extension is the subtype: "application/ogg" gives
    # ".ogg", "audio/mpeg" gives ".mpeg", etc...
    subtype = mimetype.split('/')[-1]
    candidates = ['.' + subtype]
    candidates.extend(mimetypes.guess_all_extensions(mimetype))
    return extension in candidates
def valid_mime_extensions(request):
    """Return the configured MIME types mapped to their file extension.

    Reads ``lmkp.file_mime_extensions`` from ``request.registry.settings``
    and keeps only the entries that Python's ``mimetypes`` module agrees
    with. Returns an empty dict when the setting is absent.
    """
    settings = request.registry.settings
    if 'lmkp.file_mime_extensions' not in settings:
        return {}

    configured = settings['lmkp.file_mime_extensions']
    accepted = {}
    for mime_type, extension in configured.items():
        # The configured extension must be one that mimetypes knows at all.
        if extension not in mimetypes.types_map:
            continue
        # ...and it must be a valid extension for this particular MIME type.
        if extension not in mimetypes.guess_all_extensions(mime_type):
            continue
        accepted[mime_type] = extension

    # Add special types by Internet Explorer
    # http://msdn.microsoft.com/en-us/library/ms775147%28v=vs.85%29.aspx#_replace
    if 'image/jpeg' in accepted:
        accepted['image/pjpeg'] = '.jpg'
    if 'image/png' in accepted:
        accepted['image/x-png'] = '.png'
    return accepted
def _preferred_extension(content_type, ranked_extensions):
    """Choose one file extension for a Content-Type header value.

    Extensions listed in *ranked_extensions* win, in that order; otherwise the
    first extension known to ``mimetypes`` is used. Returns "" when the
    header is missing or the type is unknown.
    """
    if not content_type:
        return ""
    # Strip parameters ("; charset=...") that mimetypes does not understand.
    media_type = content_type.split(";", 1)[0].strip()

    def rank(ext):
        lowered = ext.lower()
        if lowered in ranked_extensions:
            return ranked_extensions.index(lowered)
        return len(ranked_extensions)

    # sorted() is stable, so unranked extensions keep mimetypes' own order.
    ordered = sorted(mimetypes.guess_all_extensions(media_type), key=rank)
    return ordered[0] if ordered else ""


def fetcher(q):
    """Worker loop: download queued URLs and store each one on disk.

    Each task taken from *q* is a ``(url, cookie, path)`` tuple; ``None``
    stops the loop. The body of a 200 response is written to *path* plus an
    extension derived from the Content-Type header, unless that file already
    exists. ``q.task_done()`` is called after every non-``None`` task.
    """
    mime_type_ranks = ['.jpg', '.png', '.tif', '.bmp']
    while 1:
        task = q.get()
        if task is None:
            break
        (url, cookie, path) = task
        response = None
        try:
            response = urlopen(url, cookie=cookie)
        except Exception as ex:
            # Best effort: report the failure and move on to the next task.
            sys.stderr.write(repr(ex))
            sys.stderr.flush()
        if response and response.getcode() == 200:
            data = response.read()
            ext = _preferred_extension(response.headers['Content-Type'],
                                       mime_type_ranks)
            if len(data) > 0:
                path_with_ext = path + ext
                if not os.path.exists(path_with_ext):
                    path_parent = os.path.dirname(path_with_ext)
                    # A bare file name has no parent directory to create;
                    # os.makedirs('') would raise.
                    if path_parent:
                        try:
                            os.makedirs(path_parent)
                        except OSError as ex:
                            # An already existing directory is fine.
                            if not (ex.errno == errno.EEXIST and os.path.isdir(path_parent)):
                                raise
                    with open(path_with_ext, 'w+b') as f:
                        f.write(data)
                    sys.stdout.write("< " + path_with_ext + "\n")
                    sys.stdout.flush()
                else:
                    sys.stderr.write("! " + path_with_ext + "\n")
                    sys.stderr.flush()
        q.task_done()
def clean_url(self):
    """Download the submitted URL and return it as an uploaded file.

    Returns '' when no URL was submitted. Raises ``forms.ValidationError``
    when the response has no Content-Type or ``mimetypes`` knows no
    extension for it.
    """
    url = self.cleaned_data.get('url')
    if not url:
        return ''
    filename, headers = urllib.urlretrieve(url)
    content_type = headers.get('Content-Type')
    # A missing header must be rejected here: guess_all_extensions(None)
    # would raise instead of returning an empty list.
    if not content_type or not mimetypes.guess_all_extensions(content_type):
        # Interpolate after translation so the message id stays constant.
        raise forms.ValidationError(
            _('The file type is invalid: %s') % content_type)
    # Binary mode and a context manager: the payload is arbitrary file data
    # and the handle must not be leaked.
    with open(filename, 'rb') as downloaded:
        content = downloaded.read()
    return SimpleUploadedFile(filename, content, content_type=content_type)
def mirror_entity_image(self, tweet, entity_index, url):
    """Download an image and re-upload it via ``self.upload_image``.

    Does nothing (apart from logging) when the download does not return
    HTTP 200. When the upload yields a URL it is recorded with
    ``self.record_tweet_image``.
    """
    response = requests.get(url, allow_redirects=True, timeout=15)
    if response.status_code != http.client.OK:
        log.warn("Failed to download image {0}", url)
        return

    content_type = response.headers.get('content-type')

    # NOTE(review): the extension found in the URL path is computed but then
    # unconditionally discarded, so the extension always comes from the
    # Content-Type. Confirm whether that override is intentional.
    url_path = urlparse.urlparse(url).path
    _base, _url_extension = os.path.splitext(url_path)

    extensions = []
    for candidate in mimetypes.guess_all_extensions(content_type):
        if candidate != '.jpe':
            extensions.append(candidate)
    if extensions:
        extension = extensions[0]
    else:
        extension = ''
    log.debug("Possible mime types: {0}, chose {1}", extensions, extension)

    filename = "{tweet}-{index}{extension}".format(
        tweet=tweet.get('id'), index=entity_index, extension=extension)

    # The temporary file is deleted on close, so the upload happens while
    # it is still open.
    with NamedTemporaryFile(mode='wb', prefix='twoops', delete=True) as fil:
        fil.write(response.content)
        fil.flush()
        new_url = self.upload_image(fil.name, filename, content_type)
        if new_url:
            self.record_tweet_image(tweet, new_url)
def random_extension(*args, **kwargs):
    """Return a randomly chosen extension for the given MIME type.

    Arguments are forwarded to ``mimetypes.guess_all_extensions``.
    """
    # mimetypes.guess_extension may return any of the values that
    # guess_all_extensions lists, depending on the system. Picking one at
    # random checks that our code copes with every possible extension.
    return random.choice(mimetypes.guess_all_extensions(*args, **kwargs))
def create_url_filename(url_str, content_type):
    """Derive a file name from a URL and the content type served for it.

    The host name components are reversed and joined with the path
    separator, the URL path is appended, and the result is run through
    ``URL_FILENAME_TRANSLATION_TABLE``. The extension is chosen as follows:

    1. the URL's own extension, if it is valid for *content_type*;
    2. otherwise the canonical extension for *content_type*, if any;
    3. otherwise the URL's own extension after normalisation, if it has one;
    4. otherwise ``DEFAULT_OUTPATH_SUFFIX``.
    """
    # See also: http://stackoverflow.com/a/7406369/1391325
    url_parts = urlsplit(url_str)
    host_as_dirs = os.path.sep.join(reversed(url_parts[1].split('.')))
    combined = "".join((host_as_dirs, url_parts[2]))

    raw_base, existing_ext = os.path.splitext(combined)
    base = raw_base.translate(URL_FILENAME_TRANSLATION_TABLE)
    if base.endswith(os.path.sep):
        base = base[:-len(os.path.sep)]

    # 1. Keep the original extension when it matches the content type.
    if existing_ext and existing_ext in mimetypes.guess_all_extensions(content_type):
        return base + existing_ext

    # 2. Prefer the canonical extension; for extension-less URLs this also
    # avoids clashes with other URLs.
    canonical_ext = mimetypes.guess_extension(content_type)
    if canonical_ext:
        return base + canonical_ext

    # 3. No canonical extension: fall back to the normalised original one.
    if existing_ext:
        return base + normalize_url_component(existing_ext, ".")

    # 4. Nothing to go on: just add some extension.
    return base + DEFAULT_OUTPATH_SUFFIX
def dl_jpg(url, file_path, file_name, mime):
    """Download *url* and rename the file with an extension matching its content.

    The file is first saved as ``file_path + file_name``; its MIME type is
    then detected with ``mime.from_file`` and the first extension that
    ``mimetypes`` lists for that type is appended to the file name. The
    chosen extension is also stored in the module-level ``ext`` variable.
    """
    global ext  # set as a global variable for later use

    download_target = file_path + file_name
    urllib.urlretrieve(url, download_target)

    # Detect the type from the file contents, then pick an extension for it.
    detected_type = mime.from_file(download_target)
    ext = mimetypes.guess_all_extensions(detected_type)[0]

    os.rename(download_target, download_target + ext)
def parse_render(render):
    """Parse render URL parameter.

    Maps a file extension (without the dot, case-insensitive) to the name of
    a supported output format; anything missing or unrecognised yields
    ``'png'``.

    >>> parse_render(None)
    'png'
    >>> parse_render('html')
    'png'
    >>> parse_render('png')
    'png'
    >>> parse_render('jpg')
    'jpeg'
    >>> parse_render('gif')
    'gif'
    >>> parse_render('bmp')
    'bmp'
    """
    formats = {
        "jpeg": guess_all_extensions("image/jpeg"),
        "png": guess_all_extensions("image/png"),
        "gif": guess_all_extensions("image/gif"),
        # Newer Python versions register .bmp under "image/bmp", older ones
        # under "image/x-ms-bmp"; consult both so 'bmp' is always recognised.
        "bmp": (guess_all_extensions("image/bmp")
                + guess_all_extensions("image/x-ms-bmp")),
        "tiff": guess_all_extensions("image/tiff"),
        "xbm": guess_all_extensions("image/x-xbitmap"),
        "pdf": guess_all_extensions("application/pdf"),
    }
    if not render:
        return "png"
    wanted = ".%s" % render.lower()
    for name, extensions in formats.items():
        if wanted in extensions:
            return name
    return "png"
def parse_render(render):
    """Parse render URL parameter.

    The parameter is treated as a file extension without the dot
    (case-insensitive) and mapped to a supported output format name.
    A missing or unrecognised value falls back to ``'png'``.

    >>> parse_render(None)
    'png'
    >>> parse_render('html')
    'png'
    >>> parse_render('png')
    'png'
    >>> parse_render('jpg')
    'jpeg'
    >>> parse_render('gif')
    'gif'
    >>> parse_render('bmp')
    'bmp'
    """
    # Depending on the Python version .bmp is registered under 'image/bmp'
    # or 'image/x-ms-bmp'; look under both so 'bmp' is always recognised.
    bmp_extensions = (guess_all_extensions('image/bmp')
                      + guess_all_extensions('image/x-ms-bmp'))
    formats = {
        'jpeg': guess_all_extensions('image/jpeg'),
        'png': guess_all_extensions('image/png'),
        'gif': guess_all_extensions('image/gif'),
        'bmp': bmp_extensions,
        'tiff': guess_all_extensions('image/tiff'),
        'xbm': guess_all_extensions('image/x-xbitmap'),
        'pdf': guess_all_extensions('application/pdf'),
    }
    if not render:
        return 'png'
    dotted = '.%s' % render.lower()
    for format_name, known_extensions in formats.items():
        if dotted in known_extensions:
            return format_name
    return 'png'
def clean_url(self):
    """Download the submitted URL and return it as an uploaded file.

    Returns "" when no URL was submitted. Raises ``forms.ValidationError``
    when the response has no Content-Type or ``mimetypes`` knows no
    extension for it.
    """
    url = self.cleaned_data.get("url")
    if not url:
        return ""
    filename, headers = urllib.urlretrieve(url)
    # Named content_type rather than `type` to avoid shadowing the builtin.
    content_type = headers.get("Content-Type")
    if not content_type or not mimetypes.guess_all_extensions(content_type):
        # Interpolate after translation so the message id stays constant.
        raise forms.ValidationError(
            _("The file type is invalid: %s") % content_type)
    # Binary mode and a context manager: the payload is arbitrary file data
    # and the handle must not be leaked.
    with open(filename, "rb") as downloaded:
        content = downloaded.read()
    return SimpleUploadedFile(filename, content, content_type=content_type)
def save(self, *args, **kwargs):
    """Save the instance, fetching the image from ``image_url`` if needed.

    After the regular save, when no image is set but ``image_url`` is, the
    URL is downloaded, wrapped in a ``SimpleUploadedFile`` and the instance
    is saved again. Raises ``ValidationError`` when the download has no
    Content-Type or ``mimetypes`` knows no extension for it.
    """
    super(Image, self).save(*args, **kwargs)
    if not self.image and self.image_url:
        filename, headers = urllib.urlretrieve(self.image_url.encode('utf-8'))
        # Named content_type rather than `type` to avoid shadowing the builtin.
        content_type = headers.get('Content-Type')
        if not content_type or not mimetypes.guess_all_extensions(content_type):
            raise ValidationError('Broken image !')
        # Binary mode and a context manager: image data must not go through
        # text-mode translation and the handle must not be leaked.
        with open(filename, 'rb') as downloaded:
            content = downloaded.read()
        self.image = SimpleUploadedFile(filename, content, content_type=content_type)
        # self.image is now set, so this second save does not recurse again.
        self.save()
def guess_extension(ctype, ext):
    """Pick a file extension for *ctype*, trusting *ext* when it is plausible.

    Returns *ext* if it is one of the extensions known for *ctype*,
    otherwise the first known extension. When no extension is known the
    (empty) list of candidates itself is returned, i.e. a falsy value.
    """
    # mimetypes maps several extensions to one type (.doc, .dot and .wiz are
    # all application/msword), so the reverse mapping is ambiguous. If the
    # caller's extension is among the candidates we trust it; otherwise we
    # can only guess.
    candidates = guess_all_extensions(ctype, strict=False)
    if not candidates:
        return candidates
    if ext in candidates:
        return ext
    return candidates[0]
def _is_an_exclusive_format(mimeType):
    """Tell whether *mimeType* belongs to the configured exclusive formats.

    Returns True when no ``formats_exclusive`` setting is configured.
    Otherwise returns whether any extension known for *mimeType* is listed in
    that setting. Results are cached per MIME type in
    ``Document._exclusive_formats``.
    """
    exclusive = Configuration.get("formats_exclusive")
    if not exclusive:
        return True
    # The cache must be keyed by the MIME type. Keying it by the builtin
    # `format` made every lookup return the first result ever computed.
    if mimeType not in Document._exclusive_formats:
        possible_exts = set(mimetypes.guess_all_extensions(mimeType, strict=False))
        Document._exclusive_formats[mimeType] = bool(
            possible_exts.intersection(exclusive))
    return Document._exclusive_formats[mimeType]
def properExtensionForMimetype(mimetype, currentExten):
    """Return the right extension for *mimetype*, keeping *currentExten* if valid.

    When *currentExten* is not among the strictly valid extensions the result
    is whatever ``mimetypes.guess_extension`` suggests, which is None for an
    unknown type.
    """
    # NOTE: strict matching is used on purpose, forcing a rename of even
    # slightly dodgy extensions. Relaxing it is possible, but could make
    # players fail for some types they would otherwise handle.
    strictly_valid = mimetypes.guess_all_extensions(mimetype, strict=True)
    if currentExten not in strictly_valid:
        return mimetypes.guess_extension(mimetype)
    return currentExten
def _extension(self, mime_type=DEFAULT_EXT):
    """Get the common-law file extension for a given MIME type.

    Returns the first extension ``mimetypes`` lists for *mime_type*, never
    '.jpe', and falls back to ``URLRetrievalStorage.DEFAULT_EXT`` when no
    usable extension is known.
    """
    candidates = mimetypes.guess_all_extensions(mime_type)
    # '.jpe' is a valid JPEG extension that nobody uses in practice.
    usable = [candidate for candidate in candidates if candidate != '.jpe']
    if usable and usable[0]:
        return usable[0]
    return URLRetrievalStorage.DEFAULT_EXT
def get_filter(cls, mimetype):
    """
    Build a filter string for the file dialog from a mime type.

    The result has the form ``"<mimetype> (*.ext1 *.ext2 ...)"`` using every
    extension ``mimetypes`` knows for the type.

    :param mimetype: mime type from which the filter must be derived.
    :return: Filter string
    """
    patterns = []
    for extension in mimetypes.guess_all_extensions(mimetype):
        patterns.append("*%s" % extension)
    return "%s (%s)" % (mimetype, " ".join(patterns))
def make_filename(url, imgdata, resp=None):
    """Build a file name from the last URL path segment and the data's type.

    The MIME type is sniffed from *imgdata* with ``magic.from_buffer`` and
    turned into an extension via ``mimetypes``. When no extension is known
    for the sniffed type the name is returned without one. *resp* is
    currently unused.
    """
    mime_type = magic.from_buffer(imgdata, mime=True)
    # ... last extension in the list tends to be better (and longer)
    extensions = mimetypes.guess_all_extensions(mime_type)
    # An unknown type yields an empty list; fall back to no extension
    # instead of raising IndexError.
    mime_ext = extensions[-1] if extensions else ''
    #
    # TODO: filename from `resp`
    # if resp is not None: ...
    # [-1] also copes with a URL that contains no '/' at all. The unused
    # data/url hashes the original computed here have been dropped.
    urlv = url.rstrip('/').rsplit('/', 1)[-1]
    # NOTE: cannot check for existing here.
    return '%s%s' % (urlv, mime_ext)
def clean_url(self):
    """Download the submitted URL and return it as an uploaded file.

    Returns '' when no URL was submitted. Raises ``forms.ValidationError``
    with the URL field's ``invalid_link`` message when the download fails,
    and with a file-type message when the response has no Content-Type or
    ``mimetypes`` knows no extension for it.
    """
    url = self.cleaned_data.get('url')
    if not url:
        return ''
    try:
        filename, headers = urllib.urlretrieve(url)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        raise forms.ValidationError(
            forms.fields.URLField.default_error_messages['invalid_link'])
    content_type = headers.get('Content-Type')
    # A missing header must be rejected here: guess_all_extensions(None)
    # would raise instead of returning an empty list.
    if not content_type or not mimetypes.guess_all_extensions(content_type):
        # Interpolate after translation so the message id stays constant.
        raise forms.ValidationError(
            _('The file type is invalid: %s') % content_type)
    # Binary mode and a context manager: the payload is arbitrary file data
    # and the handle must not be leaked.
    with open(filename, 'rb') as downloaded:
        content = downloaded.read()
    return SimpleUploadedFile(filename, content, content_type=content_type)
def get_cover(digest):
    """Fetch the cover image for *digest* from napiprojekt.pl.

    Returns False when the download or the extension lookup fails.
    """
    # NOTE(review): in the code visible here the downloaded data and the
    # guessed extension are not used further and the success path returns
    # None; confirm against the full function before relying on the result.
    cover = ""
    try:
        url = "http://www.napiprojekt.pl/okladka_pobierz.php?id=%s&oceny=-1" % (urllib2.quote(digest))
        f = urllib2.urlopen(url)
        try:
            cover = f.read()
            # Read the headers before closing the response.
            content_type = f.info()["Content-Type"]
        finally:
            # Close the response even when reading fails.
            f.close()
        extension = mimetypes.guess_all_extensions(content_type)[-1]
    except Exception:
        # `except Exception, e` is Python-2-only syntax; this form is valid
        # on Python 2.6+ and Python 3.
        return False
def get_extension(self):
    """Return the file extension (with leading dot) for ``self.mime_type``.

    JPEG always maps to '.jpg'. For ``Purpose.raw`` the MIME subtype is used
    verbatim. Otherwise the last extension listed by ``mimetypes`` is
    returned, or '' when none is known.
    """
    if self.mime_type == 'image/jpeg':
        return '.jpg'
    if self.purpose is Purpose.raw:
        subtype = self.mime_type.split('/')[1]
        return '.' + subtype
    known = mimetypes.guess_all_extensions(self.mime_type)
    return known[-1] if known else ''