def __youtube_download(self, link, output=None, noplaylist=True, overwrite=False):
    """Download a YouTube video from `link` to `output` via youtube_dl.

    Returns True when the downloaded file has a recognizable mime type,
    False otherwise (including when the target exists and overwrite is
    False, and on any download error).

    NOTE(review): the `noplaylist` parameter is accepted but ignored —
    'noplaylist' is hard-coded to True in the options; kept for interface
    compatibility. TODO confirm intent.
    """
    try:
        # Skip the download when the target already exists, unless overwriting.
        if not os.path.isfile(output) or overwrite:
            youtube_dl.YoutubeDL({
                'outtmpl': output,
                'noplaylist': True,
                'socket_timeout': 60
            }).download([link])
            # youtube_dl may append its own extension; normalize to `output`.
            shutil.move(OSUtils.fileplusextension(output), output)
            if filetype.guess_mime(output):
                print(link, output)
                return True
            return False
        print("File " + output + " exists.")
        return False
    except KeyboardInterrupt:
        # Remove the partial download before propagating the interrupt.
        if os.path.isfile(OSUtils.fileplusextension(output)):
            os.remove(OSUtils.fileplusextension(output))
        raise
    except Exception:
        # Best-effort download: report failure explicitly instead of the
        # original silent `pass` (which returned None).
        return False
def guessMimetype(filename):
    """Return the mime-type for `filename`, or None when undetectable."""
    # Normalize to a pathlib.Path so .open() / .suffix work uniformly.
    path = pathlib.Path(filename) if not isinstance(filename, pathlib.Path) else filename

    with path.open("rb") as signature:
        # Since filetype only reads 262 of file many mp3s starting with null bytes will not find
        # a header, so ignoring null bytes and using the bytes interface...
        buf = b""
        while not buf:
            data = signature.read(_NUM_SIGNATURE_BYTES)
            if not data:
                # EOF before any non-null byte was found; buf stays empty.
                break
            data = data.lstrip(b"\x00")
            if data:
                data_len = len(data)
                if data_len >= _NUM_SIGNATURE_BYTES:
                    buf = data[:_NUM_SIGNATURE_BYTES]
                else:
                    # Top up the buffer so the signature window is full.
                    buf = data + signature.read(_NUM_SIGNATURE_BYTES - data_len)

        # Special casing .id3/.tag because extended filetype with add_type() prepends, meaning
        # all mp3 would be labeled mimetype id3, while appending would mean each .id3 would be
        # mime mpeg.
        if path.suffix in ID3_MIME_TYPE_EXTENSIONS:
            if Id3Tag().match(buf) or Id3TagExt().match(buf):
                return Id3TagExt.MIME

    # Fall back to signature-based detection on the collected bytes.
    return filetype.guess_mime(buf)
async def guess_mime(file: File) -> str:
    """Return the file's mimetype, or `application/octet-stream` if unknown."""
    # Rewind so the signature bytes are read from the start of the stream.
    if isinstance(file, io.IOBase):
        file.seek(0, 0)
    elif isinstance(file, AsyncBufferedReader):
        await file.seek(0, 0)
    try:
        first_chunk: bytes
        # Only the first chunk is needed for magic-number detection.
        async for first_chunk in async_generator_from_data(file):
            break
        else:
            return "inode/x-empty"  # empty file
        # TODO: plaintext
        mime = filetype.guess_mime(first_chunk)
        # Unknown signature: check for SVG (text-based, no magic number),
        # otherwise fall back to the generic binary type.
        return mime or (
            "image/svg+xml" if await is_svg(file) else "application/octet-stream"
        )
    finally:
        # Leave the stream rewound for the caller regardless of outcome.
        if isinstance(file, io.IOBase):
            file.seek(0, 0)
        elif isinstance(file, AsyncBufferedReader):
            await file.seek(0, 0)
def send_file_helper(context, chat_id, filename, file_content, caption):
    """
    Helper for handle generic files.

    If the file is a pdf, it will be converted and send as image/s.
    If the file is already an image it will not be processed.
    If the file is any other type, it will be send as document.
    """
    detected_mime = filetype.guess_mime(file_content)
    if detected_mime == 'application/pdf':
        # Render each PDF page at 500 DPI and ship the pages as images.
        page_buffers = []
        for rendered_page in convert_from_bytes(file_content, 500):
            buffer = io.BytesIO()
            rendered_page.save(buffer, 'png')
            buffer.name = filename
            buffer.seek(0)
            page_buffers.append(buffer)
        send_images_helper(context, chat_id, page_buffers, caption, '')
    elif filetype.image(file_content) is not None:
        # Already an image: forward it untouched.
        send_images_helper(context, chat_id, [io.BytesIO(file_content)], caption, '')
    else:
        # Anything else goes out as a plain document attachment.
        document = io.BytesIO(file_content)
        context.bot.send_document(chat_id=chat_id,
                                  document=document,
                                  filename=filename,
                                  parse_mode=ParseMode.MARKDOWN,
                                  caption=caption)
def _detect_content_type(self, content_type):
    """Return a usable content type, sniffing the payload when the
    declared type is the over-generic 'application/octet-stream'."""
    if content_type != 'application/octet-stream':
        return content_type
    # Too general, try to detect from the data's magic numbers.
    detected = filetype.guess_mime(self.data)
    return detected if detected else content_type
def is_gtf(infile):
    u"""
    check if input file is gtf

    :param infile: path to input file
    :return: int score — +10 when the file is gzip-compressed,
        +1 when the 9th column looks like GTF attributes
    """
    is_gtf = 0
    if filetype.guess_mime(infile) == "application/gzip":
        is_gtf += 10
        # BUG FIX: open in text mode — the default 'rb' yields bytes, so the
        # str-based startswith("#") and regex below raised TypeError.
        r = gzip.open(infile, "rt")
    else:
        r = open(infile)

    try:
        for line in r:
            # Skip header/comment lines.
            if line.startswith("#"):
                continue
            lines = re.split(r"\s+", line)
            if len(lines) < 8:
                break
            # GTF attribute column is a series of `key "value";` pairs.
            if re.search(r"([\w-]+ \"[\w.\s\-%,:]+\";? ?)+", " ".join(lines[8:])):
                is_gtf += 1
                break
    finally:
        # Close the handle even if iteration raises (e.g. decode errors).
        r.close()
    return is_gtf
def get_type(filepath=None, data=None):
    """Guess a coarse content category for a file.

    Returns one of 'Image', 'Video', 'Audio', 'Sequence', 'Graph',
    'DataFrame' or 'Text'; on failure the exception is logged and the
    function returns None.

    :param filepath: path of the file to classify
    :param data: optional pre-loaded data (overwritten for .json files)
    """
    try:
        if filepath is not None:
            ans = filetype.guess_mime(filepath)
            logger.debug('filetype guess: {} {}'.format(filepath, ans))
            if ans is None:
                # No magic-number match: fall back to the file extension.
                if filepath.endswith('.json'):
                    with open(filepath, 'r') as f:
                        data = json.load(f)
                    # isinstance instead of type()==list (idiomatic, and
                    # covers list subclasses).
                    if isinstance(data, list):
                        return 'Sequence'
                    return "Graph"
                elif filepath.endswith('.mat'):
                    return 'Graph'
                elif filepath.endswith('.csv'):
                    return "DataFrame"
                return "Text"
            if ans.find('image') != -1:
                return "Image"
            if ans.find('video') != -1:
                return 'Video'
            if ans.find('audio') != -1:
                return 'Audio'
            # Detected mime is none of the handled categories.
            raise NotImplementedError
    except Exception as e:
        logger.exception('Guess Type Error : {} {}'.format(filepath, e))
def guess(name: StrOrPath, file: bytes | IO[bytes] | None = None) -> str:
    """
    Guess file media type.

    If optional ``file`` argument is provided, then try to guess media type by
    checking the magic number signature, otherwise fallback to the filename
    extension.

    Args:
        name (StrOrPath): Filename or path.
        file (bytes | IO[bytes] | None, optional): File-obj. Defaults to None.

    Returns:
        str: Guessed media type. For unknown files returns
            'application/octet-stream'.
    """
    if mime := filetype.guess_mime(file):
        return cast(str, mime)
    # BUG FIX: the docstring promises an extension-based fallback and a
    # generic default, but the original fell through and returned None.
    import mimetypes
    guessed, _ = mimetypes.guess_type(str(name))
    return guessed or "application/octet-stream"
def is_pdf(file_path):
    """Return True when the file at ``file_path`` is a PDF.

    :param file_path: path of the file to inspect
    :return: bool
    """
    return filetype.guess_mime(file_path) == 'application/pdf'
def convert_in_zip(src, workdir='.', workers=None, ext_name=None, strict_mode=False, verbose=False):
    """convert non-webp picture inside zip file

    Scans each zip under `src`; when its images warrant conversion, extracts
    it into `workdir`, runs auto_cvt on the tree, re-zips and replaces the
    original, logging the size ratio.
    """
    lgr = logging.get_logger(convert_in_zip.__name__, 'INFO' if verbose else 'ERROR', fmt=logging.LOG_FMT_MESSAGE_ONLY)
    dirs, files = resolve_path_to_dirs_files(src)
    if not files:
        # No direct files given: expand every directory recursively.
        files = []
        [files.extend(resolve_path_to_dirs_files(path_join(dp, '**'), glob_recurse=True)[-1]) for dp in dirs]
    for fp in files:
        need_to_convert = False
        if not fstk.does_file_mime_has(fp, 'zip'):
            continue
        with zipfile.ZipFile(fp) as zf:
            # Sniff each member's type from its first 512 bytes.
            for i in zf.infolist():
                if i.is_dir():
                    continue
                with zf.open(i.filename) as af:
                    mime = filetype.guess_mime(af.read(512))
                if mime and 'image' in mime:
                    if mime == 'image/gif':
                        # GIFs are left alone in either mode.
                        continue
                    elif mime == 'image/webp':
                        need_to_convert = False
                        # strict: keep scanning for non-webp; lax: one webp
                        # is enough to skip the archive.
                        if strict_mode:
                            continue
                        else:
                            break
                    else:
                        need_to_convert = True
                        # strict: one non-webp image triggers conversion;
                        # lax: keep scanning (a later webp cancels it).
                        if strict_mode:
                            break
                        else:
                            continue
            if not need_to_convert:
                continue
            unzip_dir = path_join(workdir, split_path_dir_base_ext(fp)[1])
            try:
                zf.extractall(unzip_dir)
            except zipfile.BadZipFile:
                # Corrupt archive: drop any partial extraction and move on.
                if path_is_dir(unzip_dir):
                    shutil.rmtree(unzip_dir)
                continue
        try:
            old_size = path_get_size(fp)
            # Convert images in place, then re-zip the converted tree.
            auto_cvt(unzip_dir, recursive=True, clean=True, cbz=False, workers=workers, verbose=verbose)
            new_zip = shutil.make_archive(unzip_dir, 'zip', unzip_dir, verbose=verbose)
            if ext_name:
                new_zip = fstk.rename_file_ext(new_zip, ext_name)
                fp = fstk.rename_file_ext(fp, ext_name)
            new_size = path_get_size(new_zip)
            # Replace the original archive with the converted one.
            fstk.move_as(new_zip, fp)
            lgr.info(fp)
            lgr.info(f'{new_size / old_size:.1%} ({naturalsize(new_size, True)} / {naturalsize(old_size, True)})')
        except KeyboardInterrupt:
            sys.exit(2)
        finally:
            # Always remove the extraction scratch directory.
            shutil.rmtree(unzip_dir)
def upload_file(self, dataset_id, path, annotation_task=None, folder_id=None):
    """Upload a single file to a dataset.

    :param dataset_id: target dataset id
    :param path: local path of the file to upload
    :param annotation_task: optional annotation task to attach
    :param folder_id: optional destination folder id
    :return: parsed JSON response from the backend
    """
    name = os.path.basename(path)
    # FIX: open inside a context manager so the handle is closed after the
    # POST — the original leaked the file object.
    with open(path, 'rb') as fp:
        files = {'files': (name, fp, filetype.guess_mime(path))}
        data = {}
        if annotation_task:
            data['annotation_task'] = annotation_task
        url = self.url(backend.dataset_upload.format(dataset_id), folder_id=folder_id)
        return self.post(url, files=files, data=data).json()
def upload_file():
    """Handle GIF uploads: validate, save, and split into PNG frames with ffmpeg."""
    logging.debug(request.headers)
    if request.method == 'POST':
        if 'file' not in request.files:
            logging.debug('No file part')
            flash('No file part', 'danger')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            logging.debug('No selected file')
            flash('No selected file', 'danger')
            return redirect(request.url)
        if not allowed_file(file.filename):
            logging.debug(f'Invalid file extension of file: {file.filename}')
            flash('Invalid file extension', 'danger')
            return redirect(request.url)
        if file.content_type != "image/gif":
            logging.debug(f'Invalid Content type: {file.content_type}')
            flash('Content type is not "image/gif"', 'danger')
            return redirect(request.url)
        if not bool(re.match(r"^[a-zA-Z0-9_\-. '\"\=\$\(\)\|]*$", file.filename)) or ".." in file.filename:
            logging.debug(f'Invalid symbols in filename: {file.content_type}')
            flash('Invalid filename', 'danger')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            # SECURITY FIX: save and reference the sanitized name — the
            # original saved the raw client filename and reused it below.
            saved_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(saved_path)
            mime_type = filetype.guess_mime(saved_path)
            if mime_type != "image/gif":
                logging.debug(f'Invalid Mime type: {mime_type}')
                flash('Mime type is not "image/gif"', 'danger')
                return redirect(request.url)
            uid = str(uuid.uuid4())
            os.mkdir(f"uploads/{uid}")
            logging.debug(f"Created: {uid}. Converting {saved_path} with ffmpeg")
            # SECURITY FIX: run ffmpeg without a shell. The old shell string
            # interpolated the filename, and the filename filter above admits
            # quotes, $, (, ) and | — a straightforward shell injection.
            command = subprocess.Popen(
                ["ffmpeg", "-i", saved_path, f"uploads/{uid}/%03d.png"])
            command.wait(timeout=15)
            logging.debug(command.stdout)
            flash('Successfully saved', 'success')
            return redirect(url_for('result', uid=uid))
    return render_template("form.html")
def get(self):
    """Serve the random image when ?type=image, otherwise the douban data."""
    _type = self.get_query_argument("type", None)
    if _type == "image":
        data = self.get_image()
        # FIX: guess_mime returns None for unknown payloads; fall back to a
        # generic binary type instead of passing None to set_header.
        self.set_header("content-type",
                        filetype.guess_mime(data) or "application/octet-stream")
        self.write(data)
    else:
        resp = yield self.douban_data()
        self.write(resp)
def get_file_kind(filepath):
    """Return the coarse kind of a file (simple classification).

    :param filepath: path of the file to inspect
    :return: the major mime component for supported files — e.g. ``image``,
        ``video``, ``audio``, ``application`` — or None when the type
        cannot be determined
    """
    file_mime = filetype.guess_mime(filepath)
    # FIX: guess_mime returns None for unrecognized files; the original
    # then crashed with AttributeError on .find().
    if not file_mime:
        return None
    kind, sep, _ = file_mime.partition('/')
    # Mirror the original contract: only return a kind when '/' was present.
    return kind if sep else None
def run_add(args):
    """
    Add a new episode to a channel's episode list

    Requires the channel was initialized by `ipfspod new`

    Accepts
    -------
    args: a Namespace resulting from ArgumentParser.parse_args
    """
    home = get_channel_dir(args)
    channel_db = TinyDB(home.joinpath("channel.json").as_posix())
    channel = channel_db.all()[0]
    client = ipfshttpclient.connect()

    # Add any videos or audio to IPFS before writing episode metadata
    # NOTE(review): one episode record is built and inserted per input file —
    # confirm this loop scope against the project history.
    for file in args.file:
        res = client.add(file, pin=False)
        file_hash = res['Hash']
        file_len = Path(file).stat().st_size
        file_type = filetype.guess_mime(file)
        enclosure = dict(
            file_hash=file_hash,
            file_len=file_len,
            file_type=file_type)
        filename = os.path.splitext(os.path.basename(file))[0]
        # MD5 of the file content, streamed in 8 KiB chunks.
        with open(file, "rb") as f:
            file_hash = hashlib.md5()
            while chunk := f.read(8192):
                file_hash.update(chunk)
        # print(file_hash.digest())
        hash_md5 = file_hash.hexdigest()  # to get a printable str instead of bytes

        # Build the episode metadata JSON object
        episode = dict(
            title=args.title or filename,
            description=args.description or args.title or filename,
            link=args.link,
            author=args.author or channel['managing_editor'],
            categories=args.category,
            # RFC 822 style date expected by podcast RSS readers.
            date=datetime.utcnow().strftime(r"%a, %d %b %Y %H:%M:%S +0000"),  # <!-- TODO: substitute with file ts-->
            # Name the fields and include any we just indexed
            enclosure=enclosure,
            hash_md5=hash_md5,
            source=args.source
        )
        episode_db = TinyDB(home.joinpath("episodes.json").as_posix())
        episode_db.insert(episode)
    print(f"added (unknown)")
async def download_async(url: str, name: str):
    """Download ``url`` and store it under ``imgpath`` with a guessed extension.

    :param url: remote resource to fetch
    :param name: base filename (extension is appended from the mime type)
    :raises ValueError: when the URL returns 404 or the payload type is unknown
    """
    resp = await aiorequests.get(url, stream=True)
    if resp.status_code == 404:
        raise ValueError('文件不存在')
    content = await resp.content
    # FIX: narrowed from a bare except — only an unrecognized payload
    # (guess_mime returning None) should map to ValueError; other errors
    # must propagate instead of being masked.
    mime = filetype.guess_mime(content)
    if mime is None:
        raise ValueError('不是有效文件类型')
    extension = mime.split('/')[1]
    abs_path = os.path.join(imgpath, f'{name}.{extension}')
    with open(abs_path, 'wb') as f:
        f.write(content)
async def download_async(url: str, save_path: str, save_name: str) -> str:
    """Download ``url`` into ``save_path``/``save_name`` plus a guessed suffix.

    :param url: remote resource to fetch
    :param save_path: destination directory
    :param save_name: base filename without extension
    :return: the path the file was written to (annotation fixed from None)
    :raises ValueError: when the payload type cannot be recognized
    """
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as resp:
            content = await resp.read()
    # FIX: narrowed from a bare except — only an unrecognized payload
    # (guess_mime returning None) maps to ValueError.
    mime = filetype.guess_mime(content)
    if mime is None:
        raise ValueError('不是有效文件类型')
    suffix = mime.split('/')[1]
    abs_path = path.join(save_path, f'{save_name}.{suffix}')
    with open(abs_path, 'wb') as f:
        f.write(content)
    return abs_path
def _put_binary_file_to_azure(
    self,
    url: str,
    local_path: str,
    data: Dict[str, Any],
) -> None:
    """PUT the file at ``local_path`` to the Azure blob endpoint ``url``."""
    content_type = filetype.guess_mime(local_path)
    headers = {
        "x-ms-blob-content-type": content_type,
        "x-ms-blob-type": data["x-ms-blob-type"],
    }
    with open(local_path, "rb") as stream:
        self._client.do("PUT", url, data=stream, headers=headers)
def fileTypeCheck(self, IMG_PATH):
    """Validate that IMG_PATH is a loadable image of acceptable type and size.

    Appends a status string to ``self.statusDescription`` describing the
    outcome and returns True on success, False otherwise.
    """
    # if image path exists
    if os.path.exists(IMG_PATH):
        # getting mime information of file
        file_type, ext = filetype.guess_mime(IMG_PATH).split('/')
        if file_type == 'image':
            # loading Image in RGB and BGR
            self._image_bgr = cv2.imread(IMG_PATH)
            self.BGR2RGB()
            if ext.lower() in self._accepted_extensions:
                if self._image_bgr is not None:
                    # Exact dimension match.
                    if (self._image_bgr.shape[1] == self._imageWidthCheck) and \
                            (self._image_bgr.shape[0] == self._imageHeightCheck):
                        if 'FILE TYPE CHECK PASSED' not in self.statusDescription:
                            self.statusDescription.append('FILE TYPE CHECK PASSED')
                        return True
                    else:
                        # Allow smaller images above the minimum size ratios.
                        if ((self._image_bgr.shape[1] / self._imageWidthCheck) > 0.5) \
                                and ((self._image_bgr.shape[0] / self._imageHeightCheck) > 0.6):
                            if 'FILE TYPE CHECK PASSED' not in self.statusDescription:
                                self.statusDescription.append('FILE TYPE CHECK PASSED')
                            # BUG FIX: the original `return True,` returned
                            # the tuple (True,), not the boolean True.
                            return True
                        else:
                            self.statusDescription.append('DIMENSION ISSUE')
                            return False
                else:
                    self.statusDescription.append('IMAGE NOT LOADED')
                    return False
            else:
                self.statusDescription.append('FILE EXTENSION ERROR')
                return False
        else:
            self.statusDescription.append('WRONG FILE: ' + file_type)
            return False
    else:
        self.statusDescription.append('NO FILE EXISTS')
        return False
def __decompress_to_data_list(data):
    """Best-effort decompression of ``data`` into a list of payloads.

    Returns ``(payloads, method)`` where ``method`` names the container /
    compression chain that succeeded ("tar", "gzip+tar", "zip", "brotli",
    ...) or "" when the data is returned untouched.
    """
    if not data:
        return [], ""
    data = convert_to_bytes(data)
    mime = guess_mime(data)
    # First pass: treat the payload as a (possibly compressed) tar archive —
    # tarfile transparently handles gzip/xz/bz2 wrappers.
    try:
        if mime == "application/x-tar":
            with tarfile.open(fileobj=io.BytesIO(data)) as tf:
                return [tf.extractfile(f).read() for f in tf.getmembers()], "tar"
        if mime == "application/gzip":
            with tarfile.open(fileobj=io.BytesIO(data)) as tf:
                return [tf.extractfile(f).read() for f in tf.getmembers()], "gzip+tar"
        if mime == "application/x-xz":
            with tarfile.open(fileobj=io.BytesIO(data)) as tf:
                return [tf.extractfile(f).read() for f in tf.getmembers()], "xz+tar"
        if mime == "application/x-bzip2":
            with tarfile.open(fileobj=io.BytesIO(data)) as tf:
                return [tf.extractfile(f).read() for f in tf.getmembers()], "bzip2+tar"
    except:
        # Deliberate best-effort: not a tar — fall through to plain
        # single-stream decompression below.
        pass
    # Second pass: single-stream decompressors keyed on the sniffed mime.
    try:
        if mime == "application/gzip":
            return [gzip.decompress(data)], "gzip"
        if mime in ("application/zip", "application/epub+zip"):
            with zipfile.ZipFile(io.BytesIO(data)) as zf:
                return [zf.read(f) for f in zf.infolist()], "zip"
        if mime == "application/x-brotli":
            return [brotli.decompress(data)], "brotli"
        if mime == "application/x-bzip2":
            return [bz2.decompress(data)], "bzip2"
        if mime == "application/x-xz":
            return [lzma.decompress(data)], "xz"
        if mime in ("application/x-lzip", "application/x-lzma"):
            return [lzma.decompress(data)], "lzma"
    except:
        pass
    try:
        # brotli has no standard magic numbers yet, try decompress data anyway
        return [brotli.decompress(data)], "brotli"
    except:
        pass
    # Nothing matched: hand the raw bytes back unchanged.
    return [data], ""
def PostUpdate(self, status, media=None, media_additional_owners=None,
               media_category=None, in_reply_to_status_id=None,
               auto_populate_reply_metadata=False, exclude_reply_user_ids=None,
               latitude=None, longitude=None, place_id=None,
               display_coordinates=False, trim_user=False,
               verify_status_length=True, attachment_url=None):
    """Post a status update, with a chunked-upload fallback for videos.

    Delegates to the parent PostUpdate; when the single attached media item
    looks like a video and the ordinary path raises TwitterError, retries
    via a chunked upload and posts the tweet with the resulting media id.
    """
    # if this is [file], single photo or media
    if media and len(media) == 1:
        media_type = filetype.guess_mime(media[0].name)
        if media_type and "video" in media_type:
            # we'll first try the ordinary one, if that fails, execute new method in exception
            try:
                return super(NewApi, self).PostUpdate(
                    status, media, media_additional_owners, media_category,
                    in_reply_to_status_id, auto_populate_reply_metadata,
                    exclude_reply_user_ids, latitude, longitude, place_id,
                    display_coordinates, trim_user, verify_status_length,
                    attachment_url)
            except TwitterError:
                logging.warning("long video perhaps")
                # Fallback: chunked upload for videos too large for the
                # ordinary endpoint.
                video_id = self.UploadMediaChunked(
                    media=media[0], media_category='tweet_video')
                logging.info("video id is %s,status is %s", video_id, status)
                # Waits until the async processing of the uploaded media
                # finishes and `video_id` becomes valid.
                time.sleep(20)
                status = super(NewApi, self).PostUpdate(
                    status=status, media=video_id,
                    in_reply_to_status_id=in_reply_to_status_id)
                return status
    # Non-video (or multi-item) media: plain delegation.
    return super(NewApi, self).PostUpdate(
        status, media, media_additional_owners, media_category,
        in_reply_to_status_id, auto_populate_reply_metadata,
        exclude_reply_user_ids, latitude, longitude, place_id,
        display_coordinates, trim_user, verify_status_length,
        attachment_url)
def assertFileType(content_type=None, name=None):
    """Classify an upload as ("zip", ".zip") or ("img", ext).

    Tries the declared content type first, then sniffs the file on disk;
    raises RequestHandleFailException(415) when neither identifies a
    supported zip or image.
    """
    zip_result = ("zip", ".zip")
    if content_type in ("application/zip", "application/x-zip-compressed"):
        return zip_result
    if content_type and content_type.startswith("image"):
        return "img", get_type_obj(content_type, getExtName(name)).extension
    # Declared header was inconclusive — sniff the file content instead.
    try:
        if filetype.guess_mime(name) == "application/zip":
            return zip_result
        if filetype.image(name):
            return "img", get_type_obj(content_type, getExtName(name)).extension
    except FileNotFoundError:
        pass
    raise RequestHandleFailException(415, "输入的文件不是支持的图片或zip类型!")
def do_GET(self):
    "HTTP GET"
    try:
        path, sep, arg = self.path.partition("?")
        paths = path[1:].split("/")
        if paths[0] != "content" and paths[0] in sites:
            # Pre-rendered site page.
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", "text/html; charset=utf-8")
            self.end_headers()
            self.wfile.write(sites[paths[0]].encode("utf-8"))
        elif paths[0] == "articles" and os.path.isfile(
                "./assets/articles/" + paths[1]):
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", "text/html; charset=utf-8")
            self.end_headers()
            # FIX: `with` closes the handle even when rendering raises —
            # the original leaked it on any exception before f.close().
            with open("./assets/articles/" + paths[1], "r", encoding="utf-8") as f:
                content = f.read()
            if paths[1].endswith(".md"):
                content = md2html(content)
            content = wraphtml(content)
            self.wfile.write(content.encode("utf-8"))
        elif paths[0] == "static" and os.path.isfile("./assets/" +
                                                     '/'.join(paths[1:])):
            mimetype = filetype.guess_mime("./assets/" + '/'.join(paths[1:]))
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", mimetype)
            self.send_header("Cache-Control", "max-age=3600")
            self.end_headers()
            # FIX: context manager for the static file as well.
            with open("./assets/" + '/'.join(paths[1:]), "rb") as f:
                shutil.copyfileobj(f, self.wfile)
        elif paths[0] == "robots.txt" and enable_robotstxt:
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", "text/html; charset=utf-8")
            self.end_headers()
            self.wfile.write(ROBOTS_TXT.encode("utf-8"))
        else:
            self.send_response(HTTPStatus.NOT_FOUND)
            self.end_headers()
    except ConnectionError as e:
        # Client went away mid-response: log and swallow.
        self.log_error("Error: [%d] %s", e.errno, e.strerror)
        pass
    return
async def download_async(url: str, save_path: str, save_name: str, auto_extension=False) -> str:
    """Download ``url`` into ``save_path``.

    When ``auto_extension`` is True the extension is derived from the
    detected mime type; otherwise ``save_name`` is used verbatim.

    :return: the path the file was written to (annotation fixed from None)
    :raises ValueError: when auto-detection cannot recognize the payload
    """
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as resp:
            content = await resp.read()
    if auto_extension:
        # No suffix supplied — derive it from the detected mime type.
        # FIX: narrowed from a bare except; only a None mime means
        # "invalid file type", other errors propagate.
        mime = filetype.guess_mime(content)
        if mime is None:
            raise ValueError('不是有效文件类型')
        abs_path = path.join(save_path, f'{save_name}.{mime.split("/")[1]}')
    else:
        abs_path = path.join(save_path, save_name)
    with open(abs_path, 'wb') as f:
        f.write(content)
    return abs_path
async def download_async(url: str, save_path: str, save_name: str, suffix=None) -> str:
    """Download ``url`` into ``save_path``/``save_name`` with ``suffix``.

    When ``suffix`` is not given it is derived from the detected mime type.

    :return: the path the file was written to (annotation fixed from None)
    :raises ValueError: when the payload type cannot be recognized
    """
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as resp:
            content = await resp.read()
    if not suffix:
        # No suffix supplied — derive it from the detected mime type.
        # FIX: narrowed from a bare except; only a None mime means
        # "invalid file type".
        mime = filetype.guess_mime(content)
        if mime is None:
            raise ValueError('不是有效文件类型')
        suffix = mime.split('/')[1]
    abs_path = path.join(save_path, f'{save_name}.{suffix}')
    # BUG FIX: the original appended the stray bytes b"jneth" to every
    # download (corrupting the file) and leaked the open handle.
    with open(abs_path, 'wb') as f:
        f.write(content)
    return abs_path
def _post_multipart_formdata(
    self,
    url: str,
    local_path: str,
    remote_path: str,
    data: Dict[str, Any],
) -> None:
    """POST the file at ``local_path`` to ``url`` as multipart form data."""
    mime = filetype.guess_mime(local_path)
    with open(local_path, "rb") as stream:
        # AWS-style uploads expect an explicit Content-Type field.
        if "x-amz-date" in data:
            data["Content-Type"] = mime
        data["file"] = (remote_path, stream, mime)
        encoder = MultipartEncoder(data)
        self._client.do(
            "POST",
            url,
            data=encoder,
            headers={"Content-Type": encoder.content_type},
        )
def __request_download(cls, url, overwrite=False):
    """Download every image referenced by ``url``'s page into the cwd.

    Skips data-URIs, derives an output name from each image URL (random
    name when empty/overlong), and cleans up partial files on Ctrl-C.
    """
    for im in cls.__urlImageGenerator(url):
        try:
            # Skip inline data-URI images.
            if "base64," in im:
                continue
            # Output name = last path segment (handles trailing slash).
            lo = cls.__lastocc(im, "/") + 1
            if lo < len(im) - 1:
                output = im[cls.__lastocc(im, "/") + 1:]
            else:
                output = im[cls.__lastocc(im[:-1], "/") + 1:-1]
            # random name
            if output == "" or len(output) > 80:
                output = str(random.randint(1, 10000000000000))
            try:
                if not os.path.isfile(output) or overwrite:
                    # FIX: context manager — the original leaked the handle.
                    with open(output, "wb") as f:
                        f.write(requests.get(im).content)
                    if filetype.guess_mime(output):
                        print(im, output)
                else:
                    print("File " + output + " exists.")
                    # NOTE(review): returning here aborts the whole loop on
                    # the first pre-existing file — preserved as-is, but it
                    # looks like it was meant to be `continue`. TODO confirm.
                    return False
            except KeyboardInterrupt:
                # Remove the partial download before propagating.
                if os.path.isfile(cls.fileplusextension(output)):
                    os.remove(cls.fileplusextension(output))
                raise
            except Exception as e:
                print(e)
                raise
        except requests.exceptions.ConnectionError as e:
            print(e)
            continue
        except requests.exceptions.InvalidSchema as e:
            print(e)
            continue
        except requests.exceptions.TooManyRedirects as e:
            print(e)
            continue
async def download_async(url: str, save_path: str, save_name: str, auto_extension=False):
    """Download ``url`` via aiorequests into ``save_path``.

    :return: the path the file was written to
    :raises ValueError: on HTTP 404, or (with auto_extension) when the
        payload type cannot be recognized
    """
    resp = await aiorequests.get(url, stream=True)
    if resp.status_code == 404:
        raise ValueError('文件不存在')
    content = await resp.content
    if auto_extension:
        # No suffix supplied — derive it from the detected mime type.
        # FIX: narrowed from a bare except; only a None mime means
        # "invalid file type", other errors propagate.
        mime = filetype.guess_mime(content)
        if mime is None:
            raise ValueError('不是有效文件类型')
        abs_path = os.path.join(save_path, f'{save_name}.{mime.split("/")[1]}')
    else:
        abs_path = os.path.join(save_path, save_name)
    with open(abs_path, 'wb') as f:
        f.write(content)
    return abs_path
def upload_files(
    self,
    dataset_id,
    files_to_upload=None,
    annotation_task=None,
    folder_id=None,
    status=None,
    annotation_set_id=None,
    class_encoding=None,
    session_id: str = None
):
    """Upload a batch of local files to a dataset.

    :param dataset_id: target dataset id
    :param files_to_upload: list of local file paths (default: empty list)
    :param annotation_task: optional annotation task to attach
    :param folder_id: optional destination folder id
    :param status: optional progress tracker with update()/progress()
    :param annotation_set_id: optional annotation set id
    :param class_encoding: optional dict mapped into class_encoding_* fields
    :param session_id: optional upload session id
    :return: parsed JSON response
    :raises Exception: when the backend reports errors for a >=400 response
    """
    # FIX: mutable default argument ([]) replaced with the None sentinel.
    if files_to_upload is None:
        files_to_upload = []
    files = [
        ('files', (os.path.basename(path), open(path, 'rb'), filetype.guess_mime(path)))
        for path in files_to_upload
    ]
    try:
        data = {}
        if annotation_task:
            data['annotation_task'] = annotation_task
        if session_id:
            data['session_id'] = session_id
        if isinstance(class_encoding, dict):
            for key, val in class_encoding.items():
                data['class_encoding_{}'.format(key)] = val
        url = self.url(
            backend.dataset_upload.format(dataset_id),
            folder_id=folder_id,
            annotation_set_id=annotation_set_id,
        )
        r = self.post(url, files=files, data=data)
    finally:
        # FIX: close every opened handle — the original leaked one per file.
        for _, (_, fobj, _) in files:
            fobj.close()
    json_resp = r.json()
    if (r.status_code >= http.HTTPStatus.BAD_REQUEST) and ('errors' in json_resp):
        raise Exception('Error description:' + '\n'.join(json_resp['errors']))
    if r.status_code != http.HTTPStatus.OK:
        print('Error - Response:', r.text, 'files:', files_to_upload)
    # FIX: `status` defaults to None; guard instead of crashing on
    # status.update when no tracker was supplied.
    if status is not None:
        status.update(len(files))
        status.progress()
    return json_resp
def __request_download(self, link, output, overwrite=False):
    """request download: fetch `link` and save it as `output`.

    Returns True when the saved file has a recognizable mime type, False
    when it does not or when `output` already exists and overwrite is
    False. Partial files are removed on Ctrl-C.
    """
    try:
        if not os.path.isfile(output) or overwrite:
            # FIX: context manager — the original leaked the file handle.
            with open(output, "wb") as f:
                f.write(requests.get(link).content)
            if filetype.guess_mime(output):
                print(link, output)
                return True
            return False
        print("File " + output + " exists.")
        return False
    except KeyboardInterrupt:
        # Remove the partial download before propagating the interrupt.
        if os.path.isfile(OSUtils.fileplusextension(output)):
            os.remove(OSUtils.fileplusextension(output))
        raise
    # Dropped the original `except Exception: raise` — it was a no-op.
def filetype(filename):
    """Return the file's mime type, preferring libmagic over signature sniffing."""
    try:
        detected = magic.from_file(filename, mime=True)
    except AttributeError:
        # The available `magic` module lacks from_file (a different package
        # shadowing python-magic): fall back to signature detection.
        detected = guess_mime(filename)
    return detected
def test_guess_mime_memoryview(self):
    # JPEG magic number wrapped in a zero-copy memoryview.
    signature = memoryview(bytearray([0xFF, 0xD8, 0xFF, 0x00, 0x08]))
    detected = filetype.guess_mime(signature)
    self.assertIsNotNone(detected)
    self.assertEqual(detected, 'image/jpeg')
def test_guess_mime_buffer_invalid(self):
    # Bytes that match no known signature must yield None.
    junk = bytearray([0xFF, 0x00, 0x00, 0x00, 0x00])
    self.assertIsNone(filetype.guess_mime(junk))
def test_guess_mime_file_path(self):
    # Detection from an on-disk fixture path rather than a buffer.
    detected = filetype.guess_mime(FIXTURES + '/sample.jpg')
    self.assertIsNotNone(detected)
    self.assertEqual(detected, 'image/jpeg')