def clearDir(dirPath):
    """Recursively delete non-audio files and empty folders below ``dirPath``.

    A file is kept only when ``filetype.guess`` recognises it and its MIME
    type is listed in the module-level ``audioType``.  Sub-directories are
    cleaned first and removed when that leaves them empty; ``dirPath``
    itself is never removed.

    Paths are built with ``os.path.join`` rather than by ``os.chdir``-ing
    into every directory, so the working directory of the process is left
    untouched and the walk cannot be derailed by ``..`` resolving to an
    unexpected parent (e.g. after entering a symlinked directory).

    :param dirPath: directory to clean, relative or absolute.
    """
    dirPath = os.path.abspath(dirPath)
    # Removal messages are suppressed for the "none" and "error" display modes.
    verbose = settings.display not in ("none", "error")
    for name in os.listdir(dirPath):
        filePath = os.path.join(dirPath, name)
        if os.path.isdir(filePath):
            clearDir(filePath)
            if not os.listdir(filePath):
                os.rmdir(filePath)
                if verbose:
                    print("\033[93mRemove Folder ", filePath, "\033[0m")
        else:
            fileType = filetype.guess(filePath)
            if fileType is None or fileType.mime not in audioType:
                os.remove(filePath)
                if verbose:
                    print("\033[93mRemove File ", filePath, "\033[0m")
def get_FileType(filePath):
    """Return a type label for ``filePath``, or ``None`` when it is unknown.

    Lookup order for a name that has an extension:

    1. the category in ``./config.json`` (``data["type"]["types"]``) whose
       extension list contains the file's extension;
    2. the result of ``fileType.filetype(filePath)`` unless it is
       ``"unknown"``;
    3. the extension reported by ``filetype.guess``.

    A name without any ``.`` yields ``None`` (the original expression for
    that branch could only ever evaluate to ``None``).

    :param filePath: path of the file to classify.
    :return: category name / detected type, or ``None``.
    """
    # Context manager so the config handle is closed (it used to leak).
    with open("./config.json", 'r', encoding="utf-8") as configfile:
        data = json.load(configfile)

    parts = str(os.path.basename(filePath)).split('.')
    if len(parts) <= 1:
        return None

    type_str = parts[-1]
    for category in data["type"]['types']:
        if type_str in data["type"][category]:
            return category

    detected = fileType.filetype(filePath)
    if detected != "unknown":
        return detected

    kind = filetype.guess(filePath)
    return kind.EXTENSION if kind is not None else None
def open_image(self):
    """Ask the user for an image file and display it.

    The selected file is shown via ``self.show_image`` only when
    ``filetype.guess`` reports an ``image/*`` MIME type.  A file whose type
    cannot be guessed (``guess`` returns ``None``) is reported as
    unsupported instead of raising ``AttributeError``.
    """
    print_log("Start")
    self.set_status("Select image...")
    fname = QFileDialog.getOpenFileName(
        self, 'Open image', str(Path.home()),
        "Image files (*.jpg *.jpeg *.gif *.png)")
    image_path = fname[0]
    if image_path:
        print_log("File path is '{}'".format(image_path))
        try:
            kind = filetype.guess(image_path)
            mime = kind.mime if kind is not None else None
            print_log("Mime type: '{}'".format(mime))
            if mime is not None and mime.split("/")[0] == "image":
                self.show_image(image_path)
                self.set_status("Opened.")
            else:
                print_log("Not Supported File")
                self.set_status("Not Supported file..")
        except FileNotFoundError:
            print_log("File Not Found!")
            self.set_status("File not found!")
    else:
        print_log("File path is not defined.")
        self.set_status("File not selected.")
    print_log("Finish")
def countPages(file):
    """Open the comic file and count the number of pages.

    * zip / rar archives: number of members whose extension is in ``IMGEXT``;
    * PDF: number of pages reported by ``PdfFileReader``.

    Archives and the PDF handle are closed via context managers even when
    reading fails.

    :param file: path of the comic file.
    :return: page count, or 0 when the type is unknown/unsupported or an
        error occurred (errors are logged with their traceback).
    """
    try:
        kind = filetype.guess(file)
        if kind is None:
            logger.error("Unable to detect the file type of {}".format(file))
            return 0

        mime = kind.mime
        if mime in ('application/zip', 'application/x-rar-compressed'):
            opener = zipfile.ZipFile if mime == 'application/zip' else rarfile.RarFile
            with opener(file, 'r') as compress:
                # find the images
                return sum(1 for member in compress.namelist()
                           if os.path.splitext(member)[1] in IMGEXT)
        if mime == 'application/pdf':
            with open(file, "rb") as pdffile:
                return PdfFileReader(pdffile).getNumPages()
    except Exception:
        logger.error("Something wrong happens while counting pages of {}".format(file))
        logger.error(traceback.format_exc())
    return 0
def convert_thread(serie, album):
    """Convert a PDF album into a ``.cbz`` archive of JPEG pages.

    zip and rar albums are left untouched.  For a PDF, every page is
    rendered at ``PDFDPI``, stored as ``pageNNN.jpeg`` in the new archive,
    the source file is moved to ``CONVERTED_ARCHIVE`` and the album record
    is updated to point at the new file.  Failures are logged, not raised.

    :param serie: ``urlname`` of the serie.
    :param album: ``urlname`` of the album inside that serie.
    """
    logger.debug("convert: start")
    ts_start = time.time()
    serie = Serie.get(Serie.urlname == serie)
    album = Album.get(Album.serie == serie, Album.urlname == album)
    file = os.path.join(LIBRARY, serie.dirname, album.filename)
    cbzfile = os.path.join(LIBRARY, serie.dirname, createFilename(album.name) + '.cbz')
    try:
        kind = filetype.guess(file)
        if kind is None:
            logger.error("Unable to detect the file type of album {}".format(album.name))
            return
        if kind.mime in ('application/zip', 'application/x-rar-compressed'):
            # nothing to do !
            return
        if kind.mime == 'application/pdf':
            with zipfile.ZipFile(cbzfile, 'w', compression=zipfile.ZIP_STORED) as cbz:
                with WandImage(filename=file, resolution=PDFDPI) as images:
                    for page, frame in enumerate(images.sequence, start=1):
                        # Release each rendered page as soon as it is written.
                        with WandImage(frame) as single_page:
                            blob = single_page.make_blob('jpeg')
                        cbz.writestr("page{:03d}.jpeg".format(page), blob)
            logger.debug("convert: end {}s".format(time.time() - ts_start))
            shutil.move(file, CONVERTED_ARCHIVE)
            album.filename = cbzfile
            album.filetype = 'cbz'
            album.save()
    except Exception:
        logger.error("Something wrong happens while converting album {}".format(album.name))
        logger.error(traceback.format_exc())
def Down_load(file_url, file_full_name, now_photo_count, all_photo_count):
    """Download ``file_url`` to ``file_full_name`` with a console progress bar.

    After the download the file type is guessed from its content and the
    matching extension is appended to the file name.  When the type cannot
    be guessed the file is left under its original name.

    :param file_url: URL of the picture.
    :param file_full_name: destination path, without extension.
    :param now_photo_count: index of this picture (progress display only).
    :param all_photo_count: total number of pictures (progress display only).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    }
    # Start downloading the picture.
    with closing(get(file_url, headers=headers, stream=True)) as response:
        chunk_size = 1024  # maximum size of a single read
        # Total size; 0 when the server sends no content-length header.
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0  # bytes transferred so far
        with open(file_full_name, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                # Guard the ratios: an unknown/zero size must not divide by zero.
                done_block = int((data_count / content_size) * 50) if content_size else 0
                data_count = data_count + len(data)
                now_jd = (data_count / content_size) * 100 if content_size else 0
                print("\r %s:[%s%s] %d%% %d/%d" % (file_full_name, done_block * '█', ' ' * (50 - 1 - done_block), now_jd, now_photo_count, all_photo_count), end=" ")
    # Once downloaded, detect the picture type and append its extension.
    file_type = guess(file_full_name)
    if file_type is None:
        return
    rename(file_full_name, file_full_name + '.' + file_type.extension)
def down_load(file_url, file_full_name, now_photo_count, all_photo_count):
    """File downloader.

    Streams ``file_url`` into ``file_full_name`` while printing a progress
    bar, then appends the extension guessed from the file content.  When
    the type cannot be guessed the file keeps its original name.

    :param file_url: URL of the picture.
    :param file_full_name: destination path, without extension.
    :param now_photo_count: index of this picture (progress display only).
    :param all_photo_count: total number of pictures (progress display only).
    :return: None
    """
    # Start downloading the picture.
    with closing(requests.get(file_url, headers=headers, stream=True)) as response:
        chunk_size = 1024  # maximum size of a single read
        # Total size; 0 when the server sends no content-length header.
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0  # bytes transferred so far
        with open(file_full_name, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                # Guard the ratios: an unknown/zero size must not divide by zero.
                done_block = int((data_count / content_size) * 50) if content_size else 0
                data_count = data_count + len(data)
                now_jd = (data_count / content_size) * 100 if content_size else 0
                print("\r %s:[%s%s] %d%% %d/%d" % (file_full_name, done_block * '█', ' ' * (50 - 1 - done_block), now_jd, now_photo_count, all_photo_count), end=" ")
    # Once downloaded, detect the picture type and append its extension.
    file_type = guess(file_full_name)
    if file_type is None:
        return
    rename(file_full_name, file_full_name + '.' + file_type.extension)
def is_video(filename):
    """Return True when the guessed MIME type of ``filename`` contains 'video'.

    Files whose type cannot be guessed are reported as not being videos.
    """
    detected = filetype.guess(filename)
    return detected is not None and 'video' in detected.mime
def validate(self, data):
    """
    Validate that the question requires a file to be uploaded. Also check
    that the file has an acceptable MIME type based on the question asked.

    Only a bounded prefix of the upload is read for type detection and the
    stream is rewound afterwards, so later consumers of ``data["file"]``
    still see the complete content.

    :param data: mapping holding the ``file`` and ``question`` entries.
    :return: ``data`` unchanged when validation passes.
    """
    # Signature detection only inspects the start of the content; 8192
    # bytes is assumed to cover the library's signature window.
    signature_bytes = 8192

    file = data.get("file")
    question = data.get("question")

    if question.type not in Question.FILE_TYPES:
        self.fail_for_field("invalid_question")

    header = file.read(signature_bytes)
    file.seek(0)  # leave the upload readable from the start
    kind = filetype.guess(header)

    is_image = question.type == Question.QuestionType.IMAGE_FILE
    is_pdf = question.type == Question.QuestionType.PDF_FILE

    if kind is None:
        if is_image:
            self.fail_for_field("invalid_image_file")
        elif is_pdf:
            self.fail_for_field("invalid_pdf_file")
        else:
            self.fail_for_field("invalid_file")
    elif is_image and kind.mime not in ["image/jpeg", "image/png"]:
        self.fail_for_field("invalid_image_file")
    elif is_pdf and kind.mime not in ["application/pdf"]:
        self.fail_for_field("invalid_pdf_file")

    return data
def check_document_format(self) -> bool:
    """Check if the downloaded document has the filetype that the
    downloader expects. If the downloader does not expect any special
    filetype, accept anything because there is no way to know if it is
    correct.

    ``self.expected_document_extension`` may be a single extension or a
    list of extensions; both forms are supported.

    :returns: True if it is of the right type, False otherwise
    :rtype:  bool
    """
    def print_warning() -> None:
        self.logger.error(
            "The downloaded data does not seem to be of "
            "the correct type ('%s')", self.expected_document_extension)

    expected = self.expected_document_extension
    if expected is None:
        return True

    import filetype
    retrieved_kind = filetype.guess(self.get_document_data())

    if retrieved_kind is None:
        print_warning()
        return False

    self.logger.debug("Retrieved kind of document seems to be '%s'",
                      retrieved_kind.mime)

    # Normalise to a list; previously the list was only bound when the
    # expectation was NOT a list, so a list value raised UnboundLocalError.
    expected_document_extensions = (
        expected if isinstance(expected, list) else [expected])

    if retrieved_kind.extension in expected_document_extensions:
        return True

    print_warning()
    return False
def validate_inputs(self):
    """Validate inputs.

    - validate input file name and out file name
    - validation for mutually exclusive options e.g. protein sequence for
      contig input_type etc

    Every failed check logs the reason and terminates the process with a
    non-zero exit status (``SystemExit(1)``), so calling scripts can detect
    the failure.

    :raises SystemExit: when any validation fails.
    """
    if not os.path.exists(self.input_sequence):
        logger.error("input file does not exist: {}".format(
            self.input_sequence))
        raise SystemExit(1)

    # otherwise you blow up your input when deleting intermediate files
    if self.output_file == self.input_sequence and self.clean:
        logger.error("output path same as input, must specify "
                     "different path when cleaning to prevent "
                     "accidental deletion of input files")
        raise SystemExit(1)

    kind = filetype.guess(self.input_sequence)
    if kind is None:
        # No binary signature matched; the input must then be FASTA.
        # The comparison is kept as-is so only an explicit False-equal
        # result aborts.
        if self.is_fasta() == False:
            raise SystemExit(1)
    else:
        # A recognised binary signature is never an accepted input.
        logger.error(kind.extension)
        logger.error(kind.mime)
        logger.warning("Sorry, no support for this format.")
        raise SystemExit(1)

    # os.cpu_count() may return None when the count is undetermined.
    available_cpus = os.cpu_count()
    if available_cpus is not None and self.threads > available_cpus:
        logger.error(
            "Argument num_threads illegal value, expected (>=1 and =<{}): given `{}`)"
            .format(available_cpus, self.threads))
        raise SystemExit(1)
def download(file_url, file_path, now_wallpaper_count, all_wallpaper_count):
    """Download a wallpaper to ``file_path`` and append its real extension.

    The body is only written when the server answers 200.  After the
    download the type is guessed from the content and the extension is
    appended; if the target name already exists a copy-suffixed name is
    used.  A file whose type cannot be guessed keeps its original name.

    :param file_url: URL of the wallpaper.
    :param file_path: destination path, without extension.
    :param now_wallpaper_count: index of this wallpaper (unused here).
    :param all_wallpaper_count: total number of wallpapers (unused here).
    """
    headers = {
        'user-agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    }
    # Download the picture.
    with closing(get(file_url, headers=headers, stream=True)) as response:
        if response.status_code != 200:
            return
        # Maximum size of a single read.
        chunk_size = 1024
        # Total size from the response headers; 0 when the header is absent.
        content_size = int(response.headers.get('content-length', 0))
        # Bytes transferred so far.
        data_count = 0
        with open(file_path, 'wb') as file:
            # iter_content yields the body chunk by chunk.
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                # Guard the ratios: an unknown/zero size must not divide by zero.
                done_block = int((data_count / content_size) * 50) if content_size else 0
                data_count = data_count + len(data)
                # Current download percentage.
                now_percentage = (data_count / content_size) * 100 if content_size else 0
                print('\r %s: [%s%s] %d%% %d/%d' % (file_path, '=', '=' * done_block, now_percentage, data_count, content_size), end='', flush=True)
    print('\n')
    # After the download, detect the picture format.
    file_type = guess(file_path)
    if file_type is None:
        return
    try:
        rename(file_path, file_path + '.' + file_type.extension)
    except FileExistsError:
        print('该文件存在')
        rename(file_path, file_path + '副本.' + file_type.extension)
def spliter(file):
    """Split a WAV file on silence and save the pieces next to it.

    Only the first three minutes of the audio are processed.  The chunks
    are written to a ``chunks`` directory beside ``file`` as
    ``0000.wav``, ``0001.wav``, ...

    :param file: path of the audio file.
    :return: number of chunks written, or -1 when the file is not
        detected as WAV (including when its type cannot be guessed).
    """
    # Read the audio.
    print('读入音频')
    # Check the type before decoding so unsupported files are rejected
    # without paying for a full load.
    kind = filetype.guess(file)
    if kind is None or kind.EXTENSION != "wav":
        return -1
    audiotype = kind.EXTENSION

    sound = AudioSegment.from_file(file)
    # For large files, test on the first 3 minutes and tune the
    # parameters according to the result.
    sound = sound[:3 * 60 * 1000]

    # Split.
    print('开始分割')
    # min_silence_len / silence_thresh as passed here: 1400 and -55.
    chunks = split_on_silence(
        sound, min_silence_len=1400, silence_thresh=-55
    )
    print(chunks)

    # Create the output directory.  os.path.join keeps the path relative
    # when ``file`` has no directory part (plain concatenation produced
    # '/chunks/' in that case).
    chunks_path = os.path.join(os.path.split(file)[0], 'chunks')
    os.makedirs(chunks_path, exist_ok=True)

    # Save every segment.
    print('开始保存')
    for i, segment in enumerate(chunks):
        save_name = os.path.join(chunks_path, '%04d.%s' % (i, audiotype))
        segment.export(save_name, format=audiotype)
        print('%04d' % i, len(segment))
    print('保存完毕')
    return len(chunks)
def register():
    """Create a new form from the posted fields and uploaded picture.

    On POST the picture is read from the upload stream, base64-encoded in
    full and sent together with the form fields through
    ``ClientSoap.NewForm``.  On success the user is redirected to
    ``/dashboard``.

    Fixes relative to the previous version:

    * the whole picture is encoded (only the first byte used to be);
    * the MIME type is guessed from the picture bytes and passed as a
      string (the upload object itself used to be handed to
      ``filetype.guess``), falling back to the MIME type reported by the
      upload;
    * the upload is no longer written to the working directory under the
      client-supplied file name.

    NOTE(review): nothing is returned for non-POST requests or when
    ``NewForm`` fails - confirm what response those paths should produce.
    """
    # create new form
    if request.method == 'POST':
        name = request.form['nombre']
        sector = request.form['sector']
        nivelEscolar = request.form['nivelEscolar']
        latitud = request.form['lati']
        longitud = request.form['longi']
        pictureUpload = request.files['thefiles']
        usuario = session['username']

        # BASE64 BYTES OF PICTURE
        data = pictureUpload.read()
        encodedBytes = base64.b64encode(data)
        filename = pictureUpload.filename
        kind = filetype.guess(data)
        mimetype = kind.mime if kind is not None else pictureUpload.mimetype

        # Create a Object Foto
        img = Foto(base64=encodedBytes, mimetype=mimetype, filename=filename)

        # SAVE ON SERVER -> ERROR USUARIO object
        if ClientSoap.NewForm(name, sector, nivelEscolar, usuario, latitud, longitud, img):
            print('New Form Created Succes!')
            return redirect('/dashboard')
        else:
            print('ERROR! During process to created new form')
def searchFile(dirPath):
    """Recursively collect the audio files found below ``dirPath``.

    A file is collected when ``filetype.guess`` recognises it and its MIME
    type is listed in the module-level ``audioType``.

    Paths are built with ``os.path.join`` rather than by ``os.chdir``-ing
    into every directory, so the working directory of the process is left
    untouched and the walk cannot be derailed by ``..`` resolving to an
    unexpected parent (e.g. after entering a symlinked directory).

    :param dirPath: directory to scan, relative or absolute.
    :return: list of ``MusicTrack.MusicTrack`` objects, one per audio file.
    """
    dirPath = os.path.abspath(dirPath)
    musicList = []
    for name in os.listdir(dirPath):
        filePath = os.path.join(dirPath, name)
        if os.path.isdir(filePath):
            musicList += searchFile(filePath)
        else:
            fileType = filetype.guess(filePath)
            if fileType is not None and fileType.mime in audioType:
                musicList.append(MusicTrack.MusicTrack(filePath, fileType.mime))
    return musicList
def search_image(self, text_query: str, path, top_k, score):
    """Print and return the ``top_k`` nearest corpus images for a query.

    ``text_query`` is encoded as an image when it names an existing file
    that ``filetype.guess`` reports as jpg or png, otherwise as text.

    :param text_query: free text, or the path of a jpg/png file.
    :param path: truthy to report full paths instead of bare file names.
    :param top_k: number of neighbours requested from the index.
    :param score: truthy to append the score to each result line.
    :return: list of the printed result strings.
    """
    query_vector = None

    # An existing jpg/png file is used as an image query.
    candidate = Path(text_query)
    if candidate.exists() and candidate.is_file():
        guessed = filetype.guess(str(candidate))
        if guessed and guessed.extension in {"jpg", "png"}:
            query_vector = self.encode_image(str(candidate))

    corpus_vectors, corpus_paths = self.get_image_corpus()
    image_features = torch.Tensor(corpus_vectors)
    image_features /= image_features.norm(dim=-1, keepdim=True)

    # Anything else is encoded as text.
    if query_vector is None:
        query_vector = self.encode_text(text_query)
    query_vector /= query_vector.norm(dim=-1, keepdim=True)

    # Indexed top-k nearest neighbour query.
    corpus_ids, distances = self.index.knn_query(query_vector.cpu(), k=top_k)

    results = []
    for corpus_id, score_value in zip(corpus_ids[0], distances[0]):
        match_file_path = str(corpus_paths[int(corpus_id)])
        shown = match_file_path if path else match_file_path.split("/")[-1]
        line = f"{shown} ({score_value:.4f})" if score else shown
        results.append(line)
        print(line)
    return results
def _remote(self, url: str, identifier: str):
    """Download a library archive and return the path of its ``.agda-lib`` file.

    When ``identifier`` is already installed under ``self.where`` the
    installed lib file is returned directly.  Otherwise the archive is
    downloaded to a temporary directory, extracted, and the first
    ``*.agda-lib`` file in the extraction root is returned.

    :param url: location of the archive to download.
    :param identifier: name of the library's directory below ``self.where``.
    :return: path of the library's ``.agda-lib`` file.
    :raises Exception: when the download is not a known archive type.
    :raises FileNotFoundError: when the archive root holds no
        ``.agda-lib`` file.
    """
    target_directory = path.join(self.where, f"{identifier}")
    if self.installed(target_directory):
        return self._load_lib_file(target_directory)

    location = tempfile.mkdtemp()
    response = requests.get(url, stream=True)
    # Fail on HTTP errors instead of saving an error page as the archive.
    response.raise_for_status()
    download_file = path.join(location, "downloaded.file")

    # download the file (in blocks; the default chunk size is one byte)
    with open(download_file, "wb") as handle:
        for data in tqdm(response.iter_content(chunk_size=8192)):
            handle.write(data)

    # try to uncompress file if needed
    # NOTE(review): this compares the object returned by filetype.guess
    # against ARCHIVE_TYPES - confirm what ARCHIVE_TYPES contains.
    file_type = filetype.guess(download_file)
    if file_type not in ARCHIVE_TYPES:
        raise Exception("Unknown file type, could not uncompress")
    tmpdir = tempfile.mkdtemp()
    Archive(download_file).extractall(tmpdir)

    # check if there exists a ".agda-lib" file
    # (the previous assertion was inverted and could never succeed)
    lib_file = glob.glob(path.join(tmpdir, "*.agda-lib"))
    if not lib_file:
        raise FileNotFoundError(".agda-lib file missing from library root")

    # move directory to `where`
    # shutil.move(target_directory)
    return lib_file[0]
def _add_image(
    client: SymphonyClient,
    local_file_path: str,
    entity_type: ImageEntity,
    entity_id: str,
    category: Optional[str] = None,
) -> None:
    """Store a local file through ``client`` and attach it to an entity.

    The content type is the MIME type guessed from the file, or an empty
    string when it cannot be guessed.

    :param client: client used to store the file and run the mutation.
    :param local_file_path: path of the file to upload.
    :param entity_type: type of the entity the image belongs to.
    :param entity_id: identifier of that entity.
    :param category: optional category recorded with the image.
    """
    guessed = filetype.guess(local_file_path)
    content_type = "" if guessed is None else guessed.MIME

    img_key = client.store_file(local_file_path, content_type, False)
    file_size = os.path.getsize(local_file_path)

    image_input = AddImageInput(
        entityType=entity_type,
        entityId=entity_id,
        imgKey=img_key,
        fileName=os.path.basename(local_file_path),
        fileSize=file_size,
        modified=datetime.utcnow(),
        contentType=content_type,
        category=category,
    )
    AddImageMutation.execute(client, image_input)
def file_validation(board_name, refnum, upload, strip_metadata, is_reply=False):
    """Save an upload when its name and content are an accepted media type.

    The file-name extension is checked first; the file is then saved as
    ``uploads/<board_name>/<refnum><ext>`` and its content is verified
    with ``filetype.guess``.  Content that is unrecognised or not in the
    accepted list is deleted again.  Images get a thumbnail and,
    optionally, are rewritten from their pixel data only.

    :param board_name: board the upload belongs to.
    :param refnum: reference number used as the stored file name.
    :param upload: uploaded file object providing ``filename`` and ``save``.
    :param strip_metadata: rewrite images from pixel data when truthy.
    :param is_reply: forwarded to ``thumbnail``.
    :return: the saved path on success, ``1`` when the file is rejected.
    """
    image_ext = ('.png', '.jpg', '.jpeg')
    video_ext = ('.mp4', '.webm', '.ogg')
    accepted_kinds = ('png', 'jpg', 'jpeg', 'mp4', 'webm', 'ogg')
    image_kinds = tuple(extension[1:] for extension in image_ext)  # remove '.'

    _, ext = os.path.splitext(upload.filename)
    if ext not in image_ext and ext not in video_ext:
        return 1

    save_path = "uploads/%s/%s%s" % (board_name, refnum, ext)
    upload.save(save_path)

    kind = filetype.guess(save_path)
    # An unrecognised file must be rejected and removed as well, instead
    # of crashing and leaving the upload on disk.
    if kind is None or kind.EXTENSION not in accepted_kinds:
        os.remove(save_path)
        return 1

    if kind.EXTENSION in image_kinds:
        thumbnail(save_path, refnum, ext, is_reply)

    if ext in image_ext and strip_metadata:
        # Close the source image before overwriting the same path.
        with Image.open(save_path) as img:
            data = list(img.getdata())
            no_exif = Image.new(img.mode, img.size)
        no_exif.putdata(data)
        no_exif.save(save_path)

    return save_path
def run(self):
    """Prepare the storage directory and process the scanned sheet.

    The storage path is ``<prefix><year>/<month>/<day>/`` where the prefix
    depends on the platform.  A PDF scan is first converted to an image;
    the resulting image is then passed to ``self._extract_shape``.

    :raises ValueError: when the platform is neither Windows nor Linux.
    """
    # Build the storage path from the OS-specific prefix and today's date.
    today = datetime.date.today()
    os_name = platform_os_type.lower()
    # Substring tests: str.index raises when the text is absent, which
    # made the Linux branch unreachable.
    if 'windows' in os_name:
        prefix = windows_store_prefix
    elif 'linux' in os_name:
        prefix = linux_store_prefix
    else:
        raise ValueError("Unsupported platform: {!r}".format(platform_os_type))
    store_path = prefix + str(today.year) + os.sep + str(
        today.month) + os.sep + str(today.day) + os.sep

    # Create the directory when it does not exist yet.
    os.makedirs(store_path, exist_ok=True)

    file_type = filetype.guess(self.sheet_file)
    # Check the guess result (the module object used to be tested here).
    if file_type is None:
        print("Cannot guess file type!")
        return

    # A PDF scan has to be converted to an image first.
    img_path = self.sheet_file
    if file_type.extension == 'pdf':
        img_path = self._convert_pdf2img(self.sheet_file, store_path)

    # TODO: decide whether the image needs rotating.
    # TODO: denoise the scanned image.
    # Extract the information from the scanned image.
    self._extract_shape(img_path)
def run(file_path, *args, **kwargs):
    """Collect signature, metadata, MIME type and extension info for a file.

    :param file_path: path of the file to inspect.
    :return: list of ``logger(...)`` results in this order: signature,
        one ``meta`` entry per extra libmagic field, ``mime-type``,
        ``original extension`` (only when the content type can be
        guessed) and ``current extension`` (only when the file name has
        one).
    """
    import os

    # extract meta data and signature
    with open(file_path, "rb") as handle:
        header = handle.read(2048)
    data = magic.from_buffer(header).split(',')
    result = [logger("signature", data[0])]
    result.extend(logger("meta", field) for field in data[1:])

    # get mime type
    mime = magic.from_file(file_path, mime=True)
    result.append(logger("mime-type", mime))

    # get extension; detection stays best-effort, but only the expected
    # failures are tolerated instead of swallowing every exception.
    try:
        kind = filetype.guess(file_path)
    except (OSError, TypeError):
        kind = None
    if kind is not None:
        result.append(logger("original extension", f".{kind.extension}"))

    # splitext yields '' for names without an extension, so no bogus
    # ".<whole path>" entry is reported for them.
    ext = os.path.splitext(file_path)[1]
    if ext:
        result.append(logger("current extension", ext))
    return result
def file_type_1(filepath):
    """Return the subtype part of the guessed MIME type of ``filepath``.

    Any error, including an unrecognised file, is printed and results in
    ``None``.
    """
    try:
        return filetype.guess(filepath).mime.split("/")[1]
    except Exception as err:
        print(err)
    return None
def _extract_pdf_text(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``."""
    import io

    from pdfminer3.converter import TextConverter
    from pdfminer3.layout import LAParams
    from pdfminer3.pdfinterp import PDFPageInterpreter
    from pdfminer3.pdfinterp import PDFResourceManager
    from pdfminer3.pdfpage import PDFPage

    resource_manager = PDFResourceManager()
    fake_file_handle = io.StringIO()
    converter = TextConverter(resource_manager, fake_file_handle,
                              codec='utf-8', laparams=LAParams())
    try:
        page_interpreter = PDFPageInterpreter(resource_manager, converter)
        with open(pdf_path, 'rb') as fh:
            for page in PDFPage.get_pages(fh, caching=True,
                                          check_extractable=True):
                page_interpreter.process_page(page)
        return fake_file_handle.getvalue()
    finally:
        # Release the converter and buffer even when parsing fails.
        converter.close()
        fake_file_handle.close()


def _extract_image_text(image_path):
    """Return the text recognised by OCR in the image at ``image_path``."""
    from PIL import Image
    from pytesseract import image_to_string

    with Image.open(image_path) as image:
        return image_to_string(image)


def handlefile(myfile):
    """Extract the text of an uploaded PDF or image and build a form from it.

    The file is looked up as ``app/static/upload/<myfile.name>``.  PDFs
    are read with pdfminer3; png/jpg/webp images go through OCR.  The
    extracted text is printed and handed to ``MakeForm``.

    :param myfile: object whose ``name`` is the uploaded file's name.
    :return: the result of ``MakeForm(text)``, or ``None`` when the file
        type cannot be guessed or is not supported.
    """
    filename = 'app/static/upload/' + myfile.name
    kind = filetype.guess(filename)
    if kind is None:
        print('Cannot guess file type!')
        return None
    print('File extension: %s' % kind.extension)
    print('File MIME type: %s' % kind.mime)

    if kind.extension == "pdf":
        text = _extract_pdf_text(filename)
    elif kind.extension in ("png", "jpg", "webp"):
        text = _extract_image_text(filename)
    else:
        # No extractor for this type; previously this path failed with a
        # NameError because ``text`` was never assigned.
        print('Unsupported file type: %s' % kind.extension)
        return None

    print(text)
    dictionary = MakeForm(text)
    return dictionary
def lambda_handler(event, context):
    """Vet an uploaded S3 object and move it to the destination bucket.

    The first 1024 bytes of the object are used to guess its real type.
    The object is copied to ``destination_bucket_name`` only when the
    guessed MIME type equals the stored ``ContentType`` and the guessed
    extension is in ``supported_formats``.  In every case the object is
    deleted from the source bucket afterwards.

    :param event: S3 notification event; only the first record is used.
    :param context: Lambda context (unused).
    :return: ``'Success!'`` when processing completed.
    :raises Exception: any error is printed, the source object is deleted
        as a precaution and the error is re-raised.
    """
    s3_record = event['Records'][0]['s3']
    # 1 - bucket name
    bucket = s3_record['bucket']['name']
    # 2 - object key (URL-decoded)
    key = urllib.parse.unquote_plus(s3_record['object']['key'], encoding='utf-8')
    copy_source_bucket_details = {'Bucket': bucket, 'Key': key}

    try:
        # 3 - fetch the object and sniff its leading bytes
        response = s3.get_object(Bucket=bucket, Key=key)
        head = response["Body"].read(1024)
        kind = filetype.guess(head)
        print("Bucket Name : {}\tKey : {}".format(bucket,key))

        if kind is None or response['ContentType'] != kind.mime:
            print('Content Type Mismatch!. Deleting the file..')
        else:
            print('Same ContentType found')
            if kind.extension not in supported_formats:
                print('Denied ->\tBucket Name :{}\t| Key :{}\t| ContentType :{}\t'.format(bucket,key,kind.extension))
                print('Not Supported format! Deleting the file..')
            else:
                print('Allowed ->\tBucket Name :{}\t| Key :{}\t| ContentType :{}\t'.format(bucket,key,kind.extension))
                print('Copying Files to Destination Bucket')
                # TODO Implement Security Check on ZIP FILES
                s3.copy(copy_source_bucket_details, destination_bucket_name, key, {'ACL':'private'})
                print('Copied Files to Destination Bucket & Deleting files from Source Bucket')
                print('Copied & Deleting ->\tSource_Bucket_Name :{}\t| Key :{}\t| ContentType :{}\t| Destination_Bucket_Name : {}'.format(bucket,key,kind.extension,destination_bucket_name))

        # Every outcome ends with removing the object from the source bucket.
        delete_function(s3,bucket,key)
        return 'Success!'
    except Exception as e:
        print(e)
        print('Some Error Occured! As precautionary deleting the file')
        delete_function(s3,bucket,key)
        raise e
def main():
    """Print the guessed extension and MIME type of the sample JPEG fixture."""
    detected = filetype.guess('tests/fixtures/sample.jpg')
    if detected is not None:
        print('File extension: %s' % detected.extension)
        print('File MIME type: %s' % detected.mime)
    else:
        print('Cannot guess file type!')
def check_if_icon_is_png(plugin_spec):
    """Validate that the plugin's ``icon.png`` really is a PNG file.

    :param plugin_spec: object whose ``directory`` contains ``icon.png``.
    :raises ValidationException: when the icon's content is not detected
        as PNG, including when its type cannot be detected at all.
    """
    directory = plugin_spec.directory
    icon_file = directory + "/" + "icon.png"

    kind = filetype.guess(icon_file)
    # guess() returns None for unrecognised content; report that as a
    # validation failure rather than failing with AttributeError.
    if kind is None:
        raise ValidationException(
            "Included icon file (unknown type) is not 'PNG'.")

    if kind.extension != "png":
        raise ValidationException(
            f"Included icon file ({kind.extension}) is not 'PNG'.")
def guess(self, blob):
    """Return the ``filetype`` match for ``blob``.

    When the library does not recognise the data a notice is printed and
    ``None`` is returned.
    """
    detected = filetype.guess(blob)
    if detected is not None:
        return detected
    print("Filetype doesn't know what this is.")
    # TODO: do something useful here, like our own guessing
    # (e.g. based on self.is_in_ascii_range(blob)).
    return None
def main():
    """Print the guessed extension and MIME type of ``test.cvs``."""
    detected = filetype.guess('test.cvs')
    if detected is not None:
        print('File extension: %s' % detected.extension)
        print('File MIME type: %s' % detected.mime)
    else:
        print('Cannot guess file type!')
def checkFileType(path):
    """Return the guessed extension of the file at ``path``.

    When the type cannot be guessed a message is printed and ``None`` is
    returned.
    """
    detected = filetype.guess(path)
    if detected is not None:
        return detected.extension
    print('Cannot use file type')
    return None
def get_all_img(self, path):
    """Record every image found directly inside ``path``.

    For each entry whose guessed extension is in ``IMAGE_FILE_TYPE`` the
    full path is appended to ``self.image_paths`` and the entry name to
    ``self.image_names``.
    """
    for entry in os.listdir(path):
        full_name = path + '/' + entry
        detected = filetype.guess(full_name)
        if not detected or detected.extension not in IMAGE_FILE_TYPE:
            continue
        self.image_paths.append(full_name)
        self.image_names.append(entry)
def get_document_extension(document_path):
    """Get document extension

    The extension is taken from the file content when ``filetype`` can
    recognise it; otherwise from the text after the last dot of the file
    name, and ``'data'`` when the name has no such part.

    :param document_path: Path of the document
    :type  document_path: str
    :return:: Extension (string)
    :rtype:  str
    """
    import filetype

    # Guess once (the file used to be inspected twice).
    kind = filetype.guess(document_path)
    if kind is not None:
        return kind.extension

    m = re.match(r"^.*\.([^.]+)$", os.path.basename(document_path))
    return m.group(1) if m else 'data'
def main():
    """Print the extension and MIME type guessed from a five-byte buffer."""
    header = bytearray([0xFF, 0xD8, 0xFF, 0x00, 0x08])
    detected = filetype.guess(header)
    if detected is not None:
        print('File extension: %s' % detected.extension)
        print('File MIME type: %s' % detected.mime)
    else:
        print('Cannot guess file type!')
def main():
    """Print the extension and MIME type guessed from the fixture's bytes.

    The fixture is read through a context manager so its handle is closed
    (it used to be left open).
    """
    with open('tests/fixtures/sample.jpg', 'rb') as f:
        data = f.read()

    kind = filetype.guess(data)
    if kind is None:
        print('Cannot guess file type!')
        return

    print('File extension: %s' % kind.extension)
    print('File MIME type: %s' % kind.mime)
def test_guess_mov(self):
    """The .mov fixture is guessed as video/quicktime with extension mov."""
    detected = filetype.guess(FIXTURES + '/sample.mov')
    self.assertIsNotNone(detected)
    self.assertEqual(detected.mime, 'video/quicktime')
    self.assertEqual(detected.extension, 'mov')
def test_guess_tif(self):
    """The .tif fixture is guessed as image/tiff with extension tif."""
    detected = filetype.guess(FIXTURES + '/sample.tif')
    self.assertIsNotNone(detected)
    self.assertEqual(detected.mime, 'image/tiff')
    self.assertEqual(detected.extension, 'tif')
def test_guess_png(self):
    """The .png fixture is guessed as image/png with extension png."""
    detected = filetype.guess(FIXTURES + '/sample.png')
    self.assertIsNotNone(detected)
    self.assertEqual(detected.mime, 'image/png')
    self.assertEqual(detected.extension, 'png')
def test_guess_memoryview(self):
    """A memoryview over the five-byte buffer is guessed as image/jpeg."""
    header = memoryview(bytearray([0xFF, 0xD8, 0xFF, 0x00, 0x08]))
    detected = filetype.guess(header)
    self.assertIsNotNone(detected)
    self.assertEqual(detected.mime, 'image/jpeg')
    self.assertEqual(detected.extension, 'jpg')
def test_guess_buffer_invalid(self):
    """The 0xFF-then-zeros buffer yields no guess."""
    header = bytearray([0xFF, 0x00, 0x00, 0x00, 0x00])
    self.assertIsNone(filetype.guess(header))
def test_guess_file_path(self):
    """The .jpg fixture path is guessed as image/jpeg with extension jpg."""
    detected = filetype.guess(FIXTURES + '/sample.jpg')
    self.assertIsNotNone(detected)
    self.assertEqual(detected.mime, 'image/jpeg')
    self.assertEqual(detected.extension, 'jpg')