def allows(self, file_storage, path):
    """Accept an upload only if its content is recognised as an image.

    The whole stream is read and handed to ``filetype.image_match``; the
    stream is then rewound so later consumers start at the beginning.

    :param file_storage: file-like upload object supporting ``read``/``seek``.
    :param path: unused by this validator.
    :returns: ``True`` when the content matches a known image type.
    :raises NotAllowedUploadError: when no image type matches.
    """
    content = file_storage.read()
    detected = filetype.image_match(content)

    # The read above consumed the stream; rewind it for whoever comes next.
    file_storage.seek(0)

    if detected is not None:
        return True

    # Returning False here instead would raise with the default error message.
    raise NotAllowedUploadError("Custom validation error message")
def remove_empty(self):
    """Strip blank lines, in place, from every non-image file under the root.

    Walks the directory returned by ``self.eventConfig.getRootDir()`` from
    the bottom up. Files that ``filetype.image_match`` recognises and files
    named ``.DS_Store`` are left untouched; every other file is rewritten
    without its whitespace-only lines.
    """
    root_dir = self.eventConfig.getRootDir()
    for current_dir, _subdirs, file_names in os.walk(root_dir, topdown=False):
        for file_name in file_names:
            file_path = current_dir + '/' + file_name

            if filetype.image_match(file_path):
                continue
            if file_name == '.DS_Store':
                continue

            # Two handles on the same file: one reads ahead while the other
            # overwrites from the start, then the leftover tail is cut off.
            with open(file_path) as reader, open(file_path, 'r+') as writer:
                for line in reader:
                    if line.strip():
                        writer.write(line)
                writer.truncate()
def processFiles(self):
    """OCR every image under the root directory and log every file.

    Walks the directory returned by ``self.eventConfig.getRootDir()`` from
    the bottom up. For each file that ``filetype.image_match`` recognises,
    the text extracted by ``pytesseract`` is written next to it as
    ``<name>.txt`` and that text file is what gets logged; any other file is
    logged under its own name via ``self.createLogFile``.
    """
    for dirName, subdirList, filelist in os.walk(
            self.eventConfig.getRootDir(), topdown=False):
        for fname in filelist:
            f = fname
            file_path = dirName + '/' + fname
            if filetype.image_match(file_path):
                # Close the image as soon as OCR is done; otherwise one file
                # handle per image stays open for the rest of the walk.
                with Image.open(file_path) as image:
                    output = pytesseract.image_to_string(image)
                with open(file_path + '.txt', 'w') as newF:
                    newF.write(output)
                f = fname + '.txt'
            # TODO handle audio files
            self.createLogFile(dirName, f)
def extract(self, directory):
    """Unpack the zip/rar archive at ``self.path`` into ``directory``.

    Every member except directories and ``comicinfo.xml`` is written flat
    (by base name) into ``directory``. While extracting, the method updates
    ``self.fileCount``, ``self.imageCount``, ``self.imageHashes``,
    ``self.duplicateImages`` and ``self.extras``.

    Returns without doing anything when the file is not recognised as an
    archive or is an archive type other than zip or rar.

    :param directory: destination directory; created on demand.
    """
    archive_type = filetype.archive_match(self.path)
    if archive_type is None:
        # Not an archive at all: nothing to extract.
        return

    if archive_type.extension == "zip":
        archive = zipfile.ZipFile(self.path)
    elif archive_type.extension == "rar":
        archive = rarfile.RarFile(self.path)
        # Kept from the original: closing a RarFile is deliberately a no-op.
        archive.close = lambda: None
    else:
        return

    self.extractedPath = directory
    try:
        for fileinfo in archive.infolist():
            if not isinstance(fileinfo, rarfile.RarInfo) and fileinfo.is_dir():
                continue

            filename = os.path.basename(fileinfo.filename)
            if filename.lower() in ["comicinfo.xml"]:
                continue

            self.fileCount += 1
            # Read the member once and reuse the bytes for type detection,
            # hashing and writing, instead of decompressing it twice.
            file_bytes = archive.read(fileinfo)

            image_type = filetype.image_match(file_bytes)
            if image_type is not None:
                self.imageCount += 1
                file_hash = hashlib.blake2b(
                    file_bytes, digest_size=16).hexdigest().upper()
                if file_hash in self.imageHashes:
                    self.duplicateImages.add(filename)
                else:
                    image_hash = ImageHasher(
                        data=file_bytes, width=12, height=12).average_hash()
                    self.imageHashes[file_hash] = ImageMeta(
                        os.path.join(self.extractedPath, filename),
                        file_hash, image_hash, image_type.extension)
            else:
                self.extras.add(filename)

            os.makedirs(self.extractedPath, 0o777, True)
            target_path = os.path.join(self.extractedPath, filename)
            with open(target_path, mode="wb") as unarchived_file:
                unarchived_file.write(file_bytes)
    finally:
        # Release the archive even when a member fails to extract.
        archive.close()
def try_as_file_output(self, output: str):
    """Render ``output`` as rich display data if it names a file under /data.

    ``output`` is expected to be a double-quoted path beginning with
    ``"/data/``. The leading ``/data`` is mapped onto ``BRANE_DATA_DIR`` and
    the file is rendered according to its type:

    * ``.json`` -> ``application/json`` (an error message object is returned
      instead when the file cannot be read or parsed),
    * ``.html`` -> ``text/html``,
    * any type recognised by ``image_match`` -> base64 data under its MIME
      type.

    :param output: raw output string to inspect.
    :returns: a ``{'data': ..., 'metadata': {}}`` dict, or ``None`` when the
        output is not a ``/data`` path, the file does not exist, or its type
        is not supported.
    """
    if not output.startswith("\"/data/"):
        return None

    # Only the leading "/data" is the mount prefix; a later "/data" inside
    # the path must be left alone, hence the replacement count of 1.
    output = output.strip('"').replace("/data", BRANE_DATA_DIR, 1)
    if not path.isfile(output):
        return None

    extension = path.splitext(output)[1]

    # Render as JSON, if file extension is .json
    if extension == '.json':
        try:
            with open(output, 'rb') as f:
                json_data = loads(f.read())
        except Exception:
            # Best effort: unreadable or invalid files still produce a JSON
            # payload, but interpreter-exit signals are no longer swallowed.
            json_data = {
                'message': "Please check the file, it doesn't seems to be valid JSON."
            }

        return {'data': {'application/json': json_data}, 'metadata': {}}

    # Render as HTML, if file extension is .html
    if extension == '.html':
        with open(output, 'r') as f:
            html_data = f.read()

        return {'data': {'text/html': html_data}, 'metadata': {}}

    kind = image_match(output)
    if kind is not None:
        with open(output, 'rb') as f:
            image_data = b64encode(f.read()).decode('ascii')

        return {'data': {kind.mime: image_data}, 'metadata': {}}

    return None
def get_image_info(fp):
    """Read the image format and pixel dimensions from a file object.

    Only the header is inspected. SVG/XML input is delegated to
    ``get_svg_info``; PNG, GIF and JPEG are parsed here; every other format
    is reported with ``fmt`` set to ``None``.

    :param fp: seekable binary file object.
    :returns: ``(fmt, width, height)``. ``("unknown", None, None)`` is
        returned when fewer than 24 bytes are available.
    :raises Exception: for a malformed or truncated JPEG, or a JPEG whose
        height is defined through a DNL marker.
    """
    head = fp.read(32)
    fp.seek(0)
    if len(head) < 24:
        return "unknown", None, None

    magic_bytes = b"<?xml", b"<svg"
    if head.strip().startswith(magic_bytes):
        return get_svg_info(fp)

    _type = filetype.image_match(bytearray(head))
    fmt = _type.mime.split("/")[1] if _type else None

    width = None
    height = None
    if fmt == "png":
        check = struct.unpack(">i", head[4:8])[0]
        if check == 0x0D0A1A0A:
            width, height = struct.unpack(">ii", head[16:24])
    elif fmt == "gif":
        width, height = struct.unpack("<HH", head[6:10])
    elif fmt == "jpeg":
        # specification available under
        # http://www.w3.org/Graphics/JPEG/itu-t81.pdf
        # Annex B (page 31/35)

        # we are looking for a SOF marker ("start of frame").
        # skip over the "start of image" marker
        # (filetype detection took care of that).
        fp.seek(2)

        while True:
            byte = fp.read(1)

            # "All markers are assigned two-byte codes: an X'FF' byte
            # followed by a byte which is not equal to 0 or X'FF'."
            if not byte or ord(byte) != 0xFF:
                raise Exception("Malformed JPEG image.")

            # "Any marker may optionally be preceded by any number
            # of fill bytes, which are bytes assigned code X'FF'."
            while byte and ord(byte) == 0xFF:
                byte = fp.read(1)
            if not byte:
                # The file ended inside a marker: report it as malformed
                # instead of failing on ord(b"").
                raise Exception("Malformed JPEG image.")

            if ord(byte) not in _JPEG_SOF_MARKERS:
                # header length parameter takes 2 bytes for all markers
                raw_length = fp.read(2)
                if len(raw_length) < 2:
                    raise Exception("Malformed JPEG image.")
                length = struct.unpack(">H", raw_length)[0]
                fp.seek(length - 2, 1)
                continue

            # else...
            # see Figure B.3 - Frame header syntax (page 35/39) and
            # Table B.2 - Frame header parameter sizes and values
            # (page 36/40)
            fp.seek(3, 1)  # skip header length and precision parameters
            raw_size = fp.read(4)
            if len(raw_size) < 4:
                raise Exception("Malformed JPEG image.")
            height, width = struct.unpack(">HH", raw_size)

            if height == 0:
                # "Value 0 indicates that the number of lines shall be
                # defined by the DNL marker [...]"
                #
                # DNL is not supported by most applications,
                # so we won't support it either.
                raise Exception("JPEG with DNL not supported.")
            break

        # if the file is rotated, we want, for all intents and purposes,
        # to return the dimensions swapped. (all client apps will display
        # the image rotated, and any template computations are likely to want
        # to make decisions based on the "visual", not the "real" dimensions.
        # thumbnail code also depends on this behaviour.)
        fp.seek(0)
        if is_rotated(fp):
            width, height = height, width
    else:
        fmt = None

    return fmt, width, height