Example #1
0
    def allows(self, file_storage, path):
        """Accept the upload only when its content is recognised as an image.

        The whole stream is read and handed to ``filetype.image_match``;
        afterwards the stream is rewound to position 0. ``path`` is not
        used by this check.

        Returns:
            True when an image type matched.

        Raises:
            NotAllowedUploadError: when no image type matches the content.
        """
        payload = file_storage.read()
        image_kind = filetype.image_match(payload)
        # Reading moved the stream position; put it back at the beginning.
        file_storage.seek(0)

        if image_kind is not None:
            return True

        # Returning False here instead would raise with the default error
        # message; raising directly lets us supply a custom one.
        raise NotAllowedUploadError("Custom validation error message")
Example #2
0
 def remove_empty(self):
     """Strip blank lines, in place, from the files under the root dir.

     Walks ``self.eventConfig.getRootDir()`` bottom-up. Files for which
     ``filetype.image_match`` returns a match, and files named
     ``.DS_Store``, are left untouched. Every other file is rewritten so
     that only lines containing non-whitespace characters remain.
     """
     root_dir = self.eventConfig.getRootDir()
     for dirName, _subdirs, names in os.walk(root_dir, topdown=False):
         for name in names:
             target = dirName + '/' + name
             if filetype.image_match(target):
                 continue
             if name == '.DS_Store':
                 continue
             # The same file is opened twice: one handle reads ahead while
             # the other overwrites from the start with the kept lines.
             with open(target) as reader, open(target, 'r+') as writer:
                 for line in reader:
                     if line.strip():
                         writer.write(line)
                 # Drop whatever is left of the old content past the
                 # last kept line.
                 writer.truncate()
Example #3
0
 def processFiles(self):
     """OCR every image under the root dir and create a log file per file.

     Walks ``self.eventConfig.getRootDir()`` bottom-up. For each file that
     ``filetype.image_match`` recognises, the text returned by
     ``pytesseract.image_to_string`` is written next to the image as
     ``<name>.txt`` and that text file is what gets passed to
     ``self.createLogFile``; every other file is passed through as-is.
     """
     for dirName, subdirList, filelist in os.walk(
             self.eventConfig.getRootDir(), topdown=False):
         for fname in filelist:
             source_path = dirName + '/' + fname
             f = fname
             if filetype.image_match(source_path):
                 # Image.open keeps the underlying file open; the context
                 # manager releases it as soon as OCR is done instead of
                 # leaking one handle per image across the whole walk.
                 with Image.open(source_path) as image:
                     output = pytesseract.image_to_string(image)
                 with open(source_path + '.txt', 'w') as newF:
                     newF.write(output)
                 f = fname + '.txt'
             #TODO handle audio files
             self.createLogFile(dirName, f)
Example #4
0
    def extract(self, directory):
        """Extract the zip/rar archive at ``self.path`` into ``directory``.

        Members are written flat into ``directory`` (only the base name of
        each member is kept). Nothing happens when ``self.path`` is not
        detected as a zip or rar archive.

        While extracting, the following instance state is updated:

        * ``self.extractedPath`` is set to ``directory``.
        * ``self.fileCount`` counts every extracted member.
        * ``self.imageCount`` counts members detected as images.
        * ``self.imageHashes`` maps the BLAKE2b hex digest of each distinct
          image to an ``ImageMeta``; an image whose digest is already
          present is recorded in ``self.duplicateImages`` instead.
        * ``self.extras`` collects the names of non-image members.

        Directory entries (for non-rar members) and ``ComicInfo.xml``
        (case-insensitive) are skipped: neither counted nor extracted.
        """
        archive_type = filetype.archive_match(self.path)
        if archive_type is None:
            return

        if archive_type.extension == "zip":
            archive = zipfile.ZipFile(self.path)
        elif archive_type.extension == "rar":
            archive = rarfile.RarFile(self.path)
            # Kept from the original: close() on the rar archive is
            # replaced by a no-op.
            archive.close = lambda: None
        else:
            return

        self.extractedPath = directory
        try:
            for fileinfo in archive.infolist():
                if (not isinstance(fileinfo, rarfile.RarInfo)
                        and fileinfo.is_dir()):
                    continue
                filename = os.path.basename(fileinfo.filename)
                if filename.lower() == "comicinfo.xml":
                    continue
                self.fileCount += 1

                # Read the member once; the same bytes are used for type
                # sniffing, hashing and writing, so no member stream has
                # to be opened, rewound and closed separately.
                file_bytes = archive.read(fileinfo)

                image_type = filetype.image_match(file_bytes)
                if image_type is not None:
                    self.imageCount += 1
                    file_hash = hashlib.blake2b(
                        file_bytes, digest_size=16).hexdigest().upper()
                    if file_hash in self.imageHashes:
                        self.duplicateImages.add(filename)
                    else:
                        image_hash = ImageHasher(data=file_bytes,
                                                 width=12,
                                                 height=12).average_hash()
                        self.imageHashes[file_hash] = ImageMeta(
                            os.path.join(self.extractedPath, filename),
                            file_hash, image_hash, image_type.extension)
                else:
                    self.extras.add(filename)

                os.makedirs(self.extractedPath, 0o777, True)
                target = os.path.join(self.extractedPath, filename)
                with open(target, mode="wb") as unarchived_file:
                    unarchived_file.write(file_bytes)
        finally:
            # Release the archive even when a member fails to extract.
            archive.close()
Example #5
0
    def try_as_file_output(self, output: str):
        """Try to turn ``output`` into a display bundle for a file under /data.

        ``output`` is expected to be a double-quoted path starting with
        ``/data/``. The leading ``/data`` is mapped onto ``BRANE_DATA_DIR``
        and, if the resulting path is an existing file, it is rendered as:

        * ``application/json`` for ``.json`` files (a dict with a
          ``message`` key is used when the file cannot be loaded),
        * ``text/html`` for ``.html`` files,
        * the detected image MIME type, base64-encoded, when
          ``image_match`` recognises the file.

        Returns:
            A dict with ``data`` and ``metadata`` keys, or ``None`` when
            ``output`` is not such a path, the file does not exist, or the
            file is of none of the supported kinds.
        """
        if not output.startswith("\"/data/"):
            return None

        # Only the leading "/data" is the prefix to remap; a later "/data"
        # component (e.g. "/data/run/data/x.json") must stay untouched.
        output = output.strip('"').replace("/data", BRANE_DATA_DIR, 1)
        if not path.isfile(output):
            return None

        extension = path.splitext(output)[1]

        # Render as JSON, if file extension is .json
        if extension == '.json':
            try:
                with open(output, 'rb') as f:
                    json_data = loads(f.read())
            except Exception:
                # Best effort: any read/parse failure is shown as a message
                # instead of propagating. KeyboardInterrupt and SystemExit
                # are deliberately no longer swallowed.
                json_data = {
                    'message':
                    'Please check the file, it doesn\'t seems to be valid JSON.'
                }

            return {'data': {'application/json': json_data}, 'metadata': {}}

        # Render as HTML, if file extension is .html
        if extension == '.html':
            with open(output, 'r') as f:
                html_data = f.read()

            return {'data': {'text/html': html_data}, 'metadata': {}}

        # Render as an image, if the content is recognised as one
        kind = image_match(output)
        if kind is not None:
            with open(output, 'rb') as f:
                image_data = b64encode(f.read()).decode('ascii')

            return {'data': {kind.mime: image_data}, 'metadata': {}}

        return None
Example #6
0
def get_image_info(fp):
    """Read the format and pixel dimensions of an image from a binary file.

    Only the header of the stream is inspected. SVG input (detected by a
    leading ``<?xml`` or ``<svg``) is delegated to ``get_svg_info``.

    Args:
        fp: a seekable binary file object positioned at the image start.

    Returns:
        A ``(format, width, height)`` tuple. ``format`` is ``"png"``,
        ``"gif"`` or ``"jpeg"`` for those formats. When fewer than 24
        header bytes are available the result is
        ``("unknown", None, None)``; for any other content it is
        ``(None, None, None)``. For JPEG files that ``is_rotated`` reports
        as rotated, width and height are swapped.

    Raises:
        Exception: for a malformed or truncated JPEG, or a JPEG whose
            height is deferred to a DNL marker.
    """
    head = fp.read(32)
    fp.seek(0)
    if len(head) < 24:
        return "unknown", None, None

    magic_bytes = b"<?xml", b"<svg"
    if any(map(head.strip().startswith, magic_bytes)):
        return get_svg_info(fp)

    _type = filetype.image_match(bytearray(head))
    fmt = _type.mime.split("/")[1] if _type else None

    width = None
    height = None
    if fmt == "png":
        # Second half of the PNG signature; dimensions are only trusted
        # when it matches.
        check = struct.unpack(">i", head[4:8])[0]
        if check == 0x0D0A1A0A:
            width, height = struct.unpack(">ii", head[16:24])
    elif fmt == "gif":
        width, height = struct.unpack("<HH", head[6:10])
    elif fmt == "jpeg":
        # specification available under
        # http://www.w3.org/Graphics/JPEG/itu-t81.pdf
        # Annex B (page 31/35)

        # we are looking for a SOF marker ("start of frame").
        # skip over the "start of image" marker
        # (filetype detection took care of that).
        fp.seek(2)

        while True:
            byte = fp.read(1)

            # "All markers are assigned two-byte codes: an X’FF’ byte
            # followed by a byte which is not equal to 0 or X’FF’."
            if not byte or ord(byte) != 0xFF:
                raise Exception("Malformed JPEG image.")

            # "Any marker may optionally be preceded by any number
            # of fill bytes, which are bytes assigned code X’FF’."
            while byte and ord(byte) == 0xFF:
                byte = fp.read(1)

            # The stream ended inside a marker: report it as malformed
            # instead of letting ord(b"") fail with a TypeError.
            if not byte:
                raise Exception("Malformed JPEG image.")

            if ord(byte) not in _JPEG_SOF_MARKERS:
                # header length parameter takes 2 bytes for all markers
                raw_length = fp.read(2)
                if len(raw_length) < 2:
                    raise Exception("Malformed JPEG image.")
                length = struct.unpack(">H", raw_length)[0]
                fp.seek(length - 2, 1)
                continue

            # else...
            # see Figure B.3 – Frame header syntax (page 35/39) and
            # Table B.2 – Frame header parameter sizes and values
            # (page 36/40)
            fp.seek(3, 1)  # skip header length and precision parameters
            raw_size = fp.read(4)
            if len(raw_size) < 4:
                raise Exception("Malformed JPEG image.")
            height, width = struct.unpack(">HH", raw_size)

            if height == 0:
                # "Value 0 indicates that the number of lines shall be
                # defined by the DNL marker [...]"
                #
                # DNL is not supported by most applications,
                # so we won't support it either.
                raise Exception("JPEG with DNL not supported.")

            break

        # if the file is rotated, we want, for all intents and purposes,
        # to return the dimensions swapped. (all client apps will display
        # the image rotated, and any template computations are likely to want
        # to make decisions based on the "visual", not the "real" dimensions.
        # thumbnail code also depends on this behaviour.)
        fp.seek(0)
        if is_rotated(fp):
            width, height = height, width
    else:
        # Recognised by filetype or not, any other format is reported
        # as unsupported.
        fmt = None

    return fmt, width, height