Exemple #1
0
 def get(self, request, mid=None):
     """Serve the file attached to the ``Medium`` whose id is ``mid``.

     The MIME type is sniffed from the file on disk with ``magic_file`` and
     decides how the file is returned:

     * JPEG / PNG / GIF images are re-encoded as PNG and written into an
       ``HttpResponse``;
     * a fixed set of document, audio and video types is handed to
       ``stream`` with the detected MIME type;
     * anything else is handed to ``stream`` as an
       ``application/octet-stream`` attachment.

     ``request`` is not used by this method. The ``Medium`` lookup is not
     guarded here, so any exception it raises propagates to the caller.
     """
     media = Medium.objects.get(id=mid)
     full_path = media.media.path
     mime_type = magic_file(full_path, mime=True)

     if mime_type in ('image/jpeg', 'image/png', 'image/gif'):
         # The payload is always encoded as PNG below, so the header must say
         # PNG too; advertising the sniffed type (e.g. image/jpeg) would
         # mislabel the body.
         response = HttpResponse(content_type='image/png')
         im = img_open(full_path)
         im.save(response, 'PNG')
         return response

     if mime_type in (
             'application/pdf', 'text/x-python', 'text/plain', 'text/html',
             'video/mpeg', 'video/mp4', 'audio/mpeg'
     ):
         return stream(full_path, contentType=mime_type)

     # Unknown type: hand it over as a generic download.
     return stream(full_path,
                   mode='attachment',
                   contentType='application/octet-stream')
Exemple #2
0
def calc_magic(stream):
    """Return libmagic's description of the content behind ``stream``.

    A fresh libmagic cookie is opened with ``MAGIC_SYMLINK`` for each call and
    is always closed again, including when loading the database or the
    detection itself raises.

    Args:
        stream: A file-like object. If ``get_fd_path`` yields a path for it,
            that path is inspected; otherwise the stream is rewound and its
            full content is read and inspected in memory.

    Returns:
        Whatever ``magic.maybe_decode`` returns for libmagic's answer.
    """
    # Missing python-magic features:
    # - magic_descriptor (https://github.com/ahupp/python-magic/pull/227)
    # - direct support for symlink flag
    magic_cookie = magic.magic_open(magic.MAGIC_SYMLINK)
    try:
        # Loading happens inside the try so a failure here still closes the
        # cookie instead of leaking it.
        magic.magic_load(magic_cookie, None)

        fd_path = get_fd_path(stream)
        if fd_path:
            return magic.maybe_decode(magic.magic_file(magic_cookie, fd_path))

        # Handle BytesIO in-memory streams
        stream.seek(0, os.SEEK_SET)
        return magic.maybe_decode(
            magic.magic_buffer(magic_cookie, stream.read()))
    finally:
        magic.magic_close(magic_cookie)
 def from_file(self, path):
     """Run ``magic.magic_file`` on ``path`` using this object's cookie.

     ``path`` is a text path; it is encoded to UTF-8 bytes before being
     passed on. The raw result of ``magic.magic_file`` is returned.
     """
     encoded_path = path.encode('utf-8')
     return magic.magic_file(self.cookie, encoded_path)
def ident(buf, length: int, path) -> Dict:
    """Identify a file from its leading bytes and libmagic's answers.

    Tagging is attempted in three steps, the first success wins:

    1. a libmagic label matching the ``custom`` pattern (``custom: <type>``);
    2. a libmagic MIME type present in ``trusted_mimes``;
    3. the first ``tl_patterns`` entry matching a libmagic label, combined
       with the sub-type returned by ``subtype``.

    A type that is not flagged in ``recognized`` is reset to ``'unknown'``.
    ``'document/office/unknown'`` results are refined by looking up the
    CLSID stored 0x50 bytes after the UTF-16-LE ``Root Entry`` marker in
    ``OLE_CLSID_GUIDs``.

    Args:
        buf: Bytes of the file. The first ``min(64, length)`` bytes feed the
            ASCII/hex dumps; the whole buffer is searched for ``Root Entry``.
        length: Number of usable bytes in ``buf``; a value <= 0 returns the
            default result without touching libmagic.
        path: Path passed to ``magic.magic_file``.

    Returns:
        A dict with the keys ``ascii``, ``hex``, ``magic``, ``mime`` and
        ``type``. Errors during identification are printed and otherwise
        ignored, so the dict is returned with whatever was filled in so far.
    """
    data = {
        'ascii': None,
        'hex': None,
        'magic': None,
        'mime': None,
        'type': 'unknown'
    }

    if length <= 0:
        return data

    header = buf[:min(64, length)]
    data['ascii'] = dotdump(header)
    data['hex'] = safe_str(hexlify(header))

    # noinspection PyBroadException
    try:
        # Loop over the labels returned by libmagic, ...
        labels = []
        if file_type:
            with magic_lock:
                labels = magic.magic_file(file_type, path).split(b'\n')
                # Continuation lines are prefixed with "- "; drop the prefix.
                labels = [
                    label[2:] if label.startswith(b'- ') else label
                    for label in labels
                ]

        mimes = []
        if mime_type:
            with magic_lock:
                mimes = magic.magic_file(mime_type, path).split(b'\n')
                mimes = [
                    mime[2:] if mime.startswith(b'- ') else mime
                    for mime in mimes
                ]

        # For user feedback set the mime and magic meta data to always be the primary
        # libmagic responses
        if labels:
            data['magic'] = safe_str(labels[0])

        if mimes and mimes[0] != b'':
            data['mime'] = safe_str(mimes[0])

        # Highest priority is given to a label matching the custom pattern
        tagged = False

        for label in labels:
            label = dotdump(label)

            if custom.match(label):
                data['type'] = label.split('custom: ')[1].strip()
                tagged = True
                break

        # Second priority is mime types marked as trusted
        if not tagged:
            for mime in mimes:
                mime = dotdump(mime)

                if mime in trusted_mimes:
                    data['type'] = trusted_mimes[mime]
                    tagged = True
                    break

        # As a third priority try matching the tl_patterns
        if not tagged:
            minimum = len(tl_patterns)
            sl_tag = None

            # Try each label and see how far down the tl_patterns list we go
            # before we hit a match, the closer to the beginning of the list we are the better
            # the tag match is. The final line of tl_patterns matches anything and sets
            # tag to 'unknown', so this loop should never finish with sl_tag as None
            # unless the tl_patterns table has been changed inappropriately
            for label in labels:
                label = dotdump(label)

                # ... match against our patterns and, ...
                index = 0
                for entry in tl_patterns:
                    if index >= minimum:
                        break

                    if entry[1].search(label):  # pylint:disable=E1101
                        break

                    index += 1

                # ... keep highest precedence (lowest index) match.
                if index < minimum:
                    minimum = index
                    sl_tag = subtype(label)

                    # If a label does match, take the best from that label
                    # Further labels from magic are probably terrible
                    break

            # A failure here is caught by the broad handler below, leaving
            # the type as 'unknown'.
            assert sl_tag is not None, "tl_patterns seems to be missing a match all => unknown rule at the end"

            # Based on the sub tag we found, figure out the top level tag to use
            tl_tag = sl_to_tl.get(sl_tag, tl_patterns[minimum][0])
            data['type'] = '/'.join((tl_tag, sl_tag))

    except Exception as e:
        # Identification is best-effort: report the problem and carry on with
        # whatever has been determined so far.
        print(str(e))

    if not recognized.get(data['type'], False):
        data['type'] = 'unknown'

    if data['type'] == 'document/office/unknown':
        # noinspection PyBroadException
        try:
            root_entry_property_offset = buf.find(
                u"Root Entry".encode("utf-16-le"))
            if -1 != root_entry_property_offset:
                # Get root entry's GUID and try to guess document type
                clsid_offset = root_entry_property_offset + 0x50
                if len(buf) >= clsid_offset + 16:
                    clsid = buf[clsid_offset:clsid_offset + 16]
                    # Compare against *bytes*: a str operand never equals a
                    # bytes CLSID, which made the all-zero check a no-op.
                    if len(clsid) == 16 and clsid != b"\0" * len(clsid):
                        clsid_str = uuid.UUID(bytes_le=clsid)
                        clsid_str = clsid_str.urn.rsplit(':', 1)[-1].upper()
                        if clsid_str in OLE_CLSID_GUIDs:
                            data['type'] = OLE_CLSID_GUIDs[clsid_str]
        except Exception:
            # Best-effort refinement only; keep 'document/office/unknown'.
            pass

    return data
Exemple #5
0
    def ident(self, buf, length: int, path) -> Dict:
        """Identify a file from its leading bytes and libmagic's answers.

        The result dict carries ``ascii``/``hex`` dumps of the first
        ``min(64, length)`` bytes, the primary libmagic label (``magic``),
        the first non-empty libmagic MIME type (``mime``) and a ``type`` tag.
        The tag is chosen by, in order: a label matching ``self.custom``, a
        MIME type found in ``self.trusted_mimes``, then the first entry of
        ``self.compiled_magic_patterns`` matching a label. Unknown files
        with a ``text/*`` MIME become ``text/plain``, and
        ``document/office/unknown`` is refined from the OLE2 root-entry
        CLSID (or tagged ``quarantine/mcafee`` when a ``Details`` entry sits
        next to an unusable CLSID).

        A ``length`` <= 0 returns the default result immediately. Errors
        raised while querying or matching libmagic output are logged through
        ``self.log`` and otherwise ignored.
        """
        data = dict.fromkeys(("ascii", "hex", "magic", "mime"))
        data["type"] = "unknown"

        if length <= 0:
            return data

        head = buf[:min(64, length)]
        data["ascii"] = dotdump(head)
        data["hex"] = safe_str(hexlify(head))

        # noinspection PyBroadException
        try:
            labels = []
            mimes = []

            # Query libmagic for the descriptions and for the MIME types; on
            # a MagicException the exception message is used as the answer.
            with self.lock:
                try:
                    raw_labels = magic.magic_file(self.file_type, path)
                    labels = raw_labels.split(b"\n")
                except magic.MagicException as label_err:
                    labels = label_err.message.split(b"\n")

                try:
                    raw_mimes = magic.magic_file(self.mime_type, path)
                    mimes = raw_mimes.split(b"\n")
                except magic.MagicException as mime_err:
                    mimes = mime_err.message.split(b"\n")

            # Drop the "- " continuation prefix and surrounding whitespace.
            mimes = [
                (item[2:] if item.startswith(b"- ") else item).strip()
                for item in mimes
            ]
            labels = [
                (item[2:] if item.startswith(b"- ") else item).strip()
                for item in labels
            ]

            # For user feedback the reported magic/mime are always the
            # primary libmagic responses.
            if labels:
                # If an expected label is not the first label returned by
                # Magic, move it to the front. Whether the mime list follows
                # along depends on the special case.
                promotions = (
                    (b"OLE 2 Compound Document : Microsoft Word Document",
                     False),
                    (b"Lotus 1-2-3 WorKsheet", True),
                )
                for needle, move_mime_too in promotions:
                    position = next(
                        (pos for pos, candidate in enumerate(labels)
                         if needle in candidate),
                        -1,
                    )
                    if position < 0:
                        continue
                    labels.insert(0, labels.pop(position))
                    if len(labels) == len(mimes) and move_mime_too:
                        mimes.insert(0, mimes.pop(position))
                data["magic"] = safe_str(labels[0])

            primary_mime = next((item for item in mimes if item != b""), None)
            if primary_mime is not None:
                data["mime"] = safe_str(primary_mime)

            # First priority: custom types announced by a label.
            for raw_label in labels:
                text = dotdump(raw_label)
                if self.custom.match(text):
                    data["type"] = text.split("custom: ")[1].strip()
                    break

            # Second priority: mime types marked as trusted.
            if data["type"] == "unknown":
                with self.lock:
                    trusted = self.trusted_mimes

                for raw_mime in mimes:
                    mime_text = dotdump(raw_mime)
                    if mime_text in trusted:
                        data["type"] = trusted[mime_text]
                        break

            # Third priority: the compiled magic patterns, label by label.
            if data["type"] == "unknown":
                with self.lock:
                    patterns = self.compiled_magic_patterns

                for raw_label in labels:
                    hit = next(
                        (rule for rule in patterns
                         if rule[1].search(dotdump(raw_label))),  # pylint: disable=E1101
                        None,
                    )
                    if hit is not None:
                        data["type"] = hit[0]
                        break

        except Exception as e:
            self.log.error(
                f"An error occured during file identification: {e.__class__.__name__}({str(e)})"
            )

        # If mime is text/* and type is unknown, set text/plain to trigger
        # language detection later.
        mime_value = data["mime"]
        if (data["type"] == "unknown" and mime_value is not None
                and mime_value.startswith("text/")):
            data["type"] = "text/plain"

        # Lookup office documents by GUID if we're still not sure what they are
        if data["type"] == "document/office/unknown":
            # noinspection PyBroadException
            try:
                root_at = buf.find(u"Root Entry".encode("utf-16-le"))
                if root_at != -1:
                    # The root entry's GUID sits 0x50 bytes after the marker.
                    guid_at = root_at + 0x50
                    if len(buf) >= guid_at + 16:
                        raw_guid = buf[guid_at:guid_at + 16]
                        if len(raw_guid) == 16 and raw_guid != b"\0" * len(raw_guid):
                            guid_text = str(
                                uuid.UUID(bytes_le=raw_guid)).upper()
                            if guid_text in OLE_CLSID_GUIDs:
                                data["type"] = OLE_CLSID_GUIDs[guid_text]
                        else:
                            window = buf[:root_at + 0x100]
                            details_at = window.find(
                                u"Details".encode("utf-16-le"))
                            if details_at != -1:
                                data["type"] = "quarantine/mcafee"
            except Exception:
                pass

        return data
Exemple #6
0
	def contentType(self):
		"""Return the MIME type that ``magic_file`` detects for ``self.media.path``."""
		# A second, unreachable ``return`` that referenced an undefined
		# ``mime_type`` used to follow this line; it was dead code.
		return magic_file(self.media.path, mime=True)