예제 #1
0
    def _load_magic_file(self):
        """Choose the libmagic rule files and build both magic handles.

        The rule path defaults to ``constants.MAGIC_RULE_PATH`` followed by
        the system ``magic.mgc``. When ``self.use_cache`` is set, a
        ``custom_magic`` entry is downloaded from the ``system`` cache store
        and, if that succeeds, takes the place of the bundled rule file.
        A ``FileStoreException`` from the cache is logged and otherwise
        ignored, leaving the default path in effect.

        Under ``self.lock`` two cookies are then opened and loaded from the
        selected path: ``self.file_type`` (CONTINUE + RAW) and
        ``self.mime_type`` (CONTINUE + RAW + MIME).
        """
        system_magic = '/usr/share/file/magic.mgc'
        self.magic_file = ':'.join((constants.MAGIC_RULE_PATH, system_magic))

        if self.use_cache:
            self.log.info("Checking for custom magic file...")
            store = get_cachestore('system',
                                   config=self.config,
                                   datastore=self.datastore)
            with store as cache:
                try:
                    custom_magic = "/tmp/custom.magic"
                    cache.download('custom_magic', custom_magic)
                    # The custom rules replace the bundled ones; the system
                    # database stays as the second entry.
                    self.magic_file = ':'.join((custom_magic, system_magic))
                    self.log.info("Custom magic file loaded!")
                except FileStoreException:
                    self.log.info("No custom magic file found.")

        with self.lock:
            description_flags = magic.MAGIC_CONTINUE + magic.MAGIC_RAW
            self.file_type = magic.magic_open(description_flags)
            magic.magic_load(self.file_type, self.magic_file)

            mime_flags = description_flags + magic.MAGIC_MIME
            self.mime_type = magic.magic_open(mime_flags)
            magic.magic_load(self.mime_type, self.magic_file)
예제 #2
0
파일: mime.py 프로젝트: larseggert/xml2rfc
def get_file_mime_type(name):
    """Return what libmagic reports for the file at path *name*.

    A ``magic.Magic`` wrapper is created and its cookie is replaced with one
    opened with ``MAGIC_NONE | MAGIC_MIME | MAGIC_MIME_ENCODING`` and loaded
    with ``None`` as the database path; the wrapper's ``from_file`` result is
    returned unchanged.
    """
    detector = magic.Magic()
    mime_flags = (magic.MAGIC_NONE
                  | magic.MAGIC_MIME
                  | magic.MAGIC_MIME_ENCODING)
    detector.cookie = magic.magic_open(mime_flags)
    magic.magic_load(detector.cookie, None)
    return detector.from_file(name)
예제 #3
0
def create_dataset_series(path: str, preprocess: Callable[[str],
                                                          str]) -> Iterable:
    """Create dataset series.

    The file's MIME type (detected with libmagic, with MAGIC_SYMLINK set)
    selects how the data is read. Because this function is a generator,
    nothing (including the type detection) runs until the result is iterated.

    Arguments:
        path: The path of the file with the data
        preprocess: Preprocessor function, applied to every line of plain
            text and gzip files (not to NumPy data)

    Returns:
        The dataset series: preprocessed lines for ``text/*`` and gzip files,
        or the items of the array loaded by ``np.load`` for
        ``application/octet-stream`` files.

    Raises:
        Exception: If the detected MIME type is none of the above.
    """
    filetyper = magic.Magic(mime=True)
    filetyper.flags |= magic.MAGIC_SYMLINK
    # Re-open the cookie so the extended flags actually take effect.
    filetyper.cookie = magic.magic_open(filetyper.flags)
    magic.magic_load(filetyper.cookie, None)

    log("Loading {}".format(path))
    file_type = filetyper.from_file(path)

    if file_type.startswith('text/'):
        reader = PlainTextFileReader(path)
        for line in reader.read():
            yield preprocess(line)
    elif file_type in ('application/gzip', 'application/x-gzip'):
        gzreader = GZipReader(path)
        for line in gzreader.read():
            yield preprocess(line)
    elif file_type == 'application/octet-stream':
        # A plain ``return np.load(path)`` inside a generator only sets the
        # StopIteration value, so iterating callers would see an empty
        # series. Delegate to the loaded array instead.
        yield from np.load(path)
    else:
        raise Exception("Unsupported data type: {}, file {}".format(
            file_type, path))
예제 #4
0
파일: validators.py 프로젝트: ekr/ietfdb
def get_mime_type(content):
    """Detect the MIME information of the in-memory *content*.

    Two libmagic bindings are supported: if the ``magic`` module exposes
    ``open`` that API is used, otherwise a ``magic.Magic`` wrapper is given a
    cookie opened with the MIME and MIME-encoding flags.

    Returns the detected string split at the first ``'; '`` (a list with at
    most two items).
    """
    if not hasattr(magic, "open"):
        detector = magic.Magic()
        mime_flags = (magic.MAGIC_NONE
                      | magic.MAGIC_MIME
                      | magic.MAGIC_MIME_ENCODING)
        detector.cookie = magic.magic_open(mime_flags)
        magic.magic_load(detector.cookie, None)
        filetype = detector.from_buffer(content)
    else:
        detector = magic.open(magic.MAGIC_MIME)
        detector.load()
        filetype = detector.buffer(content)

    return filetype.split('; ', 1)
예제 #5
0
def get_cleaned_text_file_content(uploaded_file):
    """Read uploaded file, try to fix up encoding to UTF-8 and
    transform line endings into Unix style, then return the content as
    a UTF-8 string. Errors are reported as
    django.core.exceptions.ValidationError exceptions.

    A falsy *uploaded_file* yields an empty string. Validation errors are
    raised when the reported size exceeds 10 * 1000 * 1000, when libmagic
    does not classify the content as text, when no charset can be found in
    libmagic's answer, or when decoding with the detected charset fails."""

    if not uploaded_file:
        return u""

    if uploaded_file.size and uploaded_file.size > 10 * 1000 * 1000:
        raise ValidationError("Text file too large (size %s)." %
                              uploaded_file.size)

    content = "".join(uploaded_file.chunks())

    # Ask libmagic for the MIME description of the content; both the
    # "magic.open" style binding and the "magic.Magic" style binding are
    # supported.
    import magic
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME
                                    | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)

    if not filetype.startswith("text"):
        raise ValidationError(
            "Uploaded file does not appear to be a text file.")

    # Raw string: "\w" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    match = re.search(r"charset=([\w-]+)", filetype)
    if not match:
        raise ValidationError("File has unknown encoding.")

    encoding = match.group(1)
    # ASCII content needs no decoding step before the final UTF-8 encode.
    if "ascii" not in encoding:
        try:
            content = content.decode(encoding)
        except Exception as e:
            raise ValidationError(
                "Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters."
                % str(e))

    # turn line-endings into Unix style
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    return content.encode("utf-8")
예제 #6
0
 def parse_file_charset(self):
     """Check that the first 4096 units read from ``self.fd.file`` look like ASCII.

     The file is rewound, a sample is read and passed to libmagic (either
     binding style is supported). If the string libmagic returns does not
     contain ``'ascii'``, an error is added to ``self.parsed_info``.
     """
     import magic
     self.fd.file.seek(0)
     sample = self.fd.file.read(4096)
     if not hasattr(magic, "open"):
         detector = magic.Magic()
         mime_flags = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
         detector.cookie = magic.magic_open(mime_flags)
         magic.magic_load(detector.cookie, None)
         filetype = detector.from_buffer(sample)
     else:
         detector = magic.open(magic.MAGIC_MIME)
         detector.load()
         filetype = detector.buffer(sample)
     if 'ascii' not in filetype:
         self.parsed_info.add_error('A plain text ASCII document must be submitted.')
예제 #7
0
def put_identify_custom_magic_file(**_):
    """
    Save a new version of identify's custom LibMagic file

    Variables:
    None

    Arguments:
    None

    Data Block:
    <current custom.magic file>

    Result example:
    {"success": True}
    """
    data = request.json.encode('utf-8')

    # Validate the submission by writing it to a temporary file and asking
    # libmagic to load it; the temporary file is always removed afterwards.
    magic_file = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            magic_file = tmp.name
            tmp.write(data)

        test = None
        try:
            test = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_RAW)
            magic.magic_load(test, magic_file)
        except magic.MagicException:
            return make_api_response(
                {'success': False},
                "The magic file you have submitted is invalid.", 400)
        finally:
            # The cookie is only needed for validation; close it so that
            # repeated calls do not leak libmagic handles.
            if test is not None:
                magic.magic_close(test)
    finally:
        if magic_file and os.path.exists(magic_file):
            os.unlink(magic_file)

    with forge.get_cachestore('system', config=config,
                              datastore=STORAGE) as cache:
        # Submitting content identical to the file at MAGIC_RULE_PATH removes
        # the cached custom file instead of storing a copy of it.
        if hashlib.sha256(data).hexdigest() == get_sha256_for_file(
                constants.MAGIC_RULE_PATH):
            cache.delete('custom_magic')
        else:
            cache.save('custom_magic', data, ttl=ADMIN_FILE_TTL, force=True)

    # Notify components watching to reload magic file
    event_sender.send('identify', 'magic')

    return make_api_response({'success': True})
예제 #8
0
파일: util.py 프로젝트: KWMORALE/mwdb-core
def calc_magic(stream):
    """Return libmagic's description of the content behind *stream*.

    If ``get_fd_path(stream)`` yields a path, libmagic inspects that path;
    otherwise the stream is rewound and its whole content is inspected as a
    buffer. The cookie is opened with MAGIC_SYMLINK and is always closed
    before returning, including when loading the database or the lookup
    raises.
    """
    # Missing python-magic features:
    # - magic_descriptor (https://github.com/ahupp/python-magic/pull/227)
    # - direct support for symlink flag
    magic_cookie = magic.magic_open(magic.MAGIC_SYMLINK)
    try:
        # Loading happens inside the try block so a failure here still
        # releases the cookie.
        magic.magic_load(magic_cookie, None)
        fd_path = get_fd_path(stream)
        if fd_path:
            return magic.maybe_decode(magic.magic_file(magic_cookie, fd_path))
        # Handle BytesIO in-memory streams
        stream.seek(0, os.SEEK_SET)
        return magic.maybe_decode(
            magic.magic_buffer(magic_cookie, stream.read()))
    finally:
        magic.magic_close(magic_cookie)
예제 #9
0
 def parse_file_charset(self):
     """Report an error unless libmagic's answer for the file start contains 'ascii'.

     Rewinds ``self.fd.file``, reads up to 4096 units and classifies them
     with whichever libmagic binding is installed. The error is recorded on
     ``self.parsed_info``.
     """
     import magic
     handle = self.fd.file
     handle.seek(0)
     head = handle.read(4096)
     if hasattr(magic, "open"):
         checker = magic.open(magic.MAGIC_MIME)
         checker.load()
         filetype = checker.buffer(head)
     else:
         checker = magic.Magic()
         flags = (magic.MAGIC_NONE
                  | magic.MAGIC_MIME
                  | magic.MAGIC_MIME_ENCODING)
         checker.cookie = magic.magic_open(flags)
         magic.magic_load(checker.cookie, None)
         filetype = checker.from_buffer(head)
     if 'ascii' in filetype:
         return
     self.parsed_info.add_error(
         'A plain text document must be submitted.')
예제 #10
0
파일: plain_parser.py 프로젝트: ekr/ietfdb
 def parse_file_charset(self):
     """Report an error unless libmagic's answer mentions 'ascii' or 'utf-8'.

     Rewinds ``self.fd.file``, reads its whole content and classifies it with
     whichever libmagic binding is installed. The error, which quotes the
     string libmagic returned, is recorded on ``self.parsed_info``.
     """
     import magic
     self.fd.file.seek(0)
     data = self.fd.file.read()
     if not hasattr(magic, "open"):
         probe = magic.Magic()
         probe.cookie = magic.magic_open(
             magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
         magic.magic_load(probe.cookie, None)
         filetype = probe.from_buffer(data)
     else:
         probe = magic.open(magic.MAGIC_MIME)
         probe.load()
         filetype = probe.buffer(data)
     if 'ascii' not in filetype and 'utf-8' not in filetype:
         message = ('A plain text ASCII document is required.  '
                    'Found an unexpected encoding: "%s".  '
                    'You probably have one or more non-ascii characters in your file.')
         self.parsed_info.add_error(message % filetype)
예제 #11
0
파일: textupload.py 프로젝트: algby/ietfdb
def get_cleaned_text_file_content(uploaded_file):
    """Read uploaded file, try to fix up encoding to UTF-8 and
    transform line endings into Unix style, then return the content as
    a UTF-8 string. Errors are reported as
    django.forms.ValidationError exceptions.

    A falsy *uploaded_file* yields an empty string. Validation errors are
    raised when the reported size exceeds 10 * 1000 * 1000, when libmagic
    does not classify the content as text, when no charset can be found in
    libmagic's answer, or when decoding with the detected charset fails."""

    if not uploaded_file:
        return u""

    if uploaded_file.size and uploaded_file.size > 10 * 1000 * 1000:
        raise django.forms.ValidationError("Text file too large (size %s)." % uploaded_file.size)

    content = "".join(uploaded_file.chunks())

    # Ask libmagic for the MIME description of the content; both the
    # "magic.open" style binding and the "magic.Magic" style binding are
    # supported.
    import magic
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)

    if not filetype.startswith("text"):
        raise django.forms.ValidationError("Uploaded file does not appear to be a text file.")

    # Raw string: "\w" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    match = re.search(r"charset=([\w-]+)", filetype)
    if not match:
        raise django.forms.ValidationError("File has unknown encoding.")

    encoding = match.group(1)
    # ASCII content needs no decoding step before the final UTF-8 encode.
    if "ascii" not in encoding:
        try:
            content = content.decode(encoding)
        except Exception as e:
            raise django.forms.ValidationError("Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters." % str(e))

    # turn line-endings into Unix style
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    return content.encode("utf-8")
예제 #12
0
from typing import List, Any, Callable
import magic

# tests: lint, mypy

# pylint: disable=invalid-name
# Type alias: a reader is a callable that takes a list of strings and may
# return any value.
Reader = Callable[[List[str]], Any]

# Module-level file-type detector, built once at import time: a python-magic
# ``Magic`` object in MIME mode whose flags are extended with MAGIC_SYMLINK.
# The cookie is re-opened with the extended flags and loaded with ``None`` as
# the database path (presumably libmagic's default database).
FILETYPER = magic.Magic(mime=True)
FILETYPER.flags |= magic.MAGIC_SYMLINK
FILETYPER.cookie = magic.magic_open(FILETYPER.flags)
magic.magic_load(FILETYPER.cookie, None)
예제 #13
0
    x[1] = re.compile(x[1], re.IGNORECASE)

# Matches a case-insensitive "custom: " prefix at the start of a string.
custom = re.compile(r'^custom: ', re.IGNORECASE)

# Optional ssdeep hashing function; stays None unless ssdeep can be imported
# below.
ssdeep_from_file = None

# libmagic state; all three stay None on Windows, where libmagic is not
# imported.
magic_lock = None
file_type = None
mime_type = None

if platform.system() != 'Windows':
    import magic

    # NOTE(review): presumably guards the shared cookies below against
    # concurrent use - confirm against the callers.
    magic_lock = threading.Lock()

    # Cookie opened with CONTINUE + RAW and loaded from constants.RULE_PATH.
    file_type = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_RAW)
    magic.magic_load(file_type, constants.RULE_PATH)

    # Cookie opened with CONTINUE + MIME, loaded from the same rule path.
    mime_type = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_MIME)
    magic.magic_load(mime_type, constants.RULE_PATH)

    try:
        # noinspection PyUnresolvedReferences
        import ssdeep  # ssdeep requires apt-get cython and pip install ssdeep
        ssdeep_from_file = ssdeep.hash_from_file
    except ImportError:
        pass  # ssdeep_from_file will be None if we fail to import ssdeep.


# Translate the match object into a sub-type label.
def subtype(label):
예제 #14
0
'''Check the type of a given file.

   Requires python-magic and libmagic to be installed; running on a
   Linux platform is recommended.
   '''
import magic
# Open a libmagic handle with default flags and load its database.
ms = magic.magic_open(magic.MAGIC_NONE)
ms.load()
# Read the whole target file as bytes and print libmagic's description of it.
# NOTE(review): uses the Python 2 print statement, and the file object is
# never explicitly closed.
fileName = r'c:\windows\notepad.exe'
data = open(fileName,'rb').read()
print ms.buffer(data)
예제 #15
0

search_in_files is a text-search Python module.

The function search_in_text_files searches files for a regexp and returns
the list of files that match it.
By default the search recurses through folders.

"""

import os
import re
from magic import open as magic_open
from magic import MAGIC_MIME

# Module-level libmagic handle, opened with MAGIC_MIME and loaded once at
# import time; ``recursion`` uses it to classify files via ``MagicMime.file``.
MagicMime = magic_open(MAGIC_MIME)
MagicMime.load()


def recursion(expression, folder, files):
    results = []
    list_nodes = os.listdir(folder)
    for node_name in list_nodes:
        if node_name not in ['..', '.', '.git'] and \
                node_name[-1] != '~':
            if re.search(files, node_name):
                node_path = os.path.join(folder, node_name)
                if os.path.isdir(node_path):
                    results.extend(recursion(expression, node_path, files))
                else:
                    ctype = MagicMime.file(node_path)
예제 #16
0
 def __init__(self,magic_file):
     """Open a libmagic cookie, load *magic_file* into it and record the creating thread.

     The cookie is opened with the COMPRESS, MIME, CONTINUE, PRESERVE_ATIME,
     ERROR and MIME_ENCODING flags. *magic_file* is a text path; it is
     encoded as UTF-8 before being handed to ``magic.magic_load``.
     """
     self.cookie = magic.magic_open(
         magic.MAGIC_COMPRESS
         | magic.MAGIC_MIME
         | magic.MAGIC_CONTINUE
         | magic.MAGIC_PRESERVE_ATIME
         | magic.MAGIC_ERROR
         | magic.MAGIC_MIME_ENCODING)
     magic.magic_load(self.cookie, magic_file.encode('utf-8'))
     # current_thread() replaces the deprecated camelCase alias
     # currentThread(); both return the same Thread object.
     self.thread = threading.current_thread()
예제 #17
0

search_in_files is a text-search Python module.

The function search_in_text_files searches files for a regexp and returns
the list of files that match it.
By default the search recurses through folders.

"""

import os
import re
from magic import open as magic_open
from magic import MAGIC_MIME

# Module-level libmagic handle, opened with MAGIC_MIME and loaded once at
# import time; ``recursion`` uses it to classify files via ``MagicMime.file``.
MagicMime = magic_open(MAGIC_MIME)
MagicMime.load()


def recursion(expression, folder, files):
    results = []
    list_nodes = os.listdir(folder)
    for node_name in list_nodes:
        if node_name not in ['..', '.', '.git'] and \
                node_name[-1] != '~':
            if re.search(files, node_name):
                node_path = os.path.join(folder, node_name)
                if os.path.isdir(node_path):
                    results.extend(recursion(expression, node_path, files))
                else:
                    ctype = MagicMime.file(node_path)
예제 #18
0
class FileUtil(object):
    """Class-method helpers for MIME detection, base64 encoding and zipping.

    All methods are classmethods; the class is used as a namespace and is
    never instantiated by the code shown here.
    """

    # libmagic cookie opened with MAGIC_MIME at class-definition time. None of
    # the methods below reference it; kept for backward compatibility.
    MAGIC_LOAD = magic.magic_open(magic.MAGIC_MIME)
    # MIME type libmagic reports for a short base64 sample; used as the
    # reference value in ``is_base64_encoded``.
    TEXT_PLAIN = magic.from_buffer(b'MQ==\n', mime=True)
    # Template for generated file names; ``%s`` receives a random suffix.
    AUTO_NAME = "autonamed_buffer_data-%s.bin"

    @classmethod
    def auto_name(cls):
        """Return ``AUTO_NAME`` filled with six random uppercase letters/digits."""
        rn = ''.join(
            random.choices(string.ascii_uppercase + string.digits, k=6))
        return cls.AUTO_NAME % rn

    @classmethod
    def multipart_file_tuple(cls,
                             filename,
                             buffer=None,
                             content_type=None,
                             custom_header=None):
        """Build a ``(name, fileobj, content_type, headers)`` tuple.

        Args:
            filename: Path of the file; only its base name is put in the
                tuple. The file itself is opened only when *buffer* is None.
            buffer: Optional in-memory content used instead of the file.
            content_type: Optional MIME type; detected from *buffer* (or from
                the file when *buffer* is None) if omitted.
            custom_header: Optional dict of headers; a new empty dict is used
                when omitted.

        Returns:
            The four-item tuple. When *buffer* is None the file object is an
            open binary file handle that the caller is responsible for
            closing.
        """
        # A fresh dict per call avoids sharing one mutable default between
        # all callers.
        if custom_header is None:
            custom_header = {}

        if content_type is None:
            if buffer is None:
                content_type = cls.file_mime_type(filename)
            else:
                content_type = cls.buffer_mime_type(buffer)

        if buffer is None:
            fileobj = open(filename, 'rb')
        else:
            fileobj = io.BytesIO(buffer)

        fname = os.path.basename(filename)
        return (fname, fileobj, content_type, custom_header)

    @classmethod
    def buffer_mime_type(cls, buffer):
        """Return the MIME type libmagic detects for the in-memory *buffer*."""
        return magic.from_buffer(buffer, mime=True)

    @classmethod
    def file_mime_type(cls, filename):
        """Return the MIME type libmagic detects for the file *filename*."""
        return magic.from_file(filename, mime=True)

    @classmethod
    def buffer_base64_string(cls, buffer, strip_new_lines=True):
        """Return *buffer* (bytes) base64-encoded as a ``str``.

        With *strip_new_lines* true (the default) the line breaks inserted by
        ``base64.encodebytes`` are removed.
        """
        r = base64.encodebytes(buffer).decode('utf8')
        if strip_new_lines:
            r = r.replace('\n', '')
        return r

    @classmethod
    def file_base64_string(cls, filename, strip_new_lines=True):
        """Return the base64 string of the file's content, or None if it does not exist."""
        if not os.path.exists(filename):
            return None
        # Context manager so the handle is closed deterministically.
        with open(filename, 'rb') as fh:
            content = fh.read()
        return cls.buffer_base64_string(content, strip_new_lines)

    @classmethod
    def load_file(cls, filename):
        """Return ``(mime_type, content)`` for the file, or ``(None, None)`` if it does not exist."""
        if not os.path.exists(filename):
            return None, None
        with open(filename, 'rb') as fh:
            content = fh.read()
        return cls.buffer_mime_type(content), content

    @classmethod
    def is_base64_encoded(cls, buffer):
        """Return True if *buffer* looks like text and decodes as base64.

        *buffer* must have the same detected MIME type as ``TEXT_PLAIN`` and
        must be a ``str``: it is encoded to UTF-8 before decoding, so an
        object without ``.encode`` (e.g. ``bytes``) yields False.
        """
        mt = cls.buffer_mime_type(buffer)
        if mt != cls.TEXT_PLAIN:
            return False
        try:
            base64.decodebytes(buffer.encode('utf8'))
        except (AttributeError, ValueError):
            # ValueError covers binascii.Error (invalid base64) and Unicode
            # encoding errors; AttributeError covers inputs lacking .encode.
            return False
        return True

    @classmethod
    def zip_content(cls, name, buffer) -> bytes:
        """Return the bytes of a deflate-compressed zip archive holding *buffer* under *name*."""
        obj = io.BytesIO()
        # The context manager finalises the archive even if writestr raises.
        with zipfile.ZipFile(obj, "a", zipfile.ZIP_DEFLATED, False) as zf_obj:
            zf_obj.writestr(name, buffer)
        return obj.getvalue()

    @classmethod
    def zip_content_b64(cls, name, buffer, strip_new_lines=True) -> str:
        """Zip *buffer* under *name* and return the archive as a base64 ``str``."""
        data = cls.zip_content(name, buffer)
        return cls.buffer_base64_string(data, strip_new_lines=strip_new_lines)