Example #1
0
    def apply(self, filename, evaluation):
        'FileFormat[filename_String]'

        findfile = Expression('FindFile', filename).evaluate(evaluation)
        if findfile == Symbol('$Failed'):
            evaluation.message('FileFormat', 'nffil',
                               Expression('FileFormat', filename))
            return findfile

        path = findfile.get_string_value()

        if not FileFormat.detector:
            loader = magic.MagicLoader()
            loader.load()
            FileFormat.detector = magic.MagicDetector(loader.mimetypes)

        mime = set(FileFormat.detector.match(path))

        # If match fails match on extension only
        if mime == set([]):
            mime, encoding = mimetypes.guess_type(path)
            if mime is None:
                mime = set([])
            else:
                mime = set([mime])

        result = []
        for key in mimetype_dict.keys():
            if key in mime:
                result.append(mimetype_dict[key])

        # the following fixes an extremely annoying behaviour on some (not all)
        # installations of Windows, where we end up classifying .csv files als XLS.
        if len(result) == 1 and result[0] == 'XLS' and path.lower().endswith(
                '.csv'):
            return String('CSV')

        if len(result) == 0:
            result = 'Binary'
        elif len(result) == 1:
            result = result[0]
        else:
            return None

        return from_python(result)
Example #2
0
    def apply(self, filename, evaluation):
        'FileFormat[filename_String]'

        findfile = Expression('FindFile', filename).evaluate(evaluation)
        if findfile == Symbol('$Failed'):
            evaluation.message(
                'FileFormat', 'nffil', Expression('FileFormat', filename))
            return findfile

        path = findfile.get_string_value()

        if not FileFormat.detector:
            loader = magic.MagicLoader()
            loader.load()
            FileFormat.detector = magic.MagicDetector(loader.mimetypes)

        mime = set(FileFormat.detector.match(path))

        # If match fails match on extension only
        if mime == set([]):
            mime, encoding = mimetypes.guess_type(path)
            if mime is None:
                mime = set([])
            else:
                mime = set([mime])

        result = []
        for key in mimetype_dict.keys():
            if key in mime:
                result.append(mimetype_dict[key])

        # the following fixes an extremely annoying behaviour on some (not all)
        # installations of Windows, where we end up classifying .csv files als XLS.
        if len(result) == 1 and result[0] == 'XLS' and path.lower().endswith('.csv'):
            return String('CSV')

        if len(result) == 0:
            result = 'Binary'
        elif len(result) == 1:
            result = result[0]
        else:
            return None

        return from_python(result)
Example #3
0
    def apply(self, filename, evaluation):
        'FileFormat[filename_String]'

        findfile = Expression('FindFile', filename).evaluate(evaluation)
        if findfile == Symbol('$Failed'):
            evaluation.message(
                'FileFormat', 'nffil', Expression('FileFormat', filename))
            return findfile

        path = findfile.get_string_value()

        if not FileFormat.detector:
            loader = magic.MagicLoader()
            loader.load()
            FileFormat.detector = magic.MagicDetector(loader.mimetypes)

        mime = set(FileFormat.detector.match(path))

        # If match fails match on extension only
        if mime == set([]):
            mime, encoding = mimetypes.guess_type(path)
            if mime is None:
                mime = set([])
            else:
                mime = set([mime])

        result = []
        for key in mimetype_dict.keys():
            if key in mime:
                result.append(mimetype_dict[key])

        if len(result) == 0:
            result = 'Binary'
        elif len(result) == 1:
            result = result[0]
        else:
            return None

        return from_python(result)
Example #4
0
    def apply(self, filename, evaluation):
        'FileFormat[filename_String]'

        findfile = Expression('FindFile', filename).evaluate(evaluation)
        if findfile == Symbol('$Failed'):
            evaluation.message('FileFormat', 'nffil',
                               Expression('FileFormat', filename))
            return findfile

        path = findfile.get_string_value()

        if not FileFormat.detector:
            loader = magic.MagicLoader()
            loader.load()
            FileFormat.detector = magic.MagicDetector(loader.mimetypes)

        mime = set(FileFormat.detector.match(path))

        # If match fails match on extension only
        if mime == set([]):
            mime, encoding = mimetypes.guess_type(path)
            if mime is None:
                mime = set([])
            else:
                mime = set([mime])

        result = []
        for key in mimetype_dict.keys():
            if key in mime:
                result.append(mimetype_dict[key])

        if len(result) == 0:
            result = 'Binary'
        elif len(result) == 1:
            result = result[0]
        else:
            return None

        return from_python(result)
Example #5
0
    def apply(self, filename, evaluation):
        'FileFormat[filename_String]'

        findfile = Expression('FindFile', filename).evaluate(evaluation)
        if findfile == Symbol('$Failed'):
            evaluation.message('FileFormat', 'nffil',
                               Expression('FileFormat', filename))
            return findfile

        path = findfile.get_string_value()

        if not FileFormat.detector:
            loader = magic.MagicLoader()
            loader.load()
            FileFormat.detector = magic.MagicDetector(loader.mimetypes)

        mime = set(FileFormat.detector.match(path))

        # If match fails match on extension only
        if mime == set([]):
            mime, encoding = mimetypes.guess_type(path)
            if mime is None:
                mime = set([])
            else:
                mime = set([mime])

        # TODO: Add more file formats

        typedict = {
            'application/dicom': 'DICOM',
            'application/dbase': 'DBF',
            'application/dbf': 'DBF',
            'application/eps': 'EPS',
            'application/fits': 'FITS',
            'application/json': 'JSON',
            'application/mathematica': 'NB',
            'application/mdb': 'MDB',
            'application/mbox': 'MBOX',
            'application/msaccess': 'MDB',
            'application/octet-stream': 'OBJ',
            'application/pdf': 'PDF',
            'application/pcx': 'PCX',
            'application/postscript': 'EPS',
            'application/rss+xml': 'RSS',
            'application/rtf': 'RTF',
            'application/sla': 'STL',
            'application/tga': 'TGA',
            'application/vnd.google-earth.kml+xml': 'KML',
            'application/vnd.ms-excel': 'XLS',
            'application/vnd.ms-pki.stl': 'STL',
            'application/vnd.oasis.opendocument.spreadsheet': 'ODS',
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
            'XLSX',  # nopep8
            'application/vnd.sun.xml.calc': 'SXC',
            'application/vnd.msaccess': 'MDB',
            'application/vnd.wolfram.cdf': 'CDF',
            'application/vnd.wolfram.cdf.text': 'CDF',
            'application/vnd.wolfram.mathematica.package': 'Package',
            'application/xhtml+xml': 'XHTML',
            'application/xml': 'XML',
            'application/x-3ds': '3DS',
            'application/x-cdf': 'NASACDF',
            'application/x-eps': 'EPS',
            'application/x-flac': 'FLAC',
            'application/x-font-bdf': 'BDF',
            'application/x-hdf': 'HDF',
            'application/x-msaccess': 'MDB',
            'application/x-netcdf': 'NetCDF',
            'application/x-shockwave-flash': 'SWF',
            'application/x-tex': 'TeX',  # Also TeX
            'audio/aiff': 'AIFF',
            'audio/basic': 'AU',  # Also SND
            'audio/midi': 'MIDI',
            'audio/x-aifc': 'AIFF',
            'audio/x-aiff': 'AIFF',
            'audio/x-flac': 'FLAC',
            'audio/x-wav': 'WAV',
            'chemical/seq-na-genbank': 'GenBank',
            'chemical/seq-aa-fasta': 'FASTA',
            'chemical/seq-na-fasta': 'FASTA',
            'chemical/seq-na-fastq': 'FASTQ',
            'chemical/seq-na-sff': 'SFF',
            'chemical/x-cif': 'CIF',
            'chemical/x-daylight-smiles': 'SMILES',
            'chemical/x-hin': 'HIN',
            'chemical/x-jcamp-dx': 'JCAMP-DX',
            'chemical/x-mdl-molfile': 'MOL',
            'chemical/x-mdl-sdf': 'SDF',
            'chemical/x-mdl-sdfile': 'SDF',
            'chemical/x-mdl-tgf': 'TGF',
            'chemical/x-mmcif': 'CIF',
            'chemical/x-mol2': 'MOL2',
            'chemical/x-mopac-input': 'Table',
            'chemical/x-pdb': 'PDB',
            'chemical/x-xyz': 'XYZ',
            'image/bmp': 'BMP',
            'image/eps': 'EPS',
            'image/fits': 'FITS',
            'image/gif': 'GIF',
            'image/jp2': 'JPEG2000',
            'image/jpeg': 'JPEG',
            'image/pbm': 'PNM',
            'image/pcx': 'PCX',
            'image/pict': 'PICT',
            'image/png': 'PNG',
            'image/svg+xml': 'SVG',
            'image/tga': 'TGA',
            'image/tiff': 'TIFF',
            'image/vnd.dxf': 'DXF',
            'image/vnd.microsoft.icon': 'ICO',
            'image/x-3ds': '3DS',
            'image/x-dxf': 'DXF',
            'image/x-exr': 'OpenEXR',
            'image/x-icon': 'ICO',
            'image/x-ms-bmp': 'BMP',
            'image/x-pcx': 'PCX',
            'image/x-portable-anymap': 'PNM',
            'image/x-portable-bitmap': 'PBM',
            'image/x-portable-graymap': 'PGM',
            'image/x-portable-pixmap': 'PPM',
            'image/x-xbitmap': 'XBM',
            'model/x3d+xml': 'X3D',
            'model/vrml': 'VRML',
            'model/x-lwo': 'LWO',
            'model/x-pov': 'POV',
            'text/calendar': 'ICS',
            'text/comma-separated-values': 'CSV',
            'text/csv': 'CSV',
            'text/html': 'HTML',
            'text/mathml': 'MathML',
            'text/plain': 'Text',
            'text/rtf': 'RTF',
            'text/scriptlet': 'SCT',
            'text/tab-separated-values': 'TSV',
            'text/texmacs': 'Text',
            'text/vnd.graphviz': 'DOT',
            'text/x-csrc': 'C',
            'text/x-tex': 'TeX',
            'text/x-vcalendar': 'VCS',
            'text/x-vcard': 'VCF',
            'video/avi': 'AVI',
            'video/quicktime': 'QuickTime',
            'video/x-flv': 'FLV',
            # None: 'Binary',
        }

        result = []
        for key in typedict.keys():
            if key in mime:
                result.append(typedict[key])

        if len(result) == 0:
            result = 'Binary'
        elif len(result) == 1:
            result = result[0]
        else:
            return None

        return from_python(result)
Example #6
0
 def _infer_form(self, filename, evaluation):
     ext = Expression('FileExtension', filename).evaluate(evaluation)
     ext = ext.get_string_value()
     return self._extdict.get(ext)
Example #7
0
 def _infer_form(self, filename, evaluation):
     ext = Expression('FileExtension', filename).evaluate(evaluation)
     ext = ext.get_string_value().lower()
     return self._extdict.get(ext)
Example #8
0
    def apply(self, filename, evaluation):
        'FileFormat[filename_String]'

        findfile = Expression('FindFile', filename).evaluate(evaluation)
        if findfile == Symbol('$Failed'):
            evaluation.message(
                'FileFormat', 'nffil', Expression('FileFormat', filename))
            return findfile

        path = findfile.get_string_value()

        if not FileFormat.detector:
            loader = magic.MagicLoader()
            loader.load()
            FileFormat.detector = magic.MagicDetector(loader.mimetypes)

        mime = set(FileFormat.detector.match(path))

        # If match fails match on extension only
        if mime == set([]):
            mime, encoding = mimetypes.guess_type(path)
            if mime is None:
                mime = set([])
            else:
                mime = set([mime])

        # TODO: Add more file formats

        typedict = {
            'application/dicom': 'DICOM',
            'application/dbase': 'DBF',
            'application/dbf': 'DBF',
            'application/eps': 'EPS',
            'application/fits': 'FITS',
            'application/json': 'JSON',
            'application/mathematica': 'NB',
            'application/mdb': 'MDB',
            'application/mbox': 'MBOX',
            'application/msaccess': 'MDB',
            'application/octet-stream': 'OBJ',
            'application/pdf': 'PDF',
            'application/pcx': 'PCX',
            'application/postscript': 'EPS',
            'application/rss+xml': 'RSS',
            'application/rtf': 'RTF',
            'application/sla': 'STL',
            'application/tga': 'TGA',
            'application/vnd.google-earth.kml+xml': 'KML',
            'application/vnd.ms-excel': 'XLS',
            'application/vnd.ms-pki.stl': 'STL',
            'application/vnd.oasis.opendocument.spreadsheet': 'ODS',
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'XLSX',    # nopep8
            'application/vnd.sun.xml.calc': 'SXC',
            'application/vnd.msaccess': 'MDB',
            'application/vnd.wolfram.cdf': 'CDF',
            'application/vnd.wolfram.cdf.text': 'CDF',
            'application/vnd.wolfram.mathematica.package': 'Package',
            'application/xhtml+xml': 'XHTML',
            'application/xml': 'XML',
            'application/x-3ds': '3DS',
            'application/x-cdf': 'NASACDF',
            'application/x-eps': 'EPS',
            'application/x-flac': 'FLAC',
            'application/x-font-bdf': 'BDF',
            'application/x-hdf': 'HDF',
            'application/x-msaccess': 'MDB',
            'application/x-netcdf': 'NetCDF',
            'application/x-shockwave-flash': 'SWF',
            'application/x-tex': 'TeX',  # Also TeX
            'audio/aiff': 'AIFF',
            'audio/basic': 'AU',        # Also SND
            'audio/midi': 'MIDI',
            'audio/x-aifc': 'AIFF',
            'audio/x-aiff': 'AIFF',
            'audio/x-flac': 'FLAC',
            'audio/x-wav': 'WAV',
            'chemical/seq-na-genbank': 'GenBank',
            'chemical/seq-aa-fasta': 'FASTA',
            'chemical/seq-na-fasta': 'FASTA',
            'chemical/seq-na-fastq': 'FASTQ',
            'chemical/seq-na-sff': 'SFF',
            'chemical/x-cif': 'CIF',
            'chemical/x-daylight-smiles': 'SMILES',
            'chemical/x-hin': 'HIN',
            'chemical/x-jcamp-dx': 'JCAMP-DX',
            'chemical/x-mdl-molfile': 'MOL',
            'chemical/x-mdl-sdf': 'SDF',
            'chemical/x-mdl-sdfile': 'SDF',
            'chemical/x-mdl-tgf': 'TGF',
            'chemical/x-mmcif': 'CIF',
            'chemical/x-mol2': 'MOL2',
            'chemical/x-mopac-input': 'Table',
            'chemical/x-pdb': 'PDB',
            'chemical/x-xyz': 'XYZ',
            'image/bmp': 'BMP',
            'image/eps': 'EPS',
            'image/fits': 'FITS',
            'image/gif': 'GIF',
            'image/jp2': 'JPEG2000',
            'image/jpeg': 'JPEG',
            'image/pbm': 'PNM',
            'image/pcx': 'PCX',
            'image/pict': 'PICT',
            'image/png': 'PNG',
            'image/svg+xml': 'SVG',
            'image/tga': 'TGA',
            'image/tiff': 'TIFF',
            'image/vnd.dxf': 'DXF',
            'image/vnd.microsoft.icon': 'ICO',
            'image/x-3ds': '3DS',
            'image/x-dxf': 'DXF',
            'image/x-exr': 'OpenEXR',
            'image/x-icon': 'ICO',
            'image/x-ms-bmp': 'BMP',
            'image/x-pcx': 'PCX',
            'image/x-portable-anymap': 'PNM',
            'image/x-portable-bitmap': 'PBM',
            'image/x-portable-graymap': 'PGM',
            'image/x-portable-pixmap': 'PPM',
            'image/x-xbitmap': 'XBM',
            'model/x3d+xml': 'X3D',
            'model/vrml': 'VRML',
            'model/x-lwo': 'LWO',
            'model/x-pov': 'POV',
            'text/calendar': 'ICS',
            'text/comma-separated-values': 'CSV',
            'text/csv': 'CSV',
            'text/html': 'HTML',
            'text/mathml': 'MathML',
            'text/plain': 'Text',
            'text/rtf': 'RTF',
            'text/scriptlet': 'SCT',
            'text/tab-separated-values': 'TSV',
            'text/texmacs': 'Text',
            'text/vnd.graphviz': 'DOT',
            'text/x-csrc': 'C',
            'text/x-tex': 'TeX',
            'text/x-vcalendar': 'VCS',
            'text/x-vcard': 'VCF',
            'video/avi': 'AVI',
            'video/quicktime': 'QuickTime',
            'video/x-flv': 'FLV',
            # None: 'Binary',
        }

        result = []
        for key in typedict.keys():
            if key in mime:
                result.append(typedict[key])

        if len(result) == 0:
            result = 'Binary'
        elif len(result) == 1:
            result = result[0]
        else:
            return None

        return from_python(result)