def convert(self, txrm_file, custom_reference=None, ignore_reference=False, annotate=False):
    """Convert a txrm file into an image stack stored on this instance.

    Reads all frames from the OLE container, optionally applies a
    reference (custom file or internal stream), stitches version-13 style
    mosaics, optionally extracts annotations, and builds OME metadata.

    :param txrm_file: path-like object of the .txrm file (must have
        ``.name``; presumably a ``pathlib.Path`` — TODO confirm)
    :param custom_reference: optional external reference file path
    :param ignore_reference: if True, skip the internal reference stream
    :param annotate: if True, try to extract drawn annotations

    Side effects: sets ``self.image_output``, ``self.annotator`` (only
    when ``annotate`` is True), and ``self.ome_metadata``.
    """
    with OleFileIO(str(txrm_file)) as ole:
        images = txrm_wrapper.extract_all_images(ole)
        reference = _get_reference(ole, txrm_file.name, custom_reference,
                                   ignore_reference)
        if reference is not None:
            self.image_output = _apply_reference(images, reference)
        else:
            # No reference: just round the raw frames.
            self.image_output = np.around(images)
        # "Mosiac" is the spelling used by the file format's stream names.
        if (len(self.image_output) > 1
                and ole.exists("ImageInfo/MosiacRows")
                and ole.exists("ImageInfo/MosiacColumns")):
            mosaic_rows = txrm_wrapper.read_imageinfo_as_int(
                ole, "MosiacRows")
            mosaic_cols = txrm_wrapper.read_imageinfo_as_int(
                ole, "MosiacColumns")
            if mosaic_rows != 0 and mosaic_cols != 0:
                # Version 13 style mosaic:
                self.image_output = _stitch_images(
                    self.image_output, (mosaic_cols, mosaic_rows), 1)
        if annotate:
            # Extract annotations
            annotator = Annotator(self.image_output[0].shape[::-1])
            if annotator.extract_annotations(
                    ole):  # True if any annotations were drawn
                self.annotator = annotator
            else:
                self.annotator = False
        # Create metadata
        self.ome_metadata = create_ome_metadata(ole, self.image_output)
def _get_reference(ole, txrm_name, custom_reference, ignore_reference):
    """Return the reference image to apply to *txrm_name*, or None.

    Priority: an explicit custom reference file (xrm/txrm OLE or
    tif/tiff), then the internal "ReferenceData/Image" stream unless
    *ignore_reference* is set.

    :param ole: opened OleFileIO of the txrm being converted
    :param txrm_name: file name, used only in log messages
    :param custom_reference: Path to an external reference file, or None
    :param ignore_reference: if True, never use the internal reference
    :raises IOError: if the custom reference has an unsupported format
    """
    if custom_reference is not None:
        logging.info("%s is being processed with file %s as a reference.",
                     txrm_name, custom_reference.name)
        reference_path = str(custom_reference)
        try:
            if isOleFile(reference_path):
                with OleFileIO(reference_path) as ref_ole:
                    # should be float for averaging & dividing
                    references = txrm_wrapper.extract_all_images(ref_ole)
            elif ".tif" in reference_path:  # also matches ".tiff"
                with tf.TiffFile(reference_path) as tif:
                    references = np.asarray(tif.pages[:])
            else:
                msg = f"Unable to open file '{reference_path}'. Only tif/tiff or xrm/txrm files are supported for custom references."
                logging.error(msg)
                raise IOError(msg)
        except Exception:
            # fix: was a bare `except:`, which also intercepted
            # SystemExit/KeyboardInterrupt before re-raising.
            logging.error("Error occurred reading custom reference",
                          exc_info=True)
            raise
        if len(references) > 1:
            # if reference file is an image stack take median of the images
            return _dynamic_despeckle_and_average_series(references)
        return references[0]
    elif ole.exists("ReferenceData/Image") and not ignore_reference:
        logging.info("Internal reference will be applied to %s", txrm_name)
        return txrm_wrapper.extract_reference_image(ole)
    logging.debug("%s is being processed without a reference.", txrm_name)
    return None
def extract_ole_metadata(self, file_path):
    """Extract OLE metadata from *file_path* if it is an OLE file.

    Fix: the file is now opened in binary mode (``'rb'``). OLE compound
    files are binary; opening in text mode (``'r'``) makes ``isOleFile``
    read decoded text and typically raises UnicodeDecodeError.
    """
    with open(file_path, 'rb') as fh:
        if not isOleFile(fh):
            return
        fh.seek(0)
        ole = OleFileIO(fh)
        self.extract_olefileio_metadata(ole)
def checkVersion(file):
    """Open *file* as OLE storage and return it if it is Inventor v14+.

    Scans the storage for the 'RSeDb' stream to read the version info.
    On an unsupported or undetectable version, shows an error dialog,
    logs, and returns None.

    Fixes: ``version`` was unbound (NameError) when no 'RSeDb' stream
    exists; the unused ``vrs`` local is removed; the storage is now
    closed on every failure path.
    """
    filename = os.path.abspath(file)
    ole = OleFileIO(filename)
    version = None
    elements = ole.listdir(streams=True, storages=False)
    for e in elements:
        if (e[-1] == 'RSeDb'):
            data = ole.openstream(e).read()
            version, i = getVersionInfo(data, 20)
            if (version.major >= 14):
                setDumpFolder(file)
                return ole
            # RSeDb found but version too old: stop scanning.
            break
    ole.close()  # fix: release the file on failure paths
    if (version):
        vrsName = version.major
        if (version.major >= 11):
            vrsName += 1996
        QMessageBox.critical(
            FreeCAD.ActiveDocument, 'FreeCAD: Inventor workbench...',
            'Can\'t load file created with Inventor v%d' % (vrsName))
        logError('Can\'t load file created with Inventor v%d' % (vrsName))
    else:
        QMessageBox.critical(
            FreeCAD.ActiveDocument, 'FreeCAD: Inventor workbench...',
            'Can\'t determine Inventor version file was created with')
        logError('Can\'t determine Inventor version file was created with!')
    return None
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    Returns False for documents that are merely write-protected, signed
    or finalized; returns True if ANY content of the file is encrypted
    (including the standard-password write-protection trick, which is
    encryption at the file-structure level).

    Should not raise exceptions needlessly. The check is simple: streams
    with typical encryption names (c.f. [MS-OFFCRYPTO]) and, where the
    SummaryInformation stream is accessible, the "well-known property"
    PIDSI_DOC_SECURITY (c.f. [MS-OLEPS] 2.25.1).

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')
    # Already-opened OLE storage: inspect it directly.
    if isinstance(some_file, OleFileIO):
        return is_encrypted_ole(some_file)
    # Zip container (OOXML): dedicated zip check.
    if zipfile.is_zipfile(some_file):
        return is_encrypted_zip(some_file)
    # Fallback: treat the argument as the name of an ole file.
    return is_encrypted_ole(OleFileIO(some_file))
def __init__(self, data: bytes):
    """Run the oletools OleID checks on *data* when it is an OLE file."""
    self.oid: Optional[oletools.oleid.OleID] = None
    if not isOleFile(data):
        # Not an OLE container: leave self.oid as None.
        return
    self.oid = oletools.oleid.OleID(OleFileIO(data))
    self.oid.check()
def setInventorFile(file):
    """Record *file* as the current Inventor file and return its OLE storage.

    Also refreshes the dump folder derived from the absolute path.
    """
    global _inventor_file, _dump_folder
    _inventor_file = os.path.abspath(file)
    setDumpFolder(_inventor_file)
    return OleFileIO(file)
def __init__(self, msg_file_path):
    """Parse the Outlook .msg file at *msg_file_path*.

    Raises a generic Exception when the path is not a valid MSG file.
    """
    self.msg_file_path = msg_file_path
    self.include_attachment_data = False
    if not self.is_valid_msg_file():
        raise Exception(
            "Invalid file provided, please provide valid Microsoft’s Outlook MSG file."
        )
    with OleFileIO(msg_file_path) as ole_file:
        # Build the message tree from the root directory entries.
        self._message = Message(ole_file.root.kids_dict)
        self._message_dict = self._message.as_dict()
        # Populate properties, recipients and attachments in turn.
        self._set_properties()
        self._set_recipients()
        self._set_attachments()
def __init__(self, olefile, path='', parent=None):
    """Wrap *olefile* (an opened storage or something openable) as a root item."""
    if not hasattr(olefile, 'openstream'):
        # Not an opened storage yet: import helpers lazily, validate, open.
        isOleFile = import_isOleFile()
        OleFileIO = import_OleFileIO()
        if not isOleFile(olefile):
            raise InvalidOleStorageError(
                'Not an OLE2 Compound Binary File.')
        olefile = OleFileIO(olefile)
    OleStorageItem.__init__(self, olefile, path, parent)
def get_general(self, data, f):
    ''' Extract general info '''
    # Copy every populated OLE metadata attribute into `data`,
    # decoding byte values to text.
    # Fixes: `v != None` -> `v is None` guard, `type(v) == bytes` ->
    # isinstance, empty-bytes check via truthiness.
    for k, v in OleFileIO(f).get_metadata().__dict__.items():
        if v is None:
            continue
        if isinstance(v, bytes):
            if v:
                data.update({k: v.decode("utf-8", errors="ignore")})
        else:
            data.update({k: v})
def __init__(self, filename): self.OleFile = OleFileIO(filename) # Components self.Components = self.parseComponents( self.readStream("Components6/Data")) manifest = getU32(self.readStream("Components6/Header")) counted = len(self.Components) if manifest != counted: print "Warning: Header disagrees about component count, says there are " + str( manifest) + ", but we counted " + str(counted) + "."
def get_general(self, data, temp_f):
    ''' Extract general info '''
    # Walk every OLE metadata attribute and record the populated ones,
    # decoding byte values to text.
    meta_attrs = OleFileIO(temp_f).get_metadata().__dict__
    for key, value in meta_attrs.items():
        if value is None:
            continue
        if isinstance(value, bytes):
            if len(value) > 0:
                data.update({key: value.decode("utf-8", errors="ignore")})
        else:
            data.update({key: value})
def extract_ole_metadata(self, file_path, entity):
    """Read OLE metadata from *file_path* into *entity*, logging failures."""
    with open(file_path, 'rb') as fh:
        if not isOleFile(fh):
            return
        fh.seek(0)
        try:
            self.extract_olefileio_metadata(OleFileIO(fh), entity)
        except (RuntimeError, IOError):
            # OLE reading can go fully recursive, at which point it's OK
            # to just eat this runtime error quietly.
            log.warning("Failed to read OLE data: %r", entity)
        except Exception:
            log.exception("Failed to read OLE data: %r", entity)
def get_streams(self, dump) -> (list, list):
    ''' get streams '''
    # For every stream in the OLE file, collect a printable name and a
    # copy of its content stripped to printable ASCII.
    parsed_entries = []
    raw_entries = []
    ole = OleFileIO(dump)
    for direntry in ole.listdir():
        name = re.sub(r'[^\x20-\x7f]', r'', " : ".join(direntry))
        cleaned = sub(br'[^\x20-\x7F]+', b'',
                      ole.openstream(direntry).getvalue())
        raw_entries.append(cleaned)
        parsed_entries.append({
            "Name": name,
            "Parsed": cleaned.decode("utf-8", errors="ignore")
        })
    return parsed_entries, raw_entries
def ReadFile(doc, readProperties):
    """Read the current Inventor OLE file into *doc*.

    Returns True on success, False if the file is not a valid OLE file.

    Fixes: the failure message referenced an undefined name ``infile``
    (NameError on the error path); the builtin ``list`` was shadowed;
    ``doc.Comment`` was overwritten right after a separator newline was
    appended, losing earlier comments — it is now appended; the unused
    ``counters`` dict is removed.
    """
    first = 0
    if (isOleFile(getInventorFile())):
        ole = OleFileIO(getInventorFile())
        setFileVersion(ole)
        elements = ole.listdir(streams=True, storages=False)
        folder = getInventorFile()[0:-4]
        if not os.path.exists(folder):
            os.makedirs(folder)
        counter = 1
        ordered = []
        for fname in elements:
            if (len(fname) == 1):
                ordered.append(fname)
            else:
                # Ensure that RSe* files will be parsed first
                if (fname[-1].startswith('RSe')):
                    # ensure RSeDb is the very first "file" to be parsed
                    ordered.insert(first, fname)
                    if (fname[-1] == 'RSeDb'):
                        first += 1
                elif (not fname[-1].startswith('B')):
                    ordered.append(fname)
        for fname in ordered:
            ReadElement(ole, fname, doc, counter, readProperties)
            counter += 1
        ole.close()
        now = datetime.datetime.now()
        if (len(doc.Comment) > 0):
            doc.Comment += '\n'
        # Append (not overwrite) so earlier comments survive.
        doc.Comment += '# %s: read from %s' % (
            now.strftime('%Y-%m-%d %H:%M:%S'), getInventorFile())
        logMessage("Dumped data to folder: '%s'" % (getInventorFile()[0:-4]),
                   LOG.LOG_INFO)
        return True
    logError("Error - '%s' is not a valid Autodesk Inventor file." %
             (getInventorFile()))
    return False
def __init__(self, filename): self.OleFile = OleFileIO(filename) # TOC = Table Of Contents # A list of the footprints contained in this PcbLib can be found here: #self.TOC = TOC( self.readStream("Library/ComponentParamsTOC/Data") ) # not always present # # Parse library parameters # Library/Data contains a list of parameters (string: "|"-separated key-value pairs) # followed by the count and names of footprints in the library # buffer = self.readStream("Library/Data") # Properties print "Library properties:" length = getU32(buffer[:4]) self.Properties = parseKeyValueString(buffer[4:4+length]) print self.Properties # Footprint list cursor = 4+length count = getU32(buffer[cursor:]) cursor += 4 print "Footprints in library: "+str(count) footprints = [] for i in range(count): subrecord = SubRecord(buffer[cursor:]) name = SubRecord_String(subrecord) print " * "+name footprints.append(name) cursor += subrecord.length # Parse all the footprints self.Footprints = [] for footprint in footprints: print "Parsing "+footprint+" ..." self.Footprints.append( Footprint(self.readStream(footprint+"/Data")) ) # Create a dictionary of footprints to access them by name self.FootprintsByName = {} for footprint in self.Footprints: self.FootprintsByName[footprint.name] = footprint
def ole_file_works(path):
    """Sanity-check a .txrm/.xrm file; True when it opens as an OLE file.

    Incomplete frame stacks and a missing internal reference image are
    logged as warnings, not failures.
    """
    # Guard clauses: wrong extension or unreadable OLE -> False.
    if path.suffix not in (".txrm", ".xrm"):
        logging.warning("%s not .txrm or .xrm", path)
        return False
    if not isOleFile(str(path)):
        logging.warning("Could not read ole file %s", path)
        return False
    with OleFileIO(str(path)) as ole_file:
        frames_taken = read_imageinfo_as_int(ole_file, "ImagesTaken")
        frames_expected = read_imageinfo_as_int(ole_file, "NoOfImages")
        # Returns true even if all frames aren't written, throwing warning.
        if frames_taken != frames_expected:
            logging.warning(
                "%s is an incomplete %s file: only %i out of %i frames have been written",
                path.name, path.suffix, frames_taken, frames_expected)
        # Check for reference frame:
        if not ole_file.exists("ReferenceData/Image"):
            logging.warning("No reference data found in file %s", path)
        return True
def filename_to_lines(filepath):
    """Return the textual content of *filepath* as a list of lines.

    Dispatches on the file extension: plain text, HWP (via the 'PrvText'
    OLE stream), doc/docx, pdf, or images (OCR via EasyOCR + Tesseract).

    :raises ValueError: for unknown extensions.

    Fix: the plain-text branch leaked an open file handle; it now uses a
    context manager.
    """
    filename = filepath.split('/')[-1]
    extension = filename.split('.')[-1]
    if '.' not in filename or extension in ['txt']:
        with open(filepath, 'r', encoding='utf-8') as fh:
            return fh.readlines()
    if extension in ['hwp']:
        # HWP keeps a UTF-16 plain-text preview in the 'PrvText' stream.
        return OleFileIO(filepath).openstream('PrvText').read().decode(
            'utf-16').split('\n')
    if extension in ['doc', 'docx']:
        return [p.text for p in docx.Document(filepath).paragraphs]
    if extension in ['pdf']:
        return parser.from_file(filepath)['content'].split('\n')
    if extension in ['jpg', 'png', 'jpeg', 'bmp', 'gif', 'tiff', 'jfif']:
        # Combine the output of both OCR engines.
        easyocr_terms = EASYOCR.readtext(filepath, detail=0)
        tesseract_terms = image_to_string(Image.open(filepath),
                                          lang='kor+eng').split('\n')
        return easyocr_terms + tesseract_terms
        # return EASYOCR.readtext(filepath, detail=0)
    else:
        raise ValueError('알려지지 않은 확장자')
def oleMetaData(file_path, save=True):
    """Print (and optionally save) the OLE summary metadata of *file_path*.

    Fixes: the report contained ``\\Total`` — an invalid escape printed
    literally — where ``\\nTotal`` (a newline) was clearly intended; the
    ``FileNotFoundError`` branch was unreachable because it is a subclass
    of ``OSError`` and was listed after it.
    """
    now = dt.now()
    file_name = getFileName(file_path)
    metadata = "Time: %d/%d/%d %d : %d : %d. Found the following metadata for file %s:\n\n" % (
        now.year, now.month, now.day, now.hour, now.minute, now.second,
        file_name[:-4])
    try:
        ole = OleFileIO(file_path)
        meta = ole.get_metadata()
        ole.close()
        # NOTE(review): assumes every property is present and
        # bytes-encoded; a file missing one would raise AttributeError.
        author = meta.author.decode("latin-1")
        creation_time = meta.create_time.ctime()
        last_author = meta.last_saved_by.decode("latin-1")
        last_edit_time = meta.last_saved_time.ctime()
        last_printed = meta.last_printed.ctime()
        revisions = meta.revision_number.decode("latin-1")
        company = meta.company.decode("latin-1")
        creating_app = meta.creating_application.decode("latin-1")
        metadata += "Original Author: %s\nCreation Time: %s\nLast Author: %s\n" % (author, creation_time, last_author) \
            + "Last Modification Time: %s\nLast Printed at: %s\nTotal Revisions: %s\n" % (last_edit_time, last_printed, revisions) \
            + "Created with: %s\nCompany: %s" % (creating_app, company)
        try:
            print(metadata)
        except UnicodeEncodeError:
            print(
                "Console encoding can't decode the result. Enter chcp 65001 in the console and rerun the script."
            )
        if save:
            tgt = file_name + ".txt"
            saveResult(tgt, metadata)
    except FileNotFoundError:
        # Must precede OSError: FileNotFoundError is its subclass.
        print("Specified file could not be found")
    except OSError as e1:
        print("File not supported: %s" % e1)
def read(self):
    """Walk the OLE directory entries, dispatching metadata/dialog handlers.

    Returns ``self.read_result`` (filled by the handlers).

    Fix: the error log used ``e.message``, which does not exist on
    Python 3 exceptions and would raise AttributeError inside the
    handler; the exception object itself is formatted instead (works on
    both Python 2 and 3).
    """
    mylog.info(u'Начинаю чтение %s' % self.filename)
    self.ole = OleFileIO(self.filename)
    oledirs = self.ole.listdir()
    mylog.debug('OLE_DIRS: %s' % oledirs)
    for entry in oledirs:
        entry_name = entry[0]
        mylog.debug(u'entry_name: %s' % entry_name)
        try:
            if entry_name == 'Metadata':
                if "Main MetaData Stream" in entry and self.parse_metadata:
                    self.handler_metadata(entry)
            if entry_name == 'Document':
                if "Dialog Stream" in entry and self.parse_dialog:
                    self.handler_dialog(entry)
                if "Container.Profile" in entry:
                    continue
                if "Container.Contents" in entry:
                    continue
        except Exception as e:
            mylog.exception(u'Ошибка при чтении конфигурации %s' % e)
    return self.read_result
if __name__ == '__main__':
    # Command-line entry point: first argument is the Inventor file;
    # an optional second argument selects a debug listing mode.
    # NOTE(review): `.decode(...)` on the argv string is Python-2-only.
    if (len(sys.argv) > 1):
        files = sys.argv[1:]
        filename = files[0].decode(
            sys.getfilesystemencoding())  # make it UNICODE!
        setInventorFile(filename)
        if (isOleFile(filename)):
            if (len(files) == 1):
                open(filename)
            else:
                # this is only for debugging purposes...
                docname = os.path.splitext(os.path.basename(filename))[0]
                docname = decode(docname, utf=True)
                doc = FreeCAD.newDocument(docname)
                ole = OleFileIO(filename)
                setFileVersion(ole)
                elements = ole.listdir(streams=True, storages=False)
                counter = 1
                if (files[1] == 'l'):
                    # 'l' mode: list every stream with its index.
                    for filename in elements:
                        ListElement(ole, filename, counter)
                        counter += 1
                else:
                    # Build index -> stream mappings.
                    # NOTE(review): `counter` is never advanced in this
                    # branch within the visible code — the script appears
                    # truncated here; confirm against the full source.
                    list = {}
                    counters = {}
                    for a in (elements):
                        path = PrintableName(a)
                        list['%s' % (counter)] = a
                        counters['%s' % (counter)] = counter
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file
    is encrypted and can therefore not be analyzed by other oletools
    modules without given a password.

    Exception: there are way to write-protect an office document by
    embedding it as encrypted stream with hard-coded standard password into
    an otherwise empty OLE file. From an office user point of view, this is
    no encryption, but regarding file structure this is encryption, so we
    return `True` for these.

    This should not raise exceptions needlessly.

    This implementation is rather simple: it returns True if the file
    contains streams with typical encryption names (c.f. [MS-OFFCRYPTO]).
    It does not test whether these streams actually contain data or whether
    the ole file structure contains the necessary references to these. It
    also checks the "well-known property" PIDSI_DOC_SECURITY if the
    SummaryInformation stream is accessible (c.f. [MS-OLEPS] 2.25.1)

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')

    # ask msoffcrypto if possible
    if check_msoffcrypto():
        log.debug('Checking for encryption using msoffcrypto')
        file_handle = None
        file_pos = None
        try:
            if isinstance(some_file, OleFileIO):
                # TODO: hacky, replace once msoffcrypto-tools accepts OleFileIO
                # Reuse the underlying file object, remembering its position
                # so we can restore it afterwards.
                file_handle = some_file.fp
                file_pos = file_handle.tell()
                file_handle.seek(0)
            else:
                file_handle = open(some_file, 'rb')
            return msoffcrypto.OfficeFile(file_handle).is_encrypted()
        except Exception as exc:
            # Fall through to the simpler check below.
            log.warning('msoffcrypto failed to interpret file {} or determine '
                        'whether it is encrypted: {}'
                        .format(file_handle.name, exc))
        finally:
            try:
                if file_pos is not None:    # input was OleFileIO
                    file_handle.seek(file_pos)
                else:                       # input was file name
                    file_handle.close()
            except Exception as exc:
                log.warning('Ignoring error during clean up: {}'.format(exc))

    # if that failed, try ourselves with older and less accurate code
    try:
        if isinstance(some_file, OleFileIO):
            return _is_encrypted_ole(some_file)
        if zipfile.is_zipfile(some_file):
            return _is_encrypted_zip(some_file)
        # otherwise assume it is the name of an ole file
        with OleFileIO(some_file) as ole:
            return _is_encrypted_ole(ole)
    except Exception as exc:
        log.warning('Failed to check {} for encryption ({}); assume it is not '
                    'encrypted.'.format(some_file, exc))
    return False
def parse_md(filename):
    # Walk every stream of the OLE container and build a dict with the
    # parsed metadata tree under 'dds'.
    # NOTE(review): several remnants here are Python-2-only
    # (`tx.encode('hex')`, bare `mylog.exception()` with no message) and
    # would fail at runtime on Python 3 if those paths execute — confirm
    # the target interpreter.
    mylog.info(u'Начинаю чтение %s' % filename)
    m = {'dds': []}
    ole = OleFileIO(filename)
    # mylog.debug('OLE_DIRS: %s' % ole.listdir())
    m['entry'] = {}
    for entry in ole.listdir():
        mylog.debug(entry[0])
        #with open("stream_%s" % entry[0],'w+') as f:
        #    f.write(repr(entry))
        if entry[0] == 'Document':
            #print entry
            if "Dialog Stream" in entry:
                continue
            # Probe the stream: open/close it, logging any failure.
            try:
                sz = ole.get_size(entry)
                f = ole.openstream(entry)
                #print f.read(sz)
                f.close()
            except Exception as e:
                mylog.exception(repr(e.args))
            if "Container.Profile" in entry:
                continue
            try:
                sz = ole.get_size(entry)
                f = ole.openstream(entry)
                #print f.read(sz)
                f.close()
            except:
                mylog.exception()
            if "Container.Contents" in entry:
                continue
            sz = ole.get_size(entry)
            f = ole.openstream(entry)
            #print f.read(sz)
            f.close()
            if "MD Programm text" in entry:
                continue
            '''
            Пока что не работает, работало в прежних версиях python
            try:
                sz= ole.get_size(entry)
                f=ole.openstream(entry)
                tx= f.read(sz)
                f.close()
                #print zlib.compress('test').encode('hex')
                zi=zlib.decompress((zlib_head+tx))
                print zi
            except Exception,e:
                mylog.exception('read MD Programm text')
                mylog.info(tx[:10].encode('hex'))
                #print e
            '''
        if entry[0] == 'Metadata':
            if "Main MetaData Stream" in entry:
                # The main metadata stream is cp1251 text parsed into a tree.
                try:
                    #sz= ole.get_size(entry)
                    f = ole.openstream(entry)
                    tx = f.read()
                    f.close()
                    #print zlib.compress('test').encode('hex')
                    #d=zlib.decompressobj()
                    #zi=zlib.decompress(zlib_head+tx)
                    #tx_fixed = utils.fixunicode(tx,'cp1251')
                    #mylog.debug(tx.decode('cp1251'))
                    m['dds'] = ParseTree(tx.decode('cp1251', errors='ignore'))
                except Exception as e:
                    mylog.exception('parse metadata error')
        #if entry[0] == 'Journal':
        #write dumps
        if DUMP_META:
            if "MD Programm text" in entry:
                sz = ole.get_size(entry)
                f = ole.openstream(entry)
                tx = f.read(sz)
                f.close()
                hx = tx.encode('hex')
                if ztest.find(hx) > 0:
                    #print entry
                    #print hx
                    pass
                # Best-effort decompression; failure is silently ignored.
                try:
                    #zlib.compress("//test").encode('hex')
                    #'789c d3d72f492d2e0100 0664021f'
                    # d3d72f492d2e0100
                    tx = zlib.decompress(zlib_head + tx)
                    #print "MODULE:", tx
                    pass
                except Exception as e:
                    #print "size MD text:", sz, e.message
                    pass
                dump_stream("entry-%s" % entry, zlib_head + tx)
            else:
                dump_stream("entry-%s" % entry, ole.openstream(entry).read())
    return m
def from_path(path: Path) -> Dict[str, Any]:
    """Return the OLE summary metadata of *path* as a plain dict.

    Fix: previously returned ``__dict__.items()`` — a ``dict_items``
    view — which contradicts the declared ``Dict[str, Any]`` return
    type; a real dict is now returned.
    """
    with OleFileIO(path) as ole_file_io:
        # Copy so the result does not alias the metadata object's __dict__.
        return dict(ole_file_io.get_metadata().__dict__)
def __init__(
    self,
    path,
    prefix="",
    ole=None,
    filename=None,
    encoding=None,
    lazy=False,
):
    """
    Parse an Outlook .msg file (or an embedded message within one).

    :param path: path to the msg file in the system or is the raw msg file.
    :param prefix: used for extracting embeded msg files inside the main
        one. Do not set manually unless you know what you are doing.
    :param ole: optional, an already-opened OleFileIO to reuse instead of
        opening *path*.
    :param filename: optional, the filename to be used by default when
        saving.
    :param encoding: optional, overrides the encoding detected from the
        message properties.
    :param lazy: continue with extraction even if an attachment fails
    """
    self.path = path
    self.filename = filename
    if ole is None:
        ole = OleFileIO(path)
    self.ole = ole
    self.lazy = lazy
    # Parse the main props
    self.prefix = prefix
    # An empty prefix means this is the top-level (non-embedded) message.
    prop_type = constants.TYPE_MESSAGE_EMBED
    if self.prefix == "":
        prop_type = constants.TYPE_MESSAGE
    propdata = self._getStream("__properties_version1.0")
    self.mainProperties = Properties(propdata, prop_type)

    # Determine if the message is unicode-style:
    # PidTagStoreSupportMask
    self.is_unicode = False
    if "340D0003" in self.mainProperties:
        value = self.mainProperties["340D0003"].value
        self.is_unicode = (value & 0x40000) != 0

    # Encoding resolution: explicit argument, then OLE metadata codepage,
    # then PidTagMessageCodepage, finally a heuristic guess.
    self.encoding = encoding
    # if "66C30003" in self.mainProperties:
    #     # PidTagCodePageId
    #     codepage = self.mainProperties["66C30003"].value
    #     self.encoding = get_encoding(codepage, self.encoding)
    if self.encoding is None:
        metadata = ole.get_metadata()
        self.encoding = get_encoding(metadata.codepage)
    if "3FFD0003" in self.mainProperties:
        # PidTagMessageCodepage
        codepage = self.mainProperties["3FFD0003"].value
        self.encoding = get_encoding(codepage, self.encoding)
    if self.encoding is None:
        self.encoding = self.guessEncoding()
    log.debug("Message encoding: %s", self.encoding)

    # 0x0037 = PidTagSubject
    self.subject = self.getStringField("0037")
    self.date = self.mainProperties.date