Example #1
0
def stomp_file(original_file):
    """Write a stomped copy of ``original_file`` to ``original_file + '.stomped'``.

    An OLE file is copied and the copy is handed to ``stomp_ole``. A ZIP
    container is unpacked into a temporary directory, every extracted file
    that is itself an OLE file is handed to ``stomp_ole``, and the tree is
    zipped again under the ``.stomped`` name.

    :param original_file: path (str) of the document to process.
    :returns: True when the input was an OLE or ZIP file and the stomped
        copy was written, False for any other input.
    """
    stomped_file = original_file + '.stomped'

    if olefile.isOleFile(original_file):
        shutil.copyfile(original_file, stomped_file)
        stomp_ole(stomped_file)
        return True

    if not zipfile.is_zipfile(original_file):
        return False

    # The context manager removes the scratch directory on every exit path.
    with tempfile.TemporaryDirectory(prefix='stomp_') as tmpdir:
        with zipfile.ZipFile(original_file) as zf:
            zf.extractall(tmpdir)

        # Snapshot the listing before stomp_ole is called on anything.
        extracted = [
            path for path in iglob(tmpdir + '/**/*', recursive=True)
            if os.path.isfile(path)
        ]
        for path in extracted:
            if olefile.isOleFile(path):
                stomp_ole(path)

        # An absolute target replaces os.chdir(): changing the working
        # directory affects the whole process and makes a relative
        # ``stomped_file`` resolve against the wrong directory.
        target = os.path.abspath(stomped_file)
        archive = shutil.make_archive(target, 'zip', tmpdir)
        # os.replace overwrites an already existing target in one step.
        os.replace(archive, target)
    return True
def read_meta_txm(file_name, meta_data_name=None):
    """Read one metadata stream from an OLE file.

    The image count comes from the ``ImageInfo/NoOfImages`` stream, unpacked
    as a little-endian unsigned int. The ``meta_data_name`` stream is then
    unpacked as one 17-byte string plus 23 skipped bytes per image when it
    is ``'ImageInfo/Date'``, and as one little-endian float per image
    otherwise.

    :param file_name: path of the OLE file to open.
    :param meta_data_name: OLE stream path to read.
    :returns: ``np.asarray`` of the unpacked tuple; ``np.asarray(None)`` when
        either stream is absent or a ``KeyError`` is raised while reading.
    """
    meta_data = None
    try:
        # Errors raised while opening the file propagate to the caller.
        ole = olef.OleFileIO(file_name)
        try:
            if ole.exists('ImageInfo/NoOfImages'):
                raw = ole.openstream('ImageInfo/NoOfImages').read()
                number_of_images = struct.unpack('<I', raw)[0]

                if ole.exists(meta_data_name):
                    print('Reading: [%s].' % meta_data_name)
                    raw = ole.openstream(meta_data_name).read()
                    if meta_data_name == 'ImageInfo/Date':
                        struct_fmt = '<' + '17s23x' * number_of_images
                    else:
                        struct_fmt = "<{}f".format(number_of_images)
                    meta_data = struct.unpack(struct_fmt, raw)
        finally:
            # Release the handle on every path, including a missing
            # NoOfImages stream and any exception raised while unpacking.
            ole.close()
    except KeyError:
        print('Reading: [%s] failed.' % file_name)
        meta_data = None

    return np.asarray(meta_data)
Example #3
0
def read_meta_txm(file_name, meta_data_name=None):
    """Return the values stored in one metadata stream of an OLE file.

    ``ImageInfo/NoOfImages`` (little-endian unsigned int) gives the number
    of images. ``meta_data_name`` is decoded with the struct format
    ``'17s23x'`` repeated per image when it equals ``'ImageInfo/Date'``;
    any other stream is decoded as that many little-endian floats.

    :param file_name: path of the OLE file to open.
    :param meta_data_name: OLE stream path to read.
    :returns: ``np.asarray`` of the decoded tuple, or ``np.asarray(None)``
        when a required stream is absent or a ``KeyError`` occurs.
    """
    meta_data = None
    try:
        # A file that cannot be opened raises from OleFileIO itself.
        ole = olef.OleFileIO(file_name)
        try:
            if ole.exists('ImageInfo/NoOfImages'):
                count_bytes = ole.openstream('ImageInfo/NoOfImages').read()
                number_of_images = struct.unpack('<I', count_bytes)[0]

                if ole.exists(meta_data_name):
                    print('Reading: [%s].' % meta_data_name)
                    payload = ole.openstream(meta_data_name).read()
                    if meta_data_name == 'ImageInfo/Date':
                        meta_data = struct.unpack(
                            '<' + '17s23x' * number_of_images, payload)
                    else:
                        struct_fmt = "<{}f".format(number_of_images)
                        meta_data = struct.unpack(struct_fmt, payload)
        finally:
            # Always close: the handle must not outlive this call even when
            # the count stream is missing or decoding fails.
            ole.close()
    except KeyError:
        print('Reading: [%s] failed.' % file_name)
        meta_data = None

    return np.asarray(meta_data)
Example #4
0
 def __init__(self, file):
     """Set up the instance from an open file object.

     An OLE container is opened with olefile and its ``EncryptionInfo``
     stream is parsed by ``_parseinfo`` into ``self.type`` / ``self.info``.
     A plain ZIP file is stored as-is with no encryption type.

     :param file: seekable file object; it is rewound before inspection.
     :raises Exception: when ``file`` is neither an OLE nor a ZIP file.
     """
     self.format = "ooxml"
     # TODO: Investigate the effect (required for olefile.isOleFile)
     file.seek(0)

     # olefile cannot process non password protected ooxml files.
     if olefile.isOleFile(file):
         self.file = olefile.OleFileIO(file)
         info_stream = self.file.openstream('EncryptionInfo')
         self.type, self.info = _parseinfo(info_stream)
         logger.debug("OOXMLFile.type: {}".format(self.type))
         self.secret_key = None
         # 'extensible' (like any other type) leaves keyTypes unset.
         if self.type == 'agile':
             # TODO: Support aliases?
             self.keyTypes = ('password', 'private_key', 'secret_key')
         elif self.type == 'standard':
             self.keyTypes = ('password', 'secret_key')
         return

     if not zipfile.is_zipfile(file):
         raise Exception("Unsupported file format")

     self.file = file
     self.type, self.info = None, None
     self.secret_key = None
Example #5
0
    def is_file_encrypted(filepath):
        """Dispatch ``filepath`` to the validator matching its container type.

        :param filepath: file to inspect.
        :returns: the result of ``_validate_ole_file`` for OLE files, of
            ``_validate_zip_file`` for ZIP files, and None for anything else.
        """
        if olefile.isOleFile(filepath):
            result = FileEncryptionValidator._validate_ole_file(filepath)
        elif zipfile.is_zipfile(filepath):
            result = FileEncryptionValidator._validate_zip_file(filepath)
        else:
            # Neither container format was recognised.
            result = None
        return result
def parse_snt_file(snt_file):
    """Collect note timestamps and contents from an OLE file.

    Every stream whose top-level path component contains exactly three
    dashes is treated as belonging to a note keyed by that component.
    Sub-stream ``'0'`` is stored as raw bytes (RTF), sub-stream ``'3'`` is
    decoded as UTF-16 text.

    :param snt_file: path of the file to parse.
    :returns: dict mapping note id to a dict with ``"created"``,
        ``"modified"`` and the non-empty contents keyed by sub-stream name;
        None when ``snt_file`` is not an OLE file.
    """
    if not olefile.isOleFile(snt_file):
        print("This is not an OLE file")
        return None

    ole = olefile.OleFileIO(snt_file)
    note = {}
    try:
        for stream in ole.listdir():
            note_id = stream[0]
            if note_id.count("-") != 3:
                continue

            if note_id not in note:
                note[note_id] = {
                    # Read timestamps
                    "created": ole.getctime(note_id),
                    "modified": ole.getmtime(note_id)
                }

            # A matching top-level stream has no sub-stream component.
            if len(stream) < 2:
                continue
            part = stream[1]

            content = None
            if part == '0':
                # Parse RTF text
                content = ole.openstream(stream).read()
            elif part == '3':
                # Parse UTF text
                content = ole.openstream(stream).read().decode("utf-16")

            if content:
                note[note_id][part] = content
    finally:
        # Release the file handle even when a stream cannot be decoded.
        ole.close()

    return note
Example #7
0
 def match(cls, meta, local_path):
     """Score how well this class fits the file at ``local_path``.

     :param meta: object exposing ``mime_type`` and ``extension``.
     :param local_path: file checked with ``isOleFile``.
     :returns: ``cls.BASE_SCORE`` for an OLE file whose MIME type is in
         ``cls.MIME_TYPES`` or whose extension is in ``cls.EXTENSIONS``;
         -1 otherwise.
     """
     if not isOleFile(local_path):
         return -1
     known_mime = meta.mime_type in cls.MIME_TYPES
     # The extension is only consulted when the MIME type did not match.
     if known_mime or meta.extension in cls.EXTENSIONS:
         return cls.BASE_SCORE
     return -1
Example #8
0
 def extract_ole_metadata(self, file_path):
     """Open ``file_path`` and pass its OLE container to the metadata extractor.

     Nothing happens (None is returned) when the file is not an OLE file.

     :param file_path: path of the file to inspect.
     """
     # OLE containers are binary data; text mode would decode or translate
     # the bytes and break both the signature check and the parser.
     with open(file_path, 'rb') as fh:
         if not isOleFile(fh):
             return
         # Rewind so the parser starts at the beginning of the file.
         fh.seek(0)
         ole = OleFileIO(fh)
         self.extract_olefileio_metadata(ole)
Example #9
0
def extract_office2003_from_unknown_office(fullpath, fileobj=None):
    '''
    Extract the embedded Office 2003 documents from an Office file of
    unknown format (it may be Office 2003 or Office 2007).

    :param fullpath: path of the host document.
    :param fileobj: optional object inspected instead of ``fullpath`` when
        it is truthy.
    :return: [
            (host_fullpath,filename_from_host,<file_open_handler>),
            ]
    :raises ValueError: when the input is neither an OLE nor a ZIP file.
    '''

    import zipfile
    import olefile
    import io

    source = fileobj if fileobj else fullpath
    r = []

    if olefile.isOleFile(source):
        r.append((fullpath, os.path.basename(fullpath),
                  fileobj if fileobj else open(fullpath, 'rb')))

    elif zipfile.is_zipfile(source):
        with zipfile.ZipFile(source) as z:
            for subfile in z.namelist():
                with z.open(subfile) as zt:
                    head = zt.read(len(olefile.MAGIC))
                    if head == olefile.MAGIC:
                        # Finish reading from the member that is already
                        # open instead of opening it a second time and
                        # leaving that extra handle unclosed.
                        r.append((fullpath, io_text_arg(subfile),
                                  io.BytesIO(head + zt.read())))
    else:
        raise ValueError(u'not office file')

    return r
Example #10
0
def main():
    """Command-line entry point: load a key into an OfficeFile and decrypt it.

    Exactly one of ``-k`` (hex secret key), ``-p`` (private key file) or
    ``-P`` (password) is required.

    :raises AssertionError: when the input file is not an OLE file.
    """
    import argparse

    parser = argparse.ArgumentParser()
    key_source = parser.add_mutually_exclusive_group(required=True)
    key_source.add_argument(
        '-k', dest='secret_key',
        help='MS-OFFCRYPTO secretKey value (hex)')
    key_source.add_argument(
        '-p', dest='private_key', type=argparse.FileType('rb'),
        help='RSA private key file')
    key_source.add_argument('-P', dest='password', help='Password ASCII')
    parser.add_argument('infile', nargs='?', type=argparse.FileType('rb'))
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('wb'))
    args = parser.parse_args()

    if not olefile.isOleFile(args.infile):
        raise AssertionError("No OLE file")

    office_file = OfficeFile(args.infile)

    # Only the first option that is set is used.
    if args.secret_key:
        office_file.load_skey(binascii.unhexlify(args.secret_key))
    elif args.private_key:
        office_file.load_privkey(args.private_key)
    elif args.password:
        office_file.load_password(args.password)

    office_file.decrypt(args.outfile)
Example #11
0
    def check(self):
        """
        Open file and run all checks on it.

        An ``ole_format`` indicator is always appended first. When no parsed
        file is attached yet and ``self.filename`` is not an OLE file, that
        indicator is set to False and no further check runs.

        :returns: list of all :py:class:`Indicator`s created
        """
        # check if it is actually an OLE file:
        oleformat = Indicator('ole_format', True, name='OLE format')
        self.indicators.append(oleformat)

        if self.ole:
            # already parsed
            oleformat.value = True
        elif olefile.isOleFile(self.filename):
            # parse file:
            self.ole = olefile.OleFileIO(self.filename)
        else:
            oleformat.value = False
            return self.indicators

        # checks:
        self.check_properties()
        self.check_encrypted()
        self.check_word()
        self.check_excel()
        self.check_powerpoint()
        self.check_visio()
        self.check_object_pool()
        self.check_flash()

        self.ole.close()
        return self.indicators
Example #12
0
def main():
    """Command-line entry point for parsing a .bup file.

    With ``--file`` given, the file must exist and be an OLE file, otherwise
    the process exits with a message. ``--details`` prints the parsed
    details, ``--output`` writes the decoded files to the given directory,
    ``--corrupt`` parses a corrupted file, and with none of these the
    decoded files are written to the default location. The options are
    honoured in that order; only the first one set takes effect. Nothing is
    done when ``--file`` is omitted.
    """
    p = ArgumentParser(description="Lord Of The Bups")
    p.add_argument("-f", "--file", help=".bup file to parse")
    p.add_argument("-c", "--corrupt", help="Attempt to parse the Details portion of a corrupted bup file", action="store_true")
    p.add_argument("-d", "--details", help="Print detection details", action="store_true")
    p.add_argument("-o", "--output", help="Specify an output directory for the decoded files")
    args = p.parse_args()

    if not args.file:
        return

    if not os.path.isfile(args.file):
        sys.exit("[ - ] {} not found".format(args.file))
    if not olefile.isOleFile(args.file):
        sys.exit("[ - ] Not an OLE file")

    b = Bupectomy()

    # --corrupt only applies when neither --details nor --output is given.
    if args.corrupt and not args.details and not args.output:
        b.corrupted_bup(args.file)
        return

    b.extractfiles(args.file)
    b.details_to_json()
    if args.details:
        # Parenthesised single-argument form works on Python 2 and 3.
        print(b.details)
    elif args.output:
        b.writefiles(args.output)
    else:
        b.writefiles()
Example #13
0
    def load_data(self):
        """Read the ``field`` stream of every OLE seed file and print the array shape.

        Entries of ``self.im_file`` are file names under ``hwp_seeds\\``.
        Names that are not OLE files are dropped from ``self.im_file``. The
        stream of each remaining file is converted to an array with one
        integer per byte, and the shape of the stacked result is printed.

        ``field`` is not defined in this method; it is looked up in the
        enclosing scope.
        """
        # Filter with a rebuilt list assigned in place: calling remove()
        # while iterating skips the entry that follows each removed one.
        self.im_file[:] = [
            name for name in self.im_file
            if olefile.isOleFile('hwp_seeds\\' + name)
        ]

        total_list = []
        for name in self.im_file:
            # Build the path per file so each seed is actually read, rather
            # than reusing whichever path the filter step saw last.
            # NOTE(review): write_mode=True is kept as found although the
            # file is only read here - confirm it is needed.
            ole = olefile.OleFileIO('hwp_seeds\\' + name, write_mode=True)
            try:
                data = ole.openstream(field).read()
            finally:
                ole.close()
            # One integer (0-255) per byte of the stream.
            total_list.append(np.asarray(list(bytearray(data))))

        total_arr = np.asarray(total_list)
        print(total_arr.shape)
def get_rfa_info(rfa_file):
    """Gather timestamps, owner and version string for ``rfa_file``.

    Each piece of information is looked up independently on a best-effort
    basis; whatever cannot be determined is reported as ``"unknown"``.

    :param rfa_file: path of the file to inspect.
    :returns: tuple ``(creationdate, creationtime, modifieddate,
        modifiedtime, rfa_owner, rfa_version)``. Dates use ``%m/%d/%Y`` and
        times ``%H:%M:%S``, both in UTC. The version is the first match of
        ``Autodesk Revit <4 digits>`` or ``Format..<4 digits>`` in the
        ``BasicFileInfo`` stream.
    """
    rfa_version = "unknown"
    rfa_owner = "unknown"

    try:
        created = time.gmtime(os.path.getctime(rfa_file))
        modified = time.gmtime(os.path.getmtime(rfa_file))
        creationdate = time.strftime('%m/%d/%Y', created)
        creationtime = time.strftime('%H:%M:%S', created)
        modifieddate = time.strftime('%m/%d/%Y', modified)
        modifiedtime = time.strftime('%H:%M:%S', modified)
    except Exception:
        creationdate, creationtime, modifieddate, modifiedtime = "unknown", "unknown", "unknown", "unknown"

    try:
        sd = win32security.GetFileSecurity(rfa_file, win32security.OWNER_SECURITY_INFORMATION)
        owner_sid = sd.GetSecurityDescriptorOwner()
        owner_name, owner_domain, _account_type = win32security.LookupAccountSid(None, owner_sid)
        rfa_owner = f'{owner_domain}\\{owner_name}'
    except Exception:
        # Best effort: the owner stays "unknown". Kept separate from the
        # version lookup so a failed owner query does not suppress it.
        pass

    try:
        is_ole = olefile.isOleFile(rfa_file)
    except Exception:
        # The file could not even be probed; report nothing about it.
        is_ole = None

    if is_ole:
        try:
            rfa_ole = olefile.OleFileIO(rfa_file)
            try:
                rfa_bfi = rfa_ole.openstream("BasicFileInfo")
                # The pattern is matched against the repr of the bytes with
                # the escaped NUL markers stripped out.
                rfa_file_info = str(rfa_bfi.read()).replace("\\x00", "")
            finally:
                rfa_ole.close()
            found = re.search(r'Autodesk Revit \d{4}|Format..\d{4}', rfa_file_info)
            if found:
                rfa_version = found[0]
        except Exception:
            # Best effort: the version stays "unknown".
            pass
    elif is_ole is not None:
        print(f'file does not appear to be an ole file: {rfa_file}')

    return creationdate, creationtime, modifieddate, modifiedtime, rfa_owner, rfa_version
Example #15
0
 def _winoffice(self):
     """Process a winoffice file using olefile/oletools.

     The file is marked dangerous when it cannot be parsed, has parsing
     issues, is encrypted, contains a macro storage, an object pool or
     embedded flash. The description 'WinOffice file' is always added.
     """
     macro_paths = ('macros/vba', 'Macros', '_VBA_PROJECT_CUR', 'VBA')
     oid = oletools.oleid.OleID(self.src_path)  # First assume a valid file
     if not olefile.isOleFile(self.src_path):
         # Manual processing, may already count as suspicious
         try:
             ole = olefile.OleFileIO(self.src_path, raise_defects=olefile.DEFECT_INCORRECT)
         except Exception:
             self.make_dangerous('Unparsable WinOffice file')
         else:
             # Inspect the container only when opening succeeded; after a
             # failure there is no ``ole`` object to query.
             if ole.parsing_issues:
                 self.make_dangerous('Parsing issues with WinOffice file')
             elif any(ole.exists(path) for path in macro_paths):
                 self.make_dangerous('WinOffice file containing a macro')
     else:
         indicators = oid.check()
         # Encrypted can be set by multiple checks on the script
         if oid.encrypted.value:
             self.make_dangerous('Encrypted WinOffice file')
         if oid.macros.value or any(oid.ole.exists(path) for path in macro_paths):
             self.make_dangerous('WinOffice file containing a macro')
         for i in indicators:
             if i.id == 'ObjectPool' and i.value:
                 self.make_dangerous('WinOffice file containing an object pool')
             elif i.id == 'flash' and i.value:
                 self.make_dangerous('WinOffice file with embedded flash')
     self.add_description('WinOffice file')
Example #16
0
 def _winoffice(self):
     """Process a winoffice file using olefile/oletools.

     Sets the ``processing_type`` property to 'WinOffice', then marks the
     file dangerous when it is not parsable, has parsing issues, is
     encrypted, or contains a macro storage or flash. An object pool only
     sets the ``objpool`` property.
     """
     # LOG: processing_type property
     self.set_property('processing_type', 'WinOffice')
     macro_paths = ('macros/vba', 'Macros', '_VBA_PROJECT_CUR', 'VBA')
     oid = oletools.oleid.OleID(self.src_path)  # First assume a valid file
     if not olefile.isOleFile(self.src_path):
         # Manual processing, may already count as suspicious
         try:
             ole = olefile.OleFileIO(self.src_path,
                                     raise_defects=olefile.DEFECT_INCORRECT)
         except Exception:
             # Exception rather than a bare except, so KeyboardInterrupt
             # and SystemExit still propagate.
             self.make_dangerous('not parsable')
         else:
             # Inspect the container only when opening succeeded; after a
             # failure there is no ``ole`` object to query.
             if ole.parsing_issues:
                 self.make_dangerous('parsing issues')
             elif any(ole.exists(path) for path in macro_paths):
                 self.make_dangerous('macro')
     else:
         indicators = oid.check()
         # Encrypted can be set by multiple checks on the script
         if oid.encrypted.value:
             self.make_dangerous('encrypted')
         if oid.macros.value or any(oid.ole.exists(path) for path in macro_paths):
             self.make_dangerous('macro')
         for i in indicators:
             if i.id == 'ObjectPool' and i.value:
                 # TODO: Is it suspicious?
                 # LOG: user defined property
                 self.set_property('objpool', True)
             elif i.id == 'flash' and i.value:
                 self.make_dangerous('flash')
Example #17
0
def getHashes(bupname, htype):
    """Return a dictionary of stream name and hash.

    Every top-level stream except ``Details`` is read, passed through
    ``decryptStream`` and hashed.

    :param bupname: path of the OLE file to read.
    :param htype: ``'md5'``, ``'sha1'`` or ``'sha256'``.
    :returns: dict mapping stream name to hex digest.

    Any failure (including an unsupported ``htype`` once a stream has to be
    hashed) is reported on stderr and terminates the process with status 1.
    """
    hash_factories = {
        'md5': hashlib.md5,
        'sha1': hashlib.sha1,
        'sha256': hashlib.sha256,
    }
    try:
        if olefile.isOleFile(bupname) is not True:
            # sys.stderr.write works on Python 2 and 3 alike.
            sys.stderr.write('Error - %s is not a valid OLE file.\n' % bupname)
            sys.exit(1)

        ole = olefile.OleFileIO(bupname)
        try:
            make_hash = hash_factories.get(htype)
            hashes = {}
            for entry in ole.listdir():
                if entry[0] == "Details":
                    continue
                ptext = decryptStream(ole.openstream(entry[0]).read())
                if make_hash is None:
                    # Name the real problem instead of failing on an
                    # unbound hash object.
                    raise ValueError('unsupported hash type: %s' % htype)
                hashes[entry[0]] = make_hash(ptext).hexdigest()
        finally:
            # Close the container on success and on error.
            ole.close()
        return hashes
    except Exception as e:
        sys.stderr.write('Error - %s\n' % e)
        sys.exit(1)
Example #18
0
def main():
    """Command-line entry point: test for encryption or decrypt with a password.

    With ``test_encrypted`` set, only the encryption test runs: a message is
    printed to stderr and the process exits with status 1 when the input is
    not encrypted. Otherwise the input must be an OLE file and a password
    must be supplied; the decrypted data goes to ``outfile`` or, when that
    is None, to standard output.

    :raises AssertionError: for a non-OLE input or a missing password.
    """
    args = parser.parse_args()

    if args.test_encrypted:
        if is_encrypted(args.infile):
            logger.debug("{}: encrypted".format(args.infile.name))
            return
        print("{}: not encrypted".format(args.infile.name),
              file=sys.stderr)
        sys.exit(1)
        return

    if not olefile.isOleFile(args.infile):
        raise AssertionError("Not OLE file")

    if args.verbose:
        logger.removeHandler(logging.NullHandler())
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")

    file = OfficeFile(args.infile)

    if not args.password:
        raise AssertionError("Password is required")
    file.load_key(password=args.password)

    if args.outfile is None:
        ifWIN32SetBinary(sys.stdout)
        # Prefer the underlying binary stream when sys.stdout exposes one.
        if hasattr(sys.stdout, 'buffer'):
            args.outfile = sys.stdout.buffer
        else:
            args.outfile = sys.stdout

    file.decrypt(args.outfile)
Example #19
0
def BasicInfoGa(targetFile):
    """Print basic OLE, encryption and VBA-macro information for a file.

    ``MacroHunter`` is run when the ``vba_macros`` indicator reports True,
    and also when the scan produced no ``vba_macros`` indicator at all.

    :param targetFile: file to analyse.
    """
    # Check for ole structures
    ole_detected = isOleFile(targetFile) == True
    if ole_detected:
        print(f"{infoS} Ole File: {green}True{white}")
    else:
        print(f"{infoS} Ole File: {red}False{white}")

    # Check for encryption
    encrypted = is_encrypted(targetFile) == True
    if encrypted:
        print(f"{infoS} Encrypted: {green}True{white}")
    else:
        print(f"{infoS} Encrypted: {red}False{white}")

    # VBA_MACRO scanner
    vbascan = OleID(targetFile)
    vbascan.check()

    # Keep only the indicators that describe VBA macros
    macro_indicators = [
        indicator for indicator in vbascan.indicators
        if indicator.id == "vba_macros"
    ]
    if not macro_indicators:
        # No verdict from the scanner: hunt for macros anyway
        MacroHunter(targetFile)
        return

    for indicator in macro_indicators:
        if indicator.value == True:
            print(f"{infoS} VBA Macros: {green}Found{white}")
            MacroHunter(targetFile)
        else:
            print(f"{infoS} VBA Macros: {red}Not Found{white}")
Example #20
0
def unquarantine(f):
    """Try the known quarantine decoders on ``f`` and return the first result.

    Order of attempts: ``mcafee_unquarantine`` for ``.bup`` files or OLE
    files (the OLE check needs olefile), then the ``func_map`` handler
    registered for the lower-cased extension, then each generic decoder in
    turn. A decoder that raises is skipped.

    :param f: path of the quarantined file.
    :returns: whatever the first successful decoder returns; None when no
        generic decoder produced a truthy result.
    """
    ext = os.path.splitext(os.path.basename(f))[1].lower()

    if not HAVE_OLEFILE:
        log.info("Missed olefile dependency: pip install olefile")
    if ext == ".bup" or (HAVE_OLEFILE and olefile.isOleFile(f)):
        # Catch Exception only, so Ctrl-C and SystemExit are not swallowed.
        try:
            return mcafee_unquarantine(f)
        except Exception:
            # Best effort: fall through to the other decoders.
            pass

    if ext in func_map:
        try:
            return func_map[ext](f)
        except Exception as e:
            print(e)

    for func in (kav_unquarantine, trend_unquarantine, sep_unquarantine,
                 mse_unquarantine, xorff_unquarantine):
        try:
            quarfile = func(f)
        except Exception:
            # This decoder does not understand the file; try the next one.
            continue
        if quarfile:
            return quarfile

    return None
Example #21
0
 def __init__(self, file):
     """Initialise from an open file object holding an OLE or ZIP container.

     For an OLE container the ``EncryptionInfo`` stream is parsed by
     ``_parseinfo`` into ``self.type`` / ``self.info`` and the accepted key
     types are recorded. A ZIP file is stored unchanged with no type.

     :param file: seekable file object; it is rewound before inspection.
     :raises Exception: when ``file`` is neither an OLE nor a ZIP file.
     """
     self.format = "ooxml"
     # TODO: Investigate the effect (required for olefile.isOleFile)
     file.seek(0)

     # olefile cannot process non password protected ooxml files.
     # TODO: this code is duplicate of OfficeFile(). Merge?
     if olefile.isOleFile(file):
         self.file = olefile.OleFileIO(file)
         with self.file.openstream("EncryptionInfo") as info_stream:
             self.type, self.info = _parseinfo(info_stream)
         logger.debug("OOXMLFile.type: {}".format(self.type))
         self.secret_key = None
         # "extensible" (like any other type) leaves keyTypes unset.
         if self.type == "agile":
             # TODO: Support aliases?
             self.keyTypes = ("password", "private_key", "secret_key")
         elif self.type == "standard":
             self.keyTypes = ("password", "secret_key")
         return

     if not zipfile.is_zipfile(file):
         raise Exception("Unsupported file format")

     self.file = file
     self.type, self.info = None, None
     self.secret_key = None
Example #22
0
    def check(self):
        """
        Open file and run all checks on it.

        The first indicator appended is ``ole_format``. If no parsed file is
        attached and ``self.filename`` is not an OLE file, it is set to
        False and the method returns without running the other checks.

        :returns: list of all :py:class:`Indicator`s created
        """
        # check if it is actually an OLE file:
        oleformat = Indicator('ole_format', True, name='OLE format')
        self.indicators.append(oleformat)

        if self.ole:
            oleformat.value = True
        else:
            if not olefile.isOleFile(self.filename):
                oleformat.value = False
                return self.indicators
            # parse file:
            self.ole = olefile.OleFileIO(self.filename)

        # checks, looked up and run one at a time in this order:
        for suffix in ('properties', 'encrypted', 'word', 'excel',
                       'powerpoint', 'visio', 'object_pool', 'flash'):
            getattr(self, 'check_' + suffix)()

        self.ole.close()
        return self.indicators
Example #23
0
def test(filenames, ole_file_class=OleRecordFile,
         must_parse=None, do_per_record=None, verbose=False):
    """ parse all given file names and print rough structure

    if an error occurs while parsing a stream of type in must_parse, the error
    will be raised. Otherwise a message is printed. With must_parse unset,
    every parsing error is raised.

    :param filenames: iterable of paths; non-OLE files are skipped.
    :param ole_file_class: class instantiated with each file name.
    :param must_parse: type (or tuple of types) whose streams must parse.
    :param do_per_record: callable invoked with every record.
    :param verbose: log at DEBUG instead of INFO level.
    :returns: 2 when no file names were given, 0 otherwise.
    """
    log_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(level=log_level)

    if do_per_record is None:
        def do_per_record(record):         # pylint: disable=function-redefined
            """Default hook: ignore the record."""

    if not filenames:
        logging.info('need file name[s]')
        return 2

    for filename in filenames:
        logging.info('checking file {0}'.format(filename))
        if not olefile.isOleFile(filename):
            logging.info('not an ole file - skip')
            continue

        for stream in ole_file_class(filename).iter_streams():
            logging.info('  parse ' + str(stream))
            try:
                for record in stream.iter_records():
                    logging.info('    ' + str(record))
                    do_per_record(record)
            except Exception:
                # Swallow the error only for streams that are allowed to fail.
                if not must_parse or isinstance(stream, must_parse):
                    raise
                logging.info('  failed to parse', exc_info=True)
    return 0
Example #24
0
def printDump(bupname, DumpFunction=IdentityFunction, allfiles=False):
    """Print Hex dump/Hex-ASCII dump of first or all streams.

    Each top-level stream except ``Details`` is read, passed through
    ``decryptStream`` and ``DumpFunction`` and written with
    ``StdoutWriteChunked``; consecutive dumps are separated by a newline.

    :param bupname: path of the OLE file to dump.
    :param DumpFunction: callable applied to the decrypted stream data.
    :param allfiles: dump every stream instead of only the first one.

    Any failure is reported on stderr and terminates the process with
    status 1.
    """
    if sys.platform == 'win32' and DumpFunction == IdentityFunction:
        import msvcrt
        msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    try:
        if olefile.isOleFile(bupname) is not True:
            # sys.stderr.write works on Python 2 and 3 alike.
            sys.stderr.write('Error - %s is not a valid OLE file.\n' % bupname)
            sys.exit(1)

        ole = olefile.OleFileIO(bupname)
        try:
            printNewline = False
            for entry in ole.listdir():
                if entry[0] == "Details":
                    continue
                if printNewline:
                    # Explicit write: a bare ``print`` only emits a newline
                    # on Python 2 and does nothing on Python 3.
                    sys.stdout.write('\n')
                printNewline = True
                StdoutWriteChunked(DumpFunction(decryptStream(ole.openstream(entry[0]).read())))
                if not allfiles:
                    break
        finally:
            # Close the container on success and on error.
            ole.close()
    except Exception as e:
        sys.stderr.write('Error - %s\n' % e)
        sys.exit(1)
Example #25
0
 def _winoffice(self):
     """Process a winoffice file using olefile/oletools.

     Marks the file dangerous when it cannot be parsed, has parsing issues,
     is encrypted, or contains a macro storage, an object pool or embedded
     flash. The description 'WinOffice file' is added in every case.
     """
     macro_paths = ('macros/vba', 'Macros', '_VBA_PROJECT_CUR', 'VBA')
     oid = oletools.oleid.OleID(self.src_path)  # First assume a valid file
     if not olefile.isOleFile(self.src_path):
         # Manual processing, may already count as suspicious
         try:
             ole = olefile.OleFileIO(self.src_path,
                                     raise_defects=olefile.DEFECT_INCORRECT)
         except Exception:
             self.make_dangerous('Unparsable WinOffice file')
         else:
             # Query the container only if it could be opened; otherwise
             # ``ole`` was never assigned.
             if ole.parsing_issues:
                 self.make_dangerous('Parsing issues with WinOffice file')
             elif any(ole.exists(path) for path in macro_paths):
                 self.make_dangerous('WinOffice file containing a macro')
     else:
         indicators = oid.check()
         # Encrypted can be set by multiple checks on the script
         if oid.encrypted.value:
             self.make_dangerous('Encrypted WinOffice file')
         if oid.macros.value or any(oid.ole.exists(path) for path in macro_paths):
             self.make_dangerous('WinOffice file containing a macro')
         for i in indicators:
             if i.id == 'ObjectPool' and i.value:
                 self.make_dangerous(
                     'WinOffice file containing an object pool')
             elif i.id == 'flash' and i.value:
                 self.make_dangerous('WinOffice file with embedded flash')
     self.add_description('WinOffice file')
def main():
    """Command-line entry point: decrypt an OLE office file with a password.

    The decrypted data is written to ``outfile`` or, when that is None, to
    standard output.

    :raises AssertionError: for a non-OLE input or a missing password.
    """
    args = parser.parse_args()

    if not olefile.isOleFile(args.infile):
        raise AssertionError("No OLE file")

    # Set up logging before the OfficeFile is built so that any debug
    # messages logged during construction are shown with --verbose.
    if args.verbose:
        logger.removeHandler(logging.NullHandler())
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")

    file = OfficeFile(args.infile)

    if not args.password:
        raise AssertionError("Password is required")
    file.load_key(password=args.password)

    if args.outfile is None:
        ifWIN32SetBinary(sys.stdout)
        # Prefer the underlying binary stream when sys.stdout exposes one.
        if hasattr(sys.stdout, 'buffer'):
            args.outfile = sys.stdout.buffer
        else:
            args.outfile = sys.stdout

    file.decrypt(args.outfile)
Example #27
0
    def test_rough_doctype(self):
        """Checks all samples, expect either ole files or good ooxml output"""
        word = ooxml.DOCTYPE_WORD
        excel = ooxml.DOCTYPE_EXCEL
        powerpoint = ooxml.DOCTYPE_POWERPOINT

        # map from extension to expected doctype
        ext2doc = {
            'docx': word, 'docm': word, 'dotx': word, 'dotm': word,
            'xml': (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_WORD_XML),
            'xlsx': excel, 'xlsm': excel, 'xlsb': excel,
            'xlam': excel, 'xltx': excel, 'xltm': excel,
            'pptx': powerpoint, 'pptm': powerpoint, 'ppsx': powerpoint,
            'ppsm': powerpoint, 'potx': powerpoint, 'potm': powerpoint,
        }

        # files that are neither OLE nor xml:
        except_files = 'empty', 'text'
        except_extns = 'rtf', 'csv'

        def debug(message):
            """Print ``message`` only when debugging is switched on."""
            if self.DO_DEBUG:
                print(message)

        # analyse all files in data dir
        for base_dir, _, files in os.walk(DATA_BASE_DIR):
            for filename in files:
                if filename in except_files:
                    debug('skip file: ' + filename)
                    continue

                # extension without the leading dot ('' when there is none)
                extn = splitext(filename)[1][1:]
                if extn in except_extns:
                    debug('skip extn: ' + filename)
                    continue

                full_name = join(base_dir, filename)
                if isOleFile(full_name):
                    debug('skip ole: ' + filename)
                    continue

                acceptable = ext2doc[extn]
                if not isinstance(acceptable, tuple):
                    acceptable = (acceptable, )

                try:
                    doctype = ooxml.get_type(full_name)
                except Exception:
                    self.fail('Failed to get doctype of {0}'.format(filename))

                self.assertTrue(
                    doctype in acceptable,
                    msg='Doctype "{0}" for {1} not acceptable'.format(
                        doctype, full_name))
                debug('ok: {0} --> {1}'.format(filename, doctype))
Example #28
0
def test(filenames, ole_file_class=OleRecordFile,
         must_parse=None, do_per_record=None, verbose=False):
    """ parse all given file names and print rough structure

    if an error occurs while parsing a stream of type in must_parse, the error
    will be raised. Otherwise a message is printed. Without must_parse, any
    parsing error is raised.

    :param filenames: iterable of paths; files that are not OLE are skipped.
    :param ole_file_class: class instantiated with each file name.
    :param must_parse: type (or tuple of types) whose streams must parse.
    :param do_per_record: callable invoked with every record.
    :param verbose: log at DEBUG instead of INFO level.
    :returns: 2 when no file names were given, 0 otherwise.
    """
    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

    def ignore_record(record):
        """Default hook: do nothing with the record."""

    per_record = ignore_record if do_per_record is None else do_per_record

    if not filenames:
        logging.info('need file name[s]')
        return 2

    for filename in filenames:
        logging.info('checking file {0}'.format(filename))
        if olefile.isOleFile(filename):
            ole = ole_file_class(filename)
        else:
            logging.info('not an ole file - skip')
            continue

        for stream in ole.iter_streams():
            logging.info('  parse ' + str(stream))
            try:
                for record in stream.iter_records():
                    logging.info('    ' + str(record))
                    per_record(record)
            except Exception:
                tolerated = bool(must_parse) and not isinstance(stream, must_parse)
                if not tolerated:
                    raise
                logging.info('  failed to parse', exc_info=True)
    return 0
Example #29
0
    def __init__(self, data: bytes):
        """Run the oletools OleID checks on ``data`` when it is an OLE file.

        :param data: value handed to ``isOleFile`` and ``OleFileIO``.

        ``self.oid`` stays None for non-OLE input.
        """
        self.oid: Optional[oletools.oleid.OleID] = None

        if not isOleFile(data):
            return

        # Assign before checking so the OleID object is kept either way.
        self.oid = oletools.oleid.OleID(OleFileIO(data))
        self.oid.check()
def validate(msg):
    """Build a validation report for ``msg``.

    :param msg: the input to validate.
    :returns: dict with an ``'input'`` section (class name, whether it has
        a length, and that length or None) and an ``'olefile'`` section
        (``'valid'``). When the input is a valid OLE file a ``'message'``
        section is added: ``'initializes'`` tells whether ``Message(msg)``
        could be constructed and, if so, ``'msg'`` holds the result of
        ``validate_msg``.
    """
    input_report = {
        'class': get_full_class_name(msg), # Get the full name of the class
        'has_len': has_len(msg), # Does the input have a __len__ attribute?
    }
    # If input has __len__, put the value here
    input_report['len'] = len(msg) if has_len(msg) else None

    is_ole = olefile.isOleFile(msg)
    validation_dict = {
        'input': input_report,
        'olefile': {'valid': is_ole},
    }
    if not is_ole:
        return validation_dict

    message_report = {'initializes': False}
    validation_dict['message'] = message_report
    try:
        msg_instance = Message(msg)
    except NotImplementedError:
        # Should we have a special procedure for handling it if we get "not implemented"?
        pass
    except:
        # NOTE(review): catches everything, including KeyboardInterrupt.
        pass
    else:
        message_report['initializes'] = True
        message_report['msg'] = validate_msg(msg_instance)
    return validation_dict
Example #31
0
    def getZipFiles(self, attachment, filename):
        """Checks a zip for parsable files and extracts all macros.

        Every entry returned by ``self.zipwalk`` that is an OLE file, or
        whose extension is listed in ``EXTENSIONS``, is handed to the VBA
        parser.

        :param attachment: raw content of the zip attachment.
        :param filename: attachment name, used for logging only.
        :returns: the code of all extracted macros, each followed by a
            newline, concatenated into one string.
        """
        log.debug(
            "[%d] Found attachment with archive extension - file name: %s" %
            (self.id, filename))
        archive = StringIO.StringIO(attachment)
        collected_code = []

        for zip_name, zip_data in self.zipwalk(archive, 0, []):
            member_buffer = StringIO.StringIO(zip_data)
            extension = os.path.splitext(zip_name.filename)[1]
            # send to the VBA_Parser
            # fallback with extensions - maybe removed in future releases
            if not (olefile.isOleFile(member_buffer) or extension in EXTENSIONS):
                continue

            log.info(
                "[%d] File in zip detected! Name: %s - check for VBA" %
                (self.id, zip_name.filename))
            vba_parser = olevba.VBA_Parser(filename=zip_name.filename,
                                           data=zip_data)
            for macro in vba_parser.extract_all_macros():
                # (subfilename, stream_path, vba_filename, vba_code)
                _subfilename, _stream_path, _vba_filename, vba_code = macro
                collected_code.append(vba_code + '\n')

        return ''.join(collected_code)
Example #32
0
 def match(cls, meta, local_path):
     """Return this class's score for the file at ``local_path``.

     :param meta: object exposing ``mime_type`` and ``extension``.
     :param local_path: file checked with ``isOleFile``.
     :returns: ``cls.BASE_SCORE`` when the file is an OLE file and either
         its MIME type is in ``cls.MIME_TYPES`` or its extension is in
         ``cls.EXTENSIONS``; -1 in every other case.
     """
     score = -1
     if isOleFile(local_path):
         # MIME type first; the extension is only the fallback.
         if meta.mime_type in cls.MIME_TYPES:
             score = cls.BASE_SCORE
         elif meta.extension in cls.EXTENSIONS:
             score = cls.BASE_SCORE
     return score
def _get_reference(ole, txrm_name, custom_reference, ignore_reference):
    if custom_reference is not None:
        logging.info("%s is being processed with file %s as a reference.",
                     txrm_name, custom_reference.name)
        reference_path = str(custom_reference)
        try:
            if isOleFile(reference_path):
                with OleFileIO(reference_path) as ref_ole:
                    references = txrm_wrapper.extract_all_images(
                        ref_ole)  # should be float for averaging & dividing
            elif ".tif" in reference_path:
                with tf.TiffFile(reference_path) as tif:
                    references = np.asarray(tif.pages[:])
            else:
                msg = f"Unable to open file '{reference_path}'. Only tif/tiff or xrm/txrm files are supported for custom references."
                logging.error(msg)
                raise IOError(msg)
        except:
            logging.error("Error occurred reading custom reference",
                          exc_info=True)
            raise
        if len(references) > 1:
            # if reference file is an image stack take median of the images
            return _dynamic_despeckle_and_average_series(references)
        return references[0]

    elif ole.exists("ReferenceData/Image") and not ignore_reference:
        logging.info("Internal reference will be applied to %s", txrm_name)
        return txrm_wrapper.extract_reference_image(ole)

    logging.debug("%s is being processed without a reference.", txrm_name)
    return None
Example #34
0
def main():
    """Command-line entry point: test for encryption or decrypt an Office file.

    Arguments come from the module-level ``parser``.  With
    ``args.test_encrypted`` set, the function only reports whether
    ``args.infile`` is encrypted and exits with status 1 when it is not.
    Otherwise the input must be an OLE file and a password is required;
    the result is handed to ``OfficeFile.decrypt`` with ``args.outfile``
    (or standard output when no output file was given).

    Raises:
        AssertionError: if the input is not an OLE file, or if no password
            was supplied.
    """
    args = parser.parse_args()

    if args.test_encrypted:
        if not is_encrypted(args.infile):
            print("{}: not encrypted".format(args.infile.name), file=sys.stderr)
            sys.exit(1)
        else:
            logger.debug("{}: encrypted".format(args.infile.name))
        return

    if not olefile.isOleFile(args.infile):
        raise AssertionError("Not OLE file")

    if args.verbose:
        # NOTE(review): this passes a freshly created NullHandler, not the
        # instance attached earlier, so it presumably removes nothing - confirm.
        logger.removeHandler(logging.NullHandler())
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")

    file = OfficeFile(args.infile)

    if args.password:
        # this will always raise an error for 2000-03 files, cannot be decrypted.
        # TODO: check and return output stating such, allowing safedocs to ignore file.
        file.load_key(password=args.password)
    else:
        raise AssertionError("Password is required")

    if args.outfile is None:
        ifWIN32SetBinary(sys.stdout)
        # Python 3 exposes the binary layer as sys.stdout.buffer; where the
        # attribute is missing (Python 2), sys.stdout itself is used.
        if hasattr(sys.stdout, 'buffer'):
            args.outfile = sys.stdout.buffer
        else:
            args.outfile = sys.stdout

    file.decrypt(args.outfile)
Example #35
0
    def collectMetaDataFromFile(self, file):
        """Read every asset metadata record stored in an OLE file.

        The records are read from the 'FileAssetMetaData2' stream or, when
        that one is missing, from 'FileAssetMetaData3'.  Only the latter is
        read with ``hasResolvedPath`` set to True.

        Args:
            file: path or file-like object accepted by ``olefile``.

        Returns:
            A list with everything ``self.readStream`` yields, or ``None``
            when ``file`` is not an OLE file or contains neither stream.
        """
        if not olefile.isOleFile(file):
            return None

        with olefile.OleFileIO(file) as ole:
            if ole.exists('FileAssetMetaData2'):
                streamName = 'FileAssetMetaData2'
                hasResolvedPath = False
            elif ole.exists('FileAssetMetaData3'):
                streamName = 'FileAssetMetaData3'
                hasResolvedPath = True
            else:
                return None

            oleStream = ole.openstream(streamName)
            # Materialise the records while the container is still open.
            return list(self.readStream(oleStream, hasResolvedPath))
Example #36
0
def validate(msg):
    """Build a report describing whether ``msg`` is a usable MSG input.

    Args:
        msg: the candidate input; passed to ``olefile.isOleFile`` and, when
            that succeeds, to ``Message``.

    Returns:
        A dict with an 'input' section (class name, whether it has a length
        and that length), an 'olefile' section ('valid' flag) and, for valid
        OLE input only, a 'message' section.  'message' always contains
        'initializes'; when ``Message(msg)`` succeeds it also contains 'msg'
        with the result of ``validate_msg``.
    """
    msg_has_len = has_len(msg)  # Does the input have a __len__ attribute?
    validation_dict = {
        'input': {
            'class': get_full_class_name(msg),  # Get the full name of the class
            'has_len': msg_has_len,
            'len': len(msg) if msg_has_len else None,  # If input has __len__, put the value here
        },
        'olefile': {
            'valid': olefile.isOleFile(msg),
        },
    }
    if validation_dict['olefile']['valid']:
        validation_dict['message'] = {
            'initializes': False,
        }
        try:
            msg_instance = Message(msg)
        except Exception:
            # Best effort: any failure (including NotImplementedError) just
            # leaves 'initializes' False.  KeyboardInterrupt and SystemExit
            # are deliberately no longer swallowed.
            # Should we have a special procedure for handling it if we get "not implemented"?
            pass
        else:
            validation_dict['message']['initializes'] = True
            validation_dict['message']['msg'] = validate_msg(msg_instance)
    return validation_dict
Example #37
0
def open_file(filename):
    """Yield OLE containers found in ``filename``; yield None on failure.

    * An OLE file yields a single ``olefile.OleFileIO`` opened on it.
    * A zip archive yields one ``OleFileIO`` per member that starts with the
      OLE magic bytes (at most ``MAX_SIZE`` bytes of the member are read),
      and ``None`` for every member that cannot be read because the archive
      is encrypted.
    * Anything else, or any exception, yields ``None``, which callers treat
      as a failure (non-zero return code).

    The zip archive and its member handles are closed once they are no
    longer needed; yielded ``OleFileIO`` objects built from zip members work
    on an in-memory copy of the data.
    """
    try:
        if olefile.isOleFile(filename):
            print('is ole file: ' + filename)
            # todo: try ppt_parser first
            yield olefile.OleFileIO(filename)
        elif is_zipfile(filename):
            print('is zip file: ' + filename)
            with ZipFile(filename, 'r') as zipper:
                for subfile in zipper.namelist():
                    try:
                        with zipper.open(subfile) as member:
                            head = member.read(len(olefile.MAGIC))
                    except RuntimeError:
                        print('zip is encrypted: ' + filename)  # todo: passwords?!
                        yield None
                        continue

                    if head != olefile.MAGIC:
                        continue  # print('unzip skip: ' + subfile)
                    print('  unzipping ole: ' + subfile)
                    with zipper.open(subfile) as member:
                        ole_bytes = member.read(MAX_SIZE)
                    yield olefile.OleFileIO(ole_bytes)
        else:  # todo: add more file types
            print('open failed: ' + filename)
            yield None  # --> leads to non-0 return code
    except Exception:
        print_exc()
        yield None  # --> leads to non-0 return code
Example #38
0
    def is_valid_msg_file(self):
        """Return True only if ``self.msg_file_path`` exists and is an OLE file."""
        candidate = self.msg_file_path
        # The OLE check is only attempted for paths that exist.
        return os.path.exists(candidate) and bool(isOleFile(candidate))
Example #39
0
    def __init__(self, filedir, endian='<'):
        """Open ``filedir`` as a read-only OLE container.

        Args:
            filedir: path of the OLE file to open.
            endian: byte-order prefix stored in ``self._endian``.

        Raises:
            AssertionError: if ``filedir`` is not an OLE container.  The
                check is an explicit raise (not an ``assert`` statement) so
                that it still runs under ``python -O``.
        """
        self.file_dir = os.path.normpath(filedir)
        self._endian = endian
        if not olefile.isOleFile(filedir):
            raise AssertionError('Input file should be an OLE container')
        self.ole = olefile.OleFileIO(filename=filedir, write_mode=False)

        self.clear_cache()
Example #40
0
    def __init__(self, olefile, path='', parent=None):
        """Wrap an OLE container, opening it first when needed.

        ``olefile`` may be an already opened container (anything with an
        ``openstream`` attribute) or something ``OleFileIO`` can open.

        Raises:
            InvalidOleStorageError: if ``olefile`` has to be opened but is
                not an OLE2 compound file.
        """
        already_open = hasattr(olefile, 'openstream')
        if not already_open:
            # Resolve both helpers up front, then validate before opening.
            check_is_ole = import_isOleFile()
            open_ole = import_OleFileIO()

            if not check_is_ole(olefile):
                raise InvalidOleStorageError(
                    'Not an OLE2 Compound Binary File.')
            olefile = open_ole(olefile)
        OleStorageItem.__init__(self, olefile, path, parent)
 def dubbel_klik(self, event):
     """Handle a double click on an entry of ``self.bestandList``.

     An entry without an extension is treated as a directory and opened via
     ``self.update_directory``.  Otherwise the metadata of PDF files and of
     OLE (legacy Office) files is printed with ``self.console_print``.
     ``event`` is not used.
     """
     # Method for reading files or navigating directories via bestandList.
     selectie = str(self.bestandList.get(self.bestandList.curselection()))
     fp = self.dirVar + selectie
     # Grab the file path because it is used several more times.
     # Take the selection without extensions;
     # rpartition returns "" as its first item when the separator cannot be found.
     if selectie.rpartition(".")[0] == "":
         self.update_directory(self.dirVar + selectie.rpartition(".")[2])
         # Update the directory when there is no extension, i.e. it is a folder.
     else:
         # Otherwise check whether the file is a pdf or an office file.
         if selectie.rpartition(".")[2] == "pdf":
             try:
                 file = open(fp, "rb")
                 parser = PDFParser(file)
                 # Open the file and a parser.
                 doc = PDFDocument(parser, None)
                 docDict = doc.info[0]
                 # Parse the document. For some reason doc.info is a list with
                 # a dictionary as its only item. Pull the dictionary out.
                 self.console_print('"' + selectie + '" is een pdf-bestand.')
                 for tag in docDict.keys():
                     if docDict[tag] != "":
                         self.console_print("%s: %s" % (tag, docDict[tag]))
                     # Print all tags that are not empty. %s and %s are the
                     # first and second values next to it, respectively.
                 # NOTE(review): the file is not closed when parsing raises.
                 file.close()
             # NOTE(review): WindowsError is only defined on Windows - confirm
             # this code is never run elsewhere.
             except (IOError, WindowsError):
                 self.console_print('"' + selectie + '" kan niet gelezen worden.')
         # Note: olefile does not read newer office extensions such as docx.
         if olefile.isOleFile(fp) == True:
             try:
                 ole = olefile.OleFileIO(fp)
                 meta = ole.get_metadata()
                 # Open the file in read-only mode and read the metadata.
                 if ole.exists("WordDocument"):
                     self.console_print('"' + selectie + '" is een wordbestand.')
                 if ole.exists("PowerPoint Document"):
                     self.console_print('"' + selectie + '" is een powerpointbestand.')
                 if ole.exists("Workbook"):
                     self.console_print('"' + selectie + '" is een excelbestand.')
                 # Check whether the storages indicate that the file is a
                 # word, powerpoint or excel file.
                 metaDict = dict(Auteur=str(meta.author), Titel=str(meta.title), Creatiedatum=str(meta.create_time))
                 # Only the creation date, author and title matter. Build a dictionary from them.
                 for tag in metaDict.keys():
                     if metaDict[tag] == "":
                         metaDict[tag] = "Geen"
                     self.console_print("%s: %s" % (tag, metaDict[tag]))
                     # When there is no value it is more convenient to show "Geen".
                 ole.close()
             except (IOError, WindowsError):
                 self.console_print('"' + selectie + '" kan niet gelezen worden.')
Example #42
0
def process_msg(subdir, file):
    """Unpack one message file and delete it afterwards.

    Files that are not OLE containers are delegated to ``process_mime_msg``.
    For OLE messages a "<file>.dir" directory is created, attachments and
    the message itself are extracted with the module's helper functions,
    and the original file is removed.

    Args:
        subdir: directory containing the message.
        file: file name of the message inside ``subdir``.
    """
    msg_path = os.path.join(subdir, file)
    if not olefile.isOleFile(msg_path):
        process_mime_msg(subdir, file)
        return

    os.mkdir(msg_path + ".dir")
    ole = olefile.OleFileIO(msg_path)
    try:
        attach_list = get_msg_attach_list(ole)
        extract_msg_files(attach_list, ole, subdir, file)
        extract_msg_message(ole, subdir, file)
    finally:
        # Release the handle even when extraction fails; the original file
        # is only removed after a fully successful extraction.
        ole.close()
    os.remove(msg_path)
Example #43
0
    def test_rough_doctype(self):
        """Checks all samples, expect either ole files or good ooxml output"""
        # map from extension to expected doctype
        # (a tuple lists several acceptable doctypes for one extension)
        ext2doc = dict(
            docx=ooxml.DOCTYPE_WORD, docm=ooxml.DOCTYPE_WORD,
            dotx=ooxml.DOCTYPE_WORD, dotm=ooxml.DOCTYPE_WORD,
            xml=(ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_WORD_XML),
            xlsx=ooxml.DOCTYPE_EXCEL, xlsm=ooxml.DOCTYPE_EXCEL,
            xlsb=ooxml.DOCTYPE_EXCEL, xlam=ooxml.DOCTYPE_EXCEL,
            xltx=ooxml.DOCTYPE_EXCEL, xltm=ooxml.DOCTYPE_EXCEL,
            pptx=ooxml.DOCTYPE_POWERPOINT, pptm=ooxml.DOCTYPE_POWERPOINT,
            ppsx=ooxml.DOCTYPE_POWERPOINT, ppsm=ooxml.DOCTYPE_POWERPOINT,
            potx=ooxml.DOCTYPE_POWERPOINT, potm=ooxml.DOCTYPE_POWERPOINT,
            ods=ooxml.DOCTYPE_NONE, odt=ooxml.DOCTYPE_NONE,
            odp=ooxml.DOCTYPE_NONE,
        )

        # files that are neither OLE nor xml:
        except_files = 'empty', 'text'
        except_extns = 'rtf', 'csv'

        # analyse all files in data dir
        for base_dir, _, files in os.walk(DATA_BASE_DIR):
            for filename in files:
                if filename in except_files:
                    if self.DO_DEBUG:
                        print('skip file: ' + filename)
                    continue
                extn = splitext(filename)[1]
                if extn:
                    extn = extn[1:]      # remove the dot
                if extn in except_extns:
                    if self.DO_DEBUG:
                        print('skip extn: ' + filename)
                    continue

                full_name = join(base_dir, filename)
                # OLE samples are outside the scope of the ooxml checks
                if isOleFile(full_name):
                    if self.DO_DEBUG:
                        print('skip ole: ' + filename)
                    continue
                # NOTE(review): an extension missing from ext2doc raises
                # KeyError here rather than failing with a test message.
                acceptable = ext2doc[extn]
                if not isinstance(acceptable, tuple):
                    acceptable = (acceptable, )
                try:
                    doctype = ooxml.get_type(full_name)
                except Exception:
                    self.fail('Failed to get doctype of {0}'.format(filename))
                self.assertTrue(doctype in acceptable,
                                msg='Doctype "{0}" for {1} not acceptable'
                                    .format(doctype, full_name))
                if self.DO_DEBUG:
                    print('ok: {0} --> {1}'.format(filename, doctype))
Example #44
0
    def close_destination(self, destination):
        """Turn a finished ``objdata`` destination into an ``RtfObject``.

        Destinations with any other control word are ignored.  The hex text
        of the destination is cleaned, decoded and stored on a new
        ``RtfObject`` appended to ``self.objects``.  The decoded bytes are
        then parsed as an OLE 1.0 object; on success the OLE fields (and the
        package fields or the CLSID of an embedded OLE file) are filled in.
        Parsing failures are only logged at debug level.
        """
        if destination.cword == b'objdata':
            log.debug('*** Close object data at index %Xh' % self.index)
            rtfobj = RtfObject()
            self.objects.append(rtfobj)
            rtfobj.start = destination.start
            rtfobj.end = destination.end
            # Filter out all whitespaces first (just ignored):
            hexdata1 = destination.data.translate(None, b' \t\r\n\f\v')
            # Then filter out any other non-hex character:
            hexdata = re.sub(b'[^a-fA-F0-9]', b'', hexdata1)
            if len(hexdata) < len(hexdata1):
                # this is only for debugging:
                nonhex = re.sub(b'[a-fA-F0-9]', b'', hexdata1)
                log.debug('Found non-hex chars in hexdata: %r' % nonhex)
            # MS Word accepts an extra hex digit, so we need to trim it if present:
            if len(hexdata) & 1:
                log.debug('Odd length, trimmed last byte.')
                hexdata = hexdata[:-1]
            rtfobj.hexdata = hexdata
            object_data = binascii.unhexlify(hexdata)
            rtfobj.rawdata = object_data
            rtfobj.rawdata_md5 = hashlib.md5(object_data).hexdigest()
            # TODO: check if all hex data is extracted properly

            obj = oleobj.OleObject()
            try:
                obj.parse(object_data)
                rtfobj.format_id = obj.format_id
                rtfobj.class_name = obj.class_name
                rtfobj.oledata_size = obj.data_size
                rtfobj.oledata = obj.data
                rtfobj.oledata_md5 = hashlib.md5(obj.data).hexdigest()
                rtfobj.is_ole = True
                if obj.class_name.lower() == b'package':
                    opkg = oleobj.OleNativeStream(bindata=obj.data,
                                                  package=True)
                    rtfobj.filename = opkg.filename
                    rtfobj.src_path = opkg.src_path
                    rtfobj.temp_path = opkg.temp_path
                    rtfobj.olepkgdata = opkg.data
                    rtfobj.olepkgdata_md5 = hashlib.md5(opkg.data).hexdigest()
                    rtfobj.is_package = True
                else:
                    if olefile.isOleFile(obj.data):
                        ole = olefile.OleFileIO(obj.data)
                        try:
                            rtfobj.clsid = ole.root.clsid
                        finally:
                            # The container is only needed for its CLSID.
                            ole.close()
                        rtfobj.clsid_desc = clsid.KNOWN_CLSIDS.get(rtfobj.clsid,
                            'unknown CLSID (please report at https://github.com/decalage2/oletools/issues)')
            except Exception:
                # Best effort: data that does not parse is simply not an
                # OLE 1.0 object.  KeyboardInterrupt/SystemExit propagate.
                log.debug('*** Not an OLE 1.0 Object')
Example #45
0
def process_file(filepath, field_filter_mode=None):
    """ decides which of the process_* functions to call

    Detection order: OLE (xls, encrypted, ppt, doc), then RTF (by its
    leading bytes), then the type reported by ``ooxml.get_type``.  Files
    whose type cannot be determined are processed as csv.

    Args:
        filepath: path of the file to analyse.
        field_filter_mode: passed through to the rtf and docx processors.

    Returns:
        Whatever the selected ``process_*`` function returns; an empty
        unicode string for PowerPoint files.

    Raises:
        FileIsEncryptedError: if the OLE file is encrypted.
    """
    if olefile.isOleFile(filepath):
        logger.debug('Is OLE. Checking streams to see whether this is xls')
        if xls_parser.is_xls(filepath):
            logger.debug('Process file as excel 2003 (xls)')
            return process_xls(filepath)

        # encrypted files also look like ole, even if office 2007+ (xml-based)
        # so check for encryption, first
        ole = olefile.OleFileIO(filepath, path_encoding=None)
        oid = oleid.OleID(ole)
        if oid.check_encrypted().value:
            logger.debug('is encrypted - raise error')
            ole.close()  # not handed on to anyone, so release it here
            raise FileIsEncryptedError(filepath)
        elif oid.check_powerpoint().value:
            logger.debug('is ppt - cannot have DDE')
            ole.close()  # not handed on to anyone, so release it here
            return u''
        else:
            logger.debug('Process file as word 2003 (doc)')
            return process_doc(ole)

    with open(filepath, 'rb') as file_handle:
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        return process_docx(filepath)
    elif doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)
    else:  # could be docx; if not: this is the old default code path
        logger.debug('Process file as word 2007+ (docx)')
        return process_docx(filepath, field_filter_mode)
Example #46
0
    def run(self):
        """Run the selected Office sub-command on the file of the open session.

        The session file must be an OLE file or a zip archive.  The first
        option set in ``self.args`` (export, meta, streams, oleid, vba/code)
        decides which handler is called; with none set, an error and the
        usage text are shown.
        """
        super(Office, self).run()
        if self.args is None:
            return

        if not __sessions__.is_set():
            self.log('error', "No session opened")
            return

        if not HAVE_OLE:
            self.log('error', "Missing dependency, install OleFileIO (`pip install olefile`)")
            return

        # Tests to check for valid Office structures.
        is_ole = olefile.isOleFile(__sessions__.current.file.path)
        is_zip = zipfile.is_zipfile(__sessions__.current.file.path)
        if not is_ole and not is_zip:
            self.log('error', "Not a valid office document")
            return
        if is_ole:
            ole = olefile.OleFileIO(__sessions__.current.file.path)
        else:
            zip_xml = zipfile.ZipFile(__sessions__.current.file.path, 'r')

        # From here on exactly one of ``ole`` / ``zip_xml`` is bound.
        if self.args.export is not None:
            if is_ole:
                self.export(ole, self.args.export)
            else:
                self.xml_export(zip_xml, self.args.export)
            return

        if self.args.meta:
            if is_ole:
                self.metadata(ole)
            else:
                self.xmlmeta(zip_xml)
            return

        if self.args.streams:
            if is_ole:
                self.metatimes(ole)
            else:
                self.xmlstruct(zip_xml)
            return

        if self.args.oleid:
            if is_ole:
                self.oleid(ole)
            else:
                self.log('error', "Not an OLE file")
            return

        if self.args.vba or self.args.code:
            self.parse_vba(self.args.code)
            return

        self.log('error', 'At least one of the parameters is required')
        self.usage()
Example #47
0
def getDetails(bupname):
    """Return the decrypted "Details" stream of the OLE file ``bupname``.

    Prints an error to stderr and exits with status 1 when the file is not
    a valid OLE file or when reading or decrypting fails.
    """
    try:
        is_ole = olefile.isOleFile(bupname)
        if is_ole is not True:
            print >>sys.stderr, 'Error - %s is not a valid OLE file.' % bupname
            sys.exit(1)

        container = olefile.OleFileIO(bupname)
        #clean this up later by catching exception
        details_stream = container.openstream("Details")
        encrypted = details_stream.read()
        plaintext = decryptStream(encrypted)
        container.close()
        return plaintext
    except Exception as err:
        print >>sys.stderr, 'Error - %s' % err
        sys.exit(1)
Example #48
0
 def _winoffice(self):
     """Inspect a legacy Office file and flag it as dangerous when needed.

     Files that ``olefile.isOleFile`` rejects are opened manually with
     strict defect checking: an unparsable file, parsing issues or a macro
     storage mark the file as dangerous.  Valid OLE files are run through
     the ``oletools`` indicators (encryption, macros, ObjectPool, flash).
     The file is always passed to ``self._safe_copy()`` at the end.
     """
     self.cur_file.add_log_details("processing_type", "WinOffice")
     # Try as if it is a valid document
     oid = oletools.oleid.OleID(self.cur_file.src_path)
     if not olefile.isOleFile(self.cur_file.src_path):
         # Manual processing, may already count as suspicious
         try:
             ole = olefile.OleFileIO(self.cur_file.src_path, raise_defects=olefile.DEFECT_INCORRECT)
         except Exception:
             self.cur_file.add_log_details("not_parsable", True)
             self.cur_file.make_dangerous()
         else:
             # Only inspect ``ole`` when opening succeeded; it is unbound
             # otherwise and touching it would raise.
             if ole.parsing_issues:
                 self.cur_file.add_log_details("parsing_issues", True)
                 self.cur_file.make_dangerous()
             elif (
                 ole.exists("macros/vba")
                 or ole.exists("Macros")
                 or ole.exists("_VBA_PROJECT_CUR")
                 or ole.exists("VBA")
             ):
                 self.cur_file.add_log_details("macro", True)
                 self.cur_file.make_dangerous()
     else:
         indicators = oid.check()
         # Encrypted can be set by multiple checks on the script
         if oid.encrypted.value:
             self.cur_file.add_log_details("encrypted", True)
             self.cur_file.make_dangerous()
         if (
             oid.macros.value
             or oid.ole.exists("macros/vba")
             or oid.ole.exists("Macros")
             or oid.ole.exists("_VBA_PROJECT_CUR")
             or oid.ole.exists("VBA")
         ):
             self.cur_file.add_log_details("macro", True)
             self.cur_file.make_dangerous()
         for i in indicators:
             if i._id == "ObjectPool" and i.value:
                 # FIXME: Is it suspicious?
                 self.cur_file.add_log_details("objpool", True)
             elif i._id == "flash" and i.value:
                 self.cur_file.add_log_details("flash", True)
                 self.cur_file.make_dangerous()
     self._safe_copy()
Example #49
0
def process_file(filename, extract):
    """Scan ``filename`` for embedded Packager objects and optionally extract.

    The file is handled as, in this order: an OLE file, an extracted
    Packager stream, a zip archive (members whose name contains
    'oleObject'), or an RTF document.  When ``extract`` is true the data of
    the last packager object found is written to a file named after its
    MD5 hash.
    """
    pkgobj = None 
    if olefile.isOleFile(filename):
        print ' [*] File is an OLE file...'
        # NOTE(review): the result is discarded here, while the zip branch
        # below assigns it to pkgobj - confirm whether that is intended.
        process_olefile(filename)

    elif isstream(filename):
        with open(filename, 'rb') as f:
            sdata = f.read()
            print ' [*] File is an extracted Packager Stream'
            print ' [*] Stream contains Packager Formatted data...'
            # The first four bytes are skipped before parsing the stream
            pkgobj = PackagerStream(sdata[4:].encode('hex'))
            print
            print pkgobj
    else:
        with open(filename, 'rb') as f:
            file_data = f.read()
        # "PK" is the leading signature checked for zip archives
        if file_data[0:2] == "PK":
            print ' [*] File is a zip archive..searching for embedded objects..'
            archive = StringIO(file_data)
            zf = ZipFile(archive, "r")
            for name in zf.namelist():
                if 'oleObject' in name:
                    print ' [*] Found OLE object: %s' % name
                    pkgobj = process_olefile(zf.read(name))
                    if extract:
                        with open(pkgobj.gethash('md5'), 'wb') as out:
                            out.write(pkgobj.Data)

        else:
            # Treat the file as an rtf doc
            rd = RTFDoc(filename)
            print ' [*] Scanning file for embedded objects'
            rd.scan()

    if extract:
        # NOTE(review): pkgobj is still None when nothing was found; the
        # resulting AttributeError is reported by the handler below.  In the
        # zip case the last object is also written a second time here.
        try:
            print ' Extracting embedded data as %s' % pkgobj.gethash('md5')
            with open(pkgobj.gethash('md5'), 'wb') as out:
                out.write(pkgobj.Data)
        except Exception as  e:
            print ' [!] An error occurred while writing the file :: %s' % e 
 def check(self):
     """Run all indicator checks on ``self.filename``.

     An 'ole_format' indicator is always added first.  When the file is not
     an OLE file its value is set to False and no further check is run.
     Otherwise the file is opened as ``self.ole``, every ``check_*`` method
     is called and the file is closed again, also when a check raises.

     Returns:
         ``self.indicators``.
     """
     # check if it is actually an OLE file:
     oleformat = Indicator('ole_format', True, name='OLE format')
     self.indicators.append(oleformat)
     if not olefile.isOleFile(self.filename):
         oleformat.value = False
         return self.indicators
     # parse file:
     self.ole = olefile.OleFileIO(self.filename)
     try:
         # checks:
         self.check_properties()
         self.check_encrypted()
         self.check_word()
         self.check_excel()
         self.check_powerpoint()
         self.check_visio()
         self.check_ObjectPool()
         self.check_flash()
     finally:
         # do not leak the file handle when one of the checks fails
         self.ole.close()
     return self.indicators
def main():
    """Command-line entry point: decrypt an Office file with a key.

    Exactly one of ``-k`` (secretKey value as hex) or ``-p`` (RSA private
    key file) is required.  ``infile`` is opened for binary reading and
    ``outfile`` for binary writing by argparse.

    Raises:
        AssertionError: if ``infile`` is not an OLE file.
    """
    import argparse
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-k', dest='secret_key', help='MS-OFFCRYPTO secretKey value (hex)')
    group.add_argument('-p', dest='private_key', type=argparse.FileType('rb'), help='RSA private key file')
    parser.add_argument('infile', nargs='?', type=argparse.FileType('rb'))
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('wb'))
    args = parser.parse_args()

    if not olefile.isOleFile(args.infile):
        # Call syntax works on Python 2 and 3, unlike ``raise E, "msg"``.
        raise AssertionError("No OLE file")

    file = OfficeFile(args.infile)

    if args.secret_key:
        file.load_skey(binascii.unhexlify(args.secret_key))
    elif args.private_key:
        file.load_privkey(args.private_key)

    file.decrypt(args.outfile)
Example #52
0
def extract(infile, dirname=None):
    if dirname is None:
        dirname = os.getcwd()
    try:
        if olefile.isOleFile(infile) is not True:
            print >>sys.stderr, 'Error - %s is not a valid OLE file.' % infile
            sys.exit(1)
        
        ole = olefile.OleFileIO(infile)
        filelist = ole.listdir()
        for fname in filelist:
            if not ole.get_size(fname[0]):
                print 'Warning: The "%s" stream reports a size of 0. Possibly a corrupt bup.' % fname[0]
            data = ole.openstream(fname[0]).read()
            fp = open(os.path.join(dirname, fname[0]),'wb')
            fp.write(data)
            fp.close()
        ole.close()
        return filelist
    except Exception as e:
        print >>sys.stderr, 'Error - %s' % e
        sys.exit(1)
def process_file(filename, extract):
    pkgobj = None 
    if olefile.isOleFile(filename):
        print ' [*] File is an OLE file...'
        ole = olefile.OleFileIO(filename)
        filelist = ole.listdir()
        print ' [*] Processing Streams...'
        for fname in filelist:
            if '\x01Ole10Native' in fname:
                print ' [*] Found Ole10Native Stream...checking for packager data'
                sdata = ole.openstream(fname).read()
                if sdata[4:6].encode('hex') == '0200':
                    print ' [*] Stream contains Packager Formatted data...'
                    pkgobj = PackagerStream(sdata[4:].encode('hex'))
                    print
                    print pkgobj                    

    elif isstream(filename):
        with open(filename, 'rb') as f:
            sdata = f.read()
            print ' [*] File is an extracted Packager Stream'
            print ' [*] Stream contains Packager Formatted data...'
            pkgobj = PackagerStream(sdata[4:].encode('hex'))
            print
            print pkgobj            
    else:
        # Treat the file as an rtf doc
        rd = RTFDoc(filename)
        print ' [*] Scanning file for embedded objects'
        rd.scan()

    if extract:
        try:
            print ' Extracting embedded data as %s' % pkgobj.gethash('md5')
            with open(pkgobj.gethash('md5'), 'wb') as out:
                out.write(pkgobj.Data)
        except Exception as  e:
            print ' [!] An error occurred while writing the file :: %s' % e 
Example #54
0
 def _winoffice(self):
     """Inspect a legacy Office file and flag it as dangerous when needed.

     Files that ``olefile.isOleFile`` rejects are opened manually with
     strict defect checking: an unparsable file, parsing issues or a macro
     storage mark the file as dangerous.  Valid OLE files are run through
     the ``oletools`` indicators (encryption, macros, ObjectPool, flash).
     The file is always passed to ``self._safe_copy()`` at the end.
     """
     self.cur_file.add_log_details('processing_type', 'WinOffice')
     # Try as if it is a valid document
     oid = oletools.oleid.OleID(self.cur_file.src_path)
     if not olefile.isOleFile(self.cur_file.src_path):
         # Manual processing, may already count as suspicious
         try:
             ole = olefile.OleFileIO(self.cur_file.src_path, raise_defects=olefile.DEFECT_INCORRECT)
         except Exception:
             self.cur_file.add_log_details('not_parsable', True)
             self.cur_file.make_dangerous()
         else:
             # Only inspect ``ole`` when opening succeeded; it is unbound
             # otherwise and touching it would raise.
             if ole.parsing_issues:
                 self.cur_file.add_log_details('parsing_issues', True)
                 self.cur_file.make_dangerous()
             elif ole.exists('macros/vba') or ole.exists('Macros') \
                     or ole.exists('_VBA_PROJECT_CUR') or ole.exists('VBA'):
                 self.cur_file.add_log_details('macro', True)
                 self.cur_file.make_dangerous()
     else:
         indicators = oid.check()
         # Encrypted can be set by multiple checks on the script
         if oid.encrypted.value:
             self.cur_file.add_log_details('encrypted', True)
             self.cur_file.make_dangerous()
         if oid.macros.value or oid.ole.exists('macros/vba') or oid.ole.exists('Macros') \
                 or oid.ole.exists('_VBA_PROJECT_CUR') or oid.ole.exists('VBA'):
             self.cur_file.add_log_details('macro', True)
             self.cur_file.make_dangerous()
         for i in indicators:
             if i._id == 'ObjectPool' and i.value:
                 # FIXME: Is it suspicious?
                 self.cur_file.add_log_details('objpool', True)
             elif i._id == 'flash' and i.value:
                 self.cur_file.add_log_details('flash', True)
                 self.cur_file.make_dangerous()
     self._safe_copy()
Example #55
0
    def run(self):
        """Run the selected Office sub-command on the file of the open session.

        The session file is accepted when it is an OLE file, a zip archive,
        starts with an XML declaration, looks like an MHT Office document,
        or is recognised by ``ooxml.get_type`` as Word/Excel.  The first
        option set in ``self.args`` (export, meta, streams, oleid, vba/code,
        dde) decides which handler is called; with none set, an error and
        the usage text are shown.
        """
        super(Office, self).run()
        if self.args is None:
            return

        if not __sessions__.is_set():
            self.log('error', "No open session. This command expects a file to be open.")
            return

        if not HAVE_OLE:
            self.log('error', "Missing dependency, install OleFileIO (`pip install olefile oletools`)")
            return

        file_data = __sessions__.current.file.data
        if file_data.startswith(b'<?xml'):
            OLD_XML = file_data
        else:
            OLD_XML = False

        # Both operands are bytes: mixing a str needle with bytes data
        # raises TypeError on Python 3.
        if file_data.startswith(b'MIME-Version:') and b'application/x-mso' in file_data:
            MHT_FILE = file_data
        else:
            MHT_FILE = False

        # Check for old office formats
        try:
            doctype = ooxml.get_type(__sessions__.current.file.path)
            OOXML_FILE = True
        except Exception:
            OOXML_FILE = False

        # set defaults
        XLSX_FILE = False
        EXCEL_XML_FILE = False
        DOCX_FILE = False
        if OOXML_FILE is True:
            if doctype == ooxml.DOCTYPE_EXCEL:
                XLSX_FILE = True
            elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
                EXCEL_XML_FILE = True
            elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
                DOCX_FILE = True

        # Tests to check for valid Office structures.
        OLE_FILE = olefile.isOleFile(__sessions__.current.file.path)
        XML_FILE = zipfile.is_zipfile(__sessions__.current.file.path)
        if OLE_FILE:
            ole = olefile.OleFileIO(__sessions__.current.file.path)
        elif XML_FILE:
            zip_xml = zipfile.ZipFile(__sessions__.current.file.path, 'r')
        elif OLD_XML:
            pass
        elif MHT_FILE:
            pass
        elif DOCX_FILE:
            pass
        elif EXCEL_XML_FILE:
            pass
        elif XLSX_FILE:
            pass
        else:
            self.log('error', "Not a valid office document")
            return

        # ``ole`` / ``zip_xml`` are only bound for OLE and zip input, so the
        # handlers that need them are guarded by the matching flag.
        if self.args.export is not None:
            if OLE_FILE:
                self.export(ole, self.args.export)
            elif XML_FILE:
                self.xml_export(zip_xml, self.args.export)
        elif self.args.meta:
            if OLE_FILE:
                self.metadata(ole)
            elif XML_FILE:
                self.xmlmeta(zip_xml)
        elif self.args.streams:
            if OLE_FILE:
                self.metatimes(ole)
            elif XML_FILE:
                self.xmlstruct(zip_xml)
        elif self.args.oleid:
            if OLE_FILE:
                self.oleid(ole)
            else:
                self.log('error', "Not an OLE file")
        elif self.args.vba or self.args.code:
            self.parse_vba(self.args.code)
        elif self.args.dde:
            self.get_dde(__sessions__.current.file.path)
        else:
            self.log('error', 'At least one of the parameters is required')
            self.usage()
Example #56
0
def sendFile():
    """Send every Sticky Note stored in the local notes file to the server.

    The notes file is opened as an OLE container.  Every stream whose last
    path component is ``'0'`` is read, the first top-level ``{...}`` group of
    its text is passed through ``extract_rtf.striprtf``, base64-encoded and
    POSTed to ``server + '/update'`` together with the user credentials.

    Returns:
        ``0`` when every note was accepted.  Otherwise a string describing
        the failure: the "no notes file" message, the server's response
        text, or a request-failure message.
    """
    def _show_status(message, colour):
        # `app` can be None (the original guarded for it in one branch only).
        if app is not None:
            app.infoLabel['text'] = message
            app.infoLabel['fg'] = colour

    home = getSNTFilePath()

    # Validate explicitly: `assert` is stripped when Python runs with -O.
    try:
        notes_file_ok = olefile.isOleFile(home)
    except FileNotFoundError:
        notes_file_ok = False
    if not notes_file_ok:
        message = "No Sticky Notes file found at " + home + ".\nYou need to run the Sticky Notes application for the first time to create a Sticky Notes file."
        # bring app to foreground
        root.deiconify()
        _show_status(message, 'red')
        # Stop here: opening the missing/invalid file would only raise.
        return message

    # Read all notes first so the container is closed exactly once, after
    # the last stream has been read (it used to be closed inside the loop,
    # before the remaining streams were opened).
    ole = olefile.OleFileIO(home)
    try:
        notes = [
            ole.openstream(entry).read()
            for entry in ole.listdir()
            if entry[-1] == '0'
        ]
    finally:
        ole.close()

    counter = 0
    for text in notes:
        textDecode = text.decode("utf-8")

        # Find the index of the '}' that closes the first top-level group.
        openBracketsUnclosed = -1
        searchIndex = -1  # an empty note yields an empty slice below
        for searchIndex, char in enumerate(textDecode):
            if char == '{':
                if openBracketsUnclosed == -1:
                    openBracketsUnclosed = 0
                openBracketsUnclosed += 1
            elif char == '}':
                openBracketsUnclosed -= 1
            if openBracketsUnclosed == 0:
                break

        # base64 encode
        rtfBody = textDecode[textDecode.find("{"):searchIndex + 1]
        plainText = extract_rtf.striprtf(rtfBody.encode("utf-8"))
        base64Encoded = base64.b64encode(plainText.encode('ascii'))

        # create payload
        payload = {
            'user': username,
            'passcode': passcode,
            'number': counter,
            'data': base64Encoded,
        }

        try:
            # make POST request with payload
            r = requests.post(server + '/update', data=payload, timeout=5)
        except requests.exceptions.RequestException as e:
            print(e)
            return "Update to " + server + " failed"

        # A response whose first character is '0' is treated as success.
        if r.text[0:1] == '0':
            print(('Sent note at index {0} success'.format(counter)))
            _show_status('Updated', 'black')
            if app is not None:
                app.setUserPass['text'] = 'Update info'
            counter = counter + 1
        else:
            # {1} (not a second {0}) so the server's message is reported.
            print('Failed to update note at index {0} {1}'.format(counter, r.text))
            _show_status(r.text, 'red')
            # bring app to foreground
            root.deiconify()
            return r.text
    return 0
Example #57
0
    def run(self):
        """Decode the BUP file of the current session.

        The file must be an OLE container.  Its ``File_0`` and ``Details``
        streams are decoded with ``self.xordata(..., 106)``.  The ``key=value``
        lines of the details stream are collected into a table; the last
        backslash-separated component of ``OriginalName`` becomes the file
        name used when switching sessions.

        With ``self.args.session`` set (and non-empty decoded data) the
        decoded file is written under ``/tmp`` and opened as a new session;
        otherwise the details table is logged.
        """
        super(Debup, self).run()
        if self.args is None:
            return

        if not __sessions__.is_set():
            self.log('error', "No session opened")
            return

        if not HAVE_OLE:
            self.log('error', "Missing dependency, install olefile (`pip install olefile`)")
            return

        # Check for valid OLE
        if not olefile.isOleFile(__sessions__.current.file.path):
            self.log('error', "Not a valid BUP File")
            return

        # Extract all the contents from the bup file.
        ole = olefile.OleFileIO(__sessions__.current.file.path)
        try:
            # We know that BUPS are xor'd with 6A which is dec 106 for the decoder
            # This is the stored file.
            data = self.xordata(ole.openstream('File_0').read(), 106)
            # Get the details page
            data2 = self.xordata(ole.openstream('Details').read(), 106)
        finally:
            # Close the OLE even when a stream is missing or decoding fails.
            ole.close()

        # Process the details file
        rows = []
        filename = ''
        for line in data2.split('\n'):
            # Split on the first '=' only so values that themselves contain
            # '=' are kept whole instead of being dropped or truncated.
            key, separator, value = line.partition('=')
            if not separator:
                continue
            # Strip the \r from the value, but only if it is really there.
            if value.endswith('\r'):
                value = value[:-1]
            if line.startswith('OriginalName'):
                filename = str(value.split('\\')[-1])
            rows.append([key, value])

        # If we opted to switch session then do that
        if data and self.args.session:
            try:
                tempname = os.path.join('/tmp', filename)
                # NOTE(review): text mode is kept from the original; confirm
                # that xordata() returns str rather than bytes on Python 3.
                with open(tempname, 'w') as temp:
                    temp.write(data)
                self.log('info', "Switching Session to Embedded File")
                __sessions__.new(tempname)
                return
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit propagate.
                self.log('error', "Unble to Switch Session")
        # Else just print the details
        else:
            self.log('info', "BUP Details:")
            self.log('table', dict(header=['Description', 'Value'], rows=rows))
Example #58
0
def sendFile():
    """Send every Sticky Note stored in the user's StickyNotes.snt file.

    A 10-second ``threading.Timer`` calling ``updateUISendFile`` is started
    first and stored in the global ``sendFileTimer``.  The notes file is then
    opened as an OLE container; every stream whose last path component is
    ``'0'`` is read, base64-encoded and POSTed to ``server + '/update'``
    together with the user credentials.

    Returns:
        ``0`` when every note was accepted.  Otherwise a string describing
        the failure: the "no notes file" message, the server's response
        text, or a request-failure message.
    """
    global sendFileTimer
    sendFileTimer = threading.Timer(10, updateUISendFile)
    sendFileTimer.start()

    def _show_status(message, colour):
        # `app` can be None (the original guarded for it in one branch only).
        if app is not None:
            app.infoLabel['text'] = message
            app.infoLabel['fg'] = colour

    # Raw string: same value as before, without invalid escape sequences.
    home = expanduser("~")
    home += r"\AppData\Roaming\Microsoft\Sticky Notes\StickyNotes.snt"

    # Validate explicitly: `assert` is stripped when Python runs with -O.
    try:
        notes_file_ok = olefile.isOleFile(home)
    except FileNotFoundError:
        notes_file_ok = False
    if not notes_file_ok:
        message = "No Sticky Notes file found at " + home + ".\nYou need to run the Sticky Notes application for the first time to create a Sticky Notes file."
        # bring app to foreground
        root.deiconify()
        _show_status(message, 'red')
        # Stop here: opening the missing/invalid file would only raise.
        return message

    # Read all notes first so the container is closed exactly once, after
    # the last stream has been read (it used to be closed inside the loop,
    # before the remaining streams were opened).
    ole = olefile.OleFileIO(home)
    try:
        notes = [
            ole.openstream(entry).read()
            for entry in ole.listdir()
            if entry[-1] == '0'
        ]
    finally:
        ole.close()

    counter = 0
    for text in notes:
        # base64 encode
        base64Encoded = base64.b64encode(text)

        # create payload
        payload = {
            'user': username,
            'passcode': passcode,
            'number': counter,
            'data': base64Encoded,
        }

        try:
            # make POST request with payload
            r = requests.post(server + '/update', data=payload, timeout=5)
        except requests.exceptions.RequestException as e:
            print(e)
            return "Update to " + server + " failed"

        # A response whose first character is '0' is treated as success.
        if r.text[0:1] == '0':
            print(('Sent note at index {0} success'.format(counter)))
            _show_status('Updated', 'black')
            if app is not None:
                app.setUserPass['text'] = 'Update info'
            counter = counter + 1
        else:
            # {1} (not a second {0}) so the server's message is reported.
            print('Fail update note at index {0} {1}'.format(counter, r.text))
            _show_status(r.text, 'red')
            # bring app to foreground
            root.deiconify()
            return r.text
    return 0
Example #59
0
    def run(self):
        """Run the selected Office analysis on the file of the open session.

        The file is classified as an OLE container, a zip archive, a file
        starting with ``<?xml`` or an MHT file (starts with ``MIME-Version:``
        and contains ``application/x-mso``); anything else is rejected.
        Exactly one action is then dispatched, chosen from the parsed
        arguments in this order: export, meta, streams, oleid, vba/code.
        The OLE or zip handle opened here is closed before returning.
        """
        super(Office, self).run()
        if self.args is None:
            return

        if not __sessions__.is_set():
            self.log('error', "No open session")
            return

        if not HAVE_OLE:
            self.log('error', "Missing dependency, install OleFileIO (`pip install olefile oletools`)")
            return

        file_data = __sessions__.current.file.data
        OLD_XML = file_data.startswith(b'<?xml')
        # The needle must be bytes as well: `str in bytes` raises TypeError
        # on Python 3 (on Python 2 b'...' is a plain str, so this is safe).
        MHT_FILE = (file_data.startswith(b'MIME-Version:')
                    and b'application/x-mso' in file_data)

        # Tests to check for valid Office structures.
        path = __sessions__.current.file.path
        OLE_FILE = olefile.isOleFile(path)
        XML_FILE = zipfile.is_zipfile(path)
        if not (OLE_FILE or XML_FILE or OLD_XML or MHT_FILE):
            self.log('error', "Not a valid office document")
            return

        ole = None
        zip_xml = None
        if OLE_FILE:
            ole = olefile.OleFileIO(path)
        elif XML_FILE:
            zip_xml = zipfile.ZipFile(path, 'r')

        try:
            if self.args.export is not None:
                if OLE_FILE:
                    self.export(ole, self.args.export)
                elif XML_FILE:
                    self.xml_export(zip_xml, self.args.export)
            elif self.args.meta:
                if OLE_FILE:
                    self.metadata(ole)
                elif XML_FILE:
                    self.xmlmeta(zip_xml)
            elif self.args.streams:
                if OLE_FILE:
                    self.metatimes(ole)
                elif XML_FILE:
                    self.xmlstruct(zip_xml)
            elif self.args.oleid:
                if OLE_FILE:
                    self.oleid(ole)
                else:
                    self.log('error', "Not an OLE file")
            elif self.args.vba or self.args.code:
                self.parse_vba(self.args.code)
            else:
                self.log('error', 'At least one of the parameters is required')
                self.usage()
        finally:
            # Release whichever container handle was opened above.
            if ole is not None:
                ole.close()
            if zip_xml is not None:
                zip_xml.close()
    def txrm(self,
             array_name=None,
             x_start=0,
             x_end=0,
             x_step=1,
             y_start=0,
             y_end=0,
             y_step=1,
             z_start=0,
             z_end=0,
             z_step=1):
        """
        Read 3-D tomographic projection data, or the projection angles,
        from the TXRM file ``self.file_name``.

        Parameters

        array_name : str, optional
            ``"theta"`` returns the values stored in ``ImageInfo/Angles``;
            any other value returns the image stack.

        x_start, x_end, x_step : scalar, optional
            Values of the start, end and step of the
            slicing for the whole array.

        y_start, y_end, y_step : scalar, optional
            Values of the start, end and step of the
            slicing for the whole array.

        z_start, z_end, z_step : scalar, optional
            Values of the start, end and step of the
            slicing for the whole array.

        An ``*_end`` value of 0 means "up to the end of that axis".

        Returns

        out : array or None
            For ``"theta"`` a 1-D array sliced along z; otherwise a float32
            array indexed ``[z, y, x]`` (image, row, column).  None is
            returned when a required ``ImageInfo`` stream is missing, when
            the stored data type is neither 10 (float) nor 5 (uint16), or
            when a KeyError is raised while reading.
        """
        verbose = True

        def report(message):
            # Single-argument print() behaves the same on Python 2 and 3.
            if verbose:
                print(message)

        def read_uint32(ole, stream_name):
            # Little-endian uint32 held in `stream_name`, or None if absent.
            if not ole.exists(stream_name):
                return None
            return struct.unpack('<I', ole.openstream(stream_name).read())[0]

        try:
            ole = olef.OleFileIO(self.file_name)
            try:
                if array_name == "theta":
                    n_images = read_uint32(ole, 'ImageInfo/ImagesTaken')
                    if n_images is None or not ole.exists('ImageInfo/Angles'):
                        # Previously this ended in an unbound-local error.
                        return None
                    report("ImageInfo/ImagesTaken = %i" % n_images)

                    data = ole.openstream('ImageInfo/Angles').read()
                    angles = struct.unpack("<{}f".format(n_images), data)
                    report("ImageInfo/Angles: \n  %s" % (angles,))
                    theta = np.asarray(angles)
                    if z_end == 0:
                        z_end = theta.size
                    report("Constructed theta")
                    dataset = theta[z_start:z_end:z_step]
                else:
                    n_cols = read_uint32(ole, 'ImageInfo/ImageWidth')
                    if n_cols is not None:
                        report("ImageInfo/ImageWidth = %i" % n_cols)
                    n_rows = read_uint32(ole, 'ImageInfo/ImageHeight')
                    if n_rows is not None:
                        report("ImageInfo/ImageHeight = %i" % n_rows)
                    n_images = read_uint32(ole, 'ImageInfo/ImagesTaken')
                    if n_images is not None:
                        report("ImageInfo/ImagesTaken = %i" % n_images)
                    # 10 float; 5 uint16 (unsigned 16-bit (2-byte) integers)
                    datatype = read_uint32(ole, 'ImageInfo/DataType')
                    if datatype is not None:
                        report("ImageInfo/DataType: %f " % datatype)
                    if None in (n_cols, n_rows, n_images, datatype):
                        return None

                    # uint16 is read as unsigned ('<u2'); the signed 'h'
                    # format used before wrapped values above 32767.
                    pixel_dtypes = {10: '<f4', 5: '<u2'}
                    if datatype not in pixel_dtypes:
                        print("Wrong data type")
                        return None
                    pixel_dtype = np.dtype(pixel_dtypes[datatype])

                    report('Reading images - please wait...')
                    # Allocated directly in [z, y, x] order, which is what
                    # the swapaxes(0, 2) of the (x, y, z) stack was meant to
                    # produce.
                    absdata = np.empty((n_images, n_rows, n_cols),
                                       dtype=np.float32)
                    # Read the images - They are stored in ImageData1,
                    # ImageData2... Each folder contains 100 images 1-100,
                    # 101-200...
                    for i in range(1, n_images + 1):
                        img_string = "ImageData%i/Image%i" % ((i - 1) // 100 + 1, i)
                        frame = np.frombuffer(
                            ole.openstream(img_string).read(),
                            dtype=pixel_dtype)
                        # Store every frame inside the loop (only the last
                        # one used to be kept).  Pixels are row-major with
                        # n_cols values per row.
                        absdata[i - 1] = frame.reshape(n_rows, n_cols)

                    num_z, num_y, num_x = absdata.shape
                    if x_end == 0:
                        x_end = num_x
                    if y_end == 0:
                        y_end = num_y
                    if z_end == 0:
                        z_end = num_z
                    # Construct dataset from desired z, y, x.
                    dataset = absdata[z_start:z_end:z_step,
                                      y_start:y_end:y_step,
                                      x_start:x_end:x_step]
            finally:
                # Runs on early returns and on errors as well.
                ole.close()

        except KeyError:
            dataset = None

        return dataset