def check_activeX_ole_contents_swf(self, unzip_dir, office_type=""):
    """
    Condition: activeX & SWF

    Walk ``unzip_dir`` and inspect every file whose name starts with
    ``activeX<1-2 digits>.bin``. File contents are cached in
    ``self.activeX_bin`` keyed by the bare file name, so a name seen in
    several directories is only read once.

    A file is flagged when its first four bytes equal ``bin_docfile`` and
    it contains an OLE stream named "Contents" whose bytes 8..10 are
    ``b'FWS'`` (presumably the SWF signature, going by the function name).

    :param unzip_dir: root directory of the unpacked document.
    :param office_type: the check is skipped for 'ppt'.
    :return: True if at least one file was flagged, otherwise False.
    """
    # Precondition
    if office_type == 'ppt':
        return False

    ret = False
    bin_docfile = b"\xD0\xCF\x11\xE0"
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches the literal "." in front of the extension.
    name_pattern = re.compile(r'activeX\d{1,2}\.bin')

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not name_pattern.match(filename):
                continue

            if filename not in self.activeX_bin:
                file_path = os.path.join(root, filename)
                # The file is only read, so do not ask for write access.
                with open(file_path, "rb") as f:
                    self.activeX_bin[filename] = f.read()

            data = self.activeX_bin[filename]
            if data[:4] != bin_docfile:
                continue

            ole_ = olefile.OleFileIO(data)
            try:
                for stream in ole_.listdir():
                    if stream[-1] == "Contents":
                        content = ole_.openstream(stream).read()
                        if content[8:11] == b'FWS':
                            ret = True
                            # Leaves only the stream loop; the remaining
                            # files are still walked and cached.
                            break
            finally:
                ole_.close()
    return ret
def check_equation_editor_harmful_face2(self, unzip_dir, office_type=""):
    """
    Look for embedded OLE objects that carry ``bin_eqn_clsid`` and whose
    native stream does not start with the expected 4-byte header.

    Every file under ``unzip_dir`` whose name starts with
    ``oleObject<1-2 digits>.bin`` is read (contents cached in
    ``self.oleObject_bin`` keyed by bare file name). A file is flagged when
    it starts with ``bin_docfile``, contains ``bin_eqn_clsid`` anywhere in
    its bytes, and has a stream named "\\x01Ole10Native" or
    "Equation Native" (case-insensitive) whose first four bytes are not
    ``1C 00 00 00``.

    :param unzip_dir: root directory of the unpacked document.
    :param office_type: accepted for signature parity; not used here.
    :return: True if at least one file was flagged, otherwise False.
    """
    ret = False
    bin_docfile = b"\xD0\xCF\x11\xE0"
    bin_eqn_clsid = b"\x02\xCE\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches the literal "." in front of the extension.
    name_pattern = re.compile(r'oleObject\d{1,2}\.bin')
    native_stream_names = ("\x01ole10native", 'equation native')

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not name_pattern.match(filename):
                continue

            if filename not in self.oleObject_bin:
                filepath = os.path.join(root, filename)
                # The file is only read, so do not ask for write access.
                with open(filepath, "rb") as f:
                    self.oleObject_bin[filename] = f.read()

            data = self.oleObject_bin[filename]
            # Plain substring test: the CLSID is a literal byte sequence,
            # not a regular expression.
            if data[:4] != bin_docfile or bin_eqn_clsid not in data:
                continue

            ole_ = olefile.OleFileIO(data)
            try:
                for stream in ole_.listdir():
                    if stream[-1].lower() not in native_stream_names:
                        continue
                    try:
                        content = ole_.openstream(stream).read(4)
                        if content != b'\x1C\x00\x00\x00':
                            ret = True
                            break
                    except IndexError as indErr:
                        logging.warning(
                            "check_equation_editor_harmful_face: {indErr}"
                            .format(indErr=indErr))
                        logging.warning(
                            "[filename]: {unzip_dir}".format(
                                unzip_dir=unzip_dir))
            finally:
                ole_.close()
    return ret
def check_equation_editor_harmful_face(self, unzip_dir, office_type=""):
    """
    Look for "Equation Native" streams whose byte at offset 0x23 equals 8.

    Every file under ``unzip_dir`` whose name starts with
    ``oleObject<1-2 digits>.bin`` is read (contents cached in
    ``self.oleObject_bin`` keyed by bare file name). A file is flagged when
    it starts with ``bin_docfile`` and has a stream named "Equation Native"
    (case-insensitive) whose byte at index 0x23 is 8. Streams shorter than
    that raise IndexError, which is logged and skipped.

    :param unzip_dir: root directory of the unpacked document.
    :param office_type: the check is skipped for 'ppt'.
    :return: True if at least one file was flagged, otherwise False.
    """
    # Precondition
    if office_type == 'ppt':
        return False

    ret = False
    bin_docfile = b"\xD0\xCF\x11\xE0"
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches the literal "." in front of the extension.
    name_pattern = re.compile(r'oleObject\d{1,2}\.bin')

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not name_pattern.match(filename):
                continue

            if filename not in self.oleObject_bin:
                filepath = os.path.join(root, filename)
                # The file is only read, so do not ask for write access.
                with open(filepath, "rb") as f:
                    self.oleObject_bin[filename] = f.read()

            data = self.oleObject_bin[filename]
            if data[:4] != bin_docfile:
                continue

            ole_ = olefile.OleFileIO(data)
            try:
                for stream in ole_.listdir():
                    if stream[-1].lower() != 'equation native':
                        continue
                    try:
                        if ole_.openstream(stream).read()[0x23] == 8:
                            ret = True
                            break
                    except IndexError as indErr:
                        logging.warning(
                            "check_equation_editor_harmful_face: {indErr}"
                            .format(indErr=indErr))
                        logging.warning(
                            "[filename]: {unzip_dir}".format(
                                unzip_dir=unzip_dir))
            finally:
                ole_.close()
    return ret
def extract_office_docs(filename, password_list, output_folder):
    """
    Decrypt a password-protected Office document into ``output_folder``.

    Exceptions:
     - ValueError: Document is an unsupported format.
     - PasswordError: Document is a supported format, but the password is unknown.
     - ExtractionError: Document is encrypted but not in a supported format.
       (Not raised directly here; presumably comes from the helpers.)

    :param filename: Name of the potential office file
    :param password_list: a list of password strings, ascii or unicode
    :param output_folder: a path to a directory where we can write to
    :return: Tuple ``(name, password)``: the path of the decrypted file we
             wrote and the password that worked. Else, an exception is thrown.
    """
    if not olefile.isOleFile(filename):
        raise ValueError("Not OLE")

    try:
        of = olefile.OleFileIO(filename)
    except IOError:
        raise ValueError("Corrupted OLE Document")

    password = None
    new_office = False
    try:
        # "EncryptionInfo" and "EncryptedPackage" streams indicate the newer
        # office file version.
        if of.exists("EncryptionInfo") and of.exists("EncryptedPackage"):
            new_office = True
            metadata = parse_enc_info(of.openstream("EncryptionInfo"))
            # From the provided passwords, keep the first one that verifies.
            for pass_try in password_list:
                if check_password(pass_try, metadata) is True:
                    password = pass_try
                    break
    finally:
        # The OLE container is only needed for the probing above.
        of.close()

    with open(filename, "rb") as encrypted_fp:
        office_file = msoffcrypto.OfficeFile(encrypted_fp)

        if not new_office:
            # re: older versions, such as xls, doc, ppt
            for pass_try in password_list:
                try:
                    # use the provided password, if correct, break.
                    office_file.load_key(password=pass_try)
                    password = pass_try
                    break
                except Exception as e:
                    if "Failed to verify password" in repr(e):
                        continue
                    raise
        elif password is not None:
            # use the password verified above
            office_file.load_key(password=password)

        # Checked before any decryption so that an unknown password always
        # surfaces as PasswordError instead of a load_key(None) failure.
        if password is None:
            raise PasswordError("Could not find correct password")

        # Reserve a unique output name, then write through a handle that is
        # guaranteed to be flushed and closed before we return.
        tf = tempfile.NamedTemporaryFile(dir=output_folder, delete=False)
        name = tf.name
        tf.close()
        with open(name, "wb") as decrypted_fp:
            office_file.decrypt(decrypted_fp)

    return name, password
def read_ole(downloader, datasetinfo, **kwargs):
    """Read tabular data from the 'Workbook' stream of a downloaded OLE file.

    The file at ``datasetinfo['url']`` (after ``get_url`` processing) is
    downloaded into a temporary folder, its 'Workbook' stream is written out
    as ``excel_file.xls`` and ``datasetinfo`` is updated in place ('url' and
    'format') before being handed to ``read_tabular``.

    Args:
        downloader: object providing ``download_file(url, folder, name)``
        datasetinfo: dictionary with at least a 'url' key; mutated in place
        **kwargs: passed through to ``get_url`` and ``read_tabular``

    Returns:
        Whatever ``read_tabular`` returns for the extracted workbook.
    """
    url = get_url(datasetinfo['url'], **kwargs)
    with temp_dir('ole') as folder:
        path = downloader.download_file(url, folder, 'olefile')
        ole = olefile.OleFileIO(path)
        try:
            data = ole.openstream('Workbook').getvalue()
        finally:
            # Release the handle on the downloaded file before the
            # temporary folder is used further or cleaned up.
            ole.close()
        outputfile = join(folder, 'excel_file.xls')
        with open(outputfile, 'wb') as f:
            f.write(data)
        datasetinfo['url'] = outputfile
        datasetinfo['format'] = 'xls'
        return read_tabular(downloader, datasetinfo, **kwargs)
def getOLEHeaderInfo(filename):
    """Return selected OLE metadata of ``filename`` or None on any failure.

    :param filename: path (or other input accepted by ``olefile.OleFileIO``)
    :return: dict with 'TimeStamp' (last saved time formatted as
             ``%Y-%m-%d %H:%M:%S``), 'Author' and 'Title'; None if the file
             cannot be opened or the metadata is incomplete.
    """
    retval = {}
    try:
        ole = olefile.OleFileIO(filename)
        try:
            meta = ole.get_metadata()
        finally:
            # Close the handle even when metadata parsing fails.
            ole.close()
        retval['TimeStamp'] = meta.last_saved_time.strftime(
            '%Y-%m-%d %H:%M:%S')
        retval['Author'] = meta.author
        retval['Title'] = meta.title
        return retval
    except AttributeError as err:
        # e.g. last_saved_time is None. Report the actual error; the handler
        # must not raise itself, so it references nothing outside this scope.
        print("getOLEHeaderInfo: incomplete OLE metadata: {err}".format(
            err=err))
        return None
    except Exception:
        # Best effort: unreadable / non-OLE input yields None.
        return None
def extract_docx(filename, password_list, output_folder):
    """
    Decrypt a password-protected docx container into ``output_folder``.

    Exceptions:
     - ValueError: Document is an unsupported format.
     - PasswordError: Document is a supported format, but the password is unknown.
     - ExtractionError: Document is encrypted but not in a supported format.
       (Not raised directly here; presumably comes from the helpers.)

    :param filename: Name of the potential docx file
    :param password_list: a list of password strings, ascii or unicode
    :param output_folder: a path to a directory where we can write to
    :return: Tuple ``(name, password)``: the path of the decrypted file we
             wrote and the password that worked. Else, an exception is thrown.
    """
    if not olefile.isOleFile(filename):
        raise ValueError("Not OLE")

    try:
        of = olefile.OleFileIO(filename)
    except IOError:
        raise ValueError("Corrupted OLE Document")

    try:
        if of.exists("WordDocument"):
            # Cannot parse these files yet
            raise ValueError("Legacy Word Document")

        if not (of.exists("EncryptionInfo") and of.exists("EncryptedPackage")):
            raise ValueError("Not encrypted")

        metadata = parse_enc_info(of.openstream("EncryptionInfo"))

        password = None
        for pass_try in password_list:
            if check_password(pass_try, metadata) is True:
                password = pass_try
                break

        if password is None:
            raise PasswordError("Could not find correct password")

        tf = tempfile.NamedTemporaryFile(dir=output_folder, suffix=".docx",
                                         delete=False)
        name = tf.name
        try:
            decode_stream(password, metadata,
                          of.openstream("EncryptedPackage"), tf)
        finally:
            # Always release the output handle, even if decoding fails.
            tf.close()
        return name, password
    finally:
        # Close the OLE container on every exit path.
        of.close()
def check_ole_stream_malicious_executable_data(self, unzip_dir, office_type=""):
    """
    Look for embedded "\\x01Ole10Native" streams whose source path has a
    suspicious extension.

    Every file under ``unzip_dir`` whose name starts with
    ``oleObject<1-2 digits>.bin`` is read (contents cached in
    ``self.oleObject_bin`` keyed by bare file name). A file is flagged when
    it starts with ``bin_docfile`` and the ``src_path`` parsed from one of
    its "\\x01Ole10Native" streams has a lower-cased extension contained in
    ``self.susp_ext``.

    :param unzip_dir: root directory of the unpacked document.
    :param office_type: accepted for signature parity; not used here.
    :return: True if at least one file was flagged, otherwise False.
    """
    ret = False
    bin_docfile = b"\xD0\xCF\x11\xE0"
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches the literal "." in front of the extension.
    name_pattern = re.compile(r'oleObject\d{1,2}\.bin')

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not name_pattern.match(filename):
                continue

            if filename not in self.oleObject_bin:
                filepath = os.path.join(root, filename)
                # The file is only read, so do not ask for write access.
                with open(filepath, "rb") as f:
                    self.oleObject_bin[filename] = f.read()

            data = self.oleObject_bin[filename]
            if data[:4] != bin_docfile:
                continue

            ole_ = olefile.OleFileIO(data)
            try:
                for stream in ole_.listdir():
                    if stream[-1] != "\x01Ole10Native":
                        continue
                    try:
                        content = ole_.openstream(stream).read()
                        native = oleobj.OleNativeStream(content)
                        # src_path may be missing when the native stream
                        # could not be fully parsed; splitext(None) would
                        # raise TypeError.
                        if native.src_path and os.path.splitext(
                                native.src_path)[1].lower() in self.susp_ext:
                            ret = True
                            break
                    except IndexError as indErr:
                        logging.warning(
                            "get_ole_stream_malicious_executable_data: {indErr}"
                            .format(indErr=indErr))
                        logging.warning(
                            "[filename]: {unzip_dir}".format(
                                unzip_dir=unzip_dir))
                    except struct.error as structErr:
                        logging.warning(
                            "get_ole_stream_malicious_executable_data: {structErr}"
                            .format(structErr=structErr))
                        logging.warning(
                            "[filename]: {unzip_dir}".format(
                                unzip_dir=unzip_dir))
            finally:
                ole_.close()
    return ret
def check_ole_swf_exploitable_data(self, unzip_dir, office_type=""):
    """
    Look for embedded "\\x01Ole10Native" streams that carry a ``.swf`` file.

    Every file under ``unzip_dir`` whose name starts with
    ``oleObject<1-2 digits>.bin`` is read (contents cached in
    ``self.oleObject_bin`` keyed by bare file name). A file is flagged when
    it starts with ``bin_docfile`` and one of its "\\x01Ole10Native" streams
    parses to native data that begins with ``b'FWS'`` and whose ``filename``
    has the extension ".swf" (case-sensitive).

    :param unzip_dir: root directory of the unpacked document.
    :param office_type: the check is skipped for 'ppt'.
    :return: True if at least one file was flagged, otherwise False.
    """
    # Precondition
    if office_type == 'ppt':
        return False

    ret = False
    bin_docfile = b"\xD0\xCF\x11\xE0"
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches the literal "." in front of the extension.
    name_pattern = re.compile(r'oleObject\d{1,2}\.bin')

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not name_pattern.match(filename):
                continue

            if filename not in self.oleObject_bin:
                filepath = os.path.join(root, filename)
                # The file is only read, so do not ask for write access.
                with open(filepath, "rb") as f:
                    self.oleObject_bin[filename] = f.read()

            data = self.oleObject_bin[filename]
            if data[:4] != bin_docfile:
                continue

            ole_ = olefile.OleFileIO(data)
            try:
                for stream in ole_.listdir():
                    if stream[-1] != "\x01Ole10Native":
                        continue
                    try:
                        content = ole_.openstream(stream).read()
                        native = oleobj.OleNativeStream(content)
                        if (native.data is not None
                                and native.data[0:3] == b'FWS'
                                and os.path.splitext(
                                    native.filename)[1] == ".swf"):
                            ret = True
                            break
                    except IndexError as indErr:
                        logging.warning(
                            "get_ole_swf_exploitable_data: {indErr}"
                            .format(indErr=indErr))
                        logging.warning(
                            "[filename]: {unzip_dir}".format(
                                unzip_dir=unzip_dir))
                    except struct.error as structErr:
                        logging.warning(
                            "get_ole_swf_exploitable_data: {structErr}"
                            .format(structErr=structErr))
                        logging.warning(
                            "[filename]: {unzip_dir}".format(
                                unzip_dir=unzip_dir))
            finally:
                ole_.close()
    return ret
def check_ole_settingcontent_ms(self, unzip_dir, office_type=""):
    """
    Look for embedded "\\x01Ole10Native" streams that contain the
    settingcontent-ms marker GUID.

    Only runs for ``office_type == 'word'``. Every file under ``unzip_dir``
    whose name starts with ``oleObject<1-2 digits>.bin`` is read (contents
    cached in ``self.oleObject_bin`` keyed by bare file name). A file is
    flagged when it starts with ``bin_docfile`` and the native data of one
    of its "\\x01Ole10Native" streams contains
    ``{12B1697E-D3A0-4DBC-B568-CCF64A3F934D}``.

    :param unzip_dir: root directory of the unpacked document.
    :param office_type: anything other than 'word' returns False at once.
    :return: True if at least one file was flagged, otherwise False.
    """
    # Precondition
    if office_type != 'word':
        return False

    ret = False
    bin_docfile = b"\xD0\xCF\x11\xE0"
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches the literal "." in front of the extension.
    name_pattern = re.compile(r'oleObject\d{1,2}\.bin')

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not name_pattern.match(filename):
                continue

            filepath = os.path.join(root, filename)
            if filename not in self.oleObject_bin:
                # The file is only read, so do not ask for write access.
                with open(filepath, "rb") as f:
                    self.oleObject_bin[filename] = f.read()

            if self.oleObject_bin[filename][:4] != bin_docfile:
                continue

            # NOTE(review): the magic check above uses the cache (keyed by
            # bare file name) while the parser opens the current path; the
            # two can differ if the same name occurs in several folders.
            ole_ = olefile.OleFileIO(filepath)
            try:
                for stream in ole_.listdir():
                    if stream[-1] != "\x01Ole10Native":
                        continue
                    try:
                        content = ole_.openstream(stream).read()
                        native = oleobj.OleNativeStream(content)
                        if native.data is not None and b'{12B1697E-D3A0-4DBC-B568-CCF64A3F934D}' in native.data:
                            # settingcontent-ms
                            ret = True
                            break
                    except IndexError as indErr:
                        logging.warning(
                            "check_ole_settingcontent_ms: {indErr}"
                            .format(indErr=indErr))
                        logging.warning(
                            "[filename]: {filepath}".format(
                                filepath=filepath))
                    except struct.error as structErr:
                        logging.warning(
                            "check_ole_settingcontent_ms: {structErr}"
                            .format(structErr=structErr))
                        logging.warning(
                            "[filename]: {filepath}".format(
                                filepath=filepath))
            finally:
                # Opened by path, so this releases a real file handle.
                ole_.close()
    return ret
def read_ole(downloader, datasetinfo, **kwargs):
    # type: (Download, Dict, Any) -> Tuple[List[str],Iterator[Union[List,Dict]]]
    """Read data from OLE Excel source

    The downloaded OLE file's 'Workbook' stream is written to
    ``excel_file.xls`` inside a temporary folder; ``datasetinfo`` is updated
    in place ('url' and 'format') and passed on to ``read_tabular``.

    Args:
        downloader (Download): Download object for downloading files
        datasetinfo (Dict): Dictionary of information about dataset
        **kwargs: Variables to use when evaluating template arguments

    Returns:
        Tuple[List[str],Iterator[Union[List,Dict]]]: Tuple (headers, iterator where each row is a list or dictionary)
    """
    url = get_url(datasetinfo['url'], **kwargs)
    with temp_dir('ole') as folder:
        path = downloader.download_file(url, folder, 'olefile')
        ole = olefile.OleFileIO(path)
        try:
            data = ole.openstream('Workbook').getvalue()
        finally:
            # Release the handle on the downloaded file before the
            # temporary folder is used further or cleaned up.
            ole.close()
        outputfile = join(folder, 'excel_file.xls')
        with open(outputfile, 'wb') as f:
            f.write(data)
        datasetinfo['url'] = outputfile
        datasetinfo['format'] = 'xls'
        return read_tabular(downloader, datasetinfo, **kwargs)
def getMXDVersion(filename):
    """Return the version string stored in the 'Version' stream of an OLE file.

    The stream is decoded as UTF-16 and split on NUL characters; the second
    field of that split is returned.

    :param filename: path (or other input accepted by ``olefile.OleFileIO``)
    :return: the second NUL-separated field of the decoded 'Version' stream
    :raises IndexError: if the decoded stream has fewer than two fields
    """
    ofile = olefile.OleFileIO(filename)
    try:
        data = ofile.openstream('Version').read().decode('utf-16')
    finally:
        # Do not keep the document open after the stream has been read.
        ofile.close()
    version = data.split('\x00')[1]
    return version