def get_vba_keyword_autoopen(self, unzip_dir, office_type=""):
    """Report whether an unsigned VBA project declares an auto-run macro.

    Walks ``unzip_dir`` and reacts to three file names:

    * ``vbaData.xml`` - the ``wne:macroName`` attribute of every ``wne:mcd``
      element is upper-cased and tested (substring match) against each
      keyword in ``self.XML_MACRO_AUTORUN``.
    * ``vbaProject.bin`` - its presence is recorded.
    * ``vbaProjectSignature.bin`` - its presence vetoes the result.

    A ``vbaData.xml`` that cannot be parsed is logged and skipped.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: ``'xl'`` short-circuits the check to ``False``.

    Returns:
        ``True`` when an auto-run keyword and ``vbaProject.bin`` were both
        found and no ``vbaProjectSignature.bin`` exists; otherwise ``False``.
    """
    # Precondition
    if office_type == 'xl':
        return False

    flag_xml_autoopen = False
    flag_vbaproject_bin = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if filename == 'vbaProjectSignature.bin':
                # The signature vetoes the result, so nothing found later in
                # the walk can change the answer.
                return False

            if filename == 'vbaProject.bin':
                flag_vbaproject_bin = True
            elif filename == 'vbaData.xml':
                file_path = os.path.join(root, filename)
                with open(file_path, 'r', encoding='utf-8',
                          errors='ignore') as f:
                    xml_txt = f.read()

                utf8_parser = etree.XMLParser(encoding='utf-8')
                try:
                    ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                except etree.ParseError as parse_err:
                    logging.warning(parse_err)
                    logging.warning("Error path: %s", file_path)
                    continue

                macro_name_attr = _name('{{{wne}}}macroName')
                # iter(tag) visits every wne:mcd element; find() on each
                # parent would only ever see the first child.
                for mcd in ooxml.iter(_name('{{{wne}}}mcd')):
                    macro_name = mcd.get(macro_name_attr)
                    if macro_name is None:
                        continue
                    macro_name = macro_name.upper()
                    if any(keyword in macro_name
                           for keyword in self.XML_MACRO_AUTORUN):
                        flag_xml_autoopen = True
                        break

    return flag_xml_autoopen and flag_vbaproject_bin
def check_external_framset_linkedToFile(self, unzip_dir, office_type=""):
    """Detect a Word frameset frame that is linked to an external file.

    Walks ``unzip_dir``. In ``webSettings.xml`` / ``settings.xml`` every
    ``w:frame`` element is handed to ``xml_parser.XmlParser.parse_object``;
    among the returned ``'sub'`` items, the ``r:id`` of each
    ``w:sourceFileName`` is collected and a ``w:linkedToFile`` item sets the
    "linked" flag. In ``webSettings.xml.rels`` / ``settings.xml.rels`` the
    relationships (cached per file name in ``self.external_rels``) are
    searched for a collected id whose ``target_mode`` is ``"External"``.

    Files are handled in ``os.walk`` order, so a ``.rels`` file can only
    match ids collected from settings files visited before it.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: Anything other than ``'word'`` returns ``False``.

    Returns:
        ``True`` once both a linked frame and a matching external
        relationship have been seen; otherwise ``False``.
    """
    # Precondition
    if office_type != 'word':
        return False

    # Every frame's relationship id is kept: a frameset normally holds
    # several frames and any of them may be the external one.
    r_ids = set()
    flag_link2file = False
    flag_external = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if filename in ("webSettings.xml", "settings.xml"):
                with open(file_path, 'r', encoding='utf-8',
                          errors='ignore') as f:
                    xml_txt = f.read()
                try:
                    xp = xml_parser.XmlParser()
                    utf8_parser = etree.XMLParser(encoding='utf-8')
                    ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                    r_id_attr = _name('{{{r}}}id')
                    for frame in ooxml.iter(_name('{{{w}}}frame')):
                        elements = xp.parse_object(frame)
                        for sub in elements['sub']:
                            if sub.tag == _name('{{{w}}}sourceFileName'):
                                # r:id may be absent on a crafted document.
                                if r_id_attr in sub.attrib:
                                    r_ids.add(sub.attrib[r_id_attr])
                            elif sub.tag == _name('{{{w}}}linkedToFile'):
                                flag_link2file = True
                except etree.ParseError as parseErr:
                    logging.warning(parseErr)
                    logging.warning("file path: {file_path}".format(
                        file_path=file_path))

            elif filename in ('webSettings.xml.rels', "settings.xml.rels"):
                if filename not in self.external_rels:
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as f:
                        xml_txt = f.read().encode("utf-8")
                    xp = xml_parser.XmlParser()
                    xp.parse_relationship(xml_txt)
                    self.external_rels[filename] = xp.relationships

                for relationship in self.external_rels[filename]:
                    if (relationship['id'] in r_ids
                            and relationship['target_mode'] == "External"):
                        flag_external = True

            if flag_link2file and flag_external:
                return True

    return False
def get_exteranl_ole_link(self, unzip_dir, office_type=""):
    """Detect a PowerPoint OLE link whose external target is an ``.hta``.

    Walks ``unzip_dir``. In files named ``slide<digit>.xml`` every
    ``p:oleObj`` element is handed to ``xml_parser.XmlParser.parse_object``;
    its ``r:id`` (when present) is collected and a ``p:link`` item among the
    returned ``'sub'`` entries sets the "OLE link" flag. In
    ``slide1.xml.rels`` the relationships (cached per file name in
    ``self.external_rels``) are searched for a collected id with
    ``target_mode == "External"`` whose URL-decoded target ends in ``.hta``.

    Files are handled in ``os.walk`` order, so the ``.rels`` file can only
    match ids collected from slides visited before it. A slide that cannot
    be parsed is logged and skipped.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: Anything other than ``'ppt'`` returns ``False``.

    Returns:
        ``True`` once an OLE link, an external relationship and an ``.hta``
        target have all been seen; otherwise ``False``.
    """
    # Precondition
    if office_type != 'ppt':
        return False

    r_ids = set()
    flag_ole_link = False
    flag_external = False
    flag_target_hta = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            # fullmatch keeps "slide1.xml.rels" from being parsed as a slide.
            if re.fullmatch(r'slide\d{1}\.xml', filename):
                with open(file_path, 'r', encoding='utf-8',
                          errors='ignore') as f:
                    xml_txt = f.read()
                xp = xml_parser.XmlParser()
                utf8_parser = etree.XMLParser(encoding='utf-8')
                try:
                    ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                except etree.ParseError as parse_err:
                    logging.warning(parse_err)
                    logging.warning("Error path: %s", file_path)
                    continue

                r_id_attr = _name('{{{r}}}id')
                link_tag = _name('{{{p}}}link')
                for p_ole in ooxml.iter(_name('{{{p}}}oleObj')):
                    elements = xp.parse_object(p_ole)
                    if r_id_attr in elements['attrib'].keys():
                        r_ids.add(elements['attrib'][r_id_attr])
                    if any(sub.tag == link_tag for sub in elements['sub']):
                        flag_ole_link = True

            elif filename == 'slide1.xml.rels':
                if filename not in self.external_rels:
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as f:
                        xml_txt = f.read().encode("utf-8")
                    xp = xml_parser.XmlParser()
                    xp.parse_relationship(xml_txt)
                    self.external_rels[filename] = xp.relationships

                for relationship in self.external_rels[filename]:
                    if (relationship['id'] in r_ids
                            and relationship['target_mode'] == "External"):
                        flag_external = True
                        decode_url = urllib.parse.unquote(
                            relationship['target'])
                        _, ext = os.path.splitext(decode_url)
                        # Compare case-insensitively so ".HTA" is not missed.
                        if ext.lower() == '.hta':
                            flag_target_hta = True
                            break

            if flag_ole_link and flag_external and flag_target_hta:
                return True

    return False
def get_ddelink(self, unzip_dir):
    """Collect DDE link definitions from ``externalLink<N>.xml`` files.

    Walks ``unzip_dir``; for each file named ``externalLink`` + one or two
    digits + ``.xml`` the XML is parsed and every ``xl:ddeLink`` element is
    recorded. A file that cannot be parsed is logged and skipped.

    Args:
        unzip_dir: Root of the directory tree to scan.

    Returns:
        A ``(found, links)`` tuple. ``found`` is ``True`` when at least one
        ``ddeLink`` element was seen. ``links`` maps the file name to a dict
        with the keys ``'ddeService'`` and ``'ddeTopic'``; if a file holds
        several ``ddeLink`` elements the last one wins, and a missing
        attribute is reported as an empty string.
    """
    ret = False
    ddelink_dict = {}

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            if not re.fullmatch(r'externalLink\d{1,2}\.xml', filename):
                continue

            file_path = os.path.join(root, filename)
            with open(file_path, 'r', encoding='utf-8',
                      errors='ignore') as f:
                xml_txt = f.read()

            utf8_parser = etree.XMLParser(encoding='utf-8')
            try:
                ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
            except etree.ParseError as parse_err:
                # Catch the error class of the module that does the parsing.
                logging.warning(parse_err)
                logging.warning("Error path: %s", file_path)
                continue

            for dde_link in ooxml.iter(_name('{{{xl}}}ddeLink')):
                ddelink_dict[filename] = {
                    'ddeService': dde_link.get('ddeService', ''),
                    'ddeTopic': dde_link.get('ddeTopic', ''),
                }
                ret = True

    return ret, ddelink_dict
def check_adobe_flash_malicious_method(self, unzip_dir, office_type=""):
    """Detect an embedded Flash ActiveX control with persisted storage.

    Walks ``unzip_dir`` and raises three independent flags:

    * an ``activeX<N>.xml`` whose root is ``ax:ocx`` with ``ax:classid``
      ``{D27CDB6E-AE6D-11CF-96B8-444553540000}``;
    * an ``activeX<N>.xml`` whose ``ax:ocx`` root has ``ax:persistence``
      equal to ``persistStorage`` or ``persistStreamInit``;
    * an ``activeX<N>.bin`` containing the same CLSID in its 16-byte
      little-endian binary layout.

    ``<N>`` is one or two digits. File contents are cached per file name in
    ``self.activeX_xml`` and ``self.activeX_bin``. The parsed tree is kept
    in a local variable; ``self.xml_tree`` is neither read nor overwritten.
    An XML file that cannot be parsed is logged and skipped.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: Accepted for signature parity; not used.

    Returns:
        ``True`` once all three flags are set; otherwise ``False``.
    """
    bin_clsid_flash = (
        b"\x6E\xDB\x7C\xD2\x6D\xAE\xCF\x11\x96\xB8\x44\x45\x53\x54\x00\x00")

    flag_adobe_flash = False
    flag_persist_storage = False
    flag_bin_clsid_flash = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if re.fullmatch(r'activeX\d{1,2}\.xml', filename):
                if filename not in self.activeX_xml:
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as f:
                        self.activeX_xml[filename] = f.read()

                utf8_parser = etree.XMLParser(encoding='utf-8')
                try:
                    ocx_root = etree.fromstring(self.activeX_xml[filename],
                                                parser=utf8_parser)
                except etree.ParseError as parse_err:
                    logging.warning(parse_err)
                    logging.warning("Error path: %s", file_path)
                    continue

                if ocx_root.tag == _name('{{{ax}}}ocx'):
                    # Shockwave Flash control class id.
                    classid = ocx_root.get(_name('{{{ax}}}classid'))
                    if classid == '{D27CDB6E-AE6D-11CF-96B8-444553540000}':
                        flag_adobe_flash = True
                    persistence = ocx_root.get(_name('{{{ax}}}persistence'))
                    if persistence in ('persistStorage',
                                       'persistStreamInit'):
                        flag_persist_storage = True

            elif re.fullmatch(r'activeX\d{1,2}\.bin', filename):
                if filename not in self.activeX_bin:
                    # Read-only access is enough; "r+b" would fail on
                    # files that are not writable.
                    with open(file_path, "rb") as f:
                        self.activeX_bin[filename] = f.read()
                # Plain substring test. Used as a regex the CLSID is wrong:
                # its third byte 0x7C is "|", which splits the pattern into
                # two alternatives, the first being just b"n\xDB".
                if bin_clsid_flash in self.activeX_bin[filename]:
                    flag_bin_clsid_flash = True

            if (flag_adobe_flash and flag_persist_storage
                    and flag_bin_clsid_flash):
                return True

    return False
def get_instr_text(self, unzip_dir):
    """Collect field instruction text from Word document parts.

    Walks ``unzip_dir``. A file is examined when its lower-cased name is
    ``document.xml`` or its name starts with ``header``/``footer`` followed
    by one digit, any single character and ``xml`` (the original pattern is
    kept unchanged). For every element in the parsed tree, the text of its
    first ``w:instrText`` child and the ``w:instr`` attribute of each of its
    ``w:fldSimple`` children are gathered in traversal order.

    The gathered pieces are concatenated, stripped, passed through
    ``self.unquote`` and appended to the entry for that file name. A file
    that cannot be parsed is logged and gets no entry from that attempt.

    Args:
        unzip_dir: Root of the directory tree to scan.

    Returns:
        Dict mapping file name to the accumulated instruction string.
    """
    instr_dict = {}

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            is_target = (filename.lower() == "document.xml"
                         or bool(re.match(r'header\d{1}.xml', filename))
                         or bool(re.match(r'footer\d{1}.xml', filename)))
            if not is_target:
                continue

            file_path = os.path.join(root, filename)
            with open(file_path, 'r', encoding='utf-8',
                      errors='ignore') as f:
                xml_txt = f.read()

            utf8_parser = etree.XMLParser(encoding='utf-8')
            try:
                ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
            except etree.ParseError as parse_err:
                # Catch the error class of the module that does the parsing.
                logging.warning(parse_err)
                logging.warning("Error path: %s", file_path)
                continue

            instr_list = []
            instr_attr = _name('{{{w}}}instr')
            for elem in ooxml.iter():
                instrText = elem.find(_name('{{{w}}}instrText'))
                if instrText is not None:
                    # An empty <w:instrText/> has .text == None, which
                    # would make the join below raise TypeError.
                    instr_list.append(instrText.text or "")
                for fldSimple in elem.findall(_name('{{{w}}}fldSimple')):
                    if instr_attr in fldSimple.attrib.keys():
                        instr_list.append(fldSimple.attrib[instr_attr])

            if filename not in instr_dict:
                instr_dict[filename] = ""
            instr_dict[filename] += self.unquote("".join(instr_list).strip())

    return instr_dict  # dict
def check_activeX_mscomctl(self, unzip_dir, office_type=""):
    """Detect an MSCOMCTL ActiveX control shipped with a large binary.

    Walks ``unzip_dir`` and raises two independent flags:

    * an ``activeX<N>.xml`` whose root is ``ax:ocx`` with ``ax:classid``
      ``{1EFB6596-857C-11D1-B16A-00C0F0283628}``;
    * an ``activeX<N>.bin`` larger than 500 KiB.

    ``<N>`` is one or two digits. XML text is cached per file name in
    ``self.activeX_xml``. The parsed tree is kept in a local variable;
    ``self.xml_tree`` is neither read nor overwritten. An XML file that
    cannot be parsed is logged and skipped.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: ``'ppt'`` short-circuits the check to ``False``.

    Returns:
        ``True`` once both flags are set; otherwise ``False``.
    """
    # Precondition
    if office_type == 'ppt':
        return False

    flag_mscomctl = False
    flag_match_min_fileSize = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            # dir search and find activeX[digit].xml
            if re.fullmatch(r'activeX\d{1,2}\.xml', filename):
                if filename not in self.activeX_xml:
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as f:
                        self.activeX_xml[filename] = f.read()

                utf8_parser = etree.XMLParser(encoding='utf-8')
                try:
                    ocx_root = etree.fromstring(self.activeX_xml[filename],
                                                parser=utf8_parser)
                except etree.ParseError as parse_err:
                    logging.warning(parse_err)
                    logging.warning("Error path: %s", file_path)
                    continue

                if ocx_root.tag == _name('{{{ax}}}ocx'):
                    # MSCOMCTL.OCX; .get() tolerates a missing classid.
                    classid = ocx_root.get(_name('{{{ax}}}classid'))
                    if classid == '{1EFB6596-857C-11D1-B16A-00C0F0283628}':
                        flag_mscomctl = True

            elif re.fullmatch(r'activeX\d{1,2}\.bin', filename):
                if os.path.getsize(file_path) > 500 * 1024:
                    flag_match_min_fileSize = True

            if flag_mscomctl and flag_match_min_fileSize:
                return True

    return False
def get_exteranl_ole_link_type(self, unzip_dir, office_type=""):
    """Detect a linked OLE object whose relationship is external.

    Walks ``unzip_dir``. Every ``.xml`` file is parsed and each
    ``o:OLEObject`` element is handed to
    ``xml_parser.XmlParser.parse_o_oleobject``. An object counts as an OLE
    link when ``oleobject_attrib['Type']`` is ``"Link"`` and its
    ``o_LinkType`` is ``"EnhancedMetaFile"``, or ``"Picture"`` with
    ``\\f 0`` inside ``o_FieldCodes``; its ``r_id`` is then collected.
    Every ``.rels`` file (relationships cached per file name in
    ``self.external_rels``) is searched for a collected id with
    ``target_mode == "External"``.

    Files are handled in ``os.walk`` order, so a ``.rels`` file can only
    match ids collected from XML files visited before it. A parse error is
    logged and the walk continues; it never cancels a detection.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: ``'xl'`` short-circuits the check to ``False``.

    Returns:
        ``True`` once both an OLE link and a matching external relationship
        have been seen; otherwise ``False``.
    """
    # Precondition
    if office_type == 'xl':
        return False

    r_ids = set()
    flag_ole_link = False
    flag_external = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            # dir search and find .xml
            _, ext = os.path.splitext(filename)
            file_path = os.path.join(root, filename)
            try:
                if ext == ".xml":
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as f:
                        xml_txt = f.read()
                    xp = xml_parser.XmlParser()
                    utf8_parser = etree.XMLParser(encoding='utf-8')
                    ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                    for o_oleobject in ooxml.iter(
                            _name('{{{o}}}OLEObject')):
                        xp.parse_o_oleobject(o_oleobject)
                        attrib = xp.oleobject_attrib
                        if attrib['Type'] != "Link":
                            continue
                        link_type = attrib['child']['o_LinkType']
                        if link_type == "EnhancedMetaFile" or (
                                link_type == "Picture" and
                                r"\f 0" in attrib['child']['o_FieldCodes']):
                            r_ids.add(attrib['r_id'])
                            flag_ole_link = True

                if ext == '.rels':
                    if filename not in self.external_rels:
                        with open(file_path, 'r', encoding='utf-8',
                                  errors='ignore') as f:
                            xml_txt = f.read().encode("utf-8")
                        xp = xml_parser.XmlParser()
                        xp.parse_relationship(xml_txt)
                        self.external_rels[filename] = xp.relationships

                    for relationship in self.external_rels[filename]:
                        if (relationship['id'] in r_ids and
                                relationship['target_mode'] == "External"):
                            flag_external = True

                if flag_ole_link and flag_external:
                    # Return at once so a later unparsable file cannot
                    # reset the result.
                    return True
            except etree.ParseError as parse_err:
                logging.warning(parse_err)
                logging.warning(
                    "Error path: {file_path}".format(file_path=file_path))

    return False
def get_external_ole_packagershell(self, unzip_dir, office_type=""):
    """Detect an embedded OLE ``Package`` object triggered by a verb command.

    Walks ``unzip_dir`` and raises four independent flags:

    * a ``p:oleObj`` in ``slide<digit>.xml`` whose ``progId`` is
      ``Package``;
    * a ``p:oleObj`` whose parsed ``'sub'`` items include ``p:embed``;
    * a ``p:cmd`` whose ``type`` is ``verb`` and whose ``cmd`` is ``3``;
    * a relationship in ``slide<digit>.xml.rels`` whose id belongs to a
      collected ``p:oleObj`` and whose URL-decoded target contains
      ``../embeddings/oleObject``.

    Elements are parsed with ``xml_parser.XmlParser.parse_object`` and
    relationships are cached per file name in ``self.external_rels``. Files
    are handled in ``os.walk`` order, so a ``.rels`` file can only match ids
    collected from slides visited before it. A slide that cannot be parsed
    is logged and skipped.

    Args:
        unzip_dir: Root of the directory tree to scan.
        office_type: Anything other than ``'ppt'`` returns ``False``.

    Returns:
        ``True`` once all four flags are set; otherwise ``False``.
    """
    # Precondition
    if office_type != 'ppt':
        return False

    r_ids = set()
    flag_package_shell = False
    flag_cmd = False
    flag_embed = False
    flag_embedding_ole = False

    for (root, _, files) in os.walk(unzip_dir):
        for filename in files:
            # dir search and find .xml
            file_path = os.path.join(root, filename)

            if re.fullmatch(r'slide\d{1}\.xml\.rels', filename):
                if filename not in self.external_rels:
                    with open(file_path, 'r', encoding='utf-8',
                              errors='ignore') as f:
                        xml_txt = f.read().encode("utf-8")
                    xp = xml_parser.XmlParser()
                    xp.parse_relationship(xml_txt)
                    self.external_rels[filename] = xp.relationships

                for relationship in self.external_rels[filename]:
                    if relationship['id'] in r_ids:
                        decode_url = urllib.parse.unquote(
                            relationship['target'])
                        if "../embeddings/oleObject" in decode_url:
                            flag_embedding_ole = True

            elif re.fullmatch(r'slide\d{1}\.xml', filename):
                with open(file_path, 'r', encoding='utf-8',
                          errors='ignore') as f:
                    xml_txt = f.read()
                xp = xml_parser.XmlParser()
                utf8_parser = etree.XMLParser(encoding='utf-8')
                try:
                    ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                except etree.ParseError as parse_err:
                    logging.warning(parse_err)
                    logging.warning("Error path: %s", file_path)
                    continue

                r_id_attr = _name('{{{r}}}id')
                embed_tag = _name('{{{p}}}embed')
                for p_ole in ooxml.iter(_name('{{{p}}}oleObj')):
                    elements = xp.parse_object(p_ole)
                    attrib = elements['attrib']
                    # progId and r:id may both be absent on a crafted
                    # slide, so look before indexing.
                    if ('progId' in attrib.keys()
                            and attrib['progId'] == 'Package'):
                        flag_package_shell = True
                    if r_id_attr in attrib.keys():
                        r_ids.add(attrib[r_id_attr])
                    if any(sub.tag == embed_tag for sub in elements['sub']):
                        flag_embed = True

                for p_cmd in ooxml.iter(_name('{{{p}}}cmd')):
                    attrib = xp.parse_object(p_cmd)['attrib']
                    if ('type' in attrib.keys() and 'cmd' in attrib.keys()
                            and attrib['type'] == 'verb'
                            and attrib['cmd'] == '3'):
                        flag_cmd = True

            if (flag_package_shell and flag_embed and flag_cmd
                    and flag_embedding_ole):
                return True

    return False