def check_external_framset_linkedToFile(self, unzip_dir, office_type=""):
     """Report whether a Word frameset frame is linked to an external file.

     Walks ``unzip_dir`` and inspects two kinds of files:

     * ``webSettings.xml`` / ``settings.xml``: for each element, its first
       ``frame`` child (tag built through ``_name``) is parsed. A
       ``sourceFileName`` sub-element supplies the relationship id (the
       last one seen wins) and a ``linkedToFile`` sub-element marks the
       frame as linked.
     * ``webSettings.xml.rels`` / ``settings.xml.rels``: a relationship
       whose id equals the recorded id and whose target mode is
       ``"External"`` marks the link as external.

     Parsed relationships are cached in ``self.external_rels`` under the
     bare file name.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).
         office_type: Document family; anything but ``'word'`` is rejected.

     Returns:
         bool: True as soon as both a linked frame and a matching external
         relationship have been seen, otherwise False.
     """
     # Precondition
     if office_type != 'word':
         return False

     settings_names = ("webSettings.xml", "settings.xml")
     rels_names = ("webSettings.xml.rels", "settings.xml.rels")
     r_id = ""
     flag_link2file = False
     flag_external = False
     # NOTE(review): the frame id must be recorded before the matching
     # .rels file is visited; this relies on the walk order (os.walk is
     # top-down by default) -- confirm against the package layout.
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             file_path = os.path.join(root, filename)
             if filename in settings_names:
                 with open(file_path,
                           'r',
                           encoding='utf-8',
                           errors='ignore') as f:
                     xml_txt = f.read()
                 try:
                     xp = xml_parser.XmlParser()
                     utf8_parser = etree.XMLParser(encoding='utf-8')
                     ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                     for elem in ooxml.iter():
                         frame = elem.find(_name('{{{w}}}frame'))
                         if frame is None:
                             continue
                         elements = xp.parse_object(frame)
                         for sub in elements['sub']:
                             if sub.tag == _name('{{{w}}}sourceFileName'):
                                 r_id = sub.attrib[_name('{{{r}}}id')]
                             elif sub.tag == _name('{{{w}}}linkedToFile'):
                                 flag_link2file = True
                 except etree.ParseError as parse_err:
                     # A malformed settings part is logged and skipped.
                     logging.warning(parse_err)
                     logging.warning("file path: {file_path}".format(
                         file_path=file_path))
             elif filename in rels_names:
                 # NOTE(review): the cache key is the file name only, so
                 # same-named .rels files share one entry -- confirm.
                 if filename not in self.external_rels:
                     with open(file_path,
                               'r',
                               encoding='utf-8',
                               errors='ignore') as f:
                         xml_txt = f.read().encode("utf-8")
                     xp = xml_parser.XmlParser()
                     xp.parse_relationship(xml_txt)
                     self.external_rels[filename] = xp.relationships
                 for relationship in self.external_rels[filename]:
                     if (relationship['id'] == r_id
                             and relationship['target_mode'] == "External"):
                         flag_external = True
             if flag_link2file and flag_external:
                 # Verdict reached: stop walking instead of leaving only
                 # the inner loop and parsing further directories.
                 return True
     return False
 def get_exteranl_ole_link(self, unzip_dir, office_type=""):
     """Report whether a slide links an OLE object to an external ``.hta``.

     Walks ``unzip_dir``. Files whose name matches ``slide\\d{1}.xml`` are
     parsed; for each element's first ``oleObj`` child the relationship id
     is recorded (the last one seen wins) and a ``link`` sub-element marks
     the object as linked. ``slide1.xml.rels`` is then searched for a
     relationship with that id and target mode ``"External"`` whose
     percent-decoded target has the extension ``.hta`` (case-sensitive).

     Parsed relationships are cached in ``self.external_rels`` under the
     bare file name.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).
         office_type: Document family; anything but ``'ppt'`` is rejected.

     Returns:
         bool: True as soon as a linked OLE object, a matching external
         relationship and an ``.hta`` target have all been seen, otherwise
         False.

     Raises:
         Whatever ``etree.fromstring`` raises for a malformed slide; slide
         parsing is not guarded here.
     """
     # Precondition
     if office_type != 'ppt':
         return False

     r_id = ""
     flag_ole_link = False
     flag_external = False
     flag_target_hta = False
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             file_path = os.path.join(root, filename)
             # NOTE(review): re.match anchors only at the start, so this
             # also accepts names such as "slide1.xml.rels", and "\d{1}"
             # followed by "." rejects "slide10.xml" -- confirm intent.
             if re.match(r'slide\d{1}.xml', filename):
                 with open(file_path,
                           'r',
                           encoding='utf-8',
                           errors='ignore') as f:
                     xml_txt = f.read()
                 xp = xml_parser.XmlParser()
                 utf8_parser = etree.XMLParser(encoding='utf-8')
                 ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                 for elem in ooxml.iter():
                     p_ole = elem.find(_name('{{{p}}}oleObj'))
                     if p_ole is None:
                         continue
                     elements = xp.parse_object(p_ole)
                     r_id = elements['attrib'][_name('{{{r}}}id')]
                     if any(sub.tag == _name('{{{p}}}link')
                            for sub in elements['sub']):
                         flag_ole_link = True
             if filename == 'slide1.xml.rels':  # e.g. document.xml.rels
                 if filename not in self.external_rels:
                     with open(file_path,
                               'r',
                               encoding='utf-8',
                               errors='ignore') as f:
                         xml_txt = f.read().encode("utf-8")
                     xp = xml_parser.XmlParser()
                     xp.parse_relationship(xml_txt)
                     self.external_rels[filename] = xp.relationships
                 for relationship in self.external_rels[filename]:
                     if (relationship['id'] == r_id
                             and relationship['target_mode'] == "External"):
                         flag_external = True
                         decode_url = urllib.parse.unquote(
                             relationship['target'])
                         _, target_ext = os.path.splitext(decode_url)
                         if target_ext == '.hta':
                             flag_target_hta = True
                             break
             if flag_ole_link and flag_external and flag_target_hta:
                 # Verdict reached: stop walking instead of leaving only
                 # the inner loop and parsing further slides unguarded.
                 return True
     return False
 def get_soap_moniker_object(self, unzip_dir, office_type=""):
     """Report whether any relationship targets a ``soap:wsdl`` moniker.

     Walks ``unzip_dir`` and, for every file whose extension is ``.rels``,
     looks for a relationship with target mode ``"External"`` whose
     percent-decoded target starts with ``soap:wsdl`` (case-insensitive).

     Parsed relationships are cached in ``self.external_rels`` under the
     bare file name.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).
         office_type: Accepted for signature parity with the sibling
             checks; not used here.

     Returns:
         bool: True as soon as one matching relationship is found,
         otherwise False.
     """
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             # NOTE(review): os.path.splitext(".rels") yields an empty
             # extension, so a file named exactly ".rels" is skipped --
             # confirm that is intended.
             _, ext = os.path.splitext(filename)
             if ext != '.rels':  # e.g. document.xml.rels
                 continue
             file_path = os.path.join(root, filename)
             if filename not in self.external_rels:
                 with open(file_path,
                           'r',
                           encoding='utf-8',
                           errors='ignore') as f:
                     xml_txt = f.read().encode("utf-8")
                 xp = xml_parser.XmlParser()
                 xp.parse_relationship(xml_txt)
                 self.external_rels[filename] = xp.relationships
             for relationship in self.external_rels[filename]:
                 if relationship['target_mode'] != "External":
                     continue
                 decode_url = urllib.parse.unquote(relationship['target'])
                 if decode_url.lower().startswith("soap:wsdl"):
                     # Verdict reached: return instead of leaving only the
                     # inner loop and parsing .rels files further down.
                     return True
     return False
 def get_externals(self, unzip_dir):
     """List the ``.rels`` files that declare an external relationship.

     Walks ``unzip_dir``; every file whose extension is ``.rels`` is
     parsed (or taken from the ``self.external_rels`` cache, keyed by bare
     file name) and recorded when at least one of its relationships has
     target mode ``"External"``.

     A ``.rels`` file that fails to parse is logged and skipped. It no
     longer clears the result gathered from other files, so the returned
     flag always agrees with the returned list.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).

     Returns:
         tuple[bool, list[str]]: ``(found, external_files)`` where
         ``external_files`` holds each matching file name once, in walk
         order, and ``found`` is True exactly when that list is non-empty.
     """
     external_files = []
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             # NOTE(review): os.path.splitext(".rels") yields an empty
             # extension, so a file named exactly ".rels" is skipped --
             # confirm that is intended.
             _, ext = os.path.splitext(filename)
             if ext != '.rels':  # e.g. document.xml.rels
                 continue
             file_path = os.path.join(root, filename)
             try:
                 if filename not in self.external_rels:
                     with open(file_path,
                               'r',
                               encoding='utf-8',
                               errors='ignore') as f:
                         xml_txt = f.read().encode("utf-8")
                     xp = xml_parser.XmlParser()
                     xp.parse_relationship(xml_txt)
                     self.external_rels[filename] = xp.relationships
                 has_external = any(
                     relationship['target_mode'] == "External"
                     for relationship in self.external_rels[filename])
             except etree.ParseError as parse_err:
                 logging.exception(parse_err)
                 logging.exception(
                     "Error path: {file_path}".format(file_path=file_path))
                 continue
             if has_external and filename not in external_files:
                 external_files.append(filename)
     return bool(external_files), external_files
 def check_dynamic_load_externals(self, unzip_dir, office_type=""):
     """Report whether a relationship loads suspicious external content.

     Walks ``unzip_dir`` and examines every file whose extension is
     ``.rels``. A relationship with target mode ``"External"`` is a hit
     when either

     * its type is in the module-level ``external_types`` and its
       percent-decoded target has one of the extensions ``.doc``,
       ``.docx``, ``.docm``, ``.dotm``, ``.scr`` or ``.exe``
       (case-sensitive), or
     * its type equals the module-level ``external_types_frame``.

     Parsed relationships are cached in ``self.external_rels`` under the
     bare file name. A ``.rels`` file that fails to parse is logged and
     skipped; because the method returns on the first hit, such a failure
     in a later directory can no longer reset a positive result.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).
         office_type: Accepted for signature parity with the sibling
             checks; not used here.

     Returns:
         bool: True as soon as one hit is found, otherwise False.
     """
     suspicious_exts = ('.doc', '.docx', '.docm', '.dotm', '.scr', '.exe')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             _, ext = os.path.splitext(filename)
             if ext != '.rels':  # e.g. document.xml.rels
                 continue
             file_path = os.path.join(root, filename)
             try:
                 if filename not in self.external_rels:
                     with open(file_path,
                               'r',
                               encoding='utf-8',
                               errors='ignore') as f:
                         xml_txt = f.read().encode("utf-8")
                     xp = xml_parser.XmlParser()
                     xp.parse_relationship(xml_txt)
                     self.external_rels[filename] = xp.relationships
                 for relationship in self.external_rels[filename]:
                     if relationship['target_mode'] != "External":
                         continue
                     if relationship['type'] in external_types:
                         decode_url = urllib.parse.unquote(
                             relationship['target'])
                         _, target_ext = os.path.splitext(decode_url)
                         if target_ext in suspicious_exts:
                             return True
                     elif relationship['type'] == external_types_frame:
                         return True
             except etree.ParseError as parse_err:
                 logging.warning(parse_err)
                 logging.warning(
                     "Error path: {file_path}".format(file_path=file_path))
     return False
 def get_exteranl_ole_link_type(self, unzip_dir, office_type=""):
     """Report whether a linked OLE object points at an external target.

     Walks ``unzip_dir``. Every ``.xml`` file is parsed and, for each
     element's first ``OLEObject`` child, ``xp.parse_o_oleobject`` fills
     ``xp.oleobject_attrib``. The object counts as a link when its
     ``Type`` is ``"Link"`` and its ``o_LinkType`` is either
     ``"EnhancedMetaFile"``, or ``"Picture"`` with the text ``\\f 0`` in
     ``o_FieldCodes``; its ``r_id`` is recorded (the last one seen wins).
     Every ``.rels`` file is then searched for a relationship with that id
     and target mode ``"External"``.

     Parsed relationships are cached in ``self.external_rels`` under the
     bare file name. A file that fails to parse is logged and skipped; the
     flags are checked after the guarded section so that such a failure
     cannot discard evidence already gathered.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).
         office_type: Document family; ``'xl'`` is rejected, every other
             value is examined.

     Returns:
         bool: True as soon as both a qualifying OLE link and a matching
         external relationship have been seen, otherwise False.
     """
     # Precondition
     if office_type == 'xl':
         return False

     r_id = ""
     flag_ole_link = False
     flag_external = False
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             _, ext = os.path.splitext(filename)
             file_path = os.path.join(root, filename)
             try:
                 if ext == ".xml":  # e.g. document.xml
                     with open(file_path,
                               'r',
                               encoding='utf-8',
                               errors='ignore') as f:
                         xml_txt = f.read()
                     xp = xml_parser.XmlParser()
                     utf8_parser = etree.XMLParser(encoding='utf-8')
                     ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                     for elem in ooxml.iter():
                         o_oleobject = elem.find(_name('{{{o}}}OLEObject'))
                         if o_oleobject is None:
                             continue
                         xp.parse_o_oleobject(o_oleobject)
                         ole = xp.oleobject_attrib
                         if ole['Type'] != "Link":
                             continue
                         link_type = ole['child']['o_LinkType']
                         if link_type == "EnhancedMetaFile":
                             r_id = ole['r_id']
                             flag_ole_link = True
                         elif link_type == "Picture":
                             if r"\f 0" in ole['child']['o_FieldCodes']:
                                 r_id = ole['r_id']
                                 flag_ole_link = True
                 elif ext == '.rels':  # e.g. document.xml.rels
                     if filename not in self.external_rels:
                         with open(file_path,
                                   'r',
                                   encoding='utf-8',
                                   errors='ignore') as f:
                             xml_txt = f.read().encode("utf-8")
                         xp = xml_parser.XmlParser()
                         xp.parse_relationship(xml_txt)
                         self.external_rels[filename] = xp.relationships
                     for relationship in self.external_rels[filename]:
                         if (relationship['id'] == r_id and
                                 relationship['target_mode'] == "External"):
                             flag_external = True
             except etree.ParseError as parse_err:
                 logging.warning(parse_err)
                 logging.warning(
                     "Error path: {file_path}".format(file_path=file_path))
             if flag_ole_link and flag_external:
                 # Verdict reached: return instead of leaving only the
                 # inner loop, where a later parse error could reset it.
                 return True
     return False
 def get_external_ole_packagershell(self, unzip_dir, office_type=""):
     """Report whether a slide embeds a ``Package`` OLE object with a verb.

     Walks ``unzip_dir``. Files whose name matches ``slide\\d{1}.xml`` (and
     not the ``.rels`` pattern) are parsed, and four conditions are
     collected across the walk:

     * an ``oleObj`` whose ``progId`` attribute is ``'Package'``;
     * an ``oleObj`` with an ``embed`` sub-element;
     * a ``cmd`` element whose ``type`` is ``'verb'`` and ``cmd`` is
       ``'3'``;
     * in a file matching ``slide\\d{1}.xml.rels``, a relationship whose id
       equals the id of the most recently parsed ``oleObj`` and whose
       percent-decoded target contains ``../embeddings/oleObject``.

     Parsed relationships are cached in ``self.external_rels`` under the
     bare file name.

     Args:
         unzip_dir: Root of the directory tree to scan (presumably the
             extracted document package).
         office_type: Document family; anything but ``'ppt'`` is rejected.

     Returns:
         bool: True as soon as all four conditions have been seen,
         otherwise False.

     Raises:
         Whatever ``etree.fromstring`` raises for a malformed slide, and
         ``KeyError`` if an ``oleObj`` lacks ``progId`` or the relationship
         id attribute; neither is guarded here.
     """
     # Precondition
     if office_type != 'ppt':
         return False

     r_id = ""
     flag_package_shell = False
     flag_cmd = False
     flag_embed = False
     flag_embedding_ole = False
     # NOTE(review): both patterns are prefix matches and accept a single
     # digit only, so e.g. "slide10.xml" is not examined -- confirm intent.
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             file_path = os.path.join(root, filename)
             if re.match(r'slide\d{1}.xml.rels', filename):
                 if filename not in self.external_rels:
                     with open(file_path,
                               'r',
                               encoding='utf-8',
                               errors='ignore') as f:
                         xml_txt = f.read().encode("utf-8")
                     xp = xml_parser.XmlParser()
                     xp.parse_relationship(xml_txt)
                     self.external_rels[filename] = xp.relationships
                 for relationship in self.external_rels[filename]:
                     if relationship['id'] != r_id:
                         continue
                     decode_url = urllib.parse.unquote(
                         relationship['target'])
                     if "../embeddings/oleObject" in decode_url:
                         flag_embedding_ole = True
             elif re.match(r'slide\d{1}.xml', filename):
                 with open(file_path,
                           'r',
                           encoding='utf-8',
                           errors='ignore') as f:
                     xml_txt = f.read()
                 xp = xml_parser.XmlParser()
                 utf8_parser = etree.XMLParser(encoding='utf-8')
                 ooxml = etree.fromstring(xml_txt, parser=utf8_parser)
                 for elem in ooxml.iter():
                     p_ole = elem.find(_name('{{{p}}}oleObj'))
                     if p_ole is not None:  # If it has OLE object
                         elements = xp.parse_object(p_ole)
                         if elements['attrib']['progId'] == 'Package':
                             flag_package_shell = True
                         r_id = elements['attrib'][_name('{{{r}}}id')]
                         if any(sub.tag == _name('{{{p}}}embed')
                                for sub in elements['sub']):
                             flag_embed = True
                     p_cmd = elem.find(_name('{{{p}}}cmd'))
                     if p_cmd is not None:  # If it has a command element
                         cmd_attrib = xp.parse_object(p_cmd)['attrib']
                         if ('type' in cmd_attrib and 'cmd' in cmd_attrib
                                 and cmd_attrib['type'] == 'verb'
                                 and cmd_attrib['cmd'] == '3'):
                             flag_cmd = True
             if (flag_package_shell and flag_embed and flag_cmd
                     and flag_embedding_ole):
                 # Verdict reached: stop walking instead of leaving only
                 # the inner loop and parsing further slides unguarded.
                 return True
     return False