def analyze_dex(self, data):
    '''
    Analyze a single dex with radare2 (anal.timeout = 5): collect classes,
    externals, symbols, big functions and suspicious xrefs under the
    'APK_DEX_1' key, then extract words and wordsstripped from the dex.

    The r2 pipe is always closed via try/finally, even when one of the
    collection helpers raises (this was the original "#future plan" TODO).
    '''
    r2p = r2open(data["Location"]["File"], flags=['-2'])
    try:
        r2p.cmd("e anal.timeout = 5")
        r2p.cmd("aaaa;")
        k = 'APK_DEX_1'
        # helpers are called in the same order as before; the "_Xxx" keys
        # describe the column layout of their "Xxx" counterparts
        data[k] = {"Classes": self.get_all_classes(r2p),
                   "Externals": self.get_all_externals(r2p),
                   "Symbols": self.get_all_symbols(r2p),
                   "Bigfunctions": self.big_functions(r2p),
                   "Suspicious": self.check_sus(r2p),
                   "_Classes": ["Type", "Name"],
                   "_Externals": ["Type", "Name"],
                   "_Symbols": ["Type", "Address", "X", "Name"],
                   "_Bigfunctions": ["Size", "Name"],
                   "_Suspicious": ["Location", "Function", "Xrefs"]}
        get_words(data, data["Location"]["File"])
    finally:
        # force closing the radare2 pipe no matter what happened above
        r2p.quit()
def analyze(self, data):
    '''
    Analyze a pcap: read every packet, record domains, urls, arp, dns,
    http, all packets, ports and ip4s, run the waf checks, add port/ip
    descriptions and extract words and wordsstripped from the file.
    '''
    data["PCAP"] = deepcopy(self.datastruct)
    packets = scapy.rdpcap(data["Location"]["File"])
    # renamed from `all` to avoid shadowing the builtin all()
    all_packets, ports, ips, rarp, rdns, http, urlshttp, domains = \
        self.read_all_packets(packets)
    data["PCAP"]["Domains"] = domains
    data["PCAP"]["URLs"] = urlshttp
    data["PCAP"]["ARP"] = rarp
    data["PCAP"]["DNS"] = rdns
    data["PCAP"]["HTTP"] = http
    data["PCAP"]["ALL"] = all_packets
    data["PCAP"]["PORTS"] = ports
    data["PCAP"]["IP4S"] = ips
    self.waf.analyze(data["PCAP"]["HTTP"], data["PCAP"]["WAF"], "waf.json")
    add_description("Ports", data["PCAP"]["ALL"], "SourcePort")
    add_description("Ports", data["PCAP"]["ALL"], "DestinationPort")
    add_description("Ports", data["PCAP"]["PORTS"], "Port")
    add_description("DNSServers", data["PCAP"]["IP4S"], "IP")
    add_description("ReservedIP", data["PCAP"]["IP4S"], "IP")
    add_description("CountriesIPs", data["PCAP"]["IP4S"], "IP")
    get_words(data, data["Location"]["File"])
def analyze(self, data, parsed):
    '''
    Parse an email file: collect headers, single-part content, multipart
    parts and attachments, then extract words and wordsstripped from the
    gathered buffers, or from the file itself when nothing was gathered.
    '''
    data["EMAIL"] = deepcopy(self.datastruct)
    raw = data["FilesDumps"][data["Location"]["File"]]
    message = message_from_bytes(raw)
    headers = self.get_headers(data["EMAIL"]["General"], message)
    self.get_content(data["EMAIL"], data["Location"]["File"])
    parts = self.get_content_multi(data, message)
    streams = []
    if self.check_attachment_and_make_dir(data, message):
        streams = self.get_attachment(data, message)
    combined = streams + parts + headers
    if combined:
        # have to be bytes < will check this later on
        get_words_multi_filesarray(data, combined)
    else:
        get_words(data, data["Location"]["File"])
    parsed.type = "email"
def analyze(self, data):
    '''
    Parse a COD file: read the Header struct, skip the code section,
    load the data section, walk the resource table between
    Exportedstringoffset and Databytesoffset, record integer
    header/data fields as hex, then extract words and wordsstripped.
    '''
    with open(data["Location"]["File"], 'rb') as file:
        data["COD"] = deepcopy(self.datastruct)
        header = Header.from_buffer_copy(file.read(sizeof(Header)))
        file.read(header.Codesize)  # skip over the code section
        dataraw = file.read(header.Datasize)
        _data = _Data.from_buffer_copy(dataraw)
        resources = []
        offset = _data.Exportedstringoffset
        count = int((_data.Databytesoffset - _data.Exportedstringoffset) / sizeof(ResourceData))
        for _ in range(count):
            entry = ResourceData.from_buffer_copy(dataraw[offset:])
            sig = ""
            if entry.Size > 1:
                # first 10 bytes of the resource as a hex signature
                sig = "".join("{:02x}".format(x) for x in dataraw[entry.DataPointer:entry.DataPointer + 10])
            resources.append({"DataPointer": entry.DataPointer,
                              "Size": entry.Size,
                              "Sig": sig,
                              "Data": (dataraw[entry.DataPointer:entry.DataPointer + entry.Size]).decode("utf-8", "ignore")})
            offset += sizeof(ResourceData)
        # record every integer struct field as a hex string
        for field_name, _ in header._fields_:
            if isinstance(getattr(header, field_name), int):
                data["COD"]["Header"].update({field_name: hex(getattr(header, field_name))})
        for field_name, _ in _data._fields_:
            if isinstance(getattr(_data, field_name), int):
                data["COD"]["Data"].update({field_name: hex(getattr(_data, field_name))})
        data["COD"]["Resources"] = resources
        file.seek(0)
        data["COD"]["Symbols"] = self.get_functions_old(file.read())
        get_words(data, data["Location"]["File"])
def analyze(self, data):
    '''
    Parse a PE file: general header info (type, entrypoint, checksums,
    imphash, timestamp, 12-byte entrypoint signature), characteristics,
    signature, string file info, sections, dlls, resources/manifest/icons
    and imported/exported functions; add descriptions and extract words
    and wordsstripped from the file.
    '''
    data["PE"] = deepcopy(self.datastruct)
    data["ICONS"] = {"ICONS": []}
    pe = PE(data["Location"]["File"])
    entrypoint = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    section = self.find_entry_point_function(pe, entrypoint)
    # hex signature of the first 12 bytes at the entrypoint
    sig_hex = "".join("{:02x}".format(byte) for byte in section.get_data(entrypoint, 12))
    data["PE"]["General"] = {
        "PE Type": self.what_type(pe),
        "Entrypoint": pe.OPTIONAL_HEADER.AddressOfEntryPoint,
        "Entrypoint Section": section.Name.decode("utf-8", errors="ignore").strip("\00"),
        "Header checksum": hex(pe.OPTIONAL_HEADER.CheckSum),
        "Verify checksum": hex(pe.generate_checksum()),
        "Match checksum": pe.verify_checksum(),
        "Sig": sig_hex,
        "imphash": pe.get_imphash(),
        "warning": pe.get_warnings() if len(pe.get_warnings()) > 0 else "None",
        "Timestamp": datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp).strftime('%Y-%m-%d %H:%M:%S')}
    data["PE"]["Characteristics"] = self.get_characteristics(pe)
    # NOTE(review): "Singed" key spelling kept for backward compatibility
    data["PE"]["Singed"], data["PE"]["SignatureExtracted"] = self.check_if_singed(pe)
    data["PE"]["Stringfileinfo"] = self.get_string_file_info(pe)
    data["PE"]["Sections"] = self.get_sections(pe)
    data["PE"]["Dlls"] = self.get_dlls(pe)
    data["PE"]["Resources"], data["PE"]["Manifest"], data["ICONS"]["ICONS"] = self.get_recourse(pe)
    data["PE"]["Imported functions"] = self.get_imported_functions(pe)
    data["PE"]["Exported functions"] = self.get_exported_functions(pe)
    add_description("WinApis", data["PE"]["Imported functions"], "Function")
    add_description("ManHelp", data["PE"]["Imported functions"], "Function")
    add_description("WinDlls", data["PE"]["Dlls"], "Dll")
    add_description("WinSections", data["PE"]["Sections"], "Section")
    add_description("WinResources", data["PE"]["Resources"], "Resource")
    get_words(data, data["Location"]["File"])
def analyze_dex(self, data):
    '''
    Analyze a single dex with radare2 (anal.timeout = 5): delegate the
    collection to dex_wrapper under the 'APK_DEX_1' key, then extract
    words and wordsstripped from the dex file.
    '''
    r2p = r2open(data["Location"]["File"], flags=['-2'])
    try:
        r2p.cmd("e anal.timeout = 5")
        r2p.cmd("aaaa;")
        self.dex_wrapper(data, r2p, 'APK_DEX_1')
        get_words(data, data["Location"]["File"])
    finally:
        # force closing the radare2 pipe even if the analysis raised
        r2p.quit()
def check_sig(self, data):
    '''
    Handle files not detected by other modules: if the mime type marks
    an archive (jar/zip/zlib), unpack it and extract words and
    wordsstripped from the unpacked files; otherwise extract them from
    the file itself.
    '''
    # single membership test instead of three repeated dict lookups
    if data["Details"]["Properties"]["mime"] in ("application/java-archive",
                                                 "application/zip",
                                                 "application/zlib"):
        unpack_file(data, data["Location"]["File"])
        get_words_multi_files(data, data["Packed"]["Files"])
    else:
        get_words(data, data["Location"]["File"])
def analyze(self, data):
    '''
    Parse a pdf: collect objects, streams, js/javascript, openaction,
    launch, uri, action, goto, richmedia and aa entries together with
    their counts, then extract words and wordsstripped from the stream
    buffers if any exist, otherwise from the file itself.
    '''
    data["PDF"] = deepcopy(self.datastruct)
    f = data["FilesDumps"][data["Location"]["File"]]
    objlen, objs = self.get_object(f)
    strlen, strs, _Streams = self.get_stream(f)
    jslen, jslist = self.get_js(f)
    jalen, jaslist = self.get_javascript(f)
    oalen, oalist = self.get_openaction(f)
    llen, llist = self.get_lunch(f)
    ulen, ulist = self.get_uri(f)
    alen, alist = self.get_action(f)
    gtrlen, gtrlist = self.get_gotor(f)
    rmlen, rmlist = self.get_richmedia(f)
    aalen, aalist = self.get_aa(f)
    data["PDF"]["Count"] = {
        "Object": objlen,
        "Stream": strlen,
        "JS": jslen,
        "Javascript": jalen,
        "OpenAction": oalen,
        "Launch": llen,
        "URI": ulen,
        "Action": alen,
        "GoTo": gtrlen,
        "RichMedia": rmlen,
        "AA": aalen
    }
    data["PDF"]["Object"] = objs
    data["PDF"]["JS"] = jslist
    data["PDF"]["Javascript"] = jaslist
    data["PDF"]["OpenAction"] = oalist
    data["PDF"]["Launch"] = llist
    data["PDF"]["URI"] = ulist
    data["PDF"]["Action"] = alist
    data["PDF"]["GoTo"] = gtrlist
    data["PDF"]["RichMedia"] = rmlist
    data["PDF"]["AA"] = aalist
    data["PDF"]["Stream"] = strs
    if len(_Streams) > 0:
        get_words_multi_filesarray(data, _Streams)
    else:
        # BUG FIX: previously passed the empty _Streams list here;
        # fall back to the file path like every other module does
        get_words(data, data["Location"]["File"])
def analyze(self, data):
    '''
    Parse an ole file: general info, streams/objects and macros, then
    extract words and wordsstripped from the embedded objects if any
    exist, otherwise from the file itself.
    '''
    # local import keeps this module self-contained
    from copy import deepcopy

    # BUG FIX: every other module deepcopies the shared template; the
    # plain assignment here let results from one file leak into the next
    data["OLE"] = deepcopy(self.datastruct)
    f = data["FilesDumps"][data["Location"]["File"]]
    self.get_general(data["OLE"]["General"], f)
    data["OLE"]["Objects"], objects = self.get_streams(f)
    data["OLE"]["Macro"] = self.extract_macros(data["Location"]["File"])
    if len(objects) > 0:
        get_words_multi_filesarray(data, objects)
    else:
        get_words(data, data["Location"]["File"])
def analyze(self, data):
    '''
    Parse an html file with BeautifulSoup (input lower-cased first):
    collect hrefs, srcs, anchors, scripts, iframes, links and forms,
    then extract words and wordsstripped from the file.
    '''
    data["HTML"] = deepcopy(self.datastruct)
    lowered = data["FilesDumps"][data["Location"]["File"]].lower()
    soup = BeautifulSoup(lowered, 'html.parser')
    # dispatch table: target bucket in data["HTML"] -> collector method
    collectors = (("hrefs", self.get_all_hrefs),
                  ("srcs", self.get_all_srcs),
                  ("A", self.get_a),
                  ("Scripts", self.get_scripts),
                  ("Iframes", self.get_iframes),
                  ("Links", self.get_links),
                  ("Forms", self.get_forms))
    for bucket, collect in collectors:
        collect(data["HTML"][bucket], soup)
    get_words(data, data["Location"]["File"])
def analyze_macho(self, data):
    '''
    Parse a mach-o file: sections, libraries and the four symbol tables
    (all/undefined/external/local), add ManHelp descriptions to symbols
    and extract words and wordsstripped from the file.
    '''
    macho = MachO.MachO(data["Location"]["File"])
    data["MACHO"] = deepcopy(self.datastruct)
    fbuffer = data["FilesDumps"][data["Location"]["File"]]
    # BUG FIX: `data["MACHO"]["General"]: {}` was a no-op annotation
    # expression, not an assignment
    data["MACHO"]["General"] = {}
    data["MACHO"]["Sections"] = self.get_sections(macho, fbuffer)
    data["MACHO"]["Libraries"] = self.get_libs(macho)
    data["MACHO"]["Symbols"] = self.get_symbols(macho)
    data["MACHO"]["Undefined Symbols"] = self.get_undef_symbols(macho)
    data["MACHO"]["External Symbols"] = self.get_extdef_symbols(macho)
    data["MACHO"]["Local Symbols"] = self.get_local_symbols(macho)
    add_description("ManHelp", data["MACHO"]["Symbols"], "Symbol")
    get_words(data, data["Location"]["File"])
def analyze(self, data):
    '''
    Parse an elf file: general header info (type, machine, entropy,
    entrypoint, interpreter), sections, dynamic entries, symbols and
    relocations; add descriptions and extract words and wordsstripped
    from the file.
    '''
    # two handles: one consumed by ELFFile, one read whole for entropy
    with open(data["Location"]["File"], 'rb') as parse_handle, \
            open(data["Location"]["File"], 'rb') as entropy_handle:
        data["ELF"] = deepcopy(self.datastruct)
        elf = ELFFile(parse_handle)
        data["ELF"]["General"] = {"ELF Type": elf.header.e_type,
                                  "ELF Machine": elf.header.e_machine,
                                  "Entropy": get_entropy(entropy_handle.read()),
                                  "Entrypoint": hex(elf.header.e_entry),
                                  "Interpreter": self.get_iter(elf)}
        data["ELF"]["Sections"] = self.get_section(elf)
        data["ELF"]["Dynamic"] = self.get_dynamic(elf)
        data["ELF"]["Symbols"] = self.get_symbols(elf)
        data["ELF"]["Relocations"] = self.get_relocations(elf)
        add_description("ManHelp", data["ELF"]["Symbols"], "Symbol")
        add_description("LinuxSections", data["ELF"]["Sections"], "Section")
        get_words(data, data["Location"]["File"])
def analyze(self, data, parsed):
    '''
    Parse an outlook msg file: collect headers, content and attachments
    (need to implement from extract_msg.dev_classes import Message),
    then extract words and wordsstripped from the gathered buffers, or
    from the file itself when nothing was gathered.
    '''
    data["MSG"] = deepcopy(self.datastruct)
    message = Message(data["Location"]["File"])
    headers = self.get_headers(data["MSG"]["General"], message)
    self.get_content(data["MSG"], message)
    streams = []
    if self.check_attachment_and_make_dir(data, message):
        streams = self.get_attachment(data, message)
    parts = []  # placeholder kept for parity with the email module
    combined = streams + parts + headers
    if combined:
        # have to be bytes < will check this later on
        get_words_multi_filesarray(data, combined)
    else:
        get_words(data, data["Location"]["File"])
    parsed.type = "msg"
def analyze(self, data):
    '''
    Parse a PE file: disassemble the first 52 entrypoint bytes with
    radare2 (best-effort, falls back to "UnKnown"), record general
    header info, characteristics, signature, string file info, sections,
    dlls, resources/manifest/icons and imported/exported functions; add
    descriptions and extract words and wordsstripped from the file.
    '''
    data["PE"] = deepcopy(self.datastruct)
    data["ICONS"] = {"ICONS": []}
    pe_info = PE(data["Location"]["File"])
    ep_info = pe_info.OPTIONAL_HEADER.AddressOfEntryPoint
    section = self.find_entry_point_function(pe_info, ep_info)
    # defaults survive when either best-effort block below fails
    singinhex = "UnKnown"
    en_section_name = "UnKnown"
    sig_instructions = "UnKnown"
    with ignore_excpetion(Exception):
        raw_sig = section.get_data(ep_info, 52)
        singinhex = "".join("{:02x}".format(byte) for byte in raw_sig)
        # disassemble the hex signature via a detached r2 session,
        # keeping only the first 8 instruction lines
        r2p = r2open("-", flags=['-2'])
        r2p.cmd("e anal.timeout = 5")
        disasm_lines = r2p.cmd("pad {}".format(singinhex)).split("\n")[:8]
        sig_instructions = "\n".join(disasm_lines)
    with ignore_excpetion(Exception):
        en_section_name = section.Name.decode("utf-8", errors="ignore").strip("\00")
    data["PE"]["General"] = {
        "PE Type": self.what_type(pe_info),
        "Entrypoint": pe_info.OPTIONAL_HEADER.AddressOfEntryPoint,
        "Entrypoint Section": en_section_name,
        "Header checksum": hex(pe_info.OPTIONAL_HEADER.CheckSum),
        "Verify checksum": hex(pe_info.generate_checksum()),
        "Match checksum": pe_info.verify_checksum(),
        "Sig": singinhex,
        "imphash": pe_info.get_imphash(),
        "warning": pe_info.get_warnings() if len(pe_info.get_warnings()) > 0 else "None",
        "Timestamp": datetime.fromtimestamp(pe_info.FILE_HEADER.TimeDateStamp).strftime('%Y-%m-%d %H:%M:%S')
    }
    data["PE"]["Characteristics"] = self.get_characteristics(pe_info)
    # NOTE(review): "Singed" key spelling kept for backward compatibility
    data["PE"]["Singed"], data["PE"]["SignatureExtracted"] = self.check_if_singed(pe_info)
    data["PE"]["Stringfileinfo"] = self.get_string_file_info(pe_info)
    data["PE"]["Sections"] = self.get_sections(pe_info)
    data["PE"]["Dlls"] = self.get_dlls(pe_info)
    data["PE"]["Resources"], data["PE"]["Manifest"], data["ICONS"]["ICONS"] = self.get_recourse(pe_info)
    data["PE"]["Imported functions"] = self.get_imported_functions(pe_info)
    data["PE"]["Exported functions"] = self.get_exported_functions(pe_info)
    data["PE"]["Entrypoint"] = sig_instructions
    add_description("WinApis", data["PE"]["Imported functions"], "Function")
    add_description("ManHelp", data["PE"]["Imported functions"], "Function")
    add_description("WinDlls", data["PE"]["Dlls"], "Dll")
    add_description("WinSections", data["PE"]["Sections"], "Section")
    add_description("WinResources", data["PE"]["Resources"], "Resource")
    get_words(data, data["Location"]["File"])