Example #1
 def testFilter(self):
     scout = ApiScout()
     results = {
         'test_1': [(0x0, ), (0x10, ), (0x14, ), (0x30, ), (0x40, ),
                    (0x44, ), (0x48, )],
         'test_2': [(0x0, ), (0x18, ), (0x1c, )],
         'test_3': [(0x0, )],
     }
     # no arguments = no filtering
     filtered = scout.filter(results, 0, 0, 0)
     self.assertEqual(results, filtered)
     # filtering by range:
     filtered = scout.filter(results, 0x14, 0x34, 0)
     expected = {
         'test_1': [(0x14, ), (0x30, )],
         'test_2': [(0x18, ), (0x1c, )],
         'test_3': []
     }
     self.assertEqual(expected, filtered)
     # filtering by distance:
     filtered = scout.filter(results, 0, 0, 0x4)
     expected = {
         'test_1': [(0x10, ), (0x14, ), (0x40, ), (0x44, ), (0x48, )],
         'test_2': [(0x18, ), (0x1c, )],
         'test_3': []
     }
     self.assertEqual(expected, filtered)
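For orientation, a minimal self-contained sketch of the same filter call outside the test class; the toy_results dict and the chosen bounds are placeholders, and the argument order (results, from_addr, to_addr, distance) follows the calls seen in the examples below:

from apiscout.ApiScout import ApiScout

# hedged sketch: range filter vs. neighbour-distance filter on toy offsets
scout = ApiScout()
toy_results = {'demo': [(0x0, ), (0x10, ), (0x14, ), (0x40, )]}
ranged = scout.filter(toy_results, 0x8, 0x20, 0)     # keep only offsets inside the given range
clustered = scout.filter(toy_results, 0, 0, 0x8)     # keep only hits with a close neighbour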
Example #2
 def testResolveAddressCandidates(self):
     scout = ApiScout()
     scout.api_maps["test_map"] = {0x1000: ("test.dll", "TestApi", 32)}
     result_hit = ('test.dll', 'TestApi', 32)
     result_miss = ('', '', '')
     self.assertEqual(result_hit,
                      scout._resolveApiByAddress("test_map", 0x1000))
     self.assertEqual(result_miss,
                      scout._resolveApiByAddress("test_map", 0))
Example #3
def main():
    tools = IdaTools()
    parameters = tools.formGetParameters()
    if parameters:
        scout = ApiScout()
        scout.ignoreAslrOffsets(parameters["ignore_aslr_offset"])
        for path in parameters["api_dbs"]:
            scout.loadDbFile(path)
        bitness_string = "32bit and 64bit" if scout.has_64bit else "32bit"
        segments = tools.getAllMemoryFromIda()
        base_address = tools.getBaseAddress()
        all_results = {}
        for segment_address, binary in sorted(segments.items()):
            scout.setLoadOffset(segment_address - base_address)
            print("Scanning %d bytes @0x%x in %s mode." %
                  (len(binary), segment_address, bitness_string))
            updateResults(all_results, scout.crawl(binary))
        selected_apis = tools.formSelectResults(all_results)
        if selected_apis:
            tools.importTypeLibraries()
            num_renamed, num_skipped, num_xrefs_adapted = tools.applyApiNames(
                selected_apis)
            print("Annotated %d APIs and adapted %d Xrefs(%d skipped)." %
                  (num_renamed, num_xrefs_adapted, num_skipped))
        else:
            print("No APIs selected for annotation, closing.")
Example #4
def main():
    parser = argparse.ArgumentParser(
        description=
        'Demo: Use apiscout with a prepared api database (created using DatabaseBuilder.py) to crawl a dump for imports and render the results.'
    )
    parser.add_argument(
        '-f',
        '--filter',
        type=int,
        default=0,
        help='Filter out APIs that do not have a neighbour within N bytes.')
    parser.add_argument(
        '-i',
        '--ignore_aslr',
        action='store_true',
        help=
        'Do not apply the per-module ASLR offset potentially contained in an API DB file.'
    )
    parser.add_argument('binary_path',
                        type=str,
                        default='',
                        help='Path to the memory dump to crawl.')
    parser.add_argument(
        'db_path',
        type=str,
        nargs='*',
        help=
        'Path to the DB(s). If no argument is given, use all files found in "./dbs"'
    )

    args = parser.parse_args()
    if args.binary_path:
        binary = ""
        if os.path.isfile(args.binary_path):
            with open(args.binary_path, "rb") as f_binary:
                binary = f_binary.read()
        if not args.db_path:
            args.db_path = get_all_db_files()
        scout = ApiScout()
        # override potential ASLR offsets that are stored in the API DB files.
        scout.ignoreAslrOffsets(args.ignore_aslr)
        # load DB file
        for db_path in args.db_path:
            scout.loadDbFile(db_path)
        print("Using '{}' to analyze '{}.".format(args.db_path,
                                                  args.binary_path))
        num_apis_loaded = scout.getNumApisLoaded()
        filter_info = " - neighbour filter: 0x%x" % args.filter if args.filter else ""
        print("Buffer size is {} bytes, {} APIs loaded{}.\n".format(
            len(binary), num_apis_loaded, filter_info))
        results = scout.crawl(binary)
        filtered_results = scout.filter(results, 0, 0, args.filter)
        print(scout.render(filtered_results))
    else:
        parser.print_help()
Example #5
 def testIterators(self):
     scout = ApiScout()
     result_dwords = [
         dword
         for dword in scout.iterateAllDwords(b"\x00\x00\x00\x00\x01\x02")
     ]
     result_qwords = [
         dword for dword in scout.iterateAllQwords(
             b"\x00\x00\x00\x00\x01\x02\x00\x00\x00")
     ]
     self.assertEqual([(0, 0), (1, 16777216), (2, 33619968)], result_dwords)
     self.assertEqual([(0, 2203318222848), (1, 8606711808)], result_qwords)
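As a sanity check (not from the apiscout test suite), the expected values above can be reproduced with struct, which suggests the iterators yield (offset, little-endian integer) pairs for every possible offset in the buffer:

import struct

# hedged equivalence check for the dword iterator result above
buf = b"\x00\x00\x00\x00\x01\x02"
manual_dwords = [(i, struct.unpack_from("<I", buf, i)[0]) for i in range(len(buf) - 3)]
assert manual_dwords == [(0, 0), (1, 16777216), (2, 33619968)]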
Example #6
 def __init__(self, title, api_results, flags=0):
     Choose.__init__(self,
                      title,
                      [["#", 6], ["Offset", 14], ["API Address", 14], ["DLL", 20], ["API", 35]],
                      embedded=True, width=140, height=20, flags=flags)
     self.row_count = 0
     self.base_address = [ea for ea in idautils.Segments()][0]
     self.scout = ApiScout()
     self.scout.setBaseAddress(self.base_address)
     self.api_results = api_results
     self.all_items = self.populate(api_results)
     self.items = self.populate(api_results)
     self.icon = 4
     self.selcount = 0
Example #7
class ApiChooser(Choose):
    """
    A simple chooser to be used as an embedded chooser
    """
    def __init__(self, title, api_results, flags=0):
        Choose.__init__(self,
                         title,
                         [["#", 6], ["Offset", 14], ["API Address", 14], ["DLL", 20], ["API", 35]],
                         embedded=True, width=140, height=20, flags=flags)
        self.row_count = 0
        self.base_address = [ea for ea in idautils.Segments()][0]
        self.scout = ApiScout()
        self.scout.setBaseAddress(self.base_address)
        self.api_results = api_results
        self.all_items = self.populate(api_results)
        self.items = self.populate(api_results)
        self.icon = 4
        self.selcount = 0

    def filterDisplay(self, from_addr, to_addr, distance):
        filtered_items = self.scout.filter(self.api_results, from_addr, to_addr, distance)
        self.items = self.populate(filtered_items)

    def populate(self, api_results):
        api_rows = []
        unified_results = set([])
        for key in api_results:
            unified_results.update(api_results[key])
        for index, entry in enumerate(sorted(unified_results)):
            dll_name = "{} ({}bit)".format(entry[2], entry[4])
            api_rows.append(["%d" % (index + 1), "0x%x" % (self.base_address + entry[0]), "0x%x" % entry[1], dll_name, entry[3]])
            self.row_count += 1
        return api_rows

    def OnClose(self):
        pass

    def getItems(self, l):
        items = []
        for index in l:
            items.append([int(self.items[index][1], 16), int(self.items[index][2], 16), self.items[index][3], str(self.items[index][4])])
        return items

    def OnGetLine(self, n):
        return self.items[n]

    def OnGetSize(self):
        n = len(self.items)
        return n
Example #8
 def testCrawlRealData(self):
     test_binary = ""
     this_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))
     binary_path = os.path.join(this_dir, "example_dump.bin")
     with open(binary_path, "rb") as f_in:
         test_binary = f_in.read()
     db_path = os.path.join(this_dir, "minimal_db.json")
     scout = ApiScout(db_path)
     results = {
         u'Windows 7':
         [(4, 2105895504, u'KernelBase.dll', u'InterlockedIncrement', 32),
          (12, 8792746496016, u'KernelBase.dll', u'WaitForSingleObjectEx',
           64)]
     }
     self.assertEqual(results, scout.crawl(test_binary))
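The tuples in this expected result, together with the render test further below, suggest a per-hit layout of (offset in buffer, API virtual address, DLL name, API name, bitness); a hedged unpacking sketch using the first hit above:

# hedged sketch of the apparent result-tuple layout in this apiscout version
offset, api_va, dll_name, api_name, bitness = (4, 2105895504, u'KernelBase.dll',
                                               u'InterlockedIncrement', 32)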
Example #9
 def testLoadDb(self):
     this_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))
     db_path = os.path.join(this_dir, "minimal_db.json")
     scout = ApiScout(db_path)
     expected_maps = {
         u'Windows 7': {
             8792746496016:
             (u'KernelBase.dll', u'WaitForSingleObjectEx', 64),
             2105895504: (u'KernelBase.dll', u'InterlockedIncrement', 32),
             4119: (u'noversion.dll', u'SomeAPI', 32)
         }
     }
     self.assertEqual(expected_maps, scout.api_maps)
     with self.assertRaises(ValueError):
         scout.loadDbFile("Error")
Example #10
 def _initScout(self, db_paths, winapi1024_path):
     scout = ApiScout()
     # override potential ASLR offsets that are stored in the API DB files.
     scout.ignoreAslrOffsets(True)
     # load DB file
     for db_path in db_paths:
         scout.loadDbFile(db_path)
     # load WinApi1024 vector
     scout.loadWinApi1024(winapi1024_path)
     return scout
Example #11
 def testRender(self):
     scout = ApiScout()
     results = {
         'test_1': [(16, 0x1032, 'test32.dll', 'TestApi32', 32),
                    (40, 0x1064, 'test64.dll', 'TestApi64', 64)]
     }
     expected_hits = [
         'Results for API DB: test_1',
         'idx: offset    ; VA                ; DLL                           ; API',
         '  1: 0x00000010;         0x00001032; test32.dll (32bit)            ; TestApi32',
         '---------------------------------------------------------------------------------------------------------------------------------',
         '  2: 0x00000028; 0x0000000000001064; test64.dll (64bit)            ; TestApi64',
         'DLLs: 2, APIs: 2'
     ]
     rendered = scout.render(results)
     for hit in expected_hits:
         self.assertTrue(hit in rendered)
     expected_no_result = "No results for API map: test_2\n"
     self.assertEqual(expected_no_result, scout.render({"test_2": []}))
Example #12
    def __init__(
        self,
        host,
        port,
        user,
        password,
        threshold=40,
        secure=False,
        filepath=None,
        filename=None,
        folder_path=None,
    ):
        """Connects to neo4j database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        self.threshold = int(threshold)
        self.graph = Graph(host=host,
                           user=user,
                           password=password,
                           secure=secure,
                           port=port)
        self.filepath = filepath
        self.filename = filename
        self.folder_path = folder_path
        self.scout = ApiScout()
        self.scout.setBaseAddress(0)
        self.scout.loadWinApi1024(
            os.path.abspath(os.path.join(os.path.dirname(__file__))) + os.sep +
            "data" + os.sep + "winapi1024v1.txt")

        self.magictest = magic.Magic(uncompress=True)
        CWD = os.path.abspath(os.path.dirname(__file__))
        USERDB = os.path.join(CWD, os.path.normpath("data/UserDB.TXT"))
        with open(USERDB, "rt") as f:
            sig_data = f.read()
            self.signatures = peutils.SignatureDatabase(data=sig_data)

        if self.folder_path:
            self.files = self.get_files(folder_path)
Example #13
 def testCrawlToyData(self):
     test_binary = b"\00" * 0x10 + b"\x34\x12\x00\x00" + b"\00" * 0x10 + b"\x78\x56\x00\x00\x00\x00\x00\x00" + b"\00" * 0x10
     scout = ApiScout()
     scout.api_maps["test_1"] = {0x1234: ("test.dll", "TestApi", 32)}
     scout.api_maps["test_2"] = {0x5678: ("test2.dll", "TestApi2", 64)}
     scout.has_64bit = True
     results = {
         'test_2': [(36, 22136, 'test2.dll', 'TestApi2', 64, None, 1)],
         'test_1': [(16, 4660, 'test.dll', 'TestApi', 32, None, 1)]
     }
     self.assertEqual(results, scout.crawl(test_binary))
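The expected offsets 16 and 36 follow directly from how test_binary is assembled above (16 zero bytes, a 4-byte dword, 16 zero bytes, then an 8-byte qword):

# worked check of the hit offsets in the toy buffer
assert 0x10 == 16                  # the 32bit value starts after 16 padding bytes
assert 0x10 + 4 + 0x10 == 36       # the 64bit value starts after padding + dword + padding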
Example #14
 def testCrawlToyData(self):
     test_binary = "\00" * 0x10 + struct.pack(
         "I", 0x1234) + "\00" * 0x10 + struct.pack("Q",
                                                   0x5678) + "\00" * 0x10
     scout = ApiScout()
     scout.api_maps["test_1"] = {0x1234: ("test.dll", "TestApi", 32)}
     scout.api_maps["test_2"] = {0x5678: ("test2.dll", "TestApi2", 64)}
     scout.has_64bit = True
     results = {
         'test_2': [(36, 22136, 'test2.dll', 'TestApi2', 64)],
         'test_1': [(16, 4660, 'test.dll', 'TestApi', 32)]
     }
     self.assertEqual(results, scout.crawl(test_binary))
Example #15
def main():
    parser = argparse.ArgumentParser(
        description='Demo: Use apiscout to match WinApi1024 vectors.')
    parser.add_argument('vector_a',
                        type=str,
                        default='',
                        help='compressed version of first vector.')
    parser.add_argument('-v',
                        '--vector_b',
                        type=str,
                        default='',
                        help='compressed version of second vector.')
    parser.add_argument('-c',
                        '--collection',
                        type=str,
                        default='',
                        help='Path to a collection of compressed vectors.')
    parser.add_argument('-n',
                        '--max_results',
                        type=int,
                        default=5,
                        help='Maximum number of family results to show.')

    args = parser.parse_args()
    scout = ApiScout()
    # load WinApi1024 vector
    scout.loadWinApi1024(get_winapi1024_path())
    if args.vector_a and args.vector_b:
        score = scout.matchVectors(args.vector_a, args.vector_b)
        print("Result of matching vectors:")
        print("Vector A: {}".format(args.vector_a))
        print("Vector B: {}".format(args.vector_b))
        print("Score: {}".format(score))
    elif args.vector_a and args.collection:
        collection_result = scout.matchVectorCollection(
            args.vector_a, args.collection)
        print(
            scout.renderVectorCollectionResults(collection_result,
                                                args.max_results))
    else:
        parser.print_help()
Example #16
class Api():
    """Stores and analyze malwares info in neo4j"""
    def __init__(self,
                 host,
                 port,
                 user,
                 password,
                 threshold=40,
                 secure=False,
                 filepath=None,
                 filename=None,
                 folder_path=None):
        """Connects to neo4j database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        self.threshold = int(threshold)
        self.graph = Graph(host=host,
                           user=user,
                           password=password,
                           secure=secure,
                           port=port)
        self.filepath = filepath
        self.filename = filename
        self.folder_path = folder_path
        self.scout = ApiScout()
        self.scout.setBaseAddress(0)
        self.scout.loadWinApi1024(
            os.path.abspath(os.path.join(os.path.dirname(__file__))) + os.sep +
            "data" + os.sep + "winapi1024v1.txt")

        self.magictest = magic.Magic(uncompress=True)
        CWD = os.path.abspath(os.path.dirname(__file__))
        USERDB = os.path.join(CWD, os.path.normpath("data/UserDB.TXT"))
        with open(USERDB, 'rt') as f:
            sig_data = f.read()
            self.signatures = peutils.SignatureDatabase(data=sig_data)

        if self.folder_path:
            self.files = self.get_files(folder_path)

    def check_file(self, f):
        if magic.from_file(f).find('PE32') == -1:
            return False
        if magic.from_file(f).find('self-extracting') != -1:
            return False
        try:
            pe = pefile.PE(f)
            matches = self.signatures.match_all(pe, ep_only=True)
            if matches:
                return False
            return True
        except:
            return False

    def get_files(self, folder_path):
        files_end = []

        files = []
        for root, dirnames, filenames in os.walk(folder_path):
            for filename in fnmatch.filter(filenames, '*'):
                files.append(os.path.join(root, filename))

        for filepath in files:
            if not self.check_file(filepath):
                continue
            json_path = "/".join(filepath.split(
                "/")[:-2]) + "/" + filepath.split("/")[-3] + ".json"
            if not os.path.exists(json_path):
                json_path = "/".join(filepath.split(
                    "/")[:-1]) + "/" + filepath.split("/")[-2] + ".json"
            if not os.path.exists(json_path):
                continue
            with open(json_path, 'r') as f:
                file_family = json.loads("".join([
                    str(x) for x in f.readlines()
                ])).get('common_name').replace(" ", "_")
            files_end.append((filepath, file_family))

        print(len(files_end), "files to load")
        return files_end

    def get_digest(self, file):
        """ return hash, impuzzy and scout """
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()

        try:
            impfuzzy = pyimpfuzzy.get_impfuzzy(file)
        except:
            impfuzzy = ""

        if os.path.isfile(file):
            with open(file, "rb") as f_binary:
                binary = f_binary.read()
        try:
            scout_ev = self.scout.evaluateImportTable(binary, is_unmapped=True)
            scout_result = self.scout.getWinApi1024Vectors(scout_ev).get(
                'import_table', {}).get('vector', None)
            scout_confidence = self.scout._apivector.getVectorConfidence(
                scout_result)
        except:
            with open('fail_list.txt', 'a') as f:
                f.write(file + "\n")
            scout_result = None
            scout_confidence = None

        with open(file, "rb") as f:
            while True:
                buf = f.read(2047)
                if not buf:
                    break
                md5.update(buf)
                sha1.update(buf)
                sha256.update(buf)

        return (scout_result, impfuzzy, md5.hexdigest(), sha1.hexdigest(),
                sha256.hexdigest(), scout_confidence)

    def impfuzzy_comp(self, list, list_new):
        ssdeep = re.compile(r"^[0-9]{1,5}:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$",
                            re.DOTALL)
        complist = []
        list_len = len(list_new)
        i = 0
        for item_new in list_new:
            i += 1
            if re.search(ssdeep, item_new[2]) and len(item_new[2]) < 150:
                for j in range(i, list_len):
                    if re.search(ssdeep,
                                 list_new[j][2]) and len(list_new[j][2]) < 150:
                        complist.append([
                            item_new[0], list_new[j][0],
                            pyimpfuzzy.hash_compare(item_new[2],
                                                    list_new[j][2])
                        ])

        if list:
            for item_new in list_new:
                if re.search(ssdeep, item_new[2]) and len(item_new[2]) < 150:
                    for item in list:
                        if re.search(ssdeep, item[2]) and len(item[2]) < 150:
                            complist.append([
                                item_new[0], item[0],
                                pyimpfuzzy.hash_compare(item_new[2], item[2])
                            ])

        return complist

    def scout_comp(self, list, list_new):
        complist = []
        list_len = len(list_new)
        i = 0
        for item_new in list_new:
            i += 1
            for j in range(i, list_len):
                complist.append([
                    item_new[0], list_new[j][0],
                    int(
                        self.scout.matchVectors(item_new[3], list_new[j][3]) *
                        100)
                ])
        for item_new in list_new:
            for item in list:
                complist.append([
                    item_new[0], item[0],
                    int(self.scout.matchVectors(item_new[3], item[3]) * 100)
                ])
        return complist

    def process(self):

        hashlist = []
        hashlist_new = []
        nodes = []
        edges = []
        relationships = []

        # recover all actual data
        database = self.graph.run(
            "MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.scout_result, m.scout_confidence, m.md5, m.sha1, m.sha256, m.tag"
        ).data()
        if database:
            for d in database:
                hashlist.append([
                    d["m.id"], d["m.name"], d["m.impfuzzy"],
                    d["m.scout_result"], d["m.scout_confidence"], d["m.md5"],
                    d["m.sha1"], d["m.sha256"], d["m.tag"]
                ])

        nodes_count = len(database)
        i = nodes_count

        relation_data = self.graph.run(
            "MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id, m2.id, s.value"
        ).data()
        if relation_data:
            for r in relation_data:
                relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])
        for x in range(nodes_count):
            nodes.append(x)

        # if massive check for each file
        if self.folder_path:
            for item in self.files:
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    item[0])
                if scout_result in ("", 'A171', None):
                    continue

                query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
                objs = self.graph.run(query).data()
                if not objs and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([
                        i, item[0].split("/")[-1], impfuzzy, scout_result,
                        scout_confidence, md5, sha1, sha256, item[1]
                    ])
                    i += 1
                else:
                    continue
        else:
            # if single we are in the reporting module
            # if file is tested it need to have valid apiscout vector
            if self.check_file(self.filepath):
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    self.filepath)
                if scout_result in ("", 'A171', None):
                    return {}
            else:
                return {}

            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256

            objs = self.graph.run(query).data()
            if not objs:
                nodes.append(nodes_count)
                hashlist_new.append([
                    nodes_count, self.filename, impfuzzy, scout_result,
                    scout_confidence, md5, sha1, sha256, None
                ])
            else:
                return self.search_hash(sha256)

        # Calculate apiscout correlation
        result_list = self.scout_comp(hashlist, hashlist_new)

        if len(database) != len(nodes):
            for edge in result_list + relationships:
                if edge[2] > self.threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()

        # Create node
        tx = self.graph.begin()

        for hash in hashlist_new + hashlist:
            i = 0
            for a in partition:
                i += 1
                if hash[0] in a:
                    tx.append(
                        statement_c, {
                            "id": hash[0],
                            "name": hash[1],
                            "impfuzzy": hash[2],
                            "scout_result": hash[3],
                            "scout_confidence": hash[4],
                            "md5": hash[5],
                            "sha1": hash[6],
                            "sha256": hash[7],
                            "tag": hash[8],
                            "cluster": i
                        })

        # Create relationship
        for result in result_list:
            if result[2] > self.threshold:
                tx.append(statement_r, {
                    "id1": result[0],
                    "id2": result[1],
                    "value_scout": result[2]
                })

        tx.process()
        tx.commit()

        # recover info
        if self.filename:
            return self.search_hash(sha256)

    def process_file(self, filepath, filename):
        self.filepath = filepath
        self.filename = filename
        return self.process()

    def search_hash(self, data):

        return_dict = {}

        # identify hash type
        HASHES = (
            ("md5", "^[a-fA-F0-9]{32}$"),
            ("sha1", "^[a-fA-F0-9]{40}$"),
            ("sha256", "^[a-fA-F0-9]{64}$"),
        )
        res = None
        for items in HASHES:
            if re.match(items[1], data):
                res = items[0]
        # No hash type match return
        if res is None:
            return {}

        family_query = "MATCH (m1:Malware) WHERE m1.%s=\"%s\" MATCH (m1:Malware)-[s:same]-(m2:Malware) MATCH (m2:Malware) WHERE m2.cluster = m1.cluster RETURN distinct m2.tag as tag, max(s.value) as max order by max(s.value) desc" % (
            res, data)
        file_query = "MATCH (m1:Malware) WHERE m1.%s=\"%s\" MATCH (m1:Malware)-[s:same]-(m2:Malware) MATCH (m2:Malware) WHERE m2.cluster = m1.cluster RETURN m2.tag as tag, m2.sha256 as sha256, max(s.value) as max order by max(s.value) desc LIMIT 10" % (
            res, data)
        cluster_count_query = "MATCH (m1:Malware) where m1.%s=\"%s\" MATCH (m2:Malware)-[p:same]->(m3:Malware) where m2.cluster = m1.cluster and m3.cluster = m1.cluster RETURN count(p.value) as total" % (
            res, data)
        cluster_query = "MATCH (m1:Malware) where m1.%s=\"%s\" MATCH (m2:Malware)-[p:same]->(m3:Malware) where m2.cluster = m1.cluster and m3.cluster = m1.cluster RETURN m2.sha256, m2.tag, p.value, m3.sha256, m3.cluster, m3.tag" % (
            res, data)
        item_query = "MATCH (m:Malware) WHERE m.%s=\"%s\" RETURN m" % (res,
                                                                       data)
        item_data = self.graph.run(item_query).data()
        cluster_count_list = self.graph.run(cluster_count_query).data()

        cluster_count = cluster_count_list[0]['total'] if len(
            cluster_count_list) > 0 else None
        return_dict['info'] = item_data[0]['m'] if len(item_data) > 0 else None

        family_objs = self.graph.run(family_query).data()
        if family_objs:
            return_dict['families'] = family_objs
            return_dict['files'] = self.graph.run(file_query).data()
            if cluster_count and cluster_count < 100:
                return_dict['cluster'] = self.graph.run(cluster_query).data()
            else:
                return_dict['cluster_count'] = cluster_count
        return return_dict
Example #17
def main():
    parser = argparse.ArgumentParser(
        description=
        'Demo: Use apiscout with a prepared api database (created using DatabaseBuilder.py) to crawl a dump for imports and render the results.'
    )
    parser.add_argument(
        '-f',
        '--filter',
        type=int,
        default=0,
        help='Filter out APIs that do not have a neighbour within N bytes.')
    parser.add_argument(
        '-i',
        '--ignore_aslr',
        action='store_true',
        help=
        'Do not apply the per-module ASLR offset potentially contained in an API DB file.'
    )
    parser.add_argument(
        '-c',
        '--collection_file',
        type=str,
        default='',
        help=
        'Optionally match the output against a WinApi1024 vector collection file.'
    )
    parser.add_argument(
        '-b',
        '--base_addr',
        type=str,
        default='',
        help='Set base address to given value (int or 0x-hex format).')
    parser.add_argument(
        '-t',
        '--import_table_only',
        action='store_true',
        help=
        'Do not crawl for API references but only parse the import table instead - assumes an unmapped PE file as input.'
    )
    parser.add_argument('binary_path',
                        type=str,
                        default='',
                        help='Path to the memory dump to crawl.')
    parser.add_argument(
        'db_path',
        type=str,
        nargs='*',
        help=
        'Path to the DB(s). If no argument is given, use all files found in "./dbs"'
    )

    args = parser.parse_args()
    if args.binary_path:
        binary = ""
        if os.path.isfile(args.binary_path):
            with open(args.binary_path, "rb") as f_binary:
                binary = f_binary.read()
        scout = ApiScout()
        base_addr = get_base_addr(args)
        print("Using base adress 0x{:x} to infer reference counts.".format(
            base_addr))
        scout.setBaseAddress(base_addr)
        # override potential ASLR offsets that are stored in the API DB files.
        scout.ignoreAslrOffsets(args.ignore_aslr)
        # load DB file
        db_paths = []
        if args.db_path:
            db_paths = args.db_path
        elif not args.import_table_only:
            db_paths = get_all_db_files()
        for db_path in db_paths:
            scout.loadDbFile(db_path)
        # load WinApi1024 vector
        scout.loadWinApi1024(get_winapi1024_path())
        # scout the binary
        results = {}
        if args.import_table_only:
            print("Parsing Import Table for\n  {}.".format(args.binary_path))
            results = scout.evaluateImportTable(binary, is_unmapped=True)
        else:
            print("Using \n  {}\nto analyze\n  {}.".format(
                "\n  ".join(db_paths), args.binary_path))
            num_apis_loaded = scout.getNumApisLoaded()
            filter_info = " - neighbour filter: 0x%x" % args.filter if args.filter else ""
            print("Buffer size is {} bytes, {} APIs loaded{}.\n".format(
                len(binary), num_apis_loaded, filter_info))
            results = scout.crawl(binary)
        filtered_results = scout.filter(results, 0, 0, args.filter)
        print(scout.render(filtered_results))
        print(scout.renderVectorResults(filtered_results))
        if args.collection_file:
            print(
                scout.renderResultsVsCollection(filtered_results,
                                                args.collection_file))
    else:
        parser.print_help()
Example #18
 def testCompleteCoverage(self):
     scout = ApiScout()
     scout.setBaseAddress(0x1000)
     self.assertEqual(scout.base_address, 0x1000)
Example #19
def test_base(x=vector_list[0], y=vector_list[1]):
    from apiscout.ApiScout import ApiScout
    _apiscout = ApiScout()
    _apiscout.setBaseAddress(0)
    _apiscout.loadWinApi1024('data/winapi1024v1.txt')
    return _apiscout.matchVectors(x, y)
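Note that vector_list is not defined in this snippet; a hedged sketch of how it might be prepared, where the file name is a placeholder and each line is assumed to hold one compressed WinApi1024 vector string:

# hypothetical setup for the benchmark above; "vectors.txt" is a placeholder path
with open("vectors.txt") as f_vectors:
    vector_list = [line.strip() for line in f_vectors if line.strip()]
print(test_base(vector_list[0], vector_list[1]))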