def main():
    """CLI demo: match WinApi1024 vectors with apiscout.

    Modes:
      * vector_a + -v/--vector_b   -> print the pairwise match score.
      * vector_a + -c/--collection -> match against a vector collection and
        render the top -n/--max_results family results.
      * anything else              -> print usage help.
    """
    parser = argparse.ArgumentParser(
        description='Demo: Use apiscout to match WinApi1024 vectors.')
    # nargs='?' makes the positional optional so the declared default ''
    # actually takes effect (argparse ignores defaults on required
    # positionals) and a bare invocation falls through to print_help().
    parser.add_argument('vector_a', type=str, nargs='?', default='',
                        help='compressed version of first vector.')
    parser.add_argument('-v', '--vector_b', type=str, default='',
                        help='compressed version of second vector.')
    parser.add_argument('-c', '--collection', type=str, default='',
                        help='Path to a collection of compressed vectors.')
    parser.add_argument('-n', '--max_results', type=int, default=5,
                        help='Maximum number of family results to show.')
    args = parser.parse_args()

    scout = ApiScout()
    # load WinApi1024 vector
    scout.loadWinApi1024(get_winapi1024_path())

    if args.vector_a and args.vector_b:
        score = scout.matchVectors(args.vector_a, args.vector_b)
        print("Result of matching vectors:")
        print("Vector A: {}".format(args.vector_a))
        print("Vector B: {}".format(args.vector_b))
        print("Score: {}".format(score))
    elif args.vector_a and args.collection:
        collection_result = scout.matchVectorCollection(
            args.vector_a, args.collection)
        print(scout.renderVectorCollectionResults(collection_result,
                                                  args.max_results))
    else:
        parser.print_help()
def test_base(x=None, y=None):
    """Match two compressed WinApi1024 vectors and return the score.

    :param x: first compressed vector; defaults to ``vector_list[0]``
              (module-level list, resolved at call time).
    :param y: second compressed vector; defaults to ``vector_list[1]``.
    :return: similarity score from ``ApiScout.matchVectors``.

    The original signature evaluated ``vector_list[0]`` / ``vector_list[1]``
    at definition time, which fails at import when ``vector_list`` is not
    yet populated and ignores later updates to it; ``None`` sentinels defer
    the lookup until the function is actually called.
    """
    from apiscout.ApiScout import ApiScout
    if x is None:
        x = vector_list[0]
    if y is None:
        y = vector_list[1]
    _apiscout = ApiScout()
    _apiscout.setBaseAddress(0)
    _apiscout.loadWinApi1024('data/winapi1024v1.txt')
    return _apiscout.matchVectors(x, y)
class Api():
    """Stores and analyze malwares info in neo4j.

    Computes per-sample digests (md5/sha1/sha256), impfuzzy hashes and
    ApiScout WinApi1024 import vectors, correlates samples by vector
    similarity, clusters the resulting graph with PyLouvain and persists
    nodes/relationships into Neo4j via py2neo.
    """

    def __init__(self, host, port, user, password, threshold=40,
                 secure=False, filepath=None, filename=None,
                 folder_path=None):
        """Connects to neo4j database, loads options and set connectors.

        host/port/user/password/secure: Neo4j connection settings.
        threshold: minimum similarity score (0-100 scale) for an edge.
        filepath/filename: single-sample (reporting) mode inputs.
        folder_path: batch mode; the folder is scanned for PE files.
        @raise CuckooReportError: if unable to connect.
        """
        self.threshold = int(threshold)
        self.graph = Graph(host=host, user=user, password=password,
                           secure=secure, port=port)
        self.filepath = filepath
        self.filename = filename
        self.folder_path = folder_path
        # ApiScout instance shared by get_digest()/scout_comp(); the
        # WinApi1024 reference vector ships next to this module.
        self.scout = ApiScout()
        self.scout.setBaseAddress(0)
        self.scout.loadWinApi1024(
            os.path.abspath(os.path.join(os.path.dirname(__file__))) +
            os.sep + "data" + os.sep + "winapi1024v1.txt")
        self.magictest = magic.Magic(uncompress=True)
        # PEiD-style packer signatures used by check_file() to skip
        # packed samples.
        CWD = os.path.abspath(os.path.dirname(__file__))
        USERDB = os.path.join(CWD, os.path.normpath("data/UserDB.TXT"))
        with open(USERDB, 'rt') as f:
            sig_data = f.read()
        self.signatures = peutils.SignatureDatabase(data=sig_data)
        if self.folder_path:
            self.files = self.get_files(folder_path)

    def check_file(self, f):
        # Accept only plain PE32 files: not self-extracting archives and
        # not matching any packer signature at the entry point.
        if magic.from_file(f).find('PE32') == -1:
            return False
        if magic.from_file(f).find('self-extracting') != -1:
            return False
        try:
            pe = pefile.PE(f)
            matches = self.signatures.match_all(pe, ep_only=True)
            if matches:
                return False
            return True
        except:
            # NOTE(review): bare except treats any parse failure as
            # "not a usable PE" — deliberate best-effort filter, but it
            # also hides unexpected errors; consider pefile.PEFormatError.
            return False

    def get_files(self, folder_path):
        # Walk folder_path, keep files passing check_file(), and pair each
        # with its family name read from a sibling/parent <name>.json
        # (key 'common_name', spaces replaced by underscores).
        files_end = []
        files = []
        for root, dirnames, filenames in os.walk(folder_path):
            for filename in fnmatch.filter(filenames, '*'):
                files.append(os.path.join(root, filename))
        for filepath in files:
            if not self.check_file(filepath):
                continue
            # First try <grandparent>/<grandparent-name>.json, then fall
            # back to <parent>/<parent-name>.json; skip if neither exists.
            # NOTE(review): path splitting assumes '/' separators (POSIX).
            json_path = "/".join(filepath.split(
                "/")[:-2]) + "/" + filepath.split("/")[-3] + ".json"
            if not os.path.exists(json_path):
                json_path = "/".join(filepath.split(
                    "/")[:-1]) + "/" + filepath.split("/")[-2] + ".json"
            if not os.path.exists(json_path):
                continue
            with open(json_path, 'r') as f:
                file_family = json.loads("".join([
                    str(x) for x in f.readlines()
                ])).get('common_name').replace(" ", "_")
            files_end.append((filepath, file_family))
        print(len(files_end), "Files da caricare")
        return files_end

    def get_digest(self, file):
        """Return (scout_vector, impfuzzy, md5, sha1, sha256, confidence)
        for the given sample path.

        On ApiScout failure the path is appended to fail_list.txt and the
        vector/confidence come back as None.
        """
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        try:
            impfuzzy = pyimpfuzzy.get_impfuzzy(file)
        except:
            # Best-effort: samples without a usable import table get an
            # empty impfuzzy hash.
            impfuzzy = ""
        # NOTE(review): if the path is not a regular file, scout_result /
        # scout_confidence are never assigned and the return below raises
        # NameError (and the hashing open() would fail anyway) — TODO
        # confirm callers always pass existing files.
        if os.path.isfile(file):
            with open(file, "rb") as f_binary:
                binary = f_binary.read()
            try:
                scout_ev = self.scout.evaluateImportTable(binary,
                                                          is_unmapped=True)
                scout_result = self.scout.getWinApi1024Vectors(scout_ev).get(
                    'import_table', {}).get('vector', None)
                # NOTE(review): _apivector is a private ApiScout attribute.
                scout_confidence = self.scout._apivector.getVectorConfidence(
                    scout_result)
            except:
                # Record failing samples for later inspection.
                with open('fail_list.txt', 'a') as f:
                    f.write(file + "\n")
                scout_result = None
                scout_confidence = None
        # Stream the file once to feed all three hash objects.
        # NOTE(review): 2047 is an odd chunk size — presumably meant to be
        # a power of two; harmless but worth confirming.
        with open(file, "rb") as f:
            while True:
                buf = f.read(2047)
                if not buf:
                    break
                md5.update(buf)
                sha1.update(buf)
                sha256.update(buf)
        return scout_result, impfuzzy, md5.hexdigest(), sha1.hexdigest(
        ), sha256.hexdigest(), scout_confidence

    def impfuzzy_comp(self, list, list_new):
        # Pairwise impfuzzy comparison: new-vs-new (upper triangle only)
        # plus new-vs-existing. Items are rows shaped like hashlist:
        # index 0 = node id, index 2 = impfuzzy hash.
        # NOTE(review): parameter `list` shadows the builtin.
        # NOTE(review): pattern should be a raw string to silence escape
        # warnings; behavior is unchanged either way here.
        ssdeep = re.compile("^[0-9]{1,5}:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$",
                            re.DOTALL)
        complist = []
        list_len = len(list_new)
        i = 0
        for item_new in list_new:
            i += 1
            # Only compare plausible ssdeep-format hashes of sane length.
            if re.search(ssdeep, item_new[2]) and len(item_new[2]) < 150:
                for j in range(i, list_len):
                    if re.search(ssdeep, list_new[j][2]) and \
                            len(list_new[j][2]) < 150:
                        complist.append([
                            item_new[0], list_new[j][0],
                            pyimpfuzzy.hash_compare(item_new[2],
                                                    list_new[j][2])
                        ])
        if list:
            for item_new in list_new:
                if re.search(ssdeep, item_new[2]) and len(item_new[2]) < 150:
                    for item in list:
                        if re.search(ssdeep, item[2]) and len(item[2]) < 150:
                            complist.append([
                                item_new[0], item[0],
                                pyimpfuzzy.hash_compare(item_new[2], item[2])
                            ])
        return complist

    def scout_comp(self, list, list_new):
        # Pairwise ApiScout vector comparison, same shape as impfuzzy_comp:
        # new-vs-new (upper triangle) then new-vs-existing. Row index 3
        # holds the compressed WinApi1024 vector; scores are scaled to
        # an int 0-100 to be comparable with self.threshold.
        # NOTE(review): parameter `list` shadows the builtin.
        complist = []
        list_len = len(list_new)
        i = 0
        for item_new in list_new:
            i += 1
            for j in range(i, list_len):
                complist.append([
                    item_new[0], list_new[j][0],
                    int(self.scout.matchVectors(item_new[3],
                                                list_new[j][3]) * 100)
                ])
        for item_new in list_new:
            for item in list:
                complist.append([
                    item_new[0], item[0],
                    int(self.scout.matchVectors(item_new[3], item[3]) * 100)
                ])
        return complist

    def process(self):
        """Ingest new sample(s), correlate, cluster and store in Neo4j.

        Batch mode (folder_path set): digest every collected file and add
        unseen sha256s as new nodes. Single mode: digest self.filepath; if
        the sample is already in the graph, return its report directly.
        Returns a search_hash() report dict in single-file mode, or {} when
        the sample is rejected.
        """
        hashlist = []
        hashlist_new = []
        nodes = []
        edges = []
        relationships = []
        # recover all actual data
        database = self.graph.run(
            "MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.scout_result, m.scout_confidence, m.md5, m.sha1, m.sha256, m.tag"
        ).data()
        if database:
            for d in database:
                # Row layout: [id, name, impfuzzy, scout_result,
                # scout_confidence, md5, sha1, sha256, tag]
                hashlist.append([
                    d["m.id"], d["m.name"], d["m.impfuzzy"],
                    d["m.scout_result"], d["m.scout_confidence"], d["m.md5"],
                    d["m.sha1"], d["m.sha256"], d["m.tag"]
                ])
        nodes_count = len(database)
        # i is the next free node id (existing ids are assumed 0..count-1).
        i = nodes_count
        relation_data = self.graph.run(
            "MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id, m2.id, s.value"
        ).data()
        if relation_data:
            for r in relation_data:
                relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])
        for x in range(nodes_count):
            nodes.append(x)
        # if massive check for each file
        if self.folder_path:
            for item in self.files:
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    item[0])
                # 'A171' is a degenerate/empty vector marker — skip such
                # samples (presumably "no meaningful imports"; TODO confirm).
                if scout_result in ("", 'A171', None):
                    continue
                query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
                objs = self.graph.run(query).data()
                # Only add samples not already in the DB nor queued this run.
                if not objs and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([
                        i, item[0].split("/")[-1], impfuzzy, scout_result,
                        scout_confidence, md5, sha1, sha256, item[1]
                    ])
                    i += 1
                else:
                    continue
        else:
            # if single we are in the reporting module
            # if file is tested it need to have valid apiscout vector
            if self.check_file(self.filepath):
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    self.filepath)
                if scout_result in ("", 'A171', None):
                    return {}
            else:
                return {}
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
            objs = self.graph.run(query).data()
            if not objs:
                nodes.append(nodes_count)
                hashlist_new.append([
                    nodes_count, self.filename, impfuzzy, scout_result,
                    scout_confidence, md5, sha1, sha256, None
                ])
            else:
                # Sample already known: just return its existing report.
                return self.search_hash(sha256)
        # Calculate apiscout correlation
        result_list = self.scout_comp(hashlist, hashlist_new)
        # Only re-cluster and write when new nodes were actually added.
        if len(database) != len(nodes):
            for edge in result_list + relationships:
                # Edges below threshold are kept with weight 0 so Louvain
                # still sees the full node set.
                if edge[2] > self.threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()
            # Create node
            # NOTE(review): statement_c / statement_r are module-level
            # Cypher templates defined elsewhere in this file.
            tx = self.graph.begin()
            for hash in hashlist_new + hashlist:
                i = 0
                for a in partition:
                    i += 1
                    if hash[0] in a:
                        tx.append(
                            statement_c, {
                                "id": hash[0],
                                "name": hash[1],
                                "impfuzzy": hash[2],
                                "scout_result": hash[3],
                                "scout_confidence": hash[4],
                                "md5": hash[5],
                                "sha1": hash[6],
                                "sha256": hash[7],
                                "tag": hash[8],
                                "cluster": i
                            })
            # Create relationship
            for result in result_list:
                if result[2] > self.threshold:
                    tx.append(statement_r, {
                        "id1": result[0],
                        "id2": result[1],
                        "value_scout": result[2]
                    })
            tx.process()
            tx.commit()
        # recover info
        if self.filename:
            return self.search_hash(sha256)

    def process_file(self, filepath, filename):
        # Convenience wrapper for single-file mode: set the sample and run
        # the full pipeline.
        self.filepath = filepath
        self.filename = filename
        return self.process()

    def search_hash(self, data):
        """Build a report dict for the sample matching the given hash.

        data may be an md5, sha1 or sha256 hex string. Returns {} when the
        input is not a recognizable hash. Keys: 'info', 'families',
        'files', and either 'cluster' (members, when the cluster is small)
        or 'cluster_count'.
        """
        return_dict = {}
        # identify hash type
        HASHES = (
            ("md5", "^[a-fA-F0-9]{32}$"),
            ("sha1", "^[a-fA-F0-9]{40}$"),
            ("sha256", "^[a-fA-F0-9]{64}$"),
        )
        res = None
        for items in HASHES:
            if re.match(items[1], data):
                res = items[0]
        # No hash type match return
        # NOTE(review): `res is None` would be the idiomatic comparison.
        if res == None:
            return {}
        # NOTE(review): queries are built by string interpolation; `data`
        # is regex-validated above, which limits injection here.
        family_query = "MATCH (m1:Malware) WHERE m1.%s=\"%s\" MATCH (m1:Malware)-[s:same]-(m2:Malware) MATCH (m2:Malware) WHERE m2.cluster = m1.cluster RETURN distinct m2.tag as tag, max(s.value) as max order by max(s.value) desc" % (
            res, data)
        file_query = "MATCH (m1:Malware) WHERE m1.%s=\"%s\" MATCH (m1:Malware)-[s:same]-(m2:Malware) MATCH (m2:Malware) WHERE m2.cluster = m1.cluster RETURN m2.tag as tag, m2.sha256 as sha256, max(s.value) as max order by max(s.value) desc LIMIT 10" % (
            res, data)
        cluster_count_query = "MATCH (m1:Malware) where m1.%s=\"%s\" MATCH (m2:Malware)-[p:same]->(m3:Malware) where m2.cluster = m1.cluster and m3.cluster = m1.cluster RETURN count(p.value) as total" % (
            res, data)
        cluster_query = "MATCH (m1:Malware) where m1.%s=\"%s\" MATCH (m2:Malware)-[p:same]->(m3:Malware) where m2.cluster = m1.cluster and m3.cluster = m1.cluster RETURN m2.sha256, m2.tag, p.value, m3.sha256, m3.cluster, m3.tag" % (
            res, data)
        item_query = "MATCH (m:Malware) WHERE m.%s=\"%s\" RETURN m" % (res,
                                                                       data)
        item_data = self.graph.run(item_query).data()
        cluster_count_list = self.graph.run(cluster_count_query).data()
        cluster_count = cluster_count_list[0]['total'] if len(
            cluster_count_list) > 0 else None
        return_dict['info'] = item_data[0]['m'] if len(item_data) > 0 else None
        family_objs = self.graph.run(family_query).data()
        if family_objs:
            return_dict['families'] = family_objs
        return_dict['files'] = self.graph.run(file_query).data()
        # Only materialize the full cluster edge list when it is small;
        # otherwise report just the count.
        if cluster_count and cluster_count < 100:
            return_dict['cluster'] = self.graph.run(cluster_query).data()
        else:
            return_dict['cluster_count'] = cluster_count
        return return_dict