def testLoadFingerprintCollectionFile(self):
    """Load the example collection file and check it contains ``win.urlzone``."""
    # The project root is the parent of the directory holding this test file.
    this_file_path = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(this_file_path))
    collection_path = os.path.join(project_root, "dbs", "collection_example.csv")
    apivector = ApiVector()
    loaded_vectors = apivector._loadCollectionData(collection_path)
    # assertIn reports both the missing key and the container on failure,
    # unlike assertTrue(... in ...), which only reports "False is not true".
    self.assertIn("win.urlzone", loaded_vectors)
def vectorize(dataset, logs_folder, base, imports_type, data_dir, dataset_file=None, out_file=None):
    """Build an API vector for every sample of ``dataset`` and dump them to JSON.

    For each sample, ``<logs_folder>/<sample>.json`` is loaded, the API
    dictionary selected by ``imports_type`` is vectorized with an
    ``ApiVector`` built from ``base``, and the resulting vector and coverage
    percentage are collected. The result is written to ``out_file``.

    Args:
        dataset: Iterable of sample names; each must have a JSON log in
            ``logs_folder``.
        logs_folder: Directory containing the per-sample ``<sample>.json`` logs.
        base: Path of the vector base definition handed to ``ApiVector``.
        imports_type: Key of the API dictionary to use from each log, or
            ``"all"`` to merge the ``"it"`` and ``"dynamic"`` dictionaries.
        data_dir: Directory under which the default ``vectors`` output folder
            is created when ``out_file`` is not given.
        dataset_file: Optional path of the dataset file; stored in the output
            relative to the output directory (``None`` is stored otherwise).
        out_file: Optional output path. Defaults to
            ``<data_dir>/vectors/vectors_<imports_type>_<weights>_<size>.json``.

    Raises:
        OSError: If the output directory cannot be created or a file cannot
            be opened.
    """
    api_vector = ApiVector(base)
    vector_base = api_vector.getWinApi1024()
    vector_weights = detect_weights(os.path.basename(base), vector_base)
    vector_size = len(vector_base)

    if out_file is None:
        out_path = os.path.join(data_dir, "vectors")
    else:
        # A bare file name has an empty dirname; fall back to the current
        # directory so that os.makedirs("") is never attempted.
        out_path = os.path.dirname(out_file) or os.curdir
    if not os.path.exists(out_path):
        try:
            os.makedirs(out_path)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    if out_file is None:
        out_file = os.path.join(
            out_path,
            "vectors_{}_{}_{}.json".format(imports_type, vector_weights, vector_size))

    vectors_dict = {
        "dataset": unix_relpath(dataset_file, out_path) if dataset_file is not None else None,
        "base": {
            "path": unix_relpath(base, out_path),
            "imports_type": imports_type,
            "weights": vector_weights,
            "size": vector_size
        },
        "vectors": {},
        "coverage": {
            "average": 0,
            "median": 0,
            "details": {}
        }
    }
    coverage_stats = vectors_dict["coverage"]
    coverage_details = coverage_stats["details"]

    for sample in dataset:
        sample_log_path = os.path.join(logs_folder, "{}.json".format(sample))
        with open(sample_log_path, mode="r") as log_file:
            apis = json.load(log_file, cls=decoder, list_type=set)
        if imports_type == "all":
            apis = merge_two_dicts(apis["it"], apis["dynamic"])
        else:
            apis = apis[imports_type]
        result = api_vector.getApiVectorFromApiDictionary(apis)["user_list"]
        coverage = result["percentage"]
        vectors_dict["vectors"][sample] = result["vector"]
        coverage_details[sample] = coverage
        coverage_stats["average"] += coverage

    # With an empty dataset there is nothing to average: keep the 0 defaults
    # instead of dividing by zero or taking the median of an empty list.
    if coverage_details:
        coverage_stats["average"] /= len(coverage_details)
        coverage_stats["median"] = median(list(coverage_details.values()))

    with open(out_file, mode="w") as vectors_file:
        json.dump(vectors_dict, vectors_file, indent=4)
def setUpClass(cls):
    """Install a shared ``ApiVector`` whose tables hold a fixed ten-entry fixture.

    ``_winapi1024`` holds 4-tuples of (dll, api, tag, number); ``_dllapi_only``
    holds the (dll, api) pairs and ``_vector_ranks_only`` the trailing numbers
    of those same entries, in the same order.
    """
    super(ApiVectorTestSuite, cls).setUpClass()
    fixture_entries = [
        ("kernel32.dll", "CreateActCtx", "execution", 928),
        ("kernel32.dll", "DeactivateActCtx", "execution", 927),
        ("kernel32.dll", "ReleaseActCtx", "execution", 922),
        ("ole32.dll", "CoCreateInstance", "execution", 105),
        ("ole32.dll", "CoCreateInstanceEx", "execution", 547),
        ("ole32.dll", "CoGetClassObject", "execution", 644),
        ("kernel32.dll", "Module32First", "execution", 445),
        ("kernel32.dll", "Module32Next", "execution", 560),
        ("advapi32.dll", "CreateService", "execution", 360),
        ("advapi32.dll", "DeleteService", "execution", 299),
    ]
    cls.vector = ApiVector()
    cls.vector._winapi1024 = fixture_entries
    # The two reduced views are projections of the table above.
    cls.vector._dllapi_only = [(dll, api) for dll, api, _tag, _rank in fixture_entries]
    cls.vector._vector_ranks_only = [rank for _dll, _api, _tag, rank in fixture_entries]
def main(args):
    """Score every pair of vectors from ``args.vectors_file`` and dump the result as JSON.

    The pairs are split into ``args.threads`` job lists, each handed to
    ``worker_function`` in a process pool; the per-worker results are merged
    and written to ``args.out_file``.

    Args:
        args: Namespace providing ``vectors_file`` (input JSON path),
            ``threads`` (number of worker processes), ``verbose`` (forwarded
            to the workers) and ``out_file`` (output path, or ``None`` for
            ``<parent of vectors dir>/scores/scores_<imports_type>_<weights>_<size>.json``).
            ``args.out_file`` is overwritten with the default path when it
            is ``None``.

    Raises:
        SystemExit: With status 1 when the vector base file referenced by the
            vectors file does not exist.
        OSError: If the output directory cannot be created.
    """
    with open(args.vectors_file, mode="r") as fp:
        vectors_file = json.load(fp)
    vectors = vectors_file["vectors"]
    vectors_dir = os.path.dirname(args.vectors_file)

    jobs_list = list(combinations(vectors.keys(), 2))
    splitted_jobs = split_jobs(jobs_list, args.threads)
    logging.debug("Jobs count: {}".format(len(jobs_list)))
    logging.debug(" For each worker: {}".format([len(splitted) for splitted in splitted_jobs]))

    # Paths stored in the vectors file are relative to the file's own directory.
    vectorbase_path = os.path.normpath(os.path.join(vectors_dir, vectors_file["base"]["path"]))
    if not os.path.isfile(vectorbase_path):
        logging.error("VectorBase file '{}' does not exist".format(vectorbase_path))
        sys.exit(1)

    freeze_support()  # for Windows support
    with Pool(args.threads, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as pool:
        api_vector = ApiVector(vectorbase_path)
        logging.info("Calculating the scores...")
        results = []
        for i in range(args.threads):
            task_args = (i, args.threads, api_vector, splitted_jobs[i], vectors, args.verbose)
            results.append(pool.apply_async(worker_function, args=task_args))
        # close() + join() wait for all submitted tasks before the context
        # manager tears the pool down.
        pool.close()
        pool.join()
        logging.info(" COMPLETED")
        # The line break inside this message is written as an escape so the
        # literal is terminated on a single source line.
        logging.info("Merging the results... \n")
        scores = merge_dictionaries([res.get() for res in results])
        logging.info(" COMPLETED")

    base = vectors_file["base"]
    if args.out_file is None:
        default_out_dir = os.path.join(os.path.dirname(vectors_dir), "scores")
        args.out_file = os.path.join(
            default_out_dir,
            "scores_{}_{}_{}.json".format(base["imports_type"], base["weights"], base["size"]))

    # Relative paths in the output are anchored at the directory the scores
    # file is actually written to; a bare file name means the current directory.
    out_dir = os.path.dirname(args.out_file) or os.curdir
    if not os.path.exists(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    # The vectors file may carry a null dataset entry; propagate it as-is
    # rather than failing on os.path.join(..., None).
    dataset_entry = vectors_file["dataset"]
    if dataset_entry is None:
        dataset_relpath = None
    else:
        dataset_path = os.path.normpath(os.path.join(vectors_dir, dataset_entry))
        dataset_relpath = os.path.relpath(dataset_path, out_dir)

    scores_dict = {
        "dataset": dataset_relpath,
        # Copy so the loaded vectors data is not modified in place.
        "base": dict(base, path=os.path.relpath(vectorbase_path, out_dir)),
        "scores": scores
    }

    with open(args.out_file, mode="w") as opf:
        logging.info("Dumping scores to '{}'... ".format(os.path.basename(args.out_file)))
        json.dump(scores_dict, opf, indent=4)
        logging.info(" COMPLETED")
def testVectorCompression(self):
    """Round-trip random 0/1 vectors of length 2**3 .. 2**12 through compression.

    For every length, both ``decompress`` and ``n_decompress`` must give back
    a sequence of the original length.
    """
    apivector = ApiVector()
    for vector_length in (2 ** exponent for exponent in range(3, 13)):
        # Make the vector base as long as the vector under test.
        apivector._winapi1024 = list(range(vector_length))
        random_vector = [random.randint(0, 1) for _ in range(vector_length)]
        compressed = apivector.compress(random_vector)
        decompressed = apivector.decompress(compressed)
        n_decompressed = apivector.n_decompress(compressed)
        for restored in (decompressed, n_decompressed):
            self.assertEqual(vector_length, len(restored))
def testLoadWinApi1024Definition(self):
    """Load ``data/winapi1024v1.txt`` from the apiscout package and expect 1024 entries."""
    module_path = os.path.dirname(os.path.realpath(apiscout.__file__))
    LOG.info("Using module path %s", module_path)
    winapi_path = os.path.join(module_path, "data", "winapi1024v1.txt")
    apivector = ApiVector(winapi_path)
    # assertEqual replaces the assertEquals alias, which no longer exists in
    # Python 3.12+.
    self.assertEqual(len(apivector._winapi1024), 1024)
def testLoadWinApi1024Definition(self):
    """Load ``data/winapi1024v1.txt`` from the project root and expect 1024 entries."""
    # The project root is the parent of the directory holding this test file.
    this_file_path = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(this_file_path))
    winapi_path = os.path.join(project_root, "data", "winapi1024v1.txt")
    apivector = ApiVector(winapi_path)
    # assertEqual replaces the assertEquals alias, which no longer exists in
    # Python 3.12+.
    self.assertEqual(len(apivector._winapi1024), 1024)