# NOTE(review): this chunk was whitespace-mangled — everything after the first
# '#' on the original physical line was dead comment text.  Line breaks and
# indentation are restored below; the statements themselves are unchanged,
# except that the nested os.path.join was collapsed (os.path.join is variadic).
print("StdCall Extractor started!")

# Training set or test set?  (IndexError if the script is run with no argument)
if sys.argv[1] == "train":
    main_path = os.path.join(root_path, "train")
else:
    main_path = os.path.join(root_path, "test")

# Generate paths: every regular file under <main_path>/data/stdcall.
data_dir = os.path.join(main_path, "data", "stdcall")
data = [
    os.path.join(data_dir, file)
    for file in os.listdir(data_dir)
    if os.path.isfile(os.path.join(data_dir, file))
]
split_data = chunks(data, numWorkers)

# Output directory name — cleared before new features are written.
output_dir = os.path.join(main_path, stdcall_feat_dir)
clearPath(output_dir)

# Parallel processing: one Process per chunk; enumerate hands each worker a
# (worker_index, chunk) tuple as its first argument.
jobs = []
for item in enumerate(split_data):
    p = Process(target=worker, args=(item, output_dir))
    jobs.append(p)
    p.start()
for j in jobs:
    j.join()
# NOTE(review): whitespace-mangled chunk restored.  The first statements on the
# original physical line are the TAIL of a worker() function whose header lies
# outside this chunk; they cannot stand alone syntactically, so they are
# preserved here verbatim (as a comment) for re-merging into the full file:
#
#             # Save features
#             pickle.dump(feat, open(os.path.join(output_dir, fn), "wb"))
#             # Update progressbar
#             pbar.update(idx + 1)
#         pbar.finish()

if __name__ == "__main__":
    print("Feature extractor started!")

    # Which dataset? Train or test?
    if sys.argv[1] == "train":
        main_path = os.path.join(root_path, "train")
        split_data = chunks(train_asm, numWorkers)
    else:
        main_path = os.path.join(root_path, "test")
        split_data = chunks(test_asm, numWorkers)

    # Output directory name — cleared before new features are written.
    output_dir = os.path.join(main_path, feat_dir)
    clearPath(output_dir)

    # Multi-processing fan-out: one Process per chunk; enumerate hands each
    # worker a (worker_index, chunk) tuple as its first argument.
    jobs = []
    for item in enumerate(split_data):
        p = Process(target=worker, args=(item, output_dir))
        jobs.append(p)
        p.start()
    # NOTE(review): no join() appears here in the source — this chunk looks
    # truncated; the sibling scripts in this file do join their workers.
    # Confirm against the complete file before relying on process completion.
# NOTE(review): whitespace-mangled chunk restored.  The first statements on the
# original physical line are the TAIL of a worker() function whose header lies
# outside this chunk; they cannot stand alone syntactically, so they are
# preserved here verbatim (as a comment) for re-merging into the full file:
#
#             # Save data
#             data = calcGistFeatures(r.readBytes(item), kernels)
#             pickle.dump(data, open(os.path.join(output_dir, fn), "wb"))
#             # Update progressbar
#             pbar.update(idx + 1)
#         pbar.finish()

if __name__ == "__main__":
    print("Byte image features extraction started!")

    # Train or test set selection (IndexError if run with no argument).
    if sys.argv[1] == "train":
        main_path = os.path.join(root_path, "train")
        split_data = chunks(train_bytes, numWorkers)
    else:
        main_path = os.path.join(root_path, "test")
        split_data = chunks(test_bytes, numWorkers)

    # Output directory name — cleared before new features are written.
    output_dir = os.path.join(main_path, byte_image_feat_dir)
    clearPath(output_dir)

    # One Process per chunk; enumerate hands each worker a
    # (worker_index, chunk) tuple as its first argument.
    jobs = []
    for item in enumerate(split_data):
        p = Process(target=worker, args=(item, output_dir))
        jobs.append(p)
        p.start()
    # NOTE(review): the source was truncated mid-statement ("for j in jobs:");
    # completed with join() to match the identical fan-out/join pattern of the
    # StdCall extractor at the top of this file — confirm against the full file.
    for j in jobs:
        j.join()