def _reset(self):
    """Start a new episode on a freshly selected, non-benign sample.

    Samples are drawn until one can be loaded and is not already labelled
    benign (label 0) by ``label_function``.  Every drawn sample, including
    the ones that end up skipped, gets a fresh entry in ``self.history``.

    Returns:
        The extracted features of the selected sample as a numpy array.
    """
    self.turns = 0
    while True:
        # get the new environment
        if self.random_sample:
            # draw a sample at random
            self.sha256 = random.choice(self.available_sha256)
        else:
            # walk through the samples in order, wrapping around at the end
            self.sha256 = self.available_sha256[
                self.sample_iteration_index % len(self.available_sha256)]
            self.sample_iteration_index += 1

        self.history[self.sha256] = {'actions': [], 'evaded': False}

        if self.cache:
            try:
                self.bytez = self.samples[self.sha256]
            except KeyError:
                # the sample never made it into the cache (e.g. it could not
                # be fetched when the cache was filled): skip it instead of
                # crashing, exactly like a failed fetch below
                print("failed fetching file")
                continue
        else:
            try:
                self.bytez = interface.fetch_file(self.sha256)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

        original_label = label_function(self.bytez)
        if original_label == 0:
            # skip this one, it's already benign, and the agent will learn nothing
            continue

        print("new sha256: {}".format(self.sha256))
        self.observation_space = self.feature_extractor.extract(self.bytez)
        break  # we're done here

    return np.asarray(self.observation_space)
def __init__(self, sha256list, random_sample=True, maxturns=3,
             output_path='evaded/blackbox/', cache=False):
    """Configure the environment and load the first sample.

    Args:
        sha256list: identifiers of the samples the environment draws from.
        random_sample: stored as ``self.random_sample``; selects random
            versus in-order sample selection when the environment is reset.
        maxturns: stored as ``self.maxturns`` (presumably the per-episode
            action budget; it is not used in this method).
        output_path: output directory, resolved relative to the directory
            three ``dirname`` steps above this module's absolute path.
        cache: when True, fetch every sample up front into ``self.samples``.
    """
    self.cache = cache
    self.available_sha256 = sha256list
    self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
    self.maxturns = maxturns
    self.feature_extractor = pefeatures.PEFeatureExtractor()
    self.random_sample = random_sample
    self.sample_iteration_index = 0

    module_path = os.path.abspath(__file__)
    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(module_path)))
    self.output_path = os.path.join(base_dir, output_path)
    # Create the resolved directory: self.output_path is the only path kept
    # on the instance, so checking the raw (cwd-relative) argument could
    # leave the directory that is actually used missing.
    os.makedirs(self.output_path, exist_ok=True)

    self.history = OrderedDict()

    self.samples = {}
    if self.cache:
        for sha256 in self.available_sha256:
            try:
                # fetch the sample being iterated over; self.sha256 is not
                # assigned until the first reset
                self.samples[sha256] = interface.fetch_file(sha256)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

    self._reset()
def __init__(self, sha256list, random_sample=True, maxturns=3,
             output_path='evaded/blackbox/', cache=False):
    """Initialise the environment state and select the first sample.

    Args:
        sha256list: identifiers of the available samples.
        random_sample: kept as ``self.random_sample``; chooses between
            random and sequential sample selection on reset.
        maxturns: kept as ``self.maxturns`` (presumably the maximum number
            of actions per episode; not used in this method).
        output_path: output directory, joined onto the directory reached by
            applying ``os.path.dirname`` three times to this module's
            absolute path.
        cache: when True, every sample is fetched once here and kept in
            ``self.samples``.
    """
    self.cache = cache
    self.available_sha256 = sha256list
    self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
    self.maxturns = maxturns
    self.feature_extractor = pefeatures.PEFeatureExtractor()
    self.random_sample = random_sample
    self.sample_iteration_index = 0

    root = os.path.abspath(__file__)
    for _ in range(3):
        root = os.path.dirname(root)
    self.output_path = os.path.join(root, output_path)
    # Ensure the resolved directory exists. The raw ``output_path`` argument
    # is relative to the current working directory and is not stored
    # anywhere, so it is the wrong path to check.
    os.makedirs(self.output_path, exist_ok=True)

    self.history = OrderedDict()

    self.samples = {}
    if self.cache:
        for sha256 in self.available_sha256:
            try:
                # use the loop variable: self.sha256 does not exist yet at
                # this point
                self.samples[sha256] = interface.fetch_file(sha256)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

    self._reset()
def evaluate(action_function):
    """Run an action policy against every holdout sample and save evasions.

    For each sample in ``sha256_holdout`` up to ``MAXTURNS`` actions chosen
    by ``action_function`` are applied.  As soon as the modified sample is
    labelled 0.0, the original and the modified bytes are written to
    ``PATH_SAVE/<sha256>/orign`` and ``PATH_SAVE/<sha256>/modified``.

    Args:
        action_function: callable taking the current sample bytes and
            returning the next action to apply.

    Returns:
        ``(success, misclassified)``: ``success`` holds one list of applied
        actions per evaded sample; ``misclassified`` holds the hashes whose
        unmodified sample was already labelled 0.0.
    """
    success = []
    misclassified = []
    for sha256 in sha256_holdout:
        print("#########{}#########".format(sha256))
        bytez_o = interface.fetch_file(sha256)
        label = interface.get_label_windefender(bytez_o)
        if label == 0.0:
            misclassified.append(sha256)
            continue  # already misclassified, move along

        actions = []
        bytez = bytez_o
        for _ in range(MAXTURNS):
            action = action_function(bytez)
            print(action)
            actions.append(action)
            bytez = manipulate.modify_without_breaking(bytez, [action])
            new_label = interface.get_label_windefender(bytez)
            if new_label == 0.0:
                success.append(actions)
                sample_dir = PATH_SAVE + '/' + sha256
                # makedirs(exist_ok=True) so a repeated run, or a missing
                # PATH_SAVE, does not abort the whole evaluation
                os.makedirs(sample_dir, exist_ok=True)
                with open(sample_dir + '/orign', 'wb') as f:
                    f.write(bytez_o)
                with open(sample_dir + '/modified', 'wb') as f:
                    f.write(bytez)
                break
    return success, misclassified  # evasion accuracy is len(success) / len(sha256_holdout)
def _reset(self):
    """Reset the turn counter and pick the next sample to work on.

    Keeps drawing samples until one is loadable and is not labelled benign
    (label 0) by ``label_function``.  Each drawn sample gets a new entry in
    ``self.history``, even when it is skipped afterwards.

    Returns:
        numpy array with the features extracted from the selected sample.
    """
    self.turns = 0
    while True:
        # get the new environment
        if self.random_sample:
            # draw a sample at random
            self.sha256 = random.choice(self.available_sha256)
        else:
            # take the samples in list order, wrapping around at the end
            position = self.sample_iteration_index % len(self.available_sha256)
            self.sha256 = self.available_sha256[position]
            self.sample_iteration_index += 1

        self.history[self.sha256] = {'actions': [], 'evaded': False}

        if self.cache:
            if self.sha256 not in self.samples:
                # samples that could not be fetched when the cache was
                # filled are absent from it; treat that like a failed fetch
                # instead of raising KeyError
                print("failed fetching file")
                continue
            self.bytez = self.samples[self.sha256]
        else:
            try:
                self.bytez = interface.fetch_file(self.sha256)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

        original_label = label_function(self.bytez)
        if original_label == 0:
            # skip this one, it's already benign, and the agent will learn nothing
            continue

        print("new sha256: {}".format(self.sha256))
        self.observation_space = self.feature_extractor.extract(self.bytez)
        break  # we're done here

    return np.asarray(self.observation_space)
def PCA_on_training_model():
    """Fit a PCA model on the features of all available samples and save it.

    Features are extracted for every hash returned by
    ``interface.get_available_sha256()``, scaled with ``MinMaxImp`` and fed
    to a PCA keeping 99% of the variance.  Raw and scaled features, the
    decomposition and the scaling parameters are written to ``pca_models/``.

    Returns:
        ``(ex_list, nor_list, U, S, V)``: raw features, scaled features and
        the three values returned by the PCA fit.
    """
    import os  # local import: only needed to create the output directory

    file_list = interface.get_available_sha256()
    # one extractor is enough; there is no need to build a new one per file
    extractor = pefeatures.PEFeatureExtractor()
    ex_list = np.array(
        [extractor.extract(interface.fetch_file(b)) for b in file_list])
    print("all_samples: ", ex_list.shape)

    # nor_list = normalize(ex_list, axis=0)
    # nor_list = MinMaxScaler().fit_transform(ex_list)
    nor_list, _data_min, _data_max, scale_, min_ = MinMaxImp(ex_list)

    pca = PCA(n_components=0.99).fit(nor_list)
    # NOTE(review): _fit is a private method of the PCA class and is assumed
    # to return (U, S, V); confirm against the installed library version.
    U, S, V = pca._fit(nor_list)

    # dic_elements = {"n_component":pca.n_components_, "scale_":scale_, "min_":min_}
    dic_elements = {"n_component": pca.n_components_}

    # np.save does not create missing directories
    os.makedirs("pca_models", exist_ok=True)
    np.save("pca_models/features.npy", ex_list)
    np.save("pca_models/nor_features.npy", nor_list)
    np.save("pca_models/U.npy", U)
    np.save("pca_models/S.npy", S)
    np.save("pca_models/V.npy", V)
    np.save("pca_models/scale.npy", scale_)
    np.save("pca_models/min.npy", min_)
    createDictCSV("pca_models/dic_elements.csv", dic_elements)

    print("reduced dimension: ", pca.n_components_)
    return ex_list, nor_list, U, S, V
def _reset(self):
    """Begin a new episode on a freshly selected, non-benign sample.

    Increments ``self.episode``, then draws samples until one can be loaded
    and is not labelled benign (label 0) by ``label_function``.  In test
    mode (after the initial reset) the bytes of each loaded sample are
    written under ``self.original_path`` and the selection is appended to
    ``test_log.txt``.

    Returns:
        The observation computed by ``self.compute_observation`` for the
        selected sample, as a numpy array.
    """
    self.turns = 0
    self.episode += 1
    while True:
        # get the new environment
        if self.random_sample:
            # draw a sample at random
            self.sha256 = random.choice(self.available_sha256)
        else:
            # take the samples in list order, wrapping around at the end
            self.sha256 = self.available_sha256[
                self.sample_iteration_index % len(self.available_sha256)]
            self.sample_iteration_index += 1

        self.history[self.sha256] = {'actions': [], 'evaded': False}

        if self.cache:
            try:
                self.bytez = self.samples[self.sha256]
            except KeyError:
                # samples that could not be fetched when the cache was
                # filled are missing from it; skip them like a failed fetch
                print("failed fetching file")
                continue
        else:
            try:
                self.bytez = interface.fetch_file(self.sha256, self.test)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

        if self.test and self.episode > 0:
            # keep a copy of the unmodified sample for the test run
            original_name = str(self.sample_iteration_index - 2) + self.sha256
            with open(os.path.join(self.original_path, original_name),
                      'wb') as outfile:
                outfile.write(self.bytez)

        original_label = label_function(self.bytez)
        if original_label == 0:
            # skip this one, it's already benign, and the graduation_agent will learn nothing
            continue

        self.tips = ' ' if not self.test else 'test '
        if self.episode > 0:
            print(
                "--------------------------------------------------------------------------------"
            )
            print("{}episode {} select training sample: {}".format(
                self.tips, self.episode, self.sha256))
            if self.test:
                with open("test_log.txt", 'a+') as f:
                    f.write("Process {} select sample: {}\n".format(
                        self.sample_iteration_index - 2, self.sha256))
            print(
                "--------------------------------------------------------------------------------"
            )

        # self.observation_space = self.feature_extractor.extract2(self.bytez)  # PCA
        self.observation_space = self.compute_observation(self.bytez)
        break  # we're done here

    return np.asarray(self.observation_space)
def _reset(self):
    """Begin a new episode on a sample that currently scores as malicious.

    Increments ``self.episode``, then draws samples until one can be loaded
    and its ``score_function`` value is at least ``malicious_threshold``.
    The score of the selected sample is kept in ``self.original_score``.

    Returns:
        The extracted features of the selected sample as a numpy array.
    """
    self.turns = 0
    self.episode += 1
    while True:
        # get the new environment
        if self.random_sample:
            # draw a sample at random
            self.sha256 = random.choice(self.available_sha256)
        else:
            # take the samples in list order, wrapping around at the end
            self.sha256 = self.available_sha256[
                self.sample_iteration_index % len(self.available_sha256)]
            self.sample_iteration_index += 1

        self.history[self.sha256] = {'actions': [], 'evaded': False}

        if self.cache:
            try:
                self.bytez = self.samples[self.sha256]
            except KeyError:
                # not every hash is guaranteed to be in the cache; skip the
                # missing ones like a failed fetch instead of crashing
                print("failed fetching file")
                continue
        else:
            try:
                self.bytez = interface.fetch_file(self.sha256)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

        self.original_score = score_function(self.bytez)
        if self.original_score < malicious_threshold:
            # skip this one, it's already benign, and the graduation_agent will learn nothing
            continue

        if self.episode > 0:
            print(
                "--------------------------------------------------------------------------------"
            )
            print("episode {} select training sample: {}".format(
                self.episode, self.sha256))
            print(
                "--------------------------------------------------------------------------------"
            )

        self.observation_space = self.feature_extractor.extract(self.bytez)
        break  # we're done here

    return np.asarray(self.observation_space)
def evaluate(action_function):
    """Apply an action policy to every holdout sample and record evasions.

    Args:
        action_function: callable mapping the current sample bytes to the
            next action to apply.

    Returns:
        ``(success, misclassified)``: ``success`` holds, per evaded sample,
        a dict mapping its hash to the list of applied actions;
        ``misclassified`` holds the hashes already labelled 0.0 before any
        modification.
    """
    evaded = []
    already_benign = []
    for sha256 in sha256_holdout:
        applied = defaultdict(list)
        sample = interface.fetch_file(sha256)
        if interface.get_label_local(sample) == 0.0:
            # already misclassified, move along
            already_benign.append(sha256)
            continue

        for _turn in range(MAXTURNS):
            chosen = action_function(sample)
            print(chosen)
            applied[sha256].append(chosen)
            sample = manipulate.modify_without_breaking(sample, [chosen])
            if interface.get_label_local(sample) == 0.0:
                evaded.append(applied)
                break

    # evasion accuracy is len(success) / len(sha256_holdout)
    return evaded, already_benign
def evaluate(action_function):
    """Measure how often a policy evades the local model on the holdout set.

    Each holdout sample that is not labelled 0.0 to begin with is modified
    for at most ``MAXTURNS`` turns with actions picked by
    ``action_function``; the attempt stops once the label becomes 0.0.

    Args:
        action_function: callable taking the sample bytes and returning an
            action.

    Returns:
        Tuple ``(success, misclassified)``: the per-sample action records
        (hash -> list of actions) of the evaded samples, and the hashes
        that were labelled 0.0 before any modification.
    """
    success, misclassified = [], []

    for sample_hash in sha256_holdout:
        record = defaultdict(list)
        current = interface.fetch_file(sample_hash)
        initial_label = interface.get_label_local(current)
        if initial_label == 0.0:
            misclassified.append(sample_hash)
            continue  # already misclassified, move along

        turns_left = MAXTURNS
        while turns_left > 0:
            turns_left -= 1
            step = action_function(current)
            print(step)
            record[sample_hash].append(step)
            current = manipulate.modify_without_breaking(current, [step])
            label_now = interface.get_label_local(current)
            if label_now == 0.0:
                success.append(record)
                break

    return success, misclassified  # evasion accuracy is len(success) / len(sha256_holdout)
def __init__(self, sha256list, random_sample=True, maxturns=3,
             output_path='evaded/blackbox/', cache=True, test=False):
    """Configure the environment, optionally cache all samples, and reset.

    Args:
        sha256list: identifiers of the samples the environment draws from.
        random_sample: stored as ``self.random_sample``; selects random
            versus in-order sample selection on reset.
        maxturns: stored as ``self.maxturns`` (presumably the per-episode
            action budget; not used in this method).
        output_path: output directory, resolved relative to the directory
            three ``dirname`` steps above this module's absolute path.
        cache: when True, fetch every sample up front into ``self.samples``.
        test: stored as ``self.test`` and forwarded to
            ``interface.fetch_file`` when filling the cache.
    """
    # PCA part
    # features, nor_features, U, S, V, scale_, min_, pca_component = self.load_PCA_model()
    # self.PCA_V = V
    # self.feature_scale_ = scale_
    # self.feature_min_ = min_
    # self.PCA_component = pca_component

    self.total_turn = 0
    self.episode = -1  # number of episodes run so far; the reset below bumps it to 0
    self.cache = cache
    self.available_sha256 = sha256list
    self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
    self.maxturns = maxturns
    self.feature_extractor = pefeatures.PEFeatureExtractor()
    self.random_sample = random_sample
    self.sample_iteration_index = 0
    self.test = test

    module_path = os.path.abspath(__file__)
    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(module_path)))
    self.output_path = os.path.join(base_dir, output_path)
    # Create the resolved directory: self.output_path is the only path kept
    # on the instance, so checking the raw (cwd-relative) argument could
    # leave the directory that is actually used missing.
    os.makedirs(self.output_path, exist_ok=True)

    self.history = OrderedDict()
    self.current_reward = 0

    self.samples = {}
    if self.cache:
        for sha256 in self.available_sha256:
            try:
                self.samples[sha256] = interface.fetch_file(sha256, self.test)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage

    self._reset()
from gym_malware.envs.utils.pefeatures import PEFeatureExtractor

# Earlier manual experiments, kept for reference:
# bytez = interface.fetch_file("Backdoor.Win32.Hupigon.zay")
# features = PEFeatureExtractor().extract2(bytez)
# features2 = PEFeatureExtractor().extract(bytez)
# print(features.__len__())
# print(features2.__len__())

# bytez = interface.fetch_file("VirusShare_0b3c009aa4e461a00c0b3755976b485e")
# # print(bytez)
# features = PEFeatureExtractor().extract(bytez)
# print(features.__len__())
# print(features)

file_list = interface.get_available_sha256()
# print (almost) complete arrays instead of numpy's abbreviated summary
np.set_printoptions(threshold=1000000)

# run the tests: print the extract2 features and their shape for the first
# two available samples
extractor = PEFeatureExtractor()
for index, sha256 in enumerate(file_list, start=1):
    print("{}:[file]:{}".format(index, sha256))
    bytez = interface.fetch_file(sha256)
    # extract once and reuse the result for both prints
    features = extractor.extract2(bytez)
    print(features)
    print(features.shape)
    if index > 1:
        break
import hashlib

from gym_malware.envs.controls import manipulate2 as manipulate
from gym_malware.envs.utils import interface
from other import lief_test

# np.random.seed(322333)
# random_action = lambda bytez: np.random.choice( list(manipulate.ACTION_TABLE.keys()) )


def random_action(bytez):
    """Return the fixed action 'section_append', whatever the input bytes."""
    return 'section_append'


# original 32bit putty.exe in sha256 file name
fileName = 'VirusShare_0a7e4d6c6006ed4a33b5fe0c181062c0'
bytez = interface.fetch_file(fileName)
# show the PE before the modification
lief_test.showPE(fileName)

action = random_action(bytez)
print(action)
bytez_mod = manipulate.modify_without_breaking(bytez, [action])

# write the modified binary to disk and show it again for comparison
with open("putty-mod32.exe", "wb") as new_file:
    new_file.write(bytez_mod)
lief_test.showPE("putty-mod32.exe")
import lief

from gym_malware.envs.utils import interface
from gym_malware.envs.utils.extractPE import extractPE

# Parse the PE directly from the in-memory bytes.
# Original note: passing the bytes object itself only worked with lief 0.7;
# with 0.8 on macOS it raised an error for an unknown reason.
# LIEF's raw-buffer overload of parse() takes a list of integers, so the
# bytes are converted first. Presumably the newer bindings no longer accept
# a bytes object for that overload - TODO confirm.
byte = interface.fetch_file("Backdoor.Win32.Hupigon.zay")
binary = lief.PE.parse(list(byte))
print(binary)