def PCA_on_training_model():
    file_list = interface.get_available_sha256()
    ex_list = np.array([
        pefeatures.PEFeatureExtractor().extract(interface.fetch_file(b))
        for b in file_list
    ])
    print("all_samples: ", ex_list.shape)

    # nor_list = normalize(ex_list, axis=0)
    # nor_list = MinMaxScaler().fit_transform(ex_list)
    nor_list, data_min, data_max, scale_, min_ = MinMaxImp(ex_list)

    # Keep enough components to explain 99% of the variance.
    # Note: PCA._fit is a private sklearn API that returns the SVD factors
    # (rows of V are the principal components); calling fit() first and then
    # _fit() again, as the original code did, fits the model twice.
    pca = PCA(n_components=0.99)
    U, S, V = pca._fit(nor_list)

    # dic_elements = {"n_component": pca.n_components_, "scale_": scale_, "min_": min_}
    dic_elements = {"n_component": pca.n_components_}

    os.makedirs("pca_models", exist_ok=True)  # np.save fails if the directory is missing
    np.save("pca_models/features.npy", ex_list)
    np.save("pca_models/nor_features.npy", nor_list)
    np.save("pca_models/U.npy", U)
    np.save("pca_models/S.npy", S)
    np.save("pca_models/V.npy", V)
    np.save("pca_models/scale.npy", scale_)
    np.save("pca_models/min.npy", min_)
    createDictCSV("pca_models/dic_elements.csv", dic_elements)

    print("reduced dimension: ", pca.n_components_)
    return ex_list, nor_list, U, S, V
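# MinMaxImp and createDictCSV are project helpers that are not shown here.
# A minimal sketch of each, assuming MinMaxImp mirrors sklearn's MinMaxScaler
# (X_scaled = X * scale_ + min_) while also returning the fitted parameters,
# and createDictCSV dumps a flat dict as "key,value" rows:

import csv


def MinMaxImp(X, feature_range=(0, 1)):
    data_min = X.min(axis=0)
    data_max = X.max(axis=0)
    data_range = data_max - data_min
    data_range[data_range == 0] = 1  # avoid division by zero on constant columns
    scale_ = (feature_range[1] - feature_range[0]) / data_range
    min_ = feature_range[0] - data_min * scale_
    return X * scale_ + min_, data_min, data_max, scale_, min_


def createDictCSV(filename, dictionary):
    with open(filename, "w", newline="") as f:
        writer = csv.writer(f)
        for key, value in dictionary.items():
            writer.writerow([key, value])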
def __init__(self,
             sha256list,
             random_sample=True,
             maxturns=3,
             output_path='evaded/blackbox/',
             cache=False):
    self.cache = cache
    self.available_sha256 = sha256list
    self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
    self.maxturns = maxturns
    self.feature_extractor = pefeatures.PEFeatureExtractor()
    self.random_sample = random_sample
    self.sample_iteration_index = 0
    self.output_path = os.path.join(
        os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
        output_path)
    # Create the resolved output directory; checking the relative
    # `output_path` (as the original did) would miss the joined absolute path.
    if not os.path.exists(self.output_path):
        os.makedirs(self.output_path)
    self.history = OrderedDict()
    self.samples = {}
    if self.cache:
        for sha256 in self.available_sha256:
            try:
                # bug fix: fetch the loop variable, not the undefined self.sha256
                self.samples[sha256] = interface.fetch_file(sha256)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage
    self._reset()
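# ACTION_LOOKUP maps each discrete action index back to a named manipulation.
# It is defined at module scope elsewhere; a plausible definition (an
# assumption, in the style of gym-malware) indexes the keys of
# manipulate.ACTION_TABLE:

ACTION_LOOKUP = {
    i: act
    for i, act in enumerate(manipulate.ACTION_TABLE.keys())
}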
def test_models(model, score_model, test_random=False):
    total = 200  # assumed holdout size when the random baseline is skipped
    # baseline: choose actions at random
    if test_random:
        random_action = lambda bytez: np.random.choice(
            list(manipulate.ACTION_TABLE.keys()))
        random_success, misclassified = evaluate(random_action)
        total = len(sha256_holdout) - len(
            misclassified)  # don't count misclassified towards success

    # option 1: Boltzmann sampling from Q-function network output
    softmax = lambda x: np.exp(x) / np.sum(np.exp(x))
    boltzmann_action = lambda x: np.argmax(
        np.random.multinomial(1, softmax(x).flatten()))
    # option 2: maximize the Q value, ignoring stochastic action space
    best_action = lambda x: np.argmax(x)

    fe = pefeatures.PEFeatureExtractor()

    def model_policy(model):
        shp = (1, ) + tuple(model.input_shape[1:])

        def f(bytez):
            # first, get features from bytez
            feats = fe.extract2(bytez)
            # feats = get_ob(bytez)
            q_values = model.predict(feats.reshape(shp))[0]
            action_index = best_action(q_values)  # alternative: boltzmann_action
            return ACTION_LOOKUP[action_index]

        return f

    # compare to keras models with windowlength=1
    dqn = load_model(model)
    # dqn = load_model('models/dqn.h5')
    dqn_success, _ = evaluate(model_policy(dqn))

    dqn_score = load_model(score_model)
    # dqn_score = load_model('models/dqn_score.h5')
    score_success, _ = evaluate(model_policy(dqn_score))

    # let's compare scores
    if test_random:
        random_result = "random:{}({}/{})".format(
            len(random_success) / total, len(random_success), total)
    else:
        random_result = "random:untested"
    print(random_result)
    blackbox_result = "blackbox:{}({}/{})".format(
        len(dqn_success) / total, len(dqn_success), total)
    print(blackbox_result)
    score_result = "score:{}({}/{})".format(
        len(score_success) / total, len(score_success), total)
    print(score_result)
    return random_result, blackbox_result, score_result
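# evaluate() is shared by all of these harnesses but defined elsewhere. A
# minimal sketch under stated assumptions: it walks sha256_holdout, skips
# samples the local classifier already misses, and otherwise applies the
# policy for up to MAXTURNS mutations. MAXTURNS is hypothetical, and
# interface.get_label_local is assumed to exist alongside get_score_local.

MAXTURNS = 3  # hypothetical cap; the real value comes from the environment


def evaluate(action_function):
    success = []
    misclassified = []
    for sha256 in sha256_holdout:
        bytez = interface.fetch_file(sha256)
        if interface.get_label_local(bytez) == 0:
            misclassified.append(sha256)  # already evades; don't count it
            continue
        for _ in range(MAXTURNS):
            action = action_function(bytez)
            bytez = manipulate.modify_without_breaking(bytez, [action])
            if interface.get_label_local(bytez) == 0:
                success.append(sha256)
                break
    return success, misclassified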
def test_models(model, score_model, agent_method, test_result, test_random=True, test_score=True):
    total = len(sha256_holdout)
    # baseline: choose actions at random
    if test_random:
        random_action = lambda bytez: np.random.choice(list(manipulate.ACTION_TABLE.keys()))
        random_success, misclassified = evaluate(random_action)
        total = len(sha256_holdout) - len(misclassified)  # don't count misclassified towards success
        with open(test_result, 'a+') as f:
            random_result = "random: {}({}/{})\n".format(len(random_success) / total, len(random_success), total)
            f.write(random_result)
            f.write("==========================\n")

    fe = pefeatures.PEFeatureExtractor()

    def agent_policy(agent):
        def f(bytez):
            # first, get features from bytez
            feats = fe.extract(bytez)
            action_index = agent.act(feats)
            return ACTION_LOOKUP[action_index]

        return f

    # ddqn
    env = gym.make('malware-v0')
    agent = agent_method(env)
    model_list = get_model_dir_list(model)
    for mm in model_list:
        agent.load(mm)
        success, _ = evaluate(agent_policy(agent))
        blackbox_result = "black: {}({}/{})".format(len(success) / total, len(success), total)
        with open(test_result, 'a+') as f:
            # record the result for each blackbox model directory
            f.write("{}->{}\n".format(mm, blackbox_result))
    with open(test_result, 'a+') as f:
        f.write("==========================\n")

    # score
    if test_score:
        env_score = gym.make('malware-score-v0')
        agent_score = agent_method(env_score)
        score_model_list = get_model_dir_list(score_model)
        for smm in score_model_list:
            agent_score.load(smm)
            score_success, _ = evaluate(agent_policy(agent_score))
            score_result = "score: {}({}/{})".format(len(score_success) / total, len(score_success), total)
            with open(test_result, 'a+') as f:
                f.write("{}->{}\n".format(smm, score_result))
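# get_model_dir_list is not defined in this file; a plausible sketch that
# returns every integer-named checkpoint directory under `basedir`, oldest
# first (assumption: chainerrl saves checkpoints in such subdirectories):

def get_model_dir_list(basedir):
    dirs = [d for d in os.listdir(basedir)
            if os.path.isdir(os.path.join(basedir, d)) and d.isdigit()]
    return [os.path.join(basedir, d) for d in sorted(dirs, key=int)]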
def test_model():
    T = 80  # total mutations allowed per file
    success = 0
    rn = dqeaf.RangeNormalize(-0.5, 0.5)
    fe = pefeatures.PEFeatureExtractor()
    episode = 0
    for file in onlyfiles:
        try:
            with open(os.path.join(input_folder, file), 'rb') as infile:
                bytez = infile.read()
        except IOError:
            raise FileRetrievalFailure(
                "Unable to read sha256 {} from {}".format(file, input_folder))

        state = fe.extract(bytez)
        state_norm = rn(state)
        episode = episode + 1
        state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(device)
        for mutation in range(1, T + 1):  # allow the full budget of T mutations
            actions = model.forward(state_norm)
            print(actions)
            action = torch.argmax(actions).item()
            action = ACTION_LOOKUP[action]
            bytez = manipulate.modify_without_breaking(bytez, [action])
            new_label = interface.get_score_local(bytez)
            print('episode : ' + str(episode))
            print('mutation : ' + str(mutation))
            print('test action : ' + str(action))
            print('new label : ' + str(new_label))
            state = fe.extract(bytez)
            state_norm = rn(state)
            state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(device)
            if new_label < 0.90:  # score fell below the detection threshold: evaded
                success = success + 1  # bug fix: the counter was never incremented
                with open(os.path.join(output_folder, file + '.exe'), mode='wb') as file1:
                    file1.write(bytes(bytez))
                break
    print('success : ' + str(success))
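# dqeaf.RangeNormalize is assumed to rescale a raw feature vector into a fixed
# interval before it is fed to the network. A minimal, hypothetical sketch:

class RangeNormalize:
    def __init__(self, low, high):
        self.low = low
        self.high = high

    def __call__(self, x):
        x_min, x_max = x.min(), x.max()
        span = x_max - x_min
        if span == 0:
            span = 1.0  # guard against constant feature vectors
        return (x - x_min) / span * (self.high - self.low) + self.low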
def tt_models(RL, test_result, test_random=True):
    total = len(sha256_holdout)
    # baseline: choose actions at random
    if test_random:
        random_action = lambda bytez: np.random.choice(
            list(manipulate.ACTION_TABLE.keys()))
        random_success, misclassified = evaluate(random_action)
        total = len(sha256_holdout) - len(
            misclassified)  # don't count misclassified towards success
        with open(test_result, 'a+') as f:
            random_result = "random: {}({}/{})\n".format(
                len(random_success) / total, len(random_success), total)
            f.write(random_result)
            f.write("==========================\n")

    fe = pefeatures.PEFeatureExtractor()

    def agent_policy(agent):
        def f(bytez):
            # first, get features from bytez
            feats = fe.extract(bytez)
            action_index = agent.act(feats)
            return ACTION_LOOKUP[action_index]

        return f

    # ddqn
    # env = gym.make('malware-test-v0')
    print("black box")
    success, _ = evaluate(agent_policy(RL))
    blackbox_result = "black: {}({}/{})".format(
        len(success) / total, len(success), total)
    with open(test_result, 'a+') as f:
        # record the blackbox result
        f.write("{}\n".format(blackbox_result))
        f.write("==========================\n")
def test_model():
    total_reward = 0
    F = 200  # total test files
    T = 80  # total mutations allowed per file
    ratio = F * 0.5  # success threshold: half of the test files must be evaded
    success = 0
    rn = RangeNormalize(-0.5, 0.5)
    fe = pefeatures.PEFeatureExtractor()
    for episode in range(1, F + 1):  # cover all F test files
        state = env.reset()
        state_norm = rn(state)
        state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(device)
        for mutation in range(1, T + 1):  # allow the full budget of T mutations
            actions = current_model.forward(state_norm)
            print(actions)
            action = torch.argmax(actions).item()
            next_state, reward, done, _ = env.step(action)
            print('episode : ' + str(episode))
            print('mutation : ' + str(mutation))
            print('test action : ' + str(action))
            print('test reward : ' + str(reward))
            state = next_state
            state_norm = rn(state)
            state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(device)
            if done:
                success = success + 1
                break
        if success >= ratio:
            print('success : ' + str(success))
            return True
    print('success : ' + str(success))
    return False
def __init__(self,
             sha256list,
             random_sample=True,
             maxturns=3,
             output_path='evaded/blackbox/',
             cache=True,
             test=False):
    # PCA section (currently disabled)
    # features, nor_features, U, S, V, scale_, min_, pca_component = self.load_PCA_model()
    # self.PCA_V = V
    # self.feature_scale_ = scale_
    # self.feature_min_ = min_
    # self.PCA_component = pca_component
    self.total_turn = 0
    self.episode = -1  # total number of episodes trained so far
    self.cache = cache
    self.available_sha256 = sha256list
    self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
    self.maxturns = maxturns
    self.feature_extractor = pefeatures.PEFeatureExtractor()
    self.random_sample = random_sample
    self.sample_iteration_index = 0
    self.test = test
    self.output_path = os.path.join(
        os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
        output_path)
    # Create the resolved output directory; checking the relative
    # `output_path` (as the original did) would miss the joined absolute path.
    if not os.path.exists(self.output_path):
        os.makedirs(self.output_path)
    self.history = OrderedDict()
    self.current_reward = 0
    self.samples = {}
    if self.cache:
        for sha256 in self.available_sha256:
            try:
                self.samples[sha256] = interface.fetch_file(sha256, self.test)
            except interface.FileRetrievalFailure:
                print("failed fetching file")
                continue  # try a new sha256...this one can't be retrieved from storage
    self._reset()
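# load_PCA_model (referenced in the disabled PCA section above) is not shown.
# A minimal sketch, assuming it simply reloads the artifacts written by
# PCA_on_training_model and reads n_component back from dic_elements.csv:

import csv


def load_PCA_model(self):
    features = np.load("pca_models/features.npy")
    nor_features = np.load("pca_models/nor_features.npy")
    U = np.load("pca_models/U.npy")
    S = np.load("pca_models/S.npy")
    V = np.load("pca_models/V.npy")
    scale_ = np.load("pca_models/scale.npy")
    min_ = np.load("pca_models/min.npy")
    with open("pca_models/dic_elements.csv") as f:
        # createDictCSV wrote one "key,value" row per entry
        pca_component = int(float(next(csv.reader(f))[1]))
    return features, nor_features, U, S, V, scale_, min_, pca_component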
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--outdir', type=str, default='models')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--final-exploration-steps', type=int, default=10 ** 4)
    parser.add_argument('--start-epsilon', type=float, default=1.0)
    parser.add_argument('--end-epsilon', type=float, default=0.1)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--steps', type=int, default=30000)
    # store_false: prioritized replay is ON by default; passing the flag disables it
    parser.add_argument('--prioritized-replay', action='store_false')
    parser.add_argument('--episodic-replay', action='store_true')
    parser.add_argument('--replay-start-size', type=int, default=1000)
    parser.add_argument('--target-update-interval', type=int, default=10 ** 2)
    parser.add_argument('--target-update-method', type=str, default='hard')
    parser.add_argument('--soft-update-tau', type=float, default=1e-2)
    parser.add_argument('--update-interval', type=int, default=1)
    parser.add_argument('--eval-n-runs', type=int, default=80)
    parser.add_argument('--eval-interval', type=int, default=1000)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--minibatch-size', type=int, default=None)
    parser.add_argument('--test-random', action='store_true')
    parser.add_argument('--rounds', type=int, default=3)
    args = parser.parse_args()

    class QFunction(chainer.Chain):
        def __init__(self, obs_size, n_actions, n_hidden_channels=None):
            super(QFunction, self).__init__()
            if n_hidden_channels is None:
                n_hidden_channels = net_layers
            net = []
            inpdim = obs_size
            for i, n_hid in enumerate(n_hidden_channels):
                net += [('l{}'.format(i), L.Linear(inpdim, n_hid))]
                # net += [('norm{}'.format(i), L.BatchNormalization(n_hid))]
                net += [('_act{}'.format(i), F.relu)]
                net += [('_dropout{}'.format(i), F.dropout)]
                inpdim = n_hid
            net += [('output', L.Linear(inpdim, n_actions))]
            with self.init_scope():
                for n in net:
                    # names starting with '_' are parameter-free functions,
                    # not links, so they are not registered with the chain
                    if not n[0].startswith('_'):
                        setattr(self, n[0], n[1])
            self.forward = net

        def __call__(self, x, test=False):
            """
            Args:
                x (ndarray or chainer.Variable): An observation
                test (bool): a flag indicating whether it is in test mode
            """
            for n, f in self.forward:
                if not n.startswith('_'):
                    x = getattr(self, n)(x)
                elif n.startswith('_dropout'):
                    x = f(x, 0.1)
                else:
                    x = f(x)
            return chainerrl.action_value.DiscreteActionValue(x)

    # create the DDQN agent
    def create_ddqn_agent(env, args):
        obs_size = env.observation_space.shape[0]
        action_space = env.action_space
        n_actions = action_space.n
        # q_func = q_functions.FCStateQFunctionWithDiscreteAction(
        #     obs_size, n_actions,
        #     n_hidden_channels=args.n_hidden_channels,
        #     n_hidden_layers=args.n_hidden_layers)
        q_func = QFunction(obs_size, n_actions)
        if args.gpu:
            q_func.to_gpu()  # --gpu is a flag, so move to the default GPU

        # Draw the computational graph and save it in the output directory.
        if not args.test and not args.gpu:
            chainerrl.misc.draw_computational_graph(
                # bug fix: build a dummy observation from the space's shape;
                # np.zeros_like on a gym Space object raises an error
                [q_func(np.zeros(env.observation_space.shape, dtype=np.float32)[None])],
                os.path.join(args.outdir, 'model'))

        # Use epsilon-greedy for exploration
        explorer = explorers.LinearDecayEpsilonGreedy(
            args.start_epsilon, args.end_epsilon, args.final_exploration_steps,
            action_space.sample)
        # explorer = explorers.Boltzmann()
        # explorer = explorers.ConstantEpsilonGreedy(
        #     epsilon=0.3, random_action_func=env.action_space.sample)

        opt = optimizers.Adam()
        opt.setup(q_func)

        rbuf_capacity = 5 * 10 ** 3
        if args.episodic_replay:
            if args.minibatch_size is None:
                args.minibatch_size = 4
            if args.prioritized_replay:
                betasteps = (args.steps - args.replay_start_size) // args.update_interval
                rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(rbuf_capacity, betasteps=betasteps)
            else:
                rbuf = replay_buffer.EpisodicReplayBuffer(rbuf_capacity)
        else:
            if args.minibatch_size is None:
                args.minibatch_size = 32
            if args.prioritized_replay:
                betasteps = (args.steps - args.replay_start_size) // args.update_interval
                rbuf = replay_buffer.PrioritizedReplayBuffer(rbuf_capacity, betasteps=betasteps)
            else:
                rbuf = replay_buffer.ReplayBuffer(rbuf_capacity)

        # Chainer only accepts numpy.float32 by default, so pass a converter
        # as the feature-extractor function phi.
        phi = lambda x: x.astype(np.float32, copy=False)

        agent = chainerrl.agents.DoubleDQN(
            q_func, opt, rbuf, gamma=args.gamma,
            explorer=explorer,
            replay_start_size=args.replay_start_size,
            target_update_interval=args.target_update_interval,
            update_interval=args.update_interval,
            phi=phi,
            minibatch_size=args.minibatch_size,
            target_update_method=args.target_update_method,
            soft_update_tau=args.soft_update_tau,
            episodic_update=args.episodic_replay,
            episodic_update_len=16)

        return agent

    # start training
    def train_agent(args, use_score=False):
        ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
        env = gym.make(ENV_NAME)
        ENV_TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(ENV_TEST_NAME)
        # np.random.seed(123)
        env.seed(123)

        # Set a random seed used in ChainerRL
        misc.set_random_seed(123)

        agent = create_ddqn_agent(env, args)
        q_hook = PlotHook('Average Q Value', ylabel='Average Action Value (Q)')
        loss_hook = PlotHook('Average Loss', plot_index=1, ylabel='Average Loss per Episode')
        reward_hook = PlotHook('Average Reward', plot_index=2, ylabel='Reward Value per Episode')
        scores_hook = TrainingScoresHook('scores.txt', args.outdir)

        chainerrl.experiments.train_agent_with_evaluation(
            agent, env,
            steps=args.steps,                  # Train the agent for this many steps
            max_episode_len=env.maxturns,      # Maximum length of each episode
            eval_interval=args.eval_interval,  # Evaluate the agent after every eval_interval steps
            eval_n_runs=args.eval_n_runs,      # Episodes sampled for each evaluation
            outdir=args.outdir,                # Save everything to the output directory
            step_hooks=[q_hook, loss_hook, scores_hook, reward_hook],
            successful_score=7,
            eval_env=test_env)

        # Make sure scores.txt still gets written when training succeeds
        # within the very first round.
        scores_hook(None, None, 1000)

        return env, agent

    # get the directory of the most recently saved model
    def get_latest_model_dir_from(basedir):
        dirs = os.listdir(basedir)
        lastmodel = -1
        for d in dirs:
            try:
                if int(d) > lastmodel:
                    lastmodel = int(d)
            except ValueError:
                continue
        assert lastmodel >= 0, "No saved models!"
        return os.path.join(basedir, str(lastmodel))

    # kerasrl
    # def generate_dense_model(input_shape, nb_actions):
    #     model = Sequential()
    #     model.add(Flatten(input_shape=input_shape))
    #     # normalize before compute
    #     model.add(BatchNormalization())
    #     model.add(Dropout(0.1))  # drop out the input to make the model less sensitive to any one feature
    #
    #     for layer in net_layers:
    #         model.add(Dense(layer))
    #         model.add(ELU(alpha=1.0))
    #         model.add(Dropout(0.1))
    #
    #     model.add(Dense(nb_actions))
    #     model.add(Activation('linear'))
    #     print(model.summary())
    #
    #     return model
    #
    # def train_keras_dqn_model(args):
    #     ENV_NAME = 'malware-v0'
    #     env = gym.make(ENV_NAME)
    #     env.seed(123)
    #     nb_actions = env.action_space.n
    #     window_length = 1  # "experience" consists of where we were, where we are now
    #
    #     # generate a policy model
    #     model = generate_dense_model((window_length,) + env.observation_space.shape, nb_actions)
    #
    #     # configure and compile our agent
    #     # BoltzmannQPolicy selects an action stochastically with a probability generated by soft-maxing Q values
    #     policy = BoltzmannQPolicy()
    #
    #     # memory can help a model during training
    #     # for this, we only consider a single malware sample (window_length=1) for each "experience"
    #     memory = SequentialMemory(limit=1000, ignore_episode_boundaries=False, window_length=window_length)
    #
    #     # DQN agent as described in Mnih (2013) and Mnih (2015).
    #     # http://arxiv.org/pdf/1312.5602.pdf
    #     # http://arxiv.org/abs/1509.06461
    #     agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
    #                      enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
    #                      target_model_update=1e-2, policy=policy, batch_size=16)
    #
    #     # keras-rl allows one to use any built-in Keras optimizer
    #     agent.compile(RMSprop(lr=1e-2), metrics=['mae'])
    #
    #     # play the game. learn something!
    #     agent.fit(env, nb_steps=args.steps, visualize=False, verbose=2)
    #
    #     history_test = None
    #
    #     if args.test:
    #         # Set up the testing environment
    #         TEST_NAME = 'malware-test-v0'
    #         test_env = gym.make(TEST_NAME)
    #
    #         # evaluate the agent on a few episodes, drawing randomly from the test samples
    #         agent.test(test_env, nb_episodes=100, visualize=False)
    #         history_test = test_env.history
    #
    #     return env, agent

    # test
    if not args.test:
        print("training...")
        # retrain the model several times automatically, to avoid manual restarts
        for _ in range(args.rounds):
            args.outdir = experiments.prepare_output_dir(args, args.outdir, argv=sys.argv)
            print('Output files are saved in {}'.format(args.outdir))
            env, agent = train_agent(args)
            # env, agent = train_keras_dqn_model(args)
            with open(os.path.join(args.outdir, 'scores.txt'), 'a') as f:
                f.write("total_turn/episode->{}({}/{})\n".format(
                    env.total_turn / env.episode, env.total_turn, env.episode))
                f.write("history:\n")
                count = 0
                success_count = 0
                for k, v in env.history.items():
                    count += 1
                    if v['evaded']:
                        success_count += 1
                        f.write("{}:{}->{}\n".format(count, k, v['evaded_sha256']))
                    else:
                        f.write("{}:{}->\n".format(count, k))
                f.write("success count:{}\n".format(success_count))
                f.write("{}".format(env.history))

            # mark the run directory as a success or a failure
            dirs = os.listdir(args.outdir)
            second_line = linecache.getline(os.path.join(args.outdir, 'scores.txt'), 2)
            success_score = second_line.strip('\n').split('\t')[3]
            # if training stopped early, mark the run as a success
            success_flag = False
            for file in dirs:
                if file.endswith('_finish') and not file.startswith(str(args.steps)):
                    success_flag = True
                    break
            os.rename(args.outdir,
                      '{}-{}{}'.format(args.outdir, success_score,
                                       '-success' if success_flag else ''))
            # reset outdir back to 'models' for the next round
            args.outdir = 'models'
    else:
        print("testing...")
        model_fold = os.path.join(args.outdir, args.load)
        scores_file = os.path.join(model_fold, 'scores.txt')

        # Set the default total first, so the adjusted value from the random
        # baseline (which excludes misclassified samples) is not overwritten.
        total = len(sha256_holdout)

        # baseline: choose actions at random
        if args.test_random:
            random_action = lambda bytez: np.random.choice(list(manipulate.ACTION_TABLE.keys()))
            random_success, misclassified = evaluate(random_action)
            total = len(sha256_holdout) - len(misclassified)  # don't count misclassified towards success
            with open(scores_file, 'a') as f:
                random_result = "random: {}({}/{})\n".format(len(random_success) / total, len(random_success), total)
                f.write(random_result)
                f.write("==========================\n")

        fe = pefeatures.PEFeatureExtractor()

        def agent_policy(agent):
            def f(bytez):
                # first, get features from bytez
                feats = fe.extract2(bytez)
                action_index = agent.act(feats)
                return ACTION_LOOKUP[action_index]

            return f

        # ddqn
        env = gym.make('malware-test-v0')
        agent = create_ddqn_agent(env, args)
        mm = get_latest_model_dir_from(model_fold)
        agent.load(mm)
        success, _ = evaluate(agent_policy(agent))
        blackbox_result = "black: {}({}/{})".format(len(success) / total, len(success), total)
        with open(scores_file, 'a') as f:
            f.write("{}->{}\n".format(mm, blackbox_result))
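# PlotHook and TrainingScoresHook (used in train_agent above) are
# project-specific chainerrl step hooks; a step hook is any callable taking
# (env, agent, step). A minimal, hypothetical PlotHook that tracks the agent's
# running average Q value via agent.get_statistics() and periodically redraws
# a matplotlib figure; the real implementation may record other statistics:

import matplotlib
matplotlib.use('Agg')  # headless backend, suitable for training servers
import matplotlib.pyplot as plt


class PlotHook:
    def __init__(self, title, plot_index=0, ylabel=''):
        self.title = title
        self.plot_index = plot_index
        self.ylabel = ylabel
        self.values = []

    def __call__(self, env, agent, step):
        # chainerrl agents expose running statistics as (name, value) pairs
        stats = dict(agent.get_statistics())
        self.values.append(stats.get('average_q', 0.0))
        if step % 1000 == 0:
            plt.figure(self.plot_index)
            plt.clf()
            plt.plot(self.values)
            plt.title(self.title)
            plt.ylabel(self.ylabel)
            plt.xlabel('step')
            plt.savefig('{}.png'.format(self.title.replace(' ', '_')))


# Entry point (assumption: the original module calls main() when run directly).
if __name__ == '__main__':
    main()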
    assert lastmodel >= 0, "No saved models!"
    return os.path.join(basedir, str(lastmodel))


if __name__ == '__main__':
    # baseline: choose actions at random
    random_action = lambda bytez: np.random.choice(
        list(manipulate.ACTION_TABLE.keys()))
    random_success, misclassified = evaluate(random_action)
    total = len(sha256_holdout) - len(
        misclassified)  # don't count misclassified towards success

    ENV_NAME = 'malware-test-v0'
    env = gym.make(ENV_NAME)
    fe = pefeatures.PEFeatureExtractor()

    def agent_policy(agent):
        def f(bytez):
            # first, get features from bytez
            feats = fe.extract(bytez)
            action_index = agent.act(feats)
            return ACTION_LOOKUP[action_index]

        return f

    agent = create_acer_agent(env)
    # pull the latest stored model
    last_model_dir = get_latest_model_from('models/acer_chainer')
    agent.load(last_model_dir)
    success, _ = evaluate(agent_policy(agent))
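    # The original snippet ends without reporting the results; a sketch that
    # prints both evasion rates in the style of test_models above (an
    # assumption, not part of the original script):
    random_result = "random:{}({}/{})".format(
        len(random_success) / total, len(random_success), total)
    acer_result = "acer:{}({}/{})".format(
        len(success) / total, len(success), total)
    print(random_result)
    print(acer_result)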
def compute_observation(bytez, feature_min_, V, PCA_component, feature_scale_):
    fe = pefeatures.PEFeatureExtractor()
    raw_features = fe.extract(bytez)
    # Apply the MinMax parameters fitted at training time before projecting
    # (X_scaled = X * scale_ + min_, as in sklearn's MinMaxScaler); the
    # original left this commented out, which projected raw features into a
    # PCA space that was fitted on scaled features.
    scaled_features = raw_features * feature_scale_ + feature_min_
    # Project onto the top PCA_component principal components (rows of V).
    observation = np.dot(scaled_features[np.newaxis, :], V.T[:, :PCA_component])
    return observation
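# Usage sketch: wire compute_observation to the artifacts saved by
# PCA_on_training_model. `pca_component` and "some_sample.exe" are
# hypothetical; in practice read n_component from dic_elements.csv.

V = np.load("pca_models/V.npy")
scale_ = np.load("pca_models/scale.npy")
min_ = np.load("pca_models/min.npy")
pca_component = 32  # hypothetical; the real value is pca.n_components_

with open("some_sample.exe", "rb") as f:  # hypothetical input binary
    obs = compute_observation(f.read(), min_, V, pca_component, scale_)
print(obs.shape)  # -> (1, pca_component)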