def __init__(self, info_dict=None, hyper_params_dict=None, reuse=False, name='SURROGATE'):
    if info_dict is None:
        raise ValueError("Information of the model should be provided.")
    if hyper_params_dict is None:
        raise ValueError("Hyper-parameters are needed.")

    self.info_dict = info_dict
    self.info = utils.ParamWrapper(self.info_dict)
    self.hp_dict = hyper_params_dict
    self.hp_params = utils.ParamWrapper(self.hp_dict)

    self.feature_tp = self.info.feature_type
    self.feature_mp = self.info.feature_mapping_type
    self.feature_utility_rate = self.info.feature_utility_rate
    self.dataset_dir = self.info.dataset_dir
    self.name = name

    self.mal_dir = os.path.join(self.dataset_dir, config.get('dataset', 'malware_dir_name'))
    self.ben_dir = os.path.join(self.dataset_dir, config.get('dataset', 'benware_dir_name'))

    tmp_save_dir = config.get('experiments', 'surrogate_save_dir')
    if not os.path.exists(tmp_save_dir):
        os.makedirs(tmp_save_dir)
    self.save_dir = tmp_save_dir

    # self._data_preprocess()

    # model necessities
    self.input_dim = len(
        utils.read_pickle(os.path.join(self.save_dir, 'vocabulary')))  # update in the future
    self.hidden_layers = self.hp_params.hidden_units
    self.output_dim = self.hp_params.output_dim
    # self.model_graph()

    super(SurrogateModel, self).__init__(self.info_dict,
                                         self.hp_dict,
                                         reuse,
                                         is_saving=False,
                                         name=self.name)
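# A minimal usage sketch for SurrogateModel (the class this __init__ belongs
# to, per the super() call above). Both dicts are mandatory since the
# constructor raises otherwise; the keys mirror the attributes read in
# __init__, while every concrete value below is an illustrative assumption.
surrogate_info = {
    'feature_type': 'drebin',           # assumed feature type
    'feature_mapping_type': 'binary',   # assumed mapping type
    'feature_utility_rate': 1.,         # assumed utility rate
    'dataset_dir': '/path/to/dataset',  # placeholder path
}
surrogate_hp = {
    'hidden_units': [160, 160],  # assumed hidden layer sizes
    'output_dim': 2,             # assumed: benign vs. malicious
}
surrogate = SurrogateModel(info_dict=surrogate_info,
                           hyper_params_dict=surrogate_hp)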
def __init__(self, info_dict=None, hyper_params=None, reuse=False,
             is_saving=True, init_graph=True, mode='train', name='DADV_NN_ENSEMBLE_MAX'):
    """
    Hardened deep ensemble incorporating the 'max' attack and a diversifying method.
    @param info_dict: model information; defaults to ADV_ENS_INFO updated with INFO
    @param hyper_params: hyper-parameters; defaults to ADV_ENS_HP updated with MAX_ADV_TRAIN_HP and DNN_HP
    @param reuse: whether to reuse the variables
    @param is_saving: option for saving weights
    @param init_graph: whether to initialize the graph
    @param mode: mode for running the model, 'train' or 'test'
    @param name: model name
    """
    self.is_saving = is_saving
    self.init_graph = init_graph
    self.mode = mode

    if info_dict is None:
        ADV_ENS_INFO.update(INFO)
        info_dict = ADV_ENS_INFO
    self.clf_info = utils.ParamWrapper(info_dict)

    if hyper_params is None:
        ADV_ENS_HP.update(MAX_ADV_TRAIN_HP)
        ADV_ENS_HP.update(DNN_HP)
        hyper_params = ADV_ENS_HP
    self.hp_params = utils.ParamWrapper(hyper_params)
    self.model_name = name

    self.base_model_method = [AdversarialTrainingDNN] * len(MAXIMIZER_METHOD_DICT)
    self.base_model_method.append(BasicDNNModel)
    self.base_model_count = len(self.base_model_method)
    assert self.base_model_count > 1, 'At least two base models are required.'

    # initialization
    if self.clf_info.feature_tp == list(feature_type_scope_dict.keys())[0]:
        self.normalizer = utils.read_pickle(
            config.get('feature.' + self.clf_info.feature_tp, 'normalizer'))
    else:
        raise ValueError("Feature type is incompatible.")

    input_dim = len(
        utils.read_pickle(config.get('feature.' + self.clf_info.feature_tp, 'vocabulary')))
    self.eta = self.hp_params.eta

    feature_reverser = DrebinFeatureReverse()
    allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

    # build the attack graph
    maximizer_name_list = self.hp_params.maximizer_name_list
    self.inner_maximizers = []
    self.trial_list = []
    for maximizer_name in maximizer_name_list:
        maximizer_method = MAXIMIZER_METHOD_DICT[maximizer_name]
        maximizer_param = MAXIMIZER_PARAM_DICT[maximizer_name]
        inner_maximizer = maximizer_method(self,
                                           input_dim,
                                           allow_insert_array,
                                           allow_removal_array,
                                           self.normalizer,
                                           verbose=False,
                                           **maximizer_param)
        self.inner_maximizers.append(inner_maximizer)
        self.trial_list.append(self.hp_params.trials_dict[maximizer_name])

    # record the number of malware examples in a training batch
    self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)

    super(DAdversarialDeepEnsembleMax, self).__init__(info_dict,
                                                      hyper_params,
                                                      reuse=reuse,
                                                      is_saving=self.is_saving,
                                                      init_graph=self.init_graph,
                                                      mode=self.mode,
                                                      name=name)
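# A hedged instantiation sketch for DAdversarialDeepEnsembleMax, mirroring the
# default hyper-parameter path taken inside __init__ when hyper_params is None.
# The attack names are placeholders: valid values are whatever keys
# MAXIMIZER_METHOD_DICT (and trials_dict) actually define in this repo.
ADV_ENS_HP.update(MAX_ADV_TRAIN_HP)
ADV_ENS_HP.update(DNN_HP)
ens_hp = dict(ADV_ENS_HP)
ens_hp.update({
    'maximizer_name_list': ['fgsm', 'pgd'],  # assumed attack names
    'trials_dict': {'fgsm': 1, 'pgd': 5},    # assumed restarts per attack
    'eta': 0.1,                              # assumed step-size parameter
})
ensemble = DAdversarialDeepEnsembleMax(hyper_params=ens_hp, mode='train')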
def __init__(self, info_dict=None, hyper_params=None, reuse=False,
             is_saving=True, init_graph=True, mode='train', name='BASIC_DNN'):
    """
    Build a basic DNN model.
    @param info_dict: model information; defaults to INFO
    @param hyper_params: hyper-parameters; defaults to DNN_HP
    @param reuse: whether to reuse the variables
    @param is_saving: option for saving weights
    @param init_graph: whether to initialize the graph
    @param mode: mode for running the model, 'train' or 'test'
    @param name: model name
    """
    super(BasicDNNModel, self).__init__()
    # model setup
    self.is_saving = is_saving
    self.init_graph = init_graph
    if mode not in ('train', 'test'):
        raise ValueError("Mode must be 'train' or 'test', not others.")
    self.mode = mode

    if info_dict is not None:
        self.info_dict = info_dict
    else:
        self.info_dict = INFO
    self.info = utils.ParamWrapper(self.info_dict)

    if hyper_params is not None:
        self.hp_params_dict = hyper_params
    else:
        self.hp_params_dict = DNN_HP
    self.hp_params = utils.ParamWrapper(self.hp_params_dict)
    self.model_name = name

    if self.is_saving:
        self.save_dir = config.get('experiments', name.lower())

    # feature extraction
    self.feature_tp = self.info.feature_type  # drebin
    self.feature_mp = self.info.feature_mapping_type  # binary
    self.dataset_dir = self.info.dataset_dir
    self.mal_dir = os.path.join(self.dataset_dir, config.get('dataset', 'malware_dir_name'))
    self.ben_dir = os.path.join(self.dataset_dir, config.get('dataset', 'benware_dir_name'))

    # run the preprocessing pipeline only if any required artifact is missing
    if not (os.path.exists(config.get('feature.' + self.feature_tp, 'dataX')) and
            os.path.exists(config.get('feature.' + self.feature_tp, 'datay')) and
            os.path.exists(config.get('feature.' + self.feature_tp, 'vocabulary')) and
            os.path.exists(config.get('feature.' + self.feature_tp, 'normalizer')) and
            os.path.exists(config.get('dataset', 'name_list'))):
        self._data_preprocess()

    # obtain some hyper-parameters
    self.input_dim = len(
        utils.read_pickle(config.get('feature.' + self.feature_tp, 'vocabulary')))
    self.hidden_layers = self.hp_params.hidden_units
    self.output_dim = self.hp_params.output_dim
    tf.set_random_seed(self.hp_params.random_seed)
    if self.init_graph:
        self.model_graph(reuse=reuse)
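# A minimal usage sketch for BasicDNNModel: with both dicts left as None the
# constructor falls back to the module-level INFO and DNN_HP defaults. Reusing
# variables for a 'test'-mode copy, as sketched below, is an assumption about
# how the reuse flag is meant to be combined with a previously built graph.
train_model = BasicDNNModel(mode='train')  # builds the graph via model_graph(reuse=False)
test_model = BasicDNNModel(reuse=True,     # reuse the variables of the trained graph
                           is_saving=False,
                           mode='test')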
def __init__(self, info_dict=None, hyper_params=None, reuse=False,
             is_saving=True, init_graph=True, mode='train', name='ADV_TRAINING_DNN_MAX'):
    """
    Hardened model incorporating the 'max' attack.
    @param info_dict: model information; defaults to INFO
    @param hyper_params: hyper-parameters; defaults to MAX_ADV_TRAIN_HP updated with DNN_HP
    @param reuse: whether to reuse the variables
    @param is_saving: option for saving weights
    @param init_graph: whether to initialize the graph
    @param mode: mode for running the model, 'train' or 'test'
    @param name: model name
    """
    self.is_saving = is_saving
    self.init_graph = init_graph
    self.mode = mode

    if info_dict is None:
        info_dict = INFO
    # get useful information; this will be overwritten in the parent class
    self.info = utils.ParamWrapper(info_dict)
    self.feature_tp = self.info.feature_type

    if hyper_params is None:
        MAX_ADV_TRAIN_HP.update(DNN_HP)
        hyper_params = MAX_ADV_TRAIN_HP
    self.hp_params = utils.ParamWrapper(hyper_params)

    # initialization
    if self.feature_tp == list(feature_type_scope_dict.keys())[0]:
        self.normalizer = utils.read_pickle(
            config.get('feature.' + self.feature_tp, 'normalizer'))
    else:
        raise ValueError("Feature type is incompatible.")

    input_dim = len(
        utils.read_pickle(config.get('feature.' + self.feature_tp, 'vocabulary')))
    self.eta = self.hp_params.eta

    feature_reverser = DrebinFeatureReverse()
    allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

    # build the attack graph
    maximizer_name_list = self.hp_params.maximizer_name_list
    self.inner_maximizers = []
    self.trial_list = []
    for maximizer_name in maximizer_name_list:
        maximizer_method = MAXIMIZER_METHOD_DICT[maximizer_name]
        maximizer_param = MAXIMIZER_PARAM_DICT[maximizer_name]
        inner_maximizer = maximizer_method(self,
                                           input_dim,
                                           allow_insert_array,
                                           allow_removal_array,
                                           self.normalizer,
                                           verbose=False,
                                           **maximizer_param)
        self.inner_maximizers.append(inner_maximizer)
        self.trial_list.append(self.hp_params.trials_dict[maximizer_name])

    # record the number of malware examples in a training batch
    self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)

    super(AdversarialTrainingDNNMax, self).__init__(info_dict,
                                                    hyper_params,
                                                    reuse,
                                                    self.is_saving,
                                                    self.init_graph,
                                                    self.mode,
                                                    name)
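# A hedged sketch of the 'max' variant: one DNN is hardened against the
# strongest of several inner attacks, mirroring the default hyper-parameter
# path above. Attack names are placeholders for keys of MAXIMIZER_METHOD_DICT.
MAX_ADV_TRAIN_HP.update(DNN_HP)
max_hp = dict(MAX_ADV_TRAIN_HP)
max_hp.update({
    'maximizer_name_list': ['fgsm', 'pgd'],  # assumed attack names
    'trials_dict': {'fgsm': 1, 'pgd': 5},    # assumed restarts per attack
})
hardened_max = AdversarialTrainingDNNMax(hyper_params=max_hp, mode='train')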
def __init__(self, info_dict=None, hyper_params=None, reuse=False,
             is_saving=True, init_graph=True, mode='train', name='ADV_TRAINING_DNN'):
    """
    Hardened model incorporating adversarial training.
    @param info_dict: model information; defaults to INFO
    @param hyper_params: hyper-parameters; defaults to ADV_TRAIN_HP updated with DNN_HP
    @param reuse: whether to reuse the variables
    @param is_saving: option for saving weights
    @param init_graph: whether to initialize the graph
    @param mode: mode for running the model, 'train' or 'test'
    @param name: model name
    """
    self.is_saving = is_saving
    self.init_graph = init_graph
    self.mode = mode

    if info_dict is None:
        info_dict = INFO
    # get useful information; this will be overwritten in the parent class
    self.info = utils.ParamWrapper(info_dict)
    self.feature_tp = self.info.feature_type

    if hyper_params is None:
        ADV_TRAIN_HP.update(DNN_HP)
        hyper_params = ADV_TRAIN_HP
    # hyper_params specifies which attack to use
    self.hp_params = utils.ParamWrapper(hyper_params)

    # initialization
    if self.feature_tp == list(feature_type_scope_dict.keys())[0]:
        self.normalizer = utils.read_pickle(
            config.get('feature.' + self.feature_tp, 'normalizer'))
    else:
        raise ValueError("Feature type is incompatible.")

    input_dim = len(
        utils.read_pickle(config.get('feature.' + self.feature_tp, 'vocabulary')))
    self.eta = self.hp_params.eta

    feature_reverser = DrebinFeatureReverse()
    allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

    inner_max_name = self.hp_params.maximizer_name
    inner_max_param = MAXIMIZER_PARAM_DICT[inner_max_name]
    self.inner_maximizer = MAXIMIZER_METHOD_DICT[inner_max_name](self,
                                                                 input_dim,
                                                                 allow_insert_array,
                                                                 allow_removal_array,
                                                                 self.normalizer,
                                                                 verbose=False,
                                                                 **inner_max_param)

    # record the number of malware examples in a training batch
    self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)

    super(AdversarialTrainingDNN, self).__init__(info_dict,
                                                 hyper_params,
                                                 reuse,
                                                 self.is_saving,
                                                 self.init_graph,
                                                 self.mode,
                                                 name + '_' + inner_max_name.upper())
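# A hedged sketch for the single-attack variant: maximizer_name selects one
# entry of MAXIMIZER_METHOD_DICT and the constructor suffixes the model name
# with it, e.g. 'ADV_TRAINING_DNN_PGD'. 'pgd' is a placeholder attack name.
ADV_TRAIN_HP.update(DNN_HP)
adv_hp = dict(ADV_TRAIN_HP)
adv_hp['maximizer_name'] = 'pgd'  # assumed key of MAXIMIZER_METHOD_DICT
hardened = AdversarialTrainingDNN(hyper_params=adv_hp, mode='train')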