def __init__(self, feature_mp='binary', use_default_feature=True):
    super(DrebinFeatureReverse, self).__init__(list(feature_type_scope_dict.keys())[0],  # 'drebin'
                                               feature_mp,
                                               use_default_feature)

    # load feature information
    try:
        if self.use_default_feature:
            self.normalizer = utils.read_pickle(
                cfg.config.get('feature.' + self.feature_type, 'normalizer'))
            self.vocab = utils.read_pickle(
                cfg.config.get('feature.' + self.feature_type, 'vocabulary'))
            self.vocab_info = utils.read_pickle(
                cfg.config.get('feature.' + self.feature_type, 'vocab_info'))
        else:
            # use surrogate feature meta-information
            self.normalizer = utils.read_pickle(
                os.path.join(cfg.config.get('experiments', 'surrogate_save_dir'), 'normalizer'))
            self.vocab = utils.read_pickle(
                os.path.join(cfg.config.get('experiments', 'surrogate_save_dir'), 'vocabulary'))
            self.vocab_info = utils.read_pickle(
                os.path.join(cfg.config.get('experiments', 'surrogate_save_dir'), 'vocab_info'))
    except Exception as ex:
        logger.error(str(ex))
        raise IOError("Unable to load the meta-information of features.")
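# Usage sketch (illustrative, not part of the original class): with the defaults the
# reverser loads the normalizer/vocabulary pickles referenced by the 'feature.drebin'
# section of the config; with use_default_feature=False it reads them from the surrogate
# model's save directory instead. get_mod_array() is the method consumed by the attack
# graphs further below.
#
#   reverser = DrebinFeatureReverse(feature_mp='binary', use_default_feature=True)
#   insertion_mask, removal_mask = reverser.get_mod_array()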
def _initilize(self):
    """Initialization."""
    all_method_names = []
    for mode, method_list in attack_method_dict.items():
        if self.attack_method_name in method_list:
            self.attack_mode = mode
        all_method_names.extend(method_list)

    if self.attack_method_name not in all_method_names:
        raise ValueError("\n\t Attack method '{}' is not supported; supported methods are: {}".format(
            self.attack_method_name, ', '.join(all_method_names)))
    if self.attack_scenario not in attack_scenario_dict.keys():
        raise ValueError("\n\t Attack scenario '{}' is not supported; supported scenarios are: {}".format(
            self.attack_scenario, ', '.join(attack_scenario_dict.keys())))
    if self.targeted_model_name not in targeted_model_names_dict.keys():
        raise ValueError("\n\t Targeted model '{}' is not supported; supported models are: {}".format(
            self.targeted_model_name, ', '.join(targeted_model_names_dict.keys())))

    # get the information of the targeted model
    self.targeted_model = targeted_model_names_dict[self.targeted_model_name](mode='test')
    self.targeted_model_info = self.targeted_model.info
    self.targeted_model_hp = self.targeted_model.hp_params

    if self.attack_scenario == WHITEBOX:
        self.targeted_model_of_attacker = self.targeted_model
        if self.targeted_model_of_attacker.feature_tp == list(feature_type_scope_dict.keys())[0]:  # 'drebin'
            self.feature_reverser = DrebinFeatureReverse(
                feature_mp=self.targeted_model_of_attacker.feature_mp)
        else:
            raise ValueError("Only " + ' '.join(feature_type_scope_dict.keys()) + " are supported.")

    if self.attack_scenario == GREYBOX:
        """
        Train a DNN as the surrogate model here.
        In the paper, the hardened models are used as the surrogate models.
        """
        sur_info_dict = {}
        sur_hp_dict = {}
        for k, v in self.other_args:
            if k in attack_scenario_dict[GREYBOX].keys():
                attack_scenario_dict[GREYBOX][k] = v
            else:
                raise ValueError("No '{}' key, please check it against '{}'".format(
                    k, ','.join(attack_scenario_dict[GREYBOX].keys())))

        if attack_scenario_dict[GREYBOX]['algo_knowledge'] == 0000:  # zero knowledge about the algorithm
            sur_info_dict['learning_algorithm'] = 'DNN'
            sur_hp_dict = SUR_DNN_HP.copy()
        else:
            raise NotImplementedError

        if attack_scenario_dict[GREYBOX]['feature_knowledge'] == 1111:
            sur_info_dict['feature_type'] = self.targeted_model_info.feature_type
            sur_info_dict['feature_mapping_type'] = self.targeted_model_info.feature_mapping_type
            sur_info_dict['feature_utility_rate'] = 1.
        else:
            raise NotImplementedError

        if attack_scenario_dict[GREYBOX]['dataset_knowledge'] == 1111:
            sur_info_dict['dataset_dir'] = self.targeted_model_info.dataset_dir
        else:
            raise NotImplementedError

        surrogate_model = SurrogateModel(sur_info_dict, sur_hp_dict, False)
        # surrogate_model.train()
        # surrogate_model.graph_reset()  # reset the graph to avoid loading the Adam parameters
        self.targeted_model_of_attacker = surrogate_model
        self.feature_reverser = DrebinFeatureReverse(
            feature_mp=surrogate_model.feature_mp,
            use_default_feature=True)  # may cause an issue: the surrogate model uses the default features

    if self.attack_scenario == BLACKBOX:
        for k, v in self.other_args:
            if k in attack_scenario_dict[BLACKBOX].keys():
                attack_scenario_dict[BLACKBOX][k] = v
            else:
                raise ValueError("No '{}' key, please check it against '{}'".format(
                    k, ','.join(attack_scenario_dict[BLACKBOX].keys())))
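# Illustrative grey-box configuration (an assumption for exposition: `other_args` is an
# iterable of (key, value) pairs, matching the `for k, v in self.other_args` loops above;
# the keys mirror the knowledge dimensions checked in `_initilize`):
GREYBOX_KNOWLEDGE_EXAMPLE = [
    ('algo_knowledge', 0000),     # zero knowledge of the learning algorithm -> DNN surrogate
    ('feature_knowledge', 1111),  # full knowledge of the feature representation
    ('dataset_knowledge', 1111),  # full knowledge of the training dataset
]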
def __init__(self, info_dict=None, hyper_params=None, reuse=False,
             is_saving=True, init_graph=True, mode='train', name='DADV_NN_ENSEMBLE_MAX'):
    """
    Hardened deep ensemble incorporating the ''max'' attack and a diversifying method.
    @param info_dict: model information; if None, ADV_ENS_INFO merged with INFO is used
    @param hyper_params: hyper-parameters; if None, ADV_ENS_HP merged with MAX_ADV_TRAIN_HP and DNN_HP is used
    @param reuse: whether to reuse the variables
    @param is_saving: option for saving weights
    @param init_graph: whether to initialize the graph
    @param mode: mode for running the model, 'train' or 'test'
    @param name: model name
    """
    self.is_saving = is_saving
    self.init_graph = init_graph
    self.mode = mode

    if info_dict is None:
        ADV_ENS_INFO.update(INFO)
        info_dict = ADV_ENS_INFO
    self.clf_info = utils.ParamWrapper(info_dict)

    if hyper_params is None:
        ADV_ENS_HP.update(MAX_ADV_TRAIN_HP)
        ADV_ENS_HP.update(DNN_HP)
        hyper_params = ADV_ENS_HP
    self.hp_params = utils.ParamWrapper(hyper_params)
    self.model_name = name

    self.base_model_method = [AdversarialTrainingDNN] * len(MAXIMIZER_METHOD_DICT)
    self.base_model_method.append(BasicDNNModel)
    self.base_model_count = len(self.base_model_method)
    assert self.base_model_count > 1, 'more than one base model is required'

    # initialization
    if self.clf_info.feature_tp == list(feature_type_scope_dict.keys())[0]:  # 'drebin'
        self.normalizer = utils.read_pickle(
            config.get('feature.' + self.clf_info.feature_tp, 'normalizer'))
    else:
        raise ValueError("Feature type is incompatible.")
    input_dim = len(utils.read_pickle(
        config.get('feature.' + self.clf_info.feature_tp, 'vocabulary')))
    self.eta = self.hp_params.eta
    feature_reverser = DrebinFeatureReverse()
    allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

    # build the attack graph
    maximizer_name_list = self.hp_params.maximizer_name_list
    self.inner_maximizers = []
    self.trial_list = []
    for maximizer_name in maximizer_name_list:
        maximizer_method = MAXIMIZER_METHOD_DICT[maximizer_name]
        maximizer_param = MAXIMIZER_PARAM_DICT[maximizer_name]
        inner_maximizer = maximizer_method(self,
                                           input_dim,
                                           allow_insert_array,
                                           allow_removal_array,
                                           self.normalizer,
                                           verbose=False,
                                           **maximizer_param)
        self.inner_maximizers.append(inner_maximizer)
        self.trial_list.append(self.hp_params.trials_dict[maximizer_name])

    # record the number of malware examples in a training batch
    self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)

    super(DAdversarialDeepEnsembleMax, self).__init__(info_dict,
                                                      hyper_params,
                                                      reuse=reuse,
                                                      is_saving=self.is_saving,
                                                      init_graph=self.init_graph,
                                                      mode=self.mode,
                                                      name=name)
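# Illustrative usage (assumption: a train() entry point is inherited from BasicDNNModel,
# which is not shown in this excerpt; only the constructor signature is taken from this file):
#
#   model = DAdversarialDeepEnsembleMax(mode='train')  # merges the ADV_ENS_INFO/ADV_ENS_HP defaults
#   model.train()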
sys.path.append(proj_dir)
from config import config
from tools import utils
from learner.feature_extractor import get_droid_feature, FeatureMapping, feature_type_scope_dict
from learner.basic_DNN import BasicDNNModel, DNN_HP, INFO
from attacker.feature_reverser import DrebinFeatureReverse
from defender.at import MAXIMIZER_PARAM_DICT, MAXIMIZER_METHOD_DICT
from defender.at import AdversarialTrainingDNN
from defender.at import ADV_TRAIN_HP
from defender.at_ma import MAX_ADV_TRAIN_HP

ADV_ENS_INFO = {
    'dataset_dir': config.get('dataset', 'dataset_root'),
    'feature_tp': list(feature_type_scope_dict.keys())[0],  # 'drebin'
    'feature_mapping_type': config.get('feature.drebin', 'feature_mp'),
    'learning_algorithm': 'ADV_ENS_BASE'
}

ADV_ENS_HP = {
    'lambda_2': 1.  # the balancing factor for regularizing the base models
}


class DAdversarialDeepEnsembleMax(BasicDNNModel):
    def __init__(self, info_dict=None, hyper_params=None, reuse=False, is_saving=True,
import os
import sys

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from learner.classification import *
from tools import utils
from learner.feature_extractor import get_droid_feature, FeatureMapping, DREBIN_FEAT_INFO, feature_type_scope_dict
from config import config, logging

logger = logging.getLogger("learning.basic_dnn")

INFO = {
    'dataset_dir': config.get('dataset', 'dataset_root'),
    'feature_type': list(feature_type_scope_dict.keys())[0],  # 'drebin'
    'feature_mapping_type': config.get('feature.drebin', 'feature_mp'),
    'use_interdependent_features': False,
    'learning_algorithm': 'DNN'
}

DNN_HP = {
    'random_seed': 23456,
    'hidden_units': [160, 160],  # DNN has two hidden layers, each with 160 neurons
    'output_dim': 2,             # malicious vs. benign
    'n_epochs': 150,
    'batch_size': 128,
    'learning_rate': 0.001,
    'optimizer': 'adam'          # others are not supported
}
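# Illustrative override (assumption: these dictionaries are the defaults consumed by
# BasicDNNModel below, which accepts (info_dict, hyper_params, ...) positionally, as the
# subclasses' super().__init__ calls in this repository suggest):
#
#   custom_hp = dict(DNN_HP, learning_rate=5e-4, n_epochs=50)
#   model = BasicDNNModel(INFO, custom_hp, mode='train')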