    def __init__(self,
                 info_dict=None,
                 hyper_params_dict=None,
                 reuse=False,
                 name='SURROGATE'):
        if info_dict is None:
            raise ValueError("Model information must be provided.")
        if hyper_params_dict is None:
            raise ValueError("Hyper-parameters must be provided.")

        self.info_dict = info_dict
        self.info = utils.ParamWrapper(self.info_dict)

        self.hp_dict = hyper_params_dict
        self.hp_params = utils.ParamWrapper(self.hp_dict)

        self.feature_tp = self.info.feature_type
        self.feature_mp = self.info.feature_mapping_type
        self.feature_utility_rate = self.info.feature_utility_rate
        self.dataset_dir = self.info.dataset_dir

        self.name = name

        self.mal_dir = os.path.join(self.dataset_dir,
                                    config.get('dataset', 'malware_dir_name'))
        self.ben_dir = os.path.join(self.dataset_dir,
                                    config.get('dataset', 'benware_dir_name'))

        tmp_save_dir = config.get('experiments', 'surrogate_save_dir')
        if not os.path.exists(tmp_save_dir):
            os.makedirs(tmp_save_dir)  # create intermediate directories as needed

        self.save_dir = tmp_save_dir

        # self._data_preprocess()

        # model necessaries
        self.input_dim = len(
            utils.read_pickle(os.path.join(
                self.save_dir, 'vocabulary')))  # update in the future
        self.hidden_layers = self.hp_params.hidden_units
        self.output_dim = self.hp_params.output_dim

        # self.model_graph()

        super(SurrogateModel, self).__init__(self.info_dict,
                                             self.hp_dict,
                                             reuse,
                                             is_saving=False,
                                             name=self.name)
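
# --- A minimal sketch of utils.ParamWrapper's apparent behavior ---
# The constructors above read dict keys as attributes (e.g. self.info.feature_type),
# so ParamWrapper presumably maps attribute access onto the wrapped dict. The
# stand-in below is an assumption for illustration, not the repository's code.
class _ParamWrapperSketch(object):
    """Hypothetical stand-in: exposes dictionary keys as attributes."""

    def __init__(self, param_dict):
        self._params = param_dict

    def __getattr__(self, key):
        try:
            return self._params[key]
        except KeyError:
            raise AttributeError("no parameter named %r" % key)


_info = _ParamWrapperSketch({'feature_type': 'drebin', 'dataset_dir': '/tmp/data'})
assert _info.feature_type == 'drebin'  # mirrors self.info.feature_type above
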
    def __init__(self,
                 info_dict=None,
                 hyper_params=None,
                 reuse=False,
                 is_saving=True,
                 init_graph=True,
                 mode='train',
                 name='DADV_NN_ENSEMBLE_MAX'):
        """
        hardened deep ensemble incorporated with ''max'' attack and a diversifying method
        @param info_dict: None,
        @param hyper_params: hyper parameters,
        @param reuse: reuse the variables or not
        @param is_saving: option for saving weights
        @param init_graph: initialize graph
        @param mode: enable a mode for run the model, 'train' or 'test'
        @param name: model name
        """
        self.is_saving = is_saving
        self.init_graph = init_graph
        self.mode = mode
        if info_dict is None:
            ADV_ENS_INFO.update(INFO)
            info_dict = ADV_ENS_INFO
        self.clf_info = utils.ParamWrapper(info_dict)
        if hyper_params is None:
            ADV_ENS_HP.update(MAX_ADV_TRAIN_HP)
            ADV_ENS_HP.update(DNN_HP)
            hyper_params = ADV_ENS_HP
        self.hp_params = utils.ParamWrapper(hyper_params)
        self.model_name = name

        self.base_model_method = [AdversarialTrainingDNN] * len(MAXIMIZER_METHOD_DICT)
        self.base_model_method.append(BasicDNNModel)
        self.base_model_count = len(self.base_model_method)
        assert self.base_model_count > 1, 'at least two base models are required'

        # initialization
        if self.clf_info.feature_tp == list(feature_type_scope_dict.keys())[0]:
            self.normalizer = utils.read_pickle(config.get('feature.' + self.clf_info.feature_tp, 'normalizer'))
        else:
            raise ValueError("Feature type is incompatible.")
        input_dim = len(utils.read_pickle(config.get('feature.' + self.clf_info.feature_tp, 'vocabulary')))
        self.eta = self.hp_params.eta
        feature_reverser = DrebinFeatureReverse()
        allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

        # build attack graph
        maximizer_name_list = self.hp_params.maximizer_name_list
        self.inner_maximizers = []
        self.trial_list = []
        for maximizer_name in maximizer_name_list:
            maximizer_method = MAXIMIZER_METHOD_DICT[maximizer_name]
            maximizer_param = MAXIMIZER_PARAM_DICT[maximizer_name]
            inner_maximizer = maximizer_method(self,
                                               input_dim,
                                               allow_insert_array,
                                               allow_removal_array,
                                               self.normalizer,
                                               verbose=False,
                                               **maximizer_param
                                               )

            self.inner_maximizers.append(inner_maximizer)
            self.trial_list.append(self.hp_params.trials_dict[maximizer_name])

        # record the number of malware examples in a training batch
        self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)

        super(DAdversarialDeepEnsembleMax, self).__init__(info_dict,
                                                          hyper_params,
                                                          reuse=reuse,
                                                          is_saving=self.is_saving,
                                                          init_graph=self.init_graph,
                                                          mode=self.mode,
                                                          name=name)
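
# --- Why the update order above matters (hypothetical values) ---
# dict.update overwrites earlier keys, so after ADV_ENS_HP.update(MAX_ADV_TRAIN_HP)
# and ADV_ENS_HP.update(DNN_HP), any key defined in several dicts takes DNN_HP's
# value. Note this also mutates the module-level ADV_ENS_HP in place.
_adv_ens_hp = {'eta': 0.1}
_max_adv_train_hp = {'eta': 0.05, 'trials': 5}
_dnn_hp = {'eta': 0.01, 'hidden_units': [160, 160]}
_adv_ens_hp.update(_max_adv_train_hp)
_adv_ens_hp.update(_dnn_hp)
assert _adv_ens_hp == {'eta': 0.01, 'trials': 5, 'hidden_units': [160, 160]}
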
    def __init__(self,
                 info_dict=None,
                 hyper_params=None,
                 reuse=False,
                 is_saving=True,
                 init_graph=True,
                 mode='train',
                 name='BASIC_DNN'):
        """
        build basic dnn model
        @param info_dict: None,
        @param hyper_params: hyper parameters,
        @param reuse: reuse the variables or not
        @param is_saving: option for saving weights
        @param init_graph: initialize graph
        @param mode: enable a mode for run the model, 'train' or 'test'
        @param name: model name
        """
        super(BasicDNNModel, self).__init__()
        # model setup
        self.is_saving = is_saving
        self.init_graph = init_graph
        if mode not in ('train', 'test'):
            raise ValueError("mode must be 'train' or 'test', not %r" % mode)

        self.mode = mode
        if info_dict is not None:
            self.info_dict = info_dict
        else:
            self.info_dict = INFO
        self.info = utils.ParamWrapper(self.info_dict)
        if hyper_params is not None:
            self.hp_params_dict = hyper_params
        else:
            self.hp_params_dict = DNN_HP
        self.hp_params = utils.ParamWrapper(self.hp_params_dict)
        self.model_name = name

        if self.is_saving:
            self.save_dir = config.get('experiments', name.lower())

        # feature extraction
        self.feature_tp = self.info.feature_type  # drebin
        self.feature_mp = self.info.feature_mapping_type  # binary
        self.dataset_dir = self.info.dataset_dir

        self.mal_dir = os.path.join(self.dataset_dir,
                                    config.get('dataset', 'malware_dir_name'))
        self.ben_dir = os.path.join(self.dataset_dir,
                                    config.get('dataset', 'benware_dir_name'))

        if not (os.path.exists(
                config.get('feature.' + self.feature_tp, 'dataX'))
                and os.path.exists(
                    config.get('feature.' + self.feature_tp, 'datay'))
                and os.path.exists(
                    config.get('feature.' + self.feature_tp, 'vocabulary'))
                and os.path.exists(
                    config.get('feature.' + self.feature_tp, 'normalizer'))
                and os.path.exists(config.get('dataset', 'name_list'))):
            self._data_preprocess()

        # obtain some hyper-parameters
        self.input_dim = len(
            utils.read_pickle(
                config.get('feature.' + self.feature_tp, 'vocabulary')))
        self.hidden_layers = self.hp_params.hidden_units
        self.output_dim = self.hp_params.output_dim
        tf.set_random_seed(self.hp_params.random_seed)
        if self.init_graph:
            self.model_graph(reuse=reuse)
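
# --- The cache-or-preprocess pattern used above, as a self-contained sketch ---
# BasicDNNModel only runs _data_preprocess() when a cached artifact is missing.
# The helper and paths below are hypothetical; the real paths come from config.get().
import os


def _artifacts_ready(paths):
    """Return True only if every cached feature artifact exists on disk."""
    return all(os.path.exists(p) for p in paths)


_required = ['/tmp/feature/dataX', '/tmp/feature/datay',
             '/tmp/feature/vocabulary', '/tmp/feature/normalizer']
if not _artifacts_ready(_required):
    pass  # here the model would call self._data_preprocess() to rebuild the cache
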
    def __init__(self,
                 info_dict=None,
                 hyper_params=None,
                 reuse=False,
                 is_saving=True,
                 init_graph=True,
                 mode='train',
                 name='ADV_TRAINING_DNN_MAX'):
        """
        hardened model incorporated with ''max'' attack
        @param info_dict: None,
        @param hyper_params: hyper parameters,
        @param reuse: reuse the variables or not
        @param is_saving: option for saving weights
        @param init_graph: initialize graph
        @param mode: enable a mode for run the model, 'train' or 'test'
        @param name: model name
        """
        self.is_saving = is_saving
        self.init_graph = init_graph
        self.mode = mode
        if info_dict is None:
            info_dict = INFO
        self.info = utils.ParamWrapper(
            info_dict
        )  # get useful information; this will be overwritten in the parent class
        self.feature_tp = self.info.feature_type

        if hyper_params is None:
            MAX_ADV_TRAIN_HP.update(DNN_HP)
            hyper_params = MAX_ADV_TRAIN_HP
        self.hp_params = utils.ParamWrapper(hyper_params)

        # initialization
        if self.feature_tp == list(feature_type_scope_dict.keys())[0]:
            self.normalizer = utils.read_pickle(
                config.get('feature.' + self.feature_tp, 'normalizer'))
        else:
            raise ValueError("Feature type is incompatible.")

        input_dim = len(
            utils.read_pickle(
                config.get('feature.' + self.feature_tp, 'vocabulary')))
        self.eta = self.hp_params.eta
        feature_reverser = DrebinFeatureReverse()
        allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

        # build attack graph
        maximizer_name_list = self.hp_params.maximizer_name_list
        self.inner_maximizers = []
        self.trial_list = []
        for maximizer_name in maximizer_name_list:
            maximizer_method = MAXIMIZER_METHOD_DICT[maximizer_name]
            maximizer_param = MAXIMIZER_PARAM_DICT[maximizer_name]
            inner_maximizer = maximizer_method(self,
                                               input_dim,
                                               allow_insert_array,
                                               allow_removal_array,
                                               self.normalizer,
                                               verbose=False,
                                               **maximizer_param)

            self.inner_maximizers.append(inner_maximizer)
            self.trial_list.append(self.hp_params.trials_dict[maximizer_name])

        # record the number of malware examples in a training batch
        self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)
        super(AdversarialTrainingDNNMax,
              self).__init__(info_dict, hyper_params, reuse, self.is_saving,
                             self.init_graph, self.mode, name)
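
# --- What the ''max'' attack means, as a self-contained sketch ---
# Per the docstring, the model is hardened with the 'max' attack: among several
# inner maximizers, keep the perturbed example that maximizes the training loss.
# Interfaces below are hypothetical; the real selection is wired into the TF graph.
import numpy as np


def _max_attack_sketch(x, inner_maximizers, loss_fn):
    """Return the candidate perturbation of x with the highest loss."""
    candidates = [maximize(x) for maximize in inner_maximizers]
    losses = [loss_fn(c) for c in candidates]
    return candidates[int(np.argmax(losses))]


# Toy usage: two 'maximizers' nudging x in opposite directions.
_x0 = np.zeros(3)
_picked = _max_attack_sketch(_x0, [lambda x: x + 1, lambda x: x - 1],
                             loss_fn=lambda c: float(c.sum()))
assert np.allclose(_picked, _x0 + 1)
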
    def __init__(self,
                 info_dict=None,
                 hyper_params=None,
                 reuse=False,
                 is_saving=True,
                 init_graph=True,
                 mode='train',
                 name='ADV_TRAINING_DNN'):
        """
        hardened model incorporated with adversarial training
        @param info_dict: None,
        @param hyper_params: hyper parameters,
        @param reuse: reuse the variables or not
        @param is_saving: option for saving weights
        @param init_graph: initialize graph
        @param mode: enable a mode for run the model, 'train' or 'test'
        @param name: model name
        """
        self.is_saving = is_saving
        self.init_graph = init_graph
        self.mode = mode
        if info_dict is None:
            info_dict = INFO
        # get useful information; this will be overwritten in subclasses
        self.info = utils.ParamWrapper(info_dict)
        self.feature_tp = self.info.feature_type

        if hyper_params is None:
            ADV_TRAIN_HP.update(DNN_HP)
            hyper_params = ADV_TRAIN_HP  # hyper_params specifies which attack is used
        self.hp_params = utils.ParamWrapper(hyper_params)

        # initialization
        if self.feature_tp == list(feature_type_scope_dict.keys())[0]:
            self.normalizer = utils.read_pickle(
                config.get('feature.' + self.feature_tp, 'normalizer'))
        else:
            raise ValueError("Feature type is incompatible.")
        input_dim = len(
            utils.read_pickle(
                config.get('feature.' + self.feature_tp, 'vocabulary')))
        self.eta = self.hp_params.eta
        feature_reverser = DrebinFeatureReverse()
        allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

        inner_max_name = self.hp_params.maximizer_name
        inner_max_param = MAXIMIZER_PARAM_DICT[inner_max_name]
        self.inner_maximizer = MAXIMIZER_METHOD_DICT[inner_max_name](
            self,
            input_dim,
            allow_insert_array,
            allow_removal_array,
            self.normalizer,
            verbose=False,
            **inner_max_param)

        self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)
        super(AdversarialTrainingDNN,
              self).__init__(info_dict, hyper_params, reuse, self.is_saving,
                             self.init_graph, self.mode,
                             name + '_' + inner_max_name.upper())
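
# --- Name suffixing and default construction (sketch) ---
# The super() call above appends the inner maximizer's name to the model name.
# The maximizer name below is a hypothetical value for illustration:
_inner_max_name = 'pgd_l2'
assert 'ADV_TRAINING_DNN' + '_' + _inner_max_name.upper() == 'ADV_TRAINING_DNN_PGD_L2'
# Hypothetical usage, assuming the surrounding repository's import layout:
# model = AdversarialTrainingDNN(mode='train')  # falls back to INFO and
#                                               # ADV_TRAIN_HP merged with DNN_HP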