Example #1
    def __init__(self, feature_mp='binary', use_default_feature=True):
        super(DrebinFeatureReverse,
              self).__init__(list(feature_type_scope_dict.keys())[0],
                             feature_mp, use_default_feature)
        # load feature meta-information
        try:
            if self.use_default_feature:
                self.normalizer = utils.read_pickle(
                    cfg.config.get('feature.' + self.feature_type,
                                   'normalizer'))
                self.vocab = utils.read_pickle(
                    cfg.config.get('feature.' + self.feature_type,
                                   'vocabulary'))
                self.vocab_info = utils.read_pickle(
                    cfg.config.get('feature.' + self.feature_type,
                                   'vocab_info'))
            else:  # use surrogate feature meta-information
                self.normalizer = utils.read_pickle(
                    os.path.join(
                        cfg.config.get('experiments', 'surrogate_save_dir'),
                        'normalizer'))
                self.vocab = utils.read_pickle(
                    os.path.join(
                        cfg.config.get('experiments', 'surrogate_save_dir'),
                        'vocabulary'))
                self.vocab_info = utils.read_pickle(
                    os.path.join(
                        cfg.config.get('experiments', 'surrogate_save_dir'),
                        'vocab_info'))
        except Exception as ex:
            logger.error(str(ex))
            raise IOError("Unable to load the feature meta-information.")
    def _initilize(self):
        """initialization"""
        all_method_names = []
        for mode, method_list in attack_method_dict.items():
            if self.attack_method_name in method_list:
                self.attack_mode = mode
            all_method_names.extend(method_list)

        if self.attack_method_name not in all_method_names:
            raise ValueError(
                "\n\t Attack method '{}' is not supported; expected one of: {}".format(
                    self.attack_method_name, all_method_names))

        if self.attack_scenario not in attack_scenario_dict:
            raise ValueError(
                "\n\t Attack scenario '{}' is not supported; expected one of: {}".format(
                    self.attack_scenario, list(attack_scenario_dict.keys())))

        if self.targeted_model_name not in targeted_model_names_dict:
            raise ValueError(
                "\n\t Targeted model '{}' is not supported; expected one of: {}".format(
                    self.targeted_model_name, list(targeted_model_names_dict.keys())))

        # get the information of targeted model
        self.targeted_model = targeted_model_names_dict[
            self.targeted_model_name](mode='test')
        self.targeted_model_info = self.targeted_model.info
        self.targeted_model_hp = self.targeted_model.hp_params

        if self.attack_scenario == WHITEBOX:
            self.targeted_model_of_attacker = self.targeted_model
            if (self.targeted_model_of_attacker.feature_tp ==
                    list(feature_type_scope_dict.keys())[0]):  # 'drebin'
                self.feature_reverser = DrebinFeatureReverse(
                    feature_mp=self.targeted_model_of_attacker.feature_mp)
            else:
                raise ValueError("Only " +
                                 ' '.join(feature_type_scope_dict.keys()) +
                                 " are supported.")

        if self.attack_scenario == GREYBOX:
            """
            Training a dnn model as the surrogate model here.
            In the paper, we use the hardened model as the surrogate models 
            """
            sur_info_dict = {}
            sur_hp_dict = {}
            for k, v in self.other_args:
                if k in attack_scenario_dict[GREYBOX].keys():
                    attack_scenario_dict[GREYBOX][k] = v
                else:
                    raise ValueError(
                        "No '{}' key, please check it based on '{}'".format(
                            k, ','.join(attack_scenario_dict[GREYBOX].keys())))

            if attack_scenario_dict[GREYBOX][
                    'algo_knowledge'] == 0000:  # zero knowledge about algorithm
                sur_info_dict['learning_algorithm'] = 'DNN'
                sur_hp_dict = SUR_DNN_HP.copy()
            else:
                raise NotImplementedError

            if attack_scenario_dict[GREYBOX]['feature_knowledge'] == 1111:
                sur_info_dict[
                    'feature_type'] = self.targeted_model_info.feature_type
                sur_info_dict[
                    'feature_mapping_type'] = self.targeted_model_info.feature_mapping_type
                sur_info_dict['feature_utility_rate'] = 1.
            else:
                raise NotImplementedError

            if attack_scenario_dict[GREYBOX]['dataset_knowledge'] == 1111:
                sur_info_dict[
                    'dataset_dir'] = self.targeted_model_info.dataset_dir
            else:
                raise NotImplementedError

            surrogate_model = SurrogateModel(sur_info_dict, sur_hp_dict, False)
            # surrogate_model.train()
            # surrogate_model.graph_reset() # reset the graph, avoiding the loading of adam parameters
            self.targeted_model_of_attacker = surrogate_model

            self.feature_reverser = DrebinFeatureReverse(
                feature_mp=surrogate_model.feature_mp,
                use_default_feature=True
            )  # note: may be problematic; the surrogate model will use the default features

        if self.attack_scenario == BLACKBOX:
            for k, v in self.other_args:
                if k in attack_scenario_dict[BLACKBOX].keys():
                    attack_scenario_dict[BLACKBOX][k] = v
                else:
                    raise ValueError(
                        "No '{}' key, please check it based on '{}'".format(
                            k,
                            ','.join(attack_scenario_dict[BLACKBOX].keys())))
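As a side note on _initilize above: attack_method_dict maps each attack mode to the method names it supports, the requested name selects the mode, and unsupported names raise a ValueError listing the valid options. Below is a minimal, self-contained sketch of that pattern; all dictionary contents and method names are illustrative placeholders, not the project's actual attack names.

# Minimal sketch of the mode-lookup / validation pattern used in _initilize.
# DEMO_ATTACK_METHOD_DICT and its entries are hypothetical placeholders.
DEMO_ATTACK_METHOD_DICT = {
    'gradient_based': ['fgsm_demo', 'pgd_demo'],
    'gradient_free': ['pointwise_demo'],
}

def resolve_attack_mode(method_name):
    all_method_names = []
    for mode, method_list in DEMO_ATTACK_METHOD_DICT.items():
        if method_name in method_list:
            return mode  # the first mode whose list contains the requested method
        all_method_names.extend(method_list)
    raise ValueError("Attack method '{}' is not supported; expected one of: {}".format(
        method_name, all_method_names))

# resolve_attack_mode('pgd_demo')  -> 'gradient_based'
# resolve_attack_mode('unknown')   -> ValueError listing the supported names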
Example #3
    def __init__(self,
                 info_dict=None,
                 hyper_params=None,
                 reuse=False,
                 is_saving=True,
                 init_graph=True,
                 mode='train',
                 name='DADV_NN_ENSEMBLE_MAX'):
        """
        A hardened deep ensemble incorporating the 'max' attack and a diversifying method.
        @param info_dict: model information dictionary (None uses the defaults defined below)
        @param hyper_params: hyper-parameters (None uses the defaults defined below)
        @param reuse: whether to reuse the variables
        @param is_saving: option for saving weights
        @param init_graph: whether to initialize the graph
        @param mode: mode for running the model, 'train' or 'test'
        @param name: model name
        """
        self.is_saving = is_saving
        self.init_graph = init_graph
        self.mode = mode
        if info_dict is None:
            ADV_ENS_INFO.update(INFO)
            info_dict = ADV_ENS_INFO
        self.clf_info = utils.ParamWrapper(info_dict)
        if hyper_params is None:
            ADV_ENS_HP.update(MAX_ADV_TRAIN_HP)
            ADV_ENS_HP.update(DNN_HP)
            hyper_params = ADV_ENS_HP
        self.hp_params = utils.ParamWrapper(hyper_params)
        self.model_name = name

        self.base_model_method = [AdversarialTrainingDNN] * len(MAXIMIZER_METHOD_DICT)
        self.base_model_method.append(BasicDNNModel)
        self.base_model_count = len(self.base_model_method)
        assert self.base_model_count > 1, 'at least two base models are required'

        # initialization
        if self.clf_info.feature_tp == list(feature_type_scope_dict.keys())[0]:
            self.normalizer = utils.read_pickle(config.get('feature.' + self.clf_info.feature_tp, 'normalizer'))
        else:
            raise ValueError("Feature type is incompatible.")
        input_dim = len(utils.read_pickle(config.get('feature.' + self.clf_info.feature_tp, 'vocabulary')))
        self.eta = self.hp_params.eta
        feature_reverser = DrebinFeatureReverse()
        allow_insert_array, allow_removal_array = feature_reverser.get_mod_array()

        # build attack graph
        maximizer_name_list = self.hp_params.maximizer_name_list
        self.inner_maximizers = []
        self.trial_list = []
        for maximizer_name in maximizer_name_list:
            maximizer_method = MAXIMIZER_METHOD_DICT[maximizer_name]
            maximizer_param = MAXIMIZER_PARAM_DICT[maximizer_name]
            inner_maximizer = maximizer_method(self,
                                               input_dim,
                                               allow_insert_array,
                                               allow_removal_array,
                                               self.normalizer,
                                               verbose=False,
                                               **maximizer_param
                                               )

            self.inner_maximizers.append(inner_maximizer)
            self.trial_list.append(self.hp_params.trials_dict[maximizer_name])

        # record the number of malware examples in a training batch
        self.batch_size_mal = tf.Variable(0, dtype=tf.int64, trainable=False)

        super(DAdversarialDeepEnsembleMax, self).__init__(
            info_dict,
            hyper_params,
            reuse=reuse,
            is_saving=self.is_saving,
            init_graph=self.init_graph,
            mode=self.mode,
            name=name)
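For orientation, a hedged usage sketch of the constructor above; the surrounding training script is not shown in these examples, so the calls below are assumptions based on the signature and on how Example #1 instantiates targeted models.

# Hypothetical usage (assumption, not shown in the source examples):
# build the hardened ensemble with the default info/hyper-parameter dictionaries.
model = DAdversarialDeepEnsembleMax(mode='train')
# Example #1 instantiates targeted models in test mode when mounting attacks, e.g.
# targeted_model = DAdversarialDeepEnsembleMax(mode='test')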
Example #4
sys.path.append(proj_dir)

from config import config
from tools import utils
from learner.feature_extractor import get_droid_feature, FeatureMapping, feature_type_scope_dict
from learner.basic_DNN import BasicDNNModel, DNN_HP, INFO
from attacker.feature_reverser import DrebinFeatureReverse
from defender.at import MAXIMIZER_PARAM_DICT, MAXIMIZER_METHOD_DICT
from defender.at import AdversarialTrainingDNN
from defender.at import ADV_TRAIN_HP
from defender.at_ma import MAX_ADV_TRAIN_HP


ADV_ENS_INFO = {
    'dataset_dir': config.get('dataset', 'dataset_root'),
    'feature_tp': list(feature_type_scope_dict.keys())[0],  # 'drebin'
    'feature_mapping_type': config.get('feature.drebin', 'feature_mp'),
    'learning_algorithm': 'ADV_ENS_BASE'
}

ADV_ENS_HP = {
    'lambda_2': 1.  # the balancing factor for regularizing the base models
}


class DAdversarialDeepEnsembleMax(BasicDNNModel):
    def __init__(self,
                 info_dict=None,
                 hyper_params=None,
                 reuse=False,
                 is_saving=True,
                 init_graph=True,
                 mode='train',
                 name='DADV_NN_ENSEMBLE_MAX'):
        ...  # truncated here; the full constructor body is shown in Example #3


# The remainder of this example appears to come from a different module,
# learner/basic_dnn.py (note the "learning.basic_dnn" logger below), which
# defines the INFO and DNN_HP defaults imported near the top of this example.
import os
import sys

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from learner.classification import *
from tools import utils
from learner.feature_extractor import get_droid_feature, FeatureMapping, DREBIN_FEAT_INFO, feature_type_scope_dict
from config import config, logging

logger = logging.getLogger("learning.basic_dnn")

INFO = {
    'dataset_dir': config.get('dataset', 'dataset_root'),
    'feature_type': list(feature_type_scope_dict.keys())[0],  # 'drebin'
    'feature_mapping_type': config.get('feature.drebin', 'feature_mp'),
    'use_interdependent_features': False,
    'learning_algorithm': 'DNN'
}

DNN_HP = {
    'random_seed': 23456,
    'hidden_units': [160, 160],  # two hidden layers, each with 160 neurons
    'output_dim': 2,  # malicious vs. benign
    'n_epochs': 150,
    'batch_size': 128,
    'learning_rate': 0.001,
    'optimizer': 'adam'  # others are not supported
}
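
The INFO and DNN_HP defaults above are consumed through utils.ParamWrapper in Examples #3 and #4 (e.g. self.hp_params.eta, self.clf_info.feature_tp). Below is a minimal sketch under two assumptions: that ParamWrapper simply exposes dictionary keys as attributes (the real utils.ParamWrapper may do more), and that layering the default dictionaries with dict.update lets later dictionaries overwrite shared keys, mirroring ADV_ENS_HP.update(MAX_ADV_TRAIN_HP); ADV_ENS_HP.update(DNN_HP) in Example #3.

# ParamWrapperSketch is a stand-in for utils.ParamWrapper (assumption: it exposes
# dictionary keys as attributes).
class ParamWrapperSketch(object):
    def __init__(self, param_dict):
        for key, value in param_dict.items():
            setattr(self, key, value)

# Layer the defaults: later .update() calls overwrite shared keys.
hp = dict(DNN_HP)
hp.update({'trials': 5})  # hypothetical extra hyper-parameter, for illustration only
hp_params = ParamWrapperSketch(hp)
print(hp_params.learning_rate, hp_params.batch_size, hp_params.trials)  # 0.001 128 5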