예제 #1
0
def _get_data(data_pth=None, data_name=None, direction='src_dst', feat='iat_size', header=False,
              overwrite=False):
    """Return the data stored at ``data_pth``, generating the file first if it is missing.

    Parameters
    ----------
    data_pth:
        Path of the data file. It is deleted up front when ``overwrite`` is
        set and it exists.
    data_name:
        Forwarded as the first argument of ``generate_data_speed_up``.
    direction:
        Forwarded as ``direction`` to ``generate_data_speed_up``.
    feat:
        Forwarded as ``feat_type`` to ``generate_data_speed_up``.
    header:
        Forwarded as ``header`` to ``generate_data_speed_up``.
    overwrite:
        When truthy, an existing ``data_pth`` is removed so the data is
        regenerated; the flag is also forwarded to ``generate_data_speed_up``.

    Returns
    -------
        Whatever ``load_data`` yields for the resulting file (documented by
        the original author as X: features, y: labels).
    """
    # A rebuild was requested: drop the existing file so the check below misses.
    if overwrite and pth.exists(data_pth):
        os.remove(data_pth)

    # Fast path: the file is already on disk.
    if pth.exists(data_pth):
        return load_data(data_pth)

    # The generator reports the path it actually wrote to; load from that path.
    generated_pth = generate_data_speed_up(data_name,
                                           feat_type=feat,
                                           header=header,
                                           direction=direction,
                                           out_file=data_pth,
                                           overwrite=overwrite)
    return load_data(generated_pth)
예제 #2
0
def _get_data(data_pth=None,
              data_name=None,
              direction='src_dst',
              feat='iat_size',
              header=False,
              overwrite=False):
    """Load the data at ``data_pth``, creating the file beforehand when absent.

    When ``overwrite`` is truthy an existing ``data_pth`` is removed first, so
    the data is always regenerated through ``generate_data_speed_up``.  The
    path returned by the generator is the one that is finally passed to
    ``load_data``, whose result is returned unchanged.
    """
    must_purge = overwrite and pth.exists(data_pth)
    if must_purge:
        os.remove(data_pth)

    if not pth.exists(data_pth):
        # Options handed to the generator; ``feat`` is passed under the name ``feat_type``.
        generator_options = dict(feat_type=feat,
                                 header=header,
                                 direction=direction,
                                 out_file=data_pth,
                                 overwrite=overwrite)
        data_pth = generate_data_speed_up(data_name, **generator_options)

    return load_data(data_pth)
예제 #3
0
def _restore_ocsvm(model_params):
    """Rebuild an ``OCSVM`` instance from a dict of saved fitted attributes."""
    oc = OCSVM()
    oc.kernel = model_params['kernel']
    # '_gamma' is only used by the 'rbf' kernel; 'linear' does not need it.
    oc._gamma = model_params['_gamma']
    oc.gamma = oc._gamma

    oc.support_vectors_ = model_params['support_vectors_']
    # Coefficients of the support vectors in the decision function.
    oc._dual_coef_ = model_params['_dual_coef_']
    oc.dual_coef_ = oc._dual_coef_
    oc._intercept_ = model_params['_intercept_']
    oc.intercept_ = oc._intercept_

    # The saved support-vector indices are not restored; a zero-filled
    # placeholder with one entry per support vector is used instead.
    oc.support_ = np.zeros(oc.support_vectors_.shape[0], dtype=np.int32)
    # Number of support vectors for each class.
    oc._n_support = model_params['_n_support']
    # Selects sparse vs. dense kernel computation.
    oc._sparse = model_params['_sparse']
    # Used to check that train and test sets have the same dimension.
    oc.shape_fit_ = model_params['shape_fit_']
    # Pairwise probability information is not used, so both are left empty.
    oc.probA_ = np.zeros(0)
    oc.probB_ = np.zeros(0)
    oc.offset_ = -1 * model_params['_intercept_']
    return oc


def _restore_gmm(model_params):
    """Rebuild a ``GMM`` instance from a dict of saved fitted attributes."""
    oc = GMM()
    oc.covariance_type = model_params['covariance_type']
    oc.weights_ = model_params['weights_']
    oc.means_ = model_params['means_']
    # Only the Cholesky factor is restored; 'precisions_' is deliberately left unset.
    oc.precisions_cholesky_ = model_params['precisions_cholesky_']
    return oc


def _restore_projection(model_name, project_params_file):
    """Rebuild the optional KJL / Nystrom projection selected by ``model_name``.

    Returns ``(project, flags)`` where ``project`` is ``None`` when neither
    'KJL' nor 'Nystrom' occurs in ``model_name`` (in which case
    ``project_params_file`` is never read) and ``flags`` is the
    ``{'is_kjl': ..., 'is_nystrom': ...}`` dict handed to ``_test``.
    """
    flags = {'is_kjl': False, 'is_nystrom': False}
    project = None
    if 'KJL' in model_name:
        project_params = load_data(project_params_file)
        project = KJL(None)
        project.sigma = project_params['sigma']
        project.Xrow = project_params['Xrow']
        project.U = project_params['U']
        flags['is_kjl'] = True
    elif 'Nystrom' in model_name:
        project_params = load_data(project_params_file)
        project = NYSTROM(None)
        project.sigma = project_params['sigma']
        project.Xrow = project_params['Xrow']
        project.eigvec_lambda = project_params['eigvec_lambda']
        flags['is_nystrom'] = True
    return project, flags


def minimal_model_cost(model_name,
                       model_params_file,
                       test_file,
                       params,
                       project_params_file,
                       is_parallel=False):
    """Rebuild a saved model from its parameters and evaluate it on a test set.

    Parameters
    ----------
    model_name:
        Selects what is rebuilt by substring: 'OCSVM' (checked first) or
        'GMM' for the detector, optionally combined with 'KJL' (checked
        first) or 'Nystrom' for the projection.
    model_params_file:
        File read with ``load_data`` to obtain the detector's saved attributes.
    test_file:
        File read with ``load_data`` to obtain ``(X_test, y_test)``.
    params:
        NOTE: the value supplied by the caller is ignored. It is replaced by a
        fresh ``{'is_kjl': ..., 'is_nystrom': ...}`` dict before use; this
        long-standing behaviour is kept so existing callers see no change.
    project_params_file:
        File read with ``load_data`` for the projection's saved attributes;
        only read when a projection is selected.
    is_parallel:
        When truthy the evaluation runs go through a joblib ``Parallel``
        pool, otherwise they run sequentially in this process.

    Returns
    -------
        ``(auc, X_test, test_time)`` where ``auc`` and ``test_time`` are the
        means over all evaluation runs of the pair returned by ``_test``.

    Raises
    ------
    NotImplementedError
        If ``model_name`` contains neither 'OCSVM' nor 'GMM'.
    """
    # 1. Create a new model from the saved parameters.
    if 'OCSVM' in model_name:
        oc = _restore_ocsvm(load_data(model_params_file))
    elif 'GMM' in model_name:
        oc = _restore_gmm(load_data(model_params_file))
    else:
        raise NotImplementedError('unsupported model_name: %r' % (model_name,))

    # The projection is restored the same way for both detector families.
    project, params = _restore_projection(model_name, project_params_file)

    # 2. Load the test set and evaluate the model.
    X_test, y_test = load_data(test_file)

    # Number of evaluation runs that are averaged.
    num = 1
    if is_parallel:
        with Parallel(n_jobs=10,
                      verbose=0,
                      backend='loky',
                      pre_dispatch=1,
                      batch_size=1) as parallel:
            # Every run receives its own deep copies of the model and data.
            outs = parallel(
                delayed(_test)(copy.deepcopy(oc),
                               copy.deepcopy(X_test),
                               copy.deepcopy(y_test),
                               params=copy.deepcopy(params),
                               project=copy.deepcopy(project))
                for _ in range(num))
    else:
        outs = [
            _test(copy.deepcopy(oc),
                  copy.deepcopy(X_test),
                  copy.deepcopy(y_test),
                  params=copy.deepcopy(params),
                  project=copy.deepcopy(project))
            for _ in range(num)
        ]

    # Each run yields an (auc, test_time) pair; average each component.
    auc, test_time = zip(*outs)
    auc = np.mean(auc)
    test_time = np.mean(test_time)

    return auc, X_test, test_time