Example #1
0
def check_np_traning():
    """Train and evaluate numpy SVC models on each configured dataset.

    For every dataset name in ``traverse_list`` the data is loaded via
    ``get_libsvm_data`` and one SVC per kernel is trained, evaluated,
    and the range of its dual coefficients printed.  Datasets that fail
    to load are reported and skipped instead of aborting the sweep.
    """
    traverse_list = ['a9a']
    for name in traverse_list:
        n_features = _config.datasets[name]['n_features']
        is_multi = _config.datasets[name]['is_multi']
        has_test = _config.datasets[name]['has_test']

        try:
            X_train, y_train, X_test, y_test = get_libsvm_data(
                name, n_features, is_multi, has_test)
        except Exception as exc:
            # Best-effort sweep: report why the dataset was skipped
            # instead of silently swallowing the error.
            print('skip dataset {}: {}'.format(name, exc))
            continue

        # -------------------
        # Train np model
        # -------------------

        # svc: one model per kernel type
        kernels = ['linear', 'rbf', 'poly', 'sigmoid']
        for kernel in kernels:
            print('---{} svc---'.format(kernel))
            svc = construct_np_model(model_type='svc', kernel=kernel)
            svc = train_np_model(svc, X_train, y_train)
            eval_np_model(svc, X_train, y_train, X_test, y_test)
            # dual_coef_[0] holds the signed alpha_i * y_i of the
            # support vectors; inspect the magnitude range.
            alpha_i = np.abs(svc.dual_coef_[0])
            print('max_alpha_i:{}'.format(np.max(alpha_i)))
            print('min_alpha_i:{}'.format(np.min(alpha_i)))
Example #2
0
def check_kernel(kernel, dataset_name, split=5000):
    """Compare sklearn's kernel matrix / decision function against the
    hand-rolled ('realize') implementation on one dataset.

    Parameters
    ----------
    kernel : str
        Kernel name passed to ``construct_np_model`` (e.g. 'rbf').
    dataset_name : str
        Key into ``_config.datasets``.
    split : int, optional
        Index at which the support vectors are split for the composite
        (batched) fx check.  Defaults to 5000, the value previously
        hard-coded, so existing callers are unaffected.
    """
    name = dataset_name
    n_features = _config.datasets[name]['n_features']
    is_multi = _config.datasets[name]['is_multi']
    has_test = _config.datasets[name]['has_test']

    X_train, y_train, X_test, y_test = get_libsvm_data(name,
                                                       n_features,
                                                       is_multi,
                                                       has_test,
                                                       show=False)

    svc = construct_np_model(model_type='svc', kernel=kernel)
    svc = train_np_model(svc, X_train, y_train)

    X_fit = X_train[svc.support_]
    coef = svc.dual_coef_
    intercept = svc.intercept_
    params = svc.get_params()
    if 'gamma' in params:
        # Resolve sklearn's 'scale' gamma to its numeric value:
        # 1 / (n_features * X.var()).
        params['gamma'] = 1 / (
            n_features *
            X_train.var()) if params['gamma'] == 'scale' else params['gamma']

    print('----show kernel metrics---')
    km_interface = cal_km(params, X_fit, X_train[:10], type='interface')
    km_realize = cal_km(params, X_fit, X_train[:10], type='realize')

    print('\n sklearn:')
    print(km_interface)
    print(km_interface.shape)

    print('\n realize:')
    print(km_realize)
    print(km_realize.shape)
    print('')

    # fx on the first 10 training instances, all four ways: both kernel
    # matrices, sklearn's decision_function, and the predicted labels.
    print('---show hand-10 fx ---')
    fx_interface = cal_fx(km_interface, coef, intercept)
    fx_realize = cal_fx(km_realize, coef, intercept)
    print('\n fx_interface:')
    print(fx_interface)
    print('\n fx_realize:')
    print(fx_realize)
    print('\n decsion function:')
    print(svc.decision_function(X_train[:10]))
    print('\n y_pred:')
    print(svc.predict(X_train[:10]))
    print('')

    # Composite check: fx computed in two batches of support vectors
    # must equal the full fx once the intercept is added back.
    print('---show composite fx---')
    km1 = cal_km(params, X_fit[:split], X_train[:10], type='realize')
    fx1 = cal_fx(km1, coef[:, :split], 0)
    km2 = cal_km(params, X_fit[split:], X_train[:10], type='realize')
    fx2 = cal_fx(km2, coef[:, split:], 0)
    print('\n fx1-{} ins'.format(split))
    print(fx1)
    print('\n fx2-others')
    print(fx2)
    print('\n batch fx = fx1 + fx2 + intercept')
    print(fx1 + fx2 + intercept)
    print('')
Example #3
0
def conv_check(kernel, dataset_name):
    """Check the convolutional compression module over several rates.

    Trains an SVC on *dataset_name*, sorts its support vectors by dual
    coefficient, packs them into fixed-size per-class maps, then for a
    range of compression rates runs the maps through ``CNN_Res18`` and
    prints the size and label distribution of the generated instances.
    """
    # Train the svc
    name = dataset_name
    n_features = _config.datasets[name]['n_features']
    is_multi = _config.datasets[name]['is_multi']
    has_test = _config.datasets[name]['has_test']

    X_train, y_train, X_test, y_test = get_libsvm_data(name,
                                                       n_features,
                                                       is_multi,
                                                       has_test,
                                                       show=False)

    svc = construct_np_model(model_type='svc', kernel=kernel)
    svc = train_np_model(svc, X_train, y_train)

    X_fit = X_train[svc.support_]
    y_fit = y_train[svc.support_]
    coef = svc.dual_coef_
    params = svc.get_params()
    if 'gamma' in params:
        # Resolve sklearn's 'scale' gamma to its numeric value.
        params['gamma'] = 1 / (
            n_features *
            X_train.var()) if params['gamma'] == 'scale' else params['gamma']

    # Feed the data per class, in fixed-size batches (maps)
    map_size = 128
    n_blocks = [2, 2, 2, 2]

    # Sort X_fit / y_fit / coef in ascending order of dual coefficient
    sorted_idx = np.argsort(coef[0])
    sorted_coef = coef[0][sorted_idx].reshape(-1, 1)
    sorted_X_fit = X_fit[sorted_idx]
    sorted_y_fit = y_fit[sorted_idx]

    # Discard the low-weight instances
    fit_dataloaders, n_fit = get_fit_dataloaders(sorted_X_fit, sorted_coef,
                                                 sorted_y_fit, _config.labels,
                                                 map_size)

    # Show the original distribution.
    # BUGFIX: sorted(Counter(y)) iterated only the label keys, losing the
    # counts; use .items() as the compressed-distribution print below does.
    print('----data distribution----')
    print('train data - num:{}, distribution:{}'.format(
        X_train.shape[0], sorted(Counter(y_train).items())))
    print('fit data - num:{}, distribution:{}'.format(
        X_fit.shape[0], sorted(Counter(y_fit).items())))
    print(' ')
    # Compression rates to try (if map_size is too small, 90% fails)
    rate_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    for rate in rate_list:
        # Number of instances each map is compressed to.  Use a distinct
        # name for the per-map rate so the loop variable isn't clobbered.
        n_compressed = X_fit.shape[0] * (1 - rate)
        compress_rate = n_compressed / n_fit
        n_generate = int(compress_rate * map_size)
        conv_module = CNN_Res18(Basic_Block, n_blocks, n_generate)
        if torch.cuda.is_available():
            conv_module = conv_module.cuda()

        X_gen = None
        y_gen = None
        is_none = True
        for label in _config.labels:
            fit_dataloader = fit_dataloaders[label]
            for data in fit_dataloader:
                X_map, coef_map = data
                X_map = Variable(X_map)
                if torch.cuda.is_available():
                    X_map = X_map.cuda()
                X_gen_temp = conv_module(X_map)
                y_gen_temp = np.full((X_gen_temp.shape[0], ), label)
                if is_none:
                    print(X_gen_temp.size())
                    X_gen = X_gen_temp.cpu().detach().numpy()
                    y_gen = y_gen_temp
                    is_none = False
                else:
                    X_gen = np.concatenate(
                        (X_gen, X_gen_temp.cpu().detach().numpy()), axis=0)
                    y_gen = np.concatenate((y_gen, y_gen_temp), axis=0)
        # Show the compressed distribution
        print('{:.2f}% data - num:{}, distribution:{}'.format(
            compress_rate * 100, X_gen.shape[0],
            sorted(Counter(y_gen).items())))
Example #4
0
def datamodule_check(kernel, dataset_name):
    """Smoke-test ``GeneralDataModule``: print one batch per loader.

    Trains an SVC, builds the per-class fit dataloaders, then for each
    label constructs a ``GeneralDataModule`` from the first map and
    prints the shapes of one train/val/test batch.
    """
    # Train the svc
    name = dataset_name
    n_features = _config.datasets[name]['n_features']
    is_multi = _config.datasets[name]['is_multi']
    has_test = _config.datasets[name]['has_test']

    X_train, y_train, X_test, y_test = get_libsvm_data(name,
                                                       n_features,
                                                       is_multi,
                                                       has_test,
                                                       show=False)

    svc = construct_np_model(model_type='svc', kernel=kernel)
    svc = train_np_model(svc, X_train, y_train)

    X_fit = X_train[svc.support_]
    y_fit = y_train[svc.support_]
    coef = svc.dual_coef_
    params = svc.get_params()
    if 'gamma' in params:
        # Resolve sklearn's 'scale' gamma to its numeric value.
        params['gamma'] = 1 / (
            n_features *
            X_train.var()) if params['gamma'] == 'scale' else params['gamma']

    # Feed the data per class, in fixed-size batches (maps)
    map_size = 128

    # Sort X_fit / y_fit / coef in ascending order of dual coefficient
    sorted_idx = np.argsort(coef[0])
    sorted_coef = coef[0][sorted_idx].reshape(-1, 1)
    sorted_X_fit = X_fit[sorted_idx]
    sorted_y_fit = y_fit[sorted_idx]

    # Discard the low-weight instances
    fit_dataloaders, n_fit = get_fit_dataloaders(sorted_X_fit, sorted_coef,
                                                 sorted_y_fit, _config.labels,
                                                 map_size)

    # BUGFIX: the dataloader loop was dedented out of the label loop, so
    # only the LAST label's dataloader was ever inspected.  Nest it (as
    # conv_check does) so one map is checked for every label.
    for label in _config.labels:
        fit_dataloader = fit_dataloaders[label]
        for data in fit_dataloader:
            X_map, coef_map = data
            coef_map = coef_map.view(1, -1)
            gdm = GeneralDataModule(_config.datasets[name]['n_features'],
                                    _config.n_classes,
                                    _config.labels,
                                    batch_size=64)
            gdm.setup(X_train, X_test, params, X_map[0][0].numpy(),
                      coef_map.numpy(), label)

            train_loader = gdm.train_dataloader()
            val_loader = gdm.val_dataloader()
            test_loader = gdm.test_dataloader()

            print('train data')
            for train_data in train_loader:
                X_batch, fx_batch = train_data
                print('X size:', X_batch.size())
                print('fx size:', fx_batch.size())
                print(fx_batch)
                break

            print('val data')
            for val_data in val_loader:
                X_batch, fx_batch = val_data
                print(X_batch.size())
                print('fx size:', fx_batch.size())
                print(fx_batch)
                break

            print('test data')
            for test_data in test_loader:
                X_batch, fx_batch = test_data
                print(X_batch.size())
                print('fx size:', fx_batch.size())
                print(fx_batch)
                break
            # One map per label is enough for this smoke test.
            break
Example #5
0
    is_multi = datasets[name]['is_multi']
    has_test = datasets[name]['has_test']

    # -------------------
    # Load Data
    # -------------------

    print('...Load data')
    X_train, y_train, X_test, y_test = get_libsvm_data(name, n_features, is_multi, has_test)

    # -------------------
    # Train np model
    # -------------------

    print('\n... Train np model')
    np_model = construct_np_model(model_type=model_type, kernel=kernel)
    np_model = train_np_model(np_model, X_train, y_train)
    eval_np_model(np_model, X_train, y_train, X_test, y_test)

    # 获取参数并计算fx
    X_fit = X_train[np_model.support_]
    y_fit = y_train[np_model.support_]
    params = np_model.get_params()
    if 'gamma' in params:
        params['gamma'] = 1 / (n_features * X_train.var()) if params['gamma'] == 'scale' else params['gamma']
    coef = np_model.dual_coef_
    intercept = np_model.intercept_

    km = cal_km(params, X_fit, X_test[:10], type='realize')
    fx = cal_fx(km, coef, intercept)
    print('\nfx-realize:')