Beispiel #1
0
def test():
    """Build an OTP_SA_FT protocol from ``fed_conf_coordinator`` and run one exchange.

    The only security parameter supplied is ``key_exchange_size`` (2048).
    """
    trainer = make_protocol(
        OTP_SA_FT,
        fed_conf_coordinator,
        {"key_exchange_size": 2048},
    )
    # exchange() is called without arguments; its return value is not used.
    trainer.exchange()
Beispiel #2
0
def test():
    """Run a single OT_INV exchange with the ``'client'`` identity and print it.

    The shared ``fed_conf_guest`` config is switched to the ``'client'``
    identity in place, the protocol is built with AES as the symmetric
    algorithm and ``n=10`` / ``k=1`` as algorithm parameters, and the id
    ``'50'`` is sent through ``exchange`` together with ``obfuscator``.
    """
    fed_conf_guest['session']['identity'] = 'client'
    federal_info = fed_conf_guest

    sec_param = {
        "symmetric_algo": "aes",
    }

    algo_param = {
        'n': 10,
        'k': 1
    }

    protocol = make_protocol(OT_INV, federal_info, sec_param, algo_param)

    import random

    def obfuscator(in_list, n):
        """Hide ``in_list`` at a random offset among random filler values.

        Args:
            in_list: list holding the real entries.
            n: total length of the returned list.

        Returns:
            ``(joint_list, index)`` where ``index`` is the position at which
            ``in_list`` starts inside ``joint_list``.
        """
        fake_list = [random.randint(0, 100) for _ in range(n - len(in_list))]
        # Draw the offset from the valid insertion points only.  An upper
        # bound of ``n - 1`` exceeds ``len(fake_list)`` as soon as ``in_list``
        # has more than one element; slicing then clamps the insertion point
        # while the too-large index would still be returned.
        index = random.randint(0, len(fake_list))
        joint_list = fake_list[:index] + in_list + fake_list[index:]
        return joint_list, index

    result = protocol.exchange('50', obfuscator)
    print(result)
Beispiel #3
0
def test_invisible_inquiry():
    """Check OT_INV query results against a local computation (client side).

    Ten queries (ids ``'0'`` .. ``'9'``) are sent through the protocol with
    the ``'client'`` identity; each returned value must equal what the local
    ``query_fun`` produces for the same id.
    """
    # Federation communication info (input); adapt to the deployed
    # environment as needed.
    fed_conf_guest['session']['identity'] = 'client'
    federal_info = fed_conf_guest

    # Security parameters (input).
    sec_param = {
        "symmetric_algo": "aes",
    }

    # Algorithm parameters (input).
    algo_param = {'n': 10, 'k': 1}

    protocol = make_protocol(OT_INV, federal_info, sec_param, algo_param)

    import random

    def obfuscator(in_list, n):
        """Mock obfuscation function: pad ``in_list`` with random fillers.

        Returns ``(joint_list, index)`` where ``joint_list`` has length ``n``
        and ``index`` is the position at which ``in_list`` starts.
        """
        fake_list = [random.randint(0, 100) for _ in range(n - len(in_list))]
        # Only offsets up to ``len(fake_list)`` are real insertion points.
        # A bound of ``n - 1`` would return a wrong index whenever
        # ``in_list`` holds more than one element, because slicing clamps.
        index = random.randint(0, len(fake_list))
        joint_list = fake_list[:index] + in_list + fake_list[index:]
        return joint_list, index

    def query_fun(in_list):
        """Mock query function: map every id to ``str(int(id) * 100)``."""
        return [str(int(i) * 100) for i in in_list]

    # Run 10 different queries with ids 0 through 9 and compare what the
    # exchange returns with the locally computed result.
    for i in range(10):
        federal_result = protocol.exchange(str(i), obfuscator)  # federated query result
        local_result = query_fun([str(i)])[0]  # local result from the same mock query function
        assert federal_result == local_result
Beispiel #4
0
def test_train():
    """Train a 6-parameter linear model through HE_LINEAR_FT exchanges.

    The first six feature columns returned by ``load_boston`` are min-max
    scaled and batched.  For every batch the local product ``feature * theta``
    (summed per row) and the labels are passed to ``protocol.exchange``; the
    returned value is combined with the features to form the gradient that
    is written into ``theta.grad`` before the SGD step.
    """
    def setup_seed(seed):
        """Seed torch on CPU and all CUDA devices; make cuDNN deterministic."""
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

    setup_seed(0)

    train_param = {
        'lr': 0.1,
        'num_epochs': 10,
        'iter_per_epoch': 8,
        'batch_size': 64
    }

    dataset = load_boston()
    raw_features = dataset.data[:, :6]

    # Min-max scaling, column by column.
    column_min = np.min(raw_features, axis=0)
    column_max = np.max(raw_features, axis=0)
    x_train = (raw_features - column_min) / (column_max - column_min)
    y_train = dataset.target

    loader = DataLoader(
        TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)),
        train_param['batch_size'],
        drop_last=False,
    )

    class LinearRegression(nn.Module):
        """Holds a single randomly initialised parameter vector ``theta``."""

        def __init__(self, in_dim):
            super().__init__()
            self.theta = nn.Parameter(torch.randn((in_dim)))

    model = LinearRegression(6)
    print(model.state_dict())

    optimizer = torch.optim.SGD(model.parameters(), train_param['lr'])

    protocol = make_protocol(
        HE_LINEAR_FT,
        fed_conf_guest,
        {"he_algo": 'paillier', "he_key_length": 1024},
        None,
    )

    for epoch in range(train_param['num_epochs']):
        print(f"epoch: {epoch}")
        for feature, label in loader:
            u = (feature * model.theta).sum(dim=1)
            exchanged = torch.as_tensor(
                protocol.exchange(u.detach().numpy(), label.detach().numpy()))
            # The gradient is set by hand rather than via backward().
            gradient = torch.mean(feature * exchanged.unsqueeze(-1), dim=0)
            optimizer.zero_grad()
            model.theta.grad = torch.as_tensor(gradient).float()
            optimizer.step()
            print('theta:', model.theta)
Beispiel #5
0
def test_he_linear_ft():
    """Build an HE_LINEAR_FT protocol from ``fed_conf_coordinator`` and run one exchange.

    ``sec_param`` requests Paillier with a key length of 1024; no algorithm
    parameters are supplied.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    trainer = make_protocol(
        HE_LINEAR_FT, fed_conf_coordinator, he_settings, algo_param=None)
    trainer.exchange()
Beispiel #6
0
def test():
    """Run one SAL align-then-verify cycle using ``fed_conf_coordinator``."""
    share = make_protocol(
        SAL,
        fed_conf_coordinator,
        {"key_exchange_size": 2048},
    )

    # Alignment step.
    share.align()

    # Verification step.
    share.verify()
Beispiel #7
0
def test():
    """Build an HE_LR_FP protocol from ``fed_conf_coordinator`` and run one exchange."""
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    predict = make_protocol(HE_LR_FP, fed_conf_coordinator, he_settings)
    predict.exchange()
Beispiel #8
0
def test():
    """Send 32 uniform random values through one HE_LR_FP exchange and print the result.

    The input vector is drawn from ``[-1, 1)`` and printed before the protocol
    is built from ``fed_conf_host``.
    """
    u = np.random.uniform(-1, 1, (32, ))
    print(u)

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    predict = make_protocol(HE_LR_FP, fed_conf_host, he_settings)

    print(predict.exchange(u))
Beispiel #9
0
def test():
    """Build an HE_SA_FT protocol from ``fed_conf_coordinator`` and run one exchange.

    Security parameters: Paillier with key length 1024 plus a
    ``key_exchange_size`` of 2048.
    """
    security = {
        "he_algo": "paillier",
        "he_key_length": 1024,
        "key_exchange_size": 2048,
    }
    trainer = make_protocol(HE_SA_FT, fed_conf_coordinator, security)
    # The value returned by exchange() is not used here.
    trainer.exchange()
Beispiel #10
0
def test_secure_alignment():
    """Run two SAL align/verify rounds using ``fed_conf_coordinator``."""
    rounds = 2

    share = make_protocol(
        SAL,
        fed_conf_coordinator,
        {"key_exchange_size": 2048},
    )

    for _ in range(rounds):
        # Each round aligns first and then verifies.
        share.align()
        share.verify()
Beispiel #11
0
def test_train():
    """Take part in HE_OTP_LR_FT2 training with the ``fed_conf_host`` config.

    Uses feature columns 20 onward of the breast-cancer dataset (min-max
    scaled), a 10-parameter model and SGD.  Batch size, learning rate and
    epoch count are read from the outer-scope ``train_param`` mapping.
    """
    def setup_seed(seed):
        """Seed torch on CPU and all CUDA devices; make cuDNN deterministic."""
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

    # Fixed seed so that repeated runs start from the same random state.
    setup_seed(0)

    dataset = load_breast_cancer()
    # Columns from index 20 onward are used as features here.
    raw_features = dataset.data[:, 20:]

    # Min-max normalisation; the original note says guest and host must use
    # the same normalisation method.
    column_min = np.min(raw_features, axis=0)
    column_max = np.max(raw_features, axis=0)
    host_train_data = (raw_features - column_min) / (column_max - column_min)

    # Wrap the features for batched loading with PyTorch.
    host_dataloader = DataLoader(
        TensorDataset(torch.Tensor(host_train_data)),
        train_param['batch_size'],
        drop_last=False,
    )

    class LogisticRegression(nn.Module):
        """Holds a single randomly initialised parameter vector ``theta``."""

        def __init__(self, in_dim):
            super().__init__()
            self.theta = nn.Parameter(torch.randn((in_dim)))

    model = LogisticRegression(10)

    # Plain SGD with the configured learning rate.
    optimizer = torch.optim.SGD(model.parameters(), train_param['lr'])

    # Paillier, key length 1024 -- the original note says this must match
    # the guest's setting.
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}

    # Initialise the HE_OTP_LR_FT2 protocol.
    protocol = make_protocol(HE_OTP_LR_FT2, fed_conf_host, he_settings, None)

    for _ in range(train_param['num_epochs']):
        for batch in host_dataloader:
            feature = batch[0]
            gradient = protocol.exchange(model.theta.detach().numpy(),
                                         feature.numpy())
            # The gradient comes from the exchange, not from backward().
            optimizer.zero_grad()
            model.theta.grad = torch.Tensor(gradient)
            optimizer.step()
Beispiel #12
0
def test():
    """Pass the ``Label`` column of ``shap_finance_c.csv`` into an IV_FFS exchange.

    The protocol is built from ``fed_conf_guest`` with Paillier (key length
    1024) and ``adjust_value`` 0.5.  Only the first 300 rows of the CSV,
    which is looked up next to this module, are read.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    iv_ffs = make_protocol(
        IV_FFS, fed_conf_guest, he_settings, {'adjust_value': 0.5})

    csv_path = os.path.join(os.path.dirname(__file__), 'shap_finance_c.csv')
    table = pd.read_csv(csv_path, nrows=300)

    iv_ffs.exchange(label=pd.Series(table['Label']))
Beispiel #13
0
def test():
    """Align the string ids ``'0'`` .. ``'159'`` via SAL and print both results.

    The protocol is built from ``fed_conf_host``.  The alignment result is
    printed, then passed to ``verify`` whose return value is printed too.
    """
    host_data = [str(value) for value in range(160)]

    share = make_protocol(SAL, fed_conf_host, {"key_exchange_size": 2048})

    # Alignment step.
    aligned = share.align(host_data)
    print(aligned)

    # Verification step.
    print(share.verify(aligned))
Beispiel #14
0
def test_predict():
    """Run two argument-less HE_LR_FP exchanges using ``fed_conf_coordinator``."""
    exchange_count = 2

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LR_FP, fed_conf_coordinator, he_settings, None)

    for _ in range(exchange_count):
        protocol.exchange()
Beispiel #15
0
def test_train():
    """Drive HE_OTP_LR_FT2 exchanges using ``fed_conf_coordinator``.

    Runs ``train_param['train_rounds']`` argument-less exchanges for each of
    ``train_param['num_epochs']`` epochs; ``train_param`` comes from the
    outer scope.
    """
    # Paillier encryption with a key length of 1024.
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}

    # Initialise the HE_OTP_LR_FT2 protocol from the federation config.
    trainer = make_protocol(HE_OTP_LR_FT2, fed_conf_coordinator, he_settings)

    # Training loop.
    for _epoch in range(train_param['num_epochs']):
        for _round in range(train_param['train_rounds']):
            trainer.exchange()
Beispiel #16
0
def test():
    """Send a nested list of random float32 arrays through OTP_SA_FT and print the result.

    ``theta`` is ``[[(2, 4), (2, 6)], [(2, 8)]]`` in terms of array shapes,
    each drawn uniformly from ``[-1, 1)``.  The protocol is built from
    ``fed_conf_host``.
    """
    def random_block(columns):
        """Return a ``(2, columns)`` float32 array of uniform samples."""
        return np.random.uniform(-1, 1, (2, columns)).astype(np.float32)

    theta = [[random_block(4), random_block(6)], [random_block(8)]]
    print(theta)

    trainer = make_protocol(
        OTP_SA_FT, fed_conf_host, {"key_exchange_size": 2048})
    print(trainer.exchange(theta))
Beispiel #17
0
def test_predict():
    """Send two fixed float32 vectors through HE_LR_FP, one exchange each.

    The protocol is built from ``fed_conf_host`` with Paillier (key length
    1024).
    """
    u2 = [
        np.array([0.3, -14, -2.5, 1.7], dtype=np.float32),
        np.array([0.2, 1.2, -5.6], dtype=np.float32),
    ]

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LR_FP, fed_conf_host, he_settings, None)

    for vector in u2:
        protocol.exchange(vector)
Beispiel #18
0
def test():
    """Run one OTP_SA_FT exchange, then send the local ``theta`` over a variable channel.

    ``theta`` is a nested list of random float32 arrays with shapes
    ``[[(2, 4), (2, 6)], [(2, 8)]]``.  After the exchange it is sent with
    tag ``'theta'`` on the channel ``'test_otp_sa_ft'`` built from the first
    host and first guest entries of ``fed_conf_guest``.
    """
    def random_block(columns):
        """Return a ``(2, columns)`` float32 array of uniform samples."""
        return np.random.uniform(-1, 1, (2, columns)).astype(np.float32)

    theta = [[random_block(4), random_block(6)], [random_block(8)]]
    print(theta)

    trainer = make_protocol(
        OTP_SA_FT, fed_conf_guest, {"key_exchange_size": 2048})
    # The exchange result itself is not used in this function.
    trainer.exchange(theta)

    federation = fed_conf_guest["federation"]
    var_chan = make_variable_channel('test_otp_sa_ft',
                                     federation["host"][0],
                                     federation["guest"][0])
    var_chan.send(theta, tag='theta')
Beispiel #19
0
def test():
    """Send a nested list of random float32 arrays through HE_SA_FT and print the result.

    ``theta`` has array shapes ``[[(2, 4), (2, 6)], [(2, 8)]]``.  The protocol
    is built from ``fed_conf_guest`` with Paillier (key length 1024) and a
    ``key_exchange_size`` of 2048.
    """
    def random_block(columns):
        """Return a ``(2, columns)`` float32 array of uniform samples."""
        return np.random.uniform(-1, 1, (2, columns)).astype(np.float32)

    theta = [[random_block(4), random_block(6)], [random_block(8)]]
    print(theta)

    security = {
        "he_algo": "paillier",
        "he_key_length": 1024,
        "key_exchange_size": 2048,
    }
    trainer = make_protocol(HE_SA_FT, fed_conf_guest, security)
    print(trainer.exchange(theta))
Beispiel #20
0
def test_secure_alignment():
    """Check SAL alignment results against a locally computed intersection.

    Two id sets are aligned in turn (one of strings, one of ints).  For each,
    the result of ``align`` must match the elements of the local set that
    also occur in the corresponding ``data_2`` set, and ``verify`` must
    return ``True``.
    """
    data_1 = [[str(v) for v in range(1000, 2000)], list(range(3000, 5000))]
    # presumably mirrors what the other party feeds into align -- confirm
    data_2 = [[str(v) for v in range(1600)], list(range(4200))]

    share = make_protocol(SAL, fed_conf_guest, {"key_exchange_size": 2048})

    for local_ids, other_ids in zip(data_1, data_2):
        # Alignment step.
        result = share.align(local_ids)

        # Positions within local_ids of the values shared with other_ids.
        _, shared_pos, _ = np.intersect1d(local_ids,
                                          other_ids,
                                          return_indices=True)
        expected = [local_ids[pos] for pos in shared_pos]
        assert sorted(result) == sorted(expected)

        # Verification step.
        is_align = share.verify(result)
        assert is_align is True
Beispiel #21
0
def test():
    """Print the value an IV_FFS exchange returns for the ``Occupation`` column.

    The protocol is built from ``fed_conf_host`` with Paillier (key length
    1024) and ``adjust_value`` 0.5.  The first 300 rows of
    ``shap_finance_c.csv`` (looked up next to this module) are read and the
    column is passed as a continuous feature with fixed split points.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    iv_ffs = make_protocol(
        IV_FFS, fed_conf_host, he_settings, {'adjust_value': 0.5})

    csv_path = os.path.join(os.path.dirname(__file__), 'shap_finance_c.csv')
    table = pd.read_csv(csv_path, nrows=300)
    occupation = pd.Series(table['Occupation'])

    edges = np.array([0.0, 1.5, 3.01, 4.15, 6.02, 7.04, 8.28, 10.1])
    split_info = {'split_points': edges}

    print(iv_ffs.exchange(feature=occupation,
                          is_continuous=True,
                          split_info=split_info))
Beispiel #22
0
def test():
    """Run one OT_INV exchange with the ``'server'`` identity.

    The shared ``fed_conf_host`` config is switched to the ``'server'``
    identity in place; the exchange is handed a mock query function.
    """
    fed_conf_host['session']['identity'] = 'server'

    def query_fun(in_list):
        """Map every entry to ``str(int(entry) * 100)``."""
        return [str(int(entry) * 100) for entry in in_list]

    protocol = make_protocol(
        OT_INV,
        fed_conf_host,
        {"symmetric_algo": "aes"},
        {'n': 10, 'k': 1},
    )
    protocol.exchange(query_fun)
Beispiel #23
0
def test_he_linear_ft():
    """Check one HE_LINEAR_FT exchange against ``guest_u + host_u - guest_labels``.

    All three vectors (length 8) are drawn from one ``RandomState(0)``
    stream in a fixed order; only ``host_u`` is passed to the exchange.  The
    protocol is built from ``fed_conf_host``.
    """
    # Original note: host and guest start from the same random state.
    # The draw order below must therefore not change.
    prng = RandomState(0)
    guest_u = np.array(prng.uniform(-1, 1, (8, )))
    host_u = np.array(prng.uniform(-1, 1, (8, )))
    guest_labels = np.array(prng.randint(0, 2, (8, )))

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    trainer = make_protocol(
        HE_LINEAR_FT, fed_conf_host, he_settings, algo_param=None)

    result = trainer.exchange(host_u)

    expected = guest_u + host_u - guest_labels
    assert almost_equal(result, expected)
Beispiel #24
0
def test_invisible_inquiry():
    """Run ten consecutive OT_INV exchanges with the ``'server'`` identity.

    The shared ``fed_conf_host`` config is switched to the ``'server'``
    identity in place; every exchange is handed the same mock query function.
    """
    fed_conf_host['session']['identity'] = 'server'

    def query_fun(in_list):
        """Mock query function: map every entry to ``str(int(entry) * 100)``."""
        return [str(int(entry) * 100) for entry in in_list]

    protocol = make_protocol(
        OT_INV,
        fed_conf_host,
        {"symmetric_algo": "aes"},
        {'n': 10, 'k': 1},
    )

    # Ten queries in a row, one exchange per query.
    for _ in range(10):
        protocol.exchange(query_fun)
Beispiel #25
0
def test_he_otp_lr_ft1():
    """Compare one HE_OTP_LR_FT1 exchange with a local logistic-regression step.

    Guest and host parameters/features are drawn from one ``RandomState(0)``
    stream in a fixed order.  Only the guest values are passed to the
    exchange; the host values are used just for the local reference.  The
    protocol is built from ``fed_conf_guest``.
    """
    # Draw order matters: all values come from the same seeded stream.
    prng = RandomState(0)
    guest_theta = prng.uniform(-1, 1, (6, ))
    guest_features = prng.uniform(-1, 1, (32, 6))
    guest_labels = prng.randint(0, 2, (32, ))
    host_theta = prng.uniform(-1, 1, (6, ))
    host_features = prng.uniform(-1, 1, (32, 6))

    def calu_grad(guest_theta, guest_features, guest_labels, host_theta,
                  host_features):
        """Return the sigmoid output and the guest-side gradient.

        The linear term is the sum of the guest and host dot products; the
        gradient is ``(-1 / batch) * (labels - h_x) . guest_features``.
        """
        host_part = host_theta.dot(host_features.T)
        guest_part = guest_theta.dot(guest_features.T)
        sigmoid = 1 / (1 + np.exp(-(guest_part + host_part)))

        n_samples = guest_features.shape[0]
        residual = guest_labels - sigmoid
        return sigmoid, (-1 / n_samples) * (residual.dot(guest_features))

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    trainer = make_protocol(
        HE_OTP_LR_FT1, fed_conf_guest, he_settings, algo_param=None)

    # Result obtained through the protocol.
    fed_h_x, fed_grads = trainer.exchange(guest_theta, guest_features,
                                          guest_labels)

    # Result computed locally.
    local_h_x, local_grads = calu_grad(guest_theta, guest_features,
                                       guest_labels, host_theta, host_features)

    assert almost_equal(fed_h_x, local_h_x)
    assert almost_equal(fed_grads, local_grads)
Beispiel #26
0
def test():
    """Check an OTP_SA_FT exchange result against a locally computed average.

    After the exchange, the value tagged ``'theta'`` is received on the
    ``'test_otp_sa_ft'`` variable channel, added to the local ``theta`` and
    halved; the exchange result must be almost equal to that.  The protocol
    is built from ``fed_conf_host``.
    """
    def random_block(columns):
        """Return a ``(2, columns)`` float32 array of uniform samples."""
        return np.random.uniform(-1, 1, (2, columns)).astype(np.float32)

    theta = [[random_block(4), random_block(6)], [random_block(8)]]
    print(theta)

    trainer = make_protocol(
        OTP_SA_FT, fed_conf_host, {"key_exchange_size": 2048})
    result = trainer.exchange(theta)

    federation = fed_conf_host["federation"]
    var_chan = make_variable_channel('test_otp_sa_ft',
                                     federation["host"][0],
                                     federation["guest"][0])
    guest_theta = var_chan.recv(tag='theta')

    # Average computed locally from both parties' values.
    avg_theta = iterative_divide(iterative_add(theta, guest_theta), 2.0)
    assert almost_equal(result, avg_theta)
Beispiel #27
0
def test_train():
    """Run ``iter_per_epoch * num_epochs`` argument-less HE_LINEAR_FT exchanges.

    With the parameters below that is 8 * 10 = 80 exchanges.  The protocol
    is built from ``fed_conf_coordinator``.
    """
    def setup_seed(seed):
        """Seed torch on CPU and all CUDA devices; make cuDNN deterministic."""
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

    setup_seed(0)

    train_param = {
        'lr': 0.1,
        'num_epochs': 10,
        'iter_per_epoch': 8,
        'batch_size': 64
    }

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(
        HE_LINEAR_FT, fed_conf_coordinator, he_settings, None)

    total_rounds = train_param['iter_per_epoch'] * train_param['num_epochs']
    for _ in range(total_rounds):
        protocol.exchange()
Beispiel #28
0
def test_train():
    """Train through HE_OTP_LR_FT2 with the ``fed_conf_guest`` config and plot the loss.

    Uses the first 20 feature columns of the breast-cancer dataset (min-max
    scaled) plus its labels.  Each batch is passed to ``protocol.exchange``,
    which returns a prediction and a gradient; the BCE loss of the prediction
    is recorded and the gradient is applied with SGD.  The loss curve is
    saved as ``guest_train_results_loss.png`` next to this module.
    Batch size, learning rate and epoch count come from the outer-scope
    ``train_param`` mapping.
    """
    def setup_seed(seed):
        """Seed torch on CPU and all CUDA devices; make cuDNN deterministic."""
        torch.manual_seed(seed)  # CPU generator
        torch.cuda.manual_seed_all(seed)  # every GPU generator
        torch.backends.cudnn.deterministic = True  # deterministic cuDNN algorithms

    # Fixed seed so that repeated runs start from the same random state.
    setup_seed(0)

    dataset = load_breast_cancer()  # training data taken from sklearn
    raw_features = dataset.data[:, :20]  # first 20 columns are the features used here

    # Min-max normalisation, column by column.
    column_min = np.min(raw_features, axis=0)
    column_max = np.max(raw_features, axis=0)
    guest_train_data = (raw_features - column_min) / (column_max - column_min)

    # The target labels are supplied on this side.
    guest_train_label = dataset.target

    # Wrap features and labels for batched loading with PyTorch.
    guest_dataloader = DataLoader(
        TensorDataset(torch.Tensor(guest_train_data),
                      torch.Tensor(guest_train_label)),
        train_param['batch_size'],
        drop_last=False,
    )

    class LogisticRegression(nn.Module):
        """Holds a single randomly initialised parameter vector ``theta``."""

        def __init__(self, in_dim):
            super().__init__()
            self.theta = nn.Parameter(torch.randn((in_dim)))

    model = LogisticRegression(20)
    criterion = nn.BCELoss()
    # Plain SGD with the configured learning rate.
    optimizer = torch.optim.SGD(model.parameters(), train_param['lr'])

    # Paillier encryption with a key length of 1024.
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}

    # Initialise the HE_OTP_LR_FT2 protocol.
    protocol = make_protocol(HE_OTP_LR_FT2, fed_conf_guest, he_settings, None)

    # One loss value is recorded per processed batch.
    loss_list = []
    for _ in range(train_param['num_epochs']):
        for feature, label in guest_dataloader:
            predict, gradient = protocol.exchange(model.theta.detach().numpy(),
                                                  feature.numpy(),
                                                  label.numpy())
            loss_list.append(criterion(torch.Tensor(predict), label).item())
            # The gradient comes from the exchange, not from backward().
            optimizer.zero_grad()
            model.theta.grad = torch.Tensor(gradient)
            optimizer.step()

    # Plot loss against the 1-based iteration number and save the figure.
    iterations = list(range(1, len(loss_list) + 1))
    pl.plot(iterations, loss_list, 'r-', label='loss value')
    pl.legend()
    pl.xlabel('iters')
    pl.ylabel('loss')
    pl.title('logistic regression loss in training')
    figure_path = os.path.join(os.path.dirname(__file__),
                               "guest_train_results_loss.png")
    pl.savefig(figure_path)
Beispiel #29
0
def test_iv_ffs():
    """Compare the value returned by an IV_FFS exchange with a local IV computation.

    The first 300 rows of ``shap_finance_c.csv`` (looked up next to this
    module) provide the ``Occupation`` feature and the ``Label`` column.  The
    feature is passed to the exchange as continuous with fixed split points;
    the same split points are used by the local reference ``iv_calu``.  The
    protocol is built from ``fed_conf_host``.
    """
    federal_info = fed_conf_host

    sec_param = {"he_algo": 'paillier', "he_key_length": 1024}

    algo_param = {'adjust_value': 0.5}

    def iv_calu(label, data, split_info):
        """Compute the information value of ``data`` with respect to ``label``.

        Args:
            label: pandas Series of labels; their sum is used as the "good"
                count, so 0/1 values are assumed -- TODO confirm.
            data: pandas Series with the feature values.
            split_info: mapping whose ``'split_points'`` entry holds the
                ascending bin edges.

        Returns:
            The accumulated IV over all bins.
        """
        edges = split_info['split_points']

        # Bins: (-inf, e0], (e0, e1], ..., (e_{k-1}, e_k], (e_k, +inf).
        bin_index = [np.array(data[data <= edges[0]].index)]
        for lower, upper in zip(edges[:-1], edges[1:]):
            in_bin = (data > lower) & (data <= upper)
            bin_index.append(np.array(data[in_bin].index))
        bin_index.append(np.array(data[data > edges[-1]].index))

        # Per-bin counts; each label sum is computed only once.
        good_num = []
        bad_num = []
        for idx in bin_index:
            good = sum(label[idx])
            good_num.append(good)
            bad_num.append(len(idx) - good)
        good_num = np.array(good_num)
        bad_num = np.array(bad_num)

        good_all_count = sum(label)
        bad_all_count = len(label) - good_all_count

        adjust = algo_param['adjust_value']
        iv = 0
        for good, bad in zip(good_num, bad_num):
            bad_rate = bad / bad_all_count
            good_rate = good / good_all_count
            if good == 0 or bad == 0:
                # An empty class would make the log ratio undefined, so both
                # rates are shifted by the adjustment value.
                calc_value = math.log((bad_rate + adjust) /
                                      (good_rate + adjust))
            else:
                calc_value = math.log(bad_rate / good_rate)
            iv += (bad_rate - good_rate) * calc_value
        return iv

    iv_ffs = make_protocol(IV_FFS, federal_info, sec_param, algo_param)
    table = pd.read_csv(os.path.join(os.path.dirname(__file__),
                                     'shap_finance_c.csv'),
                        nrows=300)
    data = pd.Series(table['Occupation'])
    label = pd.Series(table['Label'])
    split_info = {
        'split_points':
        np.array([0.0, 1.5, 3.01, 4.15, 6.02, 7.04, 8.28, 10.1])
    }
    # IV value obtained through the protocol.
    iv_value = iv_ffs.exchange(feature=data,
                               is_continuous=True,
                               split_info=split_info)
    # IV value computed locally.
    local_iv_value = iv_calu(label, data, split_info)

    # The two values are computed independently in floating point, so they
    # are compared with a tight tolerance instead of exact equality.
    assert math.isclose(iv_value, local_iv_value, rel_tol=1e-9, abs_tol=1e-12)