예제 #1
0
def get_sample_num(path):
    """
    Return the first value yielded by the encrypted-sample-count reader.

    Returns None when the reader yields nothing.
    """
    reader = aby3.load_aby3_shares(path, id=role, shape=(party_num, ))
    # Only the first record is needed; fall back to None on an empty reader,
    # matching the implicit-return behavior of a for/return loop.
    return next(iter(reader()), None)
예제 #2
0
def get_mpc_test_dataloader(feature_file, feature_shape, role, batch_size):
    """
    Build a batched reader over encrypted test features for prediction.

    Reads this party's aby3 share of the feature file and groups records
    into fixed-size batches.
    """
    share_reader = aby3.load_aby3_shares(feature_file,
                                         id=role,
                                         shape=feature_shape)
    # drop_last=True discards a trailing partial batch so every batch
    # has exactly batch_size records.
    return aby3.batch(share_reader, batch_size, drop_last=True)
예제 #3
0
def get_shares(path):
    """
    Collect encrypted feature stats from all data owners.

    Reads one share file per party (``path + '.<i>'``), stacks the records,
    and transposes so the share dimension comes first.
    """
    collected = []
    for party in range(party_num):
        part_reader = aby3.load_aby3_shares(
            '{}.{}'.format(path, party), id=role, shape=(feat_num, ))
        collected.append(list(part_reader()))
    stacked = np.array(collected).reshape([party_num, share_num, feat_num])
    # Move the share axis to the front: (share_num, party_num, feat_num).
    return np.transpose(stacked, axes=[1, 0, 2])
예제 #4
0
    def test_load_aby3_shares(self):
        """Check that aby3.load_aby3_shares restores a saved share, both with
        the default appended share dimension and with append_share_dim=False.
        """
        share = np.arange(1, 19).reshape((2, 3, 3)).astype(np.int64)
        tmp_part_name = './tmp_test_load_aby3_shares.part0'
        with open(tmp_part_name, 'wb') as f:
            # ndarray.tostring() is deprecated and removed in NumPy >= 1.23;
            # tobytes() returns the identical raw byte string.
            f.write(share.tobytes())
        part_name = './tmp_test_load_aby3_shares'
        # Default loader prepends the share dimension to the given shape.
        default_loader = aby3.load_aby3_shares(part_name=part_name,
                                               id=0,
                                               shape=(3, 3))
        default_loading_data = next(default_loader())
        self.assertTrue(np.allclose(default_loading_data, share))

        # With append_share_dim=False the full (2, 3, 3) shape is given.
        loader = aby3.load_aby3_shares(part_name=part_name,
                                       id=0,
                                       shape=(2, 3, 3),
                                       append_share_dim=False)
        loading_data = next(loader())
        self.assertTrue(np.allclose(loading_data, share))

        self.remove_temp_file(tmp_part_name)
예제 #5
0
def decrypt_data(filepath, shape):
    """
    Load the three encrypted shares and reconstruct the plaintext.

    Returns the reconstruction of the first record only (None when the
    composed reader yields nothing).
    """
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])

    for instance in composed():
        # First instance only, matching the original early return.
        return aby3.reconstruct(np.array(instance))
예제 #6
0
def get_mpc_dataloader(feature_file, label_file, feature_shape, label_shape,
                       feature_name, label_name, role, batch_size):
    """
    Build an async DataLoader over encrypted feature/label training data.

    Looks up the already-declared feed variables by name, batches this
    party's aby3 shares, and wires the composed generator into the loader.
    """
    main_block = fluid.default_main_program().global_block()
    feature_var = main_block.var(feature_name)
    label_var = main_block.var(label_name)
    # Batch each share reader; drop_last keeps batch sizes fixed.
    batch_feature = aby3.batch(
        aby3.load_aby3_shares(feature_file, id=role, shape=feature_shape),
        batch_size,
        drop_last=True)
    batch_label = aby3.batch(
        aby3.load_aby3_shares(label_file, id=role, shape=label_shape),
        batch_size,
        drop_last=True)
    # Async data loader fed by the composed (feature, label) generator.
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[feature_var, label_var], capacity=batch_size)
    batch_sample = paddle.reader.compose(batch_feature, batch_label)
    loader.set_batch_generator(batch_sample, places=fluid.CPUPlace())
    return loader
예제 #7
0
def load_decrypt_data(filepath, shape):
    """
    Load encrypted shares, reconstruct every record, and print each loss.
    """
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])
    # One composed instance per epoch; enumerate replaces the manual counter.
    for epoch_id, instance in enumerate(composed()):
        plain = aby3.reconstruct(np.array(instance))
        print("Epoch %d, Step 0, Loss: %f " % (epoch_id, plain[0]))
예제 #8
0
def load_decrypt_bs_data(filepath, shape):
    """
    Load encrypted boolean shares and log each reconstructed record.
    """
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])

    for instance in composed():
        # Boolean sharing: the plaintext is the XOR of the three shares.
        plain = np.array(instance[0]) ^ np.array(instance[1])
        plain = plain ^ np.array(instance[2])
        logger.info(plain)
예제 #9
0
def decrypt_data_to_file(filepath, shape, decrypted_filepath):
    """
    Load the encrypted shares, reconstruct each record, and append the
    plaintext values (one per line) to *decrypted_filepath*.
    """
    part_readers = []
    for id in six.moves.range(3):
        part_readers.append(aby3.load_aby3_shares(filepath, id=id,
                                                  shape=shape))
    aby3_share_reader = paddle.reader.compose(part_readers[0], part_readers[1],
                                              part_readers[2])

    # Open the output once instead of re-opening it for every instance;
    # 'a+' keeps the original append-to-existing-file semantics.
    with open(decrypted_filepath, 'a+') as f:
        for instance in aby3_share_reader():
            p = aby3.reconstruct(np.array(instance))
            for i in p:
                f.write(str(i) + '\n')
예제 #10
0
def decrypt_data_to_file(cypher_filepath, plaintext_filepath, shape):
    """
    Load the encrypted shares, reconstruct each record, and append it as
    CSV rows to *plaintext_filepath*.
    """
    readers = [
        aby3.load_aby3_shares(cypher_filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])

    for instance in composed():
        plain = aby3.reconstruct(np.array(instance))
        # header=0 is falsy, so no column header is written on appends.
        pd.DataFrame(plain).to_csv(
            plaintext_filepath, mode='a', index=False, header=0)
예제 #11
0
def decrypt_bs_data_to_file(filepath, shape, decrypted_filepath):
    """
    Load the encrypted data (boolean share) and reconstruct to a file.

    Reconstructs each record by XOR-ing the three shares and writes the
    values one per line, replacing any existing output file.
    """
    # Start from a clean output file.
    if os.path.exists(decrypted_filepath):
        os.remove(decrypted_filepath)
    part_readers = []
    for id in six.moves.range(3):
        part_readers.append(aby3.load_aby3_shares(filepath, id=id,
                                                  shape=shape))
    aby3_share_reader = paddle.reader.compose(part_readers[0], part_readers[1],
                                              part_readers[2])

    # Open the output once instead of re-opening it for every instance.
    with open(decrypted_filepath, 'a+') as f:
        for instance in aby3_share_reader():
            # Boolean sharing: plaintext is the XOR of the three shares.
            p = np.bitwise_xor(np.array(instance[0]), np.array(instance[1]))
            p = np.bitwise_xor(p, np.array(instance[2]))
            for i in p:
                f.write(str(i) + '\n')
예제 #12
0
def decrypt_data_to_file(filepath, shape, decrypted_filepath):
    """
    Load the encrypted data (arithmetic share) and reconstruct to a file.

    Reconstructs each record from the three shares and writes the values
    one per line, replacing any existing output file.
    """
    # Start from a clean output file.
    if os.path.exists(decrypted_filepath):
        os.remove(decrypted_filepath)
    part_readers = []
    for id in six.moves.range(3):
        part_readers.append(aby3.load_aby3_shares(filepath, id=id,
                                                  shape=shape))
    aby3_share_reader = paddle.reader.compose(part_readers[0], part_readers[1],
                                              part_readers[2])

    # Open the output once instead of re-opening it for every instance.
    with open(decrypted_filepath, 'a+') as f:
        for instance in aby3_share_reader():
            p = aby3.reconstruct(np.array(instance))
            for i in p:
                f.write(str(i) + '\n')
예제 #13
0
import sys
import time

import paddle
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3

# Command line: party role, server address, port. `sys` was used below but
# never imported; import it explicitly.
role, server, port = sys.argv[1], sys.argv[2], sys.argv[3]
pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
role = int(role)

# data preprocessing
BATCH_SIZE = 10

# This party's aby3 shares of the Boston housing features (13) and label (1).
feature_reader = aby3.load_aby3_shares("tmp/house_feature",
                                       id=role,
                                       shape=(13, ))
label_reader = aby3.load_aby3_shares("tmp/house_label", id=role, shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 13], dtype='int64')
y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 1], dtype='int64')

# async data loader
loader = fluid.io.DataLoader.from_generator(feed_list=[x, y],
                                            capacity=BATCH_SIZE)
batch_sample = paddle.reader.compose(batch_feature, batch_label)
place = fluid.CPUPlace()
loader.set_batch_generator(batch_sample, places=place)
예제 #14
0
def train(args):
    """
    Train the MPC FM model on encrypted Criteo data.

    Builds the network, reads this party's aby3 shares of the training
    data, runs `args.epoch_num` epochs, and saves an inference model per
    epoch under ./mpc_model/.

    Args:
        args: parsed arguments with role, server, port, batch_size,
            num_field, sparse_feature_number, base_lr and epoch_num.

    Raises:
        ValueError: if the encrypted training data directory is missing.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    # Input and Network
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1

    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")
    inputs = [feat_idx] + [feat_value] + [label]

    avg_cost, predict = mpc_network.FM(args, inputs, seed=2)
    # Clone before minimize() so the infer program carries no backward ops.
    infer_program = fluid.default_main_program().clone(for_test=True)
    optimizer = pfl_mpc.optimizer.SGD(args.base_lr)
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Prepare train data
    mpc_data_dir = "./mpc_data/"
    mpc_train_data_dir = mpc_data_dir + 'train/'
    if not os.path.exists(mpc_train_data_dir):
        raise ValueError(
            "{} is not found. Please prepare encrypted data.".format(
                mpc_train_data_dir))
    feature_idx_reader = aby3.load_aby3_shares(mpc_train_data_dir +
                                               "criteo_feature_idx",
                                               id=role,
                                               shape=(FIELD_NUM, FEATURE_NUM))
    feature_value_reader = aby3.load_aby3_shares(mpc_train_data_dir +
                                                 "criteo_feature_value",
                                                 id=role,
                                                 shape=(FIELD_NUM, ))
    label_reader = aby3.load_aby3_shares(mpc_train_data_dir + "criteo_label",
                                         id=role,
                                         shape=(1, ))

    batch_feature_idx = aby3.batch(feature_idx_reader,
                                   BATCH_SIZE,
                                   drop_last=True)
    batch_feature_value = aby3.batch(feature_value_reader,
                                     BATCH_SIZE,
                                     drop_last=True)
    batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

    loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label], capacity=BATCH_SIZE)
    batch_sample = paddle.reader.compose(batch_feature_idx,
                                         batch_feature_value, batch_label)
    loader.set_batch_generator(batch_sample, places=place)

    # Training
    logger.info('******************************************')
    logger.info('Start Training...')
    logger.info('batch_size = {}, learning_rate = {}'.format(
        args.batch_size, args.base_lr))

    mpc_model_basedir = "./mpc_model/"
    start_time = time.time()
    step = 0
    # Initialize so the per-epoch summary below cannot hit an unbound
    # `batch_end` when the loader yields no batch.
    batch_end = start_time

    for epoch_id in range(args.epoch_num):
        for sample in loader():
            step += 1
            exe.run(feed=sample, fetch_list=[predict.name])
            batch_end = time.time()
            if step % 100 == 0:
                print('Epoch={}, Step={}, current cost time: {}'.format(
                    epoch_id, step, batch_end - start_time))

        print('Epoch={}, current cost time: {}'.format(epoch_id,
                                                       batch_end - start_time))

        # For each epoch: save infer program
        mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(
            epoch_id, role)
        fluid.io.save_inference_model(
            dirname=mpc_model_dir,
            feeded_var_names=["feat_idx", "feat_value", "label"],
            target_vars=[predict],
            executor=exe,
            main_program=infer_program,
            model_filename="__model__")

        logger.info('Model is saved in {}'.format(mpc_model_dir))
    end_time = time.time()
    print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'.format(
        args.epoch_num, BATCH_SIZE, (end_time - start_time)))
    logger.info('******************************************')
    logger.info('End Training...')
예제 #15
0
def load_model_and_infer(args):
    """
    Run inference with every per-epoch saved MPC model.

    Reads this party's aby3 shares of the encrypted test data, builds a
    batched async loader, and calls `infer` once per saved epoch model.

    Args:
        args: parsed arguments with role, server, port, batch_size,
            num_field, sparse_feature_number and epoch_num.

    Raises:
        ValueError: if the encrypted test data directory is missing.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Input
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1

    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")

    # Prepare test data
    mpc_data_dir = "./mpc_data/"
    mpc_test_data_dir = mpc_data_dir + 'test/'
    if not os.path.exists(mpc_test_data_dir):
        # Fixed missing space in the error message ("{}is" -> "{} is").
        raise ValueError(
            "{} is not found. Please prepare encrypted data.".format(
                mpc_test_data_dir))
    test_feature_idx_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_feature_idx",
        id=role,
        shape=(FIELD_NUM, FEATURE_NUM))
    test_feature_value_reader = aby3.load_aby3_shares(mpc_test_data_dir +
                                                      "criteo_feature_value",
                                                      id=role,
                                                      shape=(FIELD_NUM, ))
    test_label_reader = aby3.load_aby3_shares(mpc_test_data_dir +
                                              "criteo_label",
                                              id=role,
                                              shape=(1, ))

    test_batch_feature_idx = aby3.batch(test_feature_idx_reader,
                                        BATCH_SIZE,
                                        drop_last=True)
    test_batch_feature_value = aby3.batch(test_feature_value_reader,
                                          BATCH_SIZE,
                                          drop_last=True)
    test_batch_label = aby3.batch(test_label_reader,
                                  BATCH_SIZE,
                                  drop_last=True)

    test_loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label],
        capacity=BATCH_SIZE,
        drop_last=True)
    test_batch_sample = paddle.reader.compose(test_batch_feature_idx,
                                              test_batch_feature_value,
                                              test_batch_label)
    test_loader.set_batch_generator(test_batch_sample, places=place)

    # One inference pass per saved epoch model.
    for i in range(args.epoch_num):
        mpc_model_dir = './mpc_model/epoch{}/party{}'.format(i, role)
        mpc_model_filename = '__model__'
        infer(test_loader, role, exe, BATCH_SIZE, mpc_model_dir,
              mpc_model_filename)
예제 #16
0
# Build the LeNet-5 network and its training objective.
model = Model()
cost, softmax = model.lenet5()

# NOTE(review): clone(for_test=False) keeps training-mode behavior in the
# cloned program; pure inference usually clones with for_test=True — confirm.
infer_program = fluid.default_main_program().clone(for_test=False)

avg_loss = pfl_mpc.layers.mean(cost)
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.1)
optimizer.minimize(avg_loss)

# prepare train and test reader
mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError("mpc_data_dir is not found. Please prepare encrypted data.")

# train_reader: this party's aby3 shares of MNIST images (1x28x28) and
# one-hot labels (10 classes).
feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_feature", id=role, shape=(1, 28, 28))
label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_label", id=role, shape=(10,))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader: same layout as the training shares.
test_feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_feature", id=role, shape=(1, 28, 28))
test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_label", id=role, shape=(10,))
test_batch_feature = aby3.batch(test_feature_reader, BATCH_SIZE, drop_last=True)
test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True)

place = fluid.CPUPlace()

# async data loader; x and y are the feed variables declared elsewhere.
loader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=BATCH_SIZE)
batch_sample = paddle.reader.compose(batch_feature, batch_label)
예제 #17
0
# Binary-classification loss on the logits (y_pre, y declared elsewhere).
cost = pfl_mpc.layers.sigmoid_cross_entropy_with_logits(y_pre, y)

# NOTE(review): clone(for_test=False) keeps training-mode behavior in the
# cloned program; pure inference usually clones with for_test=True — confirm.
infer_program = fluid.default_main_program().clone(for_test=False)

avg_loss = pfl_mpc.layers.mean(cost)
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)

mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError(
        "mpc_data_dir is not found. Please prepare encrypted data.")

# train_reader: this party's aby3 shares of flattened MNIST images (784)
# and binary labels (1).
feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_feature",
                                       id=role,
                                       shape=(784, ))
label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_label",
                                     id=role,
                                     shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader: same layout as the training shares.
test_feature_reader = aby3.load_aby3_shares(mpc_data_dir +
                                            "mnist2_test_feature",
                                            id=role,
                                            shape=(784, ))
test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_test_label",
                                          id=role,
                                          shape=(1, ))
예제 #18
0
# network
x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 784], dtype='int64')
y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 1], dtype='int64')

y_pre = pfl_mpc.layers.fc(input=x, size=1)
cost = pfl_mpc.layers.sigmoid_cross_entropy_with_logits(y_pre, y)

infer_program = fluid.default_main_program().clone(for_test=False)

avg_loss = pfl_mpc.layers.mean(cost)
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)

# train_reader
feature_reader = aby3.load_aby3_shares("/tmp/mnist2_feature",
                                       id=role,
                                       shape=(784, ))
label_reader = aby3.load_aby3_shares("/tmp/mnist2_label", id=role, shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader
test_feature_reader = aby3.load_aby3_shares("/tmp/mnist2_test_feature",
                                            id=role,
                                            shape=(784, ))
test_label_reader = aby3.load_aby3_shares("/tmp/mnist2_test_label",
                                          id=role,
                                          shape=(1, ))
test_batch_feature = aby3.batch(test_feature_reader,
                                BATCH_SIZE,
                                drop_last=True)