Example #1
0
    def test_batch(self):
        """Check aby3.batch both with and without drop_last.

        The dummy reader yields 4 samples of share-shape (2, 2, 2); with
        batch_size=3 the default reader keeps the short final batch while
        drop_last=True discards it.
        """
        # Default behavior (drop_last=False): the trailing short batch
        # of 1 sample is still yielded.
        reader_keep_last = aby3.batch(reader=self.dummy_reader,
                                      batch_size=3)
        expected_shapes = [(2, 3, 2, 2), (2, 1, 2, 2)]
        for batch, expected in zip(reader_keep_last(), expected_shapes):
            self.assertEqual(batch.shape, expected)

        # With drop_last=True every yielded batch must be full-sized.
        reader_drop_last = aby3.batch(reader=self.dummy_reader,
                                      batch_size=3,
                                      drop_last=True)
        for batch in reader_drop_last():
            self.assertEqual(batch.shape, (2, 3, 2, 2))
Example #2
0
def get_mpc_test_dataloader(feature_file, feature_shape, role, batch_size):
    """
    Build a batched reader over encrypted test features for prediction.

    Args:
        feature_file: path (prefix) of the aby3 feature share file.
        feature_shape: shape of one plaintext feature sample.
        role: this party's id, used to pick the matching share.
        batch_size: number of samples per batch.

    Returns:
        A reader callable that yields only full batches (drop_last=True).
    """
    share_reader = aby3.load_aby3_shares(feature_file,
                                         id=role,
                                         shape=feature_shape)
    return aby3.batch(share_reader, batch_size, drop_last=True)
Example #3
0
def get_mpc_dataloader(feature_file, label_file, feature_shape, label_shape,
                       feature_name, label_name, role, batch_size):
    """
    Build an async DataLoader over encrypted feature/label training data.

    Args:
        feature_file: path (prefix) of the aby3 feature share file.
        label_file: path (prefix) of the aby3 label share file.
        feature_shape: shape of one plaintext feature sample.
        label_shape: shape of one plaintext label sample.
        feature_name: name of the feature variable in the main program.
        label_name: name of the label variable in the main program.
        role: this party's id, used to pick the matching shares.
        batch_size: number of samples per batch (also loader capacity).

    Returns:
        A fluid DataLoader feeding (feature, label) batches on CPU.
    """
    feed_x = fluid.default_main_program().global_block().var(feature_name)
    feed_y = fluid.default_main_program().global_block().var(label_name)

    # Full batches only: drop_last keeps the two readers aligned.
    batched_features = aby3.batch(
        aby3.load_aby3_shares(feature_file, id=role, shape=feature_shape),
        batch_size,
        drop_last=True)
    batched_labels = aby3.batch(
        aby3.load_aby3_shares(label_file, id=role, shape=label_shape),
        batch_size,
        drop_last=True)

    # async data loader
    loader = fluid.io.DataLoader.from_generator(feed_list=[feed_x, feed_y],
                                                capacity=batch_size)
    combined = paddle.reader.compose(batched_features, batched_labels)
    loader.set_batch_generator(combined, places=fluid.CPUPlace())
    return loader
Example #4
0
# Attach an MPC SGD optimizer to the previously built loss.
# NOTE(review): avg_loss is defined outside this snippet — presumably the
# network's mean loss; confirm against the enclosing script.
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)

# Encrypted MNIST shares must already exist on disk (produced by a
# separate data-preparation step).
mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError(
        "mpc_data_dir is not found. Please prepare encrypted data.")

# train_reader
# Load this party's shares (selected by `role`) of the flattened 784-dim
# MNIST features and scalar labels, then batch them; drop_last keeps the
# feature and label streams aligned on full batches only.
feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_feature",
                                       id=role,
                                       shape=(784, ))
label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_label",
                                     id=role,
                                     shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader
# Same layout as the training readers, over the held-out test shares.
test_feature_reader = aby3.load_aby3_shares(mpc_data_dir +
                                            "mnist2_test_feature",
                                            id=role,
                                            shape=(784, ))
test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_test_label",
                                          id=role,
                                          shape=(1, ))
test_batch_feature = aby3.batch(test_feature_reader,
                                BATCH_SIZE,
                                drop_last=True)
test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True)
Example #5
0
def train(args):
    """
    Train the MPC FM model on encrypted Criteo data and save an inference
    model after every epoch.

    Args:
        args: parsed CLI namespace; reads role, server, port, batch_size,
            num_field, sparse_feature_number, base_lr and epoch_num.

    Raises:
        ValueError: if the encrypted training data directory is missing.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    # Input and Network
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1

    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")
    inputs = [feat_idx] + [feat_value] + [label]

    avg_cost, predict = mpc_network.FM(args, inputs, seed=2)
    # Clone before minimize() so the inference program has no optimizer ops.
    infer_program = fluid.default_main_program().clone(for_test=True)
    optimizer = pfl_mpc.optimizer.SGD(args.base_lr)
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Prepare train data
    mpc_data_dir = "./mpc_data/"
    mpc_train_data_dir = mpc_data_dir + 'train/'
    if not os.path.exists(mpc_train_data_dir):
        raise ValueError(
            "{} is not found. Please prepare encrypted data.".format(
                mpc_train_data_dir))
    feature_idx_reader = aby3.load_aby3_shares(mpc_train_data_dir +
                                               "criteo_feature_idx",
                                               id=role,
                                               shape=(FIELD_NUM, FEATURE_NUM))
    feature_value_reader = aby3.load_aby3_shares(mpc_train_data_dir +
                                                 "criteo_feature_value",
                                                 id=role,
                                                 shape=(FIELD_NUM, ))
    label_reader = aby3.load_aby3_shares(mpc_train_data_dir + "criteo_label",
                                         id=role,
                                         shape=(1, ))

    # drop_last on every reader keeps the three streams aligned.
    batch_feature_idx = aby3.batch(feature_idx_reader,
                                   BATCH_SIZE,
                                   drop_last=True)
    batch_feature_value = aby3.batch(feature_value_reader,
                                     BATCH_SIZE,
                                     drop_last=True)
    batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

    loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label], capacity=BATCH_SIZE)
    batch_sample = paddle.reader.compose(batch_feature_idx,
                                         batch_feature_value, batch_label)
    loader.set_batch_generator(batch_sample, places=place)

    # Training
    logger.info('******************************************')
    logger.info('Start Training...')
    logger.info('batch_size = {}, learning_rate = {}'.format(
        args.batch_size, args.base_lr))

    mpc_model_basedir = "./mpc_model/"
    start_time = time.time()
    step = 0
    # Initialize here so the per-epoch timing print below cannot raise
    # NameError when an epoch yields no batches (bug fix).
    batch_end = start_time

    for epoch_id in range(args.epoch_num):
        for sample in loader():
            step += 1
            exe.run(feed=sample, fetch_list=[predict.name])
            batch_end = time.time()
            if step % 100 == 0:
                print('Epoch={}, Step={}, current cost time: {}'.format(
                    epoch_id, step, batch_end - start_time))

        print('Epoch={}, current cost time: {}'.format(epoch_id,
                                                       batch_end - start_time))

        # For each epoch: save infer program
        mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(
            epoch_id, role)
        fluid.io.save_inference_model(
            dirname=mpc_model_dir,
            feeded_var_names=["feat_idx", "feat_value", "label"],
            target_vars=[predict],
            executor=exe,
            main_program=infer_program,
            model_filename="__model__")

        logger.info('Model is saved in {}'.format(mpc_model_dir))
    end_time = time.time()
    print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'.format(
        args.epoch_num, BATCH_SIZE, (end_time - start_time)))
    logger.info('******************************************')
    logger.info('End Training...')
def load_model_and_infer(args):
    """
    Run inference with the per-epoch MPC models saved by train().

    Args:
        args: parsed CLI namespace; reads role, server, port, batch_size,
            num_field, sparse_feature_number and epoch_num.

    Raises:
        ValueError: if the encrypted test data directory is missing.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Input
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1

    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")

    # Prepare test data
    mpc_data_dir = "./mpc_data/"
    mpc_test_data_dir = mpc_data_dir + 'test/'
    if not os.path.exists(mpc_test_data_dir):
        # Bug fix: the format string was missing the space after the
        # placeholder ("{}is not found"), garbling the error message.
        raise ValueError(
            "{} is not found. Please prepare encrypted data.".format(
                mpc_test_data_dir))
    test_feature_idx_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_feature_idx",
        id=role,
        shape=(FIELD_NUM, FEATURE_NUM))
    test_feature_value_reader = aby3.load_aby3_shares(mpc_test_data_dir +
                                                      "criteo_feature_value",
                                                      id=role,
                                                      shape=(FIELD_NUM, ))
    test_label_reader = aby3.load_aby3_shares(mpc_test_data_dir +
                                              "criteo_label",
                                              id=role,
                                              shape=(1, ))

    # drop_last on every reader keeps the three streams aligned.
    test_batch_feature_idx = aby3.batch(test_feature_idx_reader,
                                        BATCH_SIZE,
                                        drop_last=True)
    test_batch_feature_value = aby3.batch(test_feature_value_reader,
                                          BATCH_SIZE,
                                          drop_last=True)
    test_batch_label = aby3.batch(test_label_reader,
                                  BATCH_SIZE,
                                  drop_last=True)

    test_loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label],
        capacity=BATCH_SIZE,
        drop_last=True)
    test_batch_sample = paddle.reader.compose(test_batch_feature_idx,
                                              test_batch_feature_value,
                                              test_batch_label)
    test_loader.set_batch_generator(test_batch_sample, places=place)

    # Evaluate every per-epoch model saved by train().
    for i in range(args.epoch_num):
        mpc_model_dir = './mpc_model/epoch{}/party{}'.format(i, role)
        mpc_model_filename = '__model__'
        infer(test_loader, role, exe, BATCH_SIZE, mpc_model_dir,
              mpc_model_filename)