def get_sample_num(path):
    """Return the encrypted sample count read from ``path``.

    Loads this party's aby3 share file (module globals ``role`` and
    ``party_num`` select the share id and shape) and returns the first
    record the reader yields, or ``None`` when the reader is empty.
    """
    reader = aby3.load_aby3_shares(path, id=role, shape=(party_num, ))
    # First yielded record, or None for an empty reader — matches the
    # original for-loop-with-early-return.
    return next(reader(), None)
def get_mpc_test_dataloader(feature_file, feature_shape, role, batch_size):
    """Build a batched reader over encrypted test features for prediction.

    Args:
        feature_file: path prefix of the aby3 share files.
        feature_shape: per-sample feature shape.
        role: this party's share id.
        batch_size: samples per batch (incomplete tail batches dropped).

    Returns:
        A batched aby3 reader.
    """
    share_reader = aby3.load_aby3_shares(feature_file, id=role, shape=feature_shape)
    return aby3.batch(share_reader, batch_size, drop_last=True)
def get_shares(path):
    """Collect encrypted feature statistics from all data owners.

    Reads one aby3 share file per party (``path``.0, ``path``.1, ...),
    stacks them, then moves the share axis to the front. Relies on the
    module globals ``party_num``, ``share_num``, ``feat_num`` and ``role``.

    Returns:
        np.ndarray of shape [share_num, party_num, feat_num].
    """
    per_party = []
    for party in range(party_num):
        reader = aby3.load_aby3_shares(
            '{}.{}'.format(path, party), id=role, shape=(feat_num, ))
        per_party.append(list(reader()))
    stacked = np.array(per_party).reshape([party_num, share_num, feat_num])
    # Reorder axes: [party, share, feat] -> [share, party, feat].
    return np.transpose(stacked, axes=[1, 0, 2])
def test_load_aby3_shares(self):
    """load_aby3_shares restores a share both with the default appended
    share dimension and with ``append_share_dim=False``."""
    share = np.arange(1, 19).reshape((2, 3, 3)).astype(np.int64)
    tmp_part_name = './tmp_test_load_aby3_shares.part0'
    with open(tmp_part_name, 'wb') as f:
        # FIX: ndarray.tostring() is deprecated and removed in NumPy >= 1.23;
        # tobytes() produces the identical raw byte string.
        f.write(share.tobytes())
    part_name = './tmp_test_load_aby3_shares'
    # Default loading: shape (3, 3) with the share dim appended by the loader.
    default_loader = aby3.load_aby3_shares(part_name=part_name,
                                           id=0,
                                           shape=(3, 3))
    default_loading_data = next(default_loader())
    self.assertTrue(np.allclose(default_loading_data, share))
    # Explicit full shape (2, 3, 3) with append_share_dim disabled.
    loader = aby3.load_aby3_shares(part_name=part_name,
                                   id=0,
                                   shape=(2, 3, 3),
                                   append_share_dim=False)
    loading_data = next(loader())
    self.assertTrue(np.allclose(loading_data, share))
    self.remove_temp_file(tmp_part_name)
def decrypt_data(filepath, shape):
    """Reconstruct plaintext from the three aby3 share files of ``filepath``.

    Composes the three per-party readers and returns the reconstruction
    of the first instance only (the function returns inside the loop).
    """
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])
    for shares in composed():
        return aby3.reconstruct(np.array(shares))
def get_mpc_dataloader(feature_file, label_file, feature_shape, label_shape,
                       feature_name, label_name, role, batch_size):
    """Create an async DataLoader over encrypted feature/label share files.

    Looks up the feed variables by name in the default main program,
    batches the two aby3 readers (dropping incomplete tail batches) and
    binds their composition to a CPU-place DataLoader.
    """
    global_block = fluid.default_main_program().global_block()
    x = global_block.var(feature_name)
    y = global_block.var(label_name)
    feature_reader = aby3.load_aby3_shares(feature_file, id=role, shape=feature_shape)
    label_reader = aby3.load_aby3_shares(label_file, id=role, shape=label_shape)
    batch_feature = aby3.batch(feature_reader, batch_size, drop_last=True)
    batch_label = aby3.batch(label_reader, batch_size, drop_last=True)
    # async data loader
    loader = fluid.io.DataLoader.from_generator(feed_list=[x, y],
                                                capacity=batch_size)
    samples = paddle.reader.compose(batch_feature, batch_label)
    loader.set_batch_generator(samples, places=fluid.CPUPlace())
    return loader
def load_decrypt_data(filepath, shape):
    """Reconstruct each encrypted instance and print it as a per-epoch loss."""
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])
    # enumerate replaces the original hand-maintained epoch counter.
    for epoch_id, shares in enumerate(composed()):
        plain = aby3.reconstruct(np.array(shares))
        print("Epoch %d, Step 0, Loss: %f " % (epoch_id, plain[0]))
def load_decrypt_bs_data(filepath, shape):
    """Reconstruct boolean-shared data (XOR of the three shares) and log it."""
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])
    for shares in composed():
        # Boolean sharing reconstructs by XOR-ing all three shares.
        plain = np.bitwise_xor(
            np.bitwise_xor(np.array(shares[0]), np.array(shares[1])),
            np.array(shares[2]))
        logger.info(plain)
def decrypt_data_to_file(filepath, shape, decrypted_filepath):
    """Reconstruct arithmetic-shared data and append it to a text file.

    Args:
        filepath: path prefix of the three aby3 share files.
        shape: per-instance plaintext shape.
        decrypted_filepath: output text file (one value per line, appended).
    """
    part_readers = []
    for party in six.moves.range(3):
        part_readers.append(aby3.load_aby3_shares(filepath, id=party, shape=shape))
    aby3_share_reader = paddle.reader.compose(part_readers[0], part_readers[1],
                                              part_readers[2])
    # FIX: open the output file once instead of re-opening it in append
    # mode for every instance; append semantics are unchanged.
    with open(decrypted_filepath, 'a+') as f:
        for instance in aby3_share_reader():
            p = aby3.reconstruct(np.array(instance))
            for i in p:
                f.write(str(i) + '\n')
def decrypt_data_to_file(cypher_filepath, plaintext_filepath, shape):
    """Load the encrypted data and reconstruct.

    Each reconstructed instance is appended to ``plaintext_filepath``
    as CSV rows without a header.
    """
    readers = []
    for party in six.moves.range(3):
        readers.append(
            aby3.load_aby3_shares(cypher_filepath, id=party, shape=shape))
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])
    for shares in composed():
        plain = aby3.reconstruct(np.array(shares))
        pd.DataFrame(plain).to_csv(
            plaintext_filepath, mode='a', index=False, header=0)
def decrypt_bs_data_to_file(filepath, shape, decrypted_filepath):
    """Reconstruct boolean-shared data and write it to a fresh text file.

    Any pre-existing output file is removed first, so repeated runs do
    not accumulate stale rows.
    """
    if os.path.exists(decrypted_filepath):
        os.remove(decrypted_filepath)
    readers = [
        aby3.load_aby3_shares(filepath, id=party, shape=shape)
        for party in six.moves.range(3)
    ]
    composed = paddle.reader.compose(readers[0], readers[1], readers[2])
    for shares in composed():
        # XOR of all three shares recovers the boolean-shared plaintext.
        plain = np.bitwise_xor(np.array(shares[0]), np.array(shares[1]))
        plain = np.bitwise_xor(plain, np.array(shares[2]))
        with open(decrypted_filepath, 'a+') as f:
            for value in plain:
                f.write(str(value) + '\n')
def decrypt_data_to_file(filepath, shape, decrypted_filepath):
    """Reconstruct arithmetic-shared data and write it to a fresh text file.

    Args:
        filepath: path prefix of the three aby3 share files.
        shape: per-instance plaintext shape.
        decrypted_filepath: output text file (one value per line).
    """
    # FIX: removed dead commented-out polling loop that waited for the
    # .part0/.part1/.part2 files to appear.
    # Start from an empty output file so repeated runs do not append stale rows.
    if os.path.exists(decrypted_filepath):
        os.remove(decrypted_filepath)
    part_readers = []
    for party in six.moves.range(3):
        part_readers.append(aby3.load_aby3_shares(filepath, id=party, shape=shape))
    aby3_share_reader = paddle.reader.compose(part_readers[0], part_readers[1],
                                              part_readers[2])
    # FIX: open the output file once instead of per instance.
    with open(decrypted_filepath, 'a+') as f:
        for instance in aby3_share_reader():
            p = aby3.reconstruct(np.array(instance))
            for i in p:
                f.write(str(i) + '\n')
import sys
import time

import paddle
import paddle.fluid as fluid

import paddle_fl.mpc as pfl_mpc
import paddle_fl.mpc.data_utils.aby3 as aby3

# FIX: `sys` was used below (sys.argv) but never imported in this block.
# Command line: <role> <redis-server> <redis-port>
role, server, port = sys.argv[1], sys.argv[2], sys.argv[3]
pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
role = int(role)

# data preprocessing
BATCH_SIZE = 10

feature_reader = aby3.load_aby3_shares("tmp/house_feature", id=role, shape=(13, ))
label_reader = aby3.load_aby3_shares("tmp/house_label", id=role, shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 13], dtype='int64')
y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 1], dtype='int64')

# async data loader
loader = fluid.io.DataLoader.from_generator(feed_list=[x, y],
                                            capacity=BATCH_SIZE)
batch_sample = paddle.reader.compose(batch_feature, batch_label)
place = fluid.CPUPlace()
loader.set_batch_generator(batch_sample, places=place)
def train(args):
    """Train the MPC FM model on encrypted criteo data.

    Initializes the aby3 MPC runtime, builds the network, streams
    encrypted train shares through an async DataLoader, and saves an
    inference model per epoch under ./mpc_model/epoch{N}/party{role}.

    Args:
        args: parsed CLI namespace — uses role, server, port, batch_size,
            num_field, sparse_feature_number, base_lr, epoch_num.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    # Input and Network
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    # +1 reserves an extra slot beyond the sparse feature ids.
    FEATURE_NUM = args.sparse_feature_number + 1
    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")
    inputs = [feat_idx] + [feat_value] + [label]
    avg_cost, predict = mpc_network.FM(args, inputs, seed=2)
    # Snapshot of the graph before the optimizer adds backward ops.
    infer_program = fluid.default_main_program().clone(for_test=True)
    optimizer = pfl_mpc.optimizer.SGD(args.base_lr)
    optimizer.minimize(avg_cost)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Prepare train data
    mpc_data_dir = "./mpc_data/"
    mpc_train_data_dir = mpc_data_dir + 'train/'
    if not os.path.exists(mpc_train_data_dir):
        raise ValueError("{} is not found. Please prepare encrypted data.".format(
            mpc_train_data_dir))
    feature_idx_reader = aby3.load_aby3_shares(mpc_train_data_dir +
                                               "criteo_feature_idx",
                                               id=role,
                                               shape=(FIELD_NUM, FEATURE_NUM))
    feature_value_reader = aby3.load_aby3_shares(mpc_train_data_dir +
                                                 "criteo_feature_value",
                                                 id=role,
                                                 shape=(FIELD_NUM, ))
    label_reader = aby3.load_aby3_shares(mpc_train_data_dir + "criteo_label",
                                         id=role,
                                         shape=(1, ))
    batch_feature_idx = aby3.batch(feature_idx_reader,
                                   BATCH_SIZE,
                                   drop_last=True)
    batch_feature_value = aby3.batch(feature_value_reader,
                                     BATCH_SIZE,
                                     drop_last=True)
    batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label], capacity=BATCH_SIZE)
    batch_sample = paddle.reader.compose(batch_feature_idx,
                                         batch_feature_value, batch_label)
    loader.set_batch_generator(batch_sample, places=place)

    # Training
    logger.info('******************************************')
    logger.info('Start Training...')
    logger.info('batch_size = {}, learning_rate = {}'.format(
        args.batch_size, args.base_lr))
    mpc_model_basedir = "./mpc_model/"
    start_time = time.time()
    step = 0
    for epoch_id in range(args.epoch_num):
        for sample in loader():
            step += 1
            exe.run(feed=sample, fetch_list=[predict.name])
            batch_end = time.time()
            if step % 100 == 0:
                print('Epoch={}, Step={}, current cost time: {}'.format(
                    epoch_id, step, batch_end - start_time))
        print('Epoch={}, current cost time: {}'.format(epoch_id, batch_end -
                                                       start_time))

        # For each epoch: save infer program
        mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(
            epoch_id, role)
        fluid.io.save_inference_model(
            dirname=mpc_model_dir,
            feeded_var_names=["feat_idx", "feat_value", "label"],
            target_vars=[predict],
            executor=exe,
            main_program=infer_program,
            model_filename="__model__")
        logger.info('Model is saved in {}'.format(mpc_model_dir))
    end_time = time.time()
    print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'.format(
        args.epoch_num, BATCH_SIZE, (end_time - start_time)))
    logger.info('******************************************')
    logger.info('End Training...')
def load_model_and_infer(args):
    """Load each epoch's saved MPC model and run inference on encrypted
    criteo test data.

    Args:
        args: parsed CLI namespace — uses role, server, port, batch_size,
            num_field, sparse_feature_number, epoch_num.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Input
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    # +1 reserves an extra slot beyond the sparse feature ids.
    FEATURE_NUM = args.sparse_feature_number + 1
    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")

    # Prepare test data
    mpc_data_dir = "./mpc_data/"
    mpc_test_data_dir = mpc_data_dir + 'test/'
    if not os.path.exists(mpc_test_data_dir):
        # FIX: error message was missing a space after the placeholder
        # ("{}is not found.").
        raise ValueError("{} is not found. Please prepare encrypted data.".format(
            mpc_test_data_dir))
    test_feature_idx_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_feature_idx",
        id=role,
        shape=(FIELD_NUM, FEATURE_NUM))
    test_feature_value_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_feature_value",
        id=role,
        shape=(FIELD_NUM, ))
    test_label_reader = aby3.load_aby3_shares(mpc_test_data_dir +
                                              "criteo_label",
                                              id=role,
                                              shape=(1, ))
    test_batch_feature_idx = aby3.batch(test_feature_idx_reader,
                                        BATCH_SIZE,
                                        drop_last=True)
    test_batch_feature_value = aby3.batch(test_feature_value_reader,
                                          BATCH_SIZE,
                                          drop_last=True)
    test_batch_label = aby3.batch(test_label_reader,
                                  BATCH_SIZE,
                                  drop_last=True)
    test_loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label],
        capacity=BATCH_SIZE,
        drop_last=True)
    test_batch_sample = paddle.reader.compose(test_batch_feature_idx,
                                              test_batch_feature_value,
                                              test_batch_label)
    test_loader.set_batch_generator(test_batch_sample, places=place)

    # Evaluate every epoch's saved model.
    for i in range(args.epoch_num):
        mpc_model_dir = './mpc_model/epoch{}/party{}'.format(i, role)
        mpc_model_filename = '__model__'
        infer(test_loader, role, exe, BATCH_SIZE, mpc_model_dir,
              mpc_model_filename)
# Build the LeNet-5 MPC network and its training objective.
model = Model()
cost, softmax = model.lenet5()
# NOTE(review): cloned with for_test=False, unlike the FM example which
# uses for_test=True for its inference program — confirm this is intended.
infer_program = fluid.default_main_program().clone(for_test=False)
avg_loss = pfl_mpc.layers.mean(cost)
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.1)
optimizer.minimize(avg_loss)

# prepare train and test reader
mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError("mpc_data_dir is not found. Please prepare encrypted data.")

# train_reader: encrypted MNIST features (1x28x28) and one-hot labels (10).
feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_feature", id=role, shape=(1, 28, 28))
label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_label", id=role, shape=(10,))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader
test_feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_feature", id=role, shape=(1, 28, 28))
test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_label", id=role, shape=(10,))
test_batch_feature = aby3.batch(test_feature_reader, BATCH_SIZE, drop_last=True)
test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True)

place = fluid.CPUPlace()

# async data loader (x and y are feed variables defined earlier in the file)
loader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=BATCH_SIZE)
batch_sample = paddle.reader.compose(batch_feature, batch_label)
# Training objective: sigmoid cross-entropy on the logits y_pre
# (y_pre and y are defined earlier in the file).
cost = pfl_mpc.layers.sigmoid_cross_entropy_with_logits(y_pre, y)
# NOTE(review): cloned with for_test=False — confirm this is intended for
# the inference program.
infer_program = fluid.default_main_program().clone(for_test=False)
avg_loss = pfl_mpc.layers.mean(cost)
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)

mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError(
        "mpc_data_dir is not found. Please prepare encrypted data.")

# train_reader: flattened 784-dim MNIST features, binary labels.
feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_feature",
                                       id=role,
                                       shape=(784, ))
label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_label",
                                     id=role,
                                     shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader
test_feature_reader = aby3.load_aby3_shares(mpc_data_dir +
                                            "mnist2_test_feature",
                                            id=role,
                                            shape=(784, ))
test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist2_test_label",
                                          id=role,
                                          shape=(1, ))
# network x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 784], dtype='int64') y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 1], dtype='int64') y_pre = pfl_mpc.layers.fc(input=x, size=1) cost = pfl_mpc.layers.sigmoid_cross_entropy_with_logits(y_pre, y) infer_program = fluid.default_main_program().clone(for_test=False) avg_loss = pfl_mpc.layers.mean(cost) optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.001) optimizer.minimize(avg_loss) # train_reader feature_reader = aby3.load_aby3_shares("/tmp/mnist2_feature", id=role, shape=(784, )) label_reader = aby3.load_aby3_shares("/tmp/mnist2_label", id=role, shape=(1, )) batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True) batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True) # test_reader test_feature_reader = aby3.load_aby3_shares("/tmp/mnist2_test_feature", id=role, shape=(784, )) test_label_reader = aby3.load_aby3_shares("/tmp/mnist2_test_label", id=role, shape=(1, )) test_batch_feature = aby3.batch(test_feature_reader, BATCH_SIZE, drop_last=True)