def _preprocess_rsa_psi_leader(self):
    """Launch the leader-role RSA PSI preprocessor for every partition.

    Reads the key bytes from ``self._rsa_private_key_path``, then creates
    and starts one ``RsaPsiPreProcessor`` per partition of
    ``self._data_source_l``. After all of them have been started,
    ``wait_for_finished()`` is called on each in launch order.
    """
    launched = []
    with gfile.GFile(self._rsa_private_key_path, 'rb') as key_file:
        key_pem = key_file.read()
    partition_count = self._data_source_l.data_source_meta.partition_num
    for pid in range(partition_count):
        batch_options = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        leader_options = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='leader-rsa-psi-processor',
            role=common_pb.FLRole.Leader,
            rsa_key_pem=key_pem,
            # Each partition consumes exactly one raw-data file.
            input_file_paths=[self._psi_raw_data_fpaths_l[pid]],
            output_file_dir=self._pre_processor_ouput_dir_l,
            raw_data_publish_dir=self._raw_data_pub_dir_l,
            partition_id=pid,
            offload_processor_number=1,
            max_flying_sign_batch=128,
            stub_fanout=2,
            slow_sign_threshold=8,
            sort_run_merger_read_ahead_buffer=1 << 20,
            batch_processor_options=batch_options,
        )
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            leader_options,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_l,
            True,
        )
        worker.start_process()
        launched.append(worker)
    # Waiting happens only after every partition has been started.
    for worker in launched:
        worker.wait_for_finished()
def _preprocess_rsa_psi_follower(self):
    """Launch the follower-role RSA PSI preprocessor for every partition.

    Reads the key bytes from ``self._rsa_public_key_path``, then creates
    and starts one ``RsaPsiPreProcessor`` per partition of
    ``self._data_source_f``, pointing each at
    ``self._rsa_psi_signer_addr``. After all of them have been started,
    ``wait_for_finished()`` is called on each in launch order.
    """
    launched = []
    with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
        key_pem = key_file.read()
    partition_count = self._data_source_f.data_source_meta.partition_num
    for pid in range(partition_count):
        batch_options = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        # Even-numbered partitions get rpc_sync_mode=True, odd ones False.
        use_sync_rpc = pid % 2 == 0
        follower_options = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='follower-rsa-psi-processor',
            role=common_pb.FLRole.Follower,
            rsa_key_pem=key_pem,
            # Each partition consumes exactly one raw-data file.
            input_file_paths=[self._psi_raw_data_fpaths_f[pid]],
            output_file_dir=self._pre_processor_ouput_dir_f,
            raw_data_publish_dir=self._raw_data_pub_dir_f,
            partition_id=pid,
            leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
            offload_processor_number=1,
            max_flying_sign_batch=128,
            max_flying_sign_rpc=64,
            sign_rpc_timeout_ms=100000,
            stub_fanout=2,
            slow_sign_threshold=8,
            sort_run_merger_read_ahead_buffer=1 << 20,
            rpc_sync_mode=use_sync_rpc,
            rpc_thread_pool_size=16,
            batch_processor_options=batch_options,
        )
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            follower_options,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_f,
            True,
        )
        worker.start_process()
        launched.append(worker)
    # Waiting happens only after every partition has been started.
    for worker in launched:
        worker.wait_for_finished()
def _preprocess_rsa_psi_follower(self):
    """Launch the follower-role RSA PSI preprocessor for every partition.

    Reads the key bytes from ``self._rsa_public_key_path``, then creates
    and starts one ``RsaPsiPreProcessor`` per partition of
    ``self._data_source_f``. After all of them have been started,
    ``wait_for_finished()`` is called on each in launch order.
    """
    launched = []
    with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
        key_pem = key_file.read()
    partition_count = self._data_source_f.data_source_meta.partition_num
    for pid in range(partition_count):
        batch_options = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        follower_options = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='follower-rsa-psi-processor',
            role=common_pb.FLRole.Follower,
            rsa_key_pem=key_pem,
            # Each partition consumes exactly one raw-data file.
            input_file_paths=[self._psi_raw_data_fpaths_f[pid]],
            output_file_dir=self._pre_processor_ouput_dir_f,
            raw_data_publish_dir=self._raw_data_pub_dir_f,
            partition_id=pid,
            leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
            offload_processor_number=1,
            batch_processor_options=batch_options,
        )
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            follower_options,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_f,
            True,
        )
        worker.start_process()
        launched.append(worker)
    # Waiting happens only after every partition has been started.
    for worker in launched:
        worker.wait_for_finished()
def _preprocess_rsa_psi_follower(self):
    """Publish follower raw data and run the RSA PSI preprocessor on it.

    Reads the key bytes from ``self._rsa_public_key_path`` and sets
    ``self._follower_rsa_psi_sub_dir``. For every partition of
    ``self._data_source_f`` it publishes that partition's raw-data file
    through a ``RawDataPublisher``, marks the partition finished, and
    starts a follower-role ``RsaPsiPreProcessor`` that subscribes to the
    published directory. After all processors have been started,
    ``wait_for_finished()`` is called on each in launch order.

    Side effect: ``os.environ['ETCD_BASE_DIR']`` is overwritten with
    ``self.follower_base_dir`` before each processor is constructed.
    """
    launched = []
    with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
        key_pem = key_file.read()
    self._follower_rsa_psi_sub_dir = 'follower_rsa_psi_sub_dir'
    publisher = raw_data_publisher.RawDataPublisher(
        self._kvstore_f, self._follower_rsa_psi_sub_dir
    )
    partition_count = self._data_source_f.data_source_meta.partition_num
    for pid in range(partition_count):
        # Publish the single input file for this partition, then close
        # the partition so no further raw data is expected for it.
        publisher.publish_raw_data(pid, [self._psi_raw_data_fpaths_f[pid]])
        publisher.finish_raw_data(pid)

        batch_options = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        raw_data_options = dj_pb.RawDataOptions(
            raw_data_iter='TF_RECORD',
            read_ahead_size=1 << 20,
        )
        writer_options = dj_pb.WriterOptions(output_writer='CSV_DICT')
        follower_options = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='follower-rsa-psi-processor',
            role=common_pb.FLRole.Follower,
            rsa_key_pem=key_pem,
            input_file_subscribe_dir=self._follower_rsa_psi_sub_dir,
            output_file_dir=self._pre_processor_ouput_dir_f,
            raw_data_publish_dir=self._raw_data_pub_dir_f,
            partition_id=pid,
            leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
            offload_processor_number=1,
            max_flying_sign_batch=128,
            max_flying_sign_rpc=64,
            sign_rpc_timeout_ms=100000,
            stub_fanout=2,
            slow_sign_threshold=8,
            sort_run_merger_read_ahead_buffer=1 << 20,
            sort_run_merger_read_batch_size=128,
            batch_processor_options=batch_options,
            input_raw_data=raw_data_options,
            writer_options=writer_options,
        )

        # NOTE(review): presumably the preprocessor reads ETCD_BASE_DIR
        # while setting up its kvstore -- confirm. Kept directly before
        # construction to preserve the original ordering.
        os.environ['ETCD_BASE_DIR'] = self.follower_base_dir
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            follower_options, self.kvstore_type, True
        )
        worker.start_process()
        launched.append(worker)
    # Waiting happens only after every partition has been started.
    for worker in launched:
        worker.wait_for_finished()
def _preprocess_rsa_psi_leader(self):
    """Launch the leader-role RSA PSI preprocessor for every partition.

    Creates and starts one ``RsaPsiPreProcessor`` per partition of
    ``self._data_source_l``, handing each the key file path
    ``self._rsa_private_key_path`` instead of the key contents. After all
    of them have been started, ``wait_for_finished()`` is called on each
    in launch order.
    """
    launched = []
    partition_count = self._data_source_l.data_source_meta.partition_num
    for pid in range(partition_count):
        batch_options = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        leader_options = dj_pb.RsaPsiPreProcessorOptions(
            role=common_pb.FLRole.Leader,
            rsa_key_file_path=self._rsa_private_key_path,
            # Each partition consumes exactly one raw-data file.
            input_file_paths=[self._psi_raw_data_fpaths_l[pid]],
            output_file_dir=self._pre_processor_ouput_dir_l,
            raw_data_publish_dir=self._raw_data_pub_dir_l,
            partition_id=pid,
            offload_processor_number=1,
            batch_processor_options=batch_options,
        )
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            leader_options,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_l,
            True,
        )
        worker.start_process()
        launched.append(worker)
    # Waiting happens only after every partition has been started.
    for worker in launched:
        worker.wait_for_finished()
# Assemble the nested option messages first so the top-level options
# construction below stays flat and readable.
batch_processor_options = dj_pb.BatchProcessorOptions(
    batch_size=args.process_batch_size,
    max_flying_item=-1,
)
input_raw_data_options = dj_pb.RawDataOptions(
    raw_data_iter=args.raw_data_iter,
    compressed_type=args.compressed_type,
    read_ahead_size=args.read_ahead_size,
    read_batch_size=args.read_batch_size,
)
output_writer_options = dj_pb.WriterOptions(
    output_writer=args.output_builder,
    compressed_type=args.builder_compressed_type,
)
preprocessor_options = dj_pb.RsaPsiPreProcessorOptions(
    preprocessor_name=args.preprocessor_name,
    rsa_key_pem=rsa_key_pem,
    # Duplicated paths are collapsed; the set round-trip does not
    # preserve the order in which paths were collected.
    input_file_paths=list(set(all_fpaths)),
    input_file_subscribe_dir=args.input_file_subscribe_dir,
    output_file_dir=args.output_file_dir,
    raw_data_publish_dir=args.raw_data_publish_dir,
    partition_id=args.partition_id,
    leader_rsa_psi_signer_addr=args.leader_rsa_psi_signer_addr,
    offload_processor_number=offload_processor_number,
    max_flying_sign_batch=args.max_flying_sign_batch,
    max_flying_sign_rpc=args.max_flying_sign_rpc,
    sign_rpc_timeout_ms=args.sign_rpc_timeout_ms,
    stub_fanout=args.stub_fanout,
    slow_sign_threshold=args.slow_sign_threshold,
    sort_run_merger_read_ahead_buffer=(
        args.sort_run_merger_read_ahead_buffer
    ),
    sort_run_merger_read_batch_size=(
        args.sort_run_merger_read_batch_size
    ),
    batch_processor_options=batch_processor_options,
    input_raw_data=input_raw_data_options,
    writer_options=output_writer_options,
)
# Collect every explicitly listed input file, plus everything found
# directly under args.input_dir when a directory is given.
all_fpaths.extend(args.input_file_paths)
if args.input_dir is not None:
    dir_entries = gfile.ListDirectory(args.input_dir)
    all_fpaths.extend(
        [os.path.join(args.input_dir, entry) for entry in dir_entries]
    )
if not all_fpaths:
    raise RuntimeError("no input files for preprocessor")

# Any psi_role other than 'leader' is treated as follower.
if args.psi_role == 'leader':
    fl_role = common_pb.FLRole.Leader
else:
    fl_role = common_pb.FLRole.Follower

batch_processor_options = dj_pb.BatchProcessorOptions(
    batch_size=args.process_batch_size,
    max_flying_item=args.max_flying_item,
)
preprocessor_options = dj_pb.RsaPsiPreProcessorOptions(
    role=fl_role,
    rsa_key_file_path=args.rsa_key_file_path,
    # Duplicated paths are collapsed; the set round-trip does not
    # preserve the order in which paths were collected.
    input_file_paths=list(set(all_fpaths)),
    output_file_dir=args.output_file_dir,
    raw_data_publish_dir=args.raw_data_publish_dir,
    partition_id=args.partition_id,
    leader_rsa_psi_signer_addr=args.leader_rsa_psi_signer_addr,
    offload_processor_number=args.offload_processor_number,
    batch_processor_options=batch_processor_options,
)

# Start the preprocessor, then wait for it, logging around both steps.
preprocessor = RsaPsiPreProcessor(
    preprocessor_options,
    args.etcd_name,
    args.etcd_addrs,
    args.etcd_base_dir,
)
preprocessor.start_process()
logging.info("PreProcessor launched for %s of RSA PSI", args.psi_role)
preprocessor.wait_for_finished()
logging.info("PreProcessor finished for %s of RSA PSI", args.psi_role)
# Validate with an explicit raise rather than ``assert``: assert
# statements are removed when Python runs with -O, which would let a
# missing key path reach GFile and fail with a far less helpful error.
if args.rsa_key_path is None:
    raise ValueError("rsa_key_path must be provided to load the RSA key")
with gfile.GFile(args.rsa_key_path, 'rb') as f:
    rsa_key_pem = f.read()

# Any psi_role other than 'leader' is treated as follower.
if args.psi_role == 'leader':
    fl_role = common_pb.FLRole.Leader
else:
    fl_role = common_pb.FLRole.Follower

batch_processor_options = dj_pb.BatchProcessorOptions(
    batch_size=args.process_batch_size,
    max_flying_item=args.max_flying_item,
)
preprocessor_options = dj_pb.RsaPsiPreProcessorOptions(
    preprocessor_name=args.preprocessor_name,
    role=fl_role,
    rsa_key_pem=rsa_key_pem,
    # Duplicated paths are collapsed; the set round-trip does not
    # preserve the order in which paths were collected.
    input_file_paths=list(set(all_fpaths)),
    input_file_subscribe_dir=args.input_file_subscribe_dir,
    output_file_dir=args.output_file_dir,
    raw_data_publish_dir=args.raw_data_publish_dir,
    partition_id=args.partition_id,
    leader_rsa_psi_signer_addr=args.leader_rsa_psi_signer_addr,
    offload_processor_number=args.offload_processor_number,
    max_flying_sign_batch=args.max_flying_sign_batch,
    max_flying_sign_rpc=args.max_flying_sign_rpc,
    sign_rpc_timeout_ms=args.sign_rpc_timeout_ms,
    stub_fanout=args.stub_fanout,
    slow_sign_threshold=args.slow_sign_threshold,
    rpc_sync_mode=args.rpc_sync_mode,
    rpc_thread_pool_size=args.rpc_thread_pool_size,
    sort_run_merger_read_ahead_buffer=(
        args.sort_run_merger_read_ahead_buffer
    ),
    batch_processor_options=batch_processor_options,
)
preprocessor = RsaPsiPreProcessor(
    preprocessor_options,
    args.etcd_name,
    args.etcd_addrs,
    args.etcd_base_dir,
)
preprocessor.start_process()