def _preprocess_rsa_psi_follower(self):
    """Run one follower-role RSA PSI preprocessor per partition.

    The key bytes are read once from ``self._rsa_public_key_path`` and
    shared by every partition's options.  A preprocessor is created and
    started for each partition id in
    ``range(self._data_source_f.data_source_meta.partition_num)``; only
    after all of them have been started does the method block on
    ``wait_for_finished()`` for each one in start order.
    """
    with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
        public_key_pem = key_file.read()

    partition_count = self._data_source_f.data_source_meta.partition_num
    started = []
    for pid in range(partition_count):
        input_paths = [self._psi_raw_data_fpaths_f[pid]]
        batch_opts = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        opts = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='follower-rsa-psi-processor',
            role=common_pb.FLRole.Follower,
            rsa_key_pem=public_key_pem,
            input_file_paths=input_paths,
            output_file_dir=self._pre_processor_ouput_dir_f,
            raw_data_publish_dir=self._raw_data_pub_dir_f,
            partition_id=pid,
            leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
            offload_processor_number=1,
            max_flying_sign_batch=128,
            max_flying_sign_rpc=64,
            sign_rpc_timeout_ms=100000,
            stub_fanout=2,
            slow_sign_threshold=8,
            sort_run_merger_read_ahead_buffer=1 << 20,
            # Even partition ids use sync mode, odd ones do not.
            rpc_sync_mode=(pid % 2 == 0),
            rpc_thread_pool_size=16,
            batch_processor_options=batch_opts,
        )
        # NOTE(review): the meaning of the trailing positional ``True`` is
        # defined by RsaPsiPreProcessor and is not visible from here.
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            opts,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_f,
            True,
        )
        worker.start_process()
        started.append(worker)

    # Wait only after every partition has been started.
    for worker in started:
        worker.wait_for_finished()
def _preprocess_rsa_psi_leader(self):
    """Run one leader-role RSA PSI preprocessor per partition.

    The key bytes are read once from ``self._rsa_private_key_path`` and
    passed to every partition's options.  A preprocessor is created and
    started for each partition id in
    ``range(self._data_source_l.data_source_meta.partition_num)``; the
    method then blocks on ``wait_for_finished()`` for each of them in
    start order.
    """
    with gfile.GFile(self._rsa_private_key_path, 'rb') as key_file:
        private_key_pem = key_file.read()

    partition_count = self._data_source_l.data_source_meta.partition_num
    started = []
    for pid in range(partition_count):
        input_paths = [self._psi_raw_data_fpaths_l[pid]]
        batch_opts = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        opts = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='leader-rsa-psi-processor',
            role=common_pb.FLRole.Leader,
            rsa_key_pem=private_key_pem,
            input_file_paths=input_paths,
            output_file_dir=self._pre_processor_ouput_dir_l,
            raw_data_publish_dir=self._raw_data_pub_dir_l,
            partition_id=pid,
            offload_processor_number=1,
            max_flying_sign_batch=128,
            stub_fanout=2,
            slow_sign_threshold=8,
            sort_run_merger_read_ahead_buffer=1 << 20,
            batch_processor_options=batch_opts,
        )
        # NOTE(review): the meaning of the trailing positional ``True`` is
        # defined by RsaPsiPreProcessor and is not visible from here.
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            opts,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_l,
            True,
        )
        worker.start_process()
        started.append(worker)

    # Wait only after every partition has been started.
    for worker in started:
        worker.wait_for_finished()
def _preprocess_rsa_psi_follower(self):
    """Run one follower-role RSA PSI preprocessor per partition.

    Reads the key bytes from ``self._rsa_public_key_path`` once, then for
    each partition id in
    ``range(self._data_source_f.data_source_meta.partition_num)`` builds
    the options, constructs a preprocessor and starts it.  After all have
    been started, blocks on ``wait_for_finished()`` for each in start
    order.
    """
    with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
        public_key_pem = key_file.read()

    partition_count = self._data_source_f.data_source_meta.partition_num
    started = []
    for pid in range(partition_count):
        input_paths = [self._psi_raw_data_fpaths_f[pid]]
        batch_opts = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        opts = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='follower-rsa-psi-processor',
            role=common_pb.FLRole.Follower,
            rsa_key_pem=public_key_pem,
            input_file_paths=input_paths,
            output_file_dir=self._pre_processor_ouput_dir_f,
            raw_data_publish_dir=self._raw_data_pub_dir_f,
            partition_id=pid,
            leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
            offload_processor_number=1,
            batch_processor_options=batch_opts,
        )
        # NOTE(review): the meaning of the trailing positional ``True`` is
        # defined by RsaPsiPreProcessor and is not visible from here.
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            opts,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_f,
            True,
        )
        worker.start_process()
        started.append(worker)

    # Wait only after every partition has been started.
    for worker in started:
        worker.wait_for_finished()
def _preprocess_rsa_psi_follower(self):
    """Publish follower raw data and run one RSA PSI preprocessor per partition.

    Steps, in order:

    1. Read the key bytes from ``self._rsa_public_key_path``.
    2. Store the subscribe directory name on
       ``self._follower_rsa_psi_sub_dir`` and create a
       ``RawDataPublisher`` over ``self._kvstore_f`` for it.
    3. For each partition id in
       ``range(self._data_source_f.data_source_meta.partition_num)``:
       publish that partition's raw data path, mark the partition
       finished, build the preprocessor options, set the
       ``ETCD_BASE_DIR`` environment variable to
       ``self.follower_base_dir``, then construct and start the
       preprocessor.
    4. Block on ``wait_for_finished()`` for every started preprocessor.
    """
    with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
        public_key_pem = key_file.read()

    self._follower_rsa_psi_sub_dir = 'follower_rsa_psi_sub_dir'
    publisher = raw_data_publisher.RawDataPublisher(
        self._kvstore_f, self._follower_rsa_psi_sub_dir
    )

    partition_count = self._data_source_f.data_source_meta.partition_num
    started = []
    for pid in range(partition_count):
        # Publish this partition's input and close it before the
        # preprocessor that reads from the subscribe dir is configured.
        raw_paths = [self._psi_raw_data_fpaths_f[pid]]
        publisher.publish_raw_data(pid, raw_paths)
        publisher.finish_raw_data(pid)

        opts = dj_pb.RsaPsiPreProcessorOptions(
            preprocessor_name='follower-rsa-psi-processor',
            role=common_pb.FLRole.Follower,
            rsa_key_pem=public_key_pem,
            input_file_subscribe_dir=self._follower_rsa_psi_sub_dir,
            output_file_dir=self._pre_processor_ouput_dir_f,
            raw_data_publish_dir=self._raw_data_pub_dir_f,
            partition_id=pid,
            leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
            offload_processor_number=1,
            max_flying_sign_batch=128,
            max_flying_sign_rpc=64,
            sign_rpc_timeout_ms=100000,
            stub_fanout=2,
            slow_sign_threshold=8,
            sort_run_merger_read_ahead_buffer=1 << 20,
            sort_run_merger_read_batch_size=128,
            batch_processor_options=dj_pb.BatchProcessorOptions(
                batch_size=1024,
                max_flying_item=1 << 14,
            ),
            input_raw_data=dj_pb.RawDataOptions(
                raw_data_iter='TF_RECORD',
                read_ahead_size=1 << 20,
            ),
            writer_options=dj_pb.WriterOptions(output_writer='CSV_DICT'),
        )

        # Set immediately before construction, on every iteration, exactly
        # as the original does.
        # NOTE(review): presumably RsaPsiPreProcessor reads this variable;
        # confirm before hoisting it out of the loop.
        os.environ['ETCD_BASE_DIR'] = self.follower_base_dir
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            opts, self.kvstore_type, True
        )
        worker.start_process()
        started.append(worker)

    # Wait only after every partition has been started.
    for worker in started:
        worker.wait_for_finished()
def _preprocess_rsa_psi_leader(self):
    """Run one leader-role RSA PSI preprocessor per partition.

    The key is not read here: ``self._rsa_private_key_path`` is handed to
    the options as ``rsa_key_file_path``.  A preprocessor is created and
    started for each partition id in
    ``range(self._data_source_l.data_source_meta.partition_num)``; the
    method then blocks on ``wait_for_finished()`` for each of them in
    start order.
    """
    partition_count = self._data_source_l.data_source_meta.partition_num
    started = []
    for pid in range(partition_count):
        input_paths = [self._psi_raw_data_fpaths_l[pid]]
        batch_opts = dj_pb.BatchProcessorOptions(
            batch_size=1024,
            max_flying_item=1 << 14,
        )
        opts = dj_pb.RsaPsiPreProcessorOptions(
            role=common_pb.FLRole.Leader,
            rsa_key_file_path=self._rsa_private_key_path,
            input_file_paths=input_paths,
            output_file_dir=self._pre_processor_ouput_dir_l,
            raw_data_publish_dir=self._raw_data_pub_dir_l,
            partition_id=pid,
            offload_processor_number=1,
            batch_processor_options=batch_opts,
        )
        # NOTE(review): the meaning of the trailing positional ``True`` is
        # defined by RsaPsiPreProcessor and is not visible from here.
        worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
            opts,
            self._etcd_name,
            self._etcd_addrs,
            self._etcd_base_dir_l,
            True,
        )
        worker.start_process()
        started.append(worker)

    # Wait only after every partition has been started.
    for worker in started:
        worker.wait_for_finished()