Esempio n. 1
0
 def _preprocess_rsa_psi_follower(self):
     """Run one follower-role RSA PSI preprocessor per partition.

     The key material is read as raw bytes from
     ``self._rsa_public_key_path``. A preprocessor is created and started
     for every partition of ``self._data_source_f``; only once all of them
     have been started is ``wait_for_finished()`` called on each in turn.
     """
     with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
         public_key_pem = key_file.read()

     partition_count = self._data_source_f.data_source_meta.partition_num
     started = []
     for part_id in range(partition_count):
         raw_inputs = [self._psi_raw_data_fpaths_f[part_id]]
         batch_opts = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         follower_opts = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='follower-rsa-psi-processor',
             role=common_pb.FLRole.Follower,
             rsa_key_pem=public_key_pem,
             input_file_paths=raw_inputs,
             output_file_dir=self._pre_processor_ouput_dir_f,
             raw_data_publish_dir=self._raw_data_pub_dir_f,
             partition_id=part_id,
             leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
             offload_processor_number=1,
             max_flying_sign_batch=128,
             max_flying_sign_rpc=64,
             sign_rpc_timeout_ms=100000,
             stub_fanout=2,
             slow_sign_threshold=8,
             sort_run_merger_read_ahead_buffer=1 << 20,
             # rpc_sync_mode is switched on for even partition ids only,
             # so both settings get used across the partitions.
             rpc_sync_mode=(part_id % 2 == 0),
             rpc_thread_pool_size=16,
             batch_processor_options=batch_opts)
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             follower_opts, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_f, True)
         worker.start_process()
         started.append(worker)

     # Start everything first, then wait, so no partition's start is held
     # back by waiting on an earlier one.
     for worker in started:
         worker.wait_for_finished()
Esempio n. 2
0
 def _preprocess_rsa_psi_leader(self):
     """Run one leader-role RSA PSI preprocessor per partition.

     The key material is read as raw bytes from
     ``self._rsa_private_key_path``. A preprocessor is created and started
     for every partition of ``self._data_source_l``; only once all of them
     have been started is ``wait_for_finished()`` called on each in turn.
     """
     with gfile.GFile(self._rsa_private_key_path, 'rb') as key_file:
         private_key_pem = key_file.read()

     partition_count = self._data_source_l.data_source_meta.partition_num
     started = []
     for part_id in range(partition_count):
         raw_inputs = [self._psi_raw_data_fpaths_l[part_id]]
         batch_opts = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         leader_opts = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='leader-rsa-psi-processor',
             role=common_pb.FLRole.Leader,
             rsa_key_pem=private_key_pem,
             input_file_paths=raw_inputs,
             output_file_dir=self._pre_processor_ouput_dir_l,
             raw_data_publish_dir=self._raw_data_pub_dir_l,
             partition_id=part_id,
             offload_processor_number=1,
             max_flying_sign_batch=128,
             stub_fanout=2,
             slow_sign_threshold=8,
             sort_run_merger_read_ahead_buffer=1 << 20,
             batch_processor_options=batch_opts)
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             leader_opts, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_l, True)
         worker.start_process()
         started.append(worker)

     # Start everything first, then wait, so no partition's start is held
     # back by waiting on an earlier one.
     for worker in started:
         worker.wait_for_finished()
Esempio n. 3
0
 def _preprocess_rsa_psi_follower(self):
     """Run one follower-role RSA PSI preprocessor per partition.

     Reads the key bytes from ``self._rsa_public_key_path``, then creates
     and starts a preprocessor for each partition of
     ``self._data_source_f``. After every preprocessor has been started,
     ``wait_for_finished()`` is called on each of them in start order.
     """
     with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
         public_key_pem = key_file.read()

     meta = self._data_source_f.data_source_meta
     launched = []
     for idx in range(meta.partition_num):
         partition_inputs = [self._psi_raw_data_fpaths_f[idx]]
         batching = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         opts = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='follower-rsa-psi-processor',
             role=common_pb.FLRole.Follower,
             rsa_key_pem=public_key_pem,
             input_file_paths=partition_inputs,
             output_file_dir=self._pre_processor_ouput_dir_f,
             raw_data_publish_dir=self._raw_data_pub_dir_f,
             partition_id=idx,
             leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
             offload_processor_number=1,
             batch_processor_options=batching)
         preprocessor = rsa_psi_preprocessor.RsaPsiPreProcessor(
             opts, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_f, True)
         preprocessor.start_process()
         launched.append(preprocessor)

     # Waiting happens in a second pass so all partitions are started
     # before the first wait_for_finished() call.
     for preprocessor in launched:
         preprocessor.wait_for_finished()
Esempio n. 4
0
 def _preprocess_rsa_psi_follower(self):
     """Publish follower raw data and run one RSA PSI preprocessor per partition.

     Reads the key bytes from ``self._rsa_public_key_path`` and records the
     subscribe directory name on ``self._follower_rsa_psi_sub_dir``. For
     each partition of ``self._data_source_f`` the partition's raw data file
     is published and marked finished through a ``RawDataPublisher``, then
     a follower preprocessor subscribed to that directory is created and
     started. ``wait_for_finished()`` is called on every preprocessor once
     all of them have been started.

     Side effect: ``os.environ['ETCD_BASE_DIR']`` is set to
     ``self.follower_base_dir`` before each preprocessor is constructed.
     """
     with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
         public_key_pem = key_file.read()

     subscribe_dir = 'follower_rsa_psi_sub_dir'
     self._follower_rsa_psi_sub_dir = subscribe_dir
     publisher = raw_data_publisher.RawDataPublisher(
         self._kvstore_f, subscribe_dir)

     partition_count = self._data_source_f.data_source_meta.partition_num
     launched = []
     for part_id in range(partition_count):
         # Publish this partition's input and close it off before the
         # preprocessor that subscribes to it is created.
         publisher.publish_raw_data(
             part_id, [self._psi_raw_data_fpaths_f[part_id]])
         publisher.finish_raw_data(part_id)

         batching = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         reader_opts = dj_pb.RawDataOptions(
             raw_data_iter='TF_RECORD', read_ahead_size=1 << 20)
         writer_opts = dj_pb.WriterOptions(output_writer='CSV_DICT')
         opts = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='follower-rsa-psi-processor',
             role=common_pb.FLRole.Follower,
             rsa_key_pem=public_key_pem,
             input_file_subscribe_dir=subscribe_dir,
             output_file_dir=self._pre_processor_ouput_dir_f,
             raw_data_publish_dir=self._raw_data_pub_dir_f,
             partition_id=part_id,
             leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
             offload_processor_number=1,
             max_flying_sign_batch=128,
             max_flying_sign_rpc=64,
             sign_rpc_timeout_ms=100000,
             stub_fanout=2,
             slow_sign_threshold=8,
             sort_run_merger_read_ahead_buffer=1 << 20,
             sort_run_merger_read_batch_size=128,
             batch_processor_options=batching,
             input_raw_data=reader_opts,
             writer_options=writer_opts)

         # The environment variable is (re)assigned right before every
         # construction, exactly as the surrounding flow expects.
         os.environ['ETCD_BASE_DIR'] = self.follower_base_dir
         preprocessor = rsa_psi_preprocessor.RsaPsiPreProcessor(
             opts, self.kvstore_type, True)
         preprocessor.start_process()
         launched.append(preprocessor)

     for preprocessor in launched:
         preprocessor.wait_for_finished()
Esempio n. 5
0
 def _preprocess_rsa_psi_leader(self):
     """Run one leader-role RSA PSI preprocessor per partition.

     The key is not read here; its path
     (``self._rsa_private_key_path``) is passed through the options as
     ``rsa_key_file_path``. A preprocessor is created and started for each
     partition of ``self._data_source_l``, and ``wait_for_finished()`` is
     called on every one of them after all have been started.
     """
     partition_count = self._data_source_l.data_source_meta.partition_num
     launched = []
     for part_id in range(partition_count):
         partition_inputs = [self._psi_raw_data_fpaths_l[part_id]]
         batching = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         opts = dj_pb.RsaPsiPreProcessorOptions(
             role=common_pb.FLRole.Leader,
             rsa_key_file_path=self._rsa_private_key_path,
             input_file_paths=partition_inputs,
             output_file_dir=self._pre_processor_ouput_dir_l,
             raw_data_publish_dir=self._raw_data_pub_dir_l,
             partition_id=part_id,
             offload_processor_number=1,
             batch_processor_options=batching)
         preprocessor = rsa_psi_preprocessor.RsaPsiPreProcessor(
             opts, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_l, True)
         preprocessor.start_process()
         launched.append(preprocessor)

     # Second pass: wait only after every partition has been started.
     for preprocessor in launched:
         preprocessor.wait_for_finished()