Example #1
0
 def _preprocess_rsa_psi_leader(self):
     """Run the leader-side RSA PSI preprocessor for every partition.

     Reads the key bytes from ``self._rsa_private_key_path`` and then, for
     each partition of ``self._data_source_l``, builds the options, creates
     an ``RsaPsiPreProcessor`` and starts it. Every processor is started
     before any of them is waited on; the method returns only after all of
     them have finished.
     """
     key_pem = None
     with gfile.GFile(self._rsa_private_key_path, 'rb') as key_file:
         key_pem = key_file.read()
     partition_count = self._data_source_l.data_source_meta.partition_num
     started = []
     for pid in range(partition_count):
         batch_options = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         leader_options = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='leader-rsa-psi-processor',
             role=common_pb.FLRole.Leader,
             rsa_key_pem=key_pem,
             # Each partition consumes exactly one raw-data file.
             input_file_paths=[self._psi_raw_data_fpaths_l[pid]],
             output_file_dir=self._pre_processor_ouput_dir_l,
             raw_data_publish_dir=self._raw_data_pub_dir_l,
             partition_id=pid,
             offload_processor_number=1,
             max_flying_sign_batch=128,
             stub_fanout=2,
             slow_sign_threshold=8,
             sort_run_merger_read_ahead_buffer=1 << 20,
             batch_processor_options=batch_options)
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             leader_options, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_l, True)
         worker.start_process()
         started.append(worker)
     # Block until every started processor reports completion.
     for worker in started:
         worker.wait_for_finished()
Example #2
0
 def _preprocess_rsa_psi_follower(self):
     """Run the follower-side RSA PSI preprocessor for every partition.

     Reads the key bytes from ``self._rsa_public_key_path`` and then, for
     each partition of ``self._data_source_f``, builds the options, creates
     an ``RsaPsiPreProcessor`` pointed at ``self._rsa_psi_signer_addr`` and
     starts it. Every processor is started before any of them is waited on;
     the method returns only after all of them have finished.
     """
     key_pem = None
     with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
         key_pem = key_file.read()
     partition_count = self._data_source_f.data_source_meta.partition_num
     started = []
     for pid in range(partition_count):
         batch_options = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         follower_options = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='follower-rsa-psi-processor',
             role=common_pb.FLRole.Follower,
             rsa_key_pem=key_pem,
             # Each partition consumes exactly one raw-data file.
             input_file_paths=[self._psi_raw_data_fpaths_f[pid]],
             output_file_dir=self._pre_processor_ouput_dir_f,
             raw_data_publish_dir=self._raw_data_pub_dir_f,
             partition_id=pid,
             leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
             offload_processor_number=1,
             max_flying_sign_batch=128,
             max_flying_sign_rpc=64,
             sign_rpc_timeout_ms=100000,
             stub_fanout=2,
             slow_sign_threshold=8,
             sort_run_merger_read_ahead_buffer=1 << 20,
             # Even partitions set rpc_sync_mode, odd partitions clear it,
             # so both settings are exercised.
             rpc_sync_mode=(pid % 2 == 0),
             rpc_thread_pool_size=16,
             batch_processor_options=batch_options)
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             follower_options, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_f, True)
         worker.start_process()
         started.append(worker)
     # Block until every started processor reports completion.
     for worker in started:
         worker.wait_for_finished()
Example #3
0
 def _preprocess_rsa_psi_follower(self):
     """Run the follower-side RSA PSI preprocessor for every partition.

     Reads the key bytes from ``self._rsa_public_key_path`` and then, for
     each partition of ``self._data_source_f``, builds the options, creates
     an ``RsaPsiPreProcessor`` pointed at ``self._rsa_psi_signer_addr`` and
     starts it. Every processor is started before any of them is waited on;
     the method returns only after all of them have finished.
     """
     key_pem = None
     with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
         key_pem = key_file.read()
     partition_count = self._data_source_f.data_source_meta.partition_num
     started = []
     for pid in range(partition_count):
         batch_options = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         follower_options = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='follower-rsa-psi-processor',
             role=common_pb.FLRole.Follower,
             rsa_key_pem=key_pem,
             # Each partition consumes exactly one raw-data file.
             input_file_paths=[self._psi_raw_data_fpaths_f[pid]],
             output_file_dir=self._pre_processor_ouput_dir_f,
             raw_data_publish_dir=self._raw_data_pub_dir_f,
             partition_id=pid,
             leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
             offload_processor_number=1,
             batch_processor_options=batch_options)
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             follower_options, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_f, True)
         worker.start_process()
         started.append(worker)
     # Block until every started processor reports completion.
     for worker in started:
         worker.wait_for_finished()
Example #4
0
 def _preprocess_rsa_psi_follower(self):
     """Run the follower-side RSA PSI preprocessor for every partition.

     Reads the key bytes from ``self._rsa_public_key_path``, records the
     subscribe directory name on ``self._follower_rsa_psi_sub_dir`` and
     creates a ``RawDataPublisher`` on ``self._kvstore_f`` for it. For each
     partition of ``self._data_source_f`` the partition's raw-data file is
     published and marked finished, then an ``RsaPsiPreProcessor``
     subscribing to that directory is created and started. Every processor
     is started before any of them is waited on; the method returns only
     after all of them have finished.
     """
     key_pem = None
     with gfile.GFile(self._rsa_public_key_path, 'rb') as key_file:
         key_pem = key_file.read()
     self._follower_rsa_psi_sub_dir = 'follower_rsa_psi_sub_dir'
     publisher = raw_data_publisher.RawDataPublisher(
         self._kvstore_f, self._follower_rsa_psi_sub_dir)
     partition_count = self._data_source_f.data_source_meta.partition_num
     started = []
     for pid in range(partition_count):
         # Publish this partition's single input file, then close the
         # partition, before the processor is created.
         partition_inputs = [self._psi_raw_data_fpaths_f[pid]]
         publisher.publish_raw_data(pid, partition_inputs)
         publisher.finish_raw_data(pid)
         follower_options = dj_pb.RsaPsiPreProcessorOptions(
             preprocessor_name='follower-rsa-psi-processor',
             role=common_pb.FLRole.Follower,
             rsa_key_pem=key_pem,
             input_file_subscribe_dir=self._follower_rsa_psi_sub_dir,
             output_file_dir=self._pre_processor_ouput_dir_f,
             raw_data_publish_dir=self._raw_data_pub_dir_f,
             partition_id=pid,
             leader_rsa_psi_signer_addr=self._rsa_psi_signer_addr,
             offload_processor_number=1,
             max_flying_sign_batch=128,
             max_flying_sign_rpc=64,
             sign_rpc_timeout_ms=100000,
             stub_fanout=2,
             slow_sign_threshold=8,
             sort_run_merger_read_ahead_buffer=1 << 20,
             sort_run_merger_read_batch_size=128,
             batch_processor_options=dj_pb.BatchProcessorOptions(
                 batch_size=1024, max_flying_item=1 << 14),
             input_raw_data=dj_pb.RawDataOptions(
                 raw_data_iter='TF_RECORD', read_ahead_size=1 << 20),
             writer_options=dj_pb.WriterOptions(output_writer='CSV_DICT'))
         # Set right before construction on every iteration.
         # NOTE(review): presumably the processor's kvstore reads this
         # variable -- confirm against RsaPsiPreProcessor.
         os.environ['ETCD_BASE_DIR'] = self.follower_base_dir
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             follower_options, self.kvstore_type, True)
         worker.start_process()
         started.append(worker)
     # Block until every started processor reports completion.
     for worker in started:
         worker.wait_for_finished()
Example #5
0
 def _preprocess_rsa_psi_leader(self):
     """Run the leader-side RSA PSI preprocessor for every partition.

     For each partition of ``self._data_source_l`` this builds the options
     (passing the key by path, ``self._rsa_private_key_path``), creates an
     ``RsaPsiPreProcessor`` and starts it. Every processor is started
     before any of them is waited on; the method returns only after all of
     them have finished.
     """
     partition_count = self._data_source_l.data_source_meta.partition_num
     started = []
     for pid in range(partition_count):
         batch_options = dj_pb.BatchProcessorOptions(
             batch_size=1024, max_flying_item=1 << 14)
         leader_options = dj_pb.RsaPsiPreProcessorOptions(
             role=common_pb.FLRole.Leader,
             rsa_key_file_path=self._rsa_private_key_path,
             # Each partition consumes exactly one raw-data file.
             input_file_paths=[self._psi_raw_data_fpaths_l[pid]],
             output_file_dir=self._pre_processor_ouput_dir_l,
             raw_data_publish_dir=self._raw_data_pub_dir_l,
             partition_id=pid,
             offload_processor_number=1,
             batch_processor_options=batch_options)
         worker = rsa_psi_preprocessor.RsaPsiPreProcessor(
             leader_options, self._etcd_name, self._etcd_addrs,
             self._etcd_base_dir_l, True)
         worker.start_process()
         started.append(worker)
     # Block until every started processor reports completion.
     for worker in started:
         worker.wait_for_finished()
Example #6
0
 preprocessor_options = dj_pb.RsaPsiPreProcessorOptions(
         preprocessor_name=args.preprocessor_name,
         rsa_key_pem=rsa_key_pem,
         input_file_paths=list(set(all_fpaths)),
         input_file_subscribe_dir=args.input_file_subscribe_dir,
         output_file_dir=args.output_file_dir,
         raw_data_publish_dir=args.raw_data_publish_dir,
         partition_id=args.partition_id,
         leader_rsa_psi_signer_addr=args.leader_rsa_psi_signer_addr,
         offload_processor_number=offload_processor_number,
         max_flying_sign_batch=args.max_flying_sign_batch,
         max_flying_sign_rpc=args.max_flying_sign_rpc,
         sign_rpc_timeout_ms=args.sign_rpc_timeout_ms,
         stub_fanout=args.stub_fanout,
         slow_sign_threshold=args.slow_sign_threshold,
         sort_run_merger_read_ahead_buffer=\
             args.sort_run_merger_read_ahead_buffer,
         sort_run_merger_read_batch_size=\
             args.sort_run_merger_read_batch_size,
         batch_processor_options=dj_pb.BatchProcessorOptions(
             batch_size=args.process_batch_size,
             max_flying_item=-1
         ),
         input_raw_data=dj_pb.RawDataOptions(
             raw_data_iter=args.raw_data_iter,
             compressed_type=args.compressed_type,
             read_ahead_size=args.read_ahead_size,
             read_batch_size=args.read_batch_size
         ),
         writer_options=dj_pb.WriterOptions(
             output_writer=args.output_builder,
             compressed_type=args.builder_compressed_type,
         )
     )
        for fp in args.input_file_paths:
            all_fpaths.append(fp)
    if args.input_dir is not None:
        all_fpaths += [
            os.path.join(args.input_dir, f)
            for f in gfile.ListDirectory(args.input_dir)
        ]
    if len(all_fpaths) == 0:
        raise RuntimeError("no input files for preprocessor")
    preprocessor_options = dj_pb.RsaPsiPreProcessorOptions(
            role=common_pb.FLRole.Leader if args.psi_role == 'leader' \
                                         else common_pb.FLRole.Follower,
            rsa_key_file_path=args.rsa_key_file_path,
            input_file_paths=list(set(all_fpaths)),
            output_file_dir=args.output_file_dir,
            raw_data_publish_dir=args.raw_data_publish_dir,
            partition_id=args.partition_id,
            leader_rsa_psi_signer_addr=args.leader_rsa_psi_signer_addr,
            offload_processor_number=args.offload_processor_number,
            batch_processor_options=dj_pb.BatchProcessorOptions(
                batch_size=args.process_batch_size,
                max_flying_item=args.max_flying_item
            )
        )
    preprocessor = RsaPsiPreProcessor(preprocessor_options, args.etcd_name,
                                      args.etcd_addrs, args.etcd_base_dir)
    preprocessor.start_process()
    logging.info("PreProcessor launched for %s of RSA PSI", args.psi_role)
    preprocessor.wait_for_finished()
    logging.info("PreProcessor finished for %s of RSA PSI", args.psi_role)
Example #8
0
     assert args.rsa_key_path is not None
     with gfile.GFile(args.rsa_key_path, 'rb') as f:
         rsa_key_pem = f.read()
 preprocessor_options = dj_pb.RsaPsiPreProcessorOptions(
         preprocessor_name=args.preprocessor_name,
         role=common_pb.FLRole.Leader if args.psi_role == 'leader' \
                                      else common_pb.FLRole.Follower,
         rsa_key_pem=rsa_key_pem,
         input_file_paths=list(set(all_fpaths)),
         input_file_subscribe_dir=args.input_file_subscribe_dir,
         output_file_dir=args.output_file_dir,
         raw_data_publish_dir=args.raw_data_publish_dir,
         partition_id=args.partition_id,
         leader_rsa_psi_signer_addr=args.leader_rsa_psi_signer_addr,
         offload_processor_number=args.offload_processor_number,
         max_flying_sign_batch=args.max_flying_sign_batch,
         max_flying_sign_rpc=args.max_flying_sign_rpc,
         sign_rpc_timeout_ms=args.sign_rpc_timeout_ms,
         stub_fanout=args.stub_fanout,
         slow_sign_threshold=args.slow_sign_threshold,
         rpc_sync_mode=args.rpc_sync_mode,
         rpc_thread_pool_size=args.rpc_thread_pool_size,
         sort_run_merger_read_ahead_buffer=\
             args.sort_run_merger_read_ahead_buffer,
         batch_processor_options=dj_pb.BatchProcessorOptions(
             batch_size=args.process_batch_size,
             max_flying_item=args.max_flying_item
         )
     )
 preprocessor = RsaPsiPreProcessor(preprocessor_options, args.etcd_name,
                                   args.etcd_addrs, args.etcd_base_dir)
 preprocessor.start_process()