Exemple #1
0
 def test_update(self) -> None:
     """Verify that ``repo.update`` persists a changed ``instances`` list.

     A private computation instance is stored, a second MPC instance is
     added to its ``instances`` list, the change is written back with
     ``repo.update`` and the stored copy is read again for comparison.

     The stored record is removed in a ``finally`` clause so that a failing
     step or assertion does not leave it behind in the repository.
     """
     instance_id = self._get_random_id()
     pc_instance = PrivateComputationInstance(
         instance_id=instance_id,
         role=PrivateComputationRole.PUBLISHER,
         instances=[self.test_mpc_instance],
         status=PrivateComputationInstanceStatus.CREATED,
         status_update_ts=1600000000,
         num_files_per_mpc_container=40,
         game_type=PrivateComputationGameType.LIFT,
         input_path="in",
         output_dir="out",
         num_pid_containers=4,
         num_mpc_containers=4,
         concurrency=1,
     )
     self.repo.create(pc_instance)
     try:
         # Create a new MPC instance to be added to instances
         new_mpc_instance = PCSMPCInstance.create_instance(
             instance_id=instance_id,
             game_name="aggregation",
             mpc_party=MPCParty.SERVER,
             num_workers=1,
         )
         updated_instances = [self.test_mpc_instance, new_mpc_instance]
         # Update instances
         pc_instance.instances = updated_instances
         self.repo.update(pc_instance)
         # Assert instances is updated
         self.assertEqual(
             self.repo.read(instance_id).instances, updated_instances
         )
     finally:
         # Always remove the record created above, even when the test fails
         self.repo.delete(instance_id)
Exemple #2
0
 def test_pc_deserialiation(self) -> None:
     """Check that the instance stored at ``LIFT_PC_PATH`` still deserializes.

     The file content is stripped of surrounding whitespace and handed to
     ``PrivateComputationInstance.loads_schema``. Any exception is re-raised
     as a ``RuntimeError`` carrying ``ERR_MSG``, chained to the original error.
     """
     # this tests that old fields (and instances) can be deserialized
     with open(LIFT_PC_PATH) as schema_file:
         raw_content = schema_file.read()
         serialized_instance = raw_content.strip()

     try:
         PrivateComputationInstance.loads_schema(serialized_instance)
     except Exception as err:
         raise RuntimeError(ERR_MSG) from err
Exemple #3
0
    def _update_instance(
        self, private_computation_instance: PrivateComputationInstance
    ) -> PrivateComputationInstance:
        """Refresh an instance's status from its current stage and persist it.

        Args:
            private_computation_instance: the instance whose status is refreshed

        Returns:
            The same instance object, after its status was updated and it was
            written to the instance repository
        """
        pc_instance = private_computation_instance

        # the stage service of the current stage knows how to compute the status
        stage = pc_instance.current_stage
        stage_service = stage.get_stage_service(self.stage_service_args)
        self.logger.info(f"Updating instance | {stage}={stage!r}")

        latest_status = stage_service.get_status(pc_instance)
        pc_instance.update_status(latest_status, self.logger)
        self.instance_repository.update(pc_instance)

        finished_message = (
            f"Finished updating instance: {pc_instance.instance_id}"
        )
        self.logger.info(finished_message)
        return pc_instance
    async def test_run_async(self, pid_mr_svc_mock) -> None:
        """Run the PID MR stage against a stubbed workflow service.

        The workflow service's ``start_workflow`` and ``get_workflow_status``
        are replaced by ``MagicMock`` objects, so the stage is expected to
        report ``PID_MR_COMPLETED``, expose the derived output path and store
        a ``StageStateInstance`` whose id is the stubbed execution arn.

        Args:
            pid_mr_svc_mock: not used in the body; presumably injected by a
                patch decorator outside this view
        """
        input_csv = "https://mpc-aem-exp-platform-input.s3.us-west-2.amazonaws.com/pid_test_data/stress_test/input.csv"
        output_root = "https://mpc-aem-exp-platform-input.s3.us-west-2.amazonaws.com/pid_test/output"
        pid_mr_settings = {
            "PIDWorkflowConfigs": {"state_machine_arn": "machine_arn"},
            "PIDRunConfigs": {"conf": "conf1"},
            "sparkConfigs": {"conf-2": "conf2"},
        }

        pc_instance = PrivateComputationInstance(
            instance_id="publisher_123",
            role=PrivateComputationRole.PUBLISHER,
            game_type=PrivateComputationGameType.LIFT,
            status=PrivateComputationInstanceStatus.PID_MR_STARTED,
            status_update_ts=1600000000,
            instances=[],
            num_pid_containers=1,
            num_mpc_containers=1,
            num_files_per_mpc_container=1,
            input_path=input_csv,
            output_dir=output_root,
            pid_configs={"pid_mr": pid_mr_settings},
        )
        pc_instance._stage_flow_cls_name = (
            PrivateComputationMRStageFlow.get_cls_name()
        )

        # stub out the two workflow calls made by the stage service
        workflow_svc = SfnWorkflowService("us-west-2", "access_key", "access_data")
        workflow_svc.start_workflow = MagicMock(return_value="execution_arn")
        workflow_svc.get_workflow_status = MagicMock(
            return_value=WorkflowStatus.COMPLETED
        )

        stage_svc = PIDMRStageService(workflow_svc)
        await stage_svc.run_async(pc_instance)

        reported_status = stage_svc.get_status(pc_instance)
        self.assertEqual(
            reported_status,
            PrivateComputationInstanceStatus.PID_MR_COMPLETED,
        )
        self.assertEqual(
            pc_instance.pid_mr_stage_output_data_path,
            "https://mpc-aem-exp-platform-input.s3.us-west-2.amazonaws.com/pid_test/output/publisher_123_out_dir/pid_mr",
        )
        self.assertEqual(pc_instance.instances[0].instance_id, "execution_arn")
        self.assertIsInstance(pc_instance.instances[0], StageStateInstance)
    async def test_run_async(self, pid_svc_mock) -> None:
        """Run the ID match stage with a mocked PID service.

        ``pid_svc_mock.run_instance`` is replaced by an ``AsyncMock`` that
        returns a prepared ``PIDInstance``; after the stage ran, the first
        entry of ``pc_instance.instances`` must be a ``PIDInstance``.

        Args:
            pid_svc_mock: mocked PID service handed to ``IdMatchStageService``
        """
        pc_instance = PrivateComputationInstance(
            instance_id="123",
            role=PrivateComputationRole.PUBLISHER,
            game_type=PrivateComputationGameType.LIFT,
            status=PrivateComputationInstanceStatus.CREATED,
            status_update_ts=1600000000,
            instances=[],
            input_path="456",
            output_dir="789",
            num_pid_containers=1,
            num_mpc_containers=1,
            num_files_per_mpc_container=1,
        )

        # the instance the mocked PID service hands back from run_instance
        started_pid_instance = PIDInstance(
            instance_id="123_id_match0",
            protocol=PIDProtocol.UNION_PID,
            pid_role=PIDRole.PUBLISHER,
            num_shards=2,
            input_path=pc_instance.input_path,
            output_path=pc_instance.pid_stage_output_data_path,
            status=PIDInstanceStatus.STARTED,
        )
        pid_svc_mock.run_instance = AsyncMock(return_value=started_pid_instance)

        id_match_svc = IdMatchStageService(pid_svc_mock)
        await id_match_svc.run_async(pc_instance)

        self.assertIsInstance(pc_instance.instances[0], PIDInstance)
Exemple #6
0
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Runs the private computation prepare data stage

        The stage first runs the combiner service and then the sharder service,
        waiting for the containers of each before continuing.

        Args:
            pc_instance: the private computation instance to run prepare data with
            server_ips: ignored

        Returns:
            An updated version of pc_instance
        """

        combine_output_path = pc_instance.data_processing_output_path + "_combine"

        # positional arguments shared by the combiner and the sharder call
        service_args = (
            pc_instance,
            self._onedocker_svc,
            self._onedocker_binary_config_map,
            combine_output_path,
        )

        self._logger.info(f"[{self}] Starting id spine combiner service")

        # TODO: we will write log_cost_to_s3 to the instance, so this function interface
        #   will get simplified
        await start_combiner_service(
            *service_args,
            log_cost_to_s3=self._log_cost_to_s3,
            wait_for_containers=True,
        )
        self._logger.info("Finished running CombinerService, starting to reshard")

        # reshard each file into x shards
        #     note we need each file to be sharded into the same # of files
        #     because we want to keep the data of each existing file to run
        #     on the same container
        await start_sharder_service(*service_args, wait_for_containers=True)
        self._logger.info("All sharding coroutines finished")

        # currently, prepare data blocks and runs until completion or failure (exception is thrown)
        # the legacy way of calling prepare data does NOT update the status,
        # whereas the new way of calling prepare data can update the status.
        if not self._update_status_to_complete:
            return pc_instance

        pc_instance.status = pc_instance.current_stage.completed_status
        return pc_instance
 def _create_pc_instance(self) -> PrivateComputationInstance:
     """Build a minimal publisher LIFT instance for tests.

     Returns:
         A PrivateComputationInstance with id "123", status UNKNOWN, an empty
         ``instances`` list and a single PID/MPC container
     """
     pc_instance = PrivateComputationInstance(
         instance_id="123",
         role=PrivateComputationRole.PUBLISHER,
         game_type=PrivateComputationGameType.LIFT,
         status=PrivateComputationInstanceStatus.UNKNOWN,
         status_update_ts=1600000000,
         instances=[],
         input_path="456",
         output_dir="789",
         num_pid_containers=1,
         num_mpc_containers=1,
         num_files_per_mpc_container=1,
     )
     return pc_instance
Exemple #8
0
 def create_sample_instance(self) -> PrivateComputationInstance:
     """Build a partner LIFT instance whose ID matching has completed.

     Returns:
         A PrivateComputationInstance using ``self.test_num_containers`` for
         both container counts and ``NUM_NEW_SHARDS_PER_FILE`` files per MPC
         container
     """
     container_count = self.test_num_containers
     sample_instance = PrivateComputationInstance(
         instance_id="test_instance_123",
         role=PrivateComputationRole.PARTNER,
         game_type=PrivateComputationGameType.LIFT,
         status=PrivateComputationInstanceStatus.ID_MATCHING_COMPLETED,
         status_update_ts=1600000000,
         instances=[],
         input_path="456",
         output_dir="789",
         num_pid_containers=container_count,
         num_mpc_containers=container_count,
         num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
     )
     return sample_instance
Exemple #9
0
 def _get_pc_instance(
     self, status: PrivateComputationInstanceStatus
 ) -> PrivateComputationInstance:
     """Build a partner LIFT instance in the requested status.

     Args:
         status: the status the new instance is created with

     Returns:
         A PrivateComputationInstance identified by ``self.instance_id`` that
         uses ``self.num_shards`` for both container counts
     """
     shard_count = self.num_shards
     pc_instance = PrivateComputationInstance(
         instance_id=self.instance_id,
         role=PrivateComputationRole.PARTNER,
         game_type=PrivateComputationGameType.LIFT,
         status=status,
         status_update_ts=1600000000,
         instances=[],
         input_path="fake_input_path",
         output_dir="789",
         num_pid_containers=shard_count,
         num_mpc_containers=shard_count,
         num_files_per_mpc_container=40,
     )
     return pc_instance
    def _create_pc_instance(self) -> PrivateComputationInstance:
        """Build a partner ATTRIBUTION instance whose ID matching has completed.

        Returns:
            A PrivateComputationInstance using the LAST_CLICK_1D attribution
            rule, two PID/MPC containers and a padding size of 4
        """
        attribution_instance = PrivateComputationInstance(
            instance_id="test_instance_123",
            role=PrivateComputationRole.PARTNER,
            game_type=PrivateComputationGameType.ATTRIBUTION,
            attribution_rule=AttributionRule.LAST_CLICK_1D,
            status=PrivateComputationInstanceStatus.ID_MATCHING_COMPLETED,
            status_update_ts=1600000000,
            instances=[],
            input_path="456",
            output_dir="789",
            num_pid_containers=2,
            num_mpc_containers=2,
            num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
            padding_size=4,
        )
        return attribution_instance
Exemple #11
0
 def test_create_with_invalid_num_containers(self) -> None:
     """Expect ``ValueError`` when an instance is built with 8 PID and 4 MPC containers."""
     random_id = self._get_random_id()

     # the constructor itself is expected to reject this container combination
     with self.assertRaises(ValueError):
         PrivateComputationInstance(
             instance_id=random_id,
             role=PrivateComputationRole.PUBLISHER,
             game_type=PrivateComputationGameType.LIFT,
             status=PrivateComputationInstanceStatus.CREATED,
             status_update_ts=1600000000,
             instances=[self.test_mpc_instance],
             input_path="in",
             output_dir="out",
             num_pid_containers=8,
             num_mpc_containers=4,
             num_files_per_mpc_container=40,
             concurrency=1,
         )
Exemple #12
0
 def create_sample_pc_instance(
     self, pc_role: PrivateComputationRole
 ) -> PrivateComputationInstance:
     """Build a LIFT instance whose PID prepare stage has completed.

     Args:
         pc_role: the role assigned to the new instance

     Returns:
         A PrivateComputationInstance built from the fixture's id, paths and
         container count, with ``pid_use_row_numbers`` enabled
     """
     container_count = self.test_num_containers
     sample_instance = PrivateComputationInstance(
         instance_id=self.pc_instance_id,
         role=pc_role,
         game_type=PrivateComputationGameType.LIFT,
         status=PrivateComputationInstanceStatus.PID_PREPARE_COMPLETED,
         status_update_ts=1600000000,
         instances=[],
         input_path=self.input_path,
         output_dir=self.output_path,
         num_pid_containers=container_count,
         num_mpc_containers=container_count,
         num_files_per_mpc_container=container_count,
         pid_use_row_numbers=True,
     )
     return sample_instance
Exemple #13
0
 def test_read(self) -> None:
     """Verify that ``repo.read`` returns an instance equal to the stored one.

     The stored record is removed in a ``finally`` clause so that a failing
     read or assertion does not leave it behind in the repository.
     """
     instance_id = self._get_random_id()
     stored_instance = PrivateComputationInstance(
         instance_id=instance_id,
         role=PrivateComputationRole.PUBLISHER,
         instances=[self.test_mpc_instance],
         status=PrivateComputationInstanceStatus.CREATED,
         status_update_ts=1600000000,
         num_files_per_mpc_container=40,
         game_type=PrivateComputationGameType.LIFT,
         input_path="in",
         output_dir="out",
         num_pid_containers=4,
         num_mpc_containers=4,
         concurrency=1,
     )
     self.repo.create(stored_instance)
     try:
         self.assertEqual(self.repo.read(instance_id), stored_instance)
     finally:
         # Always remove the record created above, even when the test fails
         self.repo.delete(instance_id)
Exemple #14
0
 def create_sample_pc_instance(
     self,
     pc_role: PrivateComputationRole,
     test_num_containers: int,
     hmac_key: Optional[str],
 ) -> PrivateComputationInstance:
     """Build a LIFT instance whose PID shard stage has completed.

     Args:
         pc_role: the role assigned to the new instance
         test_num_containers: used for the PID container count, the MPC
             container count and the number of files per MPC container
         hmac_key: passed through unchanged to the instance

     Returns:
         A PrivateComputationInstance built from the fixture's id and paths
     """
     sample_instance = PrivateComputationInstance(
         instance_id=self.pc_instance_id,
         role=pc_role,
         game_type=PrivateComputationGameType.LIFT,
         status=PrivateComputationInstanceStatus.PID_SHARD_COMPLETED,
         status_update_ts=1600000000,
         instances=[],
         input_path=self.input_path,
         output_dir=self.output_path,
         num_pid_containers=test_num_containers,
         num_mpc_containers=test_num_containers,
         num_files_per_mpc_container=test_num_containers,
         hmac_key=hmac_key,
     )
     return sample_instance
Exemple #15
0
    def get_status(
        self,
        pc_instance: PrivateComputationInstance,
    ) -> PrivateComputationInstanceStatus:
        """Refreshes the last PIDInstance and derives the latest PrivateComputationInstance status

        Arguments:
            pc_instance: The PC instance that is being updated. Its last entry in
                ``instances`` is replaced by the refreshed PIDInstance.

        Returns:
            The status mapped from the current PID stage status, or the unchanged
            status of pc_instance when there is nothing to map

        Raises:
            ValueError: if the last stored instance is not a PIDInstance
        """
        status = pc_instance.status
        if not pc_instance.instances:
            return status

        # Only need to update the last stage/instance
        last_instance = pc_instance.instances[-1]
        if not isinstance(last_instance, PIDInstance):
            raise ValueError(f"Expected {last_instance} to be a PIDInstance")

        # PID service has to call update_instance to get the newest containers
        # information in case they are still running
        pc_instance.instances[-1] = self._pid_svc.update_instance(
            last_instance.instance_id
        )
        refreshed_instance = pc_instance.instances[-1]
        assert isinstance(refreshed_instance, PIDInstance)  # appeasing pyre

        pid_current_stage = refreshed_instance.current_stage
        if not pid_current_stage:
            return status
        pid_stage_status = refreshed_instance.stages_status.get(pid_current_stage)

        # translate the PID stage status into the matching status of the PC stage
        stage = pc_instance.current_stage
        if pid_stage_status is PIDStageStatus.STARTED:
            return stage.started_status
        if pid_stage_status is PIDStageStatus.COMPLETED:
            return stage.completed_status
        if pid_stage_status is PIDStageStatus.FAILED:
            return stage.failed_status
        return status
def gen_dummy_pc_instance() -> PrivateComputationInstance:
    """Creates a dummy private computation instance to be used in unit tests

    Returns:
        A publisher LIFT instance holding one dummy PID, MPC and post processing
        instance, stamped with the current time
    """
    dummy_instances = [
        gen_dummy_pid_instance(),
        gen_dummy_mpc_instance(),
        gen_dummy_post_processing_instance(),
    ]
    created_at = int(time.time())
    default_breakdown_key = BreakdownKey.get_default_key()
    dummy_pce_config = PCEConfig(
        subnets=["subnet"],
        cluster="onedocker-cluster-name",
        region="us-west-2",
        onedocker_task_definition="arn:aws:ecs:us-west-2:000000000000:task/cluster-name/subnet",
    )

    return PrivateComputationInstance(
        instance_id="pc_instance_id",
        role=PrivateComputationRole.PUBLISHER,
        instances=dummy_instances,
        status=PrivateComputationInstanceStatus.POST_PROCESSING_HANDLERS_COMPLETED,
        status_update_ts=created_at,
        num_files_per_mpc_container=40,
        game_type=PrivateComputationGameType.LIFT,
        input_path="https://bucket.s3.us-west-2.amazonaws.com/lift/partner/partner_e2e_input.csv",
        output_dir="https://bucket.s3.us-west-2.amazonaws.com/lift/partner",
        num_pid_containers=1,
        num_mpc_containers=1,
        attribution_rule=None,
        aggregation_type=None,
        partial_container_retry_enabled=False,
        is_validating=False,
        synthetic_shard_path=None,
        retry_counter=0,
        hmac_key="",
        concurrency=4,
        padding_size=25,
        is_test=False,
        k_anonymity_threshold=100,
        _stage_flow_cls_name="PrivateComputationStageFlow",
        breakdown_key=default_breakdown_key,
        pce_config=dummy_pce_config,
    )
Exemple #17
0
def get_updated_pc_status_mpc_game(
    private_computation_instance: PrivateComputationInstance,
    mpc_svc: MPCService,
) -> PrivateComputationInstanceStatus:
    """Refreshes the last MPCInstance and derives the latest PrivateComputationInstance status

    Arguments:
        private_computation_instance: The PC instance that is being updated. When its
            last stored instance is an MPCInstance, that entry is replaced by the
            refreshed one.
        mpc_svc: Used to update MPC instances stored on private_computation_instance

    Returns:
        The status mapped from the refreshed MPC instance status, or the unchanged
        status of private_computation_instance when there is nothing to map
    """
    pc_instance = private_computation_instance
    status = pc_instance.status
    if not pc_instance.instances:
        return status

    # Only need to update the last stage/instance
    last_instance = pc_instance.instances[-1]
    if not isinstance(last_instance, MPCInstance):
        return status

    # MPC service has to call update_instance to get the newest containers
    # information in case they are still running
    refreshed_mpc_instance = mpc_svc.update_instance(last_instance.instance_id)
    pc_instance.instances[-1] = PCSMPCInstance.from_mpc_instance(
        refreshed_mpc_instance
    )
    mpc_instance_status = pc_instance.instances[-1].status

    # translate the MPC instance status into the matching status of the PC stage
    current_stage = pc_instance.current_stage
    if mpc_instance_status is MPCInstanceStatus.STARTED:
        return current_stage.started_status
    if mpc_instance_status is MPCInstanceStatus.COMPLETED:
        return current_stage.completed_status
    if mpc_instance_status in (
        MPCInstanceStatus.FAILED,
        MPCInstanceStatus.CANCELED,
    ):
        return current_stage.failed_status
    return status
 def create_sample_instance(
     self,
     status: PrivateComputationInstanceStatus,
     role: PrivateComputationRole = PrivateComputationRole.PUBLISHER,
     instances: Optional[List[UnionedPCInstance]] = None,
 ) -> PrivateComputationInstance:
     """Build a LIFT instance from the test fixture's settings.

     Args:
         status: the status the new instance is created with
         role: the role of the new instance, PUBLISHER unless given
         instances: sub-instances to store; a falsy value yields a new empty list

     Returns:
         A PrivateComputationInstance built from the fixture's id, paths,
         container count, concurrency and hmac key
     """
     container_count = self.test_num_containers
     sample_instance = PrivateComputationInstance(
         instance_id=self.test_private_computation_id,
         role=role,
         game_type=PrivateComputationGameType.LIFT,
         status=status,
         status_update_ts=1600000000,
         instances=instances or [],
         input_path=self.test_input_path,
         output_dir=self.test_output_dir,
         num_pid_containers=container_count,
         num_mpc_containers=container_count,
         num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
         concurrency=self.test_concurrency,
         k_anonymity_threshold=DEFAULT_K_ANONYMITY_THRESHOLD_PL,
         hmac_key=self.test_hmac_key,
     )
     return sample_instance
    def setUp(self) -> None:
        """Create the partner instance and binary config map shared by the tests."""

        def _default_binary_config() -> OneDockerBinaryConfig:
            # factory for missing keys: each lookup miss builds a new config object
            return OneDockerBinaryConfig(
                tmp_directory="/test_tmp_directory/",
                binary_version="latest",
                repository_path="test_path/",
            )

        self._pc_instance = PrivateComputationInstance(
            instance_id="123",
            role=PrivateComputationRole.PARTNER,
            game_type=PrivateComputationGameType.LIFT,
            status=PrivateComputationInstanceStatus.INPUT_DATA_VALIDATION_STARTED,
            status_update_ts=1600000000,
            instances=[],
            input_path="https://a-test-bucket.s3.us-west-2.amazonaws.com/lift/test/input_data1.csv",
            output_dir="789",
            num_pid_containers=1,
            num_mpc_containers=1,
            num_files_per_mpc_container=1,
        )

        self.onedocker_binary_config_map = defaultdict(_default_binary_config)
Exemple #20
0
    async def test_update_instance(self, mock_update) -> None:
        """Check the state returned by ``bolt_pcs_client.update_instance``.

        ``mock_update`` is set up to return a PC instance in status CREATED
        that holds one PID instance with a known server ip; the returned state
        must report that status and that server ip.

        Args:
            mock_update: mock whose return value is set to the prepared instance;
                presumably injected by a patch decorator outside this view
        """
        # create one PID instance to be put into PrivateComputationInstance
        embedded_pid_instance = PIDInstance(
            instance_id=self.test_instance_id,
            protocol=DEFAULT_PID_PROTOCOL,
            pid_role=PIDRole.PUBLISHER,
            num_shards=self.test_num_containers,
            input_path="pid_in",
            output_path="pid_out",
            status=PIDInstanceStatus.STARTED,
            server_ips=["10.0.10.242"],
        )

        # mock pc update_instance to return a pc instance with specific test status and instances
        mock_update.return_value = PrivateComputationInstance(
            instance_id=self.test_instance_id,
            role=self.test_role,
            game_type=self.test_game_type,
            status=PrivateComputationInstanceStatus.CREATED,
            status_update_ts=0,
            instances=[embedded_pid_instance],
            input_path=self.test_input_path,
            output_dir=self.test_output_path,
            num_pid_containers=self.test_num_containers,
            num_mpc_containers=self.test_num_containers,
            num_files_per_mpc_container=NUM_NEW_SHARDS_PER_FILE,
        )

        returned_state = await self.bolt_pcs_client.update_instance(
            instance_id=self.test_instance_id
        )

        self.assertEqual(
            returned_state.pc_instance_status,
            PrivateComputationInstanceStatus.CREATED,
        )
        self.assertEqual(["10.0.10.242"], returned_state.server_ips)
    def get_status(
        self,
        pc_instance: PrivateComputationInstance,
    ) -> PrivateComputationInstanceStatus:
        """Refreshes the last PIDInstance and derives the latest PrivateComputationInstance status

        Arguments:
            pc_instance: The PC instance that is being updated. When its last stored
                instance is a PIDInstance, that entry is replaced by the refreshed one.

        Returns:
            The status mapped from the refreshed PID instance status, or the unchanged
            status of pc_instance when there is nothing to map
        """
        status = pc_instance.status
        if not pc_instance.instances:
            return status

        # Only need to update the last stage/instance
        last_instance = pc_instance.instances[-1]
        if not isinstance(last_instance, PIDInstance):
            return status

        # PID service has to call update_instance to get the newest containers
        # information in case they are still running
        pc_instance.instances[-1] = self._pid_svc.update_instance(
            last_instance.instance_id
        )
        pid_instance_status = pc_instance.instances[-1].status

        # translate the PID instance status into the matching status of the PC stage
        stage = pc_instance.current_stage
        if pid_instance_status is PIDInstanceStatus.STARTED:
            return stage.started_status
        if pid_instance_status is PIDInstanceStatus.COMPLETED:
            return stage.completed_status
        if pid_instance_status is PIDInstanceStatus.FAILED:
            return stage.failed_status
        return status
Exemple #22
0
    def create_instance(
        self,
        instance_id: str,
        role: PrivateComputationRole,
        game_type: PrivateComputationGameType,
        input_path: str,
        output_dir: str,
        num_pid_containers: int,
        num_mpc_containers: int,
        concurrency: Optional[int] = None,
        attribution_rule: Optional[AttributionRule] = None,
        aggregation_type: Optional[AggregationType] = None,
        num_files_per_mpc_container: Optional[int] = None,
        is_validating: Optional[bool] = False,
        synthetic_shard_path: Optional[str] = None,
        breakdown_key: Optional[BreakdownKey] = None,
        pce_config: Optional[PCEConfig] = None,
        is_test: Optional[bool] = False,
        hmac_key: Optional[str] = None,
        padding_size: Optional[int] = None,
        k_anonymity_threshold: Optional[int] = None,
        stage_flow_cls: Optional[Type[PrivateComputationBaseStageFlow]] = None,
        result_visibility: Optional[ResultVisibility] = None,
        tier: Optional[str] = None,
        pid_use_row_numbers: bool = True,
        post_processing_data_optional: Optional[PostProcessingData] = None,
        pid_configs: Optional[Dict[str, Any]] = None,
    ) -> PrivateComputationInstance:
        """Creates a new private computation instance and stores it in the repository

        Optional settings that are not supplied are replaced by defaults; the
        padding size, k-anonymity threshold and stage flow defaults depend on
        the game type.

        Args:
            instance_id: id of the new instance
            role: role of the new instance
            game_type: game type; also selects several game specific defaults
            input_path: stored on the instance as its input path
            output_dir: stored on the instance as its output directory
            num_pid_containers: number of PID containers
            num_mpc_containers: passed, with game_type and num_pid_containers, to
                ``_get_number_of_mpc_containers`` to determine the stored value
            concurrency: falls back to DEFAULT_CONCURRENCY when falsy
            num_files_per_mpc_container: defaults to NUM_NEW_SHARDS_PER_FILE
            hmac_key: defaults to DEFAULT_HMAC_KEY
            padding_size: defaults to the LIFT or ATTRIBUTION padding size
            k_anonymity_threshold: defaults to the PA or PL threshold
            stage_flow_cls: defaults to the PCF2 stage flow for ATTRIBUTION and to
                PrivateComputationStageFlow otherwise
            result_visibility: falls back to ResultVisibility.PUBLIC when falsy
            post_processing_data_optional: when falsy, a PostProcessingData with a
                dataset timestamp of one day ago is created
            attribution_rule, aggregation_type, is_validating, synthetic_shard_path,
            breakdown_key, pce_config, is_test, tier, pid_use_row_numbers,
            pid_configs: stored on the instance unchanged

        Returns:
            The newly created instance, in status CREATED
        """
        self.logger.info(f"Creating instance: {instance_id}")

        # For Private Attribution daily recurrent runs, we would need dataset_timestamp of data used for computation.
        # Assigning a default value of day before the computation for dataset_timestamp.
        one_day_ago = datetime.now(tz=timezone.utc) - timedelta(days=1)
        default_dataset_timestamp = int(one_day_ago.timestamp())
        post_processing_data = post_processing_data_optional or PostProcessingData(
            dataset_timestamp=default_dataset_timestamp
        )

        # game specific fallbacks for settings the caller did not provide
        if game_type is PrivateComputationGameType.LIFT:
            fallback_padding_size = LIFT_DEFAULT_PADDING_SIZE
        else:
            fallback_padding_size = ATTRIBUTION_DEFAULT_PADDING_SIZE

        if game_type is PrivateComputationGameType.ATTRIBUTION:
            fallback_k_anonymity = DEFAULT_K_ANONYMITY_THRESHOLD_PA
            fallback_stage_flow = PrivateComputationPCF2StageFlow
        else:
            fallback_k_anonymity = DEFAULT_K_ANONYMITY_THRESHOLD_PL
            fallback_stage_flow = PrivateComputationStageFlow

        created_ts = PrivateComputationService.get_ts_now()
        files_per_mpc_container = unwrap_or_default(
            optional=num_files_per_mpc_container,
            default=NUM_NEW_SHARDS_PER_FILE,
        )
        mpc_container_count = self._get_number_of_mpc_containers(
            game_type, num_pid_containers, num_mpc_containers
        )
        effective_hmac_key = unwrap_or_default(
            optional=hmac_key, default=DEFAULT_HMAC_KEY
        )
        effective_padding_size = unwrap_or_default(
            optional=padding_size, default=fallback_padding_size
        )
        effective_k_anonymity = unwrap_or_default(
            optional=k_anonymity_threshold, default=fallback_k_anonymity
        )
        stage_flow_name = unwrap_or_default(
            optional=stage_flow_cls, default=fallback_stage_flow
        ).get_cls_name()

        instance = PrivateComputationInstance(
            instance_id=instance_id,
            role=role,
            instances=[],
            status=PrivateComputationInstanceStatus.CREATED,
            status_update_ts=created_ts,
            num_files_per_mpc_container=files_per_mpc_container,
            game_type=game_type,
            is_validating=is_validating,
            synthetic_shard_path=synthetic_shard_path,
            num_pid_containers=num_pid_containers,
            num_mpc_containers=mpc_container_count,
            attribution_rule=attribution_rule,
            aggregation_type=aggregation_type,
            input_path=input_path,
            output_dir=output_dir,
            breakdown_key=breakdown_key,
            pce_config=pce_config,
            is_test=is_test,
            hmac_key=effective_hmac_key,
            padding_size=effective_padding_size,
            concurrency=concurrency or DEFAULT_CONCURRENCY,
            k_anonymity_threshold=effective_k_anonymity,
            _stage_flow_cls_name=stage_flow_name,
            result_visibility=result_visibility or ResultVisibility.PUBLIC,
            tier=tier,
            pid_use_row_numbers=pid_use_row_numbers,
            post_processing_data=post_processing_data,
            pid_configs=pid_configs,
        )

        self.instance_repository.create(instance)
        return instance
Exemple #23
0
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Runs a pid service stage, e.g. pid shard, pid prepare, pid run

        For the shard stage a new pid instance is created; for every other stage the
        last instance stored on pc_instance is reused. The stage is then started
        without waiting for its containers, and the resulting pid instance is stored
        as the last entry of ``pc_instance.instances``.

        Args:
            pc_instance: the private computation instance to run ID match with
            server_ips: only used by the partner role. These are the ip addresses of the publisher's containers.

        Returns:
            An updated version of pc_instance that stores a PIDInstance

        Raises:
            RuntimeError: if a non-shard stage is run while pc_instance has no instances
            ValueError: if a non-shard stage is run and the last instance is not a PIDInstance
        """

        # the shard stage is the first pid stage, so it creates the pid instance
        is_shard_stage = (
            self._publisher_stage is UnionPIDStage.PUBLISHER_SHARD
            and self._partner_stage is UnionPIDStage.ADV_SHARD
        )

        if not is_shard_stage:
            # If there no previous instance, then we should run shard first
            if not pc_instance.instances:
                raise RuntimeError(
                    f"Cannot run PID stages {self._publisher_stage}, {self._partner_stage}. Run PID shard first."
                )
            pid_instance = pc_instance.instances[-1]
            # if the last instance is not a pid instance, then we are out of order
            if not isinstance(pid_instance, PIDInstance):
                raise ValueError(
                    f"Cannot run PID stages {self._publisher_stage}, {self._partner_stage}. Last instance is not a PIDInstance."
                )
        else:
            # the retry counter (0 for the first attempt) is part of the instance id
            pid_instance_id = (
                f"{pc_instance.instance_id}_id_match{pc_instance.retry_counter}"
            )
            pid_role = self._map_private_computation_role_to_pid_role(
                pc_instance.role
            )
            pid_instance = self._pid_svc.create_instance(
                instance_id=pid_instance_id,
                pid_role=pid_role,
                num_shards=pc_instance.num_pid_containers,
                input_path=pc_instance.input_path,
                output_path=pc_instance.pid_stage_output_base_path,
                is_validating=self._is_validating or pc_instance.is_validating,
                synthetic_shard_path=self._synthetic_shard_path
                or pc_instance.synthetic_shard_path,
                hmac_key=pc_instance.hmac_key,
                pid_use_row_numbers=pc_instance.pid_use_row_numbers,
            )

        # each role runs its own side of the stage
        if pc_instance.role is PrivateComputationRole.PUBLISHER:
            union_stage = self._publisher_stage
        else:
            union_stage = self._partner_stage

        # Run pid
        pid_instance = await self._pid_svc.run_stage_or_next(
            instance_id=pid_instance.instance_id,
            server_ips=server_ips,
            pid_union_stage=union_stage,
            wait_for_containers=False,
            container_timeout=self._container_timeout,
        )

        if pc_instance.instances and isinstance(
            pc_instance.instances[-1], PIDInstance
        ):
            # replace the outdated pid instance with the updated one
            pc_instance.instances[-1] = pid_instance
        else:
            # Push PID instance to PrivateComputationInstance.instances
            pc_instance.instances.append(pid_instance)

        return pc_instance
 def read(self, instance_id: str) -> PrivateComputationInstance:
     """Load the stored entry for ``instance_id`` and deserialize it.

     Args:
         instance_id: id of the instance to read from ``self.repo``

     Returns:
         The instance produced by ``PrivateComputationInstance.loads_schema``
     """
     serialized_instance = self.repo.read(instance_id)
     return PrivateComputationInstance.loads_schema(serialized_instance)
    async def run_async(
        self,
        pc_instance: PrivateComputationInstance,
        server_ips: Optional[List[str]] = None,
    ) -> PrivateComputationInstance:
        """Runs the private computation post processing handlers stage

        Post processing handlers are designed to run after final results are available. You can write
        post processing handlers to download results from cloud storage, send you an email, etc.

        A new post processing instance is appended to pc_instance for every run. When the
        previous instance was a post processing instance with the same handler names, its
        handler statuses are copied so that handlers already completed are not run again.

        Args:
            pc_instance: the private computation instance to run post processing handlers with
            server_ips: not used by this stage

        Returns:
            An updated version of pc_instance that stores a post processing instance
        """

        # carry over handler statuses from a previous attempt with the same handlers
        previous_statuses = None
        if pc_instance.instances:
            last_instance = pc_instance.instances[-1]
            has_same_handlers = (
                isinstance(last_instance, PostProcessingInstance)
                and last_instance.handler_statuses.keys()
                == self._post_processing_handlers.keys()
            )
            if has_same_handlers:
                self._logger.info("Copying statuses from last instance")
                previous_statuses = last_instance.handler_statuses.copy()

        post_processing_instance_id = (
            f"{pc_instance.instance_id}_post_processing{pc_instance.retry_counter}"
        )
        post_processing_instance = PostProcessingInstance.create_instance(
            instance_id=post_processing_instance_id,
            handlers=self._post_processing_handlers,
            handler_statuses=previous_statuses,
            status=PostProcessingInstanceStatus.STARTED,
        )
        pc_instance.instances.append(post_processing_instance)

        # only handlers that have not completed yet are run
        pending_runs = [
            self._run_post_processing_handler(
                pc_instance,
                post_processing_instance,
                name,
                handler,
            )
            for name, handler in self._post_processing_handlers.items()
            if post_processing_instance.handler_statuses[name]
            != PostProcessingHandlerStatus.COMPLETED
        ]

        # if any handlers fail, then the post_processing_instance status will be
        # set to failed, as will the pc_instance status
        await asyncio.gather(*pending_runs)

        # if any of the handlers failed, then the status of the post processing instance would have
        # been set to failed. If none of them failed, then that means all of the handlers completed, so
        # we can set the status to completed.
        if post_processing_instance.status is PostProcessingInstanceStatus.FAILED:
            return pc_instance

        post_processing_instance.status = PostProcessingInstanceStatus.COMPLETED
        pc_instance.update_status(
            pc_instance.current_stage.completed_status, self._logger
        )
        return pc_instance