예제 #1
0
    def test_select_partitions_calls_select_partitions_with_params(
            self, mock_select_partitions):
        """Checks that SelectPartitions forwards its params to the mock.

        `mock_select_partitions` is presumably injected by a patch decorator
        outside this view; the test only inspects the call it recorded.
        """
        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            source = pipeline | 'Create produce' >> beam.Create(
                [1, 2, 3, 4, 5, 6])
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)
            params = aggregate_params.SelectPartitionsParams(
                max_partitions_contributed=2, budget_weight=0.5)

            # Act
            transformer = private_beam.SelectPartitions(
                select_partitions_params=params,
                partition_extractor=lambda x: f"pk:{x // 10}",
                label="Test select partitions")
            private_collection | transformer

            # Assert
            self.assertEqual(transformer._budget_accountant, accountant)
            mock_select_partitions.assert_called_once()
            # The params are expected as the second positional argument.
            positional_args = mock_select_partitions.call_args[0]
            self.assertEqual(positional_args[1], params)
예제 #2
0
    def test_map_returns_correct_results_and_accountant(self):
        """Checks that private Map transforms values and keeps the accountant.

        The output must be a PrivatePCollection whose underlying PCollection
        pairs each privacy id with the squared second element of the
        corresponding input tuple, and it must carry the same budget
        accountant as the input collection.
        """
        runner = fn_api_runner.FnApiRunner()
        with beam.Pipeline(runner=runner) as pipeline:
            # Arrange
            pcol_input = [(1, 2), (2, 3), (3, 4), (4, 5)]
            pcol = pipeline | 'Create produce' >> beam.Create(pcol_input)
            budget_accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            private_collection = (
                pcol | 'Create private collection' >> private_beam.MakePrivate(
                    budget_accountant=budget_accountant,
                    privacy_id_extractor=PrivateBeamTest.privacy_id_extractor))
            # Build the expectation as a list rather than a `map` object: a
            # `map` is a one-shot iterator, so it would read as empty if the
            # matcher consumed it more than once.
            expected = [(PrivateBeamTest.privacy_id_extractor(x), x[1]**2)
                        for x in pcol_input]

            # Act
            transformed = private_collection | private_beam.Map(
                fn=lambda x: x[1]**2)

            # Assert
            self.assertIsInstance(transformed, private_beam.PrivatePCollection)
            beam_util.assert_that(transformed._pcol,
                                  beam_util.equal_to(expected))
            self.assertEqual(transformed._budget_accountant, budget_accountant)
예제 #3
0
    def test_flatmap_returns_correct_results_and_accountant(self):
        """Checks that private FlatMap expands values and keeps the accountant.

        Each input tuple is expanded into two tuples; every output is expected
        to be paired with the privacy id of the tuple it came from.
        """

        def flat_map_fn(x):
            # Emit (first, second + offset) for offsets 0 and 1.
            expanded = []
            for offset in range(2):
                expanded.append((x[0], x[1] + offset))
            return expanded

        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            pcol_input = [(1, 2), (2, 3), (3, 4)]
            source = pipeline | 'Create produce' >> beam.Create(pcol_input)
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)
            expected = [
                ('pid:(1, 2)', (1, 2)),
                ('pid:(1, 2)', (1, 3)),
                ('pid:(2, 3)', (2, 3)),
                ('pid:(2, 3)', (2, 4)),
                ('pid:(3, 4)', (3, 4)),
                ('pid:(3, 4)', (3, 5)),
            ]

            # Act
            transformed = private_collection | private_beam.FlatMap(
                flat_map_fn)

            # Assert
            self.assertIsInstance(transformed, private_beam.PrivatePCollection)
            beam_util.assert_that(transformed._pcol,
                                  beam_util.equal_to(expected))
            self.assertEqual(transformed._budget_accountant, accountant)
예제 #4
0
 def create_pipeline(self):
   """Returns a pipeline on an FnApiRunner using the embedded gRPC env."""
   # Must be GRPC so we can send data and split requests concurrent
   # to the bundle process request.
   grpc_environment = beam_runner_api_pb2.Environment(
       urn=python_urns.EMBEDDED_PYTHON_GRPC)
   runner = fn_api_runner.FnApiRunner(default_environment=grpc_environment)
   return beam.Pipeline(runner=runner)
예제 #5
0
파일: base_executor.py 프로젝트: hyunmu/tfx
    def _make_beam_pipeline(self) -> beam_Pipeline:  # pytype: disable=invalid-annotation
        """Builds the Beam pipeline for this executor.

        When the pipeline args name a runner, the pipeline is created
        straight from those args. Otherwise it runs on an FnApiRunner with
        the parsed options.

        Returns:
          The constructed beam.Pipeline.

        Raises:
          Exception: if the `beam` module reference is falsy, i.e. Apache
            Beam is not available.
        """
        if not beam:
            raise Exception(
                'Apache Beam must be installed to use this functionality.')
        # pylint: disable=g-import-not-at-top
        from apache_beam.options.pipeline_options import DirectOptions
        from apache_beam.options.pipeline_options import PipelineOptions
        from apache_beam.options.pipeline_options import StandardOptions
        from apache_beam.runners.portability import fn_api_runner
        # pylint: enable=g-import-not-at-top
        options = PipelineOptions(self._beam_pipeline_args)
        if options.view_as(StandardOptions).runner:
            return beam.Pipeline(argv=self._beam_pipeline_args)

        # TODO(b/159468583): move this warning to Beam.
        direct_options = options.view_as(DirectOptions)
        running_mode = direct_options.direct_running_mode
        num_workers = direct_options.direct_num_workers
        if running_mode == 'in_memory' and num_workers != 1:
            absl.logging.warning(
                'If direct_num_workers is not equal to 1, direct_running_mode should '
                'be `multi_processing` or `multi_threading` instead of `in_memory` '
                'in order for it to have the desired worker parallelism effect.'
            )

        local_runner = fn_api_runner.FnApiRunner()
        return beam.Pipeline(options=options, runner=local_runner)
예제 #6
0
    def _make_beam_pipeline(self) -> beam.Pipeline:
        """Makes beam pipeline.

        Reads `direct_num_workers` from the pipeline args. A value of 0 is
        replaced with the machine's CPU count (or 1 when the count cannot be
        determined) and written back to the options. With more than one
        worker the pipeline runs on an FnApiRunner whose default environment
        launches SDK workers as subprocesses; otherwise the pipeline is built
        directly from the args.

        Returns:
          The configured beam.Pipeline.
        """
        # TODO(b/142684737): refactor when beam support multi-processing by args.
        pipeline_options = PipelineOptions(self._beam_pipeline_args)
        parallelism = pipeline_options.view_as(
            DirectOptions).direct_num_workers

        if parallelism == 0:
            try:
                parallelism = multiprocessing.cpu_count()
            except NotImplementedError as e:
                # Pass the argument separately so formatting is done by the
                # logging layer rather than eagerly at the call site.
                absl.logging.warning('Cannot get cpu count: %s', e)
                parallelism = 1
            pipeline_options.view_as(
                DirectOptions).direct_num_workers = parallelism

        absl.logging.info('Using %d process(es) for Beam pipeline execution.',
                          parallelism)

        if parallelism > 1:
            # Build the worker launch command once; both environment flavours
            # below carry the same command, as bytes or as text.
            worker_command = (
                '%s -m apache_beam.runners.worker.sdk_worker_main' %
                (sys.executable or sys.argv[0]))
            if beam_runner_api_pb2:
                env = beam_runner_api_pb2.Environment(
                    urn=python_urns.SUBPROCESS_SDK,
                    payload=worker_command.encode('ascii'))
            else:
                env = environments.SubprocessSDKEnvironment(
                    command_string=worker_command)
            return beam.Pipeline(
                options=pipeline_options,
                runner=fn_api_runner.FnApiRunner(default_environment=env))

        return beam.Pipeline(argv=self._beam_pipeline_args)
예제 #7
0
  def benchmarkAnalyzeAndTransformDataset(self):
    """Benchmarks AnalyzeAndTransformDataset end to end.

    Builds a Beam pipeline around _AnalyzeAndTransformDataset on an
    FnApiRunner, runs it to completion and reports the elapsed wall time
    together with the number of examples in the dataset.
    """
    variables = _get_common_variables(self._dataset)

    pipeline = beam.Pipeline(runner=fn_api_runner.FnApiRunner())
    analyze_and_transform = _AnalyzeAndTransformDataset(
        self._dataset, variables.tf_metadata_schema,
        variables.preprocessing_fn,
        variables.transform_input_dataset_metadata)
    _ = pipeline | analyze_and_transform

    # Only the run itself is timed; pipeline construction is excluded.
    started_at = time.time()
    pipeline.run().wait_until_finish()
    elapsed = time.time() - started_at

    benchmark_name = benchmark_utils.with_dataset_prefix(
        "benchmarkAnalyzeAndTransformDataset", FLAGS.dataset)
    self.report_benchmark(
        name=benchmark_name,
        iters=1,
        wall_time=elapsed,
        extras={"num_examples": self._dataset.num_examples()})
예제 #8
0
  def _make_beam_pipeline(self) -> beam.Pipeline:
    """Builds the Beam pipeline from the stored pipeline args.

    Uses the args as-is when they name a runner; otherwise runs the parsed
    options on an FnApiRunner.
    """
    options = PipelineOptions(self._beam_pipeline_args)
    runner_requested = options.view_as(StandardOptions).runner
    if not runner_requested:
      return beam.Pipeline(options=options, runner=fn_api_runner.FnApiRunner())

    return beam.Pipeline(argv=self._beam_pipeline_args)
예제 #9
0
def compute_on_beam():
    """Runs the private computation on a local Beam runner.

    Reads FLAGS.input_file, parses it with ParseFile, computes the result via
    calculate_private_result and writes it to FLAGS.output_file.
    """
    local_runner = fn_api_runner.FnApiRunner()
    with beam.Pipeline(runner=local_runner) as pipeline:
        raw_lines = pipeline | beam.io.ReadFromText(FLAGS.input_file)
        movie_views = raw_lines | beam.ParDo(ParseFile())
        backend = pipeline_dp.BeamBackend()
        dp_result = calculate_private_result(movie_views, backend)
        dp_result | beam.io.WriteToText(FLAGS.output_file)
예제 #10
0
def compute_on_beam():
    """Computes DP rating metrics on a local Beam runner.

    Reads FLAGS.input_file, parses it with ParseFile, computes the metrics
    via calc_dp_rating_metrics over the public partitions and writes the
    result to FLAGS.output_file.
    """
    local_runner = fn_api_runner.FnApiRunner()
    partitions = get_public_partitions()
    with beam.Pipeline(runner=local_runner) as pipeline:
        raw_lines = pipeline | beam.io.ReadFromText(FLAGS.input_file)
        movie_views = raw_lines | beam.ParDo(ParseFile())
        operations = pipeline_dp.BeamOperations()
        dp_result = calc_dp_rating_metrics(movie_views, operations,
                                           partitions)
        dp_result | beam.io.WriteToText(FLAGS.output_file)
예제 #11
0
 def run(self):
     """Runs the pipeline proto on an FnApiRunner and records the job state.

     Sets `self.state` to DONE on success. Any failure is logged, its
     traceback printed, and `self.state` set to FAILED; the error is not
     re-raised.
     """
     with JobLogHandler(self._log_queue):
         try:
             runner = fn_api_runner.FnApiRunner(
                 use_grpc=self._use_grpc,
                 sdk_harness_factory=self._sdk_harness_factory)
             runner.run_via_runner_api(self._pipeline_proto)
             self.state = beam_job_api_pb2.JobState.DONE
         except:  # pylint: disable=bare-except
             logging.exception("Error running pipeline.")
             traceback.print_exc()
             self.state = beam_job_api_pb2.JobState.FAILED
예제 #12
0
 def test_dofn_lifecycle(self):
     """Runs CallSequenceEnforcingDoFn under two different runners.

     The same three-element pipeline is executed once on the bundle-based
     direct runner and once on the FnApiRunner.
     """
     from apache_beam.runners.direct import direct_runner
     from apache_beam.runners.portability import fn_api_runner
     candidate_runners = (
         direct_runner.BundleBasedDirectRunner(),
         fn_api_runner.FnApiRunner(),
     )
     for candidate in candidate_runners:
         with TestPipeline(runner=candidate) as p:
             numbers = p | 'Start' >> beam.Create([1, 2, 3])
             _ = numbers | 'Do' >> beam.ParDo(CallSequenceEnforcingDoFn())
예제 #13
0
 def run(self):
     """Runs the pipeline proto on an FnApiRunner and records the job state.

     Sets `self.state` to DONE on success. On any failure the error is
     logged with its stack trace, `self.state` is set to FAILED and the
     original exception is re-raised.
     """
     with JobLogHandler(self._log_queues):
         try:
             fn_api_runner.FnApiRunner().run_via_runner_api(
                 self._pipeline_proto)
             logging.info('Successfully completed job.')
             self.state = beam_job_api_pb2.JobState.DONE
         except:  # pylint: disable=bare-except
             # logging.exception already attaches the active stack trace, so
             # no second record carrying the `traceback` module is needed.
             logging.exception('Error running pipeline.')
             self.state = beam_job_api_pb2.JobState.FAILED
             raise
예제 #14
0
 def _run_job(self):
     """Runs the pipeline proto on an FnApiRunner and tracks the job state.

     Moves the job to RUNNING, then to DONE (storing the runner result on
     `self.result`) on success. On any failure the error is logged with its
     stack trace, the job is moved to FAILED and the original exception is
     re-raised.
     """
     self.set_state(beam_job_api_pb2.JobState.RUNNING)
     with JobLogHandler(self._log_queues):
         try:
             result = fn_api_runner.FnApiRunner(
                 provision_info=self._provision_info).run_via_runner_api(
                     self._pipeline_proto)
             _LOGGER.info('Successfully completed job.')
             self.set_state(beam_job_api_pb2.JobState.DONE)
             self.result = result
         except:  # pylint: disable=bare-except
             # _LOGGER.exception already attaches the active stack trace, so
             # no second record carrying the `traceback` module is needed.
             _LOGGER.exception('Error running pipeline.')
             self.set_state(beam_job_api_pb2.JobState.FAILED)
             raise
예제 #15
0
def main(unused_argv):
    """Computes a private per-day sum of money spent and writes it out.

    Loads the visits CSV from FLAGS.input_file, wraps the rows in a private
    collection keyed by visitor, sums the money spent per day and writes the
    result to FLAGS.output_file.

    Returns:
      0 once the pipeline context has exited.
    """
    # Here, we use a local Beam runner.
    # For a truly distributed calculation, connect to a Beam cluster (e.g.
    # running on some cloud provider).
    local_runner = fn_api_runner.FnApiRunner()
    with beam.Pipeline(runner=local_runner) as pipeline:

        # Define the privacy budget available for our computation.
        budget_accountant = pipeline_dp.NaiveBudgetAccountant(total_epsilon=1,
                                                              total_delta=1e-6)

        # Load and parse input data
        column_names = {
            'VisitorId': 'user_id',
            'Time entered': 'enter_time',
            'Time spent (minutes)': 'spent_minutes',
            'Money spent (euros)': 'spent_money',
            'Day': 'day'
        }
        visits_df = pd.read_csv(FLAGS.input_file).rename(columns=column_names)
        # iterrows() yields (index, row) pairs; only the rows are kept.
        visit_rows = [row for _, row in visits_df.iterrows()]
        beam_data = pipeline | beam.Create(visit_rows)

        # Wrap Beam's PCollection into its private version
        make_private = private_beam.MakePrivate(
            budget_accountant=budget_accountant,
            privacy_id_extractor=lambda row: row.user_id)
        private_visits = beam_data | make_private

        # Calculate the private sum
        sum_params = SumParams(noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
                               max_partitions_contributed=7,
                               max_contributions_per_partition=2,
                               min_value=1,
                               max_value=100,
                               budget_weight=1,
                               public_partitions=None,
                               partition_extractor=lambda row: row.day,
                               value_extractor=lambda row: row.spent_money)
        dp_result = private_visits | private_beam.Sum(sum_params)
        budget_accountant.compute_budgets()

        # Save the results
        dp_result | beam.io.WriteToText(FLAGS.output_file)

    return 0
예제 #16
0
    def benchmarkMiniPipelineBatched(self):
        """Benchmarks a batched "mini" TFMA: predict, slice, compute metrics.

        Runs a "mini" version of TFMA in a Beam pipeline on an FnApiRunner
        and reports the wall time taken by the whole pipeline run.
        """
        self._init_model()
        pipeline = beam.Pipeline(runner=fn_api_runner.FnApiRunner())
        schema = benchmark_utils.read_schema(
            self._dataset.tf_metadata_schema_path())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema,
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)

        raw_examples = self._dataset.read_raw_dataset(deserialize=False,
                                                      limit=MAX_NUM_EXAMPLES)
        extracts = pipeline | "Examples" >> beam.Create(raw_examples)
        extracts = extracts | "BatchExamples" >> tfx_io.BeamSource()
        extracts = (extracts
                    | "InputsToExtracts" >> tfma.BatchedInputsToExtracts())

        # Apply the extractor/evaluator stages one at a time, in order.
        extracts = (
            extracts
            | "BatchedInputExtractor" >>
            batched_input_extractor.BatchedInputExtractor(
                eval_config=self._eval_config).ptransform)
        extracts = (
            extracts
            | "V2BatchedPredictExtractor" >>
            batched_predict_extractor_v2.BatchedPredictExtractor(
                eval_config=self._eval_config,
                eval_shared_model=self._eval_shared_model).ptransform)
        extracts = (extracts
                    | "UnbatchExtractor" >>
                    unbatch_extractor.UnbatchExtractor().ptransform)
        extracts = (extracts
                    | "SliceKeyExtractor" >>
                    tfma.extractors.SliceKeyExtractor().ptransform)
        _ = (extracts
             | "V2ComputeMetricsAndPlots" >>
             metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_model).ptransform)

        # Only the run itself is timed; pipeline construction is excluded.
        started_at = time.time()
        pipeline.run().wait_until_finish()
        elapsed = time.time() - started_at

        num_examples = self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
        self.report_benchmark(iters=1,
                              wall_time=elapsed,
                              extras={"num_examples": num_examples})
예제 #17
0
def main(unused_argv):
    """Computes a private per-movie sum of ratings and writes it out.

    Reads FLAGS.input_file, parses it with ParseFile, wraps the views in a
    private collection keyed by user, sums ratings per movie and writes the
    result to FLAGS.output_file.

    Returns:
      0 once the pipeline context has exited.
    """
    # Here, we use a local Beam runner.
    # For a truly distributed calculation, connect to a Beam cluster (e.g.
    # running on some cloud provider).
    local_runner = fn_api_runner.FnApiRunner()
    with beam.Pipeline(runner=local_runner) as pipeline:

        # Define the privacy budget available for our computation.
        budget_accountant = pipeline_dp.NaiveBudgetAccountant(total_epsilon=1,
                                                              total_delta=1e-6)

        # Load and parse input data
        raw_lines = pipeline | beam.io.ReadFromText(FLAGS.input_file)
        movie_views_pcol = raw_lines | beam.ParDo(ParseFile())

        # Wrap Beam's PCollection into its private version
        make_private = MakePrivate(budget_accountant=budget_accountant,
                                   privacy_id_extractor=lambda mv: mv.user_id)
        private_movie_views = (movie_views_pcol
                               | 'Create private collection' >> make_private)

        # Calculate the private sum
        sum_params = SumParams(
            # Limits to how much one user can contribute:
            # .. at most two movies rated per user
            max_partitions_contributed=2,
            # .. at most one rating for each movie
            max_contributions_per_partition=1,
            # .. with minimal rating of "1"
            min_value=1,
            # .. and maximum rating of "5"
            max_value=5,
            # The aggregation key: we're grouping data by movies
            partition_extractor=lambda mv: mv.movie_id,
            # The value we're aggregating: we're summing up ratings
            value_extractor=lambda mv: mv.rating)
        dp_result = (private_movie_views
                     | "Private Sum" >> private_beam.Sum(sum_params))
        budget_accountant.compute_budgets()

        # Save the results
        dp_result | beam.io.WriteToText(FLAGS.output_file)

    return 0
예제 #18
0
    def test_sum_calls_aggregate_with_params(self, mock_aggregate):
        """Checks that private Sum calls aggregate with matching params.

        `mock_aggregate` is presumably injected by a patch decorator outside
        this view; the test compares the AggregateParams it received with the
        ones derived from the SumParams.
        """
        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            source = pipeline | 'Create produce' >> beam.Create(
                float(i) for i in range(1, 7))
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)

            sum_params = aggregate_params.SumParams(
                noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
                max_partitions_contributed=2,
                max_contributions_per_partition=3,
                min_value=1,
                max_value=5,
                budget_weight=1,
                public_partitions=[],
                partition_extractor=lambda x: f"pk:{x // 10}",
                value_extractor=lambda x: x)

            # Act
            transformer = private_beam.Sum(sum_params=sum_params)
            private_collection | transformer

            # Assert
            self.assertEqual(transformer._budget_accountant, accountant)
            mock_aggregate.assert_called_once()

            max_partitions = sum_params.max_partitions_contributed
            max_contributions = sum_params.max_contributions_per_partition
            expected_params = pipeline_dp.AggregateParams(
                noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
                metrics=[pipeline_dp.Metrics.SUM],
                max_partitions_contributed=max_partitions,
                max_contributions_per_partition=max_contributions,
                min_value=sum_params.min_value,
                max_value=sum_params.max_value,
                public_partitions=sum_params.public_partitions)
            # The params are expected as the second positional argument.
            positional_args = mock_aggregate.call_args[0]
            self.assertEqual(expected_params, positional_args[1])
예제 #19
0
    def _make_beam_pipeline(self) -> beam.Pipeline:
        """Builds the Beam pipeline from the stored pipeline args.

        Uses the args as-is when they name a runner. Otherwise the parsed
        options run on an FnApiRunner, after warning about an `in_memory`
        running mode combined with a worker count other than 1.
        """
        options = PipelineOptions(self._beam_pipeline_args)
        if options.view_as(StandardOptions).runner:
            return beam.Pipeline(argv=self._beam_pipeline_args)

        # TODO(b/159468583): move this warning to Beam.
        direct_options = options.view_as(DirectOptions)
        running_mode = direct_options.direct_running_mode
        num_workers = direct_options.direct_num_workers
        if running_mode == 'in_memory' and num_workers != 1:
            absl.logging.warning(
                'If direct_num_workers is not equal to 1, direct_running_mode should '
                'be `multi_processing` or `multi_threading` instead of `in_memory` '
                'in order for it to have the desired worker parallelism effect.'
            )

        local_runner = fn_api_runner.FnApiRunner()
        return beam.Pipeline(options=options, runner=local_runner)
예제 #20
0
    def test_make_private_transform_succeeds(self):
        """Checks that MakePrivate yields a PrivatePCollection.

        The resulting collection must carry the budget accountant that was
        handed to MakePrivate.
        """
        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            source = pipeline | 'Create produce' >> beam.Create(
                [1, 2, 3, 4, 5, 6])
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)

            # Act
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)

            # Assert
            self.assertIsInstance(private_collection,
                                  private_beam.PrivatePCollection)
            self.assertEqual(private_collection._budget_accountant, accountant)
예제 #21
0
    def test_transform_with_return_anonymized_enabled_returns_pcollection(
            self):
        """Checks that an anonymizing transform returns a plain PCollection.

        Applies SimplePrivatePTransform with return_anonymized=True to a
        private collection and checks the type of the result.
        """
        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            source = pipeline | 'Create produce' >> beam.Create(
                [1, 2, 3, 4, 5, 6])
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)

            # Act
            anonymizing_transform = SimplePrivatePTransform(
                return_anonymized=True)
            transformed = private_collection | anonymizing_transform

            # Assert
            self.assertIsInstance(transformed, pvalue.PCollection)
예제 #22
0
    def benchmarkMiniPipelineUnbatched(self):
        """Benchmarks an unbatched "mini" TFMA: predict, slice, compute metrics.

        Runs a "mini" version of TFMA in a Beam pipeline on an FnApiRunner
        and reports the wall time taken by the whole pipeline run.
        """
        self._init_model()
        pipeline = beam.Pipeline(runner=fn_api_runner.FnApiRunner())

        raw_examples = self._dataset.read_raw_dataset(deserialize=False,
                                                      limit=MAX_NUM_EXAMPLES)
        extracts = pipeline | "Examples" >> beam.Create(raw_examples)
        extracts = extracts | "InputsToExtracts" >> tfma.InputsToExtracts()

        # Apply the extractor/evaluator stages one at a time, in order.
        extracts = (extracts
                    | "InputExtractor" >> input_extractor.InputExtractor(
                        eval_config=self._eval_config).ptransform)
        extracts = (
            extracts
            | "V2PredictExtractor" >> predict_extractor_v2.PredictExtractor(
                eval_config=self._eval_config,
                eval_shared_model=self._eval_shared_model).ptransform)
        extracts = (extracts
                    | "SliceKeyExtractor" >>
                    tfma.extractors.SliceKeyExtractor().ptransform)
        _ = (extracts
             | "V2ComputeMetricsAndPlots" >>
             metrics_and_plots_evaluator_v2.MetricsAndPlotsEvaluator(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_model).ptransform)

        # Only the run itself is timed; pipeline construction is excluded.
        started_at = time.time()
        pipeline.run().wait_until_finish()
        elapsed = time.time() - started_at

        num_examples = self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
        self.report_benchmark(iters=1,
                              wall_time=elapsed,
                              extras={"num_examples": num_examples})
예제 #23
0
    def test_private_collection_with_non_private_transform_throws_error(self):
        """Checks that a regular Beam transform is rejected with TypeError.

        Applying beam.Map to a PrivatePCollection must raise, and the error
        text must mention the expected PrivatePTransform type.
        """
        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            source = pipeline | 'Create produce' >> beam.Create(
                [1, 2, 3, 4, 5, 6])
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)
            label = ('Non private transform on '
                     'PrivatePCollection')
            expected_fragment = ("private_transform should be of type "
                                 "PrivatePTransform but is ")

            # Act and Assert
            with self.assertRaises(TypeError) as context:
                private_collection | label >> beam.Map(lambda x: x)
            self.assertIsInstance(private_collection,
                                  private_beam.PrivatePCollection)
            self.assertIn(expected_fragment, str(context.exception))
예제 #24
0
    def test_privacy_id_count_calls_aggregate_with_params(
            self, mock_aggregate):
        """Checks that PrivacyIdCount calls aggregate with matching params.

        `mock_aggregate` is presumably injected by a patch decorator outside
        this view; the test compares the AggregateParams it received with the
        ones derived from the PrivacyIdCountParams.
        """
        with beam.Pipeline(runner=fn_api_runner.FnApiRunner()) as pipeline:
            # Arrange
            source = pipeline | 'Create produce' >> beam.Create(
                [1, 2, 3, 4, 5, 6])
            accountant = budget_accounting.NaiveBudgetAccountant(
                total_epsilon=1, total_delta=0.01)
            make_private = private_beam.MakePrivate(
                budget_accountant=accountant,
                privacy_id_extractor=PrivateBeamTest.privacy_id_extractor)
            private_collection = (source |
                                  'Create private collection' >> make_private)

            count_params = aggregate_params.PrivacyIdCountParams(
                noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
                max_partitions_contributed=2,
                budget_weight=1,
                partition_extractor=lambda x: f"pk:{x // 10}")

            # Act
            transformer = private_beam.PrivacyIdCount(
                privacy_id_count_params=count_params)
            private_collection | transformer

            # Assert
            self.assertEqual(transformer._budget_accountant, accountant)
            mock_aggregate.assert_called_once()

            max_partitions = count_params.max_partitions_contributed
            expected_params = pipeline_dp.AggregateParams(
                noise_kind=pipeline_dp.NoiseKind.GAUSSIAN,
                metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT],
                max_partitions_contributed=max_partitions,
                max_contributions_per_partition=1,
                public_partitions=count_params.public_partitions)
            # The params are expected as the second positional argument.
            positional_args = mock_aggregate.call_args[0]
            self.assertEqual(positional_args[1], expected_params)
예제 #25
0
 def create_pipeline(self):
     """Returns a pipeline on an FnApiRunner with use_runner_protos=False."""
     runner = fn_api_runner.FnApiRunner(use_runner_protos=False)
     return beam.Pipeline(runner=runner)
예제 #26
0
 def create_pipeline(self):
     """Returns a pipeline on a non-gRPC FnApiRunner with bundle_repeat=3."""
     runner = fn_api_runner.FnApiRunner(use_grpc=False, bundle_repeat=3)
     return beam.Pipeline(runner=runner)
예제 #27
0
 def create_pipeline(self):
     """Returns a pipeline on a gRPC FnApiRunner with a 2-worker harness."""
     harness_factory = functools.partial(sdk_worker.SdkHarness,
                                         worker_count=2)
     runner = fn_api_runner.FnApiRunner(
         use_grpc=True, sdk_harness_factory=harness_factory)
     return beam.Pipeline(runner=runner)
예제 #28
0
 def create_pipeline(self):
     """Returns a pipeline on an FnApiRunner with use_grpc=True."""
     runner = fn_api_runner.FnApiRunner(use_grpc=True)
     return beam.Pipeline(runner=runner)
예제 #29
0
 def create_pipeline(self):
     """Returns a pipeline on an FnApiRunner using the embedded gRPC env.

     The environment carries the payload b'2'.
     """
     grpc_environment = beam_runner_api_pb2.Environment(
         urn=python_urns.EMBEDDED_PYTHON_GRPC, payload=b'2')
     runner = fn_api_runner.FnApiRunner(default_environment=grpc_environment)
     return beam.Pipeline(runner=runner)
예제 #30
0
 def create_pipeline(self):
     """Returns a pipeline on an FnApiRunner built with default arguments."""
     runner = fn_api_runner.FnApiRunner()
     return beam.Pipeline(runner=runner)