Python BeamBackend Beispiele

Programmiersprache: Python

Namespace / Paketname: pipeline_dp

Methode / Funktion: BeamBackend

Beispiele auf hotexamples.com: 7

Python BeamBackend – 7 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für pipeline_dp.BeamBackend, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

    def expand(self, pcol: pvalue.PCollection) -> pvalue.PCollection:
        """Computes a differentially private privacy-id count per partition.

        Args:
            pcol: input collection. Each element is indexed as ``x[0]`` for
                the privacy id and ``x[1]`` for the payload handed to the
                configured partition extractor.

        Returns:
            A collection of (partition_key, dp_privacy_id_count).
        """
        count_params = self._privacy_id_count_params
        beam_backend = pipeline_dp.BeamBackend()
        engine = pipeline_dp.DPEngine(self._budget_accountant, beam_backend)

        aggregate_params = pipeline_dp.AggregateParams(
            noise_kind=count_params.noise_kind,
            metrics=[pipeline_dp.Metrics.PRIVACY_ID_COUNT],
            max_partitions_contributed=count_params.max_partitions_contributed,
            # Fixed at 1 for this metric (presumably because a privacy id is
            # counted at most once per partition -- confirm upstream).
            max_contributions_per_partition=1,
            public_partitions=count_params.public_partitions)

        # The lambdas deliberately go through `self` so the closure contents
        # stay the same as before.
        extractors = pipeline_dp.DataExtractors(
            privacy_id_extractor=lambda row: row[0],
            partition_extractor=lambda row: (
                self._privacy_id_count_params.partition_extractor(row[1])),
            # PrivacyIdCount ignores values.
            value_extractor=lambda row: None)

        # aggregate() yields, per partition key, a namedtuple of metrics.
        per_partition_metrics = engine.aggregate(pcol, aggregate_params,
                                                 extractors)

        # Only privacy_id_count was requested, so unwrap that single field:
        # (partition_key, metrics) -> (partition_key, dp_privacy_id_count).
        return beam_backend.map_values(
            per_partition_metrics,
            lambda metrics: metrics.privacy_id_count,
            "Extract privacy_id_count")

Beispiel #2

Datei anzeigen

    def expand(self, pcol: pvalue.PCollection) -> pvalue.PCollection:
        """Computes a differentially private mean per partition.

        Args:
            pcol: input collection. Each element is indexed as ``x[0]`` for
                the privacy id and ``x[1]`` for the payload handed to the
                configured partition and value extractors.

        Returns:
            A collection of (partition_key, dp_mean).
        """
        mean_params = self._mean_params
        beam_backend = pipeline_dp.BeamBackend()
        engine = pipeline_dp.DPEngine(self._budget_accountant, beam_backend)

        aggregate_params = pipeline_dp.AggregateParams(
            noise_kind=mean_params.noise_kind,
            metrics=[pipeline_dp.Metrics.MEAN],
            max_partitions_contributed=mean_params.max_partitions_contributed,
            max_contributions_per_partition=(
                mean_params.max_contributions_per_partition),
            min_value=mean_params.min_value,
            max_value=mean_params.max_value,
            public_partitions=mean_params.public_partitions)

        # The lambdas deliberately go through `self` so the closure contents
        # stay the same as before.
        extractors = pipeline_dp.DataExtractors(
            privacy_id_extractor=lambda row: row[0],
            partition_extractor=lambda row: (
                self._mean_params.partition_extractor(row[1])),
            value_extractor=lambda row: (
                self._mean_params.value_extractor(row[1])))

        # aggregate() yields, per partition key, a namedtuple of metrics.
        per_partition_metrics = engine.aggregate(pcol, aggregate_params,
                                                 extractors)

        # Only the mean was requested, so unwrap that single field:
        # (partition_key, metrics) -> (partition_key, dp_mean).
        return beam_backend.map_values(per_partition_metrics,
                                       lambda metrics: metrics.mean,
                                       "Extract mean")

Beispiel #3

Datei anzeigen

    def expand(self, pcol: pvalue.PCollection) -> pvalue.PCollection:
        """Computes a differentially private element count per partition.

        Args:
            pcol: input collection. Each element is indexed as ``x[0]`` for
                the privacy id and ``x[1]`` for the payload handed to the
                configured partition extractor.

        Returns:
            A collection of (partition_key, dp_count).
        """
        backend = pipeline_dp.BeamBackend()
        dp_engine = pipeline_dp.DPEngine(self._budget_accountant, backend)

        params = pipeline_dp.AggregateParams(
            noise_kind=self._count_params.noise_kind,
            metrics=[pipeline_dp.Metrics.COUNT],
            max_partitions_contributed=self._count_params.
            max_partitions_contributed,
            max_contributions_per_partition=self._count_params.
            max_contributions_per_partition,
            public_partitions=self._count_params.public_partitions)

        data_extractors = pipeline_dp.DataExtractors(
            partition_extractor=lambda x: self._count_params.
            partition_extractor(x[1]),
            privacy_id_extractor=lambda x: x[0],
            # Count calculates the number of elements per partition key and
            # doesn't use value extractor.
            value_extractor=lambda x: None)

        dp_result = dp_engine.aggregate(pcol, params, data_extractors)
        # dp_result : (partition_key, namedtuple with a `count` field)

        # aggregate() returns a namedtuple of metrics for each partition key.
        # Only one metric (count) was requested; extract it.
        # The stage label names the metric actually extracted. It used to
        # read "Extract sum", a copy-paste slip that mislabelled this step.
        dp_result = backend.map_values(dp_result, lambda v: v.count,
                                       "Extract count")
        # dp_result : (partition_key, dp_count)

        return dp_result

Beispiel #4

Datei anzeigen

    def test_run_e2e_beam(self):
        """End-to-end check on the Beam backend: the output is not empty."""
        with test_pipeline.TestPipeline() as p:
            # Named `numbers` rather than `input` to avoid shadowing the
            # builtin.
            create_step = "Create input" >> beam.Create(list(range(10)))
            numbers = p | create_step

            beam_backend = pipeline_dp.BeamBackend()
            selected = self.run_e2e_private_partition_selection_large_budget(
                numbers, beam_backend)

            beam_util.assert_that(selected, beam_util.is_not_empty())

Beispiel #5

Datei anzeigen

Datei: run_all_frameworks.py Projekt: OpenMined/PipelineDP

def compute_on_beam():
    """Runs the private computation on Apache Beam with a local runner.

    Reads FLAGS.input_file, parses it with ParseFile, feeds the parsed
    records to calculate_private_result using a BeamBackend, and writes the
    result to FLAGS.output_file.
    """
    local_runner = fn_api_runner.FnApiRunner()
    with beam.Pipeline(runner=local_runner) as pipeline:
        raw_lines = pipeline | beam.io.ReadFromText(FLAGS.input_file)
        movie_views = raw_lines | beam.ParDo(ParseFile())

        beam_backend = pipeline_dp.BeamBackend()
        private_result = calculate_private_result(movie_views, beam_backend)

        private_result | beam.io.WriteToText(FLAGS.output_file)

Beispiel #6

Datei anzeigen

    def expand(self, pcol: pvalue.PCollection) -> pvalue.PCollection:
        """Runs differentially private partition selection on `pcol`.

        Args:
            pcol: input collection. Each element is indexed as ``x[0]`` for
                the privacy id and ``x[1]`` for the payload handed to the
                configured partition extractor.

        Returns:
            Whatever DPEngine.select_partitions returns for this input.
        """
        engine = pipeline_dp.DPEngine(self._budget_accountant,
                                      pipeline_dp.BeamBackend())

        # No value extractor is supplied here; only the privacy id and the
        # partition key are extracted.
        extractors = pipeline_dp.DataExtractors(
            privacy_id_extractor=lambda row: row[0],
            partition_extractor=lambda row: self._partition_extractor(row[1]))

        return engine.select_partitions(pcol, self._select_partitions_params,
                                        extractors)

Beispiel #7

Datei anzeigen

 def _create_dp_engine(self):
     """Builds a BeamBackend and a DPEngine bound to it.

     Returns:
         A (backend, dp_engine) tuple; the engine uses this object's
         budget accountant and the returned backend.
     """
     beam_backend = pipeline_dp.BeamBackend()
     engine = pipeline_dp.DPEngine(self._budget_accountant, beam_backend)
     return beam_backend, engine