Example #1
File: job_io.py Project: sajalasati/oppia
    def expand(
        self, results: beam.PCollection[job_run_result.JobRunResult]
    ) -> beam.pvalue.PDone:
        """Writes the given job results to the NDB datastore.

        This overrides expand from parent class.

        Args:
            results: PCollection. The job results to be written to the
                datastore; this may contain just a single result.

        Returns:
            PCollection. An empty PCollection.
        """
        return (
            results
            # NOTE: Pylint is wrong. WithKeys() is a decorated function with a
            # different signature than the one it's defined with.
            | beam.WithKeys(None)  # pylint: disable=no-value-for-parameter
            # GroupIntoBatches() requires (key, value) pairs as input, so we
            # give everything None keys and then immediately discard them.
            | beam.GroupIntoBatches(self._MAX_RESULT_INSTANCES_PER_MODEL)
            | beam.Values()  # pylint: disable=no-value-for-parameter
            | beam.FlatMap(job_run_result.JobRunResult.accumulate)
            | beam.Map(self.create_beam_job_run_result_model,
                       results.pipeline.options.namespace)
            | ndb_io.PutModels())
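
The WithKeys(None) / GroupIntoBatches / Values() sequence above is a general Beam idiom for batching a PCollection that has no natural keys. A minimal standalone sketch of just that idiom, assuming nothing beyond the apache_beam package (the input data and the MAX_BATCH_SIZE constant are illustrative, not from Oppia):

import apache_beam as beam

MAX_BATCH_SIZE = 1000  # illustrative stand-in for _MAX_RESULT_INSTANCES_PER_MODEL

with beam.Pipeline() as pipeline:
    _ = (
        pipeline
        | beam.Create(['result-%d' % i for i in range(2500)])
        # GroupIntoBatches() only accepts (key, value) pairs, so give every
        # element the same dummy key...
        | beam.WithKeys(None)
        | beam.GroupIntoBatches(MAX_BATCH_SIZE)
        # ...then drop the keys again, leaving plain batches of at most
        # MAX_BATCH_SIZE elements each.
        | beam.Values()
        | beam.Map(lambda batch: print(len(batch))))  # e.g. 1000, 1000, 500
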
Example #2
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        user_settings_models = (
            self.pipeline
            | 'Get all UserSettingsModels' >>
            (ndb_io.GetModels(user_models.UserSettingsModel.get_all())))

        old_user_stats_models = (
            self.pipeline
            | 'Get all UserStatsModels' >>
            (ndb_io.GetModels(user_models.UserStatsModel.get_all())))

        # Create a UserStatsModel for each user that does not have one.
        new_user_stats_models = (
            (user_settings_models, old_user_stats_models)
            | 'Merge models' >> beam.Flatten()
            # Returns a PCollection of
            # (model.id, (user_settings_models, user_stats_models)) or
            # (model.id, (user_settings_models,)).
            | 'Group models with same ID' >> beam.GroupBy(lambda m: m.id)
            # Discards model.id from the PCollection.
            | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
            # Only keep groupings that indicate that
            # the UserStatsModel is missing.
            | 'Filter pairs of models' >>
            beam.Filter(lambda models: (len(list(models)) == 1 and isinstance(
                list(models)[0], user_models.UserSettingsModel)))
            # Choosing the first element.
            | 'Transform tuples into models' >>
            beam.Map(lambda models: list(models)[0])
            # Creates the missing UserStatsModels.
            | 'Create new user stat models' >> beam.ParDo(
                CreateUserStatsModel()))

        unused_put_result = (
            (new_user_stats_models, old_user_stats_models)
            | 'Merge new and old models together' >> beam.Flatten()
            | 'Update the dashboard stats' >> beam.ParDo(
                UpdateWeeklyCreatorStats())
            | 'Put models into the datastore' >> ndb_io.PutModels())

        new_user_stats_job_result = (
            new_user_stats_models
            | 'Count all new models' >> beam.combiners.Count.Globally()
            | 'Only create result for new models when > 0' >>
            (beam.Filter(lambda x: x > 0))
            | 'Create result for new models' >>
            beam.Map(lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS NEW %s' % x)))
        old_user_stats_job_result = (
            old_user_stats_models
            | 'Count all old models' >> beam.combiners.Count.Globally()
            | 'Only create result for old models when > 0' >>
            (beam.Filter(lambda x: x > 0))
            | 'Create result for old models' >>
            beam.Map(lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS OLD %s' % x)))

        return ((new_user_stats_job_result, old_user_stats_job_result)
                | 'Merge new and old results together' >> beam.Flatten())
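
The Flatten / GroupBy(id) / Filter stage above is effectively an anti-join: it keeps only the IDs present in the first collection but missing from the second. A minimal sketch of the same pattern, with plain dicts standing in for the NDB models (all data and names illustrative):

import apache_beam as beam

with beam.Pipeline() as pipeline:
    settings = pipeline | 'Settings' >> beam.Create([
        {'id': 'u1', 'kind': 'settings'},
        {'id': 'u2', 'kind': 'settings'},
    ])
    stats = pipeline | 'Stats' >> beam.Create([
        {'id': 'u1', 'kind': 'stats'},
    ])
    _ = (
        (settings, stats)
        | beam.Flatten()
        | beam.GroupBy(lambda m: m['id'])
        | beam.Values()
        # A singleton group holding a settings record means the matching
        # stats record is missing.
        | beam.Filter(lambda ms: len(list(ms)) == 1
                      and list(ms)[0]['kind'] == 'settings')
        | beam.Map(lambda ms: list(ms)[0])
        | beam.Map(print))  # prints only the 'u2' settings record
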
Example #3
    def test_write_to_datastore(self):
        model_list = [
            self.create_model(base_models.BaseModel, id='a'),
            self.create_model(base_models.BaseModel, id='b'),
            self.create_model(base_models.BaseModel, id='c'),
        ]

        self.assertItemsEqual(self.get_everything(), [])

        self.assert_pcoll_empty(self.pipeline | beam.Create(model_list)
                                | ndb_io.PutModels())

        self.assertItemsEqual(self.get_everything(), model_list)
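
Note that assert_pcoll_empty() and get_everything() are helpers from Oppia's own job test base, not Beam APIs. With vanilla Beam, the equivalent assertions come from apache_beam.testing.util; a minimal sketch with illustrative data:

import apache_beam as beam
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that, equal_to, is_empty

with TestPipeline() as pipeline:
    models = pipeline | 'Models' >> beam.Create(['a', 'b', 'c'])
    # Verify the full contents of a PCollection...
    assert_that(models, equal_to(['a', 'b', 'c']), label='check contents')
    # ...or verify that a PCollection produced no elements at all.
    empty = models | 'Drop all' >> beam.Filter(lambda _: False)
    assert_that(empty, is_empty(), label='check empty')
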
Example #4
    def expand(self, results):
        """Writes the given job results to the NDB datastore."""
        return (
            results
            # NOTE: Pylint is wrong. WithKeys() is a decorated function with a
            # different signature than the one it's defined with.
            | beam.WithKeys(None)  # pylint: disable=no-value-for-parameter
            # GroupIntoBatches() requires (key, value) pairs as input, so we
            # give everything None keys and then immediately discard them.
            | beam.GroupIntoBatches(self._MAX_RESULT_INSTANCES_PER_MODEL)
            | beam.Values()
            | beam.FlatMap(job_run_result.JobRunResult.accumulate)
            | beam.Map(self.create_beam_job_run_result_model)
            | ndb_io.PutModels(self.datastoreio_stub))
Example #5
    def test_write_to_datastore(self) -> None:
        model_list = [
            self.create_model(base_models.BaseModel, id='a'),
            self.create_model(base_models.BaseModel, id='b'),
            self.create_model(base_models.BaseModel, id='c'),
        ]

        self.assertItemsEqual(self.get_base_models(),
                              [])  # type: ignore[no-untyped-call]

        self.assert_pcoll_empty(self.pipeline | beam.Create(model_list)
                                | ndb_io.PutModels())

        self.assertItemsEqual(self.get_base_models(),
                              model_list)  # type: ignore[no-untyped-call]
Example #6
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Returns a PCollection of 'SUCCESS' or 'FAILURE' results from
        the Elastic Search.

        Returns:
            PCollection. A PCollection of 'SUCCESS' or 'FAILURE' results from
            the Elastic Search.
        """

        exp_summary_models = (
            self.pipeline
            | 'Get all non-deleted models' >>
            (ndb_io.GetModels(exp_models.ExpSummaryModel.get_all())))

        exp_summary_iter = beam.pvalue.AsIter(exp_summary_models)

        exp_recommendations_models = (
            exp_summary_models
            | 'Compute similarity' >> beam.ParDo(ComputeSimilarity(),
                                                 exp_summary_iter)
            | 'Group similarities per exploration ID' >> beam.GroupByKey()
            | 'Sort and slice similarities' >> beam.MapTuple(
                lambda exp_id, similarities:
                (exp_id, self._sort_and_slice_similarities(similarities)))
            | 'Create recommendation models' >> beam.MapTuple(
                self._create_recommendation))

        unused_put_result = (
            exp_recommendations_models
            | 'Put models into the datastore' >> ndb_io.PutModels())

        return (exp_recommendations_models
                | 'Count all new models' >> beam.combiners.Count.Globally()
                | 'Only create result for new models when > 0' >>
                (beam.Filter(lambda x: x > 0))
                | 'Create result for new models' >>
                beam.Map(lambda x: job_run_result.JobRunResult(
                    stdout='SUCCESS %s' % x)))
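
beam.pvalue.AsIter() above hands the entire PCollection to ComputeSimilarity() as a side input, so every element can be compared against all of the others. A minimal sketch of that side-input pattern (data and function name illustrative):

import apache_beam as beam

def pair_with_others(exp_id, all_exp_ids):
    # all_exp_ids is the AsIter side input: an iterable over the entire
    # PCollection, available in full to every invocation.
    return [(exp_id, other) for other in all_exp_ids if other != exp_id]

with beam.Pipeline() as pipeline:
    summaries = pipeline | beam.Create(['exp1', 'exp2', 'exp3'])
    _ = (
        summaries
        | beam.FlatMap(pair_with_others, beam.pvalue.AsIter(summaries))
        | beam.Map(print))  # ('exp1', 'exp2'), ('exp1', 'exp3'), ...
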
Example #7
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Generates the translation contributins stats.

        Returns:
            PCollection. A PCollection of 'SUCCESS x' results, where x is
            the number of generated stats.
        """
        suggestions_grouped_by_target = (
            self.pipeline
            | 'Get all non-deleted suggestion models' >> ndb_io.GetModels(
                suggestion_models.GeneralSuggestionModel.get_all(
                    include_deleted=False))
            # We need to window the models so that CoGroupByKey below
            # works properly.
            | 'Window the suggestions' >> beam.WindowInto(
                beam.window.Sessions(10 * 60))
            | 'Filter translate suggestions' >> beam.Filter(lambda m: (
                m.suggestion_type == feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT))
            | 'Transform to suggestion domain object' >> beam.Map(
                suggestion_services.get_suggestion_from_model)
            | 'Group by target' >> beam.GroupBy(lambda m: m.target_id))
        exp_opportunities = (
            self.pipeline
            | 'Get all non-deleted opportunity models' >> ndb_io.GetModels(
                opportunity_models.ExplorationOpportunitySummaryModel.get_all(
                    include_deleted=False))
            # We need to window the models so that CoGroupByKey below
            # works properly.
            | 'Window the opportunities' >> beam.WindowInto(
                beam.window.Sessions(10 * 60))
            | 'Transform to opportunity domain object' >>
            beam.Map(opportunity_services.
                     get_exploration_opportunity_summary_from_model)
            | 'Group by ID' >> beam.GroupBy(lambda m: m.id))

        new_user_stats_models = (
            {
                'suggestion': suggestions_grouped_by_target,
                'opportunity': exp_opportunities
            }
            | 'Merge models' >> beam.CoGroupByKey()
            | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
            | 'Generate stats' >> beam.ParDo(lambda x: self._generate_stats(
                x['suggestion'][0] if len(x['suggestion']) else [], x[
                    'opportunity'][0][0] if len(x['opportunity']) else None))
            | 'Group by key' >> beam.GroupByKey()
            | 'Combine the stats' >> beam.CombineValues(CombineStats())
            | 'Generate models from stats' >> beam.MapTuple(
                self._generate_translation_contribution_model))

        unused_put_result = (
            new_user_stats_models
            | 'Put models into the datastore' >> ndb_io.PutModels())

        return (new_user_stats_models
                | 'Count all new models' >>
                (beam.combiners.Count.Globally().without_defaults())
                | 'Only create result for new models when > 0' >>
                (beam.Filter(lambda x: x > 0))
                | 'Create result for new models' >>
                beam.Map(lambda x: job_run_result.JobRunResult(
                    stdout='SUCCESS %s' % x)))
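
The dict of PCollections piped into beam.CoGroupByKey() above is Beam's standard join: for each key it emits one dict containing a (possibly empty) list of values from every input. A minimal sketch with illustrative data:

import apache_beam as beam

with beam.Pipeline() as pipeline:
    suggestions = pipeline | 'Suggestions' >> beam.Create([
        ('exp1', 'translate intro card'),
        ('exp1', 'translate ending card'),
    ])
    opportunities = pipeline | 'Opportunities' >> beam.Create([
        ('exp1', 'exp1 summary'),
        ('exp2', 'exp2 summary'),
    ])
    _ = (
        {'suggestion': suggestions, 'opportunity': opportunities}
        # Emits e.g. ('exp1', {'suggestion': [<2 items>],
        # 'opportunity': ['exp1 summary']}) and ('exp2', {'suggestion': [],
        # 'opportunity': ['exp2 summary']}).
        | beam.CoGroupByKey()
        | beam.Map(print))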