Example #1
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Returns a PCollection of 'SUCCESS' or 'FAILURE' results from
        generating ExplorationOpportunitySummaryModel.

        Returns:
            PCollection. A PCollection of 'SUCCESS' or 'FAILURE' results from
            generating ExplorationOpportunitySummaryModel.
        """

        topics = (self.pipeline
                  | 'Get all non-deleted topic models' >> (ndb_io.GetModels(
                      topic_models.TopicModel.get_all(include_deleted=False)))
                  | 'Get topic from model' >> beam.Map(
                      topic_fetchers.get_topic_from_model))

        story_ids_to_story = (
            self.pipeline
            | 'Get all non-deleted story models' >> ndb_io.GetModels(
                story_models.StoryModel.get_all(include_deleted=False))
            | 'Get story from model' >> beam.Map(
                story_fetchers.get_story_from_model)
            | 'Combine stories and ids' >> beam.Map(lambda story:
                                                    (story.id, story)))

        exp_ids_to_exp = (
            self.pipeline
            | 'Get all non-deleted exp models' >> ndb_io.GetModels(
                exp_models.ExplorationModel.get_all(include_deleted=False))
            | 'Get exploration from model' >> beam.Map(
                exp_fetchers.get_exploration_from_model)
            | 'Combine exploration and ids' >> beam.Map(lambda exp:
                                                        (exp.id, exp)))

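        # beam.pvalue.AsDict turns each keyed PCollection into a dict side
        # input, so each topic below can look up its stories and explorations
        # without another join.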
        stories_dict = beam.pvalue.AsDict(story_ids_to_story)
        exps_dict = beam.pvalue.AsDict(exp_ids_to_exp)

        opportunities_results = (
            topics
            | 'Generate opportunities from topics' >> beam.Map(
                self._generate_opportunities_related_to_topic,
                stories_dict=stories_dict,
                exps_dict=exps_dict))

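        # Sample.FixedSizePerKey(1) below keeps at most one model per ID, so
        # a model that was generated more than once is only put into the
        # datastore once.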
        unused_put_result = (
            opportunities_results
            | 'Filter the results with SUCCESS status' >> beam.Filter(
                lambda result: result.is_ok())
            | 'Fetch the models to be put' >> beam.FlatMap(
                lambda result: result.unwrap())
            | 'Add ID as a key' >> beam.WithKeys(lambda model: model.id)  # pylint: disable=no-value-for-parameter
            | 'Allow only one item per key' >>
            beam.combiners.Sample.FixedSizePerKey(1)
            | 'Remove the IDs' >> beam.Values()  # pylint: disable=no-value-for-parameter
            | 'Flatten the list of lists of models' >> beam.FlatMap(
                lambda x: x)
            | 'Put models into the datastore' >> ndb_io.PutModels())

        return (opportunities_results
                | 'Count the output' >>
                (job_result_transforms.ResultsToJobRunResults()))
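The helper `_generate_opportunities_related_to_topic` is not shown in this example. Below is a minimal sketch of the contract it has to satisfy, using the `result` library that the `is_ok()`/`unwrap()` calls above imply; the topic and story attribute names here are illustrative assumptions, not the real Oppia schema:

    from result import Ok, Err

    def _generate_opportunities_related_to_topic(topic, stories_dict, exps_dict):
        """Returns Ok(list of opportunity models) for a topic, or Err on
        missing data, so the pipeline can drop failures with is_ok()."""
        try:
            opportunities = []
            for story_id in topic.story_ids:  # Assumed attribute.
                story = stories_dict[story_id]  # Raises KeyError if absent.
                for exp_id in story.exploration_ids:  # Assumed attribute.
                    exploration = exps_dict[exp_id]
                    opportunities.append(
                        # Placeholder for the real
                        # ExplorationOpportunitySummaryModel constructor.
                        (topic.id, story.id, exploration.id))
            return Ok(opportunities)
        except KeyError as missing:
            return Err('Missing entity for topic %s: %s' % (topic.id, missing))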
Example #2
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Generates the translation contributins stats.

        Returns:
            PCollection. A PCollection of 'SUCCESS x' results, where x is
            the number of generated stats.
        """
        suggestions_grouped_by_target = (
            self.pipeline
            | 'Get all non-deleted suggestion models' >> ndb_io.GetModels(
                suggestion_models.GeneralSuggestionModel.get_all(
                    include_deleted=False))
            # We need to window the models so that CoGroupByKey below
            # works properly.
            | 'Window the suggestions' >> beam.WindowInto(
                beam.window.GlobalWindows())
            | 'Filter translate suggestions' >> beam.Filter(lambda m: (
                m.suggestion_type == feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT))
            | 'Transform to suggestion domain object' >> beam.Map(
                suggestion_services.get_suggestion_from_model)
            | 'Group by target' >> beam.GroupBy(lambda m: m.target_id))
        exp_opportunities = (
            self.pipeline
            | 'Get all non-deleted opportunity models' >> ndb_io.GetModels(
                opportunity_models.ExplorationOpportunitySummaryModel.get_all(
                    include_deleted=False))
            # We need to window the models so that CoGroupByKey below
            # works properly.
            | 'Window the opportunities' >> beam.WindowInto(
                beam.window.GlobalWindows())
            | 'Transform to opportunity domain object' >> beam.Map(
                opportunity_services
                .get_exploration_opportunity_summary_from_model)
            | 'Group by ID' >> beam.GroupBy(lambda m: m.id))

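        # After CoGroupByKey, each value is a dict with a 'suggestion' list
        # and an 'opportunity' list; either list is empty when that side has
        # no entry for the target ID.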
        user_stats_results = (
            {
                'suggestion': suggestions_grouped_by_target,
                'opportunity': exp_opportunities
            }
            | 'Merge models' >> beam.CoGroupByKey()
            | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
            | 'Generate stats' >> beam.ParDo(
                lambda x: self._generate_stats(
                    x['suggestion'][0] if len(x['suggestion']) else [],
                    list(x['opportunity'][0])[0]
                    if len(x['opportunity']) else None)))

        user_stats_models = (
            user_stats_results
            | 'Filter ok results' >>
            beam.Filter(lambda key_and_result: key_and_result[1].is_ok())
            | 'Unpack result' >> beam.MapTuple(
                lambda key, result: (key, result.unwrap()))
            | 'Combine the stats' >> beam.CombinePerKey(CombineStats())
            | 'Generate models from stats' >> beam.MapTuple(
                self._generate_translation_contribution_model))

        user_stats_error_job_run_results = (
            user_stats_results
            | 'Filter err results' >>
            beam.Filter(lambda key_and_result: key_and_result[1].is_err())
            # Pylint disable is needed because pylint is not able to correctly
            # detect that the value is passed through the pipe.
            | 'Remove keys' >> beam.Values()  # pylint: disable=no-value-for-parameter
            | 'Transform result to job run result' >>
            (job_result_transforms.ResultsToJobRunResults()))

        unused_put_result = (
            user_stats_models
            | 'Put models into the datastore' >> ndb_io.PutModels())

        user_stats_models_job_run_results = (
            user_stats_models
            | 'Create job run result' >>
            (job_result_transforms.CountObjectsToJobRunResult()))

        return ((user_stats_error_job_run_results,
                 user_stats_models_job_run_results)
                | 'Merge job run results' >> beam.Flatten())
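`CombineStats` is used in the 'Combine the stats' step above but is not defined in this example. The following is a sketch of the `beam.CombineFn` shape it would need; the assumption that two stats objects can be merged with `+` is illustrative:

    import apache_beam as beam

    class CombineStats(beam.CombineFn):
        """Merges all translation contribution stats for one key."""

        def create_accumulator(self):
            # Empty accumulator; the real job would start from its own
            # empty stats domain object.
            return None

        def add_input(self, accumulator, stats):
            return stats if accumulator is None else accumulator + stats

        def merge_accumulators(self, accumulators):
            merged = None
            for acc in accumulators:
                if acc is not None:
                    merged = acc if merged is None else merged + acc
            return merged

        def extract_output(self, accumulator):
            return accumulator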
Example #3
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Returns a PCollection of 'SUCCESS' or 'FAILURE' results from
        generating SkillOpportunityModel.

        Returns:
            PCollection. A PCollection of 'SUCCESS' or 'FAILURE' results from
            generating SkillOpportunityModel.
        """
        question_skill_link_models = (
            self.pipeline
            | 'Get all non-deleted QuestionSkillLinkModels' >>
            (ndb_io.GetModels(
                question_models.QuestionSkillLinkModel.get_all(
                    include_deleted=False)))
            | 'Group QuestionSkillLinkModels by skill ID' >>
            beam.GroupBy(lambda n: n.skill_id))

        skills = (
            self.pipeline
            | 'Get all non-deleted SkillModels' >> ndb_io.GetModels(
                skill_models.SkillModel.get_all(include_deleted=False))
            | 'Get skill object from model' >> beam.Map(
                skill_fetchers.get_skill_from_model)
            | 'Group skill objects by skill ID' >> beam.GroupBy(
                lambda m: m.id))

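        # CoGroupByKey joins the two collections on skill ID; a skill with no
        # QuestionSkillLinkModels gets an empty 'question_skill_links' list.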
        skills_with_question_counts = (
            {
                'skill': skills,
                'question_skill_links': question_skill_link_models
            }
            | 'Merge by skill ID' >> beam.CoGroupByKey()
            # Pylint disable is needed because pylint is not able to correctly
            # detect that the value is passed through the pipe.
            | 'Remove skill IDs' >> beam.Values()  # pylint: disable=no-value-for-parameter
            # We are using itertools.chain.from_iterable to flatten
            # question_skill_links from a 2D list into a 1D list.
            | 'Flatten skill and question_skill_links' >> beam.Map(
                lambda grouped: {
                    'skill': list(grouped['skill'][0])[0],
                    'question_skill_links': list(
                        itertools.chain.from_iterable(
                            grouped['question_skill_links']))
                }))

        opportunities_results = (
            skills_with_question_counts
            | 'Create skill opportunity models' >> beam.Map(
                lambda grouped: self._create_skill_opportunity_model(
                    grouped['skill'], grouped['question_skill_links'])))

        unused_put_result = (
            opportunities_results
            | 'Filter the results with OK status' >>
            beam.Filter(lambda result: result.is_ok())
            | 'Fetch the models to be put' >>
            beam.Map(lambda result: result.unwrap())
            | 'Put models into the datastore' >> ndb_io.PutModels())

        return (opportunities_results
                | 'Transform Results to JobRunResults' >>
                (job_result_transforms.ResultsToJobRunResults()))
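As in the first example, the helper `_create_skill_opportunity_model` is not shown. Here is a sketch of its expected contract with the `result` library; the `SkillOpportunityModel` constructor arguments are assumptions for illustration:

    from result import Ok, Err

    def _create_skill_opportunity_model(skill, question_skill_links):
        """Returns Ok(SkillOpportunityModel) for a skill, or Err so the
        'Filter the results with OK status' step can drop the failure."""
        try:
            model = opportunity_models.SkillOpportunityModel(
                id=skill.id,
                skill_description=skill.description,  # Assumed field.
                question_count=len(question_skill_links))
            return Ok(model)
        except Exception as error:
            return Err('Failed for skill %s: %s' % (skill.id, error))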