def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
    """Generates ExplorationOpportunitySummaryModel instances for every
    topic and reports the outcome of each generation attempt.

    Returns:
        PCollection. A PCollection of 'SUCCESS' or 'FAILURE' results from
        generating ExplorationOpportunitySummaryModel.
    """
    # Topic domain objects, one per non-deleted TopicModel.
    topic_objects = (
        self.pipeline
        | 'Get all non-deleted topic models' >> (ndb_io.GetModels(
            topic_models.TopicModel.get_all(include_deleted=False)))
        | 'Get topic from model' >> beam.Map(
            topic_fetchers.get_topic_from_model))

    # (story_id, Story) pairs, consumed below as a dict side input.
    keyed_stories = (
        self.pipeline
        | 'Get all non-deleted story models' >> ndb_io.GetModels(
            story_models.StoryModel.get_all(include_deleted=False))
        | 'Get story from model' >> beam.Map(
            story_fetchers.get_story_from_model)
        | 'Combine stories and ids' >> beam.Map(
            lambda story_object: (story_object.id, story_object)))

    # (exp_id, Exploration) pairs, consumed below as a dict side input.
    keyed_explorations = (
        self.pipeline
        | 'Get all non-deleted exp models' >> ndb_io.GetModels(
            exp_models.ExplorationModel.get_all(include_deleted=False))
        | 'Get exploration from model' >> beam.Map(
            exp_fetchers.get_exploration_from_model)
        | 'Combine exploration and ids' >> beam.Map(
            lambda exp_object: (exp_object.id, exp_object)))

    stories_side_input = beam.pvalue.AsDict(keyed_stories)
    exps_side_input = beam.pvalue.AsDict(keyed_explorations)

    # One Result per topic; an ok-result wraps the opportunity models
    # generated for that topic.
    generation_results = (
        topic_objects
        | beam.Map(
            self._generate_opportunities_related_to_topic,
            stories_dict=stories_side_input,
            exps_dict=exps_side_input))

    unused_datastore_put = (
        generation_results
        | 'Filter the results with SUCCESS status' >> beam.Filter(
            lambda result: result.is_ok())
        | 'Fetch the models to be put' >> beam.FlatMap(
            lambda result: result.unwrap())
        # Key by model ID and sample one item per key so that duplicate
        # models are written at most once.
        | 'Add ID as a key' >> beam.WithKeys(lambda model: model.id)  # pylint: disable=no-value-for-parameter
        | 'Allow only one item per key' >> (
            beam.combiners.Sample.FixedSizePerKey(1))
        | 'Remove the IDs' >> beam.Values()  # pylint: disable=no-value-for-parameter
        | 'Flatten the list of lists of models' >> beam.FlatMap(
            lambda models: models)
        | 'Put models into the datastore' >> ndb_io.PutModels())

    return (
        generation_results
        | 'Count the output' >> (
            job_result_transforms.ResultsToJobRunResults()))
def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
    """Generates the translation contribution stats.

    Returns:
        PCollection. A PCollection of 'SUCCESS x' results, where x is
        the number of generated stats.
    """
    # Translate-content suggestions, as domain objects, grouped by the
    # exploration they target: (target_id, iterable of suggestions).
    suggestions_grouped_by_target = (
        self.pipeline
        | 'Get all non-deleted suggestion models' >> ndb_io.GetModels(
            suggestion_models.GeneralSuggestionModel.get_all(
                include_deleted=False))
        # We need to window the models so that CoGroupByKey below
        # works properly.
        | 'Filter translate suggestions' >> beam.Filter(lambda m: (
            m.suggestion_type == feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT))
        | 'Transform to suggestion domain object' >> beam.Map(
            suggestion_services.get_suggestion_from_model)
        | 'Group by target' >> beam.GroupBy(lambda m: m.target_id))

    # Exploration opportunities keyed by their own ID, so they join
    # against the suggestions' target IDs in the CoGroupByKey below.
    exp_opportunities = (
        self.pipeline
        | 'Get all non-deleted opportunity models' >> ndb_io.GetModels(
            opportunity_models.ExplorationOpportunitySummaryModel.get_all(
                include_deleted=False))
        # We need to window the models so that CoGroupByKey below
        # works properly.
        | 'Transform to opportunity domain object' >> beam.Map(
            opportunity_services
            .get_exploration_opportunity_summary_from_model)
        | 'Group by ID' >> beam.GroupBy(lambda m: m.id))

    # After CoGroupByKey each element is a dict whose 'suggestion' and
    # 'opportunity' values are lists of the groupings made above; either
    # list may be empty when one side has no match for the key.
    # NOTE(review): the ParDo lambda presumably yields (key, Result)
    # pairs — the downstream filters index key_and_result[1] — verify
    # against _generate_stats.
    user_stats_results = (
        {
            'suggestion': suggestions_grouped_by_target,
            'opportunity': exp_opportunities
        }
        | 'Merge models' >> beam.CoGroupByKey()
        | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
        | 'Generate stats' >> beam.ParDo(
            lambda x: self._generate_stats(
                x['suggestion'][0] if len(x['suggestion']) else [],
                list(x['opportunity'][0])[0]
                if len(x['opportunity']) else None)))

    # Keep only ok results, unwrap them, and fold the per-key stats into
    # one model per key.
    user_stats_models = (
        user_stats_results
        | 'Filter ok results' >> beam.Filter(
            lambda key_and_result: key_and_result[1].is_ok())
        | 'Unpack result' >> beam.MapTuple(
            lambda key, result: (key, result.unwrap()))
        | 'Combine the stats' >> beam.CombinePerKey(CombineStats())
        | 'Generate models from stats' >> beam.MapTuple(
            self._generate_translation_contribution_model))

    # Err results are reported as job run results instead of being
    # written to the datastore.
    user_stats_error_job_run_results = (
        user_stats_results
        | 'Filter err results' >> beam.Filter(
            lambda key_and_result: key_and_result[1].is_err())
        # Pylint disable is needed because pylint is not able to correctly
        # detect that the value is passed through the pipe.
        | 'Remove keys' >> beam.Values()  # pylint: disable=no-value-for-parameter
        | 'Transform result to job run result' >> (
            job_result_transforms.ResultsToJobRunResults()))

    unused_put_result = (
        user_stats_models
        | 'Put models into the datastore' >> ndb_io.PutModels())

    user_stats_models_job_run_results = (
        user_stats_models
        | 'Create job run result' >> (
            job_result_transforms.CountObjectsToJobRunResult()))

    # Merge the success counts and the error reports into one output.
    return (
        (
            user_stats_error_job_run_results,
            user_stats_models_job_run_results)
        | 'Merge job run results' >> beam.Flatten())
def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
    """Returns a PCollection of 'SUCCESS' or 'FAILURE' results from
    generating SkillOpportunityModel.

    Returns:
        PCollection. A PCollection of 'SUCCESS' or 'FAILURE' results from
        generating SkillOpportunityModel.
    """
    # QuestionSkillLinkModels grouped by the skill they link to:
    # (skill_id, iterable of link models).
    question_skill_link_models = (
        self.pipeline
        | 'Get all non-deleted QuestionSkillLinkModels' >> (
            ndb_io.GetModels(
                question_models.QuestionSkillLinkModel.get_all(
                    include_deleted=False)))
        | 'Group QuestionSkillLinkModels by skill ID' >> beam.GroupBy(
            lambda n: n.skill_id))

    # Skill domain objects grouped by their own ID, so that they join
    # against the link models' skill IDs in the CoGroupByKey below.
    skills = (
        self.pipeline
        | 'Get all non-deleted SkillModels' >> (ndb_io.GetModels(
            skill_models.SkillModel.get_all(include_deleted=False)))
        | 'Get skill object from model' >> beam.Map(
            skill_fetchers.get_skill_from_model)
        | 'Group skill objects by skill ID' >> beam.GroupBy(lambda m: m.id))

    skills_with_question_counts = (
        {
            'skill': skills,
            'question_skill_links': question_skill_link_models
        }
        | 'Merge by skill ID' >> beam.CoGroupByKey()
        # Pylint disable is needed because pylint is not able to correctly
        # detect that the value is passed through the pipe.
        | 'Remove skill IDs' >> beam.Values()  # pylint: disable=no-value-for-parameter
        # We are using itertools.chain.from_iterable to flatten
        # question_skill_links from a 2D list into a 1D list.
        # NOTE: the lambda parameter was renamed from 'object' to
        # 'grouped' to avoid shadowing the builtin.
        | 'Flatten skill and question_skill_links' >> beam.Map(
            lambda grouped: {
                'skill': list(grouped['skill'][0])[0],
                'question_skill_links': list(
                    itertools.chain.from_iterable(
                        grouped['question_skill_links']))
            }))

    # One Result per skill; an ok-result wraps the opportunity model
    # created for that skill.
    opportunities_results = (
        skills_with_question_counts
        | beam.Map(lambda grouped: self._create_skill_opportunity_model(
            grouped['skill'], grouped['question_skill_links'])))

    unused_put_result = (
        opportunities_results
        | 'Filter the results with OK status' >> beam.Filter(
            lambda result: result.is_ok())
        | 'Fetch the models to be put' >> beam.Map(
            lambda result: result.unwrap())
        | 'Put models into the datastore' >> ndb_io.PutModels())

    return (
        opportunities_results
        | 'Transform Results to JobRunResults' >> (
            job_result_transforms.ResultsToJobRunResults()))