def test_equality(self):
    """Results equal themselves and differ from results with other
    payloads.
    """
    first = job_run_result.JobRunResult(stdout='abc', stderr='123')
    second = job_run_result.JobRunResult(stdout='def', stderr='456')
    for result in (first, second):
        self.assertEqual(result, result)
    self.assertNotEqual(first, second)
def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
    """Creates missing UserStatsModels, refreshes the weekly creator
    stats on every model, and reports how many new and pre-existing
    models were processed.

    Returns:
        PCollection. JobRunResults carrying 'SUCCESS NEW x' and/or
        'SUCCESS OLD y' messages; each is emitted only when the
        corresponding count is greater than zero.
    """
    user_settings_models = (
        self.pipeline
        | 'Get all UserSettingsModels' >> (
            ndb_io.GetModels(user_models.UserSettingsModel.get_all())))
    old_user_stats_models = (
        self.pipeline
        | 'Get all UserStatsModels' >> (
            ndb_io.GetModels(user_models.UserStatsModel.get_all())))
    # Creates UserStatsModels if they do not exist.
    new_user_stats_models = (
        (user_settings_models, old_user_stats_models)
        | 'Merge models' >> beam.Flatten()
        # Returns a PCollection of
        # (model.id, (user_settings_models, user_stats_models)) or
        # (model.id, (user_settings_models,)).
        | 'Group models with same ID' >> beam.GroupBy(lambda m: m.id)
        # Discards model.id from the PCollection.
        | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
        # Only keep groupings that indicate that
        # the UserStatsModel is missing.
        | 'Filter pairs of models' >> beam.Filter(
            lambda models: (
                len(list(models)) == 1 and isinstance(
                    list(models)[0], user_models.UserSettingsModel)))
        # Choosing the first element.
        | 'Transform tuples into models' >> beam.Map(
            lambda models: list(models)[0])
        # Creates the missing UserStatsModels.
        | 'Create new user stat models' >> beam.ParDo(
            CreateUserStatsModel()))
    unused_put_result = (
        (new_user_stats_models, old_user_stats_models)
        | 'Merge new and old models together' >> beam.Flatten()
        | 'Update the dashboard stats' >> beam.ParDo(
            UpdateWeeklyCreatorStats())
        | 'Put models into the datastore' >> ndb_io.PutModels())
    new_user_stats_job_result = (
        new_user_stats_models
        | 'Count all new models' >> beam.combiners.Count.Globally()
        | 'Only create result for new models when > 0' >> (
            beam.Filter(lambda x: x > 0))
        | 'Create result for new models' >> beam.Map(
            lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS NEW %s' % x)))
    old_user_stats_job_result = (
        old_user_stats_models
        | 'Count all old models' >> beam.combiners.Count.Globally()
        | 'Only create result for old models when > 0' >> (
            beam.Filter(lambda x: x > 0))
        | 'Create result for old models' >> beam.Map(
            lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS OLD %s' % x)))
    return (
        (new_user_stats_job_result, old_user_stats_job_result)
        | 'Merge new and old results together' >> beam.Flatten())
def test_concat(self):
    """concat merges the stdout and stderr of all inputs."""
    inputs = [
        job_run_result.JobRunResult(stdout='abc', stderr=''),
        job_run_result.JobRunResult(stdout='', stderr='123'),
        job_run_result.JobRunResult(stdout='def', stderr='456'),
    ]
    combined = job_run_result.JobRunResult.concat(inputs)
    self.assertItemsEqual(combined.stdout.split('\n'), ['abc', 'def'])
    self.assertItemsEqual(combined.stderr.split('\n'), ['123', '456'])
def test_accumulate(self):
    """Small results accumulate into exactly one combined result."""
    inputs = [
        job_run_result.JobRunResult(stdout='abc', stderr=''),
        job_run_result.JobRunResult(stdout='', stderr='123'),
        job_run_result.JobRunResult(stdout='def', stderr='456'),
    ]
    (combined,) = job_run_result.JobRunResult.accumulate(inputs)
    self.assertItemsEqual(combined.stdout.split('\n'), ['abc', 'def'])
    self.assertItemsEqual(combined.stderr.split('\n'), ['123', '456'])
def test_sharded_output(self):
    """Results exceeding MAX_OUTPUT_BYTES are sharded on write but still
    merge back into a single job run result on read.
    """
    messages = [
        job_run_result.JobRunResult(stdout=out, stderr=err)
        for out, err in [('abc', '123'), ('def', '456'), ('ghi', '789')]
    ]
    with self.swap(job_run_result, 'MAX_OUTPUT_BYTES', 11):
        self.assert_pcoll_empty(
            self.pipeline
            | beam.Create(messages)
            | job_io.PutResults(self.JOB_ID))
    result = beam_job_services.get_beam_job_run_result(self.JOB_ID)
    self.assertItemsEqual(
        result.stdout.split('\n'), ['abc', 'def', 'ghi'])
    self.assertItemsEqual(
        result.stderr.split('\n'), ['123', '456', '789'])
def test_indexes_non_deleted_model(self) -> None:
    """A single non-deleted ExpSummaryModel is indexed successfully."""
    summary = self.create_model(
        exp_models.ExpSummaryModel,
        id='abcd',
        deleted=False,
        title='title',
        category='category',
        objective='objective',
        language_code='lang',
        community_owned=False,
        status=constants.ACTIVITY_STATUS_PUBLIC)
    summary.update_timestamps()
    summary.put()

    expected_document = {
        'id': 'abcd',
        'language_code': 'lang',
        'title': 'title',
        'category': 'category',
        'tags': [],
        'objective': 'objective',
        'rank': 20,
    }
    add_docs_to_index_swap = self.swap_with_checks(
        platform_search_services,
        'add_documents_to_index',
        lambda _, __: None,
        expected_args=[(
            [expected_document],
            search_services.SEARCH_INDEX_EXPLORATIONS)])

    with add_docs_to_index_swap:
        self.assert_job_output_is([
            job_run_result.JobRunResult(stdout='SUCCESS 1 models indexed')
        ])
def test_updates_existing_stats_model_when_values_are_provided(
    self
) -> None:
    """An existing UserStatsModel is refreshed using its stored values."""
    settings_model = self.create_model(
        user_models.UserSettingsModel,
        id=self.VALID_USER_ID_1,
        email='*****@*****.**')
    stats_model = self.create_model(
        user_models.UserStatsModel,
        id=self.VALID_USER_ID_1,
        num_ratings=10,
        average_ratings=4.5,
        total_plays=22,
    )
    self.put_multi([settings_model, stats_model])

    self.assert_job_output_is(
        [job_run_result.JobRunResult(stdout='SUCCESS OLD 1')])

    updated_model = cast(
        user_models.UserStatsModel,
        user_models.UserStatsModel.get(self.VALID_USER_ID_1))
    self.assertIsNotNone(updated_model)
    expected_stats = {
        'num_ratings': 10,
        'average_ratings': 4.5,
        'total_plays': 22
    }
    self.assertEqual(
        updated_model.weekly_creator_stats_list,
        [{self.formated_datetime: expected_stats}])
def test_accumulate_with_enormous_outputs(self):
    """accumulate shards results whose combined size exceeds the limit.

    Five results of decreasing size are accumulated; the two smallest
    are expected to share a single JobRunResult, giving four in total.
    """
    sizes = (500000, 400000, 300000, 200000, 100000)
    accumulated_results = job_run_result.JobRunResult.accumulate([
        job_run_result.JobRunResult(stdout='a' * size, stderr='b' * size)
        for size in sizes
    ])
    # 100000 and 200000 are small enough to fit as one, but the others
    # will each need their own result.
    self.assertEqual(len(accumulated_results), 4)
def test_output_returns_models_discovered(self) -> None:
    """Three stored BaseModels are reported as 'BaseModel: 3'."""
    models = [self.create_model(base_models.BaseModel) for _ in range(3)]
    self.put_multi(models)
    self.assert_job_output_is([
        job_run_result.JobRunResult(stdout='BaseModel: 3'),
    ])
def test_creates_stats_model_from_one_accepted_suggestion(self) -> None:
    """One accepted translation suggestion yields a stats model counting
    a single three-word translation.
    """
    change_cmd = {
        'cmd': exp_domain.CMD_ADD_WRITTEN_TRANSLATION,
        'state_name': 'state',
        'content_id': 'content_id',
        'language_code': 'lang',
        'content_html': '111 222 333',
        'translation_html': '111 222 333',
        'data_format': 'format'
    }
    model = self.create_model(
        suggestion_models.GeneralSuggestionModel,
        suggestion_type=feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT,
        author_id=self.VALID_USER_ID_1,
        change_cmd=change_cmd,
        score_category='irelevant',
        status=suggestion_models.STATUS_ACCEPTED,
        target_type='exploration',
        target_id=self.EXP_1_ID,
        target_version_at_submission=0,
        language_code=self.LANG_1)
    model.update_timestamps()
    model.put()

    self.assert_job_output_is(
        [job_run_result.JobRunResult(stdout='SUCCESS 1')])

    stats = (
        suggestion_models.TranslationContributionStatsModel.get(
            self.LANG_1, self.VALID_USER_ID_1, ''))
    assert stats is not None
    self.assertEqual(stats.language_code, self.LANG_1)
    self.assertEqual(stats.contributor_user_id, self.VALID_USER_ID_1)
    self.assertEqual(stats.topic_id, '')
    self.assertEqual(stats.submitted_translations_count, 1)
    self.assertEqual(stats.submitted_translation_word_count, 3)
    self.assertEqual(stats.accepted_translations_count, 1)
    self.assertEqual(
        stats.accepted_translations_without_reviewer_edits_count, 1)
    self.assertEqual(stats.accepted_translation_word_count, 3)
    self.assertEqual(stats.rejected_translations_count, 0)
    self.assertEqual(stats.rejected_translation_word_count, 0)
    self.assertItemsEqual( # type: ignore[no-untyped-call]
        stats.contribution_dates, [datetime.date.today()])
def process(
    self, exp_summary_models: List[datastore_services.Model]
) -> Iterable[job_run_result.JobRunResult]:
    """Index exploration summaries and catch any errors.

    Args:
        exp_summary_models: list(Model). Models to index.

    Yields:
        JobRunResult. A single element: SUCCESS when indexing worked,
        FAILURE when the search backend raised a SearchException.
    """
    model_count = len(exp_summary_models)
    try:
        search_services.index_exploration_summaries( # type: ignore[no-untyped-call]
            cast(List[exp_models.ExpSummaryModel], exp_summary_models))
    except platform_search_services.SearchException: # type: ignore[attr-defined]
        yield job_run_result.JobRunResult(
            stderr='FAILURE %s models not indexed' % model_count)
    else:
        yield job_run_result.JobRunResult(
            stdout='SUCCESS %s models indexed' % model_count)
def test_handles_multiple_models(self) -> None:
    """Creates stats for the user missing them and updates the other."""
    settings_1 = self.create_model(
        user_models.UserSettingsModel,
        id=self.VALID_USER_ID_1,
        email='*****@*****.**')
    settings_2 = self.create_model(
        user_models.UserSettingsModel,
        id=self.VALID_USER_ID_2,
        email='*****@*****.**')
    stats_1 = self.create_model(
        user_models.UserStatsModel, id=self.VALID_USER_ID_1)
    self.put_multi([settings_1, settings_2, stats_1])

    self.assert_job_output_is([
        job_run_result.JobRunResult(stdout='SUCCESS OLD 1'),
        job_run_result.JobRunResult(stdout='SUCCESS NEW 1')
    ])

    self.assertIsNotNone(
        user_models.UserStatsModel.get(self.VALID_USER_ID_2))
def test_single_output(self):
    """A lone result round-trips through PutResults unchanged."""
    message = job_run_result.JobRunResult(
        stdout='Hello, World!', stderr='Uh-oh, World!')
    self.assert_pcoll_empty(
        self.pipeline
        | beam.Create([message])
        | job_io.PutResults(self.JOB_ID))
    result = beam_job_services.get_beam_job_run_result(self.JOB_ID)
    self.assertEqual(result.stdout, 'Hello, World!')
    self.assertEqual(result.stderr, 'Uh-oh, World!')
def test_creates_recommendations_for_similar_explorations(self) -> None:
    """Two public explorations in the same category recommend each
    other.
    """
    recommendations_services.create_default_topic_similarities() # type: ignore[no-untyped-call]

    summaries = []
    for exp_id in (self.EXP_1_ID, self.EXP_2_ID):
        summary = self.create_model(
            exp_models.ExpSummaryModel,
            id=exp_id,
            deleted=False,
            title='title',
            category='Architecture',
            objective='objective',
            language_code='lang',
            community_owned=False,
            status=constants.ACTIVITY_STATUS_PUBLIC,
            exploration_model_last_updated=datetime.datetime.utcnow())
        summary.update_timestamps()
        summaries.append(summary)
    self.put_multi(summaries)

    self.assert_job_output_is(
        [job_run_result.JobRunResult(stdout='SUCCESS 2')])

    expected_recommendations = {
        self.EXP_1_ID: [self.EXP_2_ID],
        self.EXP_2_ID: [self.EXP_1_ID],
    }
    for exp_id, recommended_ids in expected_recommendations.items():
        model = (
            recommendations_models.ExplorationRecommendationsModel.get(
                exp_id))
        # Ruling out the possibility of None for mypy type checking.
        assert model is not None
        self.assertEqual(
            model.recommended_exploration_ids, recommended_ids)
def test_creates_new_stats_model_if_not_existing(self) -> None:
    """A user without stats gets a fresh, zeroed UserStatsModel."""
    settings_model = self.create_model(
        user_models.UserSettingsModel,
        id=self.VALID_USER_ID_1,
        email='*****@*****.**')
    settings_model.update_timestamps()
    settings_model.put()

    self.assert_job_output_is(
        [job_run_result.JobRunResult(stdout='SUCCESS NEW 1')])

    stats_model = user_models.UserStatsModel.get(self.VALID_USER_ID_1)
    # Ruling out the possibility of None for mypy type checking.
    assert stats_model is not None
    empty_stats = {
        'num_ratings': 0,
        'average_ratings': None,
        'total_plays': 0
    }
    self.assertEqual(
        stats_model.weekly_creator_stats_list,
        [{self.formated_datetime: empty_stats}])
def test_reports_failed_when_indexing_fails(self) -> None:
    """A SearchException from the backend surfaces as a FAILURE result."""
    summary = self.create_model(
        exp_models.ExpSummaryModel,
        id='abcd',
        deleted=False,
        title='title',
        category='category',
        objective='objective',
        language_code='lang',
        community_owned=False,
        status=constants.ACTIVITY_STATUS_PUBLIC)
    summary.update_timestamps()
    summary.put()

    def add_docs_to_index_mock(
        unused_documents: Dict[str, Union[int, str, List[str]]],
        unused_index_name: str
    ) -> None:
        raise platform_search_services.SearchException # type: ignore[attr-defined]

    expected_document = {
        'id': 'abcd',
        'language_code': 'lang',
        'title': 'title',
        'category': 'category',
        'tags': [],
        'objective': 'objective',
        'rank': 20,
    }
    add_docs_to_index_swap = self.swap_with_checks(
        platform_search_services,
        'add_documents_to_index',
        add_docs_to_index_mock,
        expected_args=[(
            [expected_document],
            search_services.SEARCH_INDEX_EXPLORATIONS)])

    with add_docs_to_index_swap:
        self.assert_job_output_is([
            job_run_result.JobRunResult(
                stderr='FAILURE 1 models not indexed')
        ])
def test_fails_when_existing_stats_has_wrong_schema_version(self) -> None:
    """An unsupported schema version raises and leaves stats untouched."""
    settings_model = self.create_model(
        user_models.UserSettingsModel,
        id=self.VALID_USER_ID_1,
        email='*****@*****.**')
    stats_model = self.create_model(
        user_models.UserStatsModel,
        id=self.VALID_USER_ID_1,
        schema_version=0)
    self.put_multi([settings_model, stats_model])

    expected_error_regex = (
        'Sorry, we can only process v1-v%d dashboard stats schemas at '
        'present.' % feconf.CURRENT_DASHBOARD_STATS_SCHEMA_VERSION)
    with self.assertRaisesRegexp( # type: ignore[no-untyped-call]
        Exception, expected_error_regex):
        self.assert_job_output_is(
            [job_run_result.JobRunResult(stdout='SUCCESS OLD 1')])

    new_stats_model = cast(
        user_models.UserStatsModel,
        user_models.UserStatsModel.get(self.VALID_USER_ID_1))
    self.assertIsNotNone(new_stats_model)
    self.assertEqual(new_stats_model.weekly_creator_stats_list, [])
def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
    """Computes exploration similarities, stores recommendation models,
    and reports how many models were created.

    NOTE(review): the previous docstring said results came "from the
    Elastic Search", but this pipeline builds
    ExplorationRecommendationsModels — docstring corrected to match the
    code.

    Returns:
        PCollection. A PCollection with a single 'SUCCESS x' result,
        where x is the number of recommendation models created (emitted
        only when x > 0).
    """
    exp_summary_models = (
        self.pipeline
        | 'Get all non-deleted models' >> (
            ndb_io.GetModels(exp_models.ExpSummaryModel.get_all())))
    # Side input: every ComputeSimilarity call receives the full set of
    # summaries in addition to the single element it processes.
    exp_summary_iter = beam.pvalue.AsIter(exp_summary_models)
    exp_recommendations_models = (
        exp_summary_models
        | 'Compute similarity' >> beam.ParDo(
            ComputeSimilarity(), exp_summary_iter)
        | 'Group similarities per exploration ID' >> beam.GroupByKey()
        | 'Sort and slice similarities' >> beam.MapTuple(
            lambda exp_id, similarities: (
                exp_id, self._sort_and_slice_similarities(similarities)))
        | 'Create recommendation models' >> beam.MapTuple(
            self._create_recommendation))
    unused_put_result = (
        exp_recommendations_models
        | 'Put models into the datastore' >> ndb_io.PutModels())
    return (
        exp_recommendations_models
        | 'Count all new models' >> beam.combiners.Count.Globally()
        | 'Only create result for new models when > 0' >> (
            beam.Filter(lambda x: x > 0))
        | 'Create result for new models' >> beam.Map(
            lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS %s' % x)))
def test_hash(self):
    """A result is found in a set containing it; a different one is not."""
    first = job_run_result.JobRunResult(stdout='abc', stderr='123')
    second = job_run_result.JobRunResult(stdout='def', stderr='456')
    result_set = {first}
    self.assertIn(first, result_set)
    self.assertNotIn(second, result_set)
def test_len_in_bytes_of_empty_strings(self):
    """An entirely empty result still reports a length of 2 bytes."""
    empty_result = job_run_result.JobRunResult(stdout='', stderr='')
    self.assertEqual(empty_result.len_in_bytes(), 2)
def test_len_in_bytes(self):
    """Two 3-character streams report a total length of 8 bytes."""
    result = job_run_result.JobRunResult(stdout='123', stderr='123')
    expected_byte_length = 8
    self.assertEqual(result.len_in_bytes(), expected_byte_length)
def test_raises_when_stderr_is_none(self):
    """Constructing a result with stderr=None is rejected."""
    expected_regex = 'must not be None'
    with self.assertRaisesRegexp(ValueError, expected_regex):
        job_run_result.JobRunResult(stdout='123', stderr=None)
def test_empty_result_raises_value_error(self):
    """A result with neither stdout nor stderr is rejected."""
    expected_regex = 'must not be empty'
    with self.assertRaisesRegexp(ValueError, expected_regex):
        job_run_result.JobRunResult()
def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
    """Generates the translation contributions stats.

    Returns:
        PCollection. A PCollection of 'SUCCESS x' results, where x is
        the number of generated stats.
    """
    suggestions_grouped_by_target = (
        self.pipeline
        | 'Get all non-deleted suggestion models' >> ndb_io.GetModels(
            suggestion_models.GeneralSuggestionModel.get_all(
                include_deleted=False))
        # We need to window the models so that CoGroupByKey below
        # works properly.
        | 'Window the suggestions' >> beam.WindowInto(
            beam.window.Sessions(10 * 60))
        | 'Filter translate suggestions' >> beam.Filter(lambda m: (
            m.suggestion_type == feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT))
        | 'Transform to suggestion domain object' >> beam.Map(
            suggestion_services.get_suggestion_from_model)
        | 'Group by target' >> beam.GroupBy(lambda m: m.target_id))
    exp_opportunities = (
        self.pipeline
        | 'Get all non-deleted opportunity models' >> ndb_io.GetModels(
            opportunity_models.ExplorationOpportunitySummaryModel.get_all(
                include_deleted=False))
        # We need to window the models so that CoGroupByKey below
        # works properly.
        | 'Window the opportunities' >> beam.WindowInto(
            beam.window.Sessions(10 * 60))
        | 'Transform to opportunity domain object' >> beam.Map(
            opportunity_services.
            get_exploration_opportunity_summary_from_model)
        | 'Group by ID' >> beam.GroupBy(lambda m: m.id))
    new_user_stats_models = (
        {
            'suggestion': suggestions_grouped_by_target,
            'opportunity': exp_opportunities
        }
        | 'Merge models' >> beam.CoGroupByKey()
        | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
        # Each CoGroupByKey value holds lists of groupings; pass the
        # first suggestion grouping (or an empty list) and the first
        # opportunity (or None) to the stats generator.
        | 'Generate stats' >> beam.ParDo(lambda x: self._generate_stats(
            x['suggestion'][0] if len(x['suggestion']) else [],
            x['opportunity'][0][0] if len(x['opportunity']) else None))
        | 'Group by key' >> beam.GroupByKey()
        | 'Combine the stats' >> beam.CombineValues(CombineStats())
        | 'Generate models from stats' >> beam.MapTuple(
            self._generate_translation_contribution_model))
    unused_put_result = (
        new_user_stats_models
        | 'Put models into the datastore' >> ndb_io.PutModels())
    return (
        new_user_stats_models
        | 'Count all new models' >> (
            beam.combiners.Count.Globally().without_defaults())
        | 'Only create result for new models when > 0' >> (
            beam.Filter(lambda x: x > 0))
        | 'Create result for new models' >> beam.Map(
            lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS %s' % x)))
def test_len_in_bytes_of_unicode(self):
    """Multi-byte characters are counted by encoded size, not length."""
    emoji_result = job_run_result.JobRunResult(stdout='😀', stderr='😀')
    self.assertEqual(emoji_result.len_in_bytes(), 8)
def test_repr(self):
    """repr() renders both streams in a readable form."""
    result = job_run_result.JobRunResult(stdout='abc', stderr='123')
    expected_repr = 'JobRunResult(stdout="abc", stderr="123")'
    self.assertEqual(repr(result), expected_repr)
def test_pickle(self):
    """Results survive a pickle round trip intact."""
    original = job_run_result.JobRunResult(stdout='abc', stderr='123')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
def test_usage(self):
    """Constructor arguments are exposed as attributes."""
    result = job_run_result.JobRunResult(stdout='abc', stderr='123')
    self.assertEqual(result.stdout, 'abc')
    self.assertEqual(result.stderr, '123')
def test_repr(self):
    """repr() embeds the repr of each stream value."""
    result = job_run_result.JobRunResult(stdout='abc', stderr='123')
    expected_repr = (
        'JobRunResult(stdout=%r, stderr=%r)' % (
            result.stdout, result.stderr))
    self.assertEqual(repr(result), expected_repr)
def test_enormous_result_raises_value_error(self):
    """Outputs beyond the byte limit are rejected at construction."""
    oversized_stdout = 'a' * 1000001
    with self.assertRaisesRegexp(ValueError, r'must not exceed \d+ bytes'):
        job_run_result.JobRunResult(stdout=oversized_stdout)