def test_equality(self):
    """Checks that a result equals itself and differs from another one."""
    first = job_run_result.JobRunResult(stdout='abc', stderr='123')
    second = job_run_result.JobRunResult(stdout='def', stderr='456')

    # Every result has to compare equal to itself.
    for result in (first, second):
        self.assertEqual(result, result)
    # Results built from different outputs must not compare equal.
    self.assertNotEqual(first, second)
Exemple #2
0
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Creates the missing UserStatsModels and updates all stats models.

        Every UserSettingsModel whose ID has no matching UserStatsModel gets
        a new stats model from CreateUserStatsModel. The new and the already
        existing stats models are then passed through
        UpdateWeeklyCreatorStats and put into the datastore.

        Returns:
            PCollection. A PCollection with at most two results:
            'SUCCESS NEW x', where x is the number of newly created stats
            models, and 'SUCCESS OLD x', where x is the number of stats
            models that already existed. A result is only emitted when its
            count is greater than zero.
        """
        user_settings_models = (
            self.pipeline
            | 'Get all UserSettingsModels' >>
            (ndb_io.GetModels(user_models.UserSettingsModel.get_all())))

        old_user_stats_models = (
            self.pipeline
            | 'Get all UserStatsModels' >>
            (ndb_io.GetModels(user_models.UserStatsModel.get_all())))

        # Creates a UserStatsModel for every user that does not have one yet.
        new_user_stats_models = (
            (user_settings_models, old_user_stats_models)
            | 'Merge models' >> beam.Flatten()
            # Returns a PCollection of
            # (model.id, (user_settings_models, user_stats_models)) or
            # (model.id, (user_settings_models,)).
            | 'Group models with same ID' >> beam.GroupBy(lambda m: m.id)
            # Discards model.id from the PCollection.
            | 'Get rid of key' >> beam.Values()  # pylint: disable=no-value-for-parameter
            # Only keep groupings that indicate that the UserStatsModel is
            # missing, i.e. groups made of exactly one UserSettingsModel.
            # The group is walked once instead of being copied into a list
            # twice.
            | 'Filter pairs of models' >> beam.Filter(
                lambda models: [
                    isinstance(model, user_models.UserSettingsModel)
                    for model in models
                ] == [True])
            # Choosing the first (and only) element of the group.
            | 'Transform tuples into models' >>
            beam.Map(lambda models: next(iter(models)))
            # Creates the missing UserStatsModels.
            | 'Create new user stat models' >> beam.ParDo(
                CreateUserStatsModel()))

        # The result of the write is not needed; the branch only exists for
        # its side effect of storing the updated models.
        unused_put_result = (
            (new_user_stats_models, old_user_stats_models)
            | 'Merge new and old models together' >> beam.Flatten()
            | 'Update the dashboard stats' >> beam.ParDo(
                UpdateWeeklyCreatorStats())
            | 'Put models into the datastore' >> ndb_io.PutModels())

        new_user_stats_job_result = (
            new_user_stats_models
            | 'Count all new models' >> beam.combiners.Count.Globally()
            | 'Only create result for new models when > 0' >>
            (beam.Filter(lambda x: x > 0))
            | 'Create result for new models' >>
            beam.Map(lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS NEW %s' % x)))
        old_user_stats_job_result = (
            old_user_stats_models
            | 'Count all old models' >> beam.combiners.Count.Globally()
            | 'Only create result for old models when > 0' >>
            (beam.Filter(lambda x: x > 0))
            | 'Create result for old models' >>
            beam.Map(lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS OLD %s' % x)))

        return ((new_user_stats_job_result, old_user_stats_job_result)
                | 'Merge new and old results together' >> beam.Flatten())
    def test_concat(self):
        """Checks the stdout and stderr lines of a concatenated result."""
        parts = [
            job_run_result.JobRunResult(stdout='abc', stderr=''),
            job_run_result.JobRunResult(stdout='', stderr='123'),
            job_run_result.JobRunResult(stdout='def', stderr='456'),
        ]
        merged = job_run_result.JobRunResult.concat(parts)

        # Only the non-empty outputs are expected, one per line.
        stdout_lines = merged.stdout.split('\n')
        self.assertItemsEqual(stdout_lines, ['abc', 'def'])
        stderr_lines = merged.stderr.split('\n')
        self.assertItemsEqual(stderr_lines, ['123', '456'])
    def test_accumulate(self):
        """Checks that three small results accumulate into a single one."""
        parts = [
            job_run_result.JobRunResult(stdout='abc', stderr=''),
            job_run_result.JobRunResult(stdout='', stderr='123'),
            job_run_result.JobRunResult(stdout='def', stderr='456'),
        ]
        # The unpacking fails unless accumulate() gives back exactly one
        # result.
        [merged] = job_run_result.JobRunResult.accumulate(parts)

        stdout_lines = merged.stdout.split('\n')
        self.assertItemsEqual(stdout_lines, ['abc', 'def'])
        stderr_lines = merged.stderr.split('\n')
        self.assertItemsEqual(stderr_lines, ['123', '456'])
Exemple #5
0
    def test_sharded_output(self):
        """Checks that results are read back whole with a tiny output limit.

        MAX_OUTPUT_BYTES is swapped to 11 while the results are written,
        presumably so that the three messages cannot be stored together.
        """
        messages = [
            job_run_result.JobRunResult(stdout=out, stderr=err)
            for out, err in (('abc', '123'), ('def', '456'), ('ghi', '789'))
        ]

        with self.swap(job_run_result, 'MAX_OUTPUT_BYTES', 11):
            put_results_pcoll = (
                self.pipeline
                | beam.Create(messages)
                | job_io.PutResults(self.JOB_ID))
            self.assert_pcoll_empty(put_results_pcoll)

        stored_result = beam_job_services.get_beam_job_run_result(self.JOB_ID)
        self.assertItemsEqual(
            stored_result.stdout.split('\n'), ['abc', 'def', 'ghi'])
        self.assertItemsEqual(
            stored_result.stderr.split('\n'), ['123', '456', '789'])
Exemple #6
0
    def test_indexes_non_deleted_model(self) -> None:
        """Checks that one non-deleted summary is indexed and reported."""
        summary_model = self.create_model(
            exp_models.ExpSummaryModel,
            id='abcd',
            deleted=False,
            title='title',
            category='category',
            objective='objective',
            language_code='lang',
            community_owned=False,
            status=constants.ACTIVITY_STATUS_PUBLIC)
        summary_model.update_timestamps()
        summary_model.put()

        # The document that the indexing call is expected to receive.
        expected_document = {
            'id': 'abcd',
            'language_code': 'lang',
            'title': 'title',
            'category': 'category',
            'tags': [],
            'objective': 'objective',
            'rank': 20,
        }
        # Replace the platform indexing call with a no-op whose arguments
        # are checked against the expected document and index name.
        indexing_swap = self.swap_with_checks(
            platform_search_services,
            'add_documents_to_index',
            lambda _, __: None,
            expected_args=[
                ([expected_document],
                 search_services.SEARCH_INDEX_EXPLORATIONS)
            ])

        with indexing_swap:
            self.assert_job_output_is([
                job_run_result.JobRunResult(stdout='SUCCESS 1 models indexed')
            ])
Exemple #7
0
    def test_updates_existing_stats_model_when_values_are_provided(
            self) -> None:
        """Checks the weekly stats written for an existing stats model."""
        settings_model = self.create_model(
            user_models.UserSettingsModel,
            id=self.VALID_USER_ID_1,
            email='*****@*****.**')
        existing_stats_model = self.create_model(
            user_models.UserStatsModel,
            id=self.VALID_USER_ID_1,
            num_ratings=10,
            average_ratings=4.5,
            total_plays=22,
        )
        self.put_multi([settings_model, existing_stats_model])

        expected_output = [job_run_result.JobRunResult(stdout='SUCCESS OLD 1')]
        self.assert_job_output_is(expected_output)

        # The cast tells mypy that the lookup gives back a model; the
        # assertion right after it verifies that at runtime.
        stored_stats_model = cast(
            user_models.UserStatsModel,
            user_models.UserStatsModel.get(self.VALID_USER_ID_1))
        self.assertIsNotNone(stored_stats_model)
        # The values set on the model above are expected under the
        # formatted date key.
        self.assertEqual(
            stored_stats_model.weekly_creator_stats_list,
            [{
                self.formated_datetime: {
                    'num_ratings': 10,
                    'average_ratings': 4.5,
                    'total_plays': 22
                }
            }])
    def test_accumulate_with_enormous_outputs(self):
        """Checks how many results five very large outputs accumulate into."""
        output_sizes = (500000, 400000, 300000, 200000, 100000)
        enormous_results = [
            job_run_result.JobRunResult(stdout='a' * size, stderr='b' * size)
            for size in output_sizes
        ]
        accumulated_results = job_run_result.JobRunResult.accumulate(
            enormous_results)

        # 100000 and 200000 are small enough to fit as one, but the others
        # will each need their own result.
        self.assertEqual(len(accumulated_results), 4)
Exemple #9
0
    def test_output_returns_models_discovered(self) -> None:
        """Checks that three stored BaseModels are reported by the job."""
        base_model_instances = [
            self.create_model(base_models.BaseModel) for _ in range(3)
        ]
        self.put_multi(base_model_instances)

        expected_output = [
            job_run_result.JobRunResult(stdout='BaseModel: 3'),
        ]
        self.assert_job_output_is(expected_output)
Exemple #10
0
    def test_creates_stats_model_from_one_accepted_suggestion(self) -> None:
        """Checks the stats model generated from one accepted translation."""
        translation_change = {
            'cmd': exp_domain.CMD_ADD_WRITTEN_TRANSLATION,
            'state_name': 'state',
            'content_id': 'content_id',
            'language_code': 'lang',
            'content_html': '111 222 333',
            'translation_html': '111 222 333',
            'data_format': 'format'
        }
        accepted_suggestion = self.create_model(
            suggestion_models.GeneralSuggestionModel,
            suggestion_type=feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT,
            author_id=self.VALID_USER_ID_1,
            change_cmd=translation_change,
            score_category='irelevant',
            status=suggestion_models.STATUS_ACCEPTED,
            target_type='exploration',
            target_id=self.EXP_1_ID,
            target_version_at_submission=0,
            language_code=self.LANG_1)
        accepted_suggestion.update_timestamps()
        accepted_suggestion.put()

        expected_output = [job_run_result.JobRunResult(stdout='SUCCESS 1')]
        self.assert_job_output_is(expected_output)

        # The stats model is looked up with an empty topic ID.
        stats_model = suggestion_models.TranslationContributionStatsModel.get(
            self.LANG_1, self.VALID_USER_ID_1, '')

        # Ruling out the possibility of None for mypy type checking.
        assert stats_model is not None
        self.assertEqual(stats_model.language_code, self.LANG_1)
        self.assertEqual(
            stats_model.contributor_user_id, self.VALID_USER_ID_1)
        self.assertEqual(stats_model.topic_id, '')
        # One submitted translation with three words.
        self.assertEqual(stats_model.submitted_translations_count, 1)
        self.assertEqual(stats_model.submitted_translation_word_count, 3)
        # The suggestion was accepted, so it also counts as accepted.
        self.assertEqual(stats_model.accepted_translations_count, 1)
        self.assertEqual(
            stats_model.accepted_translations_without_reviewer_edits_count,
            1)
        self.assertEqual(stats_model.accepted_translation_word_count, 3)
        # Nothing was rejected.
        self.assertEqual(stats_model.rejected_translations_count, 0)
        self.assertEqual(stats_model.rejected_translation_word_count, 0)
        self.assertItemsEqual(  # type: ignore[no-untyped-call]
            stats_model.contribution_dates,
            [datetime.date.today()])
Exemple #11
0
    def process(
        self, exp_summary_models: List[datastore_services.Model]
    ) -> Iterable[job_run_result.JobRunResult]:
        """Indexes a batch of exploration summaries and reports the outcome.

        Args:
            exp_summary_models: list(Model). Models to index.

        Yields:
            JobRunResult. One result per call: a SUCCESS message on stdout
            when the indexing call returns, or a FAILURE message on stderr
            when a SearchException is raised.
        """
        try:
            summaries_to_index = cast(
                List[exp_models.ExpSummaryModel], exp_summary_models)
            search_services.index_exploration_summaries(summaries_to_index)  # type: ignore[no-untyped-call]
            success_message = (
                'SUCCESS %s models indexed' % len(exp_summary_models))
            yield job_run_result.JobRunResult(stdout=success_message)
        except platform_search_services.SearchException:  # type: ignore[attr-defined]
            failure_message = (
                'FAILURE %s models not indexed' % len(exp_summary_models))
            yield job_run_result.JobRunResult(stderr=failure_message)
Exemple #12
0
    def test_handles_multiple_models(self) -> None:
        """Checks the job output for two users when only one has stats."""
        first_settings = self.create_model(
            user_models.UserSettingsModel,
            id=self.VALID_USER_ID_1,
            email='*****@*****.**')
        second_settings = self.create_model(
            user_models.UserSettingsModel,
            id=self.VALID_USER_ID_2,
            email='*****@*****.**')
        # Only the first user gets a stats model up front.
        first_stats = self.create_model(
            user_models.UserStatsModel, id=self.VALID_USER_ID_1)

        self.put_multi([first_settings, second_settings, first_stats])

        expected_output = [
            job_run_result.JobRunResult(stdout='SUCCESS OLD 1'),
            job_run_result.JobRunResult(stdout='SUCCESS NEW 1')
        ]
        self.assert_job_output_is(expected_output)

        # The second user must have a stats model after the job has run.
        created_stats = user_models.UserStatsModel.get(self.VALID_USER_ID_2)
        self.assertIsNotNone(created_stats)
Exemple #13
0
    def test_single_output(self):
        """Checks that one stored result is read back unchanged."""
        only_message = job_run_result.JobRunResult(
            stdout='Hello, World!', stderr='Uh-oh, World!')

        put_results_pcoll = (
            self.pipeline
            | beam.Create([only_message])
            | job_io.PutResults(self.JOB_ID))
        self.assert_pcoll_empty(put_results_pcoll)

        stored_result = beam_job_services.get_beam_job_run_result(self.JOB_ID)
        self.assertEqual(stored_result.stdout, 'Hello, World!')
        self.assertEqual(stored_result.stderr, 'Uh-oh, World!')
Exemple #14
0
    def test_creates_recommendations_for_similar_explorations(self) -> None:
        """Checks that two similar explorations recommend each other."""
        recommendations_services.create_default_topic_similarities()  # type: ignore[no-untyped-call]

        first_summary = self.create_model(
            exp_models.ExpSummaryModel,
            id=self.EXP_1_ID,
            deleted=False,
            title='title',
            category='Architecture',
            objective='objective',
            language_code='lang',
            community_owned=False,
            status=constants.ACTIVITY_STATUS_PUBLIC,
            exploration_model_last_updated=datetime.datetime.utcnow())
        first_summary.update_timestamps()
        # The second summary differs from the first one only in its ID and
        # in the moment its timestamps are taken.
        second_summary = self.create_model(
            exp_models.ExpSummaryModel,
            id=self.EXP_2_ID,
            deleted=False,
            title='title',
            category='Architecture',
            objective='objective',
            language_code='lang',
            community_owned=False,
            status=constants.ACTIVITY_STATUS_PUBLIC,
            exploration_model_last_updated=datetime.datetime.utcnow())
        second_summary.update_timestamps()
        self.put_multi([first_summary, second_summary])

        expected_output = [job_run_result.JobRunResult(stdout='SUCCESS 2')]
        self.assert_job_output_is(expected_output)

        first_recommendations = (
            recommendations_models.ExplorationRecommendationsModel.get(
                self.EXP_1_ID))
        # Ruling out the possibility of None for mypy type checking.
        assert first_recommendations is not None
        self.assertEqual(
            first_recommendations.recommended_exploration_ids,
            [self.EXP_2_ID])

        second_recommendations = (
            recommendations_models.ExplorationRecommendationsModel.get(
                self.EXP_2_ID))
        # Ruling out the possibility of None for mypy type checking.
        assert second_recommendations is not None
        self.assertEqual(
            second_recommendations.recommended_exploration_ids,
            [self.EXP_1_ID])
Exemple #15
0
    def test_creates_new_stats_model_if_not_existing(self) -> None:
        """Checks the stats model created for a user that had none."""
        settings_model = self.create_model(
            user_models.UserSettingsModel,
            id=self.VALID_USER_ID_1,
            email='*****@*****.**')
        settings_model.update_timestamps()
        settings_model.put()

        expected_output = [job_run_result.JobRunResult(stdout='SUCCESS NEW 1')]
        self.assert_job_output_is(expected_output)

        created_stats_model = user_models.UserStatsModel.get(
            self.VALID_USER_ID_1)
        # Ruling out the possibility of None for mypy type checking.
        assert created_stats_model is not None
        # Zero ratings, no average and zero plays are expected under the
        # formatted date key.
        self.assertEqual(
            created_stats_model.weekly_creator_stats_list,
            [{
                self.formated_datetime: {
                    'num_ratings': 0,
                    'average_ratings': None,
                    'total_plays': 0
                }
            }])
Exemple #16
0
    def test_reports_failed_when_indexing_fails(self) -> None:
        """Checks that a SearchException is reported as a FAILURE result."""
        summary_model = self.create_model(
            exp_models.ExpSummaryModel,
            id='abcd',
            deleted=False,
            title='title',
            category='category',
            objective='objective',
            language_code='lang',
            community_owned=False,
            status=constants.ACTIVITY_STATUS_PUBLIC)
        summary_model.update_timestamps()
        summary_model.put()

        def raise_search_exception(
                unused_documents: Dict[str, Union[int, str, List[str]]],
                unused_index_name: str
        ) -> None:
            """Stands in for the indexing call and always fails."""
            raise platform_search_services.SearchException  # type: ignore[attr-defined]

        # The document that the indexing call is expected to receive.
        expected_document = {
            'id': 'abcd',
            'language_code': 'lang',
            'title': 'title',
            'category': 'category',
            'tags': [],
            'objective': 'objective',
            'rank': 20,
        }
        failing_indexing_swap = self.swap_with_checks(
            platform_search_services,
            'add_documents_to_index',
            raise_search_exception,
            expected_args=[
                ([expected_document],
                 search_services.SEARCH_INDEX_EXPLORATIONS)
            ])

        with failing_indexing_swap:
            self.assert_job_output_is([
                job_run_result.JobRunResult(
                    stderr='FAILURE 1 models not indexed')
            ])
Exemple #17
0
    def test_fails_when_existing_stats_has_wrong_schema_version(self) -> None:
        """Checks that a stats model with schema version 0 makes the job fail.

        The stored stats model is expected to keep an empty weekly stats list
        afterwards.
        """
        settings_model = self.create_model(
            user_models.UserSettingsModel,
            id=self.VALID_USER_ID_1,
            email='*****@*****.**')
        outdated_stats_model = self.create_model(
            user_models.UserStatsModel,
            id=self.VALID_USER_ID_1,
            schema_version=0)

        self.put_multi([settings_model, outdated_stats_model])

        expected_error = (
            'Sorry, we can only process v1-v%d dashboard stats schemas at '
            'present.' % feconf.CURRENT_DASHBOARD_STATS_SCHEMA_VERSION)
        with self.assertRaisesRegexp(Exception, expected_error):  # type: ignore[no-untyped-call]
            self.assert_job_output_is(
                [job_run_result.JobRunResult(stdout='SUCCESS OLD 1')])

        # The cast tells mypy that the lookup gives back a model; the
        # assertion right after it verifies that at runtime.
        stored_stats_model = cast(
            user_models.UserStatsModel,
            user_models.UserStatsModel.get(self.VALID_USER_ID_1))
        self.assertIsNotNone(stored_stats_model)
        self.assertEqual(stored_stats_model.weekly_creator_stats_list, [])
Exemple #18
0
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Computes exploration recommendation models and stores them.

        Returns:
            PCollection. A PCollection with a 'SUCCESS x' result, where x is
            the number of recommendation models that were created. No result
            is produced when that number is zero.
        """
        exp_summary_models = (
            self.pipeline
            | 'Get all non-deleted models' >>
            (ndb_io.GetModels(exp_models.ExpSummaryModel.get_all())))

        # Every summary is compared against the whole collection, which is
        # handed to the DoFn as an iterable side input.
        similarities = (
            exp_summary_models
            | 'Compute similarity' >> beam.ParDo(
                ComputeSimilarity(), beam.pvalue.AsIter(exp_summary_models)))
        similarities_per_exp_id = (
            similarities
            | 'Group similarities per exploration ID' >> beam.GroupByKey())
        sliced_similarities = (
            similarities_per_exp_id
            | 'Sort and slice similarities' >> beam.MapTuple(
                lambda exp_id, similarities:
                (exp_id, self._sort_and_slice_similarities(similarities))))
        exp_recommendations_models = (
            sliced_similarities
            | 'Create recommendation models' >> beam.MapTuple(
                self._create_recommendation))

        # The result of the write is not needed; the branch only exists for
        # its side effect of storing the models.
        unused_put_result = (
            exp_recommendations_models
            | 'Put models into the datastore' >> ndb_io.PutModels())

        model_count = (
            exp_recommendations_models
            | 'Count all new models' >> beam.combiners.Count.Globally())
        positive_model_count = (
            model_count
            | 'Only create result for new models when > 0' >>
            (beam.Filter(lambda x: x > 0)))
        return (
            positive_model_count
            | 'Create result for new models' >>
            beam.Map(lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS %s' % x)))
    def test_hash(self):
        """Checks set membership of results, which relies on their hashes."""
        member = job_run_result.JobRunResult(stdout='abc', stderr='123')
        outsider = job_run_result.JobRunResult(stdout='def', stderr='456')

        self.assertIn(member, {member})
        self.assertNotIn(outsider, {member})
 def test_len_in_bytes_of_empty_strings(self):
     """Checks that a result with empty outputs reports a length of 2."""
     empty_result = job_run_result.JobRunResult(stdout='', stderr='')
     byte_count = empty_result.len_in_bytes()
     self.assertEqual(byte_count, 2)
 def test_len_in_bytes(self):
     """Checks the length reported for two three-character outputs."""
     ascii_result = job_run_result.JobRunResult(stdout='123', stderr='123')
     byte_count = ascii_result.len_in_bytes()
     self.assertEqual(byte_count, 8)
 def test_raises_when_stderr_is_none(self):
     """Checks that passing stderr=None raises a ValueError."""
     expect_none_error = self.assertRaisesRegexp(
         ValueError, 'must not be None')
     with expect_none_error:
         job_run_result.JobRunResult(stdout='123', stderr=None)
 def test_empty_result_raises_value_error(self):
     """Checks that building a result without any output raises."""
     expect_empty_error = self.assertRaisesRegexp(
         ValueError, 'must not be empty')
     with expect_empty_error:
         job_run_result.JobRunResult()
Exemple #24
0
    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Generates the translation contribution stats.

        Returns:
            PCollection. A PCollection of 'SUCCESS x' results, where x is
            the number of generated stats.
        """
        suggestion_model_pcoll = (
            self.pipeline
            | 'Get all non-deleted suggestion models' >> ndb_io.GetModels(
                suggestion_models.GeneralSuggestionModel.get_all(
                    include_deleted=False)))
        suggestions_grouped_by_target = (
            suggestion_model_pcoll
            # We need to window the models so that CoGroupByKey below
            # works properly.
            | 'Window the suggestions' >> beam.WindowInto(
                beam.window.Sessions(10 * 60))
            | 'Filter translate suggestions' >> beam.Filter(
                lambda m: (
                    m.suggestion_type ==
                    feconf.SUGGESTION_TYPE_TRANSLATE_CONTENT))
            | 'Transform to suggestion domain object' >> beam.Map(
                suggestion_services.get_suggestion_from_model)
            | 'Group by target' >> beam.GroupBy(lambda m: m.target_id))

        opportunity_model_pcoll = (
            self.pipeline
            | 'Get all non-deleted opportunity models' >> ndb_io.GetModels(
                opportunity_models.ExplorationOpportunitySummaryModel.get_all(
                    include_deleted=False)))
        exp_opportunities = (
            opportunity_model_pcoll
            # We need to window the models so that CoGroupByKey below
            # works properly.
            | 'Window the opportunities' >> beam.WindowInto(
                beam.window.Sessions(10 * 60))
            | 'Transform to opportunity domain object' >> beam.Map(
                opportunity_services.
                get_exploration_opportunity_summary_from_model)
            | 'Group by ID' >> beam.GroupBy(lambda m: m.id))

        # Suggestions and opportunities are joined on their key; the key
        # itself is dropped right after the join.
        joined_groups = (
            {
                'suggestion': suggestions_grouped_by_target,
                'opportunity': exp_opportunities
            }
            | 'Merge models' >> beam.CoGroupByKey()
            | 'Get rid of key' >> beam.Values())  # pylint: disable=no-value-for-parameter
        new_user_stats_models = (
            joined_groups
            # A missing side of the join is replaced by an empty list of
            # suggestions or by None for the opportunity.
            | 'Generate stats' >> beam.ParDo(
                lambda x: self._generate_stats(
                    x['suggestion'][0] if len(x['suggestion']) else [],
                    x['opportunity'][0][0] if len(x['opportunity']) else None
                ))
            | 'Group by key' >> beam.GroupByKey()
            | 'Combine the stats' >> beam.CombineValues(CombineStats())
            | 'Generate models from stats' >> beam.MapTuple(
                self._generate_translation_contribution_model))

        # The result of the write is not needed; the branch only exists for
        # its side effect of storing the models.
        unused_put_result = (
            new_user_stats_models
            | 'Put models into the datastore' >> ndb_io.PutModels())

        model_count = (
            new_user_stats_models
            | 'Count all new models' >>
            (beam.combiners.Count.Globally().without_defaults()))
        positive_model_count = (
            model_count
            | 'Only create result for new models when > 0' >>
            (beam.Filter(lambda x: x > 0)))
        return (
            positive_model_count
            | 'Create result for new models' >>
            beam.Map(lambda x: job_run_result.JobRunResult(
                stdout='SUCCESS %s' % x)))
 def test_len_in_bytes_of_unicode(self):
     """Checks the length reported for two single-emoji outputs."""
     emoji_result = job_run_result.JobRunResult(stdout='😀', stderr='😀')
     byte_count = emoji_result.len_in_bytes()
     self.assertEqual(byte_count, 8)
    def test_repr(self):
        """Checks the exact repr() text of a result."""
        sample_result = job_run_result.JobRunResult(stdout='abc', stderr='123')
        actual_repr = repr(sample_result)

        self.assertEqual(
            actual_repr, 'JobRunResult(stdout="abc", stderr="123")')
    def test_pickle(self):
        """Checks that a result survives a pickle round trip unchanged."""
        original_result = job_run_result.JobRunResult(
            stdout='abc', stderr='123')
        serialized_result = pickle.dumps(original_result)
        restored_result = pickle.loads(serialized_result)

        self.assertEqual(original_result, restored_result)
 def test_usage(self):
     """Checks that stdout and stderr are exposed as they were passed in."""
     sample_result = job_run_result.JobRunResult(stdout='abc', stderr='123')
     for attribute_value, expected_value in (
             (sample_result.stdout, 'abc'),
             (sample_result.stderr, '123')):
         self.assertEqual(attribute_value, expected_value)
    def test_repr(self):
        """Checks that repr() shows the repr of both stdout and stderr."""
        sample_result = job_run_result.JobRunResult(stdout='abc', stderr='123')

        actual_repr = repr(sample_result)
        expected_repr = 'JobRunResult(stdout=%r, stderr=%r)' % (
            sample_result.stdout, sample_result.stderr)
        self.assertEqual(actual_repr, expected_repr)
 def test_enormous_result_raises_value_error(self):
     """Checks that a stdout of 1000001 characters is rejected."""
     oversized_stdout = 'a' * 1000001
     with self.assertRaisesRegexp(ValueError, r'must not exceed \d+ bytes'):
         job_run_result.JobRunResult(stdout=oversized_stdout)