Example #1
0
class QuestionSkillLinkModel(base_models.BaseModel):
    """Model for storing Question-Skill Links.

    The ID of instances of this class has the form '[question_id]:[skill_id]'.
    """

    # The ID of the question.
    question_id = (datastore_services.StringProperty(required=True,
                                                     indexed=True))
    # The ID of the skill to which the question is linked.
    skill_id = datastore_services.StringProperty(required=True, indexed=True)
    # The difficulty of the skill.
    skill_difficulty = (datastore_services.FloatProperty(required=True,
                                                         indexed=True))

    @staticmethod
    def get_deletion_policy() -> base_models.DELETION_POLICY:
        """Model doesn't contain any data directly corresponding to a user."""
        return base_models.DELETION_POLICY.NOT_APPLICABLE

    @staticmethod
    def get_model_association_to_user(
    ) -> base_models.MODEL_ASSOCIATION_TO_USER:
        """Model does not contain user data."""
        return base_models.MODEL_ASSOCIATION_TO_USER.NOT_CORRESPONDING_TO_USER

    @classmethod
    def get_export_policy(cls) -> Dict[str, base_models.EXPORT_POLICY]:
        """Model doesn't contain any data directly corresponding to a user."""
        return dict(
            super(cls, cls).get_export_policy(), **{
                'question_id': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'skill_id': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'skill_difficulty': base_models.EXPORT_POLICY.NOT_APPLICABLE
            })

    @classmethod
    def get_model_id(cls, question_id: str, skill_id: str) -> str:
        """Returns the model id by combining the question and skill id.

        Args:
            question_id: str. The ID of the question.
            skill_id: str. The ID of the skill to which the question is linked.

        Returns:
            str. The calculated model id.
        """
        return '%s:%s' % (question_id, skill_id)

    @classmethod
    def create(cls, question_id: str, skill_id: str,
               skill_difficulty: float) -> 'QuestionSkillLinkModel':
        """Creates a new QuestionSkillLinkModel entry.

        Note that the returned instance is not put into the datastore; that
        is the caller's responsibility.

        Args:
            question_id: str. The ID of the question.
            skill_id: str. The ID of the skill to which the question is linked.
            skill_difficulty: float. The difficulty between [0, 1] of the skill.

        Raises:
            Exception. The given question is already linked to the given skill.

        Returns:
            QuestionSkillLinkModel. Instance of the new QuestionSkillLinkModel
            entry.
        """
        question_skill_link_id = cls.get_model_id(question_id, skill_id)
        if cls.get(question_skill_link_id, strict=False) is not None:
            raise Exception(
                'The given question is already linked to given skill')

        question_skill_link_model_instance = cls(
            id=question_skill_link_id,
            question_id=question_id,
            skill_id=skill_id,
            skill_difficulty=skill_difficulty)
        return question_skill_link_model_instance

    @classmethod
    def get_total_question_count_for_skill_ids(cls,
                                               skill_ids: List[str]) -> int:
        """Returns the number of questions assigned to the given skill_ids.

        Args:
            skill_ids: list(str). Skill IDs for which the question count is
                requested.

        Returns:
            int. The number of questions assigned to the given skill_ids.
        """
        total_question_count = cls.query().filter(
            cls.skill_id.IN(skill_ids)).count()

        return total_question_count

    @classmethod
    def get_question_skill_links_by_skill_ids(
            cls, question_count: int, skill_ids: List[str],
            offset: int) -> List['QuestionSkillLinkModel']:
        """Fetches the list of QuestionSkillLinkModels linked to the skill in
        batches.

        Args:
            question_count: int. The number of questions to be returned.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.
            offset: int. Number of query results to skip.

        Returns:
            list(QuestionSkillLinkModel). The QuestionSkillLinkModels
            corresponding to given skill_ids.
        """
        question_skill_count = min(
            len(skill_ids), constants.MAX_SKILLS_PER_QUESTION) * question_count

        return cast(
            List[QuestionSkillLinkModel],
            cls.query(
                cls.skill_id.IN(skill_ids)).order(-cls.last_updated).fetch(
                    question_skill_count, offset=offset))

    @classmethod
    def _get_random_offset_for_query(
            cls, query: datastore_services.Query,
            question_count_per_skill: int) -> int:
        """Returns a random offset into the given query's results, so that
        repeated calls sample different questions.

        The offset always leaves at least 2 * question_count_per_skill
        results after it, so a subsequent fetch with that limit is not
        starved.

        Args:
            query: datastore_services.Query. The query whose results the
                offset will be applied to.
            question_count_per_skill: int. The number of questions that will
                eventually be kept per skill.

        Returns:
            int. A random offset, or 0 if the query has too few results for
            a random offset to be useful.
        """
        question_count = query.count()
        if question_count > 2 * question_count_per_skill:
            return utils.get_random_int(
                question_count - (question_count_per_skill * 2))
        return 0

    @classmethod
    def get_question_skill_links_based_on_difficulty_equidistributed_by_skill(
            cls, total_question_count: int, skill_ids: List[str],
            difficulty_requested: float) -> List['QuestionSkillLinkModel']:
        """Fetches the list of constant number of random QuestionSkillLinkModels
        linked to the skills, sorted by the absolute value of the difference
        between skill difficulty and the requested difficulty.

        Args:
            total_question_count: int. The number of questions expected.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.
            difficulty_requested: float. The skill difficulty of the questions
                requested to be fetched.

        Raises:
            Exception. The number of skill IDs exceeds
                feconf.MAX_NUMBER_OF_SKILL_IDS.

        Returns:
            list(QuestionSkillLinkModel). A list of random
            QuestionSkillLinkModels corresponding to given skill_ids, with
            total_question_count/len(skill_ids) number of questions for
            each skill. If not evenly divisible, it will be rounded up.
            If not enough questions for a skill, just return all questions
            it links to.
        """
        if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
            raise Exception('Please keep the number of skill IDs below 20.')

        if (not skill_ids) or (total_question_count == 0):
            return []

        question_count_per_skill = int(
            math.ceil(
                python_utils.divide(  # type: ignore[no-untyped-call]
                    float(total_question_count), float(len(skill_ids)))))

        # Maps question_id to the model that was picked for it, so that the
        # same question linked to multiple skills is only returned once.
        question_skill_link_mapping: Dict[str, QuestionSkillLinkModel] = {}

        # For fetching the questions randomly we have used a random offset.
        # But this is a temporary solution since this method scales linearly.
        # Other alternative methods were:
        # 1) Using a random id in question id filter
        # 2) Adding an additional column that can be filtered upon.
        # But these methods are not viable because google datastore limits
        # each query to have at most one inequality filter. So we can't filter
        # on both question_id and difficulty. Please see
        # https://github.com/oppia/oppia/pull/9061#issuecomment-629765809
        # for more details.

        for skill_id in skill_ids:
            query = cls.query(cls.skill_id == skill_id)

            equal_questions_query = query.filter(
                cls.skill_difficulty == difficulty_requested)

            # We fetch more questions here in order to try and ensure that the
            # eventual number of returned questions is sufficient to meet the
            # number requested, even after deduplication.
            new_question_skill_link_models = cast(
                List[QuestionSkillLinkModel],
                equal_questions_query.fetch(
                    limit=question_count_per_skill * 2,
                    offset=cls._get_random_offset_for_query(
                        equal_questions_query, question_count_per_skill)))
            # Deduplicate against questions already picked for other skills.
            # A list comprehension is used rather than calling list.remove()
            # inside the loop, since mutating a list while iterating over it
            # skips elements and leaves duplicates behind.
            new_question_skill_link_models = [
                model for model in new_question_skill_link_models
                if model.question_id not in question_skill_link_mapping
            ]

            if len(new_question_skill_link_models) >= question_count_per_skill:
                new_question_skill_link_models = random.sample(
                    new_question_skill_link_models, question_count_per_skill)
            else:
                # Fetch QuestionSkillLinkModels with difficulty smaller than
                # requested difficulty.
                easier_questions_query = query.filter(
                    cls.skill_difficulty < difficulty_requested)
                easier_question_skill_link_models = cast(
                    List[QuestionSkillLinkModel],
                    easier_questions_query.fetch(
                        limit=question_count_per_skill * 2,
                        offset=cls._get_random_offset_for_query(
                            easier_questions_query,
                            question_count_per_skill)))
                easier_question_skill_link_models = [
                    model for model in easier_question_skill_link_models
                    if model.question_id not in question_skill_link_mapping
                ]
                question_extra_count = (
                    len(new_question_skill_link_models) +
                    len(easier_question_skill_link_models) -
                    question_count_per_skill)
                if question_extra_count >= 0:
                    # There are enough easier questions to reach the per-skill
                    # quota, so sample just enough of them.
                    easier_question_skill_link_models = random.sample(
                        easier_question_skill_link_models,
                        question_count_per_skill -
                        len(new_question_skill_link_models))
                    new_question_skill_link_models.extend(
                        easier_question_skill_link_models)
                else:
                    # Fetch QuestionSkillLinkModels with difficulty larger than
                    # requested difficulty.
                    new_question_skill_link_models.extend(
                        easier_question_skill_link_models)
                    harder_questions_query = query.filter(
                        cls.skill_difficulty > difficulty_requested)
                    # A single bounded fetch with a random offset; a previous
                    # version issued this fetch twice and discarded the
                    # bounded one in favour of an unbounded fetch, which
                    # pulled every harder question and ignored the random
                    # offset entirely.
                    harder_question_skill_link_models = cast(
                        List[QuestionSkillLinkModel],
                        harder_questions_query.fetch(
                            limit=question_count_per_skill * 2,
                            offset=cls._get_random_offset_for_query(
                                harder_questions_query,
                                question_count_per_skill)))
                    harder_question_skill_link_models = [
                        model for model in harder_question_skill_link_models
                        if model.question_id not in question_skill_link_mapping
                    ]
                    question_extra_count = (
                        len(new_question_skill_link_models) +
                        len(harder_question_skill_link_models) -
                        question_count_per_skill)
                    if question_extra_count >= 0:
                        harder_question_skill_link_models = random.sample(
                            harder_question_skill_link_models,
                            question_count_per_skill -
                            len(new_question_skill_link_models))
                    new_question_skill_link_models.extend(
                        harder_question_skill_link_models)

            new_question_skill_link_models = (
                new_question_skill_link_models[:question_count_per_skill])

            for model in new_question_skill_link_models:
                if model.question_id not in question_skill_link_mapping:
                    question_skill_link_mapping[model.question_id] = model

        return list(question_skill_link_mapping.values())

    @classmethod
    def get_question_skill_links_equidistributed_by_skill(
            cls, total_question_count: int,
            skill_ids: List[str]) -> List['QuestionSkillLinkModel']:
        """Fetches the list of constant number of random
        QuestionSkillLinkModels linked to the skills.

        Args:
            total_question_count: int. The number of questions expected.
            skill_ids: list(str). The ids of skills for which the linked
                question ids are to be retrieved.

        Raises:
            Exception. The number of skill IDs exceeds
                feconf.MAX_NUMBER_OF_SKILL_IDS.

        Returns:
            list(QuestionSkillLinkModel). A list of random
            QuestionSkillLinkModels corresponding to given skill_ids, with
            total_question_count/len(skill_ids) number of questions for
            each skill. If not evenly divisible, it will be rounded up.
            If not enough questions for a skill, just return all questions
            it links to.
        """
        if len(skill_ids) > feconf.MAX_NUMBER_OF_SKILL_IDS:
            raise Exception('Please keep the number of skill IDs below 20.')

        if not skill_ids:
            return []

        question_count_per_skill = int(
            math.ceil(
                python_utils.divide(  # type: ignore[no-untyped-call]
                    float(total_question_count), float(len(skill_ids)))))
        question_skill_link_models = []
        # A set is used for the dedup membership test, which is O(1) per
        # question rather than the O(n) scan a list would need.
        existing_question_ids = set()

        for skill_id in skill_ids:
            query = cls.query(cls.skill_id == skill_id)

            # We fetch more questions here in order to try and ensure that the
            # eventual number of returned questions is sufficient to meet the
            # number requested, even after deduplication.
            new_question_skill_link_models = cast(
                List[QuestionSkillLinkModel],
                query.fetch(
                    limit=question_count_per_skill * 2,
                    offset=cls._get_random_offset_for_query(
                        query, question_count_per_skill)))
            # Deduplicate if the same question is linked to multiple skills.
            # A list comprehension is used rather than calling list.remove()
            # inside the loop, since mutating a list while iterating over it
            # skips elements and leaves duplicates behind.
            new_question_skill_link_models = [
                model for model in new_question_skill_link_models
                if model.question_id not in existing_question_ids
            ]
            if len(new_question_skill_link_models) > question_count_per_skill:
                sampled_question_skill_link_models = random.sample(
                    new_question_skill_link_models, question_count_per_skill)
            else:
                sampled_question_skill_link_models = (
                    new_question_skill_link_models)

            question_skill_link_models.extend(
                sampled_question_skill_link_models)
            existing_question_ids.update(
                model.question_id
                for model in sampled_question_skill_link_models)

        return question_skill_link_models

    @classmethod
    def get_all_question_ids_linked_to_skill_id(cls,
                                                skill_id: str) -> List[str]:
        """Returns a list of all question ids corresponding to the given skill
        id.

        Args:
            skill_id: str. ID of the skill.

        Returns:
            list(str). The list of all question ids corresponding to the given
            skill id.
        """
        question_skill_link_models = cls.query().filter(
            cls.skill_id == skill_id, cls.deleted == False)  #pylint: disable=singleton-comparison
        question_ids = [
            model.question_id for model in question_skill_link_models
        ]
        return question_ids

    @classmethod
    def get_models_by_skill_id(
            cls, skill_id: str) -> List['QuestionSkillLinkModel']:
        """Returns a list of QuestionSkillLink domains of a particular skill ID.

        Args:
            skill_id: str. ID of the skill.

        Returns:
            list(QuestionSkillLinkModel). The list of question skill link
            models that are linked to the skill ID; empty if the skill ID
            has no linked questions.
        """
        return cast(
            List[QuestionSkillLinkModel],
            QuestionSkillLinkModel.query().filter(
                cls.skill_id == skill_id).fetch())

    @classmethod
    def get_models_by_question_id(
            cls, question_id: str) -> List['QuestionSkillLinkModel']:
        """Returns a list of QuestionSkillLinkModels of a particular
        question ID.

        Args:
            question_id: str. ID of the question.

        Returns:
            list(QuestionSkillLinkModel). The list of question skill link
            models that are linked to the question ID; empty if there are no
            question skill link models associated with the question ID.
        """
        return cast(List[QuestionSkillLinkModel],
                    QuestionSkillLinkModel.query().filter(
                        cls.question_id == question_id,
                        cls.deleted == False).fetch())  #pylint: disable=singleton-comparison

    @classmethod
    def put_multi_question_skill_links(
            cls, question_skill_links: List['QuestionSkillLinkModel']) -> None:
        """Puts multiple question skill link models into the datastore.

        Args:
            question_skill_links: list(QuestionSkillLink). The list of
                question skill link domain objects to put into the datastore.
        """
        cls.update_timestamps_multi(question_skill_links)
        cls.put_multi(question_skill_links)

    @classmethod
    def delete_multi_question_skill_links(
            cls, question_skill_links: List['QuestionSkillLinkModel']) -> None:
        """Deletes multiple question skill links from the datastore.

        Args:
            question_skill_links: list(QuestionSkillLinkModel). The list of
                question skill link domain objects to delete from the datastore.
        """
        cls.delete_multi(question_skill_links)
Example #2
0
class JobModel(base_models.BaseModel):
    """Class representing a datastore entity for a long-running job."""

    # The job type.
    job_type = datastore_services.StringProperty(indexed=True)
    # The time at which the job was queued, in milliseconds since the epoch.
    time_queued_msec = datastore_services.FloatProperty(indexed=True)
    # The time at which the job was started, in milliseconds since the epoch.
    # This is never set if the job was canceled before it was started.
    time_started_msec = datastore_services.FloatProperty(indexed=True)
    # The time at which the job was completed, failed or canceled, in
    # milliseconds since the epoch.
    time_finished_msec = datastore_services.FloatProperty(indexed=True)
    # The current status code for the job.
    status_code = datastore_services.StringProperty(
        indexed=True,
        default=STATUS_CODE_NEW,
        choices=[
            STATUS_CODE_NEW, STATUS_CODE_QUEUED, STATUS_CODE_STARTED,
            STATUS_CODE_COMPLETED, STATUS_CODE_FAILED, STATUS_CODE_CANCELED
        ])
    # Any metadata for the job, such as the root pipeline id for mapreduce
    # jobs.
    metadata = datastore_services.JsonProperty(indexed=False)
    # The output of the job. This is only populated if the job has status code
    # STATUS_CODE_COMPLETED, and is None otherwise. If populated, this is
    # expected to be a list of strings.
    output = datastore_services.JsonProperty(indexed=False)
    # The error message, if applicable. Only populated if the job has status
    # code STATUS_CODE_FAILED or STATUS_CODE_CANCELED; None otherwise.
    error = datastore_services.TextProperty(indexed=False)
    # Whether the datastore models associated with this job have been cleaned
    # up (i.e., deleted).
    has_been_cleaned_up = (datastore_services.BooleanProperty(default=False,
                                                              indexed=True))
    # Store additional params passed with job.
    additional_job_params = datastore_services.JsonProperty(default=None)

    @staticmethod
    def get_deletion_policy() -> base_models.DELETION_POLICY:
        """Model doesn't contain any data directly corresponding to a user."""
        return base_models.DELETION_POLICY.NOT_APPLICABLE

    @staticmethod
    def get_model_association_to_user(
    ) -> base_models.MODEL_ASSOCIATION_TO_USER:
        """Model does not contain user data."""
        return base_models.MODEL_ASSOCIATION_TO_USER.NOT_CORRESPONDING_TO_USER

    @classmethod
    def get_export_policy(cls) -> Dict[str, base_models.EXPORT_POLICY]:
        """Model doesn't contain any data directly corresponding to a user."""
        return dict(
            super(cls, cls).get_export_policy(), **{
                'job_type': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'time_queued_msec': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'time_started_msec': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'time_finished_msec': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'status_code': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'metadata': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'output': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'error': base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'has_been_cleaned_up':
                base_models.EXPORT_POLICY.NOT_APPLICABLE,
                'additional_job_params':
                base_models.EXPORT_POLICY.NOT_APPLICABLE
            })

    @property
    def is_cancelable(self) -> bool:
        """Checks if the job is cancelable.

        Returns:
            bool. Whether the job's status_code is 'queued' or 'started'.
        """
        # Only jobs that are in flight ('queued' or 'started') can be
        # canceled; new, completed, failed and canceled jobs cannot.
        return self.status_code in [STATUS_CODE_QUEUED, STATUS_CODE_STARTED]

    @classmethod
    def get_all_unfinished_jobs(cls, limit: int) -> Sequence['JobModel']:
        """Gets at most `limit` unfinished jobs.

        Args:
            limit: int. A limit on the number of jobs to return.

        Returns:
            list(JobModel). A list of at most `limit` unfinished (queued or
            started) jobs, ordered from most to least recently queued. The
            list is empty if no unfinished jobs exist.
        """
        return cls.query().filter(
            JobModel.status_code.IN([
                STATUS_CODE_QUEUED, STATUS_CODE_STARTED
            ])).order(-cls.time_queued_msec).fetch(limit)

    @classmethod
    def get_unfinished_jobs(cls, job_type: str) -> datastore_services.Query:
        """Gets jobs that are unfinished.

        Args:
            job_type: str. The type of jobs that may be unfinished.

        Returns:
            datastore_services.Query. A query for all unfinished (queued or
            started) jobs of the given job_type. The query is not executed
            here; callers fetch or count it themselves.
        """
        return cls.query().filter(cls.job_type == job_type).filter(
            JobModel.status_code.IN([STATUS_CODE_QUEUED, STATUS_CODE_STARTED]))

    @classmethod
    def do_unfinished_jobs_exist(cls, job_type: str) -> bool:
        """Checks if unfinished jobs exist.

        Args:
            job_type: str. Type of job for which to check.

        Returns:
            bool. True if unfinished jobs exist, otherwise false.
        """
        # count(limit=1) short-circuits after the first match, so this stays
        # cheap even when many unfinished jobs exist.
        return bool(cls.get_unfinished_jobs(job_type).count(limit=1))
Example #3
0
class CoreModel(base_models.BaseModel):
    """Simple BaseModel subclass with a 'prop' float property."""

    # A float value; no constraints are imposed (not required, not indexed).
    prop = datastore_services.FloatProperty()
Example #4
0
class CollectionRightsModel(base_models.VersionedModel):
    """Storage model for rights related to a collection.

    The id of each instance is the id of the corresponding collection.
    """

    # The model classes that hold the metadata and content of this model's
    # version snapshots.
    SNAPSHOT_METADATA_CLASS = CollectionRightsSnapshotMetadataModel
    SNAPSHOT_CONTENT_CLASS = CollectionRightsSnapshotContentModel
    # Reverting collection rights to an earlier snapshot is not supported.
    ALLOW_REVERT = False

    # The user_ids of owners of this collection.
    owner_ids = datastore_services.StringProperty(indexed=True, repeated=True)
    # The user_ids of users who are allowed to edit this collection.
    editor_ids = datastore_services.StringProperty(indexed=True, repeated=True)
    # The user_ids of users who are allowed to voiceover this collection.
    voice_artist_ids = (
        datastore_services.StringProperty(indexed=True, repeated=True))
    # The user_ids of users who are allowed to view this collection.
    viewer_ids = datastore_services.StringProperty(indexed=True, repeated=True)

    # Whether this collection is owned by the community.
    community_owned = (
        datastore_services.BooleanProperty(indexed=True, default=False))
    # For private collections, whether this collection can be viewed
    # by anyone who has the URL. If the collection is not private, this
    # setting is ignored.
    viewable_if_private = (
        datastore_services.BooleanProperty(indexed=True, default=False))
    # Time, in milliseconds, when the collection was first published.
    first_published_msec = (
        datastore_services.FloatProperty(indexed=True, default=None))

    # The publication status of this collection.
    status = datastore_services.StringProperty(
        default=constants.ACTIVITY_STATUS_PRIVATE, indexed=True,
        choices=[
            constants.ACTIVITY_STATUS_PRIVATE,
            constants.ACTIVITY_STATUS_PUBLIC
        ]
    )

    @staticmethod
    def get_deletion_policy() -> base_models.DELETION_POLICY:
        """Collections store user IDs in the viewer_ids, voice_artist_ids,
        editor_ids and owner_ids fields, so those need to be pseudonymized
        (public collections) or deleted (private collections).
        """
        deletion_policy = base_models.DELETION_POLICY
        return deletion_policy.PSEUDONYMIZE_IF_PUBLIC_DELETE_IF_PRIVATE

    @staticmethod
    def get_model_association_to_user(
    ) -> base_models.MODEL_ASSOCIATION_TO_USER:
        """Multiple users contribute to a collection with varying rights, so
        the model is exported as one instance shared across those users.
        """
        association = base_models.MODEL_ASSOCIATION_TO_USER
        return association.ONE_INSTANCE_SHARED_ACROSS_USERS

    @classmethod
    def get_field_name_mapping_to_takeout_keys(cls) -> Dict[str, str]:
        """Maps each user-ID field of this model to the key under which it
        appears in a takeout export, as required for models exported as one
        instance shared across users.
        """
        return dict(
            owner_ids='owned_collection_ids',
            editor_ids='editable_collection_ids',
            voice_artist_ids='voiced_collection_ids',
            viewer_ids='viewable_collection_ids')

    @classmethod
    def get_export_policy(cls) -> Dict[str, base_models.EXPORT_POLICY]:
        """Model contains data to export/delete corresponding to a user."""
        exported = base_models.EXPORT_POLICY.EXPORTED
        not_applicable = base_models.EXPORT_POLICY.NOT_APPLICABLE
        # Start from the superclass policy and layer this model's fields
        # on top; the user-ID lists are exported, everything else is not.
        export_policy = dict(super(cls, cls).get_export_policy())
        export_policy.update({
            'owner_ids': exported,
            'editor_ids': exported,
            'voice_artist_ids': exported,
            'viewer_ids': exported,
            'community_owned': not_applicable,
            'viewable_if_private': not_applicable,
            'status': not_applicable,
            'first_published_msec': not_applicable
        })
        return export_policy

    @classmethod
    def has_reference_to_user_id(cls, user_id: str) -> bool:
        """Check whether CollectionRightsModel references the given user.

        Args:
            user_id: str. The ID of the user whose data should be checked.

        Returns:
            bool. Whether any models refer to the given user ID.
        """
        # A keys-only lookup of the first match is enough to answer the
        # existence question without materializing any entities.
        references_user = datastore_services.any_of(
            cls.owner_ids == user_id,
            cls.editor_ids == user_id,
            cls.voice_artist_ids == user_id,
            cls.viewer_ids == user_id)
        first_match = cls.query(references_user).get(keys_only=True)
        return first_match is not None

    # TODO(#13523): Change 'commit_cmds' to domain object/TypedDict to
    # remove Any from type-annotation below.
    def save(
            self,
            committer_id: str,
            commit_message: str,
            commit_cmds: List[Dict[str, Any]]
    ) -> None:
        """Applies the given commit_cmds to this collection rights model and
        saves the result.

        Args:
            committer_id: str. The user_id of the user who committed the
                change.
            commit_message: str. The commit description message.
            commit_cmds: list(dict). A list of commands describing the changes
                made in this model; these should carry enough information to
                reconstruct the commit. Each dict always contains:
                    cmd: str. Unique command.
                plus any additional arguments for that command.
        """
        super().commit(committer_id, commit_message, commit_cmds)

    # TODO(#13523): Change 'model_dict' to domain object/TypedDict to
    # remove Any from type-annotation below.
    @staticmethod
    def convert_to_valid_dict(model_dict: Dict[str, Any]) -> Dict[str, Any]:
        """Replace invalid fields and values in the CollectionRightsModel dict.

        Old CollectionRightsSnapshotContentModels may hold fields and field
        values that the current schema no longer supports, and reconstituting
        a CollectionRightsModel from them would raise an exception. Such
        fields and values are removed or replaced here.

        Args:
            model_dict: dict. The content of the model. Some fields and field
                values might no longer exist in the CollectionRightsModel
                schema.

        Returns:
            dict. The content of the model. Only valid fields and values are
            present.
        """
        # 'publicized' is a legacy status value that is now equivalent to
        # 'public'.
        if model_dict['status'] == 'publicized':
            model_dict['status'] = constants.ACTIVITY_STATUS_PUBLIC

        # 'translator_ids' is the old name of the voice_artist_ids field;
        # when it is present and non-empty, migrate its values across and
        # drop the old key.
        if model_dict.get('translator_ids'):
            model_dict['voice_artist_ids'] = model_dict.pop('translator_ids')

        # Strip pseudonymous IDs out of every field that holds user IDs.
        user_id_fields = (
            'owner_ids', 'editor_ids', 'voice_artist_ids', 'viewer_ids')
        for field_name in user_id_fields:
            model_dict[field_name] = [
                uid for uid in model_dict[field_name]
                if not utils.is_pseudonymous_id(uid)
            ]

        return model_dict

    # TODO(#13523): Change 'snapshot_dict' to domain object/TypedDict to
    # remove Any from type-annotation below.
    def _reconstitute(
            self, snapshot_dict: Dict[str, Any]
    ) -> CollectionRightsModel:
        """Populates the model instance with the snapshot.

        Old CollectionRightsSnapshotContentModels may hold fields and field
        values that are no longer supported and would raise when
        reconstituting a CollectionRightsModel, so the snapshot is first
        sanitized via convert_to_valid_dict.

        Args:
            snapshot_dict: dict(str, *). The snapshot with the model
                property values.

        Returns:
            VersionedModel. The instance of the VersionedModel class populated
            with the the snapshot.
        """
        valid_dict = CollectionRightsModel.convert_to_valid_dict(snapshot_dict)
        self.populate(**valid_dict)
        return self

    # TODO(#13523): Change 'commit_cmds' to domain object/TypedDict to
    # remove Any from type-annotation below.
    def compute_models_to_commit(
        self,
        committer_id: str,
        commit_type: str,
        commit_message: str,
        commit_cmds: List[Dict[str, Any]],
        # We expect Mapping because we want to allow models that inherit
        # from BaseModel as the values, if we used Dict this wouldn't
        # be allowed.
        additional_models: Mapping[str, base_models.BaseModel]
    ) -> base_models.ModelsToPutDict:
        """Record the event to the commit log after the model commit.

        Note that this overrides the superclass method.

        Args:
            committer_id: str. The user_id of the user who committed the
                change.
            commit_type: str. The type of commit. Possible values are in
                core.storage.base_models.COMMIT_TYPE_CHOICES.
            commit_message: str. The commit description message.
            commit_cmds: list(dict). A list of commands, describing changes
                made in this model, should give sufficient information to
                reconstruct the commit. Each dict always contains:
                    cmd: str. Unique command.
                and then additional arguments for that command.
            additional_models: dict(str, BaseModel). Additional models that are
                needed for the commit process.

        Returns:
            ModelsToPutDict. A dict of models that should be put into
            the datastore.
        """
        # Let the superclass build the standard set of models (snapshot
        # metadata/content, versioned model); this method only augments the
        # snapshot metadata and adds a commit log entry where appropriate.
        models_to_put = super().compute_models_to_commit(
            committer_id,
            commit_type,
            commit_message,
            commit_cmds,
            additional_models
        )

        # Record, on the snapshot metadata, every user ID that this rights
        # model references (owners, editors, voice artists and viewers).
        # Sorting the de-duplicated union keeps the stored list deterministic.
        snapshot_metadata_model = models_to_put['snapshot_metadata_model']
        snapshot_metadata_model.content_user_ids = list(sorted(
            set(self.owner_ids) |
            set(self.editor_ids) |
            set(self.voice_artist_ids) |
            set(self.viewer_ids)
        ))

        # Collect the user IDs mentioned inside the commit commands. For each
        # command, the schema in
        # feconf.COLLECTION_RIGHTS_CHANGE_ALLOWED_COMMANDS declares which of
        # its attributes hold user IDs; next() raises StopIteration if the
        # command name is unknown, i.e. commands are assumed pre-validated.
        commit_cmds_user_ids = set()
        for commit_cmd in commit_cmds:
            user_id_attribute_names = next(
                cmd['user_id_attribute_names']
                for cmd in feconf.COLLECTION_RIGHTS_CHANGE_ALLOWED_COMMANDS
                if cmd['name'] == commit_cmd['cmd']
            )
            for user_id_attribute_name in user_id_attribute_names:
                commit_cmds_user_ids.add(commit_cmd[user_id_attribute_name])
        snapshot_metadata_model.commit_cmds_user_ids = list(
            sorted(commit_cmds_user_ids))

        # Create and delete events will already be recorded in the
        # CollectionModel.
        if commit_type not in ['create', 'delete']:
            # The 'rights-' ID prefix distinguishes rights commits from
            # regular collection commits in the commit log.
            collection_commit_log = CollectionCommitLogEntryModel(
                id=('rights-%s-%s' % (self.id, self.version)),
                user_id=committer_id,
                collection_id=self.id,
                commit_type=commit_type,
                commit_message=commit_message,
                commit_cmds=commit_cmds,
                # NOTE(review): version is deliberately left None here —
                # presumably only commits to the collection content itself
                # populate it; confirm against CollectionModel's commit flow.
                version=None,
                post_commit_status=self.status,
                post_commit_community_owned=self.community_owned,
                post_commit_is_private=(
                    self.status == constants.ACTIVITY_STATUS_PRIVATE)
            )
            return {
                'snapshot_metadata_model': (
                    models_to_put['snapshot_metadata_model']),
                'snapshot_content_model': (
                    models_to_put['snapshot_content_model']),
                'commit_log_model': collection_commit_log,
                'versioned_model': models_to_put['versioned_model'],
            }

        return models_to_put

    @classmethod
    def export_data(cls, user_id: str) -> Dict[str, List[str]]:
        """(Takeout) Export user-relevant properties of CollectionRightsModel.

        Args:
            user_id: str. The user_id denotes which user's data to extract.

        Returns:
            dict. The user-relevant properties of CollectionRightsModel
            in a python dict format. In this case, we are returning all the
            ids of collections that the user is connected to, so they either
            own, edit, voice, or have permission to view.
        """
        # Map each takeout key to the role property that should be matched
        # against the user's ID.
        role_properties = {
            'owned_collection_ids': cls.owner_ids,
            'editable_collection_ids': cls.editor_ids,
            'voiced_collection_ids': cls.voice_artist_ids,
            'viewable_collection_ids': cls.viewer_ids,
        }
        return {
            takeout_key: [
                model.key.id()
                for model in cls.get_all().filter(role_property == user_id)
            ]
            for takeout_key, role_property in role_properties.items()
        }
Example #5
0
class ExpSummaryModel(base_models.BaseModel):
    """Summary model for an Oppia exploration.

    This should be used whenever the content blob of the exploration is not
    needed (e.g. in search results, etc).

    A ExpSummaryModel instance stores the following information:

        id, title, category, objective, language_code, tags, ratings,
        scaled_average_rating, last_updated, created_on,
        exploration_model_last_updated, exploration_model_created_on,
        first_published_msec, status (private, public), community_owned,
        owner_ids, editor_ids, voice_artist_ids, viewer_ids,
        contributor_ids, contributors_summary, version.

    The key of each instance is the exploration id.
    """

    # What this exploration is called.
    title = datastore_services.StringProperty(required=True)
    # The category this exploration belongs to.
    category = datastore_services.StringProperty(required=True, indexed=True)
    # The objective of this exploration.
    objective = datastore_services.TextProperty(required=True, indexed=False)
    # The ISO 639-1 code for the language this exploration is written in.
    language_code = (datastore_services.StringProperty(required=True,
                                                       indexed=True))
    # Tags associated with this exploration.
    tags = datastore_services.StringProperty(repeated=True, indexed=True)

    # Aggregate user-assigned ratings of the exploration.
    ratings = datastore_services.JsonProperty(default=None, indexed=False)

    # Scaled average rating for the exploration.
    scaled_average_rating = datastore_services.FloatProperty(indexed=True)

    # Time when the exploration model was last updated (not to be
    # confused with last_updated, which is the time when the
    # exploration *summary* model was last updated).
    exploration_model_last_updated = (datastore_services.DateTimeProperty(
        indexed=True))
    # Time when the exploration model was created (not to be confused
    # with created_on, which is the time when the exploration *summary*
    # model was created).
    exploration_model_created_on = (datastore_services.DateTimeProperty(
        indexed=True))
    # Time when the exploration was first published.
    first_published_msec = datastore_services.FloatProperty(indexed=True)

    # The publication status of this exploration.
    status = datastore_services.StringProperty(
        default=constants.ACTIVITY_STATUS_PRIVATE,
        indexed=True,
        choices=[
            constants.ACTIVITY_STATUS_PRIVATE, constants.ACTIVITY_STATUS_PUBLIC
        ])

    # Whether this exploration is owned by the community.
    community_owned = (datastore_services.BooleanProperty(required=True,
                                                          indexed=True))

    # The user_ids of owners of this exploration.
    owner_ids = datastore_services.StringProperty(indexed=True, repeated=True)
    # The user_ids of users who are allowed to edit this exploration.
    editor_ids = datastore_services.StringProperty(indexed=True, repeated=True)
    # The user_ids of users who are allowed to voiceover this exploration.
    voice_artist_ids = (datastore_services.StringProperty(indexed=True,
                                                          repeated=True))
    # The user_ids of users who are allowed to view this exploration.
    viewer_ids = datastore_services.StringProperty(indexed=True, repeated=True)
    # The user_ids of users who have contributed (humans who have made a
    # positive (not just a revert) change to the exploration's content).
    # NOTE TO DEVELOPERS: contributor_ids and contributors_summary need to be
    # synchronized, meaning that the keys in contributors_summary need be
    # equal to the contributor_ids list.
    contributor_ids = (datastore_services.StringProperty(indexed=True,
                                                         repeated=True))
    # A dict representing the contributors of non-trivial commits to this
    # exploration. Each key of this dict is a user_id, and the corresponding
    # value is the number of non-trivial commits that the user has made.
    contributors_summary = (datastore_services.JsonProperty(default={},
                                                            indexed=False))
    # The version number of the exploration after this commit. Only populated
    # for commits to an exploration (as opposed to its rights, etc.).
    version = datastore_services.IntegerProperty()

    @staticmethod
    def get_deletion_policy() -> base_models.DELETION_POLICY:
        """Model contains data to pseudonymize or delete corresponding
        to a user: the viewer_ids, voice_artist_ids, editor_ids, owner_ids,
        contributor_ids and contributors_summary fields.
        """
        policy = base_models.DELETION_POLICY
        return policy.PSEUDONYMIZE_IF_PUBLIC_DELETE_IF_PRIVATE

    @classmethod
    def has_reference_to_user_id(cls, user_id: str) -> bool:
        """Check whether ExpSummaryModel references user.

        Args:
            user_id: str. The ID of the user whose data should be checked.

        Returns:
            bool. Whether any models refer to the given user ID.
        """
        # All repeated properties that can hold user IDs on this model.
        user_id_properties = (
            cls.owner_ids,
            cls.editor_ids,
            cls.voice_artist_ids,
            cls.viewer_ids,
            cls.contributor_ids,
        )
        matching_model = cls.query(datastore_services.any_of(
            *(prop == user_id for prop in user_id_properties)
        )).get(keys_only=True)
        return matching_model is not None

    @classmethod
    def get_non_private(cls) -> Sequence[ExpSummaryModel]:
        """Returns an iterable with non-private ExpSummary models.

        Returns:
            iterable. An iterable with non-private ExpSummary models.
        """
        query = ExpSummaryModel.query().filter(
            ExpSummaryModel.status != constants.ACTIVITY_STATUS_PRIVATE)
        query = query.filter(
            ExpSummaryModel.deleted == False)  # pylint: disable=singleton-comparison
        return query.fetch(feconf.DEFAULT_QUERY_LIMIT)

    @classmethod
    def get_top_rated(cls, limit: int) -> Sequence[ExpSummaryModel]:
        """Fetches the top-rated exp summaries that are public in descending
        order of scaled_average_rating.

        Args:
            limit: int. The maximum number of results to return.

        Returns:
            iterable. An iterable with the top rated exp summaries that are
            public in descending order of scaled_average_rating.
        """
        query = ExpSummaryModel.query().filter(
            ExpSummaryModel.status == constants.ACTIVITY_STATUS_PUBLIC)
        query = query.filter(
            ExpSummaryModel.deleted == False)  # pylint: disable=singleton-comparison
        return query.order(
            -ExpSummaryModel.scaled_average_rating).fetch(limit)

    @classmethod
    def get_private_at_least_viewable(
            cls, user_id: str) -> Sequence[ExpSummaryModel]:
        """Fetches private exp summaries that are at least viewable by the
        given user.

        Args:
            user_id: str. The id of the given user.

        Returns:
            iterable. An iterable with private exp summaries that are at least
            viewable by the given user.
        """
        # A user can view a private exploration if they hold any role on it.
        user_has_any_role = datastore_services.any_of(
            ExpSummaryModel.owner_ids == user_id,
            ExpSummaryModel.editor_ids == user_id,
            ExpSummaryModel.voice_artist_ids == user_id,
            ExpSummaryModel.viewer_ids == user_id)
        query = ExpSummaryModel.query().filter(
            ExpSummaryModel.status == constants.ACTIVITY_STATUS_PRIVATE)
        query = query.filter(user_has_any_role)
        query = query.filter(
            ExpSummaryModel.deleted == False)  # pylint: disable=singleton-comparison
        return query.fetch(feconf.DEFAULT_QUERY_LIMIT)

    @classmethod
    def get_at_least_editable(cls, user_id: str) -> Sequence[ExpSummaryModel]:
        """Fetches exp summaries that are at least editable by the given user.

        Args:
            user_id: str. The id of the given user.

        Returns:
            iterable. An iterable with exp summaries that are at least
            editable by the given user.
        """
        # Owners and editors both have edit rights on an exploration.
        user_can_edit = datastore_services.any_of(
            ExpSummaryModel.owner_ids == user_id,
            ExpSummaryModel.editor_ids == user_id)
        query = ExpSummaryModel.query().filter(user_can_edit)
        query = query.filter(
            ExpSummaryModel.deleted == False)  # pylint: disable=singleton-comparison
        return query.fetch(feconf.DEFAULT_QUERY_LIMIT)

    @classmethod
    def get_recently_published(cls, limit: int) -> Sequence[ExpSummaryModel]:
        """Fetches exp summaries that are recently published.

        Args:
            limit: int. The maximum number of results to return.

        Returns:
            iterable. An iterable with exp summaries that are
            recently published. The returned list is sorted by the time of
            publication with latest being first in the list.
        """
        query = ExpSummaryModel.query().filter(
            ExpSummaryModel.status == constants.ACTIVITY_STATUS_PUBLIC)
        query = query.filter(
            ExpSummaryModel.deleted == False)  # pylint: disable=singleton-comparison
        return query.order(
            -ExpSummaryModel.first_published_msec).fetch(limit)

    @staticmethod
    def get_model_association_to_user(
    ) -> base_models.MODEL_ASSOCIATION_TO_USER:
        """Model data has already been exported as a part of the
        ExplorationModel and thus does not need a separate export.
        """
        association = base_models.MODEL_ASSOCIATION_TO_USER
        return association.NOT_CORRESPONDING_TO_USER

    @classmethod
    def get_export_policy(cls) -> Dict[str, base_models.EXPORT_POLICY]:
        """Model contains data corresponding to a user, but this isn't
        exported because the noteworthy details that belong to this model
        have already been exported as a part of the ExplorationModel.
        """
        # Every field on this model is excluded from takeout for the reason
        # given above, so all of them share the same policy value.
        not_applicable_fields = (
            'title', 'category', 'objective', 'language_code', 'tags',
            'ratings', 'scaled_average_rating',
            'exploration_model_last_updated',
            'exploration_model_created_on', 'first_published_msec',
            'status', 'community_owned', 'owner_ids', 'editor_ids',
            'voice_artist_ids', 'viewer_ids', 'contributor_ids',
            'contributors_summary', 'version')
        export_policy = dict(super(cls, cls).get_export_policy())
        export_policy.update({
            field_name: base_models.EXPORT_POLICY.NOT_APPLICABLE
            for field_name in not_applicable_fields
        })
        return export_policy