Example #1
0
def collect_files_being_pushed(ref_list, remote):
    """Collect modified files and filter those that need linting.

    Args:
        ref_list: list of references to parse (provided by git on stdin).
        remote: the remote being pushed to.

    Returns:
        dict. Dict mapping branch names to 2-tuples of the form (list of
        changed files, list of files to lint).
    """
    if not ref_list:
        return {}
    # Avoid testing non-branch pushes (tags, for instance) or deletions.
    ref_heads_only = [
        ref for ref in ref_list if ref.local_ref.startswith('refs/heads/')
    ]
    # Get branch name from e.g. local_ref='refs/heads/lint_hook'.
    branches = [ref.local_ref.split('/')[-1] for ref in ref_heads_only]
    hashes = [ref.local_sha1 for ref in ref_heads_only]
    collected_files = {}
    # Git allows multiple branches to be pushed simultaneously with the
    # "--all" flag, so we need to loop over the provided ref_list.
    for branch, _ in python_utils.ZIP(branches, hashes):
        # Get the difference to remote/develop.
        modified_files = compare_to_remote(
            remote, branch, remote_branch=get_parent_branch_name_for_diff())
        files_to_lint = extract_files_to_lint(modified_files)
        collected_files[branch] = (modified_files, files_to_lint)

    for branch, (modified_files, files_to_lint) in collected_files.items():
        if modified_files:
            python_utils.PRINT('\nModified files in %s:' % branch)
            pprint.pprint(modified_files)
            python_utils.PRINT('\nFiles to lint in %s:' % branch)
            pprint.pprint(files_to_lint)
            python_utils.PRINT('\n')
    return collected_files
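Note on python_utils.ZIP, which appears in every example here: it is assumed to behave like the built-in zip (a Python 2/3 compatibility wrapper), pairing items positionally and stopping at the shortest input. A minimal sketch with hypothetical data, using the built-in directly:

branches = ['develop', 'lint_hook']
hashes = ['a1b2c3', 'd4e5f6']
# Pairs items positionally; iteration stops at the shortest input.
pairs = list(zip(branches, hashes))
# pairs == [('develop', 'a1b2c3'), ('lint_hook', 'd4e5f6')]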
Example #2
0
def save_pending_deletion_requests(pending_deletion_requests):
    """Save a list of pending deletion request domain objects as
    PendingDeletionRequestModel entities in the datastore.

    Args:
        pending_deletion_requests: list(PendingDeletionRequest). List of pending
            deletion request objects to be saved in the datastore.
    """
    user_ids = [request.user_id for request in pending_deletion_requests]
    pending_deletion_request_models = (
        user_models.PendingDeletionRequestModel.get_multi(
            user_ids, include_deleted=True))
    final_pending_deletion_request_models = []
    for deletion_request_model, deletion_request in python_utils.ZIP(
            pending_deletion_request_models, pending_deletion_requests):
        deletion_request.validate()
        deletion_request_dict = {
            'email': deletion_request.email,
            'role': deletion_request.role,
            'deletion_complete': deletion_request.deletion_complete,
            'pseudonymizable_entity_mappings': (
                deletion_request.pseudonymizable_entity_mappings)
        }
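        # Update the existing model in place when one was fetched for this
        # user; otherwise create a new model keyed by the user id.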
        if deletion_request_model is not None:
            deletion_request_model.populate(**deletion_request_dict)
        else:
            deletion_request_dict['id'] = deletion_request.user_id
            deletion_request_model = user_models.PendingDeletionRequestModel(
                **deletion_request_dict)
        final_pending_deletion_request_models.append(deletion_request_model)

    user_models.PendingDeletionRequestModel.update_timestamps_multi(
        final_pending_deletion_request_models)
    user_models.PendingDeletionRequestModel.put_multi(
        final_pending_deletion_request_models)
Example #3
0
    def delete_multi(cls,
                     entity_ids,
                     committer_id,
                     commit_message,
                     force_deletion=False):
        """Deletes the given cls instances with the given entity_ids.

        Note that this extends the superclass method.

        Args:
            entity_ids: list(str). Ids of entities to delete.
            committer_id: str. The user_id of the user who committed the change.
            commit_message: str. The commit description message.
            force_deletion: bool. If True, these models are deleted completely
                from storage; otherwise they are only marked as deleted.
                Default is False.
        """
        super(CollectionModel, cls).delete_multi(entity_ids,
                                                 committer_id,
                                                 commit_message,
                                                 force_deletion=force_deletion)

        if not force_deletion:
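            # When the models are only marked as deleted, record a commit log
            # entry for each collection, using its rights model for the status
            # and community-owned fields.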
            commit_log_models = []
            collection_rights_models = CollectionRightsModel.get_multi(
                entity_ids, include_deleted=True)
            versioned_models = cls.get_multi(entity_ids, include_deleted=True)
            for model, rights_model in python_utils.ZIP(
                    versioned_models, collection_rights_models):
                collection_commit_log = CollectionCommitLogEntryModel.create(
                    model.id, model.version, committer_id,
                    cls._COMMIT_TYPE_DELETE, commit_message,
                    [{
                        'cmd': cls.CMD_DELETE_COMMIT
                    }], rights_model.status, rights_model.community_owned)
                collection_commit_log.collection_id = model.id
                commit_log_models.append(collection_commit_log)
            ndb.put_multi_async(commit_log_models)
Example #4
0
def _send_batch_emails(recipient_list, feedback_message_reference,
                       exploration_id, has_suggestion):
    """Adds the given FeedbackMessageReference to each of the recipient's email
    buffers. The collected messages will be sent out as a batch after a short
    delay.

    Args:
        recipient_list: list(str). A list of user_ids of all recipients of the
            email.
        feedback_message_reference: FeedbackMessageReference. The reference to
            add to each email buffer.
        exploration_id: str. The id of the exploration that received the new
            message.
        has_suggestion: bool. Whether this thread has a related learner
            suggestion.
    """
    can_recipients_receive_email = email_manager.can_users_receive_thread_email(
        recipient_list, exploration_id, has_suggestion)
    for recipient_id, can_receive_email in python_utils.ZIP(
            recipient_list, can_recipients_receive_email):
        if can_receive_email:
            transaction_services.run_in_transaction(
                _add_feedback_message_reference, recipient_id,
                feedback_message_reference)
Example #5
0
    def assert_domains_equal_models(self, beam_job_runs, beam_job_run_models):
        """Asserts that the domain objects have the same values as the models.

        Args:
            beam_job_runs: list(BeamJobRun). The domain objects.
            beam_job_run_models: list(BeamJobRunModel). The models.

        Raises:
            AssertionError. At least one domain object and model pair has at
                least one difference.
        """
        domain_objs = sorted(beam_job_runs, key=lambda j: j.job_id)
        model_objs = sorted(beam_job_run_models, key=lambda m: m.id)
        for domain_obj, model_obj in python_utils.ZIP(domain_objs, model_objs):
            self.assertEqual(domain_obj.job_id, model_obj.id)
            self.assertEqual(domain_obj.job_name, model_obj.job_name)
            self.assertEqual(domain_obj.job_state, model_obj.latest_job_state)
            self.assertEqual(domain_obj.job_arguments, model_obj.job_arguments)
            self.assertEqual(domain_obj.job_started_on, model_obj.created_on)
            self.assertEqual(domain_obj.job_updated_on, model_obj.last_updated)
            self.assertEqual(
                domain_obj.job_is_synchronous,
                model_obj.dataflow_job_id is None)
Example #6
0
    def _fetch_field_name_to_external_model_references(cls, item):
        """Fetch external models based on _get_external_id_relationships.

        This should be called before we call other _validate methods.

        Args:
            item: datastore_services.Model. Entity to validate.
        """
        multiple_models_ids_to_fetch = {}

        for external_model_fetcher_details in (
                cls._get_external_id_relationships(item)):
            for error in external_model_fetcher_details.model_id_errors:
                cls._add_error(
                    ERROR_CATEGORY_INVALID_IDS_IN_FIELD,
                    'Entity id %s: %s' % (item.id, error))
            multiple_models_ids_to_fetch[
                external_model_fetcher_details.field_name] = (
                    external_model_fetcher_details.model_class,
                    external_model_fetcher_details.model_ids)

        fetched_model_instances_for_all_ids = (
            datastore_services.fetch_multiple_entities_by_ids_and_models(
                list(multiple_models_ids_to_fetch.values())))

        for index, field_name in enumerate(multiple_models_ids_to_fetch):
            (model_class, model_ids) = (
                multiple_models_ids_to_fetch[field_name])
            fetched_model_instances = (
                fetched_model_instances_for_all_ids[index])

            for (model_id, model_instance) in python_utils.ZIP(
                    model_ids, fetched_model_instances):
                cls.field_name_to_external_model_references[
                    field_name].append(
                        ExternalModelReference(
                            model_class, model_id, model_instance))
Example #7
0
def put_tasks(tasks, update_last_updated_time=True):
    """Puts each of the given tasks into storage if necessary, conditionally
    updating their last updated time.

    If the values of a task are the same as the corresponding model in storage,
    then that model will not be updated as part of the put operation.

    Args:
        tasks: list(improvements_domain.TaskEntry). Domain objects for each task
            being placed into storage.
        update_last_updated_time: bool. Whether to update the last_updated field
            of the task models.
    """
    task_models = improvements_models.TaskEntryModel.get_multi(
        [t.task_id for t in tasks])
    models_to_put = []
    for task, model in python_utils.ZIP(tasks, task_models):
        if model is None:
            models_to_put.append(
                improvements_models.TaskEntryModel(
                    id=task.task_id,
                    composite_entity_id=task.composite_entity_id,
                    entity_type=task.entity_type,
                    entity_id=task.entity_id,
                    entity_version=task.entity_version,
                    task_type=task.task_type,
                    target_type=task.target_type,
                    target_id=task.target_id,
                    issue_description=task.issue_description,
                    status=task.status,
                    resolver_id=task.resolver_id,
                    resolved_on=task.resolved_on))
        elif apply_changes_to_model(task, model):
            models_to_put.append(model)
    improvements_models.TaskEntryModel.update_timestamps_multi(
        models_to_put, update_last_updated_time=update_last_updated_time)
    improvements_models.TaskEntryModel.put_multi(models_to_put)
Example #8
0
    def map(item):
        if item.deleted:
            return

        exp_model = exp_models.ExplorationModel.get_by_id(item.id)
        if exp_model is None or exp_model.deleted:
            yield ('Removed recommendation model', item.id)
            item.delete()
            return

        fetched_exploration_model_instances = (
            datastore_services.fetch_multiple_entities_by_ids_and_models(
                [('ExplorationModel', item.recommended_exploration_ids)]))[0]

        exp_ids_removed = []
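        # Materialize the ZIP into a list up front so that removing ids from
        # recommended_exploration_ids inside the loop does not disturb the
        # pairs being iterated over.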
        for exp_id, exp_instance in list(python_utils.ZIP(
                item.recommended_exploration_ids,
                fetched_exploration_model_instances)):
            if exp_instance is None or exp_instance.deleted:
                exp_ids_removed.append(exp_id)
                item.recommended_exploration_ids.remove(exp_id)
        if exp_ids_removed:
            item.put()
            yield ('Removed deleted exp ids from recommendations', item.id)
Example #9
0
def _send_feedback_thread_status_change_emails(
        recipient_list, feedback_message_reference, old_status, new_status,
        exploration_id, has_suggestion):
    """Notifies the given recipients about the status change.

    Args:
        recipient_list: list(str). A list of recipient ids.
        feedback_message_reference: FeedbackMessageReference. The reference to
            add to each email buffer.
        old_status: str. One of STATUS_CHOICES.
        new_status: str. One of STATUS_CHOICES.
        exploration_id: str. The id of the exploration that received a new
            message.
        has_suggestion: bool. Whether this thread has a related learner
            suggestion.
    """
    can_recipients_receive_email = email_manager.can_users_receive_thread_email(
        recipient_list, exploration_id, has_suggestion)
    for recipient_id, can_receive_email in python_utils.ZIP(
            recipient_list, can_recipients_receive_email):
        if can_receive_email:
            _enqueue_feedback_thread_status_change_email_task_transactional(
                recipient_id, feedback_message_reference, old_status,
                new_status)
Example #10
0
    def run(self):
        """Returns a PCollection of audit errors aggregated from all models.

        Returns:
            PCollection. A PCollection of audit errors discovered during the
            audit.

        Raises:
            ValueError. When the `datastoreio` option, which provides the
                PTransforms for performing datastore IO operations, is None.
        """
        existing_models, deleted_models = (
            self.pipeline
            | 'Get all models' >> ndb_io.GetModels(
                datastore_services.query_everything(), self.datastoreio_stub)
            | 'Partition by model.deleted' >> (
                beam.Partition(lambda model, _: int(model.deleted), 2))
        )

        models_of_kind_by_index = (
            existing_models
            # NOTE: Partition returns a statically-sized list of PCollections.
            # Creating partitions is wasteful when there are fewer items than
            # there are partitions, like in our unit tests. In exchange, in
            # production the job will be able to take advantage of the high
            # parallelizability of PCollections, which are designed for enormous
            # datasets and parallel processing.
            #
            # Alternatively, we could have used GroupBy. However, that returns
            # an _iterable_ of items rather than a PCollection, and so it is
            # vulnerable to out-of-memory errors.
            #
            # Since this job is concerned with running audits on EVERY MODEL IN
            # STORAGE, Partition is the clear winner regardless of the overhead
            # we'll see in unit tests.
            | 'Split models into parallelizable PCollections' >> beam.Partition(
                lambda m, _, kinds: kinds.index(job_utils.get_model_kind(m)),
                # NOTE: Partition requires a hard-coded number of slices; it
                # cannot be used with dynamic numbers generated in a pipeline.
                # KIND_BY_INDEX is a constant tuple so that requirement is
                # satisfied in this case.
                len(KIND_BY_INDEX), KIND_BY_INDEX)
        )

        existing_key_count_pcolls = []
        missing_key_error_pcolls = []
        audit_error_pcolls = [
            deleted_models
            | 'Apply ValidateDeletedModel on deleted models' >> (
                beam.ParDo(base_validation.ValidateDeletedModel()))
        ]

        model_groups = python_utils.ZIP(KIND_BY_INDEX, models_of_kind_by_index)
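        # Apply the audit DoFns to each kind's models, and collect the key
        # counts and missing-key errors needed for the relationship checks
        # below.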
        for kind, models_of_kind in model_groups:
            audit_error_pcolls.extend(models_of_kind | ApplyAuditDoFns(kind))

            if kind in ALL_MODEL_KINDS_REFERENCED_BY_PROPERTIES:
                existing_key_count_pcolls.append(
                    models_of_kind | GetExistingModelKeyCounts(kind))

            if kind in ID_REFERENCING_PROPERTIES_BY_KIND_OF_POSSESSOR:
                missing_key_error_pcolls.extend(
                    models_of_kind | GetMissingModelKeyErrors(kind))

        existing_key_counts = (
            existing_key_count_pcolls
            | 'Flatten PCollections of existing key counts' >> beam.Flatten()
        )
        missing_key_errors = (
            missing_key_error_pcolls
            | 'Flatten PCollections of missing key errors' >> beam.Flatten()
        )
        audit_error_pcolls.append(
            (existing_key_counts, missing_key_errors)
            | 'Group counts and errors by key' >> beam.CoGroupByKey()
            | 'Filter keys without any errors' >> (
                beam.FlatMapTuple(self._get_model_relationship_errors))
        )

        return audit_error_pcolls | 'Combine audit results' >> beam.Flatten()
Example #11
0
def migrate_state_training_jobs(state_training_jobs_mapping):
    """Migrate exploration training jobs to latest version of algorithm_id
    and algorithm_version.

    This function lazily migrates an older classifier training job and
    trains new classifiers. Specifically, if the training job exploration
    mapping of an <exploration, version, state> triplet is missing a job_id
    for some algorithm_id, or if the job_id exists but was trained on a
    now-obsolete algorithm, we re-submit the jobs.

    The function goes through existing training job exploration mapping and
    identifies three different types of algorithm IDs.
        1. algorithm_ids_to_upgrade: Those which exist but need to be upgraded
            to a new algorithm (because the existing one has been deprecated)
            by re-submitting the training job.
        2. algorithm_ids_to_add: Those which don't exist and need to be added
            by submitting a new training job.
        3. algorithm_ids_to_remove: Those which need to be removed since the
            algorithms are no longer supported.

    Once all three types of algorithm IDs are filtered, the function performs
    specific tasks tailored to each of them. We call this a lazy migration
    because it happens only when there is a query to retrieve a trained model
    for a given <exploration, version, state> and algorithm_id.

    Args:
        state_training_jobs_mapping: StateTrainingJobsMapping. Domain
            object containing exploration to training job id mapping. This
            mapping is used to figure out jobs that need to be re-submitted,
            added or removed.
    """
    exp_id = state_training_jobs_mapping.exp_id
    exp_version = state_training_jobs_mapping.exp_version
    state_name = state_training_jobs_mapping.state_name

    exploration = exp_fetchers.get_exploration_by_id(exp_id,
                                                     version=exp_version)
    interaction_id = exploration.states[state_name].interaction.id

    algorithm_id = feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id][
        'algorithm_id']
    algorithm_version = feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id][
        'algorithm_version']

    algorithm_id_to_algorithm_version = {algorithm_id: algorithm_version}

    # The list below contains only one element because as of now we only
    # support a single algorithm id per interaction type. However once the
    # support for multiple algorithm ids (see issue #10217) is added, the list
    # of possible algorithm ids can be retrieved from
    # feconf.INTERACTION_CLASSIFIER_MAPPING.
    possible_algorithm_ids = [algorithm_id]

    algorithm_ids_to_add = set(possible_algorithm_ids).difference(
        set(state_training_jobs_mapping.algorithm_ids_to_job_ids.keys()))

    algorithm_ids_to_remove = set(
        state_training_jobs_mapping.algorithm_ids_to_job_ids.keys()
    ).difference(set(possible_algorithm_ids))

    algorithm_ids_to_upgrade = set(possible_algorithm_ids).intersection(
        set(state_training_jobs_mapping.algorithm_ids_to_job_ids.keys()))

    if len(algorithm_ids_to_add) > 0:
        job_dicts = []

        for algorithm_id in algorithm_ids_to_add:
            next_scheduled_check_time = datetime.datetime.utcnow()
            training_data = exploration.states[state_name].get_training_data()

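            # Validate the prospective job's fields using a placeholder job id
            # before the corresponding models are created below.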
            classifier_domain.ClassifierTrainingJob(
                'job_id_dummy', algorithm_id, interaction_id, exp_id,
                exp_version, next_scheduled_check_time, state_name,
                feconf.TRAINING_JOB_STATUS_NEW, training_data,
                algorithm_version).validate()

            job_dicts.append({
                'algorithm_id': algorithm_id,
                'interaction_id': interaction_id,
                'exp_id': exp_id,
                'exp_version': exp_version,
                'next_scheduled_check_time': next_scheduled_check_time,
                'state_name': state_name,
                'training_data': training_data,
                'status': feconf.TRAINING_JOB_STATUS_NEW,
                'algorithm_version': algorithm_version
            })

        job_ids = classifier_models.ClassifierTrainingJobModel.create_multi(
            job_dicts)

        for algorithm_id, job_id in python_utils.ZIP(algorithm_ids_to_add,
                                                     job_ids):
            state_training_jobs_mapping.algorithm_ids_to_job_ids[
                algorithm_id] = job_id

    if algorithm_ids_to_upgrade:
        for algorithm_id in algorithm_ids_to_upgrade:
            classifier_training_job = (
                classifier_models.ClassifierTrainingJobModel.get_by_id(
                    state_training_jobs_mapping.algorithm_ids_to_job_ids[
                        algorithm_id]))
            classifier_training_job.algorithm_version = (
                algorithm_id_to_algorithm_version[algorithm_id])
            classifier_training_job.next_scheduled_check_time = (
                datetime.datetime.utcnow())
            classifier_training_job.status = feconf.TRAINING_JOB_STATUS_NEW
            classifier_training_job.update_timestamps()
            classifier_training_job.put()

    if algorithm_ids_to_remove:
        for algorithm_id in algorithm_ids_to_remove:
            delete_classifier_training_job(
                state_training_jobs_mapping.algorithm_ids_to_job_ids[
                    algorithm_id])
            state_training_jobs_mapping.algorithm_ids_to_job_ids.pop(
                algorithm_id)

    state_training_jobs_mapping_model = (
        classifier_models.StateTrainingJobsMappingModel.get_model(
            exp_id, exp_version, state_name))
    state_training_jobs_mapping.validate()
    state_training_jobs_mapping_model.algorithm_ids_to_job_ids = (
        state_training_jobs_mapping.algorithm_ids_to_job_ids)
    state_training_jobs_mapping_model.update_timestamps()
    state_training_jobs_mapping_model.put()
Example #12
0
def get_exp_thread_summaries(user_id, thread_ids):
    """Returns a list of summaries corresponding to the exploration threads from
    the given thread ids. Non-exploration threads are not included in the list.
    It also returns the number of threads that are currently not read by the
    user.

    Args:
        user_id: str. The id of the user.
        thread_ids: list(str). The ids of the threads for which we have to fetch
            the summaries.

    Returns:
        tuple(thread_summaries, number_of_unread_threads), where:
            thread_summaries: list(FeedbackThreadSummary).
            number_of_unread_threads: int. The number of threads not read by the
                user.
    """
    # We need to fetch the thread models first to filter out the threads which
    # don't refer to an exploration.
    exp_thread_models = [
        model for model in
        feedback_models.GeneralFeedbackThreadModel.get_multi(thread_ids)
        if model and model.entity_type == feconf.ENTITY_TYPE_EXPLORATION
    ]

    exp_thread_user_model_ids = [
        feedback_models.GeneralFeedbackThreadUserModel.generate_full_id(
            user_id, model.id) for model in exp_thread_models
    ]
    exp_model_ids = [model.entity_id for model in exp_thread_models]

    exp_thread_user_models, exp_models = (
        datastore_services.fetch_multiple_entities_by_ids_and_models([
            ('GeneralFeedbackThreadUserModel', exp_thread_user_model_ids),
            ('ExplorationModel', exp_model_ids),
        ]))

    threads = [_get_thread_from_model(m) for m in exp_thread_models]
    flattened_last_two_message_models_of_threads = (
        feedback_models.GeneralFeedbackMessageModel.get_multi(
            itertools.chain.from_iterable(t.get_last_two_message_ids()
                                          for t in threads)))
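    # Re-group the flat list of message models into [last, second-to-last]
    # pairs, one pair per thread, since get_last_two_message_ids returns two
    # ids per thread.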
    last_two_message_models_of_threads = [
        flattened_last_two_message_models_of_threads[i:i + 2]
        for i in python_utils.RANGE(
            0, len(flattened_last_two_message_models_of_threads), 2)
    ]

    thread_summaries = []
    number_of_unread_threads = 0
    for thread, last_two_message_models, thread_user_model, exp_model in (
            python_utils.ZIP(threads, last_two_message_models_of_threads,
                             exp_thread_user_models, exp_models)):
        message_ids_read_by_user = (() if thread_user_model is None else
                                    thread_user_model.message_ids_read_by_user)

        last_message_model, second_last_message_model = last_two_message_models
        # We don't need to check if the last message is None because all threads
        # have at least one message.
        last_message_is_read = (last_message_model.message_id
                                in message_ids_read_by_user)
        author_last_message = (last_message_model.author_id
                               and user_services.get_username(
                                   last_message_model.author_id))
        # The second-to-last message, however, might be None.
        second_last_message_is_read = (second_last_message_model is not None
                                       and second_last_message_model.message_id
                                       in message_ids_read_by_user)
        author_second_last_message = (second_last_message_model
                                      and second_last_message_model.author_id
                                      and user_services.get_username(
                                          second_last_message_model.author_id))

        if not last_message_is_read:
            number_of_unread_threads += 1
        thread_summaries.append(
            feedback_domain.FeedbackThreadSummary(
                thread.status, thread.original_author_id, thread.last_updated,
                last_message_model.text, thread.message_count,
                last_message_is_read, second_last_message_is_read,
                author_last_message, author_second_last_message,
                exp_model.title, exp_model.id, thread.id))
    return thread_summaries, number_of_unread_threads
Example #13
0
def create_messages(thread_ids,
                    author_id,
                    updated_status,
                    updated_subject,
                    text,
                    received_via_email=False):
    """Creates a new message for each of the distinct threads in thread_ids and
    for each message, subscribes the author to the thread.

    Args:
        thread_ids: list(str). The thread ids to append the messages to.
        author_id: str. The id of the author who creates the messages.
        updated_status: str. One of STATUS_CHOICES. Applied to each thread.
            Must be supplied if this is the first message of the threads.
            Otherwise, this property should only exist when the status
            changes.
        updated_subject: str. New thread subject. Applied to each thread. Must
            be supplied if this is the first message of the threads. Otherwise,
            this property should only exist when the subject changes.
        text: str. The text of the feedback message. This may be ''.
        received_via_email: bool. Whether the new message(s) are received via
            email or web.

    Returns:
        list(FeedbackMessage). The domain objects representing the new messages
        added in the datastore.

    Raises:
        Exception. The given thread ids are not distinct.
        Exception. One or more GeneralFeedbackThreadModel entities not found.
    """
    from core.domain import event_services

    # Check that the thread_ids are distinct.
    if len(set(thread_ids)) != len(thread_ids):
        raise Exception(
            'Thread ids must be distinct when calling create_messages.')

    # Get the threads at the outset, in order to check that there are models
    # corresponding to each of the thread_ids.
    thread_models = feedback_models.GeneralFeedbackThreadModel.get_multi(
        thread_ids)
    thread_ids_that_do_not_have_models = []
    for index, thread_model in enumerate(thread_models):
        if thread_model is None:
            thread_ids_that_do_not_have_models.append(thread_ids[index])
    if len(thread_ids_that_do_not_have_models) > 0:
        multiple_thread_models_are_missing = (
            len(thread_ids_that_do_not_have_models) > 1)
        raise Exception(
            'Thread%s belonging to the GeneralFeedbackThreadModel class with '
            'id%s:[%s] %s not found.' %
            ('s' if multiple_thread_models_are_missing else '',
             's' if multiple_thread_models_are_missing else '',
             ' '.join(thread_ids_that_do_not_have_models),
             'were' if multiple_thread_models_are_missing else 'was'))

    # Get the corresponding message ids, which are required for message
    # creation.
    message_ids = (
        feedback_models.GeneralFeedbackMessageModel.get_message_counts(
            thread_ids))

    # Create a list of FullyQualifiedMessageIdentifier objects so that each
    # (thread_id, message_id) pair is kept together.
    message_identifiers = []
    for thread_id, message_id in python_utils.ZIP(thread_ids, message_ids):
        message_identifiers.append(
            feedback_domain.FullyQualifiedMessageIdentifier(
                thread_id, message_id))

    # Create the GeneralFeedbackMessageModel instances.
    message_models = feedback_models.GeneralFeedbackMessageModel.create_multi(
        message_identifiers)

    # Update the message instances.
    for index, message_model in enumerate(message_models):
        message_model.thread_id = thread_ids[index]
        message_model.message_id = message_ids[index]
        message_model.author_id = author_id
        message_model.text = text
        message_model.received_via_email = received_via_email
        # Get the corresponding thread in storage.
        thread_model = thread_models[index]
        if updated_status:
            message_model.updated_status = updated_status
            if message_model.message_id == 0:
                # New thread.
                if thread_model.entity_type == feconf.ENTITY_TYPE_EXPLORATION:
                    event_services.FeedbackThreadCreatedEventHandler.record(
                        thread_model.entity_id)
            else:
                # Thread status changed.
                if thread_model.entity_type == feconf.ENTITY_TYPE_EXPLORATION:
                    (event_services.FeedbackThreadStatusChangedEventHandler.
                     record(thread_model.entity_id, thread_model.status,
                            updated_status))
        if updated_subject:
            message_model.updated_subject = updated_subject
    feedback_models.GeneralFeedbackMessageModel.put_multi(message_models)

    # Update the message data cache of the threads.
    for thread_model in thread_models:
        thread_model.message_count += 1
        if text:
            thread_model.last_nonempty_message_text = text
            thread_model.last_nonempty_message_author_id = author_id

    # We do a put() even if the status and subject are not updated, so that the
    # last_updated time of the threads reflects the last time a message was
    # added to it.
    old_statuses = [thread_model.status for thread_model in thread_models]
    new_statuses = old_statuses
    if updated_status or updated_subject:
        new_statuses = []
        for index, thread_model in enumerate(thread_models):
            # Skip status/subject updates for the first message of a thread.
            if message_ids[index] != 0:
                if updated_status and (updated_status != thread_model.status):
                    thread_model.status = updated_status
                if updated_subject and (updated_subject !=
                                        thread_model.subject):
                    thread_model.subject = updated_subject
            new_statuses.append(thread_model.status)
    feedback_models.GeneralFeedbackThreadModel.put_multi(thread_models)

    # For each thread, we do a put on the suggestion linked (if it exists) to
    # the thread, so that the last_updated time changes to show that there is
    # activity in the thread.
    thread_ids_that_have_linked_suggestions = []
    for thread_model in thread_models:
        if thread_model.has_suggestion:
            thread_ids_that_have_linked_suggestions.append(thread_model.id)
    general_suggestion_models = (
        suggestion_models.GeneralSuggestionModel.get_multi(
            thread_ids_that_have_linked_suggestions))
    suggestion_models_to_update = []
    for suggestion_model in general_suggestion_models:
        # As the thread is created before the suggestion, for the first message
        # we need not update the suggestion.
        if suggestion_model:
            suggestion_models_to_update.append(suggestion_model)
    suggestion_models.GeneralSuggestionModel.put_multi(
        suggestion_models_to_update)

    if (feconf.CAN_SEND_EMAILS
            and (feconf.CAN_SEND_FEEDBACK_MESSAGE_EMAILS
                 and user_services.is_user_registered(author_id))):
        for index, thread_model in enumerate(thread_models):
            _add_message_to_email_buffer(author_id,
                                         thread_model.id, message_ids[index],
                                         len(text), old_statuses[index],
                                         new_statuses[index])

    if author_id:
        subscription_services.subscribe_to_threads(author_id, thread_ids)
        add_message_ids_to_read_by_list(author_id, message_identifiers)

    # Convert the GeneralFeedbackMessageModels into a list of FeedbackMessage
    # domain objects.
    feedback_messages = [
        _get_message_from_model(message_model)
        for message_model in message_models
    ]

    return feedback_messages
Example #14
0
def memoize(func):
    """Decorator which provides thread-safe, cached-access to the return values
    of function calls.

    NOTE: This function uses dicts to manage the cache. This means that all
    values provided as arguments to func *must be hashable!*

    Args:
        func: callable. The function whose return values should be cached and
            accessed in a thread-safe manner.

    Returns:
        callable. The same func, but calls to it using the same arguments are
        made exactly once.
    """
    key_locks = {}
    lock_for_key_locks = threading.Lock()

    def threadsafe_access(key):
        """Returns a threading.Lock unique to the given key.

        Args:
            key: *. A hashable value.

        Returns:
            threading.Lock. A lock unique to the given key.
        """
        # Use double-checked locking to prevent race conditions.
        if key not in key_locks:
            with lock_for_key_locks:
                if key not in key_locks:
                    key_locks[key] = threading.Lock()
        return key_locks[key]

    cache = {}

    def get_from_cache(key, factory):
        """Returns and associates a factory-provided value to the given key if a
        value isn't associated to it yet. Otherwise, returns the pre-existing
        associated value.

        Args:
            key: *. A hashable value.
            factory: callable. A value producer that takes no arguments.

        Returns:
            *. The result of factory(), or the last value to be associated to
            key.
        """
        if key in cache:
            return cache[key]
        with threadsafe_access(key):
            if key not in cache:
                cache[key] = factory()
        return cache[key]

    # In order to allow calls to functions with default arguments to use the
    # same hash as calls which explicitly supply them, we fetch those default
    # values and use them to build the kwargs that func will actually see.
    arg_names, _, _, defaults = inspect.getargspec(func)
    defaults = defaults if defaults is not None else ()
    default_func_kwargs = dict(
        python_utils.ZIP(arg_names[-len(defaults):], defaults))

    @functools.wraps(func)
    def memoized_func(*args, **kwargs):
        """The same func, but calls to it using the same argument values are
        made exactly once.

        Returns:
            *. The value of func(*args, **kwargs).
        """
        func_kwargs = default_func_kwargs.copy()
        func_kwargs.update(kwargs)
        key = (tuple(args), tuple(sorted(func_kwargs.items())))
        return get_from_cache(key, lambda: func(*args, **kwargs))

    return memoized_func
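A minimal usage sketch of the decorator above (the function name and values are hypothetical, not part of the original module): repeated calls with the same effective arguments reuse the cached result, and a call that relies on a default argument hashes to the same key as a call that passes it explicitly.

@memoize
def expensive_lookup(key, attempts=3):
    # Imagine a slow computation or network call here.
    return '%s-result' % key

expensive_lookup('abc')              # Computed once and cached.
expensive_lookup('abc', attempts=3)  # Cache hit: same key as the call above.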
Example #15
0
    def map(user_settings_model):
        user_id = user_settings_model.id
        user_dates_list = [
            (
                'UserSettingsModel_created_on',
                user_settings_model.created_on
            ),
            (
                'UserSettingsModel_last_updated',
                user_settings_model.last_updated
            ),
            (
                'UserSettingsModel_last_agreed_to_terms',
                user_settings_model.last_agreed_to_terms
            ),
            (
                'UserSettingsModel_last_started_state_editor_tutorial',
                user_settings_model.last_started_state_editor_tutorial
            ),
            (
                'UserSettingsModel_last_started_state_translation_tutorial',
                user_settings_model.last_started_state_translation_tutorial
            ),
            (
                'UserSettingsModel_last_logged_in',
                user_settings_model.last_logged_in
            ),
            (
                'UserSettingsModel_last_edited_an_exploration',
                user_settings_model.last_edited_an_exploration
            ),
            (
                'UserSettingsModel_last_created_an_exploration',
                user_settings_model.last_created_an_exploration
            ),
        ]

        if user_settings_model.first_contribution_msec is not None:
            user_dates_list.append(
                (
                    'UserSettingsModel_first_contribution_msec',
                    datetime.datetime.fromtimestamp(
                        python_utils.divide(
                            user_settings_model.first_contribution_msec, 1000.0)
                    )
                )
            )

        # Models in user storage module having user_id as an attribute.
        exploration_user_data_model = (
            user_models.ExplorationUserDataModel.query(
                user_models.ExplorationUserDataModel.user_id == user_id).get()
        )

        all_models_linked_with_user_settings_model = [
            ('ExplorationUserDataModel', exploration_user_data_model)
        ]

        # Models in user storage module keyed by user_id.
        model_names_and_ids_to_be_fetched_in_batch = [
            ('UserContributionsModel', [user_id]),
            ('UserEmailPreferencesModel', [user_id]),
            ('UserStatsModel', [user_id]),
            ('UserSubscriptionsModel', [user_id]),
        ]
        fetched_batch_models = (
            datastore_services.fetch_multiple_entities_by_ids_and_models(
                model_names_and_ids_to_be_fetched_in_batch)
        )

        for model_name_tuple, model_list in list(python_utils.ZIP(
                model_names_and_ids_to_be_fetched_in_batch,
                fetched_batch_models)):
            model_name = model_name_tuple[0]
            actual_model = model_list[0]
            all_models_linked_with_user_settings_model.append(
                (model_name, actual_model)
            )

        for model_name, model in all_models_linked_with_user_settings_model:
            if model is not None:
                user_dates_list.append(
                    (
                        model_name + python_utils.UNICODE('_last_updated'),
                        model.last_updated
                    )
                )
                user_dates_list.append(
                    (
                        model_name + python_utils.UNICODE('_created_on'),
                        model.created_on
                    )
                )
                if model_name == 'UserSubscriptionsModel':
                    user_dates_list.append(
                        (
                            'UserSubscriptionsModel_last_checked',
                            model.last_checked
                        )
                    )
                if model_name == 'ExplorationUserDataModel':
                    user_dates_list.append(
                        (
                            'ExplorationUserDataModel_rated_on',
                            model.rated_on
                        )
                    )
                    user_dates_list.append(
                        (
                            'ExplorationUserDataModel_draft_change_list_last_'
                            'updated',
                            model.draft_change_list_last_updated
                        )
                    )

        filtered_user_dates_list = [
            (attribute_name, date) for attribute_name, date in user_dates_list
            if date is not None
        ]
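        # The earliest of all the collected timestamps; created_on should not
        # be later than this by more than the allowed delta.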
        model_name, min_date = min(filtered_user_dates_list, key=lambda x: x[1])
        time_delta_for_correctness = datetime.timedelta(minutes=5)
        if user_settings_model.created_on - min_date > (
                time_delta_for_correctness):
            yield (
                'ERROR_NEED_TO_UPDATE_USING_' + python_utils.UNICODE(
                    model_name), user_id)
        else:
            yield ('SUCCESS_ALREADY_UP_TO_DATE', 1)
Example #16
0
    def map(user_settings_model):
        user_id = user_settings_model.id
        user_dates_list = [
            (
                'UserSettingsModel_last_updated',
                user_settings_model.last_updated
            ),
            (
                'UserSettingsModel_last_agreed_to_terms',
                user_settings_model.last_agreed_to_terms
            ),
            (
                'UserSettingsModel_last_started_state_editor_tutorial',
                user_settings_model.last_started_state_editor_tutorial
            ),
            (
                'UserSettingsModel_last_started_state_translation_tutorial',
                user_settings_model.last_started_state_translation_tutorial
            ),
            (
                'UserSettingsModel_last_logged_in',
                user_settings_model.last_logged_in
            ),
            (
                'UserSettingsModel_last_edited_an_exploration',
                user_settings_model.last_edited_an_exploration
            ),
            (
                'UserSettingsModel_last_created_an_exploration',
                user_settings_model.last_created_an_exploration
            ),
        ]

        if user_settings_model.first_contribution_msec is not None:
            user_dates_list.append(
                (
                    'UserSettingsModel_first_contribution_msec',
                    datetime.datetime.fromtimestamp(
                        python_utils.divide(
                            user_settings_model.first_contribution_msec, 1000.0)
                    )
                )
            )

        # Models in user storage module having user_id as an attribute.
        exploration_user_data_model = (
            user_models.ExplorationUserDataModel.query(
                user_models.ExplorationUserDataModel.user_id == user_id).get()
        )

        all_models_linked_with_user_settings_model = [
            ('ExplorationUserDataModel', exploration_user_data_model)
        ]

        # Models in user storage module keyed by user_id itself.
        model_names_and_ids_to_be_fetched_in_batch = [
            ('UserContributionsModel', [user_id]),
            ('UserEmailPreferencesModel', [user_id]),
            ('UserStatsModel', [user_id]),
            ('UserSubscriptionsModel', [user_id]),
        ]
        fetched_batch_models = (
            datastore_services.fetch_multiple_entities_by_ids_and_models(
                model_names_and_ids_to_be_fetched_in_batch)
        )

        for model_name_tuple, model_list in list(python_utils.ZIP(
                model_names_and_ids_to_be_fetched_in_batch,
                fetched_batch_models)):
            model_name = model_name_tuple[0]
            actual_model = model_list[0]
            all_models_linked_with_user_settings_model.append(
                (model_name, actual_model)
            )

        for model_name, model in all_models_linked_with_user_settings_model:
            if model is not None:
                user_dates_list.append(
                    (
                        model_name + python_utils.UNICODE('_last_updated'),
                        model.last_updated
                    )
                )
                user_dates_list.append(
                    (
                        model_name + python_utils.UNICODE('_created_on'),
                        model.created_on
                    )
                )
                if model_name == 'UserSubscriptionsModel':
                    user_dates_list.append(
                        (
                            'UserSubscriptionsModel_last_checked',
                            model.last_checked
                        )
                    )
                if model_name == 'ExplorationUserDataModel':
                    user_dates_list.append(
                        (
                            'ExplorationUserDataModel_rated_on',
                            model.rated_on
                        )
                    )
                    user_dates_list.append(
                        (
                            'ExplorationUserDataModel_draft_change_list_last_'
                            'updated',
                            model.draft_change_list_last_updated
                        )
                    )

        filtered_user_dates_list = [
            (attribute_name, date) for attribute_name, date in user_dates_list
            if date is not None
        ]
        model_name, min_date = min(filtered_user_dates_list, key=lambda x: x[1])
        time_delta_for_update = datetime.timedelta(minutes=5)

        # This method for converting a date-time string to a datetime object
        # has also been used here:
        # https://github.com/oppia/oppia/blob/d394b6a186acc74b5ec9c3fecc20cc3f1954f441/utils.py#L479
        correction_cutoff_timestamp = datetime.datetime.strptime(
            'Jul 1 2020', '%b %d %Y')
        if user_settings_model.created_on - min_date > time_delta_for_update:
            user_settings_model.update_timestamps(
                update_last_updated_time=False)
            user_settings_model.created_on = min_date
            user_settings_model.put()
            yield (
                'SUCCESS_UPDATED_USING_' + python_utils.UNICODE(model_name), 1)

            # Yield an additional error key for user models created after the
            # cutoff date (July 1, 2020) that have a discrepancy in their
            # created_on.
            if min_date >= correction_cutoff_timestamp:
                yield ('ERROR_NOT_UP_TO_DATE_USER', user_id)
        else:
            yield ('SUCCESS_ALREADY_UP_TO_DATE', 1)
Example #17
0
    def run(self):
        """Returns a PCollection of audit errors aggregated from all models.

        Returns:
            PCollection. A PCollection of audit errors discovered during the
            audit.

        Raises:
            ValueError. When the `model_getter` option, which should be the type
                of PTransform we will use to fetch models from the datastore, is
                None.
        """
        if self.job_options.model_getter is None:
            raise ValueError('JobOptions.model_getter must not be None')

        existing_models, deleted_models = (
            self.pipeline
            | 'Get all models' >> self.job_options.model_getter()
            | 'Partition by model.deleted' >>
            (beam.Partition(lambda model, _: int(model.deleted), 2)))

        models_of_kind_by_index = (
            existing_models
            # NOTE: Partition returns a statically-sized list of PCollections.
            # Creating partitions is wasteful when there are fewer items than
            # there are partitions, like in our unit tests. In exchange, in
            # production the job will be able to take advantage of the high
            # parallelizability of PCollections, which are designed for enormous
            # datasets and parallel processing.
            #
            # Alternatively, we could have used GroupBy. However, that returns
            # an _iterable_ of items rather than a PCollection, and so it is
            # vulnerable to out-of-memory errors.
            #
            # Since this job is concerned with running audits on EVERY MODEL IN
            # STORAGE, Partition is the clear winner regardless of the overhead
            # we'll see in unit tests.
            |
            'Split models into parallelizable PCollections' >> beam.Partition(
                lambda m, _, kinds: kinds.index(job_utils.get_model_kind(m)),
                # NOTE: Partition requires a hard-coded number of slices; it
                # cannot be used with dynamic numbers generated in a pipeline.
                # KIND_BY_INDEX is a constant tuple so that requirement is
                # satisfied in this case.
                len(KIND_BY_INDEX),
                KIND_BY_INDEX))

        audit_error_pcolls = [
            deleted_models
            | 'Apply ValidateDeletedModel on deleted models' >>
            (beam.ParDo(base_model_audits.ValidateDeletedModel()))
        ]

        model_groups = python_utils.ZIP(KIND_BY_INDEX, models_of_kind_by_index)
        for kind, models_of_kind in model_groups:
            # NOTE: Using extend() instead of append() because ApplyAuditDoFns
            # produces an iterable of PCollections rather than a single one.
            # NOTE: Label is missing because ApplyAuditDoFns labels itself.
            audit_error_pcolls.extend(models_of_kind | ApplyAuditDoFns(kind))

        return audit_error_pcolls | 'Combine audit results' >> beam.Flatten()
Example #18
0
    def reduce(key, values):
        if key == POPULATED_KEY:
            yield (AUDIT_KEY, len(values))
            return
        elif key == SYSTEM_COMMITTER_ACK:
            yield (SYSTEM_COMMITTER_ACK, values)
            return

        try:
            # NOTE: "app" is the term Firebase uses for the "entry point" to the
            # Firebase SDK. Oppia only has one server, so it only needs to
            # instantiate one app.
            firebase_connection = firebase_admin.initialize_app()
        except Exception as exception:
            yield (WARNING_KEY, repr(exception))
            return

        # NOTE: This is only sorted to make unit testing easier.
        user_fields = sorted(ast.literal_eval(v) for v in values)
        user_records = [
            firebase_auth.ImportUserRecord(uid=auth_id,
                                           email=email,
                                           email_verified=True)
            for auth_id, _, email in user_fields
        ]

        # The Firebase Admin SDK places a hard-limit on the number of users that
        # can be "imported" in a single call. To compensate, we break up the
        # users into chunks.
        offsets = python_utils.RANGE(0, len(user_records),
                                     MAX_USERS_FIREBASE_CAN_IMPORT_PER_CALL)
        results = (
            _populate_firebase([record for record in record_group if record])
            for record_group in _grouper(
                user_records, MAX_USERS_FIREBASE_CAN_IMPORT_PER_CALL))

        assocs_to_create = []
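        # Pair each chunk's starting offset with that chunk's import result so
        # that error indices can be mapped back to entries in user_fields.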
        for offset, (result, exception) in python_utils.ZIP(offsets, results):
            if exception is not None:
                yield (FAILURE_KEY, repr(exception))
            else:
                successful_indices = set(
                    python_utils.RANGE(result.success_count +
                                       result.failure_count))
                for error in result.errors:
                    successful_indices.remove(error.index)
                    debug_info = (
                        'Import user_id=%r failed: %s' %
                        (user_fields[offset + error.index][1], error.reason))
                    yield (FAILURE_KEY, debug_info)
                assocs_to_create.extend(
                    auth_domain.AuthIdUserIdPair(*user_fields[offset + i][:2])
                    for i in successful_indices)

        if assocs_to_create:
            firebase_auth_services.associate_multi_auth_ids_with_user_ids(
                assocs_to_create)
            yield (SUCCESS_KEY, len(assocs_to_create))

        try:
            # NOTE: This is not dangerous. We are just deleting the resources
            # used to form a connection to Firebase servers.
            firebase_admin.delete_app(firebase_connection)
        except Exception as exception:
            yield (WARNING_KEY, repr(exception))
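The _grouper helper used above is not shown in this excerpt. A common implementation is the itertools "grouper" recipe, sketched here under that assumption (the project's actual helper may differ); it yields fixed-size chunks, padding the final chunk with a fill value, which is why the records are filtered for truthiness before import:

import itertools

def _grouper(iterable, chunk_len, fillvalue=None):
    # Collect items into fixed-length chunks, e.g.
    # _grouper('ABCDE', 2) -> ('A', 'B'), ('C', 'D'), ('E', None).
    args = [iter(iterable)] * chunk_len
    return itertools.zip_longest(*args, fillvalue=fillvalue)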
Example #19
0
def get_thread_summaries(user_id, thread_ids):
    """Returns a list of summaries corresponding to each of the threads given.
    It also returns the number of threads that are currently not read by the
    user.

    Args:
        user_id: str. The id of the user.
        thread_ids: list(str). The ids of the threads for which we have to fetch
            the summaries.

    Returns:
        tuple(thread_summaries, number_of_unread_threads), where:
            thread_summaries: list(FeedbackThreadSummary).
            number_of_unread_threads: int. The number of threads not read by the
                user.
    """
    thread_user_ids = [
        feedback_models.GeneralFeedbackThreadUserModel.generate_full_id(
            user_id, thread_id) for thread_id in thread_ids
    ]
    thread_exp_ids = [
        get_exp_id_from_thread_id(thread_id) for thread_id in thread_ids
    ]

    thread_models, thread_user_models, thread_exp_models = (
        datastore_services.fetch_multiple_entities_by_ids_and_models([
            ('GeneralFeedbackThreadModel', thread_ids),
            ('GeneralFeedbackThreadUserModel', thread_user_ids),
            ('ExplorationModel', thread_exp_ids),
        ]))

    threads = [_get_thread_from_model(m) for m in thread_models]
    flat_last_two_message_models = (
        feedback_models.GeneralFeedbackMessageModel.get_multi(
            itertools.chain.from_iterable(t.get_last_two_message_ids()
                                          for t in threads)))
    last_two_message_models_of_threads = [
        flat_last_two_message_models[i:i + 2]
        for i in python_utils.RANGE(0, len(flat_last_two_message_models), 2)
    ]

    thread_summaries = []
    number_of_unread_threads = 0
    for thread, last_two_message_models, user_model, exp_model in (
            python_utils.ZIP(threads, last_two_message_models_of_threads,
                             thread_user_models, thread_exp_models)):
        last_message_model, second_last_message_model = last_two_message_models
        message_ids_read_by_user = (() if user_model is None else
                                    user_model.message_ids_read_by_user)

        # The last message is never None because all threads have at least one
        # message.
        last_message_is_read = (last_message_model.message_id
                                in message_ids_read_by_user)
        author_last_message = (last_message_model.author_id
                               and user_services.get_username(
                                   last_message_model.author_id))

        # The second-to-last message, however, can be None.
        second_last_message_is_read = (second_last_message_model is not None
                                       and second_last_message_model.message_id
                                       in message_ids_read_by_user)
        author_second_last_message = (second_last_message_model
                                      and second_last_message_model.author_id
                                      and user_services.get_username(
                                          second_last_message_model.author_id))

        if not last_message_is_read:
            number_of_unread_threads += 1
        thread_summaries.append(
            feedback_domain.FeedbackThreadSummary(
                thread.status, thread.original_author_id, thread.last_updated,
                last_message_model.text, thread.message_count,
                last_message_is_read, second_last_message_is_read,
                author_last_message, author_second_last_message,
                exp_model.title, exp_model.id, thread.id))
    return thread_summaries, number_of_unread_threads