Beispiel #1
0
    def map(item):
        """Yields customization-arg validation errors for one exploration.

        Args:
            item: *. The model passed to
                exp_fetchers.get_exploration_from_model(). Items whose
                'deleted' attribute is truthy are skipped.

        Yields:
            tuple(str, list). Either a load-failure message paired with a
            one-element list holding the item ID, or an error key returned by
            html_validation_service.validate_customization_args() paired
            with its values, to which an 'Exp ID: <id>' entry is appended.
        """
        if item.deleted:
            return

        try:
            exploration = exp_fetchers.get_exploration_from_model(item)
        except Exception as e:
            load_error = 'Error %s when loading exploration' % (
                python_utils.UNICODE(e))
            yield (load_error, [item.id])
            return

        validation_errors = (
            html_validation_service.validate_customization_args(
                exploration.get_all_html_content_strings()))
        for error_key, error_values in validation_errors.items():
            # Tag every reported error with the exploration it came from.
            error_values.append('Exp ID: %s' % item.id)
            yield (error_key, error_values)
Beispiel #2
0
def save_hashes_to_file(file_hashes):
    """Writes the hashes needed by the frontend to the hashes JSON file.

    The given hashes are narrowed down with filter_hashes(), serialized as
    JSON (non-ASCII characters are not escaped) and written, followed by a
    newline, to HASHES_JSON_FILEPATH. Nothing is returned.

    Args:
        file_hashes: dict(str, str). Dictionary with filepaths as keys and
            hashes of file content as values.
    """
    # Only some of the hashes are needed in the frontend.
    filtered_hashes = filter_hashes(file_hashes)
    # Serialize before the file is opened: 'w+' truncates the file, so a
    # serialization failure must not be able to leave it empty.
    serialized_hashes = python_utils.UNICODE(
        json.dumps(filtered_hashes, ensure_ascii=False))

    ensure_directory_exists(HASHES_JSON_FILEPATH)
    with python_utils.open_file(HASHES_JSON_FILEPATH, 'w+') as hashes_json_file:
        hashes_json_file.write(serialized_hashes)
        hashes_json_file.write(u'\n')
Beispiel #3
0
def run_job_sync(
        job_name: str,
        job_args: List[str],
        namespace: Optional[str] = None) -> beam_job_domain.BeamJobRun:
    """Runs the specified job synchronously on a DirectRunner pipeline.

    The function only returns after the pipeline context has exited, i.e.
    once the job has either finished or failed.

    Args:
        job_name: str. The name of the job to run.
        job_args: list(str). The arguments to the job's run() method.
        namespace: str|None. The namespace passed to the job options, in
            which models should be created.

    Returns:
        BeamJobRun. Contains metadata related to the execution status of the
        job.
    """
    pipeline = beam.Pipeline(
        runner=runners.DirectRunner(),
        options=job_options.JobOptions(namespace=namespace))
    job = registry.get_job_class_by_name(job_name)(pipeline)
    beam_job_run_model = beam_job_services.create_beam_job_run_model(
        job_name, job_args)

    try:
        with pipeline:
            unused_pdone = (
                job.run(*job_args) |
                job_io.PutResults(beam_job_run_model.id))
    except Exception as exception:
        beam_job_run_model.latest_job_state = (
            beam_job_models.BeamJobState.FAILED.value)
        # The pipeline could not store its results, so the caught exception
        # is written to storage explicitly as the job's error output.
        failure_result_model = (
            beam_job_services.create_beam_job_run_result_model(
                beam_job_run_model.id, '', python_utils.UNICODE(exception)))
        failure_result_model.put()
    else:
        beam_job_run_model.latest_job_state = (
            beam_job_models.BeamJobState.DONE.value)
    finally:
        # The run model is persisted whatever the outcome was.
        beam_job_run_model.put()

    return beam_job_services.get_beam_job_run_from_model(beam_job_run_model)
def add_svg_filenames_for_latex_strings_in_html_string(
        raw_latex_to_image_data_dict, html_string):
    """Fills in the svg_filename of math rich-text components that lack one.

    A math tag is updated only when its svg_filename is empty and its LaTeX
    string has an entry in the given images data; all other tags are left
    as they are.

    TODO(#10045): Remove this function once all the math-rich text components in
    explorations have a valid math SVG stored in the datastore.

    Args:
        raw_latex_to_image_data_dict: dict(str, LatexStringSvgImageData). The
            dictionary having the key as a LaTeX string and the corresponding
            value as the SVG image data for that LaTeX string.
        html_string: str. HTML string to modify.

    Returns:
        str. The HTML string after the matching math tags were updated.
    """
    soup = bs4.BeautifulSoup(
        html_string.encode(encoding='utf-8'), 'html.parser')

    for math_tag in soup.findAll(name='oppia-noninteractive-math'):
        existing_content = json.loads(
            unescape_html(math_tag['math_content-with-value']))
        raw_latex = objects.UnicodeString.normalize(
            existing_content['raw_latex'])
        svg_filename = objects.UnicodeString.normalize(
            existing_content['svg_filename'])

        # Skip tags that already have a filename, or for which no image
        # data was supplied.
        if svg_filename != '' or (
                raw_latex not in raw_latex_to_image_data_dict):
            continue

        image_data = raw_latex_to_image_data_dict[raw_latex]
        filename = generate_math_svgs_filename(
            image_data.latex_string_svg_image_dimensions)
        normalized_content = objects.MathExpressionContent.normalize({
            'raw_latex': raw_latex,
            'svg_filename': objects.UnicodeString.normalize(filename)
        })
        math_tag['math_content-with-value'] = escape_html(
            json.dumps(normalized_content, sort_keys=True))

    return python_utils.UNICODE(soup)
Beispiel #5
0
    def test_recursively_convert_to_str_with_nested_structure(self):
        """Checks _recursively_convert_to_str on a dict that nests a list and
        another dict, including the types of the converted keys and values.
        """
        unicode_var = python_utils.UNICODE('test_var_1')
        nested_list = [
            unicode_var, unicode_var.encode('utf-8'),
            'test_var_2', b'test_var_3', {'test_var_4': b'test_var_5'}]
        input_dict = {unicode_var: nested_list}
        expected_input_dict = {
            'test_var_1': [
                'test_var_1', b'test_var_1', 'test_var_2', b'test_var_3',
                {'test_var_4': b'test_var_5'}]
        }
        self.assertEqual(input_dict, expected_input_dict)

        converted_dict = python_utils._recursively_convert_to_str(input_dict)  # pylint: disable=protected-access
        expected_converted_dict = {
            'test_var_1': [
                'test_var_1', b'test_var_1', 'test_var_2', 'test_var_3',
                {'test_var_4': 'test_var_5'}]
        }
        self.assertEqual(converted_dict, expected_converted_dict)

        def _assert_not_future_type(value):
            """Asserts that value is neither a future newstr nor newbytes."""
            self.assertNotEqual(type(value), future.types.newstr)
            self.assertNotEqual(type(value), future.types.newbytes)

        for key, value in converted_dict.items():
            _assert_not_future_type(key)
            self.assertTrue(isinstance(key, unicode))

            for element in value:
                _assert_not_future_type(element)
                self.assertTrue(isinstance(element, (unicode, bytes, dict)))

            # The last list element is the nested dict.
            for nested_key, nested_value in value[-1].items():
                _assert_not_future_type(nested_key)
                _assert_not_future_type(nested_value)
                self.assertEqual(type(nested_key), unicode)
                self.assertEqual(type(nested_value), bytes)
Beispiel #6
0
 def test_all_fields_have_export_policy(self):
     """Ensure every field in every model has an export policy defined."""
     allowed_policies = {
         base_models.EXPORT_POLICY.EXPORTED,
         base_models.EXPORT_POLICY.EXPORTED_AS_KEY_FOR_TAKEOUT_DICT,
         base_models.EXPORT_POLICY.NOT_APPLICABLE
     }
     for model in test_utils.get_storage_model_classes():
         # Base classes listed as having no data policies are not checked.
         if (model.__name__ in
                 test_utils.BASE_MODEL_CLASSES_WITHOUT_DATA_POLICIES):
             continue

         export_policy = model.get_export_policy()
         property_names = sorted([
             python_utils.UNICODE(prop) for prop in model._properties  # pylint: disable=protected-access
         ])
         # The policy must name exactly the properties of the model.
         self.assertEqual(property_names, sorted(export_policy.keys()))
         self.assertTrue(
             set(export_policy.values()).issubset(allowed_policies))
def add_caption_attr_to_image(html_string):
    """Gives every oppia-noninteractive-image tag a caption attribute.

    Tags that already carry a 'caption-with-value' attribute are left
    untouched; the others receive an escaped, JSON-encoded empty string.

    Args:
        html_string: str. HTML string in which the caption attribute is to be
            added.

    Returns:
        str. Updated HTML string with the caption attribute for all
        oppia-noninteractive-image tags.
    """
    soup = bs4.BeautifulSoup(
        html_string.encode(encoding='utf-8'), 'html.parser')

    for image_tag in soup.findAll(name='oppia-noninteractive-image'):
        if 'caption-with-value' in image_tag.attrs:
            continue
        image_tag['caption-with-value'] = escape_html(json.dumps(''))

    return python_utils.UNICODE(soup)
Beispiel #8
0
def gather_logs(start, stop='HEAD'):
    """Collects the git logs between the given start and end points.

    Args:
        start: str. Tag, Branch or SHA1 of start point.
        stop: str. Tag, Branch or SHA1 of end point, defaults to HEAD.

    Returns:
        list(Log). One Log per NUL-separated entry of the command output, or
        an empty list when the command printed nothing.
    """
    logs_command = GIT_CMD_GET_LOGS_FORMAT_STRING.format(
        GROUP_SEP, start, stop)
    raw_output = common.run_cmd(logs_command.split(' '))
    # The logs may contain non-ascii characters, so the output is converted
    # to unicode to keep the release summary generation from breaking.
    entries = python_utils.UNICODE(raw_output, 'utf-8').split('\x00')

    # Splitting an empty output yields a single empty entry.
    if entries == ['']:
        return []
    return [Log(*entry.strip().split(GROUP_SEP)) for entry in entries]
Beispiel #9
0
    def generate_new_blog_post_id(cls):
        """Generates a blog post ID that no existing blog post uses.

        Each candidate is the hash (via utils.convert_to_hash with
        base_models.ID_LENGTH) of a random integer; at most
        base_models.MAX_RETRIES candidates are tried.

        Returns:
            str. A blog post ID that is different from the IDs of all
            the existing blog posts.

        Raises:
            Exception. There were too many collisions with existing blog post
                IDs when attempting to generate a new blog post ID.
        """
        for _ in python_utils.RANGE(base_models.MAX_RETRIES):
            random_seed = python_utils.UNICODE(
                utils.get_random_int(base_models.RAND_RANGE))
            candidate_id = utils.convert_to_hash(
                random_seed, base_models.ID_LENGTH)
            if cls.get_by_id(candidate_id):
                # The ID is already taken, so try another one.
                continue
            return candidate_id

        raise Exception(
            'New blog post id generator is producing too many collisions.')
def convert_tag_contents_to_rte_format(html_data, rte_conversion_fn):
    """Converts the rich text nested inside collapsible and tabs components
    to the given RTE format. HTML without such components is only re-parsed
    and serialized again.

    Args:
        html_data: str. The HTML string whose content is to be converted.
        rte_conversion_fn: function. The RTE conversion function for
            html strings.

    Returns:
        str. The HTML string with converted content within tag.
    """
    soup = bs4.BeautifulSoup(html_data.encode(encoding='utf-8'), 'html.parser')

    for collapsible in soup.findAll(name='oppia-noninteractive-collapsible'):
        # A missing or blank content-with-value attribute is replaced by an
        # escaped, JSON-encoded empty string so that it can be parsed below.
        if collapsible.attrs.get('content-with-value', '') == '':
            collapsible['content-with-value'] = escape_html(json.dumps(''))

        original_content = json.loads(
            unescape_html(collapsible['content-with-value']))
        converted_content = rte_conversion_fn(original_content)
        collapsible['content-with-value'] = escape_html(
            json.dumps(converted_content))

        # To ensure that collapsible tags have heading-with-value attribute.
        if 'heading-with-value' not in collapsible.attrs:
            collapsible['heading-with-value'] = escape_html(json.dumps(''))

    for tabs in soup.findAll(name='oppia-noninteractive-tabs'):
        tab_contents = json.loads(
            unescape_html(tabs['tab_contents-with-value']))
        # Each entry is converted in place before being serialized again.
        for tab in tab_contents:
            tab['content'] = rte_conversion_fn(tab['content'])
        tabs['tab_contents-with-value'] = escape_html(
            json.dumps(tab_contents))

    return python_utils.UNICODE(soup)
Beispiel #11
0
    def _sort_and_slice_similarities(
            similarities: Iterable[Dict[str, Union[str, float]]]) -> List[str]:
        """Orders explorations by descending similarity score and keeps at
        most MAX_RECOMMENDATIONS of their IDs.

        Args:
            similarities: iterable(dict). Iterable of dictionaries. The
                structure of the dictionaries is:
                    exp_id: str. The ID of the similar exploration.
                    similarity_score: float. The similarity score for
                        the exploration.

        Returns:
            list(str). List of exploration IDs, sorted by the similarity.
        """
        ranked_similarities = list(similarities)
        ranked_similarities.sort(
            key=lambda similarity: similarity['similarity_score'],
            reverse=True)
        ranked_exp_ids = [
            python_utils.UNICODE(similarity['exp_id'])
            for similarity in ranked_similarities
        ]
        return ranked_exp_ids[:MAX_RECOMMENDATIONS]
    def test_reply_to_id_flag(self):
        """Verifies that the reply_to_id flag is working properly."""
        reply_id = 123

        def _check_reply_to_header(data=None):
            """Stand-in for post_to_mailgun() that checks the Reply-To
            header of the outgoing data.
            """
            self.assertEqual(
                data['h:Reply-To'],
                'reply+' + python_utils.UNICODE(reply_id) + '@' +
                feconf.INCOMING_EMAILS_DOMAIN_NAME)

        swap_api_key = self.swap(feconf, 'MAILGUN_API_KEY', 'api')
        swap_domain_name = self.swap(feconf, 'MAILGUN_DOMAIN_NAME', 'domain')
        swap_can_send_emails = self.swap(feconf, 'CAN_SEND_EMAILS', True)
        swap_post_to_mailgun = self.swap(
            mailgun_email_services, 'post_to_mailgun', _check_reply_to_header)

        with swap_api_key, swap_domain_name, swap_post_to_mailgun, (
            swap_can_send_emails):
            mailgun_email_services.send_mail(
                feconf.SYSTEM_EMAIL_ADDRESS, feconf.ADMIN_EMAIL_ADDRESS,
                'subject', 'body', 'html',
                bcc_admin=False, reply_to_id=reply_id)
def establish_firebase_connection():
    """Establishes the connection to Firebase needed by the rest of the SDK.

    All Firebase operations require an "app", the abstraction used for a
    Firebase server connection. The initialize_app() function raises an error
    when it's called more than once, however, so we make this function
    idempotent by trying to "get" the app first.

    This function does not return anything; callers that need the app
    object have to fetch it themselves.

    Raises:
        ValueError. get_app() failed with a message that does not mention
            'initialize_app', i.e. the Firebase app has a genuine problem.
    """
    try:
        firebase_admin.get_app()
    except ValueError as error:
        # Only an error that mentions initialize_app is treated as "the app
        # has not been set up yet"; anything else is passed on unchanged.
        if 'initialize_app' not in python_utils.UNICODE(error):
            raise
        firebase_admin.initialize_app(
            options={'projectId': feconf.OPPIA_PROJECT_ID})
Beispiel #14
0
def permanently_delete_user_from_list(user_email: str) -> None:
    """Permanently removes the user with the given email from the Mailchimp
    list. Nothing is done when no Mailchimp client is available.

    NOTE TO DEVELOPERS: This should only be called from the wipeout service
    since once a user is permanently deleted from mailchimp, they cannot be
    programmatically added back via their API (the user would have to manually
    resubscribe back).

    Args:
        user_email: str. Email ID of the user. Email is used to uniquely
            identify the user in the mailchimp DB.

    Raises:
        Exception. Any error raised by the mailchimp API, other than one
            whose status is 404.
    """
    client = _get_mailchimp_class()
    if not client:
        return None
    subscriber_hash = _get_subscriber_hash(user_email)

    try:
        members = client.lists.members
        audience_id = feconf.MAILCHIMP_AUDIENCE_ID
        # The member is looked up first and only then deleted for good.
        members.get(audience_id, subscriber_hash)
        members.delete_permanent(audience_id, subscriber_hash)
    except mailchimpclient.MailChimpError as error:
        # This has to be done since the message can only be accessed from
        # MailChimpError by error.message in Python2, but this is deprecated in
        # Python3.
        # In Python3, the message can be accessed directly by KeyError
        # (https://github.com/VingtCinq/python-mailchimp/pull/65), so as a
        # workaround for Python2, the 'message' attribute is obtained by
        # str() and then it is converted to dict. This works in Python3 as well.
        error_details = ast.literal_eval(python_utils.UNICODE(error))
        error_status = error_details['status']
        # Ignore if the error corresponds to "User does not exist".
        if error_status != 404:
            raise Exception(error_details['detail'])
Beispiel #15
0
    def _fetch_field_name_to_external_model_references(cls, item):
        """Fetches the external models named by
        _get_external_id_relationships and records them, per field name, in
        cls.field_name_to_external_model_references.

        This should be called before we call other _validate methods. When
        collecting the relationships raises a utils.ValidationError, the
        error is recorded through cls._add_error and nothing is fetched.

        Args:
            item: datastore_services.Model. Entity to validate.
        """
        field_name_to_fetch_details = {}

        try:
            for fetcher_details in cls._get_external_id_relationships(item):
                field_name_to_fetch_details[fetcher_details.field_name] = (
                    fetcher_details.model_class, fetcher_details.model_ids)
        except utils.ValidationError as err:
            cls._add_error(
                ERROR_CATEGORY_INVALID_USER_SETTING_IDS,
                'Entity id %s: %s' % (item.id, python_utils.UNICODE(err)))
            return

        all_fetched_instances = (
            datastore_services.fetch_multiple_entities_by_ids_and_models(
                list(field_name_to_fetch_details.values())))

        # The fetched results are matched to the fields by position, i.e. by
        # the iteration order of the dict built above.
        for index, (field_name, (model_class, model_ids)) in enumerate(
                field_name_to_fetch_details.items()):
            instances_for_field = all_fetched_instances[index]

            for model_id, model_instance in python_utils.ZIP(
                    model_ids, instances_for_field):
                cls.field_name_to_external_model_references[
                    field_name].append(
                        ExternalModelReference(
                            model_class, model_id, model_instance))
Beispiel #16
0
def _create_user_in_mailchimp_db(user_email: str) -> bool:
    """Adds a new subscribed user to the mailchimp database, coping with the
    case where the user was permanently deleted from the database before.

    Args:
        user_email: str. Email ID of the user. Email is used to uniquely
            identify the user in the mailchimp DB.

    Returns:
        bool. Whether the user was successfully added to the db. (This will be
        False if the user was permanently deleted earlier and therefore cannot
        be added back.)

    Raises:
        Exception. Any error (other than the one mentioned below) raised by the
            mailchimp API.
    """
    new_member_data = {
        'email_address': user_email,
        'status': 'subscribed'
    }
    client = _get_mailchimp_class()

    try:
        client.lists.members.create(
            feconf.MAILCHIMP_AUDIENCE_ID, new_member_data)
    except mailchimpclient.MailChimpError as error:
        error_details = ast.literal_eval(python_utils.UNICODE(error))
        # This is the specific error message returned for the case where the
        # user was permanently deleted from the Mailchimp database earlier.
        # This was found by experimenting with the MailChimp API. Note that the
        # error reference
        # (https://mailchimp.com/developer/marketing/docs/errors/) is not
        # comprehensive, since, under status 400, they only list a subset of the
        # common error titles.
        if error_details['title'] != 'Forgotten Email Not Subscribed':
            raise Exception(error_details['detail'])
        return False
    else:
        return True
Beispiel #17
0
def _job_bookkeeping_context(
    job_name: str
) -> Iterator[beam_job_models.BeamJobRunModel]:
    """Generator meant to be used as a context manager (presumably wrapped
    by a decorator that is not part of this block) which commits failure
    details if an exception occurs.

    An exception raised by the managed code is recorded on the bookkeeping
    model and through _put_job_stderr, and is not re-raised here. The model
    is put in every case.

    Args:
        job_name: str. The name of the job.

    Yields:
        BeamJobRunModel. The bookkeeping model used to record execution details.
    """
    bookkeeping_model = beam_job_services.create_beam_job_run_model(job_name)

    try:
        yield bookkeeping_model
    except Exception as exception:
        bookkeeping_model.latest_job_state = (
            beam_job_models.BeamJobState.FAILED.value)
        failure_details = python_utils.UNICODE(exception)
        _put_job_stderr(bookkeeping_model.id, failure_details)
    finally:
        bookkeeping_model.put()
Beispiel #18
0
    def _get_new_id(cls):
        """Generates an ID for a question that no stored instance uses yet.

        Each candidate is the hash (via utils.convert_to_hash with
        base_models.ID_LENGTH) of a random integer; at most
        base_models.MAX_RETRIES candidates are tried.

        Returns:
            *. The value returned by utils.convert_to_hash for the first
            candidate that cls.get_by_id() does not find.

        Raises:
            Exception. The ID generator for QuestionModel is
                producing too many collisions.
        """
        for _ in python_utils.RANGE(base_models.MAX_RETRIES):
            random_seed = python_utils.UNICODE(
                utils.get_random_int(base_models.RAND_RANGE))
            candidate_id = utils.convert_to_hash(
                random_seed, base_models.ID_LENGTH)
            if cls.get_by_id(candidate_id):
                # The ID is already taken, so try another one.
                continue
            return candidate_id

        raise Exception(
            'The id generator for QuestionModel is producing too many '
            'collisions.')
Beispiel #19
0
    def run(self, job_id, job_class_str, output):
        """Reads the results of a MR job and registers its completion, or
        its failure when reading or registering raises.

        Args:
            job_id: str. The ID of the job to run.
            job_class_str: str. Should uniquely identify each type of job.
            output: str. The output produced by the job.
        """
        job_class = mapreduce_util.for_name(job_class_str)

        try:
            output_readers = input_readers.GoogleCloudStorageInputReader(
                output, 0)
            # Every item produced by every reader is a JSON-encoded result.
            results_list = [
                json.loads(encoded_result)
                for output_reader in output_readers
                for encoded_result in output_reader
            ]
            job_class.register_completion(job_id, results_list)
        except Exception as e:
            logging.exception('Job %s failed at %s' %
                              (job_id, utils.get_current_time_in_millisecs()))
            failure_details = '%s\n%s' % (
                python_utils.UNICODE(e), traceback.format_exc())
            job_class.register_failure(job_id, failure_details)
    def map(item):
        """Validates the interaction customization args of every state of
        one exploration.

        Args:
            item: *. The model passed to
                exp_fetchers.get_exploration_from_model(). Items whose
                'deleted' attribute is truthy are skipped.

        Yields:
            tuple(str, str). A message naming the exploration ID, paired with
            the comma-separated validation errors. Nothing is yielded when no
            state failed validation.
        """
        if item.deleted:
            return

        exploration = exp_fetchers.get_exploration_from_model(item)
        error_messages = []
        for state in exploration.states.values():
            # States without an interaction have nothing to validate.
            if state.interaction.id is None:
                continue
            try:
                interaction_spec = (
                    interaction_registry.Registry.get_interaction_by_id(
                        state.interaction.id))
                ca_specs = interaction_spec.customization_arg_specs

                customization_args_dict = {
                    ca_name: ca_value.to_customization_arg_dict()
                    for ca_name, ca_value in (
                        state.interaction.customization_args.items())
                }

                customization_args_util.validate_customization_args_and_values(
                    'interaction',
                    state.interaction.id,
                    customization_args_dict,
                    ca_specs,
                    fail_on_validation_errors=True
                )
            except Exception as e:
                error_messages.append(
                    '%s: %s' % (state.interaction.id, python_utils.UNICODE(e)))

        if not error_messages:
            return
        yield (
            'Failed customization args validation for exp '
            'id %s' % item.id, ', '.join(error_messages))
def get_extra_commits_in_new_release(base_commit, repo):
    """Gets extra commits in the new release.

    Args:
        base_commit: str. The base commit common between current branch and the
            latest release.
        repo: github.Repository.Repository. The PyGithub object for the repo.

    Returns:
        list(github.Commit.Commit). List of commits from the base commit up to
        the current commit, which haven't been cherrypicked already. The list
        is empty when the command output has no line starting with '+'.
    """
    get_commits_cmd = GIT_CMD_TEMPLATE_GET_NEW_COMMITS % base_commit
    out = python_utils.UNICODE(common.run_cmd(get_commits_cmd.split(' ')),
                               'utf-8').split('\n')
    commits = []
    for line in out:
        # Lines that start with a - are already cherrypicked. The commits of
        # interest are on lines that start with +. startswith() is used
        # rather than line[0] because an empty output splits into [''], and
        # indexing an empty line raises IndexError.
        if not line.startswith('+'):
            continue
        # Skip the two-character marker prefix; the commit SHA is everything
        # up to the next space (or the rest of the line if there is none).
        commit_sha = line[2:].partition(' ')[0]
        commits.append(repo.get_commit(commit_sha))
    return commits
Beispiel #22
0
 def mock_print(*args):
     """Mock for python_utils.PRINT that stores the space-joined arguments
     in self.log (self is expected to come from the enclosing scope).
     """
     printed_parts = [python_utils.UNICODE(arg) for arg in args]
     self.log = ' '.join(printed_parts)
Beispiel #23
0
    def delete_multi(cls,
                     entity_ids,
                     committer_id,
                     commit_message,
                     force_deletion=False):
        """Deletes the given cls instances with the given entity_ids.

        With force_deletion=True, the models are deleted from storage together
        with the snapshot metadata and snapshot content entities of every
        version from 1 up to each model's current version. Otherwise each
        model is marked as deleted, its version is incremented, and a new
        snapshot (metadata and content) recording the deletion is stored
        alongside the updated model.

        Args:
            entity_ids: list(str). Ids of entities to delete.
            committer_id: str. The user_id of the user who committed the change.
            commit_message: str. The commit description message.
            force_deletion: bool. If True these models are deleted completely
                from storage, otherwise they are only marked as deleted.
                Default is False.

        Raises:
            Exception. This model instance has been already deleted.
        """
        versioned_models = cls.get_multi(entity_ids)
        if force_deletion:
            all_models_metadata_keys = []
            all_models_content_keys = []
            for model in versioned_models:
                # Version numbers run from 1 to model.version, as strings.
                model_version_numbers = [
                    python_utils.UNICODE(num + 1)
                    for num in python_utils.RANGE(model.version)
                ]
                model_snapshot_ids = [
                    model.get_snapshot_id(model.id, version_number)
                    for version_number in model_version_numbers
                ]

                # The same snapshot IDs key both the metadata and the content
                # entities.
                all_models_metadata_keys.extend([
                    ndb.Key(model.SNAPSHOT_METADATA_CLASS, snapshot_id)
                    for snapshot_id in model_snapshot_ids
                ])
                all_models_content_keys.extend([
                    ndb.Key(model.SNAPSHOT_CONTENT_CLASS, snapshot_id)
                    for snapshot_id in model_snapshot_ids
                ])
            versioned_models_keys = [model.key for model in versioned_models]
            # Snapshots and models are deleted in a single transaction call.
            transaction_services.run_in_transaction(
                ndb.delete_multi, all_models_metadata_keys +
                all_models_content_keys + versioned_models_keys)
        else:
            # Every model is checked and flagged before any snapshot is
            # built, so the check runs for all of them before anything is put.
            for model in versioned_models:
                model._require_not_marked_deleted()  # pylint: disable=protected-access
                model.deleted = True

            commit_cmds = [{'cmd': cls.CMD_DELETE_COMMIT}]
            snapshot_metadata_models = []
            snapshot_content_models = []
            for model in versioned_models:
                # The deletion is stored as a new version of the model.
                model.version += 1
                snapshot = model.compute_snapshot()
                snapshot_id = model.get_snapshot_id(model.id, model.version)

                snapshot_metadata_models.append(
                    model.SNAPSHOT_METADATA_CLASS.create(
                        snapshot_id, committer_id, cls._COMMIT_TYPE_DELETE,
                        commit_message, commit_cmds))
                snapshot_content_models.append(
                    model.SNAPSHOT_CONTENT_CLASS.create(snapshot_id, snapshot))

            # New snapshots and updated models are put in a single
            # transaction call.
            transaction_services.run_in_transaction(
                BaseModel.put_multi, snapshot_metadata_models +
                snapshot_content_models + versioned_models)
Beispiel #24
0
    def map(snapshot_model):
        """Creates the commit log model matching a snapshot metadata model
        when that commit log model is missing.

        Args:
            snapshot_model: *. A snapshot metadata model whose ID has the
                form '<model_id>-<version>' and whose class name is a key of
                AddMissingCommitLogsOneOffJob.MODEL_NAMES_TO_PROPERTIES.

        Yields:
            tuple(str, *). A status key ending in the snapshot model's class
            name, paired with 1 when nothing was changed or with the snapshot
            model ID when models were deleted or a commit log model was added.
        """
        job_class = AddMissingCommitLogsOneOffJob
        model_class_name = snapshot_model.__class__.__name__
        # The part after the last '-' of the snapshot ID is the version.
        model_id, version_str = snapshot_model.id.rsplit('-', 1)
        model_properties = job_class.MODEL_NAMES_TO_PROPERTIES[
            model_class_name]
        version = int(version_str)
        commit_log_id = (model_properties['id_string_format'] %
                         (model_id, version))
        commit_log_model = (model_properties['commit_log_model_class'].
                            get_by_id(commit_log_id))
        commit_logs_should_exist = True

        parent_model = (
            model_properties['parent_model_class'].get_by_id(model_id))
        # For exploration rights snapshots, 'create' and 'delete' commits are
        # not expected to have a commit log model.
        if model_class_name == 'ExplorationRightsSnapshotMetadataModel':
            if snapshot_model.commit_type in ['create', 'delete']:
                commit_logs_should_exist = False

        # Nothing to do: the commit log is present, or is not expected.
        if commit_log_model is not None or not commit_logs_should_exist:
            yield ('Found commit log model-%s' % model_class_name, 1)
            return

        if parent_model is None:
            yield ('Missing Parent Model-No changes-%s' % model_class_name, 1)
            return

        # A parent that is marked as deleted is handed to the configured
        # delete method with force_deletion=True instead of being repaired.
        if parent_model.deleted:
            model_properties['delete_method'](feconf.SYSTEM_COMMITTER_ID,
                                              parent_model.id,
                                              force_deletion=True)
            yield ('SUCCESS-Parent model marked deleted-' +
                   'Deleted all related models-%s' % (model_class_name),
                   snapshot_model.id)
            return

        # Build the missing commit log model from the snapshot's own fields.
        commit_log_model = model_properties['commit_log_model_class'](
            id=python_utils.UNICODE(commit_log_id),
            user_id=snapshot_model.committer_id,
            commit_type=snapshot_model.commit_type,
            commit_message=snapshot_model.commit_message,
            commit_cmds=snapshot_model.commit_cmds,
            created_on=snapshot_model.created_on,
            last_updated=snapshot_model.last_updated,
            version=version)
        setattr(commit_log_model, model_properties['id_field'], model_id)
        # NOTE(review): when the class name is in neither of the two sets
        # below, post_commit_status is not assigned here before it is read --
        # confirm that the model provides a suitable default.
        if model_class_name in (
                job_class.MODEL_NAMES_WITH_DEFAULT_COMMIT_STATUS):
            commit_log_model.post_commit_status = (
                constants.ACTIVITY_STATUS_PUBLIC)
        elif model_class_name in (
                job_class.MODEL_NAMES_WITH_COMMIT_STATUS_IN_RIGHTS):
            # The status is taken from the exploration rights at the same
            # version as the snapshot.
            rights_model = exp_models.ExplorationRightsModel.get_version(
                model_id, version)
            commit_log_model.post_commit_status = rights_model.status
        commit_log_model.post_commit_is_private = (
            commit_log_model.post_commit_status == (
                constants.ACTIVITY_STATUS_PRIVATE))
        # update_last_updated_time=False is passed so that the last_updated
        # value copied from the snapshot is presumably kept -- TODO confirm.
        commit_log_model.update_timestamps(update_last_updated_time=False)
        commit_log_model.put()
        yield ('SUCCESS-Added missing commit log model-%s' % model_class_name,
               snapshot_model.id)
Beispiel #25
0
def normalize_against_schema(obj, schema, apply_custom_validators=True):
    """Normalize an object against a schema and validate the result.

    The schema's type entry selects the normalization: bools are passed
    through, custom objects are delegated to their object class, dicts and
    lists are normalized element by element, floats and ints are coerced
    with float()/int(), and HTML/unicode values are converted to unicode
    text (HTML is additionally passed through html_cleaner.clean). After
    that the allowed choices are checked, post-normalizers are applied in
    order and, if requested, the schema's validators are run.

    Args:
        obj: *. The object to validate and normalize.
        schema: dict(str, *). The schema to validate and normalize the value
            against.
        apply_custom_validators: bool. Whether to validate the normalized
            object using the validators defined in the schema. Note that
            nested dict properties and list items are always normalized
            with the default value of this flag.

    Returns:
        *. The normalized object.

    Raises:
        AssertionError: The object fails to validate against the schema.
        Exception: The schema type is not one of the supported types.
    """
    schema_type = schema[SCHEMA_KEY_TYPE]
    normalized_obj = None

    if schema_type == SCHEMA_TYPE_BOOL:
        assert isinstance(obj, bool), ('Expected bool, received %s' % obj)
        normalized_obj = obj
    elif schema_type == SCHEMA_TYPE_CUSTOM:
        # Importing this at the top of the file causes a circular dependency.
        # TODO(sll): Either get rid of custom objects or find a way to merge
        # them into the schema framework -- probably the latter.
        from core.domain import obj_services
        obj_class = obj_services.Registry.get_object_class_by_type(
            schema[SCHEMA_KEY_OBJ_TYPE])
        if apply_custom_validators:
            normalized_obj = obj_class.normalize(obj)
        else:
            # Skip the class's own normalize() and only apply its schema,
            # keeping validators switched off.
            normalized_obj = normalize_against_schema(
                obj, obj_class.SCHEMA, apply_custom_validators=False)
    elif schema_type == SCHEMA_TYPE_DICT:
        assert isinstance(obj, dict), ('Expected dict, received %s' % obj)
        property_specs = schema[SCHEMA_KEY_PROPERTIES]
        expected_keys = set(
            [spec[SCHEMA_KEY_NAME] for spec in property_specs])
        actual_keys = set(obj.keys())
        # The dict must contain exactly the keys named by the schema.
        assert actual_keys == expected_keys, (
            'Missing keys: %s, Extra keys: %s' %
            (list(expected_keys - actual_keys),
             list(actual_keys - expected_keys)))

        normalized_obj = {
            spec[SCHEMA_KEY_NAME]: normalize_against_schema(
                obj[spec[SCHEMA_KEY_NAME]], spec[SCHEMA_KEY_SCHEMA])
            for spec in property_specs
        }
    elif schema_type == SCHEMA_TYPE_FLOAT:
        obj = float(obj)
        assert isinstance(obj, numbers.Real), (
            'Expected float, received %s' % obj)
        normalized_obj = obj
    elif schema_type == SCHEMA_TYPE_INT:
        obj = int(obj)
        assert isinstance(obj, numbers.Integral), (
            'Expected int, received %s' % obj)
        assert isinstance(obj, int), ('Expected int, received %s' % obj)
        normalized_obj = obj
    elif schema_type == SCHEMA_TYPE_HTML:
        assert isinstance(obj, python_utils.BASESTRING), (
            'Expected unicode HTML string, received %s' % obj)
        # Byte strings are decoded as UTF-8; anything else is coerced.
        obj = (
            obj.decode('utf-8') if isinstance(obj, bytes)
            else python_utils.UNICODE(obj))
        assert isinstance(obj, python_utils.UNICODE), (
            'Expected unicode, received %s' % obj)
        normalized_obj = html_cleaner.clean(obj)
    elif schema_type == SCHEMA_TYPE_LIST:
        assert isinstance(obj, list), ('Expected list, received %s' % obj)
        item_schema = schema[SCHEMA_KEY_ITEMS]
        if SCHEMA_KEY_LEN in schema:
            assert len(obj) == schema[SCHEMA_KEY_LEN]
        normalized_obj = [
            normalize_against_schema(element, item_schema)
            for element in obj
        ]
    elif schema_type == SCHEMA_TYPE_UNICODE:
        assert isinstance(obj, python_utils.BASESTRING), (
            'Expected unicode string, received %s' % obj)
        # Byte strings are decoded as UTF-8; anything else is coerced.
        obj = (
            obj.decode('utf-8') if isinstance(obj, bytes)
            else python_utils.UNICODE(obj))
        assert isinstance(obj, python_utils.UNICODE), (
            'Expected unicode, received %s' % obj)
        normalized_obj = obj
    else:
        raise Exception('Invalid schema type: %s' % schema[SCHEMA_KEY_TYPE])

    if SCHEMA_KEY_CHOICES in schema:
        allowed_choices = schema[SCHEMA_KEY_CHOICES]
        assert normalized_obj in allowed_choices, (
            'Received %s which is not in the allowed range of choices: %s' %
            (normalized_obj, schema[SCHEMA_KEY_CHOICES]))

    # When type normalization is finished, apply the post-normalizers in the
    # given order. Every entry other than 'id' is a keyword argument.
    if SCHEMA_KEY_POST_NORMALIZERS in schema:
        for normalizer_spec in schema[SCHEMA_KEY_POST_NORMALIZERS]:
            normalizer_kwargs = dict(normalizer_spec)
            normalizer_id = normalizer_kwargs.pop('id')
            normalized_obj = Normalizers.get(normalizer_id)(
                normalized_obj, **normalizer_kwargs)

    # Validate the normalized object.
    if apply_custom_validators and SCHEMA_KEY_VALIDATORS in schema:
        for validator_spec in schema[SCHEMA_KEY_VALIDATORS]:
            validator_kwargs = dict(validator_spec)
            validator_id = validator_kwargs.pop('id')
            assert get_validator(validator_id)(
                normalized_obj, **validator_kwargs), (
                    'Validation failed: %s (%s) for object %s' %
                    (validator_id, validator_kwargs, normalized_obj))

    return normalized_obj
Beispiel #26
0
    def handle_exception(self, exception, unused_debug_mode):
        """Overwrites the default exception handler.

        Converts the exception into an HTTP error response. A
        NotLoggedInException produces either a 401 JSON error or a redirect
        to the login page and is not logged. Every other exception is logged
        exactly once and rendered with a status code chosen by its type:
        404 for PageNotFoundException, 401 for UnauthorizedUserException,
        400 for InvalidInputException, 500 for InternalErrorException,
        503 for TemporaryMaintenanceException and 500 for anything else.

        Args:
            exception: Exception. The exception that was thrown.
            unused_debug_mode: bool. True if the web application is running
                in debug mode.
        """
        if isinstance(exception, self.NotLoggedInException):
            # This checks if the response should be JSON or HTML.
            # For GET requests, there is no payload, so we check against
            # GET_HANDLER_ERROR_RETURN_TYPE.
            # Otherwise, we check whether self.payload exists.
            if (self.payload is not None or
                    self.GET_HANDLER_ERROR_RETURN_TYPE ==
                    feconf.HANDLER_TYPE_JSON):
                self.error(401)
                self._render_exception(
                    401, {
                        'error': (
                            'You must be logged in to access this resource.')})
            else:
                self.redirect(user_services.create_login_url(self.request.uri))
            return

        # Log the exception a single time; all branches below share this
        # record, so no further logging.exception() call is needed.
        logging.exception('Exception raised: %s', exception)

        if isinstance(exception, self.PageNotFoundException):
            logging.warning('Invalid URL requested: %s', self.request.uri)
            self.error(404)
            self._render_exception(
                404, {
                    'error': 'Could not find the page %s.' % self.request.uri})
            return

        # Ordered (exception class, status code) pairs; the first matching
        # class wins, and unmatched exceptions fall back to 500.
        status_code_by_exception_class = (
            (self.UnauthorizedUserException, 401),
            (self.InvalidInputException, 400),
            (self.InternalErrorException, 500),
            (self.TemporaryMaintenanceException, 503),
        )
        status_code = 500
        for exception_class, candidate_code in status_code_by_exception_class:
            if isinstance(exception, exception_class):
                status_code = candidate_code
                break

        self.error(status_code)
        self._render_exception(
            status_code, {'error': python_utils.UNICODE(exception)})
Beispiel #27
0
 def post(self):
     """Handles POST requests.

     Reads the 'action' entry of the request payload, carries out the
     matching admin operation and renders a JSON response. The response is
     an empty dict for every action except
     'regenerate_topic_related_opportunities', which reports the resulting
     opportunities count. An action that matches none of the branches
     performs no operation and still renders the empty dict.

     Raises:
         Exception. Any error raised while handling the action; it is
             rendered as a JSON 'error' payload before being re-raised.
     """
     try:
         payload = self.payload
         action = payload.get('action')
         response = {}

         if action == 'reload_exploration':
             self._reload_exploration(payload.get('exploration_id'))
         elif action == 'reload_collection':
             self._reload_collection(payload.get('collection_id'))
         elif action == 'generate_dummy_explorations':
             generate_count = payload.get('num_dummy_exps_to_generate')
             publish_count = payload.get('num_dummy_exps_to_publish')
             # Reject bad counts up front, before generating anything.
             if not isinstance(generate_count, int):
                 raise self.InvalidInputException(
                     '%s is not a number' % generate_count)
             if not isinstance(publish_count, int):
                 raise self.InvalidInputException(
                     '%s is not a number' % publish_count)
             if generate_count < publish_count:
                 raise self.InvalidInputException(
                     'Generate count cannot be less than publish count')
             self._generate_dummy_explorations(
                 generate_count, publish_count)
         elif action == 'clear_search_index':
             search_services.clear_collection_search_index()
             search_services.clear_exploration_search_index()
         elif action == 'generate_dummy_new_structures_data':
             self._load_dummy_new_structures_data()
         elif action == 'flush_migration_bot_contribution_data':
             user_services.flush_migration_bot_contributions_model()
         elif action == 'save_config_properties':
             property_values = payload.get('new_config_property_values')
             logging.info(
                 '[ADMIN] %s saved config property values: %s' %
                 (self.user_id, property_values))
             for (name, value) in property_values.items():
                 config_services.set_property(self.user_id, name, value)
         elif action == 'revert_config_property':
             property_id = payload.get('config_property_id')
             logging.info(
                 '[ADMIN] %s reverted config property: %s' %
                 (self.user_id, property_id))
             config_services.revert_property(self.user_id, property_id)
         elif action == 'start_new_job':
             requested_job_type = payload.get('job_type')
             job_managers = (
                 jobs_registry.ONE_OFF_JOB_MANAGERS +
                 jobs_registry.AUDIT_JOB_MANAGERS)
             # Only the first manager whose class name matches is used.
             for manager_class in job_managers:
                 if manager_class.__name__ == requested_job_type:
                     manager_class.enqueue(manager_class.create_new())
                     break
         elif action == 'cancel_job':
             job_id = payload.get('job_id')
             requested_job_type = payload.get('job_type')
             job_managers = (
                 jobs_registry.ONE_OFF_JOB_MANAGERS +
                 jobs_registry.AUDIT_JOB_MANAGERS)
             for manager_class in job_managers:
                 if manager_class.__name__ == requested_job_type:
                     manager_class.cancel(job_id, self.user_id)
                     break
         elif action == 'start_computation':
             requested_computation = payload.get('computation_type')
             for manager_class in (
                     jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS):
                 if manager_class.__name__ == requested_computation:
                     manager_class.start_computation()
                     break
         elif action == 'stop_computation':
             requested_computation = payload.get('computation_type')
             for manager_class in (
                     jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS):
                 if manager_class.__name__ == requested_computation:
                     manager_class.stop_computation(self.user_id)
                     break
         elif action == 'upload_topic_similarities':
             recommendations_services.update_topic_similarities(
                 payload.get('data'))
         elif action == 'regenerate_topic_related_opportunities':
             regenerate = (
                 opportunity_services.
                 regenerate_opportunities_related_to_topic)
             response = {
                 'opportunities_count': regenerate(
                     payload.get('topic_id'),
                     delete_existing_opportunities=True)
             }

         self.render_json(response)
     except Exception as e:
         # Report the failure to the client, then let it propagate.
         self.render_json({'error': python_utils.UNICODE(e)})
         raise
Beispiel #28
0
def add_math_content_to_math_rte_components(html_string):
    """Replaces the attribute raw_latex-with-value in all Math component tags
    with a new attribute math_content-with-value.

    The new attribute holds a JSON dict with the normalized raw LaTeX and an
    'svg_filename' field that is set to an empty string. Math tags that
    already have math_content-with-value are left unchanged. Math tags with
    an empty raw_latex-with-value, or with neither attribute, are removed
    from the document.

    Args:
        html_string: str. HTML string to modify.

    Returns:
        str. Updated HTML string with all Math component tags having the new
        attribute, and with every '<br/>' replaced by '<br>'.
    """
    soup = bs4.BeautifulSoup(
        html_string.encode(encoding='utf-8'), 'html.parser')

    for math_tag in soup.findAll(name='oppia-noninteractive-math'):
        if not math_tag.has_attr('raw_latex-with-value'):
            # Nothing to migrate: keep tags that already carry the new
            # attribute and drop invalid tags that carry neither one.
            if not math_tag.has_attr('math_content-with-value'):
                math_tag.decompose()
            continue

        escaped_raw_latex = math_tag['raw_latex-with-value']
        if not escaped_raw_latex:
            # There was a case in prod where the attr value was empty. This
            # was dealt with manually in an earlier migration (states schema
            # v34), but we are not sure how it arose. We can't migrate those
            # snapshots manually, hence the addition of the logic here. After
            # all snapshots are migrated to states schema v42 (or above),
            # this branch will no longer be needed.
            math_tag.decompose()
            continue

        try:
            # The raw_latex attribute value should be enclosed in
            # double quotes(&amp;quot;) and should be a valid unicode
            # string.
            raw_latex = json.loads(unescape_html(escaped_raw_latex))
            normalized_raw_latex = objects.UnicodeString.normalize(raw_latex)
        except Exception as e:
            logging.exception(
                'Invalid raw_latex string found in the math tag : %s' % (
                    python_utils.UNICODE(e)
                )
            )
            python_utils.reraise_exception()

        # Normalize and validate the value before adding to the math tag.
        normalized_math_content_dict = (
            objects.MathExpressionContent.normalize({
                'raw_latex': normalized_raw_latex,
                'svg_filename': ''
            }))
        # Swap the old attribute for the new math_content-with-value one.
        math_tag['math_content-with-value'] = escape_html(
            json.dumps(normalized_math_content_dict, sort_keys=True))
        del math_tag['raw_latex-with-value']

    # We need to replace the <br/> tags (if any) with  <br> because for passing
    # the textangular migration tests we need to have only <br> tags.
    migrated_html = python_utils.UNICODE(soup)
    return migrated_html.replace('<br/>', '<br>')
Beispiel #29
0
def add_or_update_user_status(user_email, can_receive_email_updates):
    """Subscribes/unsubscribes an existing user or creates a new user with
    correct status in the mailchimp DB.

    NOTE: Callers should ensure that the user's corresponding
    UserEmailPreferencesModel.site_updates field is kept in sync.

    Args:
        user_email: str. Email ID of the user. Email is used to uniquely
            identify the user in the mailchimp DB.
        can_receive_email_updates: bool. Whether they want to be subscribed to
            the bulk email list or not.

    Returns:
        bool. Whether the user was successfully added to the db. (This will be
        False if the user was permanently deleted earlier and therefore cannot
        be added back.)

    Raises:
        Exception. Any error (other than the case where the user was permanently
            deleted earlier) raised by the mailchimp API.
    """
    client = _get_mailchimp_class()
    subscriber_hash = _get_subscriber_hash(user_email)

    try:
        member_details = client.lists.members.get(
            feconf.MAILCHIMP_AUDIENCE_ID, subscriber_hash)

        # A list member cannot be programmatically added back once
        # permanently deleted, so an existing member is never deleted; the
        # status is flipped only when it disagrees with the preference.
        is_subscribed = member_details['status'] == 'subscribed'
        if bool(can_receive_email_updates) != is_subscribed:
            desired_status = (
                'subscribed' if can_receive_email_updates
                else 'unsubscribed')
            client.lists.members.update(
                feconf.MAILCHIMP_AUDIENCE_ID,
                subscriber_hash,
                {
                    'email_address': user_email,
                    'status': desired_status
                })
    except mailchimpclient.MailChimpError as error:
        # MailChimpError exposes its message via error.message in Python2
        # only, which is deprecated in Python3. In Python3 the message can be
        # accessed directly
        # (https://github.com/VingtCinq/python-mailchimp/pull/65). As a
        # workaround that works on both, the error is converted to a string
        # and that string is parsed back into a dict.
        error_message = ast.literal_eval(python_utils.UNICODE(error))

        # Error 404 corresponds to "User does not exist"; any other status
        # is surfaced to the caller.
        if error_message['status'] != 404:
            raise Exception(error_message['detail'])

        # A missing user is only created when they want to be subscribed.
        if (can_receive_email_updates and
                not _create_user_in_mailchimp_db(user_email)):
            return False
    return True
Beispiel #30
0
    def post(self):
        """Handles POST requests.

        Looks up the 'action' entry of the normalized payload, performs the
        matching admin operation and renders a JSON response. The response
        is an empty dict for every action except
        'regenerate_topic_related_opportunities', which reports the
        resulting opportunities count. An action that matches none of the
        branches performs no operation and still renders the empty dict.

        Raises:
            InvalidInputException. The publish count exceeds the generate
                count, or updating the feature flag rules failed with a
                validation or flag-not-found error.
            Exception. Any other error raised while handling the action.
                Every error is logged and rendered as a JSON 'error'
                payload before python_utils.reraise_exception() is called.
        """
        payload = self.normalized_payload
        action = payload.get('action')
        try:
            response = {}

            if action == 'reload_exploration':
                self._reload_exploration(payload.get('exploration_id'))
            elif action == 'reload_collection':
                self._reload_collection(payload.get('collection_id'))
            elif action == 'generate_dummy_explorations':
                generate_count = payload.get('num_dummy_exps_to_generate')
                publish_count = payload.get('num_dummy_exps_to_publish')

                # Cannot publish more explorations than are generated.
                if generate_count < publish_count:
                    raise self.InvalidInputException(
                        'Generate count cannot be less than publish count')
                self._generate_dummy_explorations(
                    generate_count, publish_count)
            elif action == 'clear_search_index':
                search_services.clear_collection_search_index()
                search_services.clear_exploration_search_index()
            elif action == 'generate_dummy_new_structures_data':
                self._load_dummy_new_structures_data()
            elif action == 'generate_dummy_new_skill_data':
                self._generate_dummy_skill_and_questions()
            elif action == 'save_config_properties':
                property_values = payload.get('new_config_property_values')
                logging.info(
                    '[ADMIN] %s saved config property values: %s' %
                    (self.user_id, property_values))
                for (name, value) in property_values.items():
                    config_services.set_property(self.user_id, name, value)
            elif action == 'revert_config_property':
                property_id = payload.get('config_property_id')
                logging.info(
                    '[ADMIN] %s reverted config property: %s' %
                    (self.user_id, property_id))
                config_services.revert_property(self.user_id, property_id)
            elif action == 'upload_topic_similarities':
                recommendations_services.update_topic_similarities(
                    payload.get('data'))
            elif action == 'regenerate_topic_related_opportunities':
                regenerate = (
                    opportunity_services.
                    regenerate_opportunities_related_to_topic)
                response = {
                    'opportunities_count': regenerate(
                        payload.get('topic_id'),
                        delete_existing_opportunities=True)
                }
            elif action == 'update_feature_flag_rules':
                feature_name = payload.get('feature_name')
                rule_dicts = payload.get('new_rules')
                commit_message = payload.get('commit_message')

                # Report rule validation problems and unknown flags to the
                # client as invalid input.
                try:
                    feature_services.update_feature_flag_rules(
                        feature_name, self.user_id, commit_message,
                        rule_dicts)
                except (utils.ValidationError,
                        feature_services.FeatureFlagNotFoundException) as e:
                    raise self.InvalidInputException(e)
                logging.info(
                    '[ADMIN] %s updated feature %s with new rules: '
                    '%s.' % (self.user_id, feature_name, rule_dicts))

            self.render_json(response)
        except Exception as e:
            # Log and report the failure to the client, then propagate it.
            logging.exception('[ADMIN] %s', e)
            self.render_json({'error': python_utils.UNICODE(e)})
            python_utils.reraise_exception()