Python sanitize_html_preserve_formattingの例、karmaworld.apps.notes.sanitizer.sanitize_html_preserve_formatting Pythonの例

コード例 #1

0

ファイルを表示

    def save(self, *args, **kwargs):
        """Fill in and sanitize ``html``, then save through the parent class.

        When ``markdown`` is set and ``html`` is empty, ``html`` is first
        generated from the markdown source. The resulting HTML is cleaned
        with the editable sanitizer if the related note reports itself as
        editable, and with the formatting-preserving sanitizer otherwise.
        """
        if self.markdown and not self.html:
            self.html = markdown.markdown(self.markdown)

        # Choose the cleaner first, then apply it exactly once.
        if self.note.is_editable():
            clean = sanitizer.sanitize_html_to_editable
        else:
            clean = sanitizer.sanitize_html_preserve_formatting
        self.html = clean(self.html)

        super(NoteMarkdown, self).save(*args, **kwargs)

コード例 #2

0

ファイルを表示

ファイル: models.py プロジェクト: FinalsClub/karmaworld

    def save(self, *args, **kwargs):
        """Sanitize the stored HTML before delegating to the parent ``save``.

        ``html`` is generated from ``markdown`` when markdown is present and
        no HTML has been set yet. It is then passed through one of two
        sanitizers, selected by whether the related note is editable.
        """
        if self.markdown and not self.html:
            self.html = markdown.markdown(self.markdown)

        # Editable notes get the editable cleaner; everything else keeps
        # its formatting.
        sanitize = (
            sanitizer.sanitize_html_to_editable
            if self.note.is_editable()
            else sanitizer.sanitize_html_preserve_formatting
        )
        self.html = sanitize(self.html)

        super(NoteMarkdown, self).save(*args, **kwargs)

コード例 #3

0

ファイルを表示

    def test_font_face_data_uri(self):
        """A data URI in an @font-face rule is replaced by an http(s) URL."""
        # Note: this data-uri is not a valid font (it's the red dot).
        html = '''<style>@font-face { src: url('data:application/font-woff;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg=='); }</style>'''

        s3ified = sanitizer.data_uris_to_s3(html)

        # The inline data URI must no longer appear in the output ...
        leftover_data_uri = re.search(r"url\('data:application", s3ified)
        self.assertFalse(leftover_data_uri,
                         "data URL not removed: {}".format(s3ified))

        # ... and an http(s) URL must appear in a url(...) instead.
        hosted_url = re.search(r"url\('https?://[^\)]+\)", s3ified)
        self.assertTrue(hosted_url,
                        "URL not inserted: {}".format(s3ified))

        # Ensure that cleaning is idempotent.
        resanitized = sanitizer.sanitize_html_preserve_formatting(s3ified)
        self.assertHTMLEqual(s3ified, resanitized)

コード例 #4

0

ファイルを表示

ファイル: tests.py プロジェクト: FinalsClub/karmaworld

    def test_font_face_data_uri(self):
        """Check that an @font-face data URI is converted to an http(s) URL."""
        data_uri_pattern = r"url\('data:application"
        remote_url_pattern = r"url\('https?://[^\)]+\)"

        # Note: this data-uri is not a valid font (it's the red dot).
        html = '''<style>@font-face { src: url('data:application/font-woff;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg=='); }</style>'''

        s3ified = sanitizer.data_uris_to_s3(html)

        # The embedded data URI has to be gone from the converted markup.
        self.assertFalse(
            re.search(data_uri_pattern, s3ified),
            "data URL not removed: {}".format(s3ified))
        # A remote URL has to have been written into a url(...) in its place.
        self.assertTrue(
            re.search(remote_url_pattern, s3ified),
            "URL not inserted: {}".format(s3ified))

        # Ensure that cleaning is idempotent.
        cleaned_again = sanitizer.sanitize_html_preserve_formatting(s3ified)
        self.assertHTMLEqual(s3ified, cleaned_again)

コード例 #5

0

ファイルを表示

ファイル: 0021_get_html_from_s3.py プロジェクト: FinalsClub/karmaworld

    def forwards(self, orm):
        """Copy each note's HTML from S3 into its NoteMarkdown row.

        Selects every ``notes.Note`` matching
        ``notemarkdown__html__isnull=True`` and fetches ``html/<slug>.html``
        from ``default_storage.bucket``. HTML that is found is passed through
        ``sanitizer.data_uris_to_s3``, sanitized (as editable when the note's
        category is in ``EDITABLE_CATEGORIES``, formatting-preserving
        otherwise) and saved on the note's ``NoteMarkdown``, which is created
        if it does not exist. Notes with no downloadable HTML are only
        recorded as failures.

        Set the ``NOTE_LIMIT_0021`` environment variable to an integer to cap
        how many notes (successful or failed) are visited in one run.

        Progress is written to stdout: ``:`` when a download starts, then
        ``]`` (editable), ``)`` (non-editable) or ``(`` (failed), with
        ``display_counts`` called after every 20 notes. A summary and the
        slugs of failed notes are printed at the end.
        """
        # Note: Don't use "from appname.models import ModelName".
        # Use orm.ModelName to refer to models in this application,
        # and orm['appname.ModelName'] for models in other applications.

        # keep score. save as lists for debugging purposes if needed.
        good = []
        edit = []
        nonedit = []
        bad = []

        # at the time of migration, editable categories are limited to
        EDITABLE_CATEGORIES = ('LECTURE_NOTES',)

        necessary_notes = orm['notes.Note'].objects.filter(
            notemarkdown__html__isnull=True)
        n_notes = necessary_notes.count()

        # perform migration in discrete chunks to deal with the transaction
        # (just delete the migration from the south table and run again)
        limitkey = 'NOTE_LIMIT_0021'
        sys.stdout.write('Running until ')
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if limitkey in os.environ:
            max_notes = int(os.environ[limitkey])
            display_counts(max_notes, n_notes)
        else:
            max_notes = n_notes
            display_counts(n_notes, n_notes)

        # visualization to show how well this is moving through a large
        # database.
        counter = 0
        display_counts(counter, max_notes)
        # find each Note without an html field, download its S3 html, and
        # store it in the local database.
        for note in necessary_notes:
            # Stop once the per-run limit is reached. Testing with >= before
            # each note means a failed download can no longer step over the
            # limit, and a limit of 0 processes nothing.
            if counter >= max_notes:
                break

            # download the s3 content
            html = ''
            # copy/pasted from model code for Note.get_relative_s3_path
            note_s3_path = 'html/{0}.html'.format(note.slug)
            sys.stdout.write(':')
            sys.stdout.flush()
            key = default_storage.bucket.get_key(note_s3_path)
            if key:
                html = key.read()

            # check the downloaded html
            if not html:
                sys.stdout.write('( ')
                bad.append(note.slug)
            else:
                good.append(note.slug)

                # clean the html in a consistent way with note uploads as of
                # the time of this migration.
                # handle embedded images from pdf2htmlEX or other sources
                html = sanitizer.data_uris_to_s3(html)
                if note.category in EDITABLE_CATEGORIES:
                    # make HTML editable
                    html = sanitizer.sanitize_html_to_editable(html)
                    sys.stdout.write(']')
                    edit.append(note)
                else:
                    # clean up HTML without concern for editing
                    html = sanitizer.sanitize_html_preserve_formatting(html)
                    sys.stdout.write(')')
                    nonedit.append(note)

                # store the html in the corresponding NoteMarkdown object
                nmd = orm['notes.NoteMarkdown'].objects.get_or_create(
                    note=note)[0]
                nmd.html = html
                nmd.save()
                sys.stdout.write(' ')

            # manage the display; failed notes count toward the line width
            # and the limit exactly like migrated ones.
            counter += 1
            # track 20 notes per line
            if counter % 20 == 0:
                # finish off previous line and start new line
                display_counts(counter, max_notes)
                # flush per line, just in case it isn't outputting
                sys.stdout.flush()

        # Display the score. Single-argument print(...) behaves the same on
        # Python 2 and Python 3.
        print("Migrated {0} notes and failed to migrate {1} notes.".format(
            len(good), len(bad)))
        print("Of good notes, {0} are editable and {1} are not.".format(
            len(edit), len(nonedit)))

        print("Failed list:")
        for slug in bad:
            print(slug)

コード例 #6

0

ファイルを表示

    def forwards(self, orm):
        """Copy each note's HTML from S3 into its NoteMarkdown row.

        Selects every ``notes.Note`` matching
        ``notemarkdown__html__isnull=True`` and fetches ``html/<slug>.html``
        from ``default_storage.bucket``. HTML that is found is passed through
        ``sanitizer.data_uris_to_s3``, sanitized (as editable when the note's
        category is in ``EDITABLE_CATEGORIES``, formatting-preserving
        otherwise) and saved on the note's ``NoteMarkdown``, which is created
        if it does not exist. Notes with no downloadable HTML are only
        recorded as failures.

        Set the ``NOTE_LIMIT_0021`` environment variable to an integer to cap
        how many notes (successful or failed) are visited in one run.

        Progress is written to stdout: ``:`` when a download starts, then
        ``]`` (editable), ``)`` (non-editable) or ``(`` (failed), with
        ``display_counts`` called after every 20 notes. A summary and the
        slugs of failed notes are printed at the end.
        """
        # Note: Don't use "from appname.models import ModelName".
        # Use orm.ModelName to refer to models in this application,
        # and orm['appname.ModelName'] for models in other applications.

        # keep score. save as lists for debugging purposes if needed.
        good = []
        edit = []
        nonedit = []
        bad = []

        # at the time of migration, editable categories are limited to
        EDITABLE_CATEGORIES = ('LECTURE_NOTES', )

        necessary_notes = orm['notes.Note'].objects.filter(
            notemarkdown__html__isnull=True)
        n_notes = necessary_notes.count()

        # perform migration in discrete chunks to deal with the transaction
        # (just delete the migration from the south table and run again)
        limitkey = 'NOTE_LIMIT_0021'
        sys.stdout.write('Running until ')
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if limitkey in os.environ:
            max_notes = int(os.environ[limitkey])
            display_counts(max_notes, n_notes)
        else:
            max_notes = n_notes
            display_counts(n_notes, n_notes)

        # visualization to show how well this is moving through a large
        # database.
        counter = 0
        display_counts(counter, max_notes)
        # find each Note without an html field, download its S3 html, and
        # store it in the local database.
        for note in necessary_notes:
            # Stop once the per-run limit is reached. Testing with >= before
            # each note means a failed download can no longer step over the
            # limit, and a limit of 0 processes nothing.
            if counter >= max_notes:
                break

            # download the s3 content
            html = ''
            # copy/pasted from model code for Note.get_relative_s3_path
            note_s3_path = 'html/{0}.html'.format(note.slug)
            sys.stdout.write(':')
            sys.stdout.flush()
            key = default_storage.bucket.get_key(note_s3_path)
            if key:
                html = key.read()

            # check the downloaded html
            if not html:
                sys.stdout.write('( ')
                bad.append(note.slug)
            else:
                good.append(note.slug)

                # clean the html in a consistent way with note uploads as of
                # the time of this migration.
                # handle embedded images from pdf2htmlEX or other sources
                html = sanitizer.data_uris_to_s3(html)
                if note.category in EDITABLE_CATEGORIES:
                    # make HTML editable
                    html = sanitizer.sanitize_html_to_editable(html)
                    sys.stdout.write(']')
                    edit.append(note)
                else:
                    # clean up HTML without concern for editing
                    html = sanitizer.sanitize_html_preserve_formatting(html)
                    sys.stdout.write(')')
                    nonedit.append(note)

                # store the html in the corresponding NoteMarkdown object
                nmd = orm['notes.NoteMarkdown'].objects.get_or_create(
                    note=note)[0]
                nmd.html = html
                nmd.save()
                sys.stdout.write(' ')

            # manage the display; failed notes count toward the line width
            # and the limit exactly like migrated ones.
            counter += 1
            # track 20 notes per line
            if counter % 20 == 0:
                # finish off previous line and start new line
                display_counts(counter, max_notes)
                # flush per line, just in case it isn't outputting
                sys.stdout.flush()

        # Display the score. Single-argument print(...) behaves the same on
        # Python 2 and Python 3.
        print("Migrated {0} notes and failed to migrate {1} notes.".format(
            len(good), len(bad)))
        print("Of good notes, {0} are editable and {1} are not.".format(
            len(edit), len(nonedit)))

        print("Failed list:")
        for slug in bad:
            print(slug)