コード例 #1
0
    def handle(self, *args, **kwargs):
        """Rebuild the markdown of every non-PDF note that has static HTML.

        For each matching note the stored HTML is read from default storage,
        converted with html2text, and written to the note's NoteMarkdown
        (the existing one is updated, otherwise a new one is created).
        Progress is printed per note, followed by a final count.
        """
        queryset = Note.objects.only('static_html', 'mimetype', 'slug')
        candidates = queryset.filter(static_html=True).iterator()
        processed = 0

        for note in candidates:
            # Skip anything without static HTML, and PDFs.
            if not note.static_html or note.is_pdf():
                continue

            # A fresh converter is configured for every note.
            converter = html2text.HTML2Text()
            converter.google_doc = True
            converter.escape_snob = True
            converter.unicode_snob = True

            with default_storage.open(note.get_relative_s3_path(),
                                      'r') as source:
                raw_html = source.read()
                text = converter.handle(raw_html.decode('utf8', 'ignore'))
                if note.has_markdown():
                    record = note.notemarkdown
                    record.markdown = text
                else:
                    record = NoteMarkdown(note=note, markdown=text)
                record.save()

            processed += 1
            print('Processed {n}'.format(n=note))

        print('Processed %s notes' % processed)
コード例 #2
0
 def test_note_markdown_rendering(self):
     """A saved NoteMarkdown's html should equal the expected rendering.

     The expected markup carries rel="nofollow" and target="_blank" on the
     link.
     """
     source = """# This is fun\n[oh](http://yeah.com)"""
     expected = """<h1>This is fun</h1>\n<p><a href="http://yeah.com" rel="nofollow" target="_blank">oh</a></p>"""

     note_markdown = NoteMarkdown(note=self.note, markdown=source)
     note_markdown.save()

     self.assertHTMLEqual(note_markdown.html, expected)
コード例 #3
0
ファイル: tests.py プロジェクト: FinalsClub/karmaworld
    def test_note_rich_text_sanitization(self):
        """A saved NoteMarkdown's html should equal the sanitized markup.

        The input contains a <script> tag, a class attribute and a
        javascript: href; the expected output has none of them, and every
        link carries target="_blank" and rel="nofollow".
        """
        unsafe_html = """
            <script>unsafe</script>
            <h1 class='obtrusive'>Something</h1>
            <h2>OK</h2>
            &amp;
            &rdquo;
            <a href='javascript:alert("Oh no")'>This stuff</a>
            <a href='http://google.com'>That guy</a>
        """
        expected_html = u"""
            <h1>Something</h1>
            <h2>OK</h2>
            &amp;
            \u201d
            <a target='_blank' rel='nofollow'>This stuff</a>
            <a href="http://google.com" target="_blank" rel="nofollow">That guy</a>
        """

        note_markdown = NoteMarkdown(note=self.note, html=unsafe_html)
        note_markdown.save()

        self.assertHTMLEqual(note_markdown.html, expected_html)
コード例 #4
0
    def test_note_rich_text_sanitization(self):
        """Compare the html of a saved NoteMarkdown with the clean markup.

        The submitted markup includes a <script> element, a class attribute
        and a javascript: link target, none of which appear in the expected
        result; expected anchors carry target="_blank" and rel="nofollow".
        """
        submitted = """
            <script>unsafe</script>
            <h1 class='obtrusive'>Something</h1>
            <h2>OK</h2>
            &amp;
            &rdquo;
            <a href='javascript:alert("Oh no")'>This stuff</a>
            <a href='http://google.com'>That guy</a>
        """
        cleaned = u"""
            <h1>Something</h1>
            <h2>OK</h2>
            &amp;
            \u201d
            <a target='_blank' rel='nofollow'>This stuff</a>
            <a href="http://google.com" target="_blank" rel="nofollow">That guy</a>
        """

        rich_text = NoteMarkdown(note=self.note, html=submitted)
        rich_text.save()

        self.assertHTMLEqual(rich_text.html, cleaned)
コード例 #5
0
    def handle(self, *args, **kwargs):
        """Convert the static HTML of each non-PDF note into markdown.

        Notes flagged with static_html are streamed from the database; the
        HTML of each one is read from default storage, run through
        html2text, and saved on the note's NoteMarkdown (updating it when
        one exists, creating it otherwise). A line is printed per note and
        a total is printed at the end.
        """
        html_notes = (Note.objects
                      .only('static_html', 'mimetype', 'slug')
                      .filter(static_html=True)
                      .iterator())
        total = 0

        for note in html_notes:
            # Nothing to do for notes lacking static HTML, or for PDFs.
            if not note.static_html or note.is_pdf():
                continue

            # Each note gets its own freshly configured converter.
            parser = html2text.HTML2Text()
            parser.google_doc = True
            parser.escape_snob = True
            parser.unicode_snob = True

            with default_storage.open(note.get_relative_s3_path(), 'r') as fh:
                contents = fh.read()
                rendered = parser.handle(contents.decode('utf8', 'ignore'))
                if note.has_markdown():
                    entry = note.notemarkdown
                    entry.markdown = rendered
                else:
                    entry = NoteMarkdown(note=note, markdown=rendered)
                entry.save()

            total += 1
            print('Processed {n}'.format(n=note))

        print('Processed %s notes' % total)
コード例 #6
0
 def save(self, *args, **kwargs):
     """Save the note, apply its tags, un-hide it and store submitted HTML.

     The instance returned by the parent form's save() gets the cleaned
     tags, is made visible if it was hidden, and -- when it is editable and
     non-empty 'html' was submitted -- has that HTML validated and saved on
     its NoteMarkdown (created when missing). Returns the note instance.
     """
     # TODO: use transaction.atomic for this when we switch to Django 1.6+
     note = super(NoteForm, self).save(*args, **kwargs)
     note.tags.set(*self.cleaned_data['tags'])

     if note.is_hidden:
         note.is_hidden = False
         note.save()

     # Nothing more to do unless the note is editable and HTML was sent.
     if not (note.is_editable() and self.cleaned_data.get('html')):
         return note

     try:
         rich_text = note.notemarkdown
     except NoteMarkdown.DoesNotExist:
         rich_text = NoteMarkdown(note=note)

     rich_text.html = self.cleaned_data['html']
     rich_text.full_clean()
     rich_text.save()
     return note
コード例 #7
0
ファイル: forms.py プロジェクト: FinalsClub/karmaworld
 def save(self, *args, **kwargs):
     """Persist the form's note along with its tags and optional HTML body.

     After the parent save(), the cleaned tags are set on the note and a
     hidden note is made visible again. If the note is editable and the
     form carried non-empty 'html', that HTML is assigned to the note's
     NoteMarkdown (a new one is built if none exists), validated with
     full_clean() and saved. The note instance is returned.
     """
     # TODO: use transaction.atomic for this when we switch to Django 1.6+
     saved_note = super(NoteForm, self).save(*args, **kwargs)
     saved_note.tags.set(*self.cleaned_data['tags'])

     if saved_note.is_hidden:
         saved_note.is_hidden = False
         saved_note.save()

     # Bail out early when there is no editable HTML to store.
     if not (saved_note.is_editable() and self.cleaned_data.get('html')):
         return saved_note

     try:
         markdown_record = saved_note.notemarkdown
     except NoteMarkdown.DoesNotExist:
         markdown_record = NoteMarkdown(note=saved_note)

     markdown_record.html = self.cleaned_data['html']
     markdown_record.full_clean()
     markdown_record.save()
     return saved_note
コード例 #8
0
ファイル: tests.py プロジェクト: FinalsClub/karmaworld
 def test_note_markdown_rendering(self):
     """Check the html of a saved NoteMarkdown against the expected markup.

     The expected link has rel="nofollow" and target="_blank" attributes.
     """
     markdown_source = """# This is fun\n[oh](http://yeah.com)"""
     expected_html = """<h1>This is fun</h1>\n<p><a href="http://yeah.com" rel="nofollow" target="_blank">oh</a></p>"""

     rendered = NoteMarkdown(note=self.note, markdown=markdown_source)
     rendered.save()

     self.assertHTMLEqual(rendered.html, expected_html)
コード例 #9
0
ファイル: gdrive.py プロジェクト: dangan249/karmaworld
def convert_raw_document(raw_document, user=None):
    """Upload a raw document to Google Drive and build a saved Note from it.

    The document is uploaded to Drive and downloaded again, marked as
    processed, and converted to a Note. HTML for the note is extracted,
    filtered and sent to S3; for plain HTML content a NoteMarkdown is also
    created. Finally the note is linked to its uploader and saved.

    raw_document -- object providing get_file(), name, mimetype, fp_file,
                    is_processed, save() and convert_to_note().
    user         -- optional uploader; when omitted the uploader is looked
                    up through UserUploadMapping using raw_document.fp_file.

    NOTE(review): the visible body has no return statement, so callers get
    None rather than the Note -- confirm whether a return was intended.
    """
    fp_file = raw_document.get_file()

    # extract some properties from the document metadata
    filename = raw_document.name
    # NOTE(review): these prints look like leftover debugging output.
    print "this is the mimetype of the document to check:"
    # Captured BEFORE the ENML rewrite below, so the upload/download calls
    # still receive the original mimetype while the media object gets the
    # rewritten one. NOTE(review): confirm this asymmetry is intentional.
    mimetype = raw_document.mimetype
    print mimetype
    print ""

    # A special case for Evernotes
    if raw_document.mimetype == 'text/enml':
        raw_document.mimetype = 'text/html'

    original_content = fp_file.read()

    # Include mimetype parameter if there is one to include
    extra_flags = {'mimetype': raw_document.mimetype} if raw_document.mimetype \
                  else {}
    media = MediaInMemoryUpload(original_content, chunksize=1024*1024, \
                                resumable=True, **extra_flags)


    service = build_api_service()

    # upload to google drive
    file_dict = upload_to_gdrive(service, media, filename, mimetype=mimetype)

    # download from google drive
    content_dict = download_from_gdrive(service, file_dict, mimetype=mimetype)

    # this should have already happened, let's see why it hasn't
    raw_document.is_processed = True
    raw_document.save()

    note = raw_document.convert_to_note()

    # Cache the uploaded file's URL
    note.gdrive_url = file_dict['alternateLink']

    # Extract HTML from the appropriate place:
    #   - PDFs are converted directly from the original upload,
    #   - presentations are converted from the PDF that Drive returned,
    #   - otherwise Drive's own HTML is used and later turned into markdown.
    # If no branch matches, html stays empty but is still filtered and
    # sent to S3 below.
    html = ''
    convert_to_markdown = False
    if raw_document.mimetype == PDF_MIMETYPE:
        html = pdf2html(original_content)
    elif raw_document.mimetype in PPT_MIMETYPES:
        html = pdf2html(content_dict['pdf'])
    elif 'html' in content_dict and content_dict['html']:
        html = content_dict['html']
        convert_to_markdown = True
    # cleanup the HTML
    html = note.filter_html(html)

    # upload the HTML file to static host if it is not already there
    # (do_save=False: the note itself is saved later in this function)
    note.send_to_s3(html, do_save=False)

    # Raises KeyError if the download result has no 'text' entry.
    note.text = content_dict['text']

    if convert_to_markdown:
        h = html2text.HTML2Text()
        h.google_doc = True
        h.escape_snob = True
        h.unicode_snob = True
        # assumes html is a byte string at this point -- TODO confirm
        markdown = h.handle(html.decode('utf8', 'ignore'))

        note_markdown = NoteMarkdown(note=note, markdown=markdown)
        note_markdown.save()

    # If we know the user who uploaded this,
    # associate them with the note
    if user:
        # The note is not saved here; the final save below persists the user.
        note.user = user
        NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
    else:
        try:
            mapping = UserUploadMapping.objects.get(fp_file=raw_document.fp_file)
            note.user = mapping.user
            # Unlike the branch above, the note is saved before the karma
            # event is created.
            note.save()
            NoteKarmaEvent.create_event(mapping.user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # Best effort: the note simply stays without an owner.
            logger.info("Zero or multiple mappings found with fp_file " + raw_document.fp_file.name)

    # Finally, save whatever data we got back from google
    note.save()