def handle(self, *args, **kwargs):
    """Convert stored note HTML to markdown using html2text.

    Iterates over notes flagged with ``static_html``, skips PDFs, reads
    each note's HTML from default storage and saves the converted text on
    a NoteMarkdown row (``note.notemarkdown`` is reused when
    ``note.has_markdown()`` is true, otherwise a new row is built).
    Prints one line per processed note and a final total.
    """
    queryset = Note.objects.only('static_html', 'mimetype', 'slug')
    candidates = queryset.filter(static_html=True).iterator()

    converted_notes = 0
    for note in candidates:
        # Guard clause: only notes with static HTML that are not PDFs.
        if not note.static_html or note.is_pdf():
            continue

        # A fresh converter per note, configured the same way every time.
        converter = html2text.HTML2Text()
        converter.google_doc = True
        converter.escape_snob = True
        converter.unicode_snob = True

        with default_storage.open(note.get_relative_s3_path(), 'r') as html:
            raw_html = html.read()
            markdown = converter.handle(raw_html.decode('utf8', 'ignore'))

        if note.has_markdown():
            record = note.notemarkdown
            record.markdown = markdown
        else:
            record = NoteMarkdown(note=note, markdown=markdown)
        record.save()

        converted_notes += 1
        print('Processed {n}'.format(n=note))

    print('Processed %s notes' % converted_notes)
def test_note_markdown_rendering(self):
    """Check the ``html`` of a saved NoteMarkdown built from markdown text."""
    source = """# This is fun\n[oh](http://yeah.com)"""
    expected = """<h1>This is fun</h1>\n<p><a href="http://yeah.com" rel="nofollow" target="_blank">oh</a></p>"""

    note_markdown = NoteMarkdown(note=self.note, markdown=source)
    note_markdown.save()

    self.assertHTMLEqual(note_markdown.html, expected)
def save(self, *args, **kwargs):
    """Save the form's note, apply its tags, unhide it and store HTML.

    When the note is editable and the form supplied non-empty ``html``,
    that HTML is validated and saved on the note's NoteMarkdown row
    (created if the note does not have one yet).

    Returns the saved note instance.
    """
    # TODO: use transaction.atomic for this when we switch to Django 1.6+
    note = super(NoteForm, self).save(*args, **kwargs)
    note.tags.set(*self.cleaned_data['tags'])

    # A note saved through this form is always made visible again.
    if note.is_hidden:
        note.is_hidden = False
        note.save()

    # Nothing more to do unless the note is editable and HTML was given.
    if not note.is_editable():
        return note
    submitted_html = self.cleaned_data.get('html')
    if not submitted_html:
        return note

    try:
        markdown_record = note.notemarkdown
    except NoteMarkdown.DoesNotExist:
        markdown_record = NoteMarkdown(note=note)

    markdown_record.html = submitted_html
    markdown_record.full_clean()
    markdown_record.save()
    return note
def test_note_rich_text_sanitization(self):
    """Check that saving a NoteMarkdown yields the expected cleaned HTML."""
    # Input mixes a script tag, a class attribute and a javascript: link.
    dirty_html = """ <script>unsafe</script> <h1 class='obtrusive'>Something</h1> <h2>OK</h2> & ” <a href='javascript:alert("Oh no")'>This stuff</a> <a href='http://google.com'>That guy</a> """
    expected_html = u""" <h1>Something</h1> <h2>OK</h2> & \u201d <a target='_blank' rel='nofollow'>This stuff</a> <a href="http://google.com" target="_blank" rel="nofollow">That guy</a> """

    note_markdown = NoteMarkdown(note=self.note, html=dirty_html)
    note_markdown.save()

    self.assertHTMLEqual(note_markdown.html, expected_html)
def test_note_rich_text_sanitization(self):
    """Check that saving a NoteMarkdown yields the expected cleaned HTML."""
    # Arrange: markup containing a script tag, a class attribute and a
    # javascript: link alongside an ordinary link.
    submitted = """ <script>unsafe</script> <h1 class='obtrusive'>Something</h1> <h2>OK</h2> & ” <a href='javascript:alert("Oh no")'>This stuff</a> <a href='http://google.com'>That guy</a> """
    cleaned = u""" <h1>Something</h1> <h2>OK</h2> & \u201d <a target='_blank' rel='nofollow'>This stuff</a> <a href="http://google.com" target="_blank" rel="nofollow">That guy</a> """

    # Act
    record = NoteMarkdown(note=self.note, html=submitted)
    record.save()

    # Assert
    self.assertHTMLEqual(record.html, cleaned)
def handle(self, *args, **kwargs):
    """Convert stored note HTML to markdown using html2text.

    Walks every note flagged with ``static_html``, ignoring PDFs. For each
    one the HTML is read from default storage, converted, and saved on a
    NoteMarkdown row: ``note.notemarkdown`` when ``note.has_markdown()``
    is true, a newly built row otherwise. A line is printed per note and
    a total is printed at the end.
    """
    static_notes = (
        Note.objects
        .only('static_html', 'mimetype', 'slug')
        .filter(static_html=True)
        .iterator()
    )

    converted_notes = 0
    for note in static_notes:
        # Skip anything without static HTML, and skip PDFs.
        if not note.static_html or note.is_pdf():
            continue

        to_markdown = html2text.HTML2Text()
        to_markdown.google_doc = True
        to_markdown.escape_snob = True
        to_markdown.unicode_snob = True

        storage_path = note.get_relative_s3_path()
        with default_storage.open(storage_path, 'r') as html:
            decoded = html.read().decode('utf8', 'ignore')
            markdown = to_markdown.handle(decoded)

        if note.has_markdown():
            note_markdown = note.notemarkdown
            note_markdown.markdown = markdown
        else:
            note_markdown = NoteMarkdown(note=note, markdown=markdown)
        note_markdown.save()

        converted_notes += 1
        print('Processed {n}'.format(n=note))

    print('Processed %s notes' % converted_notes)
def test_note_markdown_rendering(self):
    """Check the ``html`` of a saved NoteMarkdown built from markdown text."""
    markdown_text = """# This is fun\n[oh](http://yeah.com)"""
    rendered = """<h1>This is fun</h1>\n<p><a href="http://yeah.com" rel="nofollow" target="_blank">oh</a></p>"""

    record = NoteMarkdown(note=self.note, markdown=markdown_text)
    record.save()

    self.assertHTMLEqual(record.html, rendered)
def convert_raw_document(raw_document, user=None):
    """Upload a raw document to Google Drive and turn it into a Note.

    The document's content is uploaded to and downloaded back from Google
    Drive, the raw document is marked as processed, and a Note is produced
    via ``raw_document.convert_to_note()``. HTML for the note comes from
    ``pdf2html`` (PDF and PPT mimetypes) or from the Drive download; it is
    filtered, sent to S3, and, when it came from Drive, also converted to
    markdown and stored as a NoteMarkdown.

    Args:
        raw_document: the uploaded document; must provide ``get_file()``,
            ``name``, ``mimetype``, ``fp_file``, ``save()`` and
            ``convert_to_note()``.
        user: optional uploader. When omitted, the uploader is looked up
            through UserUploadMapping using ``raw_document.fp_file``.

    Returns:
        None. The resulting note is saved as a side effect.
    """
    fp_file = raw_document.get_file()

    # extract some properties from the document metadata
    filename = raw_document.name
    # NOTE(review): mimetype is captured before the ENML rewrite below, so
    # the gdrive helpers receive the original value while the media upload
    # gets the rewritten one. This looks deliberate, but confirm.
    mimetype = raw_document.mimetype
    logger.debug("Mimetype of the document to check: %s", mimetype)

    # A special case for Evernotes
    if raw_document.mimetype == 'text/enml':
        raw_document.mimetype = 'text/html'

    original_content = fp_file.read()

    # Include mimetype parameter if there is one to include
    extra_flags = {}
    if raw_document.mimetype:
        extra_flags['mimetype'] = raw_document.mimetype
    media = MediaInMemoryUpload(original_content, chunksize=1024*1024,
                                resumable=True, **extra_flags)

    service = build_api_service()

    # upload to google drive
    file_dict = upload_to_gdrive(service, media, filename, mimetype=mimetype)

    # download from google drive
    content_dict = download_from_gdrive(service, file_dict, mimetype=mimetype)

    # this should have already happened, lets see why it hasn't
    raw_document.is_processed = True
    raw_document.save()

    note = raw_document.convert_to_note()

    # Cache the uploaded file's URL
    note.gdrive_url = file_dict['alternateLink']

    # Extract HTML from the appropriate place
    html = ''
    convert_to_markdown = False
    if raw_document.mimetype == PDF_MIMETYPE:
        html = pdf2html(original_content)
    elif raw_document.mimetype in PPT_MIMETYPES:
        html = pdf2html(content_dict['pdf'])
    elif 'html' in content_dict and content_dict['html']:
        html = content_dict['html']
        # Only HTML that came straight from Drive is turned into markdown.
        convert_to_markdown = True

    # cleanup the HTML
    html = note.filter_html(html)

    # upload the HTML file to static host if it is not already there
    note.send_to_s3(html, do_save=False)

    note.text = content_dict['text']

    if convert_to_markdown:
        h = html2text.HTML2Text()
        h.google_doc = True
        h.escape_snob = True
        h.unicode_snob = True
        markdown = h.handle(html.decode('utf8', 'ignore'))

        note_markdown = NoteMarkdown(note=note, markdown=markdown)
        note_markdown.save()

    # If we know the user who uploaded this,
    # associate them with the note
    if user:
        note.user = user
        NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
    else:
        try:
            mapping = UserUploadMapping.objects.get(fp_file=raw_document.fp_file)
            note.user = mapping.user
            note.save()
            NoteKarmaEvent.create_event(mapping.user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # Lazy %-formatting: no TypeError if the name is not a string.
            logger.info("Zero or multiple mappings found with fp_file %s",
                        raw_document.fp_file.name)

    # Finally, save whatever data we got back from google
    note.save()