def process_entry(self, e, appid):
    """Import one feed entry as a published blog post, unless it already exists.

    The entry text is taken from ``e.get('content')`` or, failing that, from
    ``e.get('summary_detail')``.  Parts whose ``type`` is ``text/html`` are
    converted with ``html2text``; every other part goes through
    ``plain2markdown``.  When no usable part is present the entry's
    ``summary`` / ``subtitle`` / ``title`` attribute is used instead.  A link
    back to ``e.link`` is always appended.

    A ``BM.BlogPost`` is created only when no post with the same base slug
    is found for ``appid``.

    :param e: feed entry; read via ``.title``, ``.link``, ``.updated_parsed``
        and ``.get(...)``
    :param appid: value stored as the post's ``app_config_id``
    """
    title = e.title
    allura_base.log.info(" ...entry '%s'", title)

    # Build a real list rather than relying on filter(): on Python 3 filter()
    # returns a lazy iterator that is always truthy, which would make the
    # summary fallback below unreachable.
    candidates = e.get('content') or [e.get('summary_detail')]
    parsed_content = [ct for ct in candidates if ct]

    if parsed_content:
        pieces = []
        for ct in parsed_content:
            if ct.type != 'text/html':
                pieces.append(plain2markdown(ct.value))
            else:
                html2md = html2text.HTML2Text(baseurl=e.link)
                html2md.escape_snob = True
                pieces.append(html2md.handle(ct.value))
        content = u''.join(pieces)
    else:
        content = plain2markdown(
            getattr(e, 'summary',
                    getattr(e, 'subtitle',
                            getattr(e, 'title'))))

    content += u' [link](%s)' % e.link

    # updated_parsed is interpreted as UTC (calendar.timegm) and stored as a
    # naive datetime.
    updated = datetime.utcfromtimestamp(calendar.timegm(e.updated_parsed))
    base_slug = BM.BlogPost.make_base_slug(title, updated)
    b_count = BM.BlogPost.query.find(
        dict(slug=base_slug, app_config_id=appid)).count()
    if b_count == 0:
        post = BM.BlogPost(title=title, text=content, timestamp=updated,
                           app_config_id=appid, state='published')
        post.neighborhood_id = c.project.neighborhood_id
        post.make_slug()
        post.commit()
def process_entry(self, e, appid):
    """Create a published ``BM.BlogPost`` from feed entry ``e`` if it is new.

    Text comes from the entry's ``content`` parts (or its ``summary_detail``
    when there is no content).  ``text/html`` parts are run through
    ``html2text.HTML2Text``; all other parts through ``plain2markdown``.  If
    neither source yields a part, the ``summary``, ``subtitle`` or ``title``
    attribute is used.  A trailing link to ``e.link`` is appended in every case.

    Nothing is written when a post with the same base slug already exists
    for ``appid``.

    :param e: feed entry object supporting attribute access and ``.get()``
    :param appid: ``app_config_id`` used for the lookup and the new post
    """
    title = e.title
    allura_base.log.info(" ...entry '%s'", title)

    # A list comprehension keeps the emptiness test below meaningful on both
    # Python 2 and Python 3 (a Python 3 filter object is always truthy).
    parsed_content = [
        part
        for part in (e.get('content') or [e.get('summary_detail')])
        if part
    ]

    if not parsed_content:
        content = plain2markdown(
            getattr(e, 'summary',
                    getattr(e, 'subtitle',
                            getattr(e, 'title'))))
    else:
        content = u''
        for part in parsed_content:
            if part.type == 'text/html':
                html2md = html2text.HTML2Text(baseurl=e.link)
                html2md.escape_snob = True
                content += html2md.handle(part.value)
            else:
                content += plain2markdown(part.value)

    content += u' [link](%s)' % e.link

    # timegm() treats the struct_time as UTC; the result is a naive datetime.
    updated = datetime.utcfromtimestamp(calendar.timegm(e.updated_parsed))
    base_slug = BM.BlogPost.make_base_slug(title, updated)
    existing = BM.BlogPost.query.find(
        dict(slug=base_slug, app_config_id=appid)).count()
    if existing == 0:
        post = BM.BlogPost(title=title, text=content, timestamp=updated,
                           app_config_id=appid, state='published')
        post.neighborhood_id = c.project.neighborhood_id
        post.make_slug()
        post.commit()
def _as_markdown(tag, project_name): fragments = [] for fragment in tag: if getattr(fragment, 'name', None) == 'a': href = urlparse(fragment['href']) qs = parse_qs(href.query) gc_link = not href.netloc or href.netloc == 'code.google.com' path_parts = href.path.split('/') target_project = None if gc_link: if len(path_parts) >= 5 and path_parts[1] == 'a': target_project = '/'.join(path_parts[1:5]) elif len(path_parts) >= 3: target_project = path_parts[2] internal_link = target_project == project_name if gc_link and internal_link and 'id' in qs: # rewrite issue 123 project-internal issue links fragment = '[%s](#%s)' % (fragment.text, qs['id'][0]) elif gc_link and internal_link and 'r' in qs: # rewrite r123 project-internal revision links fragment = '[r%s]' % qs['r'][0] elif gc_link: # preserve GC-internal links (probably issue PROJECT:123 # inter-project issue links) fragment = '[%s](%s)' % ( h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True), # possibly need to adjust this URL for /a/ hosted domain URLs, # but it seems fragment['href'] always starts with / so it replaces the given path urljoin( 'https://code.google.com/p/%s/issues/' % project_name, fragment['href']), ) else: # convert all other links to Markdown syntax fragment = '[%s](%s)' % (fragment.text, fragment['href']) elif getattr(fragment, 'name', None) == 'i': # preserve styling of "(No comment was entered for this change.)" # messages fragment = '*%s*' % h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True) elif getattr(fragment, 'name', None) == 'b': # preserve styling of issue template fragment = '**%s**' % h.plain2markdown( fragment.text, preserve_multiple_spaces=True, has_html_entities=True) elif getattr(fragment, 'name', None) == 'br': # preserve forced line-breaks fragment = '\n' else: # convert all others to plain MD fragment = h.plain2markdown(unicode(fragment), preserve_multiple_spaces=True, has_html_entities=True) 
fragments.append(fragment) return ''.join(fragments).strip()
def _as_markdown(tag, project_name): fragments = [] for fragment in tag: if getattr(fragment, 'name', None) == 'a': href = urlparse(fragment['href']) qs = parse_qs(href.query) gc_link = not href.netloc or href.netloc == 'code.google.com' path_parts = href.path.split('/') target_project = None if gc_link: if len(path_parts) >= 5 and path_parts[1] == 'a': target_project = '/'.join(path_parts[1:5]) elif len(path_parts) >= 3: target_project = path_parts[2] internal_link = target_project == project_name if gc_link and internal_link and 'id' in qs: # rewrite issue 123 project-internal issue links fragment = '[%s](#%s)' % (fragment.text, qs['id'][0]) elif gc_link and internal_link and 'r' in qs: # rewrite r123 project-internal revision links fragment = '[r%s]' % qs['r'][0] elif gc_link: # preserve GC-internal links (probably issue PROJECT:123 # inter-project issue links) fragment = '[%s](%s)' % ( h.plain2markdown( fragment.text, preserve_multiple_spaces=True, has_html_entities=True), # possibly need to adjust this URL for /a/ hosted domain URLs, # but it seems fragment['href'] always starts with / so it replaces the given path urljoin('https://code.google.com/p/%s/issues/' % project_name, fragment['href']), ) else: # convert all other links to Markdown syntax fragment = '[%s](%s)' % (fragment.text, fragment['href']) elif getattr(fragment, 'name', None) == 'i': # preserve styling of "(No comment was entered for this change.)" # messages fragment = '*%s*' % h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True) elif getattr(fragment, 'name', None) == 'b': # preserve styling of issue template fragment = '**%s**' % h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True) elif getattr(fragment, 'name', None) == 'br': # preserve forced line-breaks fragment = '\n' else: # convert all others to plain MD fragment = h.plain2markdown( unicode(fragment), preserve_multiple_spaces=True, has_html_entities=True) 
fragments.append(fragment) return ''.join(fragments).strip()
def test_plain2markdown(): """Test plain2markdown using fallback regexp to escape markdown. Potentially MD-special characters are aggressively escaped, as without knowledge of the MD parsing rules it's better to be excessive but safe. The first assertion compares a multi-line sample against its escaped form; the remaining assertions call h.plain2markdown with preserve_multiple_spaces=True and False on short inputs, including one with a leading tab. """ text = '''paragraph 4 spaces before this *blah* here's a <tag> that should be <b>preserved</b> Literal > Ò ¼ & & ሿ M & Ms - amp doesn't get escaped http://blah.com/?x=y&a=b - not escaped either back\\-slash escaped ''' expected = '''paragraph 4 spaces before this \*blah\* here's a <tag> that should be <b>preserved</b> Literal &gt; &Ograve; &frac14; &amp; &\#38; &\#x123F; M & Ms \- amp doesn't get escaped http://blah\.com/?x=y&a=b \- not escaped either back\\\\\-slash escaped ''' dd.assert_equal(h.plain2markdown(text), expected) dd.assert_equal( h.plain2markdown('a foo bar\n\n code here?', preserve_multiple_spaces=True), 'a foo bar\n\n code here?') dd.assert_equal( h.plain2markdown('\ttab before (stuff)', preserve_multiple_spaces=True), ' tab before \(stuff\)') dd.assert_equal( h.plain2markdown('\ttab before (stuff)', preserve_multiple_spaces=False), 'tab before \(stuff\)')
def process_fields(self, ticket, issue):
    """Copy summary, status, dates, votes, description and attachments
    from ``issue`` onto ``ticket``.

    Dates are parsed with the ``'%c'`` format.  The description is prefixed
    with the original creator and, when the issue has an owner, the original
    owner; the issue body is escaped with ``h.plain2markdown``.

    :param ticket: object whose attributes are assigned and whose
        ``add_multiple_attachments`` is called
    :param issue: object providing the ``get_issue_*`` accessors
    """
    ticket.summary = issue.get_issue_summary()
    ticket.status = issue.get_issue_status()
    ticket.created_date = datetime.strptime(
        issue.get_issue_created_date(), '%c')
    ticket.mod_date = datetime.strptime(issue.get_issue_mod_date(), '%c')
    ticket.votes_up = issue.get_issue_stars()
    ticket.votes = issue.get_issue_stars()

    owner = issue.get_issue_owner()
    # Unicode template, like the description below: formatting a byte-string
    # template with a non-ASCII unicode owner raises UnicodeEncodeError on
    # Python 2.
    if owner:
        owner_line = u'*Originally owned by:* {owner}\n'.format(owner=owner)
    else:
        owner_line = u''

    ticket.description = (
        u'*Originally created by:* {creator}\n'
        u'{owner}'
        u'\n'
        u'{body}'
    ).format(
        creator=issue.get_issue_creator(),
        owner=owner_line,
        body=h.plain2markdown(
            issue.get_issue_description(),
            preserve_multiple_spaces=True,
            has_html_entities=True),
    )
    ticket.add_multiple_attachments(issue.get_issue_attachments())
def test_plain2markdown_with_html2text(): """Test plain2markdown using html2text to escape markdown, if available. The first assertion compares a multi-line sample against its escaped form; the remaining assertions call h.plain2markdown with preserve_multiple_spaces=True and False on short inputs, including one with a leading tab.""" text = '''paragraph 4 spaces before this *blah* here's a <tag> that should be <b>preserved</b> Literal > Ò ¼ & & ሿ M & Ms - doesn't get escaped http://blah.com/?x=y&a=b - not escaped either ''' expected = '''paragraph 4 spaces before this \*blah\* here's a <tag> that should be <b>preserved</b> Literal &gt; &Ograve; &frac14; &amp; &\#38; &\#x123F; M & Ms - doesn't get escaped http://blah.com/?x=y&a=b - not escaped either ''' dd.assert_equal(h.plain2markdown(text), expected) dd.assert_equal( h.plain2markdown('a foo bar\n\n code here?', preserve_multiple_spaces=True), 'a foo bar\n\n code here?') dd.assert_equal( h.plain2markdown('\ttab before (stuff)', preserve_multiple_spaces=True), ' tab before \(stuff\)') dd.assert_equal( h.plain2markdown('\ttab before (stuff)', preserve_multiple_spaces=False), 'tab before \(stuff\)')
def _as_markdown(tag, project_name): fragments = [] for fragment in tag: if getattr(fragment, "name", None) == "a": href = urlparse(fragment["href"]) qs = parse_qs(href.query) gc_link = not href.netloc or href.netloc == "code.google.com" path_parts = href.path.split("/") target_project = path_parts[2] if gc_link and len(path_parts) >= 3 else "" internal_link = target_project == project_name if gc_link and internal_link and "id" in qs: # rewrite issue 123 project-internal issue links fragment = "[%s](#%s)" % (fragment.text, qs["id"][0]) elif gc_link and internal_link and "r" in qs: # rewrite r123 project-internal revision links fragment = "[r%s]" % qs["r"][0] elif gc_link: # preserve GC-internal links (probably issue PROJECT:123 inter-project issue links) fragment = "[%s](%s)" % ( h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True), urljoin("https://code.google.com/p/%s/issues/" % project_name, fragment["href"]), ) else: # convert all other links to Markdown syntax fragment = "[%s](%s)" % (fragment.text, fragment["href"]) elif getattr(fragment, "name", None) == "i": # preserve styling of "(No comment was entered for this change.)" messages fragment = "*%s*" % h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True) elif getattr(fragment, "name", None) == "b": # preserve styling of issue template fragment = "**%s**" % h.plain2markdown(fragment.text, preserve_multiple_spaces=True, has_html_entities=True) elif getattr(fragment, "name", None) == "br": # preserve forced line-breaks fragment = "\n" else: # convert all others to plain MD fragment = h.plain2markdown(unicode(fragment), preserve_multiple_spaces=True, has_html_entities=True) fragments.append(fragment) return "".join(fragments).strip()
def annotated_text(self):
    """Return the Markdown text for this item.

    The result starts with an "Originally posted by" line naming
    ``self.author``, followed by ``self.body`` escaped through
    ``h.plain2markdown``, followed by one bold-labelled line per entry in
    ``self.updates``.
    """
    template = u'*Originally posted by:* {author}\n\n{body}\n\n{updates}'
    author = self.author
    body = h.plain2markdown(
        self.body, preserve_multiple_spaces=True, has_html_entities=True)
    update_lines = [
        '**%s** %s' % (label, value)
        for label, value in self.updates.items()
    ]
    return template.format(
        author=author, body=body, updates='\n'.join(update_lines))
def annotated_text(self):
    """Build the annotated Markdown for this item: an attribution line for
    ``self.author``, the escaped ``self.body``, then each ``self.updates``
    entry on its own line with the key in bold.
    """
    author = self.author
    escaped_body = h.plain2markdown(
        self.body,
        preserve_multiple_spaces=True,
        has_html_entities=True,
    )

    changes = []
    for key, val in self.updates.items():
        changes.append('**%s** %s' % (key, val))

    layout = (
        u'*Originally posted by:* {author}\n'
        u'\n'
        u'{body}\n'
        u'\n'
        u'{updates}'
    )
    return layout.format(
        author=author,
        body=escaped_body,
        updates='\n'.join(changes),
    )
def process_fields(self, ticket, issue):
    """Populate ``ticket`` from the values exposed by ``issue``.

    Assigns summary, status, created/modified dates (parsed with ``'%c'``)
    and star counts, builds a Markdown description naming the original
    creator and (if any) owner ahead of the escaped issue body, and finally
    hands the issue's attachments to ``ticket.add_multiple_attachments``.

    :param ticket: destination object; attributes are set on it
    :param issue: source object providing the ``get_issue_*`` accessors
    """
    ticket.summary = issue.get_issue_summary()
    ticket.status = issue.get_issue_status()
    ticket.created_date = datetime.strptime(
        issue.get_issue_created_date(), '%c')
    ticket.mod_date = datetime.strptime(issue.get_issue_mod_date(), '%c')
    ticket.votes_up = issue.get_issue_stars()
    ticket.votes = issue.get_issue_stars()

    # Keep the owner line unicode so it matches the description template
    # below; a byte-string template raises UnicodeEncodeError on Python 2
    # when the owner name contains non-ASCII characters.
    owner = issue.get_issue_owner()
    owner_line = u''
    if owner:
        owner_line = u'*Originally owned by:* {owner}\n'.format(owner=owner)

    description_template = (
        u'*Originally created by:* {creator}\n'
        u'{owner}'
        u'\n'
        u'{body}'
    )
    ticket.description = description_template.format(
        creator=issue.get_issue_creator(),
        owner=owner_line,
        body=h.plain2markdown(
            issue.get_issue_description(),
            preserve_multiple_spaces=True,
            has_html_entities=True),
    )
    ticket.add_multiple_attachments(issue.get_issue_attachments())