def import_tool(self, project, user, project_name, mount_point=None,
                mount_label=None, **kw):
    import_id_converter = ImportIdConverter.get()
    project_name = '%s/%s' % (kw['user_name'], project_name)
    extractor = GitHubProjectExtractor(project_name, user=user)
    if not extractor.has_tracker():
        return
    app = project.install_app('tickets', mount_point, mount_label,
                              EnableVoting=False,
                              open_status_names='open',
                              closed_status_names='closed',
                              import_id={
                                  'source': self.source,
                                  'project_name': project_name,
                              })
    self.github_markdown_converter = GitHubMarkdownConverter(
        kw['user_name'], project_name)
    ThreadLocalORMSession.flush_all()
    try:
        M.session.artifact_orm_session._get().skip_mod_date = True
        with h.push_config(c, user=M.User.anonymous(), app=app):
            for ticket_num, issue in extractor.iter_issues():
                self.max_ticket_num = max(ticket_num, self.max_ticket_num)
                ticket = TM.Ticket(
                    app_config_id=app.config._id,
                    custom_fields=dict(),
                    ticket_num=ticket_num,
                    import_id=import_id_converter.expand(ticket_num, app))
                self.process_fields(extractor, ticket, issue)
                self.process_comments(extractor, ticket, issue)
                self.process_events(extractor, ticket, issue)
                self.process_milestones(ticket, issue)
                session(ticket).flush(ticket)
                session(ticket).expunge(ticket)
            app.globals.custom_fields = self.postprocess_milestones()
            app.globals.last_ticket_num = self.max_ticket_num
            ThreadLocalORMSession.flush_all()
        M.AuditLog.log(
            'import tool %s from %s on %s' % (
                app.config.options.mount_point,
                project_name,
                self.source),
            project=project, user=user, url=app.url)
        g.post_event('project_updated')
        app.globals.invalidate_bin_counts()
        return app
    finally:
        M.session.artifact_orm_session._get().skip_mod_date = False
def test_process_fields(self):
    ticket = mock.Mock()
    issue = {
        'title': 'title',
        'state': 'New',
        'created_at': 'created_at',
        'updated_at': 'updated_at',
        'assignee': {'login': 'owner'},
        'user': {'login': 'creator'},
        'body': 'hello',
        'labels': [{'name': 'first'}, {'name': 'second'}],
    }
    importer = tracker.GitHubTrackerImporter()
    importer.github_markdown_converter = GitHubMarkdownConverter(
        'user', 'project')
    extractor = mock.Mock()
    extractor.urlopen().read.return_value = 'data'
    with mock.patch.object(tracker, 'datetime') as dt:
        dt.strptime.side_effect = lambda s, f: s
        importer.process_fields(extractor, ticket, issue)
        self.assertEqual(ticket.summary, 'title')
        self.assertEqual(
            ticket.description,
            '*Originally created by:* [creator](https://github.com/creator)\n'
            '*Originally owned by:* [owner](https://github.com/owner)\n'
            '\nhello')
        self.assertEqual(ticket.status, 'New')
        self.assertEqual(ticket.created_date, 'created_at')
        self.assertEqual(ticket.mod_date, 'updated_at')
        self.assertEqual(dt.strptime.call_args_list, [
            mock.call('created_at', '%Y-%m-%dT%H:%M:%SZ'),
            mock.call('updated_at', '%Y-%m-%dT%H:%M:%SZ'),
        ])
        self.assertEqual(ticket.labels, ['first', 'second'])
def test_github_markdown_converted_in_comments(self):
    ticket = mock.Mock()
    extractor = mock.Mock()
    body = '''Hello

```python
def hello(name):
    print "Hello, " + name
```'''
    body_converted = '''*Originally posted by:* [me](https://github.com/me)

Hello

    :::python
    def hello(name):
        print "Hello, " + name'''
    issue = {'comments_url': '/comments'}
    extractor.iter_comments.return_value = [
        {
            'body': body,
            'created_at': '2013-08-26T16:57:53Z',
            'user': {'login': 'me'},
        }
    ]
    importer = tracker.GitHubTrackerImporter()
    importer.github_markdown_converter = GitHubMarkdownConverter(
        'user', 'project')
    importer.process_comments(extractor, ticket, issue)
    self.assertEqual(
        ticket.discussion_thread.add_post.call_args_list[0],
        mock.call(
            text=body_converted,
            timestamp=datetime(2013, 8, 26, 16, 57, 53),
            ignore_security=True,
        ))
def import_tool(self, project, user, project_name=None, mount_point=None,
                mount_label=None, user_name=None, tool_option=None, **kw):
    """
    Import a GitHub wiki into a new Wiki Allura tool.
    """
    project_name = "%s/%s" % (user_name, project_name)
    extractor = GitHubProjectExtractor(project_name, user=user)
    wiki_avail = extractor.has_wiki()
    if not wiki_avail:
        return
    self.github_wiki_url = extractor.get_page_url(
        'wiki_url').replace('.wiki', '/wiki')
    self.app = project.install_app(
        "Wiki",
        mount_point=mount_point or 'wiki',
        mount_label=mount_label or 'Wiki',
        import_id={
            'source': self.source,
            'project_name': project_name,
        })
    with_history = tool_option == 'import_history'
    ThreadLocalORMSession.flush_all()
    self.github_markdown_converter = GitHubMarkdownConverter(
        user_name, project_name)
    try:
        M.session.artifact_orm_session._get().skip_mod_date = True
        with h.push_config(c, app=self.app):
            try:
                wiki_url = extractor.get_page_url('wiki_url')
                self.import_pages(wiki_url, history=with_history)
            except git.GitCommandError:
                log.error(
                    'Unable to clone GitHub wiki: '
                    'wiki_url=%s; '
                    'wiki_avail=%s; '
                    'avail_url=%s',
                    wiki_url, wiki_avail,
                    extractor.get_page_url('project_info'),
                    exc_info=True)
                raise
        ThreadLocalORMSession.flush_all()
        M.AuditLog.log(
            'import tool %s from %s on %s' % (
                self.app.config.options.mount_point,
                project_name,
                self.source),
            project=project, user=user, url=self.app.url)
        g.post_event('project_updated')
        return self.app
    except Exception:
        h.make_app_admin_only(self.app)
        raise
    finally:
        M.session.artifact_orm_session._get().skip_mod_date = False
def import_tool(
    self,
    project,
    user,
    project_name=None,
    mount_point=None,
    mount_label=None,
    user_name=None,
    tool_option=None,
    **kw
):
    """
    Import a GitHub wiki into a new Wiki Allura tool.
    """
    project_name = "%s/%s" % (user_name, project_name)
    extractor = GitHubProjectExtractor(project_name, user=user)
    if not extractor.has_wiki():
        return
    self.github_wiki_url = extractor.get_page_url("wiki_url").replace(".wiki", "/wiki")
    self.app = project.install_app(
        "Wiki",
        mount_point=mount_point or "wiki",
        mount_label=mount_label or "Wiki",
        import_id={"source": self.source, "project_name": project_name},
    )
    with_history = tool_option == "import_history"
    ThreadLocalORMSession.flush_all()
    self.github_markdown_converter = GitHubMarkdownConverter(user_name, project_name)
    try:
        M.session.artifact_orm_session._get().skip_mod_date = True
        with h.push_config(c, app=self.app):
            self.import_pages(extractor.get_page_url("wiki_url"), history=with_history)
        ThreadLocalORMSession.flush_all()
        M.AuditLog.log(
            "import tool %s from %s on %s"
            % (self.app.config.options.mount_point, project_name, self.source),
            project=project,
            user=user,
            url=self.app.url,
        )
        g.post_event("project_updated")
        return self.app
    except Exception:
        h.make_app_admin_only(self.app)
        raise
    finally:
        M.session.artifact_orm_session._get().skip_mod_date = False
def test_github_markdown_converted_in_description(self):
    ticket = mock.Mock()
    body = '''Hello

```python
def hello(name):
    print "Hello, " + name
```'''
    body_converted = '''*Originally created by:* [creator](https://github.com/creator)
*Originally owned by:* [owner](https://github.com/owner)

Hello

    :::python
    def hello(name):
        print "Hello, " + name'''
    issue = {
        'body': body,
        'title': 'title',
        'state': 'New',
        'created_at': 'created_at',
        'updated_at': 'updated_at',
        'assignee': {'login': 'owner'},
        'user': {'login': 'creator'},
        'labels': [{'name': 'first'}, {'name': 'second'}],
    }
    importer = tracker.GitHubTrackerImporter()
    importer.github_markdown_converter = GitHubMarkdownConverter(
        'user', 'project')
    extractor = mock.Mock()
    extractor.urlopen().read.return_value = 'data'
    with mock.patch.object(tracker, 'datetime') as dt:
        dt.strptime.side_effect = lambda s, f: s
        importer.process_fields(extractor, ticket, issue)
        self.assertEqual(ticket.description.strip(), body_converted.strip())
def test_convert_markup(self):
    importer = GitHubWikiImporter()
    importer.github_wiki_url = 'https://github.com/a/b/wiki'
    importer.app = Mock()
    importer.app.url = '/p/test/wiki/'
    importer.github_markdown_converter = GitHubMarkdownConverter(
        'user', 'proj')
    f = importer.convert_markup
    source = '''Look at [[this page|Some Page]]

More info at: [[MoreInfo]] [[Even More Info]]

Our website is [[http://domain.net]].

'[[Escaped Tag]]

```python
codeblock
```

ticket #1

#1 header

sha aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'''

    result = '''Look at [this page](Some Page)

More info at: [MoreInfo] [Even More Info]

Our website is <http://domain.net>.

[[Escaped Tag]]

    :::python
    codeblock

ticket [#1]

[#1] header

sha [aaaaaa]'''
    assert_equal(f(source, 'test.md').strip(), result)
    assert_equal(f('h1. Hello', 't.textile').strip(), '# Hello')
class TestGitHubMarkdownConverter(object):

    def setUp(self):
        self.conv = GitHubMarkdownConverter('user', 'project')

    def test_convert_sha(self):
        text = '16c999e8c71134401a78d4d46435517b2271d6ac'
        result = self.conv.convert(text)
        assert_equal(result, '[16c999]')

        text = 'some context 16c999e8c71134401a78d4d46435517b2271d6ac '
        result = self.conv.convert(text)
        assert_equal(result, 'some context [16c999] ')

    def test_convert_user_sha(self):
        text = 'user@16c999e8c71134401a78d4d46435517b2271d6ac'
        result = self.conv.convert(text)
        assert_equal(result, '[16c999]')

        # Not an owner of current project
        text = 'another-user@16c999e8c71134401a78d4d46435517b2271d6ac'
        result = self.conv.convert(text)
        assert_equal(result, text)

    def test_convert_user_repo_sha(self):
        text = 'user/project@16c999e8c71134401a78d4d46435517b2271d6ac'
        result = self.conv.convert(text)
        assert_equal(result, '[16c999]')

        # Not a current project
        text = 'user/p@16c999e8c71134401a78d4d46435517b2271d6ac'
        result = self.conv.convert(text)
        assert_equal(result, '[user/p@16c999]'
                     '(https://github.com/user/p/commit/16c999e8c71134401a78d4d46435517b2271d6ac)')

    def test_convert_ticket(self):
        text = 'Ticket #1'
        result = self.conv.convert(text)
        assert_equal(result, 'Ticket [#1]')
        assert_equal(self.conv.convert('#1'), '[#1]')

    def test_convert_user_ticket(self):
        text = 'user#1'
        result = self.conv.convert(text)
        assert_equal(result, '[#1]')

        # Not an owner of current project
        text = 'another-user#1'
        result = self.conv.convert(text)
        assert_equal(result, 'another-user#1')

    def test_convert_user_repo_ticket(self):
        text = 'user/project#1'
        result = self.conv.convert(text)
        assert_equal(result, '[#1]')

        # Not a current project
        text = 'user/p#1'
        result = self.conv.convert(text)
        assert_equal(result, '[user/p#1](https://github.com/user/p/issues/1)')

    def test_convert_strikethrough(self):
        text = '~~mistake~~'
        assert_equal(self.conv.convert(text), '<s>mistake</s>')

    def test_inline_code_block(self):
        text = u'This `~~some text~~` converts to this ~~strike out~~.'
        result = u'This `~~some text~~` converts to this <s>strike out</s>.'
        assert_equal(self.conv.convert(text).strip(), result)

    def test_convert_code_blocks(self):
        text = u'''```python
print "Hello!"
```

Two code blocks here!

```
for (var i = 0; i < a.length; i++) {
    console.log(i);
}
```'''
        result = u'''    :::python
    print "Hello!"

Two code blocks here!

    for (var i = 0; i < a.length; i++) {
        console.log(i);
    }'''
        assert_equal(self.conv.convert(text).strip(), result)

    def test_code_blocks_without_newline_before(self):
        text = u'''
There are some code snippet:
```
print 'Hello'
```
Pretty cool, ha?'''
        result = u'''
There are some code snippet:

    print 'Hello'

Pretty cool, ha?'''
        assert_equal(self.conv.convert(text).strip(), result.strip())

        text = text.replace('```', '~~~')
        assert_equal(self.conv.convert(text).strip(), result.strip())

        text = u'''
There are some code snippet:
```python
print 'Hello'
```
Pretty cool, ha?'''
        result = u'''
There are some code snippet:

    :::python
    print 'Hello'

Pretty cool, ha?'''
        assert_equal(self.conv.convert(text).strip(), result.strip())
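# A minimal usage sketch of the converter exercised by the tests above.
# The import path below is an assumption and may differ from the actual
# module layout; the expected outputs mirror the assertions in the tests.
from forgeimporters.github.utils import GitHubMarkdownConverter

conv = GitHubMarkdownConverter('user', 'project')

# SHAs and issue references that belong to the converter's own repo collapse
# to short Allura-style links; foreign references keep an explicit GitHub URL.
print(conv.convert('16c999e8c71134401a78d4d46435517b2271d6ac'))  # [16c999]
print(conv.convert('user/project#1'))  # [#1]
print(conv.convert('user/p#1'))        # [user/p#1](https://github.com/user/p/issues/1)
print(conv.convert('~~mistake~~'))     # <s>mistake</s>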
def import_tool(self, project, user, project_name=None, mount_point=None,
                mount_label=None, user_name=None, tool_option=None, **kw):
    """
    Import a GitHub wiki into a new Wiki Allura tool.
    """
    project_name = "%s/%s" % (user_name, project_name)
    extractor = GitHubProjectExtractor(project_name, user=user)
    wiki_avail = extractor.has_wiki()
    # has_wiki only indicates that wiki is enabled, but it does not mean
    # that it has any pages, so we should check if wiki repo actually
    # exists
    wiki_url = extractor.get_page_url('wiki_url')
    if not wiki_avail or not self.has_wiki_repo(wiki_url):
        return
    self.github_wiki_url = extractor.get_page_url('wiki_url').replace(
        '.wiki', '/wiki')
    self.app = project.install_app(
        "Wiki",
        mount_point=mount_point or 'wiki',
        mount_label=mount_label or 'Wiki',
        import_id={
            'source': self.source,
            'project_name': project_name,
        })
    with_history = tool_option == 'import_history'
    ThreadLocalORMSession.flush_all()
    self.github_markdown_converter = GitHubMarkdownConverter(
        user_name, project_name)
    try:
        M.session.artifact_orm_session._get().skip_mod_date = True
        with h.push_config(c, app=self.app):
            try:
                self.import_pages(wiki_url, history=with_history)
            except git.GitCommandError:
                log.error(
                    'Unable to clone GitHub wiki: '
                    'wiki_url=%s; '
                    'wiki_avail=%s; '
                    'avail_url=%s',
                    wiki_url, wiki_avail,
                    extractor.get_page_url('project_info'),
                    exc_info=True)
                raise
        ThreadLocalORMSession.flush_all()
        M.AuditLog.log(
            'import tool %s from %s on %s' % (
                self.app.config.options.mount_point,
                project_name,
                self.source),
            project=project, user=user, url=self.app.url)
        g.post_event('project_updated')
        return self.app
    except Exception:
        h.make_app_admin_only(self.app)
        raise
    finally:
        M.session.artifact_orm_session._get().skip_mod_date = False
class GitHubWikiImporter(ToolImporter): target_app_ep_names = 'wiki' controller = GitHubWikiImportController source = 'GitHub' tool_label = 'Wiki' tool_description = 'Import your wiki from GitHub' tool_option = {"import_history": "Import history"} mediawiki_exts = ['.wiki', '.mediawiki'] markdown_exts = utils.MARKDOWN_EXTENSIONS textile_exts = ['.textile'] # List of supported formats # https://github.com/gollum/gollum/wiki#page-files supported_formats = [ '.asciidoc', '.creole', '.org', '.pod', '.rdoc', '.rest.txt', '.rst.txt', '.rest', '.rst', ] + mediawiki_exts + markdown_exts + textile_exts available_pages = [] def import_tool(self, project, user, project_name=None, mount_point=None, mount_label=None, user_name=None, tool_option=None, **kw): """ Import a GitHub wiki into a new Wiki Allura tool. """ project_name = "%s/%s" % (user_name, project_name) extractor = GitHubProjectExtractor(project_name, user=user) wiki_avail = extractor.has_wiki() # has_wiki only indicates that wiki is enabled, but it does not mean # that it has any pages, so we should check if wiki repo actually # exists wiki_url = extractor.get_page_url('wiki_url') if not wiki_avail or not self.has_wiki_repo(wiki_url): return self.github_wiki_url = extractor.get_page_url('wiki_url').replace( '.wiki', '/wiki') self.app = project.install_app("Wiki", mount_point=mount_point or 'wiki', mount_label=mount_label or 'Wiki', import_id={ 'source': self.source, 'project_name': project_name, }) with_history = tool_option == 'import_history' ThreadLocalORMSession.flush_all() self.github_markdown_converter = GitHubMarkdownConverter( user_name, project_name) try: M.session.artifact_orm_session._get().skip_mod_date = True with h.push_config(c, app=self.app): try: self.import_pages(wiki_url, history=with_history) except git.GitCommandError: log.error( 'Unable to clone GitHub wiki: ' 'wiki_url=%s; ' 'wiki_avail=%s; ' 'avail_url=%s', wiki_url, wiki_avail, extractor.get_page_url('project_info'), exc_info=True) raise ThreadLocalORMSession.flush_all() M.AuditLog.log('import tool %s from %s on %s' % (self.app.config.options.mount_point, project_name, self.source), project=project, user=user, url=self.app.url) g.post_event('project_updated') return self.app except Exception: h.make_app_admin_only(self.app) raise finally: M.session.artifact_orm_session._get().skip_mod_date = False def _set_available_pages(self, commit): pages = [blob.name for blob in commit.tree.traverse()] pages = list(map(os.path.splitext, pages)) pages = [ self._convert_page_name(name) for name, ext in pages if ext in self.supported_formats ] self.available_pages = pages def _without_history(self, commit): self._set_available_pages(commit) for page in commit.tree.blobs: self._make_page(page.data_stream.read(), page.name, commit) def _with_history(self, commit): for filename in commit.stats.files.keys(): self._set_available_pages(commit) renamed_to = None if '=>' in filename: # File renamed. 
Stats contains entry like 'Page.md => # NewPage.md' filename, renamed_to = filename.split(' => ') if renamed_to and renamed_to in commit.tree: text = commit.tree[renamed_to].data_stream.read() elif filename in commit.tree: text = commit.tree[filename].data_stream.read() else: # file is deleted text = '' self._make_page(text, filename, commit, renamed_to) def _make_page(self, text, filename, commit, renamed_to=None): orig_name = self._format_supported(filename) renamed_orig_name = self._format_supported( renamed_to) if renamed_to else None if not orig_name: return if renamed_to and not renamed_orig_name: return mod_date = datetime.utcfromtimestamp(commit.committed_date) wiki_page = WM.Page.upsert(self._convert_page_name(orig_name)) wiki_page.timestamp = wiki_page.mod_date = mod_date if renamed_orig_name and renamed_to in commit.tree: wiki_page.title = self._convert_page_name(renamed_orig_name) wiki_page.text = self.convert_markup(h.really_unicode(text), renamed_to) elif filename in commit.tree: wiki_page.text = self.convert_markup(h.really_unicode(text), filename) else: wiki_page.delete() import_id_name = renamed_orig_name if renamed_orig_name else orig_name wiki_page.import_id = ImportIdConverter.get().expand( import_id_name, self.app) wiki_page.commit() return wiki_page def _format_supported(self, filename): orig_name, ext = os.path.splitext(filename) if ext and ext not in self.supported_formats: log.info('Not a wiki page %s. Skipping.' % filename) return False return orig_name def _convert_page_name(self, name): """Convert '-' and '/' into spaces in page name to match github behavior""" return name.replace('-', ' ').replace('/', ' ') def has_wiki_repo(self, wiki_url): wiki_path = mkdtemp() try: wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True) except git.GitCommandError: return False rmtree(wiki_path) return True def import_pages(self, wiki_url, history=None): wiki_path = mkdtemp() wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True) if not history: self._without_history(wiki.heads.master.commit) else: for commit in reversed(list(wiki.iter_commits())): self._with_history(commit) rmtree(wiki_path) def convert_markup(self, text, filename): """Convert any supported github markup into Allura-markdown. Conversion happens in 4 phases: 1. Convert source text to a html using h.render_any_markup. 2. Rewrite links that match the wiki URL prefix with new location. 3. Convert resulting html to a markdown using html2text, if available. 4. Convert gollum tags If html2text module isn't available then only phases 1 and 2 will be executed. Files in mediawiki format are converted using mediawiki2markdown if html2text is available. """ name, ext = os.path.splitext(filename) if ext in self.markdown_exts: text = self.github_markdown_converter.convert(text) return self.convert_gollum_tags(text) try: import html2text html2text.BODY_WIDTH = 0 except ImportError: html2text = None if ext and ext in self.mediawiki_exts: if html2text: text = mediawiki2markdown(text) text = self.convert_gollum_tags(text) # Don't have html here, so we can't call self._rewrite_links. # Falling back to simpler rewriter. 
prefix = self.github_wiki_url new_prefix = self.app.url if not prefix.endswith('/'): prefix += '/' if not new_prefix.endswith('/'): new_prefix += '/' _re = re.compile(r'%s(\S*)' % prefix) def repl(m): return new_prefix + self._convert_page_name(m.group(1)) text = _re.sub(repl, text) else: text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) return text elif ext and ext in self.textile_exts: text = self._prepare_textile_text(text) text = six.text_type(h.render_any_markup(filename, text)) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) if html2text: text = html2text.html2text(text) text = self.convert_gollum_tags(text) text = text.replace('<notextile>', '').replace('< notextile>', '').replace('</notextile>', '') text = text.replace('<notextile>', '').replace('</notextile>', '') text = text.replace('<notextile>', '').replace('</notextile>', '') return text else: text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) if html2text: text = html2text.html2text(text) text = self.convert_gollum_tags(text) return text def convert_gollum_tags(self, text): tag_re = re.compile( r''' (?P<quote>')? # optional tag escaping (?P<tag>\[\[ # tag start (?P<link>[^]]+) # title/link/filename with options \]\]) # tag end ''', re.VERBOSE) return tag_re.sub(self._gollum_tag_match, text) def _gollum_tag_match(self, match): available_options = [ 'alt=', 'frame', 'align=', 'float', 'width=', 'height=', ] quote = match.groupdict().get('quote') if quote: # tag is escaped, return untouched return match.group('tag') link = match.group('link').split('|') title = options = None if len(link) == 1: link = link[0] elif any([link[1].startswith(opt) for opt in available_options]): # second element is option -> first is the link link, options = link[0], link[1:] else: title, link, options = link[0], link[1], link[2:] if link == '_TOC_': return '[TOC]' if link.startswith('http://') or link.startswith('https://'): sub = self._gollum_external_link # TODO: add embedded images and file links else: sub = self._gollum_page_link return sub(link, title, options) def _gollum_external_link(self, link, title, options): if title: return '[{}]({})'.format(title, link) return '<{}>'.format(link) def _gollum_page_link(self, link, title, options): page = self._convert_page_name(link) page = page.replace('&', '&') # allow & in page links # gollum page lookups are case-insensitive, you'll always get link to # whatever comes first in the file system, no matter how you refer to a page. # E.g. if you have two pages: a.md and A.md both [[a]] and [[A]] will refer a.md. 
# We're emulating this behavior using list of all available pages try: idx = [p.lower() for p in self.available_pages].index(page.lower()) except ValueError: idx = None if idx is not None: page = self.available_pages[idx] if title: return '[{}]({})'.format(title, page) return '[{}]'.format(page) def rewrite_links(self, html, prefix, new_prefix): if not prefix.endswith('/'): prefix += '/' if not new_prefix.endswith('/'): new_prefix += '/' soup = BeautifulSoup(html, 'html.parser') for a in soup.findAll('a'): if a.get('href').startswith(prefix): page = a['href'].replace(prefix, '') new_page = self._convert_page_name(page) a['href'] = new_prefix + new_page if a.string == page: a.string = new_page elif a.string == prefix + page: a.string = new_prefix + new_page return six.text_type(soup) def _prepare_textile_text(self, text): # need to convert lists properly text_lines = text.splitlines() for i, l in enumerate(text_lines): if l.lstrip().startswith('#'): text_lines[i] = l.lstrip() text = '\n'.join(text_lines) # to convert gollum tags properly used <notextile> tag, # so these tags will not be affected by converter text = text.replace('[[', '<notextile>[[').replace(']]', ']]</notextile>') return text
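# A rough illustration of the gollum-tag rewriting implemented above.
# Instantiating the importer bare like this (as the tests do) is only a
# sketch; the expected outputs mirror test_convert_markup.
importer = GitHubWikiImporter()
importer.available_pages = ['Some Page']

print(importer.convert_gollum_tags('Look at [[this page|Some Page]]'))
# Look at [this page](Some Page)
print(importer.convert_gollum_tags('Our website is [[http://domain.net]].'))
# Our website is <http://domain.net>.
print(importer.convert_gollum_tags("'[[Escaped Tag]]"))
# [[Escaped Tag]]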
class GitHubWikiImporter(ToolImporter): target_app_ep_names = "wiki" controller = GitHubWikiImportController source = "GitHub" tool_label = "Wiki" tool_description = "Import your wiki from GitHub" tool_option = {"import_history": "Import history"} mediawiki_exts = [".wiki", ".mediawiki"] markdown_exts = utils.MARKDOWN_EXTENSIONS textile_exts = [".textile"] # List of supported formats # https://github.com/gollum/gollum/wiki#page-files supported_formats = ( [".asciidoc", ".creole", ".org", ".pod", ".rdoc", ".rest.txt", ".rst.txt", ".rest", ".rst"] + mediawiki_exts + markdown_exts + textile_exts ) available_pages = [] def import_tool( self, project, user, project_name=None, mount_point=None, mount_label=None, user_name=None, tool_option=None, **kw ): """ Import a GitHub wiki into a new Wiki Allura tool. """ project_name = "%s/%s" % (user_name, project_name) extractor = GitHubProjectExtractor(project_name, user=user) wiki_avail = extractor.has_wiki() # has_wiki only indicates that wiki is enabled, but it does not mean # that it has any pages, so we should check if wiki repo actually # exists wiki_url = extractor.get_page_url("wiki_url") if not wiki_avail or not self.has_wiki_repo(wiki_url): return self.github_wiki_url = extractor.get_page_url("wiki_url").replace(".wiki", "/wiki") self.app = project.install_app( "Wiki", mount_point=mount_point or "wiki", mount_label=mount_label or "Wiki", import_id={"source": self.source, "project_name": project_name}, ) with_history = tool_option == "import_history" ThreadLocalORMSession.flush_all() self.github_markdown_converter = GitHubMarkdownConverter(user_name, project_name) try: M.session.artifact_orm_session._get().skip_mod_date = True with h.push_config(c, app=self.app): try: self.import_pages(wiki_url, history=with_history) except git.GitCommandError: log.error( "Unable to clone GitHub wiki: " "wiki_url=%s; " "wiki_avail=%s; " "avail_url=%s", wiki_url, wiki_avail, extractor.get_page_url("project_info"), exc_info=True, ) raise ThreadLocalORMSession.flush_all() M.AuditLog.log( "import tool %s from %s on %s" % (self.app.config.options.mount_point, project_name, self.source), project=project, user=user, url=self.app.url, ) g.post_event("project_updated") return self.app except Exception: h.make_app_admin_only(self.app) raise finally: M.session.artifact_orm_session._get().skip_mod_date = False def _set_available_pages(self, commit): pages = [blob.name for blob in commit.tree.traverse()] pages = map(os.path.splitext, pages) pages = [self._convert_page_name(name) for name, ext in pages if ext in self.supported_formats] self.available_pages = pages def _without_history(self, commit): self._set_available_pages(commit) for page in commit.tree.blobs: self._make_page(page.data_stream.read(), page.name, commit) def _with_history(self, commit): for filename in commit.stats.files.keys(): self._set_available_pages(commit) renamed_to = None if "=>" in filename: # File renamed. 
Stats contains entry like 'Page.md => # NewPage.md' filename, renamed_to = filename.split(" => ") if renamed_to and renamed_to in commit.tree: text = commit.tree[renamed_to].data_stream.read() elif filename in commit.tree: text = commit.tree[filename].data_stream.read() else: # file is deleted text = "" self._make_page(text, filename, commit, renamed_to) def _make_page(self, text, filename, commit, renamed_to=None): orig_name = self._format_supported(filename) renamed_orig_name = self._format_supported(renamed_to) if renamed_to else None if not orig_name: return if renamed_to and not renamed_orig_name: return mod_date = datetime.utcfromtimestamp(commit.committed_date) wiki_page = WM.Page.upsert(self._convert_page_name(orig_name)) wiki_page.timestamp = wiki_page.mod_date = mod_date wiki_page.viewable_by = ["all"] if renamed_orig_name and renamed_to in commit.tree: wiki_page.title = self._convert_page_name(renamed_orig_name) wiki_page.text = self.convert_markup(h.really_unicode(text), renamed_to) elif filename in commit.tree: wiki_page.text = self.convert_markup(h.really_unicode(text), filename) else: wiki_page.delete() import_id_name = renamed_orig_name if renamed_orig_name else orig_name wiki_page.import_id = ImportIdConverter.get().expand(import_id_name, self.app) wiki_page.commit() return wiki_page def _format_supported(self, filename): orig_name, ext = os.path.splitext(filename) if ext and ext not in self.supported_formats: log.info("Not a wiki page %s. Skipping." % filename) return False return orig_name def _convert_page_name(self, name): """Convert '-' and '/' into spaces in page name to match github behavior""" return name.replace("-", " ").replace("/", " ") def has_wiki_repo(self, wiki_url): wiki_path = mkdtemp() try: wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True) except git.GitCommandError: return False rmtree(wiki_path) return True def import_pages(self, wiki_url, history=None): wiki_path = mkdtemp() wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True) if not history: self._without_history(wiki.heads.master.commit) else: for commit in reversed(list(wiki.iter_commits())): self._with_history(commit) rmtree(wiki_path) def convert_markup(self, text, filename): """Convert any supported github markup into Allura-markdown. Conversion happens in 4 phases: 1. Convert source text to a html using h.render_any_markup. 2. Rewrite links that match the wiki URL prefix with new location. 3. Convert resulting html to a markdown using html2text, if available. 4. Convert gollum tags If html2text module isn't available then only phases 1 and 2 will be executed. Files in mediawiki format are converted using mediawiki2markdown if html2text is available. """ name, ext = os.path.splitext(filename) if ext in self.markdown_exts: text = self.github_markdown_converter.convert(text) return self.convert_gollum_tags(text) try: import html2text html2text.BODY_WIDTH = 0 except ImportError: html2text = None if ext and ext in self.mediawiki_exts: if html2text: text = mediawiki2markdown(text) text = self.convert_gollum_tags(text) # Don't have html here, so we can't call self._rewrite_links. # Falling back to simpler rewriter. 
prefix = self.github_wiki_url new_prefix = self.app.url if not prefix.endswith("/"): prefix += "/" if not new_prefix.endswith("/"): new_prefix += "/" _re = re.compile(r"%s(\S*)" % prefix) def repl(m): return new_prefix + self._convert_page_name(m.group(1)) text = _re.sub(repl, text) else: text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) return text elif ext and ext in self.textile_exts: text = self._prepare_textile_text(text) text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) if html2text: text = html2text.html2text(text) text = self.convert_gollum_tags(text) text = text.replace("<notextile>", "").replace("</notextile>", "") text = text.replace("<notextile>", "").replace("</notextile>", "") text = text.replace("<notextile>", "").replace("</notextile>", "") return text else: text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) if html2text: text = html2text.html2text(text) text = self.convert_gollum_tags(text) return text def convert_gollum_tags(self, text): tag_re = re.compile( r""" (?P<quote>')? # optional tag escaping (?P<tag>\[\[ # tag start (?P<link>[^]]+) # title/link/filename with options \]\]) # tag end """, re.VERBOSE, ) return tag_re.sub(self._gollum_tag_match, text) def _gollum_tag_match(self, match): available_options = ["alt=", "frame", "align=", "float", "width=", "height="] quote = match.groupdict().get("quote") if quote: # tag is escaped, return untouched return match.group("tag") link = match.group("link").split("|") title = options = None if len(link) == 1: link = link[0] elif any(map(lambda opt: link[1].startswith(opt), available_options)): # second element is option -> first is the link link, options = link[0], link[1:] else: title, link, options = link[0], link[1], link[2:] if link == "_TOC_": return "[TOC]" if link.startswith("http://") or link.startswith("https://"): sub = self._gollum_external_link # TODO: add embedded images and file links else: sub = self._gollum_page_link return sub(link, title, options) def _gollum_external_link(self, link, title, options): if title: return u"[{}]({})".format(title, link) return u"<{}>".format(link) def _gollum_page_link(self, link, title, options): page = self._convert_page_name(link) page = page.replace(u"&", u"&") # allow & in page links # gollum page lookups are case-insensitive, you'll always get link to # whatever comes first in the file system, no matter how you refer to a page. # E.g. if you have two pages: a.md and A.md both [[a]] and [[A]] will refer a.md. 
# We're emulating this behavior using list of all available pages try: idx = map(lambda p: p.lower(), self.available_pages).index(page.lower()) except ValueError: idx = None if idx is not None: page = self.available_pages[idx] if title: return u"[{}]({})".format(title, page) return u"[{}]".format(page) def rewrite_links(self, html, prefix, new_prefix): if not prefix.endswith("/"): prefix += "/" if not new_prefix.endswith("/"): new_prefix += "/" soup = BeautifulSoup(html) for a in soup.findAll("a"): if a.get("href").startswith(prefix): page = a["href"].replace(prefix, "") new_page = self._convert_page_name(page) a["href"] = new_prefix + new_page if a.text == page: a.setString(new_page) elif a.text == prefix + page: a.setString(new_prefix + new_page) return unicode(soup) def _prepare_textile_text(self, text): # need to convert lists properly text_lines = text.splitlines() for i, l in enumerate(text_lines): if l.lstrip().startswith("#"): text_lines[i] = l.lstrip() text = "\n".join(text_lines) # to convert gollum tags properly used <notextile> tag, # so these tags will not be affected by converter text = text.replace("[[", "<notextile>[[").replace("]]", "]]</notextile>") return text
class GitHubWikiImporter(ToolImporter): target_app_ep_names = 'wiki' controller = GitHubWikiImportController source = 'GitHub' tool_label = 'Wiki' tool_description = 'Import your wiki from GitHub' tool_option = {"import_history": "Import history"} mediawiki_exts = ['.wiki', '.mediawiki'] markdown_exts = utils.MARKDOWN_EXTENSIONS textile_exts = ['.textile'] # List of supported formats # https://github.com/gollum/gollum/wiki#page-files supported_formats = [ '.asciidoc', '.creole', '.org', '.pod', '.rdoc', '.rest.txt', '.rst.txt', '.rest', '.rst', ] + mediawiki_exts + markdown_exts + textile_exts available_pages = [] def import_tool( self, project, user, project_name=None, mount_point=None, mount_label=None, user_name=None, tool_option=None, **kw): """ Import a GitHub wiki into a new Wiki Allura tool. """ project_name = "%s/%s" % (user_name, project_name) extractor = GitHubProjectExtractor(project_name, user=user) wiki_avail = extractor.has_wiki() if not wiki_avail: return self.github_wiki_url = extractor.get_page_url( 'wiki_url').replace('.wiki', '/wiki') self.app = project.install_app( "Wiki", mount_point=mount_point or 'wiki', mount_label=mount_label or 'Wiki', import_id={ 'source': self.source, 'project_name': project_name, } ) with_history = tool_option == 'import_history' ThreadLocalORMSession.flush_all() self.github_markdown_converter = GitHubMarkdownConverter( user_name, project_name) try: M.session.artifact_orm_session._get().skip_mod_date = True with h.push_config(c, app=self.app): try: wiki_url = extractor.get_page_url('wiki_url') self.import_pages(wiki_url, history=with_history) except git.GitCommandError: log.error( 'Unable to clone GitHub wiki: ' 'wiki_url=%s; ' 'wiki_avail=%s; ' 'avail_url=%s', wiki_url, wiki_avail, extractor.get_page_url('project_info'), exc_info=True) raise ThreadLocalORMSession.flush_all() M.AuditLog.log( 'import tool %s from %s on %s' % ( self.app.config.options.mount_point, project_name, self.source), project=project, user=user, url=self.app.url) g.post_event('project_updated') return self.app except Exception: h.make_app_admin_only(self.app) raise finally: M.session.artifact_orm_session._get().skip_mod_date = False def _set_available_pages(self, commit): pages = [blob.name for blob in commit.tree.traverse()] pages = map(os.path.splitext, pages) pages = [self._convert_page_name(name) for name, ext in pages if ext in self.supported_formats] self.available_pages = pages def _without_history(self, commit): self._set_available_pages(commit) for page in commit.tree.blobs: self._make_page(page.data_stream.read(), page.name, commit) def _with_history(self, commit): for filename in commit.stats.files.keys(): self._set_available_pages(commit) renamed_to = None if '=>' in filename: # File renamed. 
Stats contains entry like 'Page.md => # NewPage.md' filename, renamed_to = filename.split(' => ') if renamed_to and renamed_to in commit.tree: text = commit.tree[renamed_to].data_stream.read() elif filename in commit.tree: text = commit.tree[filename].data_stream.read() else: # file is deleted text = '' self._make_page(text, filename, commit, renamed_to) def _make_page(self, text, filename, commit, renamed_to=None): orig_name = self._format_supported(filename) renamed_orig_name = self._format_supported( renamed_to) if renamed_to else None if not orig_name: return if renamed_to and not renamed_orig_name: return mod_date = datetime.utcfromtimestamp(commit.committed_date) wiki_page = WM.Page.upsert(self._convert_page_name(orig_name)) wiki_page.timestamp = wiki_page.mod_date = mod_date wiki_page.viewable_by = ['all'] if renamed_orig_name and renamed_to in commit.tree: wiki_page.title = self._convert_page_name(renamed_orig_name) wiki_page.text = self.convert_markup( h.really_unicode(text), renamed_to) elif filename in commit.tree: wiki_page.text = self.convert_markup( h.really_unicode(text), filename) else: wiki_page.delete() import_id_name = renamed_orig_name if renamed_orig_name else orig_name wiki_page.import_id = ImportIdConverter.get().expand( import_id_name, self.app) wiki_page.commit() return wiki_page def _format_supported(self, filename): orig_name, ext = os.path.splitext(filename) if ext and ext not in self.supported_formats: log.info('Not a wiki page %s. Skipping.' % filename) return False return orig_name def _convert_page_name(self, name): """Convert '-' and '/' into spaces in page name to match github behavior""" return name.replace('-', ' ').replace('/', ' ') def import_pages(self, wiki_url, history=None): wiki_path = mkdtemp() wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True) if not history: self._without_history(wiki.heads.master.commit) else: for commit in reversed(list(wiki.iter_commits())): self._with_history(commit) rmtree(wiki_path) def convert_markup(self, text, filename): """Convert any supported github markup into Allura-markdown. Conversion happens in 4 phases: 1. Convert source text to a html using h.render_any_markup. 2. Rewrite links that match the wiki URL prefix with new location. 3. Convert resulting html to a markdown using html2text, if available. 4. Convert gollum tags If html2text module isn't available then only phases 1 and 2 will be executed. Files in mediawiki format are converted using mediawiki2markdown if html2text is available. """ name, ext = os.path.splitext(filename) if ext in self.markdown_exts: text = self.github_markdown_converter.convert(text) return self.convert_gollum_tags(text) try: import html2text html2text.BODY_WIDTH = 0 except ImportError: html2text = None if ext and ext in self.mediawiki_exts: if html2text: text = mediawiki2markdown(text) text = self.convert_gollum_tags(text) # Don't have html here, so we can't call self._rewrite_links. # Falling back to simpler rewriter. 
prefix = self.github_wiki_url new_prefix = self.app.url if not prefix.endswith('/'): prefix += '/' if not new_prefix.endswith('/'): new_prefix += '/' _re = re.compile(r'%s(\S*)' % prefix) def repl(m): return new_prefix + self._convert_page_name(m.group(1)) text = _re.sub(repl, text) else: text = h.render_any_markup(filename, text) text = self.rewrite_links( text, self.github_wiki_url, self.app.url) return text elif ext and ext in self.textile_exts: text = self._prepare_textile_text(text) text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) if html2text: text = html2text.html2text(text) text = self.convert_gollum_tags(text) text = text.replace('<notextile>', '').replace('</notextile>', '') text = text.replace('<notextile>', '').replace( '</notextile>', '') text = text.replace('<notextile>', '').replace( '</notextile>', '') return text else: text = h.render_any_markup(filename, text) text = self.rewrite_links(text, self.github_wiki_url, self.app.url) if html2text: text = html2text.html2text(text) text = self.convert_gollum_tags(text) return text def convert_gollum_tags(self, text): tag_re = re.compile(r''' (?P<quote>')? # optional tag escaping (?P<tag>\[\[ # tag start (?P<link>[^]]+) # title/link/filename with options \]\]) # tag end ''', re.VERBOSE) return tag_re.sub(self._gollum_tag_match, text) def _gollum_tag_match(self, match): available_options = [ 'alt=', 'frame', 'align=', 'float', 'width=', 'height=', ] quote = match.groupdict().get('quote') if quote: # tag is escaped, return untouched return match.group('tag') link = match.group('link').split('|') title = options = None if len(link) == 1: link = link[0] elif any(map(lambda opt: link[1].startswith(opt), available_options)): # second element is option -> first is the link link, options = link[0], link[1:] else: title, link, options = link[0], link[1], link[2:] if link == '_TOC_': return '[TOC]' if link.startswith('http://') or link.startswith('https://'): sub = self._gollum_external_link # TODO: add embedded images and file links else: sub = self._gollum_page_link return sub(link, title, options) def _gollum_external_link(self, link, title, options): if title: return u'[{}]({})'.format(title, link) return u'<{}>'.format(link) def _gollum_page_link(self, link, title, options): page = self._convert_page_name(link) page = page.replace(u'&', u'&') # allow & in page links # gollum page lookups are case-insensitive, you'll always get link to # whatever comes first in the file system, no matter how you refer to a page. # E.g. if you have two pages: a.md and A.md both [[a]] and [[A]] will refer a.md. 
# We're emulating this behavior using list of all available pages try: idx = map(lambda p: p.lower(), self.available_pages).index(page.lower()) except ValueError: idx = None if idx is not None: page = self.available_pages[idx] if title: return u'[{}]({})'.format(title, page) return u'[{}]'.format(page) def rewrite_links(self, html, prefix, new_prefix): if not prefix.endswith('/'): prefix += '/' if not new_prefix.endswith('/'): new_prefix += '/' soup = BeautifulSoup(html) for a in soup.findAll('a'): if a.get('href').startswith(prefix): page = a['href'].replace(prefix, '') new_page = self._convert_page_name(page) a['href'] = new_prefix + new_page if a.text == page: a.setString(new_page) elif a.text == prefix + page: a.setString(new_prefix + new_page) return unicode(soup) def _prepare_textile_text(self, text): # need to convert lists properly text_lines = text.splitlines() for i, l in enumerate(text_lines): if l.lstrip().startswith('#'): text_lines[i] = l.lstrip() text = '\n'.join(text_lines) # to convert gollum tags properly used <notextile> tag, # so these tags will not be affected by converter text = text.replace( '[[', '<notextile>[[').replace(']]', ']]</notextile>') return text
class GitHubTrackerImporter(ToolImporter):
    source = 'GitHub'
    target_app = ForgeTrackerApp
    controller = GitHubTrackerImportController
    tool_label = 'Issues'
    max_ticket_num = 0
    open_milestones = set()

    def import_tool(self, project, user, project_name, mount_point=None,
                    mount_label=None, **kw):
        import_id_converter = ImportIdConverter.get()
        project_name = '%s/%s' % (kw['user_name'], project_name)
        app = project.install_app('tickets', mount_point, mount_label,
                                  EnableVoting=False,
                                  open_status_names='open',
                                  closed_status_names='closed',
                                  import_id={
                                      'source': self.source,
                                      'project_name': project_name,
                                  })
        self.github_markdown_converter = GitHubMarkdownConverter(
            kw['user_name'], project_name)
        ThreadLocalORMSession.flush_all()
        extractor = GitHubProjectExtractor(project_name, user=user)
        try:
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, user=M.User.anonymous(), app=app):
                for ticket_num, issue in extractor.iter_issues():
                    self.max_ticket_num = max(ticket_num, self.max_ticket_num)
                    ticket = TM.Ticket(
                        app_config_id=app.config._id,
                        custom_fields=dict(),
                        ticket_num=ticket_num,
                        import_id=import_id_converter.expand(ticket_num, app))
                    self.process_fields(ticket, issue)
                    self.process_comments(extractor, ticket, issue)
                    self.process_events(extractor, ticket, issue)
                    self.process_milestones(ticket, issue)
                    session(ticket).flush(ticket)
                    session(ticket).expunge(ticket)
                app.globals.custom_fields = self.postprocess_milestones()
                app.globals.last_ticket_num = self.max_ticket_num
                ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                'import tool %s from %s on %s' % (
                    app.config.options.mount_point,
                    project_name,
                    self.source),
                project=project, user=user, url=app.url)
            g.post_event('project_updated')
            app.globals.invalidate_bin_counts()
            return app
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False

    def parse_datetime(self, datetime_string):
        return datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%SZ')

    def get_user_link(self, user):
        return u'[{0}](https://github.com/{0})'.format(user)

    def process_fields(self, ticket, issue):
        ticket.summary = issue['title']
        ticket.status = issue['state']
        ticket.created_date = self.parse_datetime(issue['created_at'])
        ticket.mod_date = self.parse_datetime(issue['updated_at'])
        if issue['assignee']:
            owner_line = '*Originally owned by:* {}\n'.format(
                self.get_user_link(issue['assignee']['login']))
        else:
            owner_line = ''
        # body processing happens here
        body, attachments = self._get_attachments(issue['body'])
        ticket.add_multiple_attachments(attachments)
        ticket.description = (
            u'*Originally created by:* {creator}\n'
            u'{owner}'
            u'\n'
            u'{body}').format(
                creator=self.get_user_link(issue['user']['login']),
                owner=owner_line,
                body=self.github_markdown_converter.convert(body),
            )
        ticket.labels = [label['name'] for label in issue['labels']]

    def process_comments(self, extractor, ticket, issue):
        for comment in extractor.iter_comments(issue):
            body, attachments = self._get_attachments(comment['body'])
            if comment['user']:
                posted_by = u'*Originally posted by:* {}\n\n'.format(
                    self.get_user_link(comment['user']['login']))
                body = posted_by + body
            p = ticket.discussion_thread.add_post(
                text=self.github_markdown_converter.convert(body),
                ignore_security=True,
                timestamp=self.parse_datetime(comment['created_at']),
            )
            p.add_multiple_attachments(attachments)

    def process_events(self, extractor, ticket, issue):
        for event in extractor.iter_events(issue):
            prefix = text = ''
            if event['event'] in ('reopened', 'closed'):
                prefix = '*Ticket changed by:* {}\n\n'.format(
                    self.get_user_link(event['actor']['login']))
            if event['event'] == 'reopened':
                text = '- **status**: closed --> open'
            elif event['event'] == 'closed':
                text = '- **status**: open --> closed'
            elif event['event'] == 'assigned':
                text = '- **assigned_to**: {}'.format(
                    self.get_user_link(event['actor']['login']))
            text = prefix + text
            if not text:
                continue
            ticket.discussion_thread.add_post(
                text=text,
                ignore_security=True,
                timestamp=self.parse_datetime(event['created_at']),
            )

    def process_milestones(self, ticket, issue):
        if issue['milestone']:
            title = issue['milestone']['title']
            due = None
            if issue['milestone']['due_on']:
                due = self.parse_datetime(issue['milestone']['due_on'])
            ticket.custom_fields = {
                '_milestone': title,
            }
            self.open_milestones.add((title, due,))

    def postprocess_milestones(self):
        global_milestones = {
            'milestones': [],
            'type': 'milestone',
            'name': '_milestone',
            'label': 'Milestone',
        }
        for milestone in self.open_milestones:
            global_milestones['milestones'].append({
                'name': milestone[0],
                'due_date': unicode(milestone[1].date()) if milestone[1] else None,
                'complete': False,
            })
        return [global_milestones]

    def _get_attachments(self, body):
        # at github, attachments are images only and are included into comment's body
        # usual syntax is
        # ![cdbpzjc5ex4](https://f.cloud.github.com/assets/979771/1027411/a393ab5e-0e70-11e3-8a38-b93a3df904cf.jpg)\r\n
        REGEXP = r'!\[[\w0-9]+?\]\(((?:https?:\/\/)?[\da-z\.-]+\.[a-z\.]{2,6}' \
                 r'[\/%\w\.-]*.(jpg|jpeg|png|gif))\)[\r\n]*'
        attachments = []
        try:
            found_matches = re.finditer(REGEXP, body, re.IGNORECASE)
        except TypeError:
            found_matches = re.finditer(REGEXP, str(body), re.IGNORECASE)
        for i, match in enumerate(found_matches):
            # removing attach text from comment
            body = body.replace(match.group(0), '')
            # stripping url and extension
            attachments.append(Attachment(
                match.group(1),  # url
                'attach{}.{}'.format(i + 1, match.group(2))  # extension
            ))
        return (body, attachments)
class GitHubTrackerImporter(ToolImporter):
    source = 'GitHub'
    target_app_ep_names = 'tickets'
    controller = GitHubTrackerImportController
    tool_label = 'Issues'
    max_ticket_num = 0
    open_milestones = set()

    def import_tool(self, project, user, project_name, mount_point=None,
                    mount_label=None, **kw):
        import_id_converter = ImportIdConverter.get()
        project_name = '%s/%s' % (kw['user_name'], project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        if not extractor.has_tracker():
            return
        app = project.install_app('tickets', mount_point, mount_label,
                                  EnableVoting=False,
                                  open_status_names='open',
                                  closed_status_names='closed',
                                  import_id={
                                      'source': self.source,
                                      'project_name': project_name,
                                  })
        self.github_markdown_converter = GitHubMarkdownConverter(
            kw['user_name'], project_name)
        ThreadLocalORMSession.flush_all()
        try:
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, user=M.User.anonymous(), app=app):
                for ticket_num, issue in extractor.iter_issues():
                    self.max_ticket_num = max(ticket_num, self.max_ticket_num)
                    ticket = TM.Ticket(
                        app_config_id=app.config._id,
                        custom_fields=dict(),
                        ticket_num=ticket_num,
                        import_id=import_id_converter.expand(ticket_num, app))
                    self.process_fields(extractor, ticket, issue)
                    self.process_comments(extractor, ticket, issue)
                    self.process_events(extractor, ticket, issue)
                    self.process_milestones(ticket, issue)
                    session(ticket).flush(ticket)
                    session(ticket).expunge(ticket)
                app.globals.custom_fields = self.postprocess_milestones()
                app.globals.last_ticket_num = self.max_ticket_num
                ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                'import tool %s from %s on %s' % (
                    app.config.options.mount_point,
                    project_name,
                    self.source),
                project=project, user=user, url=app.url)
            g.post_event('project_updated')
            app.globals.invalidate_bin_counts()
            return app
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False

    def parse_datetime(self, datetime_string):
        return datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%SZ')

    def get_user_link(self, user):
        return u'[{0}](https://github.com/{0})'.format(user)

    def process_fields(self, extractor, ticket, issue):
        ticket.summary = issue['title']
        ticket.status = issue['state']
        ticket.created_date = self.parse_datetime(issue['created_at'])
        ticket.mod_date = self.parse_datetime(issue['updated_at'])
        if issue['assignee']:
            owner_line = '*Originally owned by:* {}\n'.format(
                self.get_user_link(issue['assignee']['login']))
        else:
            owner_line = ''
        # body processing happens here
        body, attachments = self._get_attachments(extractor, issue['body'])
        ticket.add_multiple_attachments(attachments)
        ticket.description = (
            u'*Originally created by:* {creator}\n'
            u'{owner}'
            u'\n'
            u'{body}').format(
                creator=self.get_user_link(issue['user']['login']),
                owner=owner_line,
                body=self.github_markdown_converter.convert(body),
            )
        ticket.labels = [label['name'] for label in issue['labels']]

    def process_comments(self, extractor, ticket, issue):
        for comment in extractor.iter_comments(issue):
            body, attachments = self._get_attachments(
                extractor, comment['body'])
            if comment['user']:
                posted_by = u'*Originally posted by:* {}\n\n'.format(
                    self.get_user_link(comment['user']['login']))
                body = posted_by + body
            p = ticket.discussion_thread.add_post(
                text=self.github_markdown_converter.convert(body),
                ignore_security=True,
                timestamp=self.parse_datetime(comment['created_at']),
            )
            p.add_multiple_attachments(attachments)

    def process_events(self, extractor, ticket, issue):
        for event in extractor.iter_events(issue):
            prefix = text = ''
            if event['event'] in ('reopened', 'closed'):
                prefix = '*Ticket changed by:* {}\n\n'.format(
                    self.get_user_link(event['actor']['login']))
            if event['event'] == 'reopened':
                text = '- **status**: closed --> open'
            elif event['event'] == 'closed':
                text = '- **status**: open --> closed'
            elif event['event'] == 'assigned':
                text = '- **assigned_to**: {}'.format(
                    self.get_user_link(event['actor']['login']))
            text = prefix + text
            if not text:
                continue
            ticket.discussion_thread.add_post(
                text=text,
                ignore_security=True,
                timestamp=self.parse_datetime(event['created_at']),
            )

    def process_milestones(self, ticket, issue):
        if issue['milestone']:
            title = issue['milestone']['title']
            due = None
            if issue['milestone']['due_on']:
                due = self.parse_datetime(issue['milestone']['due_on'])
            ticket.custom_fields = {
                '_milestone': title,
            }
            self.open_milestones.add((title, due,))

    def postprocess_milestones(self):
        global_milestones = {
            'milestones': [],
            'type': 'milestone',
            'name': '_milestone',
            'label': 'Milestone',
        }
        for milestone in self.open_milestones:
            global_milestones['milestones'].append({
                'name': milestone[0],
                'due_date': unicode(milestone[1].date()) if milestone[1] else None,
                'complete': False,
            })
        return [global_milestones]

    def _get_attachments(self, extractor, body):
        # at github, attachments are images only and are included into comment's body
        # usual syntax is
        # ![cdbpzjc5ex4](https://f.cloud.github.com/assets/979771/1027411/a393ab5e-0e70-11e3-8a38-b93a3df904cf.jpg)\r\n
        REGEXP = r'!\[[\w0-9]+?\]\(((?:https?:\/\/)?[\da-z\.-]+\.[a-z\.]{2,6}' \
                 r'[\/%\w\.-]*.(jpg|jpeg|png|gif))\)[\r\n]*'
        attachments = []
        try:
            found_matches = re.finditer(REGEXP, body, re.IGNORECASE)
        except TypeError:
            found_matches = re.finditer(REGEXP, str(body), re.IGNORECASE)
        for i, match in enumerate(found_matches):
            # removing attach text from comment
            body = body.replace(match.group(0), '')
            # stripping url and extension
            attachments.append(Attachment(
                extractor,
                match.group(1),  # url
                'attach{}.{}'.format(i + 1, match.group(2))  # extension
            ))
        return (body, attachments)
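# A standalone check of the image-attachment pattern that _get_attachments
# relies on above. The comment body below is hypothetical sample data; the
# regex itself is copied from the importer.
import re

REGEXP = r'!\[[\w0-9]+?\]\(((?:https?:\/\/)?[\da-z\.-]+\.[a-z\.]{2,6}' \
         r'[\/%\w\.-]*.(jpg|jpeg|png|gif))\)[\r\n]*'

body = 'See the screenshot:\r\n' \
       '![shot1](https://f.cloud.github.com/assets/1/2/shot.png)\r\n'
match = re.search(REGEXP, body, re.IGNORECASE)
print(match.group(1))  # https://f.cloud.github.com/assets/1/2/shot.png -> attachment URL
print(match.group(2))  # png -> used to build the attach1.png filename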