Esempio n. 1
0
 def import_tool(self, project, user, project_name,
                 mount_point=None, mount_label=None, **kw):
     """Import the issues of a GitHub project into a new tickets tool.

     ``kw['user_name']`` is required; it is joined with ``project_name``
     into the ``<user_name>/<project_name>`` path given to the extractor.

     Returns the installed app, or ``None`` (nothing is installed) when
     the extractor reports that the project has no tracker.
     """
     id_converter = ImportIdConverter.get()
     owner = kw['user_name']
     project_name = '%s/%s' % (owner, project_name)
     extractor = GitHubProjectExtractor(project_name, user=user)
     if not extractor.has_tracker():
         return
     app = project.install_app(
         'tickets', mount_point, mount_label,
         EnableVoting=False,
         open_status_names='open',
         closed_status_names='closed',
         import_id={'source': self.source, 'project_name': project_name})
     self.github_markdown_converter = GitHubMarkdownConverter(
         owner, project_name)
     ThreadLocalORMSession.flush_all()
     try:
         # Switched on for the whole import; always reset in ``finally``.
         M.session.artifact_orm_session._get().skip_mod_date = True
         with h.push_config(c, user=M.User.anonymous(), app=app):
             for ticket_num, issue in extractor.iter_issues():
                 self.max_ticket_num = max(ticket_num, self.max_ticket_num)
                 import_id = id_converter.expand(ticket_num, app)
                 ticket = TM.Ticket(
                     app_config_id=app.config._id,
                     custom_fields={},
                     ticket_num=ticket_num,
                     import_id=import_id)
                 self.process_fields(extractor, ticket, issue)
                 self.process_comments(extractor, ticket, issue)
                 self.process_events(extractor, ticket, issue)
                 self.process_milestones(ticket, issue)
                 # Flush and drop each ticket from the session as soon as
                 # it has been processed.
                 ticket_session = session(ticket)
                 ticket_session.flush(ticket)
                 session(ticket).expunge(ticket)
             app.globals.custom_fields = self.postprocess_milestones()
             app.globals.last_ticket_num = self.max_ticket_num
             ThreadLocalORMSession.flush_all()
         audit_message = 'import tool %s from %s on %s' % (
             app.config.options.mount_point, project_name, self.source)
         M.AuditLog.log(audit_message,
                        project=project, user=user, url=app.url)
         g.post_event('project_updated')
         app.globals.invalidate_bin_counts()
         return app
     finally:
         M.session.artifact_orm_session._get().skip_mod_date = False
Esempio n. 2
0
 def test_process_fields(self):
     """process_fields copies the GitHub issue fields onto the ticket.

     Date parsing is patched so that ``strptime`` returns its input
     unchanged, which lets the test compare the raw strings.
     """
     ticket = mock.Mock()
     issue = {
         'title': 'title',
         'state': 'New',
         'created_at': 'created_at',
         'updated_at': 'updated_at',
         # The logins must match the attribution lines asserted below
         # ("created by" -> creator, "owned by" -> owner).
         'assignee': {'login': 'owner'},
         'user': {'login': 'creator'},
         'body': 'hello',
         'labels': [{'name': 'first'}, {'name': 'second'}],
     }
     importer = tracker.GitHubTrackerImporter()
     importer.github_markdown_converter = GitHubMarkdownConverter(
         'user', 'project')
     extractor = mock.Mock()
     extractor.urlopen().read.return_value = 'data'
     with mock.patch.object(tracker, 'datetime') as dt:
         dt.strptime.side_effect = lambda s, f: s
         importer.process_fields(extractor, ticket, issue)
         self.assertEqual(ticket.summary, 'title')
         self.assertEqual(ticket.description,
                          '*Originally created by:* [creator](https://github.com/creator)\n*Originally owned by:* [owner](https://github.com/owner)\n\nhello')
         self.assertEqual(ticket.status, 'New')
         self.assertEqual(ticket.created_date, 'created_at')
         self.assertEqual(ticket.mod_date, 'updated_at')
         self.assertEqual(dt.strptime.call_args_list, [
             mock.call('created_at', '%Y-%m-%dT%H:%M:%SZ'),
             mock.call('updated_at', '%Y-%m-%dT%H:%M:%SZ'),
         ])
         self.assertEqual(ticket.labels, ['first', 'second'])
Esempio n. 3
0
    def test_github_markdown_converted_in_comments(self):
        ticket = mock.Mock()
        extractor = mock.Mock()
        body = '''Hello

```python
def hello(name):
    print "Hello, " + name
```'''
        body_converted = '''*Originally posted by:* [me](https://github.com/me)

Hello

    :::python
    def hello(name):
        print "Hello, " + name'''

        issue = {'comments_url': '/comments'}
        extractor.iter_comments.return_value = [
            {
                'body': body,
                'created_at': '2013-08-26T16:57:53Z',
                'user': {'login': '******'},
            }
        ]
        importer = tracker.GitHubTrackerImporter()
        importer.github_markdown_converter = GitHubMarkdownConverter(
            'user', 'project')
        importer.process_comments(extractor, ticket, issue)
        self.assertEqual(ticket.discussion_thread.add_post.call_args_list[0], mock.call(
            text=body_converted,
            timestamp=datetime(2013, 8, 26, 16, 57, 53),
            ignore_security=True,
        ))
Esempio n. 4
0
    def import_tool(self, project, user, project_name=None,
                    mount_point=None, mount_label=None, user_name=None,
                    tool_option=None, **kw):
        """Import a GitHub wiki into a new Wiki Allura tool.

        Returns the installed app, or ``None`` when the extractor reports
        that the project has no wiki. If anything fails after the app has
        been installed, the app is made admin-only and the error is
        re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        wiki_avail = extractor.has_wiki()
        if not wiki_avail:
            return

        repo_url = extractor.get_page_url('wiki_url')
        self.github_wiki_url = repo_url.replace('.wiki', '/wiki')
        self.app = project.install_app(
            "Wiki",
            mount_point=mount_point or 'wiki',
            mount_label=mount_label or 'Wiki',
            import_id={'source': self.source, 'project_name': project_name})
        with_history = (tool_option == 'import_history')
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(
            user_name, project_name)
        try:
            # Switched on for the whole import; always reset in ``finally``.
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                try:
                    wiki_url = extractor.get_page_url('wiki_url')
                    self.import_pages(wiki_url, history=with_history)
                except git.GitCommandError:
                    # Record what we tried to clone before propagating.
                    info_url = extractor.get_page_url('project_info')
                    log.error(
                        'Unable to clone GitHub wiki: wiki_url=%s; '
                        'wiki_avail=%s; avail_url=%s',
                        wiki_url, wiki_avail, info_url,
                        exc_info=True)
                    raise
            ThreadLocalORMSession.flush_all()
            audit_message = 'import tool %s from %s on %s' % (
                self.app.config.options.mount_point,
                project_name,
                self.source)
            M.AuditLog.log(audit_message,
                           project=project, user=user, url=self.app.url)
            g.post_event('project_updated')
            return self.app
        except Exception:
            h.make_app_admin_only(self.app)
            raise
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False
Esempio n. 5
0
 def import_tool(self,
                 project,
                 user,
                 project_name,
                 mount_point=None,
                 mount_label=None,
                 **kw):
     """Create a tickets tool and fill it with a GitHub project's issues.

     The GitHub path is ``<kw['user_name']>/<project_name>``; the
     ``user_name`` keyword is mandatory.

     Returns the new app, or ``None`` if the extractor says the project
     has no tracker (in which case no tool is installed).
     """
     converter = ImportIdConverter.get()
     github_user = kw['user_name']
     project_name = '%s/%s' % (github_user, project_name)
     extractor = GitHubProjectExtractor(project_name, user=user)
     if not extractor.has_tracker():
         return
     install_options = dict(
         EnableVoting=False,
         open_status_names='open',
         closed_status_names='closed',
         import_id={
             'source': self.source,
             'project_name': project_name,
         },
     )
     app = project.install_app(
         'tickets', mount_point, mount_label, **install_options)
     self.github_markdown_converter = GitHubMarkdownConverter(
         github_user, project_name)
     ThreadLocalORMSession.flush_all()
     try:
         # The flag stays set while tickets are created and is cleared
         # again in ``finally`` whatever happens.
         M.session.artifact_orm_session._get().skip_mod_date = True
         with h.push_config(c, user=M.User.anonymous(), app=app):
             for number, issue in extractor.iter_issues():
                 self.max_ticket_num = max(number, self.max_ticket_num)
                 ticket = TM.Ticket(
                     app_config_id=app.config._id,
                     custom_fields={},
                     ticket_num=number,
                     import_id=converter.expand(number, app))
                 self.process_fields(extractor, ticket, issue)
                 self.process_comments(extractor, ticket, issue)
                 self.process_events(extractor, ticket, issue)
                 self.process_milestones(ticket, issue)
                 # Persist the ticket, then remove it from the session.
                 session(ticket).flush(ticket)
                 session(ticket).expunge(ticket)
             app.globals.custom_fields = self.postprocess_milestones()
             app.globals.last_ticket_num = self.max_ticket_num
             ThreadLocalORMSession.flush_all()
         M.AuditLog.log(
             'import tool %s from %s on %s' % (
                 app.config.options.mount_point, project_name, self.source),
             project=project,
             user=user,
             url=app.url)
         g.post_event('project_updated')
         app.globals.invalidate_bin_counts()
         return app
     finally:
         M.session.artifact_orm_session._get().skip_mod_date = False
Esempio n. 6
0
    def import_tool(
        self,
        project,
        user,
        project_name=None,
        mount_point=None,
        mount_label=None,
        user_name=None,
        tool_option=None,
        **kw
    ):
        """Import a GitHub wiki into a new Wiki Allura tool.

        The GitHub path is ``<user_name>/<project_name>``. Page history is
        imported only when ``tool_option`` equals ``"import_history"``.

        Returns the installed app, or ``None`` when the extractor reports
        that the project has no wiki. If anything fails after the app has
        been installed, the app is made admin-only and the original
        exception is re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        if not extractor.has_wiki():
            return

        self.github_wiki_url = extractor.get_page_url("wiki_url").replace(".wiki", "/wiki")
        self.app = project.install_app(
            "Wiki",
            mount_point=mount_point or "wiki",
            mount_label=mount_label or "Wiki",
            import_id={"source": self.source, "project_name": project_name},
        )
        with_history = tool_option == "import_history"
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(user_name, project_name)
        try:
            # Switched on for the whole import; always reset in ``finally``.
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                self.import_pages(extractor.get_page_url("wiki_url"), history=with_history)
            ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                "import tool %s from %s on %s" % (self.app.config.options.mount_point, project_name, self.source),
                project=project,
                user=user,
                url=self.app.url,
            )
            g.post_event("project_updated")
            return self.app
        except Exception:
            # The exception is re-raised untouched, so no name is bound.
            h.make_app_admin_only(self.app)
            raise
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False
Esempio n. 7
0
    def test_github_markdown_converted_in_description(self):
        ticket = mock.Mock()
        body = '''Hello

```python
def hello(name):
    print "Hello, " + name
```'''
        body_converted = '''*Originally created by:* [creator](https://github.com/creator)
*Originally owned by:* [owner](https://github.com/owner)

Hello

    :::python
    def hello(name):
        print "Hello, " + name'''

        issue = {
            'body': body,
            'title': 'title',
            'state': 'New',
            'created_at': 'created_at',
            'updated_at': 'updated_at',
            'assignee': {
                'login': '******'
            },
            'user': {
                'login': '******'
            },
            'labels': [{
                'name': 'first'
            }, {
                'name': 'second'
            }],
        }
        importer = tracker.GitHubTrackerImporter()
        importer.github_markdown_converter = GitHubMarkdownConverter(
            'user', 'project')
        extractor = mock.Mock()
        extractor.urlopen().read.return_value = 'data'
        with mock.patch.object(tracker, 'datetime') as dt:
            dt.strptime.side_effect = lambda s, f: s
            importer.process_fields(extractor, ticket, issue)
        self.assertEqual(ticket.description.strip(), body_converted.strip())
Esempio n. 8
0
    def test_convert_markup(self):
        importer = GitHubWikiImporter()
        importer.github_wiki_url = 'https://github.com/a/b/wiki'
        importer.app = Mock()
        importer.app.url = '/p/test/wiki/'
        importer.github_markdown_converter = GitHubMarkdownConverter(
            'user', 'proj')
        f = importer.convert_markup
        source = '''Look at [[this page|Some Page]]

More info at: [[MoreInfo]] [[Even More Info]]

Our website is [[http://domain.net]].

'[[Escaped Tag]]

```python
codeblock
```

ticket #1

#1 header

sha aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'''
        result = '''Look at [this page](Some Page)

More info at: [MoreInfo] [Even More Info]

Our website is <http://domain.net>.

[[Escaped Tag]]

    :::python
    codeblock

ticket [#1]

[#1] header

sha [aaaaaa]'''
        assert_equal(f(source, 'test.md').strip(), result)

        assert_equal(f('h1. Hello', 't.textile').strip(), '# Hello')
Esempio n. 9
0
 def setUp(self):
     """Build the converter under test for user 'user', project 'project'."""
     converter = GitHubMarkdownConverter('user', 'project')
     self.conv = converter
Esempio n. 10
0
class TestGitHubMarkdownConverter(object):
    """Checks GitHubMarkdownConverter.convert on GitHub-flavoured markup."""

    def setUp(self):
        """Build a converter for user 'user' and project 'project'."""
        self.conv = GitHubMarkdownConverter('user', 'project')

    def test_convert_sha(self):
        """A bare 40-character sha becomes a short bracketed reference."""
        bare = '16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(bare), '[16c999]')

        in_context = 'some context  16c999e8c71134401a78d4d46435517b2271d6ac '
        assert_equal(self.conv.convert(in_context),
                     'some context  [16c999] ')

    def test_convert_user_sha(self):
        """``user@sha`` is shortened only for the converter's own user."""
        own = 'user@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(own), '[16c999]')

        # Not an owner of current project
        foreign = 'another-user@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(foreign), foreign)

    def test_convert_user_repo_sha(self):
        """``user/repo@sha`` links out to GitHub for other repositories."""
        own = 'user/project@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(own), '[16c999]')

        # Not a current project
        foreign = 'user/p@16c999e8c71134401a78d4d46435517b2271d6ac'
        expected = ('[user/p@16c999]'
                    '(https://github.com/user/p/commit/16c999e8c71134401a78d4d46435517b2271d6ac)')
        assert_equal(self.conv.convert(foreign), expected)

    def test_convert_ticket(self):
        """``#N`` becomes a bracketed ticket reference."""
        assert_equal(self.conv.convert('Ticket #1'), 'Ticket [#1]')
        assert_equal(self.conv.convert('#1'), '[#1]')

    def test_convert_user_ticket(self):
        """``user#N`` is converted only for the converter's own user."""
        assert_equal(self.conv.convert('user#1'), '[#1]')

        # Not an owner of current project
        foreign = 'another-user#1'
        assert_equal(self.conv.convert(foreign), 'another-user#1')

    def test_convert_user_repo_ticket(self):
        """``user/repo#N`` links out to GitHub for other repositories."""
        assert_equal(self.conv.convert('user/project#1'), '[#1]')

        # Not a current project
        converted = self.conv.convert('user/p#1')
        assert_equal(converted,
                     '[user/p#1](https://github.com/user/p/issues/1)')

    def test_convert_strikethrough(self):
        """``~~text~~`` becomes an ``<s>`` element."""
        struck = self.conv.convert('~~mistake~~')
        assert_equal(struck, '<s>mistake</s>')

    def test_inline_code_block(self):
        """Strikethrough markers inside inline code are left alone."""
        source = u'This `~~some text~~` converts to this ~~strike out~~.'
        expected = u'This `~~some text~~` converts to this <s>strike out</s>.'
        converted = self.conv.convert(source)
        assert_equal(converted.strip(), expected)

    def test_convert_code_blocks(self):
        """Fenced blocks become indented blocks, with a ``:::lang`` header
        when a language is given."""
        source = u'''```python
print "Hello!"
```

Two code blocks here!

```
for (var i = 0; i < a.length; i++) {
    console.log(i);
}
```'''
        expected = u''':::python
    print "Hello!"

Two code blocks here!

    for (var i = 0; i < a.length; i++) {
        console.log(i);
    }'''

        converted = self.conv.convert(source)
        assert_equal(converted.strip(), expected)

    def test_code_blocks_without_newline_before(self):
        """Fences that directly follow a text line are still converted."""
        backtick_fenced = u'''
There are some code snippet:
```
print 'Hello'
```
Pretty cool, ha?'''

        expected_plain = u'''
There are some code snippet:

    print 'Hello'
Pretty cool, ha?'''
        assert_equal(self.conv.convert(backtick_fenced).strip(),
                     expected_plain.strip())
        # The same input with tilde fences must give the same output.
        tilde_fenced = backtick_fenced.replace('```', '~~~')
        assert_equal(self.conv.convert(tilde_fenced).strip(),
                     expected_plain.strip())

        with_language = u'''
There are some code snippet:
```python
print 'Hello'
```
Pretty cool, ha?'''

        expected_language = u'''
There are some code snippet:

    :::python
    print 'Hello'
Pretty cool, ha?'''
        assert_equal(self.conv.convert(with_language).strip(),
                     expected_language.strip())
Esempio n. 11
0
    def import_tool(self, project, user, project_name=None,
                    mount_point=None, mount_label=None, user_name=None,
                    tool_option=None, **kw):
        """Import a GitHub wiki into a new Wiki Allura tool.

        Returns the installed app, or ``None`` when the wiki is not
        enabled or its repository cannot be cloned. If anything fails after
        the app has been installed, the app is made admin-only and the
        error is re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        wiki_avail = extractor.has_wiki()
        # has_wiki only indicates that wiki is enabled, but it does not mean
        # that it has any pages, so we should check if wiki repo actually
        # exists
        wiki_url = extractor.get_page_url('wiki_url')
        if not (wiki_avail and self.has_wiki_repo(wiki_url)):
            return

        browse_url = extractor.get_page_url('wiki_url')
        self.github_wiki_url = browse_url.replace('.wiki', '/wiki')
        self.app = project.install_app(
            "Wiki",
            mount_point=mount_point or 'wiki',
            mount_label=mount_label or 'Wiki',
            import_id={'source': self.source, 'project_name': project_name})
        with_history = (tool_option == 'import_history')
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(
            user_name, project_name)
        try:
            # Switched on for the whole import; always reset in ``finally``.
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                try:
                    self.import_pages(wiki_url, history=with_history)
                except git.GitCommandError:
                    # Record what we tried to clone before propagating.
                    info_url = extractor.get_page_url('project_info')
                    log.error(
                        'Unable to clone GitHub wiki: wiki_url=%s; '
                        'wiki_avail=%s; avail_url=%s',
                        wiki_url, wiki_avail, info_url,
                        exc_info=True)
                    raise
            ThreadLocalORMSession.flush_all()
            audit_message = 'import tool %s from %s on %s' % (
                self.app.config.options.mount_point,
                project_name,
                self.source)
            M.AuditLog.log(audit_message,
                           project=project, user=user, url=self.app.url)
            g.post_event('project_updated')
            return self.app
        except Exception:
            h.make_app_admin_only(self.app)
            raise
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False
Esempio n. 12
0
class GitHubWikiImporter(ToolImporter):
    """Tool importer that copies the pages of a GitHub wiki repository
    into a newly installed Allura Wiki app, converting the page markup
    on the way."""
    target_app_ep_names = 'wiki'

    controller = GitHubWikiImportController
    source = 'GitHub'
    tool_label = 'Wiki'
    tool_description = 'Import your wiki from GitHub'
    tool_option = {"import_history": "Import history"}

    mediawiki_exts = ['.wiki', '.mediawiki']
    markdown_exts = utils.MARKDOWN_EXTENSIONS
    textile_exts = ['.textile']
    # List of supported formats
    # https://github.com/gollum/gollum/wiki#page-files
    supported_formats = [
        '.asciidoc',
        '.creole',
        '.org',
        '.pod',
        '.rdoc',
        '.rest.txt',
        '.rst.txt',
        '.rest',
        '.rst',
    ] + mediawiki_exts + markdown_exts + textile_exts
    # Converted page names of the commit currently being imported; always
    # replaced wholesale by _set_available_pages, never mutated in place.
    available_pages = []

    def import_tool(self,
                    project,
                    user,
                    project_name=None,
                    mount_point=None,
                    mount_label=None,
                    user_name=None,
                    tool_option=None,
                    **kw):
        """ Import a GitHub wiki into a new Wiki Allura tool.

        Returns the installed app, or ``None`` when the wiki is not
        enabled or its repository cannot be cloned. Page history is
        imported only when ``tool_option`` equals ``'import_history'``.
        If anything fails after the app has been installed, the app is
        made admin-only and the error is re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        wiki_avail = extractor.has_wiki()
        # has_wiki only indicates that wiki is enabled, but it does not mean
        # that it has any pages, so we should check if wiki repo actually
        # exists
        wiki_url = extractor.get_page_url('wiki_url')
        if not wiki_avail or not self.has_wiki_repo(wiki_url):
            return

        self.github_wiki_url = extractor.get_page_url('wiki_url').replace(
            '.wiki', '/wiki')
        self.app = project.install_app("Wiki",
                                       mount_point=mount_point or 'wiki',
                                       mount_label=mount_label or 'Wiki',
                                       import_id={
                                           'source': self.source,
                                           'project_name': project_name,
                                       })
        with_history = tool_option == 'import_history'
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(
            user_name, project_name)
        try:
            # Switched on for the whole import; always reset in ``finally``.
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                try:
                    self.import_pages(wiki_url, history=with_history)
                except git.GitCommandError:
                    log.error(
                        'Unable to clone GitHub wiki: '
                        'wiki_url=%s; '
                        'wiki_avail=%s; '
                        'avail_url=%s',
                        wiki_url,
                        wiki_avail,
                        extractor.get_page_url('project_info'),
                        exc_info=True)
                    raise
            ThreadLocalORMSession.flush_all()
            M.AuditLog.log('import tool %s from %s on %s' %
                           (self.app.config.options.mount_point, project_name,
                            self.source),
                           project=project,
                           user=user,
                           url=self.app.url)
            g.post_event('project_updated')
            return self.app
        except Exception:
            h.make_app_admin_only(self.app)
            raise
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False

    def _set_available_pages(self, commit):
        """Store the converted names of all supported pages in ``commit``."""
        pages = [blob.name for blob in commit.tree.traverse()]
        pages = list(map(os.path.splitext, pages))
        pages = [
            self._convert_page_name(name) for name, ext in pages
            if ext in self.supported_formats
        ]
        self.available_pages = pages

    def _without_history(self, commit):
        """Create one page per top-level blob of ``commit``."""
        self._set_available_pages(commit)
        for page in commit.tree.blobs:
            self._make_page(page.data_stream.read(), page.name, commit)

    def _with_history(self, commit):
        """Apply the changes of one commit (edits, renames, deletions)."""
        # The page list depends only on the commit, so build it once rather
        # than re-traversing the tree for every changed file.
        self._set_available_pages(commit)
        for filename in commit.stats.files.keys():
            renamed_to = None
            if ' => ' in filename:
                # File renamed. Stats contains entry like 'Page.md =>
                # NewPage.md'. Test for and split on the same token so the
                # unpacking below can never fail.
                filename, renamed_to = filename.split(' => ', 1)
            if renamed_to and renamed_to in commit.tree:
                text = commit.tree[renamed_to].data_stream.read()
            elif filename in commit.tree:
                text = commit.tree[filename].data_stream.read()
            else:
                # file is deleted
                text = ''
            self._make_page(text, filename, commit, renamed_to)

    def _make_page(self, text, filename, commit, renamed_to=None):
        """Create, update, rename or delete the wiki page for ``filename``.

        Returns the page, or ``None`` when ``filename`` (or ``renamed_to``)
        has an unsupported extension and is therefore skipped.
        """
        orig_name = self._format_supported(filename)
        renamed_orig_name = self._format_supported(
            renamed_to) if renamed_to else None
        if not orig_name:
            return
        if renamed_to and not renamed_orig_name:
            return
        mod_date = datetime.utcfromtimestamp(commit.committed_date)
        wiki_page = WM.Page.upsert(self._convert_page_name(orig_name))
        wiki_page.timestamp = wiki_page.mod_date = mod_date
        if renamed_orig_name and renamed_to in commit.tree:
            wiki_page.title = self._convert_page_name(renamed_orig_name)
            wiki_page.text = self.convert_markup(h.really_unicode(text),
                                                 renamed_to)
        elif filename in commit.tree:
            wiki_page.text = self.convert_markup(h.really_unicode(text),
                                                 filename)
        else:
            # Not present in this commit's tree: the file was removed.
            wiki_page.delete()
        import_id_name = renamed_orig_name if renamed_orig_name else orig_name
        wiki_page.import_id = ImportIdConverter.get().expand(
            import_id_name, self.app)
        wiki_page.commit()
        return wiki_page

    def _format_supported(self, filename):
        """Return ``filename`` without its extension, or ``False`` when the
        extension is present but not a supported wiki format."""
        orig_name, ext = os.path.splitext(filename)
        if ext and ext not in self.supported_formats:
            log.info('Not a wiki page %s. Skipping.', filename)
            return False
        return orig_name

    def _convert_page_name(self, name):
        """Convert '-' and '/' into spaces in page name to match github behavior"""
        return name.replace('-', ' ').replace('/', ' ')

    def has_wiki_repo(self, wiki_url):
        """Return True if the wiki repository at ``wiki_url`` can be cloned.

        The probe clone goes into a temporary directory that is removed
        whether or not the clone succeeds.
        """
        wiki_path = mkdtemp()
        try:
            git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True)
        except git.GitCommandError:
            return False
        finally:
            # Previously skipped on failure, leaking the directory.
            rmtree(wiki_path)
        return True

    def import_pages(self, wiki_url, history=None):
        """Clone the wiki repository and import its pages.

        With a falsy ``history`` only the head commit of ``master`` is
        imported; otherwise every commit is replayed, oldest first. The
        temporary clone is removed even if the import raises.
        """
        wiki_path = mkdtemp()
        try:
            wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True)
            if not history:
                self._without_history(wiki.heads.master.commit)
            else:
                for commit in reversed(list(wiki.iter_commits())):
                    self._with_history(commit)
        finally:
            rmtree(wiki_path)

    def convert_markup(self, text, filename):
        """Convert any supported github markup into Allura-markdown.

        Conversion happens in 4 phases:

        1. Convert source text to a html using h.render_any_markup.
        2. Rewrite links that match the wiki URL prefix with new location.
        3. Convert resulting html to a markdown using html2text, if available.
        4. Convert gollum tags

        If html2text module isn't available then only phases 1 and 2 will be executed.

        Files in mediawiki format are converted using mediawiki2markdown
        if html2text is available.

        Markdown files skip these phases: they only go through the GitHub
        markdown converter and the gollum tag conversion.
        """
        name, ext = os.path.splitext(filename)
        if ext in self.markdown_exts:
            text = self.github_markdown_converter.convert(text)
            return self.convert_gollum_tags(text)

        try:
            import html2text
            html2text.BODY_WIDTH = 0
        except ImportError:
            html2text = None

        if ext and ext in self.mediawiki_exts:
            if html2text:
                text = mediawiki2markdown(text)
                text = self.convert_gollum_tags(text)
                # Don't have html here, so we can't call self._rewrite_links.
                # Falling back to simpler rewriter.
                prefix = self.github_wiki_url
                new_prefix = self.app.url
                if not prefix.endswith('/'):
                    prefix += '/'
                if not new_prefix.endswith('/'):
                    new_prefix += '/'
                # The prefix is a literal URL; escape it so '.' and other
                # regex metacharacters in it are not treated as patterns.
                _re = re.compile(r'%s(\S*)' % re.escape(prefix))

                def repl(m):
                    return new_prefix + self._convert_page_name(m.group(1))

                text = _re.sub(repl, text)
            else:
                text = h.render_any_markup(filename, text)
                text = self.rewrite_links(text, self.github_wiki_url,
                                          self.app.url)
            return text
        elif ext and ext in self.textile_exts:
            text = self._prepare_textile_text(text)

            text = six.text_type(h.render_any_markup(filename, text))
            text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            if html2text:
                text = html2text.html2text(text)
                text = self.convert_gollum_tags(text)
            # Strip the <notextile> guards added by _prepare_textile_text,
            # in every raw or escaped form handled here.
            text = text.replace('<notextile>',
                                '').replace('< notextile>',
                                            '').replace('</notextile>', '')
            text = text.replace('&#60;notextile&#62;',
                                '').replace('&#60;/notextile&#62;', '')
            text = text.replace('&lt;notextile&gt;',
                                '').replace('&lt;/notextile&gt;', '')
            return text
        else:
            text = h.render_any_markup(filename, text)
            text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            if html2text:
                text = html2text.html2text(text)
                text = self.convert_gollum_tags(text)
            return text

    def convert_gollum_tags(self, text):
        """Replace every ``[[...]]`` gollum tag in ``text`` with its
        converted form; tags preceded by ``'`` are escaped."""
        tag_re = re.compile(
            r'''
            (?P<quote>')?             # optional tag escaping
            (?P<tag>\[\[              # tag start
            (?P<link>[^]]+)           # title/link/filename with options
            \]\])                     # tag end
        ''', re.VERBOSE)
        return tag_re.sub(self._gollum_tag_match, text)

    def _gollum_tag_match(self, match):
        """Return the replacement text for one matched gollum tag."""
        available_options = [
            'alt=',
            'frame',
            'align=',
            'float',
            'width=',
            'height=',
        ]
        quote = match.groupdict().get('quote')
        if quote:
            # tag is escaped, return untouched
            return match.group('tag')
        link = match.group('link').split('|')
        title = options = None
        if len(link) == 1:
            link = link[0]
        elif any(link[1].startswith(opt) for opt in available_options):
            # second element is option -> first is the link
            link, options = link[0], link[1:]
        else:
            title, link, options = link[0], link[1], link[2:]

        if link == '_TOC_':
            return '[TOC]'

        if link.startswith(('http://', 'https://')):
            sub = self._gollum_external_link
        # TODO: add embedded images and file links
        else:
            sub = self._gollum_page_link
        return sub(link, title, options)

    def _gollum_external_link(self, link, title, options):
        """Format an external URL as a titled link or an autolink."""
        if title:
            return '[{}]({})'.format(title, link)
        return '<{}>'.format(link)

    def _gollum_page_link(self, link, title, options):
        """Format a link to another wiki page."""
        page = self._convert_page_name(link)
        page = page.replace('&amp;', '&')  # allow & in page links
        # gollum page lookups are case-insensitive, you'll always get link to
        # whatever comes first in the file system, no matter how you refer to a page.
        # E.g. if you have two pages: a.md and A.md both [[a]] and [[A]] will refer a.md.
        # We're emulating this behavior using list of all available pages
        try:
            idx = [p.lower() for p in self.available_pages].index(page.lower())
        except ValueError:
            idx = None
        if idx is not None:
            page = self.available_pages[idx]

        if title:
            return '[{}]({})'.format(title, page)
        return '[{}]'.format(page)

    def rewrite_links(self, html, prefix, new_prefix):
        """Point ``<a>`` links that start with ``prefix`` at ``new_prefix``.

        The page part of each rewritten link is passed through
        _convert_page_name; link text that merely repeats the old target is
        updated as well. Anchors without an ``href`` are left untouched.
        Returns the resulting document as text.
        """
        if not prefix.endswith('/'):
            prefix += '/'
        if not new_prefix.endswith('/'):
            new_prefix += '/'
        soup = BeautifulSoup(html, 'html.parser')
        for a in soup.findAll('a'):
            href = a.get('href')
            # <a> elements without href (e.g. named anchors) yield None
            # here and used to crash on .startswith().
            if not href or not href.startswith(prefix):
                continue
            # Strip only the leading prefix, not later occurrences.
            page = href[len(prefix):]
            new_page = self._convert_page_name(page)
            a['href'] = new_prefix + new_page
            if a.string == page:
                a.string = new_page
            elif a.string == prefix + page:
                a.string = new_prefix + new_page
        return six.text_type(soup)

    def _prepare_textile_text(self, text):
        """Pre-process textile source before it is rendered."""
        # need to convert lists properly
        text_lines = text.splitlines()
        for i, line in enumerate(text_lines):
            if line.lstrip().startswith('#'):
                text_lines[i] = line.lstrip()
        text = '\n'.join(text_lines)

        # to convert gollum tags properly used <notextile> tag,
        # so these tags will not be affected by converter
        text = text.replace('[[',
                            '<notextile>[[').replace(']]', ']]</notextile>')
        return text
Esempio n. 13
0
class GitHubWikiImporter(ToolImporter):
    """Tool importer that copies a GitHub wiki into an Allura Wiki app.

    The wiki git repository is cloned into a temporary directory, each
    supported page file is converted to Allura markdown and stored as a
    wiki page, optionally replaying the whole commit history.
    """

    target_app_ep_names = "wiki"

    controller = GitHubWikiImportController
    source = "GitHub"
    tool_label = "Wiki"
    tool_description = "Import your wiki from GitHub"
    tool_option = {"import_history": "Import history"}

    mediawiki_exts = [".wiki", ".mediawiki"]
    markdown_exts = utils.MARKDOWN_EXTENSIONS
    textile_exts = [".textile"]
    # List of supported formats
    # https://github.com/gollum/gollum/wiki#page-files
    supported_formats = (
        [".asciidoc", ".creole", ".org", ".pod", ".rdoc", ".rest.txt", ".rst.txt", ".rest", ".rst"]
        + mediawiki_exts
        + markdown_exts
        + textile_exts
    )
    # Converted names of the pages present in the commit being imported;
    # replaced (never mutated in place) by _set_available_pages.
    available_pages = []

    def import_tool(
        self,
        project,
        user,
        project_name=None,
        mount_point=None,
        mount_label=None,
        user_name=None,
        tool_option=None,
        **kw
    ):
        """Import a GitHub wiki into a new Wiki Allura tool.

        :param project: project the Wiki app is installed into.
        :param user: user passed to the extractor and recorded in the audit log.
        :param project_name: GitHub repository name (without the owner).
        :param mount_point: mount point for the new app, defaults to "wiki".
        :param mount_label: label for the new app, defaults to "Wiki".
        :param user_name: GitHub owner of the repository.
        :param tool_option: "import_history" to replay every commit.
        :return: the installed app, or None when the wiki is not enabled or
            its repository cannot be cloned.

        Any exception raised after the app is installed makes the app
        admin-only and is re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        wiki_avail = extractor.has_wiki()
        # has_wiki only indicates that wiki is enabled, but it does not mean
        # that it has any pages, so we should check if wiki repo actually
        # exists
        wiki_url = extractor.get_page_url("wiki_url")
        if not wiki_avail or not self.has_wiki_repo(wiki_url):
            return

        self.github_wiki_url = wiki_url.replace(".wiki", "/wiki")
        self.app = project.install_app(
            "Wiki",
            mount_point=mount_point or "wiki",
            mount_label=mount_label or "Wiki",
            import_id={"source": self.source, "project_name": project_name},
        )
        with_history = tool_option == "import_history"
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(user_name, project_name)
        try:
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                try:
                    self.import_pages(wiki_url, history=with_history)
                except git.GitCommandError:
                    log.error(
                        "Unable to clone GitHub wiki: " "wiki_url=%s; " "wiki_avail=%s; " "avail_url=%s",
                        wiki_url,
                        wiki_avail,
                        extractor.get_page_url("project_info"),
                        exc_info=True,
                    )
                    raise
            ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                "import tool %s from %s on %s" % (self.app.config.options.mount_point, project_name, self.source),
                project=project,
                user=user,
                url=self.app.url,
            )
            g.post_event("project_updated")
            return self.app
        except Exception:
            h.make_app_admin_only(self.app)
            raise
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False

    def _set_available_pages(self, commit):
        """Record the converted names of all supported page files in ``commit``."""
        split_names = [os.path.splitext(blob.name) for blob in commit.tree.traverse()]
        self.available_pages = [
            self._convert_page_name(name) for name, ext in split_names if ext in self.supported_formats
        ]

    def _without_history(self, commit):
        """Create one page per top-level blob of ``commit``, ignoring history."""
        self._set_available_pages(commit)
        for page in commit.tree.blobs:
            self._make_page(page.data_stream.read(), page.name, commit)

    def _with_history(self, commit):
        """Apply the changes of a single ``commit`` to the wiki pages."""
        # The page list depends only on the commit, so compute it once
        # rather than for every changed file.
        self._set_available_pages(commit)
        for filename in commit.stats.files:
            renamed_to = None
            if " => " in filename:
                # File renamed. Stats contains entry like 'Page.md =>
                # NewPage.md'
                filename, renamed_to = filename.split(" => ", 1)
            if renamed_to and renamed_to in commit.tree:
                text = commit.tree[renamed_to].data_stream.read()
            elif filename in commit.tree:
                text = commit.tree[filename].data_stream.read()
            else:
                # file is deleted
                text = ""
            self._make_page(text, filename, commit, renamed_to)

    def _make_page(self, text, filename, commit, renamed_to=None):
        """Create, update, rename or delete the wiki page backed by ``filename``.

        :param text: raw file content at ``commit``.
        :param filename: path of the file before the change.
        :param commit: commit supplying the modification date and the tree
            used to tell updates from deletions.
        :param renamed_to: new path when the commit renamed the file.
        :return: the wiki page, or None when the file (or its new name) is
            not in a supported format.
        """
        orig_name = self._format_supported(filename)
        renamed_orig_name = self._format_supported(renamed_to) if renamed_to else None
        if not orig_name:
            return
        if renamed_to and not renamed_orig_name:
            return
        mod_date = datetime.utcfromtimestamp(commit.committed_date)
        wiki_page = WM.Page.upsert(self._convert_page_name(orig_name))
        wiki_page.timestamp = wiki_page.mod_date = mod_date
        wiki_page.viewable_by = ["all"]
        if renamed_orig_name and renamed_to in commit.tree:
            wiki_page.title = self._convert_page_name(renamed_orig_name)
            wiki_page.text = self.convert_markup(h.really_unicode(text), renamed_to)
        elif filename in commit.tree:
            wiki_page.text = self.convert_markup(h.really_unicode(text), filename)
        else:
            # neither the old nor the new name is in the tree: file was removed
            wiki_page.delete()
        import_id_name = renamed_orig_name if renamed_orig_name else orig_name
        wiki_page.import_id = ImportIdConverter.get().expand(import_id_name, self.app)
        wiki_page.commit()
        return wiki_page

    def _format_supported(self, filename):
        """Return ``filename`` without its extension, or False if unsupported.

        Files without any extension are accepted.
        """
        orig_name, ext = os.path.splitext(filename)
        if ext and ext not in self.supported_formats:
            log.info("Not a wiki page %s. Skipping.", filename)
            return False
        return orig_name

    def _convert_page_name(self, name):
        """Convert '-' and '/' into spaces in page name to match github behavior"""
        return name.replace("-", " ").replace("/", " ")

    def has_wiki_repo(self, wiki_url):
        """Return True when the wiki repository at ``wiki_url`` can be cloned.

        The probe clone lives in a temporary directory that is removed on
        both the success and the failure path.
        """
        wiki_path = mkdtemp()
        try:
            git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True)
        except git.GitCommandError:
            return False
        finally:
            rmtree(wiki_path, ignore_errors=True)
        return True

    def import_pages(self, wiki_url, history=None):
        """Clone the wiki at ``wiki_url`` and import its pages.

        :param wiki_url: git URL of the wiki repository.
        :param history: when true every commit is replayed oldest-first,
            otherwise only the tip of ``master`` is imported.
        :raises git.GitCommandError: when the repository cannot be cloned.
        """
        wiki_path = mkdtemp()
        try:
            wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True)
            if not history:
                self._without_history(wiki.heads.master.commit)
            else:
                for commit in reversed(list(wiki.iter_commits())):
                    self._with_history(commit)
        finally:
            # always drop the temporary clone, even if the import failed
            rmtree(wiki_path, ignore_errors=True)

    def convert_markup(self, text, filename):
        """Convert any supported github markup into Allura-markdown.

        Conversion happens in 4 phases:

        1. Convert source text to a html using h.render_any_markup.
        2. Rewrite links that match the wiki URL prefix with new location.
        3. Convert resulting html to a markdown using html2text, if available.
        4. Convert gollum tags

        If html2text module isn't available then only phases 1 and 2 will be executed.

        Files in mediawiki format are converted using mediawiki2markdown
        if html2text is available.
        """
        name, ext = os.path.splitext(filename)
        if ext in self.markdown_exts:
            text = self.github_markdown_converter.convert(text)
            return self.convert_gollum_tags(text)

        try:
            import html2text

            html2text.BODY_WIDTH = 0
        except ImportError:
            html2text = None

        if ext and ext in self.mediawiki_exts:
            if html2text:
                text = mediawiki2markdown(text)
                text = self.convert_gollum_tags(text)
                # Don't have html here, so we can't call self._rewrite_links.
                # Falling back to simpler rewriter.
                prefix = self.github_wiki_url
                new_prefix = self.app.url
                if not prefix.endswith("/"):
                    prefix += "/"
                if not new_prefix.endswith("/"):
                    new_prefix += "/"
                # The prefix is a literal URL: escape it so that '.' and
                # other regex metacharacters only match themselves.
                _re = re.compile(r"%s(\S*)" % re.escape(prefix))

                def repl(m):
                    return new_prefix + self._convert_page_name(m.group(1))

                text = _re.sub(repl, text)
            else:
                text = h.render_any_markup(filename, text)
                text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            return text
        elif ext and ext in self.textile_exts:
            text = self._prepare_textile_text(text)

            text = h.render_any_markup(filename, text)
            text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            if html2text:
                text = html2text.html2text(text)
                text = self.convert_gollum_tags(text)
            # drop the protective tags added by _prepare_textile_text,
            # in plain and in both escaped forms
            text = text.replace("<notextile>", "").replace("</notextile>", "")
            text = text.replace("&#60;notextile&#62;", "").replace("&#60;/notextile&#62;", "")
            text = text.replace("&lt;notextile&gt;", "").replace("&lt;/notextile&gt;", "")
            return text
        else:
            text = h.render_any_markup(filename, text)
            text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            if html2text:
                text = html2text.html2text(text)
                text = self.convert_gollum_tags(text)
            return text

    def convert_gollum_tags(self, text):
        """Replace every ``[[...]]`` gollum tag in ``text`` with its markdown form."""
        tag_re = re.compile(
            r"""
            (?P<quote>')?             # optional tag escaping
            (?P<tag>\[\[              # tag start
            (?P<link>[^]]+)           # title/link/filename with options
            \]\])                     # tag end
        """,
            re.VERBOSE,
        )
        return tag_re.sub(self._gollum_tag_match, text)

    def _gollum_tag_match(self, match):
        """Return the replacement text for one matched gollum tag."""
        available_options = ["alt=", "frame", "align=", "float", "width=", "height="]
        quote = match.groupdict().get("quote")
        if quote:
            # tag is escaped, return untouched
            return match.group("tag")
        link = match.group("link").split("|")
        title = options = None
        if len(link) == 1:
            link = link[0]
        elif any(link[1].startswith(opt) for opt in available_options):
            # second element is option -> first is the link
            link, options = link[0], link[1:]
        else:
            title, link, options = link[0], link[1], link[2:]

        if link == "_TOC_":
            return "[TOC]"

        if link.startswith("http://") or link.startswith("https://"):
            sub = self._gollum_external_link
        # TODO: add embedded images and file links
        else:
            sub = self._gollum_page_link
        return sub(link, title, options)

    def _gollum_external_link(self, link, title, options):
        """Format an external URL as a markdown link (or autolink without title)."""
        if title:
            return u"[{}]({})".format(title, link)
        return u"<{}>".format(link)

    def _gollum_page_link(self, link, title, options):
        """Format a link to another wiki page as an Allura artifact link."""
        page = self._convert_page_name(link)
        page = page.replace(u"&amp;", u"&")  # allow & in page links
        # gollum page lookups are case-insensitive, you'll always get link to
        # whatever comes first in the file system, no matter how you refer to a page.
        # E.g. if you have two pages: a.md and A.md both [[a]] and [[A]] will refer a.md.
        # We're emulating this behavior using list of all available pages
        try:
            # a real list is required for .index(); map() is not guaranteed
            # to return one
            idx = [p.lower() for p in self.available_pages].index(page.lower())
        except ValueError:
            idx = None
        if idx is not None:
            page = self.available_pages[idx]

        if title:
            return u"[{}]({})".format(title, page)
        return u"[{}]".format(page)

    def rewrite_links(self, html, prefix, new_prefix):
        """Point links that start with ``prefix`` at ``new_prefix`` instead.

        The part of the link after the prefix is passed through
        ``_convert_page_name``; link text equal to the page name or to the
        full old URL is rewritten too.  Returns the document as text.
        """
        if not prefix.endswith("/"):
            prefix += "/"
        if not new_prefix.endswith("/"):
            new_prefix += "/"
        soup = BeautifulSoup(html)
        for a in soup.findAll("a"):
            href = a.get("href")
            # anchors without an href have nothing to rewrite
            if not href or not href.startswith(prefix):
                continue
            # strip only the leading prefix, not later occurrences
            page = href[len(prefix):]
            new_page = self._convert_page_name(page)
            a["href"] = new_prefix + new_page
            if a.text == page:
                a.setString(new_page)
            elif a.text == prefix + page:
                a.setString(new_prefix + new_page)
        return unicode(soup)

    def _prepare_textile_text(self, text):
        """Normalise list lines and shield gollum tags before textile rendering."""
        # need to convert lists properly
        text_lines = text.splitlines()
        for i, l in enumerate(text_lines):
            if l.lstrip().startswith("#"):
                text_lines[i] = l.lstrip()
        text = "\n".join(text_lines)

        # to convert gollum tags properly used <notextile> tag,
        # so these tags will not be affected by converter
        text = text.replace("[[", "<notextile>[[").replace("]]", "]]</notextile>")
        return text
Esempio n. 14
0
class GitHubWikiImporter(ToolImporter):
    """Tool importer that copies a GitHub wiki into an Allura Wiki app.

    The wiki git repository is cloned into a temporary directory, each
    supported page file is converted to Allura markdown and stored as a
    wiki page, optionally replaying the whole commit history.
    """
    target_app_ep_names = 'wiki'

    controller = GitHubWikiImportController
    source = 'GitHub'
    tool_label = 'Wiki'
    tool_description = 'Import your wiki from GitHub'
    tool_option = {"import_history": "Import history"}

    mediawiki_exts = ['.wiki', '.mediawiki']
    markdown_exts = utils.MARKDOWN_EXTENSIONS
    textile_exts = ['.textile']
    # List of supported formats
    # https://github.com/gollum/gollum/wiki#page-files
    supported_formats = [
        '.asciidoc',
        '.creole',
        '.org',
        '.pod',
        '.rdoc',
        '.rest.txt',
        '.rst.txt',
        '.rest',
        '.rst',
    ] + mediawiki_exts + markdown_exts + textile_exts
    # Converted names of the pages present in the commit being imported;
    # replaced (never mutated in place) by _set_available_pages.
    available_pages = []

    def import_tool(
            self, project, user, project_name=None, mount_point=None,
            mount_label=None, user_name=None, tool_option=None, **kw):
        """ Import a GitHub wiki into a new Wiki Allura tool.

        :param project: project the Wiki app is installed into.
        :param user: user passed to the extractor and recorded in the audit log.
        :param project_name: GitHub repository name (without the owner).
        :param mount_point: mount point for the new app, defaults to 'wiki'.
        :param mount_label: label for the new app, defaults to 'Wiki'.
        :param user_name: GitHub owner of the repository.
        :param tool_option: 'import_history' to replay every commit.
        :return: the installed app, or None when the wiki is not enabled.

        Any exception raised after the app is installed makes the app
        admin-only and is re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        wiki_avail = extractor.has_wiki()
        if not wiki_avail:
            return

        # Fetched once: used both for the clone and (with '.wiki' turned
        # into '/wiki') as the prefix of links to rewrite.
        wiki_url = extractor.get_page_url('wiki_url')
        self.github_wiki_url = wiki_url.replace('.wiki', '/wiki')
        self.app = project.install_app(
            "Wiki",
            mount_point=mount_point or 'wiki',
            mount_label=mount_label or 'Wiki',
            import_id={
                'source': self.source,
                'project_name': project_name,
            }
        )
        with_history = tool_option == 'import_history'
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(
            user_name, project_name)
        try:
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                try:
                    self.import_pages(wiki_url, history=with_history)
                except git.GitCommandError:
                    log.error(
                        'Unable to clone GitHub wiki: '
                        'wiki_url=%s; '
                        'wiki_avail=%s; '
                        'avail_url=%s',
                        wiki_url, wiki_avail,
                        extractor.get_page_url('project_info'),
                        exc_info=True)
                    raise
            ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                'import tool %s from %s on %s' % (
                    self.app.config.options.mount_point,
                    project_name,
                    self.source),
                project=project,
                user=user,
                url=self.app.url)
            g.post_event('project_updated')
            return self.app
        except Exception:
            h.make_app_admin_only(self.app)
            raise
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False

    def _set_available_pages(self, commit):
        """Record the converted names of all supported page files in ``commit``."""
        split_names = [os.path.splitext(blob.name)
                       for blob in commit.tree.traverse()]
        self.available_pages = [
            self._convert_page_name(name) for name, ext in split_names
            if ext in self.supported_formats]

    def _without_history(self, commit):
        """Create one page per top-level blob of ``commit``, ignoring history."""
        self._set_available_pages(commit)
        for page in commit.tree.blobs:
            self._make_page(page.data_stream.read(), page.name, commit)

    def _with_history(self, commit):
        """Apply the changes of a single ``commit`` to the wiki pages."""
        # The page list depends only on the commit, so compute it once
        # rather than for every changed file.
        self._set_available_pages(commit)
        for filename in commit.stats.files:
            renamed_to = None
            if ' => ' in filename:
                # File renamed. Stats contains entry like 'Page.md =>
                # NewPage.md'
                filename, renamed_to = filename.split(' => ', 1)
            if renamed_to and renamed_to in commit.tree:
                text = commit.tree[renamed_to].data_stream.read()
            elif filename in commit.tree:
                text = commit.tree[filename].data_stream.read()
            else:
                # file is deleted
                text = ''
            self._make_page(text, filename, commit, renamed_to)

    def _make_page(self, text, filename, commit, renamed_to=None):
        """Create, update, rename or delete the wiki page backed by ``filename``.

        :param text: raw file content at ``commit``.
        :param filename: path of the file before the change.
        :param commit: commit supplying the modification date and the tree
            used to tell updates from deletions.
        :param renamed_to: new path when the commit renamed the file.
        :return: the wiki page, or None when the file (or its new name) is
            not in a supported format.
        """
        orig_name = self._format_supported(filename)
        renamed_orig_name = self._format_supported(
            renamed_to) if renamed_to else None
        if not orig_name:
            return
        if renamed_to and not renamed_orig_name:
            return
        mod_date = datetime.utcfromtimestamp(commit.committed_date)
        wiki_page = WM.Page.upsert(self._convert_page_name(orig_name))
        wiki_page.timestamp = wiki_page.mod_date = mod_date
        wiki_page.viewable_by = ['all']
        if renamed_orig_name and renamed_to in commit.tree:
            wiki_page.title = self._convert_page_name(renamed_orig_name)
            wiki_page.text = self.convert_markup(
                h.really_unicode(text), renamed_to)
        elif filename in commit.tree:
            wiki_page.text = self.convert_markup(
                h.really_unicode(text), filename)
        else:
            # neither the old nor the new name is in the tree: file was removed
            wiki_page.delete()
        import_id_name = renamed_orig_name if renamed_orig_name else orig_name
        wiki_page.import_id = ImportIdConverter.get().expand(
            import_id_name, self.app)
        wiki_page.commit()
        return wiki_page

    def _format_supported(self, filename):
        """Return ``filename`` without its extension, or False if unsupported.

        Files without any extension are accepted.
        """
        orig_name, ext = os.path.splitext(filename)
        if ext and ext not in self.supported_formats:
            log.info('Not a wiki page %s. Skipping.', filename)
            return False
        return orig_name

    def _convert_page_name(self, name):
        """Convert '-' and '/' into spaces in page name to match github behavior"""
        return name.replace('-', ' ').replace('/', ' ')

    def import_pages(self, wiki_url, history=None):
        """Clone the wiki at ``wiki_url`` and import its pages.

        :param wiki_url: git URL of the wiki repository.
        :param history: when true every commit is replayed oldest-first,
            otherwise only the tip of ``master`` is imported.
        :raises git.GitCommandError: when the repository cannot be cloned.
        """
        wiki_path = mkdtemp()
        try:
            wiki = git.Repo.clone_from(wiki_url, to_path=wiki_path, bare=True)
            if not history:
                self._without_history(wiki.heads.master.commit)
            else:
                for commit in reversed(list(wiki.iter_commits())):
                    self._with_history(commit)
        finally:
            # always drop the temporary clone, even if the import failed
            rmtree(wiki_path, ignore_errors=True)

    def convert_markup(self, text, filename):
        """Convert any supported github markup into Allura-markdown.

        Conversion happens in 4 phases:

        1. Convert source text to a html using h.render_any_markup.
        2. Rewrite links that match the wiki URL prefix with new location.
        3. Convert resulting html to a markdown using html2text, if available.
        4. Convert gollum tags

        If html2text module isn't available then only phases 1 and 2 will be executed.

        Files in mediawiki format are converted using mediawiki2markdown
        if html2text is available.
        """
        name, ext = os.path.splitext(filename)
        if ext in self.markdown_exts:
            text = self.github_markdown_converter.convert(text)
            return self.convert_gollum_tags(text)

        try:
            import html2text
            html2text.BODY_WIDTH = 0
        except ImportError:
            html2text = None

        if ext and ext in self.mediawiki_exts:
            if html2text:
                text = mediawiki2markdown(text)
                text = self.convert_gollum_tags(text)
                # Don't have html here, so we can't call self._rewrite_links.
                # Falling back to simpler rewriter.
                prefix = self.github_wiki_url
                new_prefix = self.app.url
                if not prefix.endswith('/'):
                    prefix += '/'
                if not new_prefix.endswith('/'):
                    new_prefix += '/'
                # The prefix is a literal URL: escape it so that '.' and
                # other regex metacharacters only match themselves.
                _re = re.compile(r'%s(\S*)' % re.escape(prefix))

                def repl(m):
                    return new_prefix + self._convert_page_name(m.group(1))
                text = _re.sub(repl, text)
            else:
                text = h.render_any_markup(filename, text)
                text = self.rewrite_links(
                    text, self.github_wiki_url, self.app.url)
            return text
        elif ext and ext in self.textile_exts:
            text = self._prepare_textile_text(text)

            text = h.render_any_markup(filename, text)
            text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            if html2text:
                text = html2text.html2text(text)
                text = self.convert_gollum_tags(text)
            # drop the protective tags added by _prepare_textile_text,
            # in plain and in both escaped forms
            text = text.replace('<notextile>', '').replace('</notextile>', '')
            text = text.replace('&#60;notextile&#62;', '').replace(
                '&#60;/notextile&#62;', '')
            text = text.replace('&lt;notextile&gt;', '').replace(
                '&lt;/notextile&gt;', '')
            return text
        else:
            text = h.render_any_markup(filename, text)
            text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
            if html2text:
                text = html2text.html2text(text)
                text = self.convert_gollum_tags(text)
            return text

    def convert_gollum_tags(self, text):
        """Replace every ``[[...]]`` gollum tag in ``text`` with its markdown form."""
        tag_re = re.compile(r'''
            (?P<quote>')?             # optional tag escaping
            (?P<tag>\[\[              # tag start
            (?P<link>[^]]+)           # title/link/filename with options
            \]\])                     # tag end
        ''', re.VERBOSE)
        return tag_re.sub(self._gollum_tag_match, text)

    def _gollum_tag_match(self, match):
        """Return the replacement text for one matched gollum tag."""
        available_options = [
            'alt=',
            'frame',
            'align=',
            'float',
            'width=',
            'height=',
        ]
        quote = match.groupdict().get('quote')
        if quote:
            # tag is escaped, return untouched
            return match.group('tag')
        link = match.group('link').split('|')
        title = options = None
        if len(link) == 1:
            link = link[0]
        elif any(link[1].startswith(opt) for opt in available_options):
            # second element is option -> first is the link
            link, options = link[0], link[1:]
        else:
            title, link, options = link[0], link[1], link[2:]

        if link == '_TOC_':
            return '[TOC]'

        if link.startswith('http://') or link.startswith('https://'):
            sub = self._gollum_external_link
        # TODO: add embedded images and file links
        else:
            sub = self._gollum_page_link
        return sub(link, title, options)

    def _gollum_external_link(self, link, title, options):
        """Format an external URL as a markdown link (or autolink without title)."""
        if title:
            return u'[{}]({})'.format(title, link)
        return u'<{}>'.format(link)

    def _gollum_page_link(self, link, title, options):
        """Format a link to another wiki page as an Allura artifact link."""
        page = self._convert_page_name(link)
        page = page.replace(u'&amp;', u'&')  # allow & in page links
        # gollum page lookups are case-insensitive, you'll always get link to
        # whatever comes first in the file system, no matter how you refer to a page.
        # E.g. if you have two pages: a.md and A.md both [[a]] and [[A]] will refer a.md.
        # We're emulating this behavior using list of all available pages
        try:
            # a real list is required for .index(); map() is not guaranteed
            # to return one
            idx = [p.lower() for p in self.available_pages].index(
                page.lower())
        except ValueError:
            idx = None
        if idx is not None:
            page = self.available_pages[idx]

        if title:
            return u'[{}]({})'.format(title, page)
        return u'[{}]'.format(page)

    def rewrite_links(self, html, prefix, new_prefix):
        """Point links that start with ``prefix`` at ``new_prefix`` instead.

        The part of the link after the prefix is passed through
        ``_convert_page_name``; link text equal to the page name or to the
        full old URL is rewritten too.  Returns the document as text.
        """
        if not prefix.endswith('/'):
            prefix += '/'
        if not new_prefix.endswith('/'):
            new_prefix += '/'
        soup = BeautifulSoup(html)
        for a in soup.findAll('a'):
            href = a.get('href')
            # anchors without an href have nothing to rewrite
            if not href or not href.startswith(prefix):
                continue
            # strip only the leading prefix, not later occurrences
            page = href[len(prefix):]
            new_page = self._convert_page_name(page)
            a['href'] = new_prefix + new_page
            if a.text == page:
                a.setString(new_page)
            elif a.text == prefix + page:
                a.setString(new_prefix + new_page)
        return unicode(soup)

    def _prepare_textile_text(self, text):
        """Normalise list lines and shield gollum tags before textile rendering."""
        # need to convert lists properly
        text_lines = text.splitlines()
        for i, l in enumerate(text_lines):
            if l.lstrip().startswith('#'):
                text_lines[i] = l.lstrip()
        text = '\n'.join(text_lines)

        # to convert gollum tags properly used <notextile> tag,
        # so these tags will not be affected by converter
        text = text.replace(
            '[[', '<notextile>[[').replace(']]', ']]</notextile>')
        return text
Esempio n. 15
0
class GitHubTrackerImporter(ToolImporter):
    """Tool importer that copies GitHub issues into an Allura tickets app.

    Each issue becomes a ticket; its comments and status/assignment events
    become posts in the ticket's discussion thread, and issue milestones
    are collected into a ``_milestone`` custom field.
    """
    source = 'GitHub'
    target_app = ForgeTrackerApp
    controller = GitHubTrackerImportController
    tool_label = 'Issues'
    # Class-level defaults only: import_tool() rebinds both on the instance
    # so that state never leaks from one import into the next.
    max_ticket_num = 0
    open_milestones = set()

    def import_tool(self, project, user, project_name, mount_point=None,
                    mount_label=None, **kw):
        """Import the issues of a GitHub project into a new tickets app.

        :param project: project the tickets app is installed into.
        :param user: user passed to the extractor and recorded in the audit log.
        :param project_name: GitHub repository name (without the owner).
        :param mount_point: mount point for the new app.
        :param mount_label: label for the new app.
        :param kw: must contain ``user_name``, the GitHub repository owner.
        :return: the installed app.
        """
        # process_milestones() mutates open_milestones in place; without a
        # fresh per-import set it would be the one shared on the class and
        # carry milestones over from earlier imports.
        self.open_milestones = set()
        self.max_ticket_num = 0
        import_id_converter = ImportIdConverter.get()
        project_name = '%s/%s' % (kw['user_name'], project_name)
        app = project.install_app(
            'tickets', mount_point, mount_label,
            EnableVoting=False,
            open_status_names='open',
            closed_status_names='closed',
            import_id={
                'source': self.source,
                'project_name': project_name,
            }
        )
        self.github_markdown_converter = GitHubMarkdownConverter(
            kw['user_name'], project_name)
        ThreadLocalORMSession.flush_all()
        extractor = GitHubProjectExtractor(project_name, user=user)
        try:
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, user=M.User.anonymous(), app=app):
                for ticket_num, issue in extractor.iter_issues():
                    self.max_ticket_num = max(ticket_num, self.max_ticket_num)
                    ticket = TM.Ticket(
                        app_config_id=app.config._id,
                        custom_fields=dict(),
                        ticket_num=ticket_num,
                        import_id=import_id_converter.expand(ticket_num, app)
                    )
                    self.process_fields(ticket, issue)
                    self.process_comments(extractor, ticket, issue)
                    self.process_events(extractor, ticket, issue)
                    self.process_milestones(ticket, issue)
                    # flush and drop each ticket from the session once it
                    # has been processed
                    session(ticket).flush(ticket)
                    session(ticket).expunge(ticket)
                app.globals.custom_fields = self.postprocess_milestones()
                app.globals.last_ticket_num = self.max_ticket_num
                ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                'import tool %s from %s on %s' % (
                    app.config.options.mount_point,
                    project_name, self.source),
                project=project, user=user, url=app.url)
            g.post_event('project_updated')
            app.globals.invalidate_bin_counts()
            return app
        finally:
            M.session.artifact_orm_session._get().skip_mod_date = False

    def parse_datetime(self, datetime_string):
        """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp into a naive datetime."""
        return datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%SZ')

    def get_user_link(self, user):
        """Return a markdown link to the GitHub profile of ``user``."""
        return u'[{0}](https://github.com/{0})'.format(user)

    def process_fields(self, ticket, issue):
        """Fill summary, status, dates, description and labels of ``ticket``.

        Images embedded in the issue body are extracted and added to the
        ticket as attachments; the remaining body is converted to Allura
        markdown and prefixed with creator/owner attribution lines.
        """
        ticket.summary = issue['title']
        ticket.status = issue['state']
        ticket.created_date = self.parse_datetime(issue['created_at'])
        ticket.mod_date = self.parse_datetime(issue['updated_at'])
        if issue['assignee']:
            owner_line = '*Originally owned by:* {}\n'.format(
                self.get_user_link(issue['assignee']['login']))
        else:
            owner_line = ''
        # body processing happens here
        body, attachments = self._get_attachments(issue['body'])
        ticket.add_multiple_attachments(attachments)
        ticket.description = (
            u'*Originally created by:* {creator}\n'
            u'{owner}'
            u'\n'
            u'{body}').format(
                creator=self.get_user_link(issue['user']['login']),
                owner=owner_line,
                body=self.github_markdown_converter.convert(body),
            )
        ticket.labels = [label['name'] for label in issue['labels']]

    def process_comments(self, extractor, ticket, issue):
        """Add every comment of ``issue`` as a post on the ticket's thread."""
        for comment in extractor.iter_comments(issue):
            body, attachments = self._get_attachments(comment['body'])
            if comment['user']:
                posted_by = u'*Originally posted by:* {}\n\n'.format(
                    self.get_user_link(comment['user']['login']))
                body = posted_by + body
            p = ticket.discussion_thread.add_post(
                text=self.github_markdown_converter.convert(body),
                ignore_security=True,
                timestamp=self.parse_datetime(comment['created_at']),
            )
            p.add_multiple_attachments(attachments)

    def process_events(self, extractor, ticket, issue):
        """Record reopened/closed/assigned events as posts on the thread.

        Events of any other kind produce no text and are skipped.
        """
        for event in extractor.iter_events(issue):
            prefix = text = ''
            if event['event'] in ('reopened', 'closed'):
                prefix = '*Ticket changed by:* {}\n\n'.format(
                    self.get_user_link(event['actor']['login']))
            if event['event'] == 'reopened':
                text = '- **status**: closed --> open'
            elif event['event'] == 'closed':
                text = '- **status**: open --> closed'
            elif event['event'] == 'assigned':
                text = '- **assigned_to**: {}'.format(
                    self.get_user_link(event['actor']['login']))

            text = prefix + text
            if not text:
                continue
            ticket.discussion_thread.add_post(
                text=text,
                ignore_security=True,
                timestamp=self.parse_datetime(event['created_at'])
            )

    def process_milestones(self, ticket, issue):
        """Attach the issue's milestone to ``ticket`` and remember it.

        The ``(title, due date)`` pair is stored in ``open_milestones`` for
        ``postprocess_milestones``.
        """
        if issue['milestone']:
            title = issue['milestone']['title']
            due = None
            if issue['milestone']['due_on']:
                due = self.parse_datetime(issue['milestone']['due_on'])
            ticket.custom_fields = {
                '_milestone': title,
            }
            self.open_milestones.add((title, due,))

    def postprocess_milestones(self):
        """Build the custom-field definition listing all collected milestones.

        :return: a one-element list holding the ``_milestone`` field spec.
        """
        global_milestones = {
            'milestones': [],
            'type': 'milestone',
            'name': '_milestone',
            'label': 'Milestone'
        }
        for milestone in self.open_milestones:
            global_milestones['milestones'].append({
                'name': milestone[0],
                'due_date': unicode(milestone[1].date()) if milestone[1] else None,
                'complete': False,
            })
        return [global_milestones]

    def _get_attachments(self, body):
        """Extract embedded image links from ``body``.

        :param body: issue or comment text.
        :return: ``(body, attachments)`` where the image markup has been
            removed from ``body`` and ``attachments`` holds one
            ``Attachment(url, 'attachN.ext')`` per image found.
        """
        # at github, attachments are images only and are included into comment's body
        # usual syntax is
        # ![cdbpzjc5ex4](https://f.cloud.github.com/assets/979771/1027411/a393ab5e-0e70-11e3-8a38-b93a3df904cf.jpg)\r\n
        # Both halves are raw strings, and the dot before the extension is
        # escaped so that only a real ".jpg"/".png"/... suffix matches.
        REGEXP = (r'!\[[\w0-9]+?\]\(((?:https?:\/\/)?[\da-z\.-]+\.[a-z\.]{2,6}'
                  r'[\/%\w\.-]*\.(jpg|jpeg|png|gif))\)[\r\n]*')
        attachments = []

        try:
            found_matches = re.finditer(REGEXP, body, re.IGNORECASE)
        except TypeError:
            # body is not a string (e.g. None): search its text form instead
            found_matches = re.finditer(REGEXP, str(body), re.IGNORECASE)

        for i, match in enumerate(found_matches):
            # removing attach text from comment
            body = body.replace(match.group(0), '')
            # stripping url and extension
            attachments.append(Attachment(
                match.group(1),  # url
                'attach{}.{}'.format(i + 1, match.group(2))  # extension
            ))
        return (body, attachments)
Esempio n. 16
0
class GitHubTrackerImporter(ToolImporter):
    """Importer that copies the issues of a GitHub project into a 'tickets' app.

    ``import_tool`` drives the import.  The ``process_*`` helpers translate
    one GitHub issue (a dict as yielded by the extractor) into ticket fields,
    discussion posts and milestone data.
    """
    source = 'GitHub'
    target_app_ep_names = 'tickets'
    controller = GitHubTrackerImportController
    tool_label = 'Issues'
    # Class-level defaults only.  import_tool() rebinds both names on the
    # instance before anything is collected, so state gathered by one import
    # cannot leak into another through the shared class-level set.
    max_ticket_num = 0
    open_milestones = set()

    def import_tool(self, project, user, project_name, mount_point=None,
                    mount_label=None, **kw):
        """Install a tickets app in ``project`` and fill it from GitHub issues.

        :param project: project the tickets app is installed into
        :param user: user handed to the extractor and recorded in the audit log
        :param project_name: GitHub repository name, without the owner
        :param mount_point: mount point passed on to ``install_app``
        :param mount_label: mount label passed on to ``install_app``
        :param kw: must contain ``user_name``, the GitHub owner of the repository
        :returns: the installed app, or ``None`` when the GitHub project has
            no tracker
        """
        # Fresh per-import state: ``open_milestones`` would otherwise be the
        # set shared by every instance of the class, and milestones collected
        # by an earlier import would be written into this tracker as well.
        self.max_ticket_num = 0
        self.open_milestones = set()

        import_id_converter = ImportIdConverter.get()
        project_name = '%s/%s' % (kw['user_name'], project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        if not extractor.has_tracker():
            return
        app = project.install_app('tickets', mount_point, mount_label,
                                  EnableVoting=False,
                                  open_status_names='open',
                                  closed_status_names='closed',
                                  import_id={
                                      'source': self.source,
                                      'project_name': project_name,
                                  }
                                  )
        self.github_markdown_converter = GitHubMarkdownConverter(
            kw['user_name'], project_name)
        ThreadLocalORMSession.flush_all()
        try:
            # NOTE(review): presumably keeps the session from overwriting the
            # imported mod_date values -- confirm against the session code.
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, user=M.User.anonymous(), app=app):
                for ticket_num, issue in extractor.iter_issues():
                    self.max_ticket_num = max(ticket_num, self.max_ticket_num)
                    ticket = TM.Ticket(
                        app_config_id=app.config._id,
                        custom_fields=dict(),
                        ticket_num=ticket_num,
                        import_id=import_id_converter.expand(ticket_num, app)
                    )
                    self.process_fields(extractor, ticket, issue)
                    self.process_comments(extractor, ticket, issue)
                    self.process_events(extractor, ticket, issue)
                    self.process_milestones(ticket, issue)
                    # flush and drop each ticket as soon as it is complete
                    session(ticket).flush(ticket)
                    session(ticket).expunge(ticket)
                app.globals.custom_fields = self.postprocess_milestones()
                app.globals.last_ticket_num = self.max_ticket_num
                ThreadLocalORMSession.flush_all()
            M.AuditLog.log(
                'import tool %s from %s on %s' % (
                    app.config.options.mount_point,
                    project_name, self.source),
                project=project, user=user, url=app.url)
            g.post_event('project_updated')
            app.globals.invalidate_bin_counts()
            return app
        finally:
            # always restore the flag, whether the import succeeded or not
            M.session.artifact_orm_session._get().skip_mod_date = False

    def parse_datetime(self, datetime_string):
        """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp into a naive datetime."""
        return datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%SZ')

    def get_user_link(self, user):
        """Return a Markdown link to the GitHub profile of login ``user``."""
        return u'[{0}](https://github.com/{0})'.format(user)

    def process_fields(self, extractor, ticket, issue):
        """Fill summary, status, dates, description, labels and attachments.

        The description starts with a line naming the original creator,
        followed by a line naming the assignee when there is one, then the
        converted issue body with inline image links moved to attachments.
        """
        ticket.summary = issue['title']
        ticket.status = issue['state']
        ticket.created_date = self.parse_datetime(issue['created_at'])
        ticket.mod_date = self.parse_datetime(issue['updated_at'])
        if issue['assignee']:
            owner_line = '*Originally owned by:* {}\n'.format(
                self.get_user_link(issue['assignee']['login']))
        else:
            owner_line = ''
        # body processing happens here
        body, attachments = self._get_attachments(extractor, issue['body'])
        ticket.add_multiple_attachments(attachments)
        ticket.description = (
            u'*Originally created by:* {creator}\n'
            u'{owner}'
            u'\n'
            u'{body}').format(
            creator=self.get_user_link(issue['user']['login']),
            owner=owner_line,
            body=self.github_markdown_converter.convert(body),
        )
        ticket.labels = [label['name'] for label in issue['labels']]

    def process_comments(self, extractor, ticket, issue):
        """Add one discussion post per GitHub comment of ``issue``.

        Each post keeps the comment's creation time, is prefixed with the
        original author when the comment has one, and receives the images
        found in the comment body as attachments.
        """
        for comment in extractor.iter_comments(issue):
            body, attachments = self._get_attachments(
                extractor, comment['body'])
            if comment['user']:
                posted_by = u'*Originally posted by:* {}\n\n'.format(
                    self.get_user_link(comment['user']['login']))
                body = posted_by + body
            p = ticket.discussion_thread.add_post(
                text=self.github_markdown_converter.convert(body),
                ignore_security=True,
                timestamp=self.parse_datetime(comment['created_at']),
            )
            p.add_multiple_attachments(attachments)

    def process_events(self, extractor, ticket, issue):
        """Add a discussion post for each status or assignment event.

        Only ``reopened``, ``closed`` and ``assigned`` events produce a post;
        every other event type is skipped.
        """
        for event in extractor.iter_events(issue):
            prefix = text = ''
            if event['event'] in ('reopened', 'closed'):
                prefix = '*Ticket changed by:* {}\n\n'.format(
                    self.get_user_link(event['actor']['login']))
            if event['event'] == 'reopened':
                text = '- **status**: closed --> open'
            elif event['event'] == 'closed':
                text = '- **status**: open --> closed'
            elif event['event'] == 'assigned':
                text = '- **assigned_to**: {}'.format(
                    self.get_user_link(event['actor']['login']))

            text = prefix + text
            if not text:
                # event type we do not report
                continue
            ticket.discussion_thread.add_post(
                text=text,
                ignore_security=True,
                timestamp=self.parse_datetime(event['created_at'])
            )

    def process_milestones(self, ticket, issue):
        """Store the issue's milestone on ``ticket`` and remember it.

        Does nothing when the issue has no milestone.  Otherwise the ticket's
        ``custom_fields`` are replaced by a dict holding only ``_milestone``
        and ``(title, due)`` is added to ``self.open_milestones``; ``due`` is
        ``None`` when the milestone has no ``due_on`` value.
        """
        if issue['milestone']:
            title = issue['milestone']['title']
            due = None
            if issue['milestone']['due_on']:
                due = self.parse_datetime(issue['milestone']['due_on'])
            ticket.custom_fields = {
                '_milestone': title,
            }
            self.open_milestones.add((title, due,))

    def postprocess_milestones(self):
        """Return the ``_milestone`` custom-field definition as a one-item list.

        Every ``(title, due)`` pair collected in ``self.open_milestones``
        becomes one entry, marked as not complete.
        """
        global_milestones = {
            'milestones': [],
            'type': 'milestone',
            'name': '_milestone',
            'label': 'Milestone'
        }
        for milestone in self.open_milestones:
            global_milestones['milestones'].append({
                'name': milestone[0],
                'due_date': unicode(milestone[1].date()) if milestone[1] else None,
                'complete': False,
            })
        return [global_milestones]

    def _get_attachments(self, extractor, body):
        """Split inline image links out of ``body``.

        :param extractor: passed through to every ``Attachment`` created
        :param body: text of an issue or comment.  A value ``re`` cannot scan
            (e.g. ``None``) is searched through its ``str()`` form instead.
        :returns: ``(body, attachments)`` where ``body`` has every matched
            image link removed and ``attachments`` holds one ``Attachment``
            per link, named ``attach<N>.<extension>`` in order of appearance.
        """
        # at github, attachments are images only and are included into comment's body
        # usual syntax is
        # ![cdbpzjc5ex4](https://f.cloud.github.com/assets/979771/1027411/a393ab5e-0e70-11e3-8a38-b93a3df904cf.jpg)\r\n
        #
        # Every piece is a raw string so the regex escapes reach ``re``
        # untouched, and the dot in front of the extension is escaped: left
        # bare it matched any character, so ".../picturejpg" was taken for an
        # image.
        REGEXP = (
            r'!\[[\w0-9]+?\]\('
            r'((?:https?:\/\/)?[\da-z\.-]+\.[a-z\.]{2,6}'   # group 1: whole url
            r'[\/%\w\.-]*\.(jpg|jpeg|png|gif))'             # group 2: extension
            r'\)[\r\n]*'
        )
        attachments = []

        try:
            found_matches = re.finditer(REGEXP, body, re.IGNORECASE)
        except TypeError:
            # body is not something re can scan; fall back to its text form
            found_matches = re.finditer(REGEXP, str(body), re.IGNORECASE)

        for i, match in enumerate(found_matches):
            # removing attach text from comment
            body = body.replace(match.group(0), '')
            # stripping url and extension
            attachments.append(Attachment(
                extractor,
                match.group(1),  # url
                'attach{}.{}'.format(i + 1, match.group(2))  # extension
            ))
        return (body, attachments)
Esempio n. 17
0
    def import_tool(
        self,
        project,
        user,
        project_name=None,
        mount_point=None,
        mount_label=None,
        user_name=None,
        tool_option=None,
        **kw
    ):
        """Import a GitHub wiki into a newly installed Allura Wiki tool.

        Returns the installed app, or ``None`` when the wiki is disabled on
        GitHub or its repository does not exist.  Page history is imported
        only when ``tool_option`` equals ``"import_history"``.  If anything
        fails once the app is installed and set up, the app is made
        admin-only and the exception is re-raised.
        """
        project_name = "%s/%s" % (user_name, project_name)
        extractor = GitHubProjectExtractor(project_name, user=user)
        wiki_avail = extractor.has_wiki()
        # An enabled wiki may still have no pages at all, in which case there
        # is no wiki repository to clone -- so the repository is checked too.
        wiki_url = extractor.get_page_url("wiki_url")
        if not (wiki_avail and self.has_wiki_repo(wiki_url)):
            return

        self.github_wiki_url = extractor.get_page_url("wiki_url").replace(".wiki", "/wiki")
        import_id = {"source": self.source, "project_name": project_name}
        self.app = project.install_app(
            "Wiki",
            mount_point=mount_point or "wiki",
            mount_label=mount_label or "Wiki",
            import_id=import_id,
        )
        ThreadLocalORMSession.flush_all()
        self.github_markdown_converter = GitHubMarkdownConverter(user_name, project_name)
        try:
            M.session.artifact_orm_session._get().skip_mod_date = True
            with h.push_config(c, app=self.app):
                try:
                    self.import_pages(wiki_url, history=(tool_option == "import_history"))
                except git.GitCommandError:
                    # record the context of the failed clone, then let it propagate
                    log.error(
                        "Unable to clone GitHub wiki: wiki_url=%s; wiki_avail=%s; avail_url=%s",
                        wiki_url,
                        wiki_avail,
                        extractor.get_page_url("project_info"),
                        exc_info=True,
                    )
                    raise
            ThreadLocalORMSession.flush_all()
            audit_message = "import tool %s from %s on %s" % (
                self.app.config.options.mount_point,
                project_name,
                self.source,
            )
            M.AuditLog.log(audit_message, project=project, user=user, url=self.app.url)
            g.post_event("project_updated")
            return self.app
        except Exception:
            h.make_app_admin_only(self.app)
            raise
        finally:
            # restore the flag no matter how the import ended
            M.session.artifact_orm_session._get().skip_mod_date = False
Esempio n. 18
0
class TestGitHubMarkdownConverter(object):
    """Tests for ``GitHubMarkdownConverter.convert``.

    Every test uses a converter created for user ``user`` and project
    ``project``.
    """

    def setUp(self):
        self.conv = GitHubMarkdownConverter('user', 'project')

    def test_convert_sha(self):
        # a bare sha, alone or surrounded by other text
        assert_equal(
            self.conv.convert('16c999e8c71134401a78d4d46435517b2271d6ac'),
            '[16c999]')
        assert_equal(
            self.conv.convert(
                'some context  16c999e8c71134401a78d4d46435517b2271d6ac '),
            'some context  [16c999] ')

    def test_convert_user_sha(self):
        own = 'user@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(own), '[16c999]')

        # Not an owner of current project
        foreign = 'another-user@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(foreign), foreign)

    def test_convert_user_repo_sha(self):
        own = 'user/project@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(self.conv.convert(own), '[16c999]')

        # Not a current project
        other_repo = 'user/p@16c999e8c71134401a78d4d46435517b2271d6ac'
        assert_equal(
            self.conv.convert(other_repo),
            '[user/p@16c999]'
            '(https://github.com/user/p/commit/16c999e8c71134401a78d4d46435517b2271d6ac)'
        )

    def test_convert_ticket(self):
        assert_equal(self.conv.convert('Ticket #1'), 'Ticket [#1]')
        assert_equal(self.conv.convert('#1'), '[#1]')

    def test_convert_user_ticket(self):
        assert_equal(self.conv.convert('user#1'), '[#1]')

        # Not an owner of current project
        assert_equal(self.conv.convert('another-user#1'), 'another-user#1')

    def test_convert_user_repo_ticket(self):
        assert_equal(self.conv.convert('user/project#1'), '[#1]')

        # Not a current project
        assert_equal(
            self.conv.convert('user/p#1'),
            '[user/p#1](https://github.com/user/p/issues/1)')

    def test_convert_strikethrough(self):
        assert_equal(self.conv.convert('~~mistake~~'), '<s>mistake</s>')

    def test_inline_code_block(self):
        # the strike-out markup inside the inline code span is left alone
        source = 'This `~~some text~~` converts to this ~~strike out~~.'
        expected = 'This `~~some text~~` converts to this <s>strike out</s>.'
        assert_equal(self.conv.convert(source).strip(), expected)

    def test_convert_code_blocks(self):
        source = '''```python
print "Hello!"
```

Two code blocks here!

```
for (var i = 0; i < a.length; i++) {
    console.log(i);
}
```'''
        expected = ''':::python
    print "Hello!"

Two code blocks here!

    for (var i = 0; i < a.length; i++) {
        console.log(i);
    }'''

        converted = self.conv.convert(source).strip()
        assert_equal(converted, expected)

    def test_code_blocks_without_newline_before(self):
        # fenced block that follows a text line directly, no language given
        source = '''
There are some code snippet:
```
print 'Hello'
```
Pretty cool, ha?'''

        expected = '''
There are some code snippet:

    print 'Hello'
Pretty cool, ha?'''
        assert_equal(self.conv.convert(source).strip(), expected.strip())
        # the same block fenced with tildes gives the same result
        source = source.replace('```', '~~~')
        assert_equal(self.conv.convert(source).strip(), expected.strip())

        # same layout, this time with a language on the fence
        source = '''
There are some code snippet:
```python
print 'Hello'
```
Pretty cool, ha?'''

        expected = '''
There are some code snippet:

    :::python
    print 'Hello'
Pretty cool, ha?'''
        assert_equal(self.conv.convert(source).strip(), expected.strip())
Esempio n. 19
0
 def setUp(self):
     """Create a GitHubMarkdownConverter('user', 'project') and keep it on ``self.conv``."""
     self.conv = GitHubMarkdownConverter('user', 'project')