Example #1
    def parse_ticket(self, id):
        # Use CSV export to get ticket fields
        url = self.full_url(self.TICKET_URL % id, 'csv')
        f = self.csvopen(url)
        reader = csv.DictReader(f)
        ticket_fields = reader.next()
        ticket_fields['class'] = 'ARTIFACT'
        ticket = self.remap_fields(ticket_fields)

        # Use HTML export to get ticket description and comments
        import html2text
        html2text.BODY_WIDTH = 0
        url = self.full_url(self.TICKET_URL % id)
        self.log_url(url)
        d = BeautifulSoup(urlopen(url))
        self.clean_missing_wiki_links(d)
        desc = d.find('div', 'description').find('div', 'searchable')
        ticket['description'] = html2text.html2text(
            desc.renderContents('utf8').decode('utf8')) if desc else ''
        comments = []
        for comment in d.findAll('form', action='#comment'):
            c = {}
            c['submitter'] = re.sub(r'.* by ', '',
                                    comment.find('h3', 'change').text).strip()
            c['date'] = self.trac2z_date(
                comment.find('a',
                             'timeline')['title'].replace(' in Timeline', ''))
            changes = unicode(comment.find('ul', 'changes') or '')
            body = comment.find('div', 'comment')
            body = body.renderContents('utf8').decode('utf8') if body else ''
            c['comment'] = html2text.html2text(changes + body)
            c['class'] = 'COMMENT'
            comments.append(c)
        ticket['comments'] = comments
        return ticket
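parse_ticket leans on several helpers that are not part of this example: csvopen (shown in a later example), remap_fields, clean_missing_wiki_links, and trac2z_date, which normalizes the timestamps scraped from the ticket page. A minimal sketch of what such a trac2z_date helper could look like is below; the use of dateutil/pytz and the exact output format are assumptions, not taken from these examples.

    # Hypothetical helper -- not shown in the examples on this page.
    # Assumes the scraped timestamp is in a form dateutil can parse and
    # that the target format is an ISO-8601 UTC ("Z") string.
    import dateutil.parser
    import pytz

    def trac2z_date(self, s):
        d = dateutil.parser.parse(s)
        if d.tzinfo:
            d = d.astimezone(pytz.UTC)
        return d.strftime('%Y-%m-%dT%H:%M:%SZ')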
Example #2
 def parse_ticket_attachments(self, id):
     SIZE_PATTERN = r'(\d+) bytes'
     TIMESTAMP_PATTERN = r'(.+) in Timeline'
     # Scrape HTML to get ticket attachments
     url = self.full_url(self.ATTACHMENT_LIST_URL % id)
     self.log_url(url)
     f = urlopen(url)
     soup = BeautifulSoup(f)
     attach = soup.find('div', id='attachments')
     list = []
     while attach:
         attach = attach.findNext('dt')
         if not attach:
             break
         d = {}
         d['filename'] = attach.a['href'].rsplit('/', 1)[1]
         d['url'] = self.full_url(self.ATTACHMENT_URL % (id, d['filename']))
         size_s = attach.span['title']
         d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
         timestamp_s = attach.find('a', {'class': 'timeline'})['title']
         d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
         d['by'] = attach.find(text=re.compile('added by')).nextSibling.renderContents()
         d['description'] = ''
         # Skip whitespace
         while attach.nextSibling and type(attach.nextSibling) is NavigableString:
             attach = attach.nextSibling
         # If there's a description, there will be a <dd> element; otherwise the next element is immediately another <dt>
         if attach.nextSibling and attach.nextSibling.name == 'dd':
             desc_el = attach.nextSibling
             if desc_el:
                 # TODO: Convert to Allura link syntax as needed
                 d['description'] = ''.join(desc_el.findAll(text=True)).strip()
         list.append(d)
     return list
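parse_ticket_attachments calls a match_pattern helper with SIZE_PATTERN and TIMESTAMP_PATTERN; that helper is not shown on this page. A plausible sketch, assuming the pattern matches somewhere in the string and that the first capture group holds the value of interest:

    # Hypothetical helper -- not shown in the examples on this page.
    import re

    def match_pattern(self, regexp, string):
        m = re.search(regexp, string)
        if not m:
            raise ValueError('%r did not match %r' % (regexp, string))
        return m.group(1)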
Example #3
    def parse_ticket(self, id):
        # Use CSV export to get ticket fields
        url = self.full_url(self.TICKET_URL % id, 'csv')
        f = self.csvopen(url)
        reader = csv.DictReader(f)
        ticket_fields = reader.next()
        ticket_fields['class'] = 'ARTIFACT'
        ticket = self.remap_fields(ticket_fields)

        # Use HTML export to get ticket description and comments
        import html2text
        html2text.BODY_WIDTH = 0
        url = self.full_url(self.TICKET_URL % id)
        self.log_url(url)
        d = BeautifulSoup(urlopen(url))
        self.clean_missing_wiki_links(d)
        desc = d.find('div', 'description').find('div', 'searchable')
        ticket['description'] = html2text.html2text(
            desc.renderContents('utf8').decode('utf8')) if desc else ''
        comments = []
        for comment in d.findAll('form', action='#comment'):
            c = {}
            c['submitter'] = re.sub(
                r'.* by ', '', comment.find('h3', 'change').text).strip()
            c['date'] = self.trac2z_date(
                comment.find('a', 'timeline')['title'].replace(' in Timeline', ''))
            changes = unicode(comment.find('ul', 'changes') or '')
            body = comment.find('div', 'comment')
            body = body.renderContents('utf8').decode('utf8') if body else ''
            c['comment'] = html2text.html2text(changes + body)
            c['class'] = 'COMMENT'
            comments.append(c)
        ticket['comments'] = comments
        return ticket
Example #4
 def parse_ticket_attachments(self, id):
     SIZE_PATTERN = r'(\d+) bytes'
     TIMESTAMP_PATTERN = r'(.+) in Timeline'
     # Scrape HTML to get ticket attachments
     url = self.full_url(self.ATTACHMENT_LIST_URL % id)
     self.log_url(url)
     f = urlopen(url)
     soup = BeautifulSoup(f)
     attach = soup.find('div', id='attachments')
     list = []
     while attach:
         attach = attach.findNext('dt')
         if not attach:
             break
         d = {}
         d['filename'] = attach.a['href'].rsplit('/', 1)[1]
         d['url'] = self.full_url(self.ATTACHMENT_URL % (id, d['filename']))
         size_s = attach.span['title']
         d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
         timestamp_s = attach.find('a', {'class': 'timeline'})['title']
         d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
         d['by'] = attach.find(text=re.compile('added by')).nextSibling.renderContents()
         d['description'] = ''
         # Skip whitespace
         while attach.nextSibling and type(attach.nextSibling) is NavigableString:
             attach = attach.nextSibling
         # If there's a description, there will be a <dd> element; otherwise the next element is immediately another <dt>
         if attach.nextSibling and attach.nextSibling.name == 'dd':
             desc_el = attach.nextSibling
             if desc_el:
                 # TODO: Convert to Allura link syntax as needed
                 d['description'] = ''.join(desc_el.findAll(text=True)).strip()
         list.append(d)
     return list
Example #5
    def parse_ticket(self, id):
        # Use CSV export to get ticket fields
        url = self.full_url(self.TICKET_URL % id, 'csv')
        f = self.csvopen(url)
        reader = csv.DictReader(f)
        ticket_fields = reader.next()
        ticket_fields['class'] = 'ARTIFACT'
        ticket = self.remap_fields(ticket_fields)

        # Use RSS export to get ticket description and comments
        import html2text
        html2text.BODY_WIDTH = 0
        url = self.full_url(self.TICKET_URL % id, 'rss')
        self.log_url(url)
        d = feedparser.parse(urlopen(url))
        ticket['description'] = html2text.html2text(d.feed.description)
        comments = []
        for comment in d['entries']:
            c = {}
            c['submitter'] = getattr(comment, 'author', None)
            c['date'] = comment.updated_parsed
            c['comment'] = html2text.html2text(comment.summary)
            c['class'] = 'COMMENT'
            comments.append(c)
        ticket['comments'] = comments
        return ticket
Example #6
 def csvopen(self, url):
     self.log_url(url)
     f = urlopen(url)
     # Trac doesn't return a 403 error; it just serves a normal 200 HTML page
     # saying that access is denied, so we emulate the 403 ourselves.
     # TODO: currently, any non-CSV response is treated as a 403.
     if not f.info()['Content-Type'].startswith('text/csv'):
         raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
     return f
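Because csvopen turns a denied export into an HTTPError, callers can handle a private Trac instance like any other HTTP failure. A hypothetical caller inside the same extractor class (parse_ticket_fields is an invented name; Python 2 assumed):

    import csv
    import urllib2

    def parse_ticket_fields(self, id):
        # csvopen() raises the emulated 403 when the response is not CSV
        try:
            f = self.csvopen(self.full_url(self.TICKET_URL % id, 'csv'))
        except urllib2.HTTPError as e:
            if e.code == 403:
                raise Exception('CSV export denied for ticket %s' % id)
            raise
        return csv.DictReader(f).next()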
Example #7
 def csvopen(self, url):
     self.log_url(url)
     f = urlopen(url)
     # Trac doesn't return a 403 error; it just serves a normal 200 HTML page
     # saying that access is denied, so we emulate the 403 ourselves.
     # TODO: currently, any non-CSV response is treated as a 403.
     if not f.info()['Content-Type'].startswith('text/csv'):
         raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
     return f
Example #8
 def urlopen(url,
             retries=3,
             codes=(408, 500, 502, 503, 504),
             timeout=120,
             unredirected_hdrs=None,
             **kw):
     req = six.moves.urllib.request.Request(url, **kw)
     if unredirected_hdrs:
         for key, val in unredirected_hdrs.items():
             req.add_unredirected_header(key, val)
     req.add_header('User-Agent',
                    'Allura Data Importer (https://allura.apache.org/)')
     return h.urlopen(req, retries=retries, codes=codes, timeout=timeout)
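Each of these urlopen wrappers delegates to an h.urlopen helper that retries requests failing with the listed HTTP status codes. That helper is not shown on this page; a rough sketch of a retrying opener in the same spirit (an assumption, not the actual h.urlopen implementation):

    import time
    from six.moves.urllib import error, request

    def urlopen_with_retries(req, retries=3,
                             codes=(408, 500, 502, 503, 504), timeout=120):
        attempt = 0
        while True:
            try:
                return request.urlopen(req, timeout=timeout)
            except error.HTTPError as e:
                if e.code not in codes or attempt >= retries:
                    raise
                attempt += 1
                time.sleep(2 ** attempt)  # simple backoff between retries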
Example #9
 def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), **kw):
     req = urllib2.Request(url, **kw)
     req.add_header('User-Agent',
                    'Allura Data Importer (https://allura.apache.org/)')
     return h.urlopen(req, retries=retries, codes=codes)
Example #10
 def test_no_error(self, urllib2):
     r = h.urlopen('myurl')
     self.assertEqual(r, urllib2.urlopen.return_value)
     urllib2.urlopen.assert_called_once_with('myurl', timeout=None)
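The urllib2 argument here suggests the test runs with the urllib2 module mocked out in the helpers module. A hedged reconstruction of the surrounding setup, assuming a unittest.TestCase and the usual `from allura.lib import helpers as h` import (the exact patch target is an assumption):

    import mock

    @mock.patch('allura.lib.helpers.urllib2')  # assumed patch target
    def test_no_error(self, urllib2):
        r = h.urlopen('myurl')
        self.assertEqual(r, urllib2.urlopen.return_value)
        urllib2.urlopen.assert_called_once_with('myurl', timeout=None)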
Example #11
 def urlopen(url, retries=3, codes=(408,), **kw):
     req = urllib2.Request(url, **kw)
     req.add_header('User-Agent', 'Allura Data Importer (https://forge-allura.apache.org/p/allura/)')
     return h.urlopen(req, retries=retries, codes=codes)
Example #12
 def fetch(self, url):
     return urlopen(url)
Example #13
 def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=120, **kw):
     req = urllib2.Request(url, **kw)
     req.add_header(
         'User-Agent', 'Allura Data Importer (https://allura.apache.org/)')
     return h.urlopen(req, retries=retries, codes=codes, timeout=timeout)
 def fetch(self, url):
     return urlopen(url)
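For completeness, a hypothetical call site for the fetch/urlopen pair above, assuming a BeautifulSoup 3 import as in the earlier scraping examples and placeholder extractor instance and URL:

    from BeautifulSoup import BeautifulSoup

    # `extractor` and the ticket URL are placeholders for illustration
    soup = BeautifulSoup(extractor.fetch('http://trac.example.com/ticket/1'))
    print soup.find('div', 'description')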