def test_process_bugs(self):
    """process_bugs() on one canned GitHub issue yields one parsed bug."""
    issue_url = ("https://api.github.com/repos/openhatch/tests/"
                 "issues/42")
    canned_file = os.path.join(
        HERE, "sample-data", "github", "issue-show")
    responder = autoresponse.Autoresponder(
        url2filename={issue_url: canned_file},
        url2errors={})

    pending = self.im.process_bugs(((issue_url, None),))
    bugs = responder.respond_recursively(pending)
    self.assertEqual(len(bugs), 1)

    bug = bugs[0]
    expected = {
        "title": "yo dawg",
        "description": "this issue be all up in ya biz-nass.",
        "status": "open",
        "people_involved": 2,
        "date_reported": printable_datetime(
            datetime.datetime(2012, 3, 12, 19, 24, 42)),
        "last_touched": printable_datetime(
            datetime.datetime(2012, 3, 16, 21, 39, 42)),
        "submitter_username": "openhatch",
        "submitter_realname": "",
        "canonical_bug_link":
            "https://github.com/openhatch/tests/issues/42",
        "good_for_newcomers": True,
        "concerns_just_documentation": False,
        "looks_closed": False,
    }
    # Compare field by field so a mismatch names the offending key.
    for key, value in expected.items():
        self.assertEqual(bug[key], value)
def test_create_bug_with_link_in_reported_by_field(self):
    """Django Trac ticket 18937: the 'Reported by' cell is a hyperlink.

    Parsing the cached CSV + HTML pair should still yield a normal
    data dict.
    """
    parser = TracBugParser('https://code.djangoproject.com/query?id=18937')
    csv_path = os.path.join(HERE, 'sample-data', 'django-trac-18937.csv')
    parser.set_bug_csv_data(unicode(open(csv_path).read(), 'utf-8'))
    html_path = os.path.join(HERE, 'sample-data', 'django-trac-18937.html')
    parser.set_bug_html_data(unicode(open(html_path).read(), 'utf-8'))

    got = parser.get_parsed_data_dict(self.tm4)
    del got['last_polled']

    wanted = {
        '_project_name': 'Tango',
        'as_appears_in_distribution': '',
        'canonical_bug_link':
            'https://code.djangoproject.com/query?id=18937',
        'concerns_just_documentation': False,
        'date_reported': printable_datetime(
            datetime.datetime(2012, 9, 10, 3, 17, 54)),
        'description':
            u'Add a PKG-INFO file as fitting with '
            u'[http://www.python.org/dev/peps/pep-0345/ PEP 345].'
            u'\r\rSee [http://blog.ziade.org/2012/09/10/'
            u'dear-django-help-python-packaging/ this blog post] '
            u'for reference.\r\rSeems to me we can add this '
            u'metadata file without too much difficulty and make '
            u'new packaging happy :D\r\r',
        'good_for_newcomers': False,
        'importance': '',
        'last_touched': printable_datetime(
            datetime.datetime(2012, 9, 10, 3, 27, 13)),
        'looks_closed': False,
        'people_involved': 3,
        'status': 'new',
        'submitter_realname': '',
        'submitter_username': '******',
        'title': 'Use modern Python packaging metadata standard '
                 '(1.2, PEP 345)',
    }
    self.assertEqual(wanted, got)
def test_top_to_bottom_closed(self):
    """End-to-end spider run over a canned closed-issue GitHub listing."""
    query = ('https://api.github.com/repos/openhatch/tests/'
             'issues?state=closed')
    self.tm.bugimporter = 'github.GitHubBugImporter'
    self.tm.tracker_name = 'openhatch tests'
    self.tm.github_name = 'openhatch'
    self.tm.github_repo = 'tests'
    self.tm.bitesized_tag = 'lowfruit'
    self.tm.documentation_tag = 'docs'
    self.tm.queries = [query]

    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm.__dict__]

    responder = autoresponse.Autoresponder(
        url2filename={query: os.path.join(
            HERE, 'sample-data', 'github', 'issue-list-closed')},
        url2errors={})
    bugs = responder.respond_recursively(spider.start_requests())
    assert len(bugs) == 1

    bug = bugs[0]
    expected = {
        'title': 'yo dawg',
        'description': 'this issue be all up in ya biz-nass.',
        'status': 'closed',
        'people_involved': 2,
        'date_reported': printable_datetime(
            datetime.datetime(2012, 3, 12, 19, 24, 42)),
        'last_touched': printable_datetime(
            datetime.datetime(2012, 3, 16, 21, 39, 42)),
        'submitter_username': 'openhatch',
        'submitter_realname': '',
        'canonical_bug_link':
            'https://github.com/openhatch/tests/issues/42',
        'good_for_newcomers': True,
        'concerns_just_documentation': False,
        'looks_closed': True,
    }
    for key, value in expected.items():
        self.assertEqual(bug[key], value)
def parse(self, issue):
    """Convert one JIRA REST API issue dict into a ParsedBug item.

    Fix: removed a leftover debug `print` statement that dumped the
    tracker model on every parsed issue (it was also Python-2-only
    syntax and polluted stdout of any consumer).

    Bitesized-ness comes either from a label match or from a priority
    match, depending on the tracker model's `bitesized_type`.
    """
    fields = issue['fields']
    parsed = bugimporters.items.ParsedBug({
        'title': fields['summary'],
        'description': fields['description'],
        'status': fields['status']['name'].lower(),
        'date_reported': printable_datetime(
            string2naive_datetime(fields['created'])),
        'last_touched': printable_datetime(
            string2naive_datetime(fields['updated'])),
        'submitter_username': fields['reporter']['name'],
        'submitter_realname': fields['reporter']['displayName'],
        'canonical_bug_link': urljoin(
            self.tm.get_base_url(), '/browse/' + issue['key']),
        'looks_closed': (fields['status']['name'] == 'Closed'),
        'last_polled': printable_datetime(),
        '_project_name': self.tm.tracker_name,
        '_tracker_name': self.tm.tracker_name,
    })

    issue_labels = set(fields['labels'])
    if self.tm.bitesized_type:
        if self.tm.bitesized_type == 'label':
            b_list = self.tm.bitesized_text.split(',')
            parsed['good_for_newcomers'] = not issue_labels.isdisjoint(
                b_list)
        elif self.tm.bitesized_type == 'priority':
            parsed['good_for_newcomers'] = (
                fields['priority']['name'] == self.tm.bitesized_text)
    else:
        parsed['good_for_newcomers'] = False

    # Documentation flag is always label-driven (comma-separated list).
    d_list = self.tm.documentation_text.split(',')
    parsed['concerns_just_documentation'] = not issue_labels.isdisjoint(
        d_list)
    return parsed
def test_create_bug_that_has_new_date_format(self):
    """Trac pages using the newer timestamp markup still parse."""
    # The same description text appears both in the CSV input and the
    # expected output, so define it once.
    description = (
        u"Hi\r\n\r\nWhen embedding sourcecode in wiki pages using the "
        u"{{{-Makro, I would sometimes like to have line numbers "
        u"displayed. This would make it possible to reference some lines "
        u"in a text, like: \r\n\r\n''We got some c-sourcecode here, in "
        u"line 1, a buffer is allocated, in line 35, some data is copied "
        u"to the buffer without checking the size of the data...''"
        u"\r\n\r\nThe svn browser shows line numbers, so I hope this "
        u"will not be so difficult.")
    title = ('Show line numbers when embedding source code in '
             'wiki pages')

    tbp = TracBugParser('http://trac.edgewall.org/ticket/3275')
    tbp.bug_csv = {
        'description': description,
        'status': 'new',
        'keywords': '',
        'summary': title,
        'priority': '',
        'reporter': 'erik@\xe2\x80\xa6',
        'id': '3275',
    }
    cached_html_filename = os.path.join(
        HERE, 'sample-data', 'trac-3275.html')
    tbp.set_bug_html_data(
        unicode(open(cached_html_filename).read(), 'utf-8'))

    got = tbp.get_parsed_data_dict(self.tm2)
    del got['last_polled']

    wanted = {
        'status': 'new',
        'as_appears_in_distribution': u'',
        'description': description,
        'importance': '',
        'canonical_bug_link': 'http://trac.edgewall.org/ticket/3275',
        'date_reported': printable_datetime(
            datetime.datetime(2006, 6, 16, 15, 1, 52)),
        'submitter_realname': '',
        'title': title,
        'people_involved': 3,
        'last_touched': printable_datetime(
            datetime.datetime(2010, 11, 26, 13, 45, 45)),
        'submitter_username': '******',
        'looks_closed': False,
        'good_for_newcomers': False,
        'concerns_just_documentation': False,
        '_project_name': 'Trac',
    }
    self.assertEqual(wanted, got)
def parse(self, issue):
    """Convert one GitHub API issue dict into a ParsedBug item.

    Consistency fix: this version omitted the '_tracker_name' field
    that the sibling implementation of parse() in this file records;
    it is now set alongside '_project_name'.
    """
    parsed = bugimporters.items.ParsedBug({
        'title': issue['title'],
        'description': issue['body'],
        'status': issue['state'],
        'people_involved': self.github_count_people_involved(issue),
        'date_reported': printable_datetime(
            string2naive_datetime(issue['created_at'])),
        'last_touched': printable_datetime(
            string2naive_datetime(issue['updated_at'])),
        'submitter_username': issue['user']['login'],
        # FIXME: the real name could be fetched via issue['user']['url'].
        'submitter_realname': '',
        'canonical_bug_link': issue['html_url'],
        'looks_closed': (issue['state'] == 'closed'),
        'last_polled': printable_datetime(),
        '_project_name': self.tm.tracker_name,
        '_tracker_name': self.tm.tracker_name,
    })

    # Bitesized/documentation flags are label-driven; the tracker model
    # stores comma-separated label lists.
    issue_labels = set(label['name'] for label in issue['labels'])
    parsed['good_for_newcomers'] = not issue_labels.isdisjoint(
        self.tm.bitesized_tag.split(','))
    parsed['concerns_just_documentation'] = not issue_labels.isdisjoint(
        self.tm.documentation_tag.split(','))
    return parsed
def test_create_bug_that_has_new_date_format(self):
    """A ticket rendered with Trac's newer date markup parses cleanly."""
    shared_description = (
        u"Hi\r\n\r\nWhen embedding sourcecode in wiki pages using the "
        u"{{{-Makro, I would sometimes like to have line numbers "
        u"displayed. This would make it possible to reference some lines "
        u"in a text, like: \r\n\r\n''We got some c-sourcecode here, in "
        u"line 1, a buffer is allocated, in line 35, some data is copied "
        u"to the buffer without checking the size of the data...''"
        u"\r\n\r\nThe svn browser shows line numbers, so I hope this "
        u"will not be so difficult.")
    shared_title = ('Show line numbers when embedding source code in '
                    'wiki pages')

    parser = TracBugParser('http://trac.edgewall.org/ticket/3275')
    parser.bug_csv = {
        'description': shared_description,
        'status': 'new',
        'keywords': '',
        'summary': shared_title,
        'priority': '',
        'reporter': 'erik@\xe2\x80\xa6',
        'id': '3275',
    }
    html_path = os.path.join(HERE, 'sample-data', 'trac-3275.html')
    parser.set_bug_html_data(unicode(open(html_path).read(), 'utf-8'))

    got = parser.get_parsed_data_dict(self.tm2)
    del got['last_polled']

    self.assertEqual({
        'status': 'new',
        'as_appears_in_distribution': u'',
        'description': shared_description,
        'importance': '',
        'canonical_bug_link': 'http://trac.edgewall.org/ticket/3275',
        'date_reported': printable_datetime(
            datetime.datetime(2006, 6, 16, 15, 1, 52)),
        'submitter_realname': '',
        'title': shared_title,
        'people_involved': 3,
        'last_touched': printable_datetime(
            datetime.datetime(2010, 11, 26, 13, 45, 45)),
        'submitter_username': '******',
        'looks_closed': False,
        'good_for_newcomers': False,
        'concerns_just_documentation': False,
        '_project_name': 'Trac',
    }, got)
def test_create_bug_with_link_in_reported_by_field(self):
    """A hyperlinked reporter field must not break Trac parsing."""
    parser = TracBugParser('https://code.djangoproject.com/query?id=18937')
    # Feed the cached CSV and HTML fixtures into the parser.
    for setter, basename in (
            (parser.set_bug_csv_data, 'django-trac-18937.csv'),
            (parser.set_bug_html_data, 'django-trac-18937.html')):
        fixture = os.path.join(HERE, 'sample-data', basename)
        setter(unicode(open(fixture).read(), 'utf-8'))

    got = parser.get_parsed_data_dict(self.tm4)
    del got['last_polled']

    wanted = {
        '_project_name': 'Tango',
        'as_appears_in_distribution': '',
        'canonical_bug_link':
            'https://code.djangoproject.com/query?id=18937',
        'concerns_just_documentation': False,
        'date_reported': printable_datetime(
            datetime.datetime(2012, 9, 10, 3, 17, 54)),
        'description':
            u'Add a PKG-INFO file as fitting with '
            u'[http://www.python.org/dev/peps/pep-0345/ PEP 345].'
            u'\r\rSee [http://blog.ziade.org/2012/09/10/'
            u'dear-django-help-python-packaging/ this blog post] '
            u'for reference.\r\rSeems to me we can add this '
            u'metadata file without too much difficulty and make '
            u'new packaging happy :D\r\r',
        'good_for_newcomers': False,
        'importance': '',
        'last_touched': printable_datetime(
            datetime.datetime(2012, 9, 10, 3, 27, 13)),
        'looks_closed': False,
        'people_involved': 3,
        'status': 'new',
        'submitter_realname': '',
        'submitter_username': '******',
        'title': 'Use modern Python packaging metadata standard '
                 '(1.2, PEP 345)',
    }
    self.assertEqual(wanted, got)
def test_create_bug_object_data_dict_more_recent(self):
    """Parse the 2010-04-02 snapshot of Twisted Trac ticket #4298.

    Fix: the expected-values dict literal listed 'looks_closed' twice;
    Python silently keeps only the last duplicate key, so the extra
    entry was dead code and has been removed.
    """
    tbp = TracBugParser('http://twistedmatrix.com/trac/ticket/4298')
    tbp.bug_csv = {
        'branch': '',
        'branch_author': '',
        'cc': 'thijs_ exarkun',
        'component': 'core',
        'description': ("This package hasn't been touched in 4 years "
                        "which either means it's stable or not being "
                        "used at all. Let's deprecate it (also see "
                        "#4111)."),
        'id': '4298',
        'keywords': 'easy',
        'launchpad_bug': '',
        'milestone': '',
        'owner': 'djfroofy',
        'priority': 'normal',
        'reporter': 'thijs',
        'resolution': '',
        'status': 'new',
        'summary': 'Deprecate twisted.persisted.journal',
        'type': 'task',
    }
    cached_html_filename = os.path.join(
        HERE, 'sample-data', 'twisted-trac-4298-on-2010-04-02.html')
    tbp.set_bug_html_data(unicode(
        open(cached_html_filename).read(), 'utf-8'))
    self.assertEqual(tbp.component, 'core')

    got = tbp.get_parsed_data_dict(self.tm)
    del got['last_polled']
    wanted = {
        'title': 'Deprecate twisted.persisted.journal',
        'description': ("This package hasn't been touched in 4 years "
                        "which either means it's stable or not being "
                        "used at all. Let's deprecate it (also see "
                        "#4111)."),
        'status': 'new',
        'importance': 'normal',
        'people_involved': 3,
        # FIXME: Need time zone
        'date_reported': printable_datetime(
            datetime.datetime(2010, 2, 23, 0, 46, 30)),
        'last_touched': printable_datetime(
            datetime.datetime(2010, 3, 12, 18, 43, 5)),
        'looks_closed': False,
        'submitter_username': '******',
        'submitter_realname': '',
        'canonical_bug_link': 'http://twistedmatrix.com/trac/ticket/4298',
        'good_for_newcomers': True,
        'concerns_just_documentation': False,
        '_project_name': 'Twisted',
        'as_appears_in_distribution': '',
    }
    self.assertEqual(wanted, got)
def test_create_bug_that_has_another_date_format(self):
    """dsource.org tickets carry yet another timestamp format."""
    parser = TracBugParser('http://dsource.org/projects/tango/ticket/1939')
    parser.bug_csv = {
        'cc': '',
        'component': 'Documentation',
        'description': ("tango.core.Memory.GC.monitor() is documented "
                        "incorrectly. It just duplicates previous function "
                        "documentation. At least in Kai. Can't see current "
                        "trunk Memory module for some reason.\\r\\n"),
        'id': '1939',
        'keywords': 'GC.monitor',
        'milestone': 'Documentation',
        'owner': 'community',
        'priority': 'trivial',
        'reporter': '~Gh0sT~',
        'resolution': '',
        'status': 'new',
        'summary': ('tango.core.Memory.GC.monitor() is documented '
                    'incorrectly'),
        'type': 'defect',
        'version': '0.99.9 Kai',
    }
    html_path = os.path.join(HERE, 'sample-data', 'dsource-1939')
    parser.set_bug_html_data(unicode(open(html_path).read(), 'utf-8'))

    got = parser.get_parsed_data_dict(self.tm4)
    # Both dates on this fixture should decode to the same instant.
    expected = printable_datetime(
        datetime.datetime(2010, 6, 19, 8, 15, 37))
    self.assertEqual(expected, got['date_reported'])
    self.assertEqual(expected, got['last_touched'])
def test_create_bug_that_has_another_date_format(self):
    """The dsource.org date format decodes into the expected datetime."""
    tbp = TracBugParser('http://dsource.org/projects/tango/ticket/1939')
    csv_fields = {
        'cc': '',
        'component': 'Documentation',
        'description': ("tango.core.Memory.GC.monitor() is documented "
                        "incorrectly. It just duplicates previous function "
                        "documentation. At least in Kai. Can't see current "
                        "trunk Memory module for some reason.\\r\\n"),
        'id': '1939',
        'keywords': 'GC.monitor',
        'milestone': 'Documentation',
        'owner': 'community',
        'priority': 'trivial',
        'reporter': '~Gh0sT~',
        'resolution': '',
        'status': 'new',
        'summary': ('tango.core.Memory.GC.monitor() is documented '
                    'incorrectly'),
        'type': 'defect',
        'version': '0.99.9 Kai',
    }
    tbp.bug_csv = csv_fields
    fixture = os.path.join(HERE, 'sample-data', 'dsource-1939')
    tbp.set_bug_html_data(unicode(open(fixture).read(), 'utf-8'))

    parsed = tbp.get_parsed_data_dict(self.tm4)
    wanted_date = printable_datetime(
        datetime.datetime(2010, 6, 19, 8, 15, 37))
    for field in ('date_reported', 'last_touched'):
        self.assertEqual(wanted_date, parsed[field])
def parse(self, issue):
    """Turn one JIRA REST API issue dict into a ParsedBug item.

    Fix: dropped the stray debug `print "Tracker: ", self.tm` left in
    the method body — it wrote to stdout on every issue and is
    Python-2-only syntax.
    """
    fields = issue['fields']
    parsed = bugimporters.items.ParsedBug({
        'title': fields['summary'],
        'description': fields['description'],
        'status': fields['status']['name'].lower(),
        'date_reported': printable_datetime(
            string2naive_datetime(fields['created'])),
        'last_touched': printable_datetime(
            string2naive_datetime(fields['updated'])),
        'submitter_username': fields['reporter']['name'],
        'submitter_realname': fields['reporter']['displayName'],
        'canonical_bug_link': urljoin(
            self.tm.get_base_url(), '/browse/' + issue['key']),
        'looks_closed': (fields['status']['name'] == 'Closed'),
        'last_polled': printable_datetime(),
        '_project_name': self.tm.tracker_name,
        '_tracker_name': self.tm.tracker_name,
    })

    issue_labels = set(fields['labels'])
    # Bitesized-ness may be decided by a label match or by an exact
    # priority-name match, depending on the tracker configuration.
    if self.tm.bitesized_type:
        if self.tm.bitesized_type == 'label':
            b_list = self.tm.bitesized_text.split(',')
            parsed['good_for_newcomers'] = not issue_labels.isdisjoint(
                b_list)
        elif self.tm.bitesized_type == 'priority':
            parsed['good_for_newcomers'] = (
                fields['priority']['name'] == self.tm.bitesized_text)
    else:
        parsed['good_for_newcomers'] = False

    d_list = self.tm.documentation_text.split(',')
    parsed['concerns_just_documentation'] = not issue_labels.isdisjoint(
        d_list)
    return parsed
def test_top_to_bottom_closed(self):
    """Full spider pass over a canned GitHub closed-issue listing."""
    closed_query = ('https://api.github.com/repos/openhatch/tests/'
                    'issues?state=closed')
    fixture = os.path.join(
        HERE, 'sample-data', 'github', 'issue-list-closed')

    self.tm.bugimporter = 'github.GitHubBugImporter'
    self.tm.tracker_name = 'openhatch tests'
    self.tm.github_name = 'openhatch'
    self.tm.github_repo = 'tests'
    self.tm.bitesized_tag = 'lowfruit'
    self.tm.documentation_tag = 'docs'
    self.tm.queries = [closed_query]

    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm.__dict__]
    responder = autoresponse.Autoresponder(
        url2filename={closed_query: fixture}, url2errors={})
    bugs = responder.respond_recursively(spider.start_requests())
    assert len(bugs) == 1

    parsed = bugs[0]
    self.assertEqual(parsed['title'], 'yo dawg')
    self.assertEqual(parsed['description'],
                     'this issue be all up in ya biz-nass.')
    self.assertEqual(parsed['status'], 'closed')
    self.assertEqual(parsed['people_involved'], 2)
    self.assertEqual(
        parsed['date_reported'],
        printable_datetime(datetime.datetime(2012, 3, 12, 19, 24, 42)))
    self.assertEqual(
        parsed['last_touched'],
        printable_datetime(datetime.datetime(2012, 3, 16, 21, 39, 42)))
    self.assertEqual(parsed['submitter_username'], 'openhatch')
    self.assertEqual(parsed['submitter_realname'], '')
    self.assertEqual(parsed['canonical_bug_link'],
                     'https://github.com/openhatch/tests/issues/42')
    self.assertEqual(parsed['good_for_newcomers'], True)
    self.assertEqual(parsed['concerns_just_documentation'], False)
    self.assertEqual(parsed['looks_closed'], True)
def test_top_to_bottom_closed(self):
    """End-to-end spider run over a canned closed-JIRA search result."""
    query = ('https://jira.cyanogenmod.org/rest/api/2/search'
             '?jql=status=closed')
    self.tm.bugimporter = 'jira.JiraBugImporter'
    self.tm.tracker_name = 'openhatch tests'
    self.tm.base_url = 'http://jira.cyanogenmod.org/browse/'
    self.tm.bitesized_type = 'priority'
    self.tm.bitesized_text = 'Trivial'
    self.tm.documentation_text = 'docs'
    self.tm.queries = [query]

    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm.__dict__]

    canned = {query: os.path.join(
        HERE, 'sample-data', 'jira', 'issue-list-closed')}
    responder = autoresponse.Autoresponder(url2filename=canned,
                                           url2errors={})
    bugs = responder.respond_recursively(spider.start_requests())
    assert len(bugs) == 1

    bug = bugs[0]
    self.assertEqual(bug['title'], 'First Test Issue')
    self.assertEqual(bug['description'], 'A description')
    self.assertEqual(bug['status'], 'closed')
    self.assertEqual(
        bug['date_reported'],
        printable_datetime(
            datetime.datetime(2011, 11, 21, 22, 22, 59, 899000)))
    self.assertEqual(
        bug['last_touched'],
        printable_datetime(
            datetime.datetime(2011, 11, 21, 22, 23, 2, 302000)))
    self.assertEqual(bug['submitter_username'], 'admin')
    self.assertEqual(bug['submitter_realname'], 'Administrator')
    self.assertEqual(bug['canonical_bug_link'],
                     'http://jira.cyanogenmod.org/browse/MKY-1')
    self.assertEqual(bug['good_for_newcomers'], True)
    self.assertEqual(bug['concerns_just_documentation'], True)
    self.assertEqual(bug['looks_closed'], True)
def parse(self, issue):
    """Turn one GitHub API issue dict into a ParsedBug item."""
    # Collect the issue's label names once; both the bitesized and the
    # documentation flags are decided by label membership.
    labels = set()
    for label in issue['labels']:
        labels.add(label['name'])
    bitesized = self.tm.bitesized_tag.split(',')
    documentation = self.tm.documentation_tag.split(',')

    item = bugimporters.items.ParsedBug({
        'title': issue['title'],
        'description': issue['body'],
        'status': issue['state'],
        'people_involved': self.github_count_people_involved(issue),
        'date_reported': printable_datetime(
            string2naive_datetime(issue['created_at'])),
        'last_touched': printable_datetime(
            string2naive_datetime(issue['updated_at'])),
        'submitter_username': issue['user']['login'],
        'submitter_realname': '',  # FIXME: can get this from ['user']['url']
        'canonical_bug_link': issue['html_url'],
        'looks_closed': (issue['state'] == 'closed'),
        'last_polled': printable_datetime(),
        '_project_name': self.tm.tracker_name,
        '_tracker_name': self.tm.tracker_name,
    })
    item['good_for_newcomers'] = not labels.isdisjoint(bitesized)
    item['concerns_just_documentation'] = not labels.isdisjoint(
        documentation)
    return item
def test_top_to_bottom_closed(self):
    """Spider + canned responses: one closed GitHub issue comes through."""
    url = ('https://api.github.com/repos/openhatch/tests/'
           'issues?state=closed')
    tracker = self.tm
    tracker.bugimporter = 'github.GitHubBugImporter'
    tracker.tracker_name = 'openhatch tests'
    tracker.github_name = 'openhatch'
    tracker.github_repo = 'tests'
    tracker.bitesized_tag = 'lowfruit'
    tracker.documentation_tag = 'docs'
    tracker.queries = [url]

    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [tracker.__dict__]
    ar = autoresponse.Autoresponder(
        url2filename={url: os.path.join(
            HERE, 'sample-data', 'github', 'issue-list-closed')},
        url2errors={})
    bugs = ar.respond_recursively(spider.start_requests())
    assert len(bugs) == 1

    bug = bugs[0]
    checks = [
        ('title', 'yo dawg'),
        ('description', 'this issue be all up in ya biz-nass.'),
        ('status', 'closed'),
        ('people_involved', 2),
        ('date_reported', printable_datetime(
            datetime.datetime(2012, 3, 12, 19, 24, 42))),
        ('last_touched', printable_datetime(
            datetime.datetime(2012, 3, 16, 21, 39, 42))),
        ('submitter_username', 'openhatch'),
        ('submitter_realname', ''),
        ('canonical_bug_link',
         'https://github.com/openhatch/tests/issues/42'),
        ('good_for_newcomers', True),
        ('concerns_just_documentation', False),
        ('looks_closed', True),
    ]
    for field, value in checks:
        self.assertEqual(bug[field], value)
def handle_timeline_rss(self, timeline_rss):
    """Parse a Trac timeline RSS feed and yield follow-up scrapy Requests.

    Generator: first yields requests derived from each timeline entry,
    then yields one per-bug RSS request for every bug not yet seen as
    'closed' in the timeline.
    """
    # There are two steps to updating the timeline.
    # First step is to use the actual timeline to update the
    # date_reported and last_touched fields for each bug.
    # Parse the returned timeline RSS feed.
    for entry in feedparser.parse(timeline_rss).entries:
        # Format the data.
        base_url = self.tm.get_base_url()
        # Drop the fragment (e.g. "#comment-3") so the URL points at
        # the ticket itself.
        entry_url = entry.link.rsplit("#", 1)[0]
        entry_date = printable_datetime(
            datetime.datetime(*entry.date_parsed[0:6]))
        # Pull the status word out of the title; titles appear to look
        # like "Ticket #123 (closed): summary" -- TODO confirm format.
        entry_status = entry.title.split("): ", 1)[0].rsplit(" ", 1)[1]
        timeline_url = self.data_transits['trac']['get_timeline_url']({
            'base_url': base_url,
            'entry_url': entry_url,
            'entry_date': entry_date,
            'entry_status': entry_status})
        # Add the URL to the waiting list.
        # NOTE(review): this request re-enters handle_timeline_rss as
        # its own callback -- presumably to follow further timeline
        # pages; confirm against the data-transit implementation.
        yield scrapy.http.Request(
            url=timeline_url,
            callback=self.handle_timeline_rss)

    # Second step is to use the RSS feed for each individual bug to
    # update the last_touched field. This would be unnecessary if the
    # timeline showed update events as well as creation and closing
    # ones, and in fact later versions of Trac have this option - but
    # then the later versions of Trac also hyperlink to the timeline
    # from the bug, making this all moot.
    # Also, we cannot just use the RSS feed for everything, as it is
    # missing the date_reported time, as well as a lot of information
    # about the bug itself (e.g. Priority).
    for tb_times in self.timeline.tracbugtimes_set.all():
        # Check that the bug has not been seen as 'closed' in the
        # timeline. This reduces network load by not grabbing the RSS
        # feed of bugs whose last_touched info is definitely correct.
        if 'closed' not in tb_times.latest_timeline_status:
            r = scrapy.http.Request(
                url=tb_times.canonical_bug_link + '?format=rss',
                callback=self.handle_bug_rss)
            # Pass the times row through to the per-bug callback.
            r.meta['cb_args'] = tb_times
            yield r

    # URLs are now all prepped, so start pushing them onto the reactor.
    self.push_urls_onto_reactor()
def handle_timeline_rss(self, timeline_rss):
    """Walk a Trac timeline RSS feed, yielding follow-up scrapy Requests.

    Two phases: (1) one request per timeline entry via the tracker's
    'get_timeline_url' data transit; (2) one per-bug RSS request for
    each bug not already known to be closed.
    """
    # There are two steps to updating the timeline.
    # First step is to use the actual timeline to update the
    # date_reported and last_touched fields for each bug.
    # Parse the returned timeline RSS feed.
    for entry in feedparser.parse(timeline_rss).entries:
        # Format the data.
        base_url = self.tm.get_base_url()
        # Remove any "#fragment" so the link targets the ticket page.
        entry_url = entry.link.rsplit("#", 1)[0]
        entry_date = printable_datetime(
            datetime.datetime(*entry.date_parsed[0:6]))
        # Extract the parenthesised status word from the entry title
        # (appears to assume "... (status): ..." -- TODO confirm).
        entry_status = entry.title.split("): ", 1)[0].rsplit(" ", 1)[1]
        timeline_url = self.data_transits['trac']['get_timeline_url']({
            'base_url': base_url,
            'entry_url': entry_url,
            'entry_date': entry_date,
            'entry_status': entry_status})
        # Add the URL to the waiting list; note the callback is this
        # same method, so returned pages are processed identically.
        yield scrapy.http.Request(
            url=timeline_url,
            callback=self.handle_timeline_rss)

    # Second step is to use the RSS feed for each individual bug to
    # update the last_touched field. This would be unnecessary if the
    # timeline showed update events as well as creation and closing
    # ones, and in fact later versions of Trac have this option - but
    # then the later versions of Trac also hyperlink to the timeline
    # from the bug, making this all moot.
    # Also, we cannot just use the RSS feed for everything, as it is
    # missing the date_reported time, as well as a lot of information
    # about the bug itself (e.g. Priority).
    for tb_times in self.timeline.tracbugtimes_set.all():
        # Skip bugs already seen as 'closed' in the timeline: their
        # last_touched info is definitely correct, so re-fetching the
        # RSS feed would only add network load.
        if 'closed' not in tb_times.latest_timeline_status:
            r = scrapy.http.Request(
                url=tb_times.canonical_bug_link + '?format=rss',
                callback=self.handle_bug_rss)
            # Hand the times row to the per-bug callback via meta.
            r.meta['cb_args'] = tb_times
            yield r

    # URLs are now all prepped, so start pushing them onto the reactor.
    self.push_urls_onto_reactor()
def test_create_bug_that_lacks_modified_date_and_uses_owned_by_instead_of_assigned_to(
        self):
    """Parse a Twisted Trac page with no modified date and an
    'Owned by:' row instead of 'Assigned to:'.

    Fix: the expected-values dict literal listed 'looks_closed' twice;
    duplicate literal keys silently collapse to the last one, so the
    redundant entry has been removed.
    """
    tbp = TracBugParser('http://twistedmatrix.com/trac/ticket/4298')
    tbp.bug_csv = {
        'branch': '',
        'branch_author': '',
        'cc': 'thijs_ exarkun',
        'component': 'core',
        'description': ("This package hasn't been touched in 4 years "
                        "which either means it's stable or not being "
                        "used at all. Let's deprecate it (also see "
                        "#4111)."),
        'id': '4298',
        'keywords': 'easy',
        'launchpad_bug': '',
        'milestone': '',
        'owner': 'djfroofy',
        'priority': 'normal',
        'reporter': 'thijs',
        'resolution': '',
        'status': 'new',
        'summary': 'Deprecate twisted.persisted.journal',
        'type': 'task',
    }
    cached_html_filename = os.path.join(
        HERE, 'sample-data',
        'twisted-trac-4298-without-modified-using-owned-instead-of-'
        'assigned.html')
    tbp.set_bug_html_data(
        unicode(open(cached_html_filename).read(), 'utf-8'))

    got = tbp.get_parsed_data_dict(self.tm)
    del got['last_polled']
    wanted = {
        'title': 'Deprecate twisted.persisted.journal',
        'description': ("This package hasn't been touched in 4 years "
                        "which either means it's stable or not being "
                        "used at all. Let's deprecate it (also see "
                        "#4111)."),
        'status': 'new',
        'importance': 'normal',
        'people_involved': 4,
        # FIXME: Need time zone
        'date_reported': printable_datetime(
            datetime.datetime(2010, 2, 22, 19, 46, 30)),
        'last_touched': printable_datetime(
            datetime.datetime(2010, 2, 22, 19, 46, 30)),
        'looks_closed': False,
        'submitter_username': '******',
        'submitter_realname': '',
        'canonical_bug_link': 'http://twistedmatrix.com/trac/ticket/4298',
        'good_for_newcomers': True,
        'concerns_just_documentation': False,
        '_project_name': 'Twisted',
        'as_appears_in_distribution': '',
    }
    self.assertEqual(wanted, got)
def get_parsed_data_dict(self, base_url, bitesized_type, bitesized_text,
                         documentation_type, documentation_text):
    """Build a ParsedBug item from this bug's Bugzilla XML.

    base_url: tracker root used to build the show_bug.cgi link.
    bitesized_type / bitesized_text: how to detect newcomer-friendly
        bugs ('key' = keyword list, 'wboard' = status whiteboard).
    documentation_type / documentation_text: how to detect
        documentation bugs ('key', 'comp' = component, 'prod' = product).
    Returns the populated ParsedBug item.
    """
    # Generate the bug_url.
    self.bug_url = urlparse.urljoin(
        base_url, 'show_bug.cgi?id=%d' % self.bug_id)
    xml_data = self.bug_xml

    date_reported_text = self.get_tag_text_from_xml(xml_data, 'creation_ts')
    last_touched_text = self.get_tag_text_from_xml(xml_data, 'delta_ts')
    # Reporter username/realname come from the <reporter> who-tag.
    u, r = self._who_tag_to_username_and_realname(
        xml_data.xpath('.//reporter')[0])
    status = self.get_tag_text_from_xml(xml_data, 'bug_status')
    # NOTE(review): treating 'ASSIGNED' (and 'WONTFIX' as a *status*)
    # as "looks closed" is surprising -- confirm this is intentional.
    looks_closed = status in ('RESOLVED', 'WONTFIX', 'CLOSED', 'ASSIGNED')

    ret_dict = bugimporters.items.ParsedBug({
        'title': self.get_tag_text_from_xml(xml_data, 'short_desc'),
        # Fall back to a placeholder when the first comment is empty.
        'description': (self.get_tag_text_from_xml(
            xml_data, 'long_desc/thetext') or '(Empty description)'),
        'status': status,
        'importance': self.get_tag_text_from_xml(xml_data, 'bug_severity'),
        'people_involved': self.bugzilla_count_people_involved(xml_data),
        'date_reported': self.bugzilla_date_to_printable_datetime(
            date_reported_text),
        'last_touched': self.bugzilla_date_to_printable_datetime(
            last_touched_text),
        'last_polled': printable_datetime(),
        'submitter_username': u,
        'submitter_realname': r,
        'canonical_bug_link': self.bug_url,
        'looks_closed': looks_closed
    })

    # Keywords are a comma-separated list; strip whitespace around each.
    keywords_text = self.get_tag_text_from_xml(xml_data, 'keywords') or ''
    keywords = map(lambda s: s.strip(), keywords_text.split(','))

    # Check for the bitesized keyword.
    if bitesized_type:
        b_list = bitesized_text.split(',')
        if bitesized_type == 'key':
            ret_dict['good_for_newcomers'] = any(
                b in keywords for b in b_list)
        elif bitesized_type == 'wboard':
            # Substring match against the status whiteboard text.
            whiteboard_text = self.get_tag_text_from_xml(
                xml_data, 'status_whiteboard')
            ret_dict['good_for_newcomers'] = any(
                b in whiteboard_text for b in b_list)
        else:
            ret_dict['good_for_newcomers'] = False
    else:
        ret_dict['good_for_newcomers'] = False

    # Check whether this is a documentation bug.
    if documentation_type:
        d_list = documentation_text.split(',')
        if documentation_type == 'key':
            ret_dict['concerns_just_documentation'] = any(
                d in keywords for d in d_list)
        elif documentation_type == 'comp':
            # Exact match against this bug's component.
            ret_dict['concerns_just_documentation'] = any(
                d == self.component for d in d_list)
        elif documentation_type == 'prod':
            # Exact match against this bug's product.
            ret_dict['concerns_just_documentation'] = any(
                d == self.product for d in d_list)
        else:
            ret_dict['concerns_just_documentation'] = False
    else:
        ret_dict['concerns_just_documentation'] = False

    # If being called in a subclass, open ourselves up to some overriding.
    self.extract_tracker_specific_data(xml_data, ret_dict)
    # And pass ret_dict on.
    return ret_dict
def _span2date(span):
    """Turn a Trac timeline <span title="..."> into a printable datetime."""
    raw = span.attrib['title']
    # The title wraps the timestamp in boilerplate; strip it away.
    for noise in ('in Timeline', 'See timeline at '):
        raw = raw.replace(noise, '')
    return printable_datetime(string2naive_datetime(raw))
def get_parsed_data_dict(self, tm):
    """Build a ParsedBug item from this Trac bug's CSV and HTML data.

    tm: tracker model object supplying tracker_name, old_trac,
        as_appears_in_distribution and the bitesized/documentation
        detection configuration.

    Returns a bugimporters.items.ParsedBug (a scrapy.Item, which looks
    and feels like a dict but has some special conveniences).
    """
    # Seems that some Trac bug trackers don't give all the information
    # below. For now, just put the offending item inside a try catch and
    # give it a null case.
    ret = bugimporters.items.ParsedBug()
    ret.update({
        'title': self.bug_csv['summary'],
        'description': TracBugParser.string_un_csv(
            self.bug_csv['description']),
        'status': self.bug_csv['status'],
        'submitter_username': self.bug_csv['reporter'],
        'submitter_realname': '',  # can't find this in Trac
        'canonical_bug_link': self.bug_url,
        'last_polled': printable_datetime(),
        '_project_name': tm.tracker_name,
    })
    ret['importance'] = self.bug_csv.get('priority', '')
    ret['looks_closed'] = (self.bug_csv['status'] == 'closed')

    page_metadata = TracBugParser.page2metadata_table(self.bug_html)

    # Set as_appears_in_distribution.
    ret['as_appears_in_distribution'] = tm.as_appears_in_distribution

    if not page_metadata:
        # logging.warn() is a deprecated alias; use warning().
        logging.warning("This Trac bug got no page metadata. Probably we did"
                        " not find it on the page.")
        logging.warning("Bug URL: %s", self.bug_url)
        return ret

    all_people = set(TracBugParser.all_people_in_changes(self.bug_html))
    all_people.add(page_metadata['Reported by:'])
    # Trac variants label the Cc row either 'Cc' or 'Cc:'.
    for cc_key in ('Cc', 'Cc:'):
        if cc_key in page_metadata:
            all_people.update(
                person.strip()
                for person in page_metadata[cc_key].split(','))
    # Prefer 'Assigned to:', then 'Owned by:', else nobody -- same
    # fallback as the previous nested KeyError handling, without nesting.
    assignee = page_metadata.get(
        'Assigned to:', page_metadata.get('Owned by:', ''))
    if assignee:
        all_people.add(assignee)
    ret['people_involved'] = len(all_people)

    # FIXME: Need time zone
    if not tm.old_trac:
        # All is fine, proceed as normal.
        ret['date_reported'] = TracBugParser.page2date_opened(self.bug_html)
        ret['last_touched'] = TracBugParser.page2date_modified(self.bug_html)

    # Check for the bitesized keyword.
    if tm.bitesized_type:
        b_list = tm.bitesized_text.split(',')
        ret['good_for_newcomers'] = any(
            b in self.bug_csv[tm.bitesized_type] for b in b_list)
    else:
        ret['good_for_newcomers'] = False

    # Check whether this is a documentation bug.
    if tm.documentation_type:
        d_list = tm.documentation_text.split(',')
        ret['concerns_just_documentation'] = any(
            d in self.bug_csv[tm.documentation_type] for d in d_list)
    else:
        ret['concerns_just_documentation'] = False

    # Then pass ret out.
    return ret
def get_parsed_data_dict(self, base_url, bitesized_type, bitesized_text,
                         documentation_type, documentation_text):
    """Build a ParsedBug item from this bug's Bugzilla XML.

    base_url: tracker root used to compose the canonical show_bug.cgi link.
    bitesized_type/bitesized_text: how to detect "good for newcomers"
        ("key" = match against keywords, "wboard" = match against the
        status whiteboard); *_text is a comma-separated list of markers.
    documentation_type/documentation_text: same idea for documentation
        bugs ("key", "comp" = component name, "prod" = product name).

    Returns a bugimporters.items.ParsedBug (dict-like scrapy Item).
    """
    # Generate the bug_url.
    self.bug_url = urlparse.urljoin(base_url, "show_bug.cgi?id=%d" % self.bug_id)

    xml_data = self.bug_xml

    date_reported_text = self.get_tag_text_from_xml(xml_data, "creation_ts")
    last_touched_text = self.get_tag_text_from_xml(xml_data, "delta_ts")
    u, r = self._who_tag_to_username_and_realname(xml_data.xpath(".//reporter")[0])
    status = self.get_tag_text_from_xml(xml_data, "bug_status")
    # NOTE(review): treating ASSIGNED as closed looks deliberate here
    # (an assigned bug is no longer up for grabs) -- confirm.
    looks_closed = status in ("RESOLVED", "WONTFIX", "CLOSED", "ASSIGNED")

    ret_dict = bugimporters.items.ParsedBug(
        {
            "title": self.get_tag_text_from_xml(xml_data, "short_desc"),
            "description": (self.get_tag_text_from_xml(xml_data, "long_desc/thetext") or "(Empty description)"),
            "status": status,
            "importance": self.get_tag_text_from_xml(xml_data, "bug_severity"),
            "people_involved": self.bugzilla_count_people_involved(xml_data),
            "date_reported": self.bugzilla_date_to_printable_datetime(date_reported_text),
            "last_touched": self.bugzilla_date_to_printable_datetime(last_touched_text),
            "last_polled": printable_datetime(),
            "submitter_username": u,
            "submitter_realname": r,
            "canonical_bug_link": self.bug_url,
            "looks_closed": looks_closed,
        }
    )

    keywords_text = self.get_tag_text_from_xml(xml_data, "keywords") or ""
    keywords = [s.strip() for s in keywords_text.split(",")]

    # Check for the bitesized keyword.
    if bitesized_type:
        b_list = bitesized_text.split(",")
        if bitesized_type == "key":
            ret_dict["good_for_newcomers"] = any(b in keywords for b in b_list)
        elif bitesized_type == "wboard":
            # BUGFIX: the <status_whiteboard> tag may be missing, in which
            # case get_tag_text_from_xml can yield None and the substring
            # test below would raise TypeError; fall back to "" like the
            # keywords lookup above does.
            whiteboard_text = self.get_tag_text_from_xml(xml_data, "status_whiteboard") or ""
            ret_dict["good_for_newcomers"] = any(b in whiteboard_text for b in b_list)
        else:
            ret_dict["good_for_newcomers"] = False
    else:
        ret_dict["good_for_newcomers"] = False

    # Check whether this is a documentation bug.
    if documentation_type:
        d_list = documentation_text.split(",")
        if documentation_type == "key":
            ret_dict["concerns_just_documentation"] = any(d in keywords for d in d_list)
        elif documentation_type == "comp":
            ret_dict["concerns_just_documentation"] = any(d == self.component for d in d_list)
        elif documentation_type == "prod":
            ret_dict["concerns_just_documentation"] = any(d == self.product for d in d_list)
        else:
            ret_dict["concerns_just_documentation"] = False
    else:
        ret_dict["concerns_just_documentation"] = False

    # If being called in a subclass, open ourselves up to some overriding.
    self.extract_tracker_specific_data(xml_data, ret_dict)

    # And pass ret_dict on.
    return ret_dict
def get_parsed_data_dict(self, tm):
    """Build a ParsedBug item from this Trac bug's CSV and HTML data.

    tm: tracker model object supplying tracker_name,
        as_appears_in_distribution and the bitesized/documentation
        detection configuration.

    Returns a bugimporters.items.ParsedBug (a scrapy.Item, which looks
    and feels like a dict but has some special conveniences).
    """
    # Seems that some Trac bug trackers don't give all the information
    # below. For now, just put the offending item inside a try catch and
    # give it a null case.
    ret = bugimporters.items.ParsedBug()
    ret.update({
        'title': self.bug_csv['summary'],
        'description': TracBugParser.string_un_csv(
            self.bug_csv['description']),
        'status': self.bug_csv['status'],
        'submitter_username': self.bug_csv['reporter'],
        'submitter_realname': '',  # can't find this in Trac
        'canonical_bug_link': self.bug_url,
        'last_polled': printable_datetime(),
        '_project_name': tm.tracker_name,
    })
    ret['importance'] = self.bug_csv.get('priority', '')
    ret['looks_closed'] = (self.bug_csv['status'] == 'closed')

    page_metadata = TracBugParser.page2metadata_table(self.bug_html)

    # Set as_appears_in_distribution.
    ret['as_appears_in_distribution'] = tm.as_appears_in_distribution

    if not page_metadata:
        # logging.warn() is a deprecated alias; use warning().
        logging.warning("This Trac bug got no page metadata. Probably we did"
                        " not find it on the page.")
        logging.warning("Bug URL: %s", self.bug_url)
        return ret

    all_people = set(TracBugParser.all_people_in_changes(self.bug_html))
    all_people.add(page_metadata['Reported by:'])
    # Trac variants label the Cc row either 'Cc' or 'Cc:'.
    for cc_key in ('Cc', 'Cc:'):
        if cc_key in page_metadata:
            all_people.update(
                person.strip()
                for person in page_metadata[cc_key].split(','))
    # Prefer 'Assigned to:', then 'Owned by:', else nobody -- same
    # fallback as the previous nested KeyError handling, without nesting.
    assignee = page_metadata.get(
        'Assigned to:', page_metadata.get('Owned by:', ''))
    if assignee:
        all_people.add(assignee)
    ret['people_involved'] = len(all_people)

    # FIXME: Need time zone
    ret['date_reported'] = TracBugParser.page2date_opened(self.bug_html)
    ret['last_touched'] = TracBugParser.page2date_modified(self.bug_html)

    # Check for the bitesized keyword.
    if tm.bitesized_type:
        b_list = tm.bitesized_text.split(',')
        ret['good_for_newcomers'] = any(
            b in self.bug_csv[tm.bitesized_type] for b in b_list)
    else:
        ret['good_for_newcomers'] = False

    # Check whether this is a documentation bug.
    if tm.documentation_type:
        d_list = tm.documentation_text.split(',')
        ret['concerns_just_documentation'] = any(
            d in self.bug_csv[tm.documentation_type] for d in d_list)
    else:
        ret['concerns_just_documentation'] = False

    # Then pass ret out.
    return ret
def get_parsed_data_dict(self, base_url, bitesized_type, bitesized_text,
                         documentation_type, documentation_text):
    """Build a ParsedBug item from this bug's Tigris issue XML.

    base_url: tracker root; the canonical link is base_url +
        'show_bug.cgi?id=<bug_id>'.
    bitesized_type/bitesized_text: 'key' matches keywords, 'wboard'
        matches the status whiteboard; *_text is comma-separated markers.
    documentation_type/documentation_text: substring-matched against
        'key', 'comp', 'subcomp' and 'prod' to pick which fields to test.

    Returns a bugimporters.items.ParsedBug (dict-like scrapy Item).
    """
    # Generate the bug_url.
    self.bug_url = '%sshow_bug.cgi?id=%d' % (base_url, self.bug_id)

    xml_data = self.bug_xml

    date_reported_text = self.get_tag_text_from_xml(xml_data, 'creation_ts')
    last_touched_text = self.get_tag_text_from_xml(xml_data, 'delta_ts')
    u, r = self._who_tag_to_username_and_realname(
        xml_data.xpath('.//reporter')[0])
    status = self.get_tag_text_from_xml(xml_data, 'issue_status')
    looks_closed = status in ('RESOLVED', 'WONTFIX', 'CLOSED', 'INVALID')

    ret_dict = bugimporters.items.ParsedBug({
        'title': self.get_tag_text_from_xml(xml_data, 'short_desc'),
        'description': (self.get_tag_text_from_xml(
            xml_data, 'long_desc/thetext') or '(Empty description)'),
        'status': status,
        'importance': self.get_tag_text_from_xml(xml_data, 'priority'),
        'people_involved': self.tigris_count_people_involved(xml_data),
        'date_reported': self.tigris_date_to_printable_datetime(
            date_reported_text),
        'last_touched': self.tigris_date_to_printable_datetime(
            last_touched_text),
        'last_polled': printable_datetime(),
        'submitter_username': u,
        'submitter_realname': r,
        'canonical_bug_link': self.bug_url,
        'looks_closed': looks_closed,
    })

    keywords_text = self.get_tag_text_from_xml(xml_data, 'keywords') or ''
    keywords = [s.strip() for s in keywords_text.split(',')]

    # Check for the bitesized keyword.
    is_easy = False
    if bitesized_type:
        b_list = bitesized_text.split(',')
        if bitesized_type == 'key':
            is_easy = any(b in keywords for b in b_list)
        if not is_easy and bitesized_type == 'wboard':
            # BUGFIX: the <status_whiteboard> tag may be missing, in which
            # case get_tag_text_from_xml can yield None and the substring
            # test below would raise TypeError; fall back to '' like the
            # keywords lookup above does.
            whiteboard_text = self.get_tag_text_from_xml(
                xml_data, 'status_whiteboard') or ''
            is_easy = any(b in whiteboard_text for b in b_list)
    ret_dict['good_for_newcomers'] = is_easy

    # Check whether this is a documentation bug.
    # NOTE(review): substring matching means 'subcomp' also satisfies the
    # 'comp' test below; preserved as-is -- confirm that this is intended.
    is_doc = False
    if documentation_type:
        d_list = documentation_text.split(',')
        if 'key' in documentation_type:
            is_doc = any(d in keywords for d in d_list)
        if not is_doc and 'comp' in documentation_type:
            is_doc = any(d == self.component for d in d_list)
        if not is_doc and 'subcomp' in documentation_type:
            is_doc = any(d == self.subcomponent for d in d_list)
        if not is_doc and 'prod' in documentation_type:
            is_doc = any(d == self.product for d in d_list)
    ret_dict['concerns_just_documentation'] = is_doc

    # And pass ret_dict on.
    return ret_dict