def test_top_to_bottom(self):
    self.setup_class()
    spider = bugimporters.main.BugImportSpider()
    self.tm.bugimporter = 'trac.TracBugImporter'
    self.tm.tracker_name = 'Twisted'
    self.tm.bitesized_type = ''
    self.tm.documentation_type = ''
    self.tm.base_url = 'http://twistedmatrix.com/trac/'
    self.tm.queries = [
        'http://twistedmatrix.com/trac/query?id=5858&format=csv',
    ]
    spider.input_data = [self.tm.__dict__]
    url2filename = {
        'http://twistedmatrix.com/trac/query?id=5858&format=csv':
            os.path.join(HERE, 'sample-data',
                         'twisted-trac-query-for-id=5858.csv'),
        'http://twistedmatrix.com/trac/ticket/5858?format=csv':
            os.path.join(HERE, 'sample-data', 'twisted-trac-5858.csv'),
        'http://twistedmatrix.com/trac/ticket/5858':
            os.path.join(HERE, 'sample-data', 'twisted-trac-5858.html'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 1
    item = items[0]
    assert item['canonical_bug_link'] == (
        'http://twistedmatrix.com/trac/ticket/5858')
def test_top_to_bottom(self):
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm]
    url2filename = {
        self.tm['queries'][0]:
            sample_data_path('pculture-bitesized-query.html'),
        'http://bugzilla.pculture.org/show_bug.cgi?ctype=xml&excludefield=attachmentdata&id=2138&id=2283&id=2374&id=4763&id=8462&id=8489&id=8670&id=9339&id=9415&id=9466&id=9569&id=11882&id=13122&id=15672&':
            sample_data_path('lots-of-pculture-bugs.xml'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 14
    assert set([item['canonical_bug_link'] for item in items]) == set([
        'http://bugzilla.pculture.org/show_bug.cgi?id=2283',
        'http://bugzilla.pculture.org/show_bug.cgi?id=2138',
        'http://bugzilla.pculture.org/show_bug.cgi?id=13122',
        'http://bugzilla.pculture.org/show_bug.cgi?id=9415',
        'http://bugzilla.pculture.org/show_bug.cgi?id=9569',
        'http://bugzilla.pculture.org/show_bug.cgi?id=15672',
        'http://bugzilla.pculture.org/show_bug.cgi?id=11882',
        'http://bugzilla.pculture.org/show_bug.cgi?id=2374',
        'http://bugzilla.pculture.org/show_bug.cgi?id=4763',
        'http://bugzilla.pculture.org/show_bug.cgi?id=9339',
        'http://bugzilla.pculture.org/show_bug.cgi?id=8670',
        'http://bugzilla.pculture.org/show_bug.cgi?id=8462',
        'http://bugzilla.pculture.org/show_bug.cgi?id=8489',
        'http://bugzilla.pculture.org/show_bug.cgi?id=9466',
    ])
def test_process_bugs(self):
    '''Note that process_bugs, for GitHub, requires a date-based query
    in self.tm.get_older_bug_data.'''
    spider = bugimporters.main.BugImportSpider()
    self.tm.bugimporter = 'github.GitHubBugImporter'
    self.tm.tracker_name = 'mango django'
    self.tm.github_name = 'acm-uiuc'
    self.tm.github_repo = 'mango-django'
    self.tm.bitesized_tag = ''
    self.tm.get_older_bug_data = (
        'https://api.github.com/repos/acm-uiuc/mango-django/issues'
        '?since=2012-09-15T00%3A00%3A00')
    self.tm.existing_bug_urls = [
        'https://github.com/acm-uiuc/mango-django/issues/3',
    ]
    self.tm.documentation_tag = ''
    self.tm.queries = []
    spider.input_data = [self.tm.__dict__]
    url2filename = {
        'https://api.github.com/repos/acm-uiuc/mango-django/issues?since=2012-09-15T00%3A00%3A00':
            os.path.join(HERE, 'sample-data', 'github',
                         'issue-query-with-date-constraint.json'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    bugs = ar.respond_recursively(spider.start_requests())
    self.assertEqual(len(bugs), 1)
    bug = bugs[0]
    self.assertEqual(bug['canonical_bug_link'],
                     self.tm.existing_bug_urls[0])
def test_process_existing_bug_urls(self):
    # Reset test state
    self.setup_class()
    # Remove 'queries', and add a bug to existing_bug_urls
    self.tm.queries = []
    self.tm.existing_bug_urls = [
        'http://mercurial.selenic.com/bts/issue1550',
    ]
    # Create the bug spider
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm.__dict__]
    # Configure URL<->filename mapping for offline crawling
    url2filename = {
        'http://mercurial.selenic.com/bts/issue1550':
            os.path.join(HERE, 'sample-data', 'closed-mercurial-bug.html'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 1
    # Compare against existing_bug_urls[0] rather than the old
    # url2filename.keys()[0], which is not indexable on Python 3.
    assert items[0]['canonical_bug_link'] == self.tm.existing_bug_urls[0]
def test_process_existing_bug_urls(self):
    # Reset test state
    self.setup_class()
    # Remove 'queries', and add a bug to existing_bug_urls
    self.tm['queries'] = []
    self.tm['existing_bug_urls'] = [
        'https://bugs.launchpad.net/bzr/+bug/839461',
    ]
    # Create the bug spider
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm]
    # Configure URL<->filename mapping for offline crawling
    url2filename = {
        'https://api.launchpad.net/1.0/bugs/839461':
            os.path.join(HERE, 'sample-data', 'launchpad', 'bugs_839461'),
        'https://api.launchpad.net/1.0/bzr/+bug/839461':
            os.path.join(HERE, 'sample-data', 'launchpad', 'bugs_839461'),
        'https://api.launchpad.net/1.0/bzr/+bug/839461/bug_tasks':
            os.path.join(HERE, 'sample-data', 'launchpad',
                         'bugs_task_839461'),
        'https://api.launchpad.net/1.0/bugs/839461/subscriptions':
            os.path.join(HERE, 'sample-data', 'launchpad',
                         'bugs_839461_subscriptions'),
        'https://api.launchpad.net/1.0/~vila':
            os.path.join(HERE, 'sample-data', 'launchpad', '~vila'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 1
    assert (items[0]['canonical_bug_link'] ==
            self.tm['existing_bug_urls'][0])
def test_top_to_bottom(self):
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [dict(
        tracker_name='SymPy',
        google_name='sympy',
        bitesized_type='label',
        bitesized_text='EasyToFix',
        documentation_type='label',
        documentation_text='Documentation',
        bugimporter='google.GoogleBugImporter',
        queries=[
            'https://code.google.com/feeds/issues/p/sympy/issues/full'
            '?can=open&max-results=10000&label=EasyToFix',
        ])]
    url2filename = {
        'https://code.google.com/feeds/issues/p/sympy/issues/full?can=open&max-results=10000&label=EasyToFix':
            os.path.join(HERE, 'sample-data', 'google',
                         'label-easytofix.atom'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 74
def test_old_bug_data(self):
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [dict(
        tracker_name='SymPy',
        google_name='sympy',
        bitesized_type='label',
        bitesized_text='EasyToFix',
        documentation_type='label',
        documentation_text='Documentation',
        bugimporter='google.GoogleBugImporter',
        queries=[],
        get_older_bug_data=(
            'https://code.google.com/feeds/issues/p/sympy/issues/full'
            '?max-results=10000&can=all&updated-min=2012-09-15T00:00:00'),
        existing_bug_urls=[
            'http://code.google.com/p/sympy/issues/detail?id=2371',
        ],
    )]
    url2filename = {
        ('https://code.google.com/feeds/issues/p/sympy/issues/full'
         '?max-results=10000&can=all&updated-min=2012-09-15T00:00:00'):
            os.path.join(HERE, 'sample-data', 'google',
                         'issues-by-date.atom'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 1
    item = items[0]
    assert item['canonical_bug_link'] == (
        'http://code.google.com/p/sympy/issues/detail?id=2371')
def test_bug_that_404s_is_deleted(self, monkeypatch):
    bug_url = 'http://twistedmatrix.com/trac/ticket/1234'
    ar = autoresponse.Autoresponder(url2filename={}, url2errors={
        bug_url + '?format=csv': 404,
    })
    all_bugs = [(bug_url, None)]
    request_iterable = self.im.process_bugs(all_bugs)
    items = ar.respond_recursively(request_iterable)
    assert len(items) == 1
    assert items[0]['_deleted']
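
# A minimal sketch (not part of the original suite) of how a consumer of
# the crawl output might act on the '_deleted' marker asserted above. The
# items are plain dicts, as returned by respond_recursively();
# 'delete_bug' is a hypothetical callback standing in for whatever
# persistence layer the consumer uses.
def purge_deleted_bugs(items, delete_bug):
    for item in items:
        if item.get('_deleted'):
            # The tracker returned 404, so the bug no longer exists
            # upstream; remove our copy of it.
            delete_bug(item['canonical_bug_link'])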
def test_provide_existing_bug_urls(self):
    self.setup_class()
    # Create spider
    spider = bugimporters.main.BugImportSpider()
    # Provide metadata about the tracker
    self.tm.bugimporter = 'trac.TracBugImporter'
    self.tm.tracker_name = 'Twisted'
    self.tm.bitesized_type = ''
    self.tm.documentation_type = ''
    self.tm.base_url = 'http://twistedmatrix.com/trac/'
    # Make 'queries' empty, because we are only interested in
    # how it handles the existing_bug_urls list
    self.tm.queries = []
    self.tm.existing_bug_urls = [
        'http://twistedmatrix.com/trac/ticket/5858',
        'http://twistedmatrix.com/trac/ticket/4298',
    ]
    # Convert the trackermodel into data for the spider
    spider.input_data = [self.tm.__dict__]
    # Provide sample data
    url2filename = {
        'http://twistedmatrix.com/trac/query?id=5858&format=csv':
            os.path.join(HERE, 'sample-data',
                         'twisted-trac-query-for-id=5858.csv'),
        'http://twistedmatrix.com/trac/ticket/5858?format=csv':
            os.path.join(HERE, 'sample-data', 'twisted-trac-5858.csv'),
        'http://twistedmatrix.com/trac/ticket/5858':
            os.path.join(HERE, 'sample-data', 'twisted-trac-5858.html'),
        'http://twistedmatrix.com/trac/ticket/4298?format=csv':
            os.path.join(HERE, 'sample-data',
                         'twisted-trac-4298-csv-export'),
        'http://twistedmatrix.com/trac/ticket/4298':
            os.path.join(HERE, 'sample-data',
                         'twisted-trac-4298-on-2010-04-02.html'),
    }
    # Get all the data the spider will get
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    # And make sure it pulls out the right two bugs
    assert len(items) == 2
    urls_we_want_to_see = set(self.tm.existing_bug_urls)
    urls_we_found = set([x['canonical_bug_link'] for x in items])
    assert urls_we_found == urls_we_want_to_see
def test_top_to_bottom(self, extra_url2filename=None):
    self.setup_class()
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm]
    url2filename = {
        'https://api.launchpad.net/1.0/bzr/?ws.op=searchTasks':
            os.path.join(HERE, 'sample-data', 'launchpad',
                         'bzr?ws.op=searchTasks'),
        'https://api.launchpad.net/1.0/bugs/839461':
            os.path.join(HERE, 'sample-data', 'launchpad', 'bugs_839461'),
        'https://api.launchpad.net/1.0/bugs/839461/subscriptions':
            os.path.join(HERE, 'sample-data', 'launchpad',
                         'bugs_839461_subscriptions'),
        'https://api.launchpad.net/1.0/~vila':
            os.path.join(HERE, 'sample-data', 'launchpad', '~vila'),
    }
    if extra_url2filename:
        url2filename.update(extra_url2filename)
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 1
    item = items[0]
    self.assertEqual(
        datetime.datetime(2011, 9, 2, 10, 42, 43, 883929).isoformat(),
        item['date_reported'])
    self.assertEqual(
        u'Bug #839461 in Bazaar: "can\'t run selftest for 2.2 with '
        u'recent subunit/testtools"',
        item['title'])
    self.assertEqual('Critical', item['importance'])
    self.assertEqual('https://bugs.launchpad.net/bzr/+bug/839461',
                     item['canonical_bug_link'])
    self.assertEqual(
        datetime.datetime(2012, 8, 30, 14, 16, 26, 102504).isoformat(),
        item['last_touched'])
    self.assertEqual(
        "While freezing bzr-2.2.5 from a natty machine with python-2.7.1+,\nlp:testtools revno 244 and lp:subunit revno 151 I wasn't able to\nrun 'make check-dist-tarball'.\n\nI had to revert to testtools-0.9.2 and subunit 0.0.6 and use\npython2.6 to successfully run:\n\n BZR_PLUGIN_PATH=-site make check-dist-tarball PYTHON=python2.6 | subunit2pyunit\n\nAlso, I've checked the versions used on pqm:\n\n(pqm-amd64-new)pqm@cupuasso:~/pqm-workdir/bzr+ssh/new-pqm-test$ dpkg -l | grep subunit\nii libsubunit-perl 0.0.6-1~bazaar1.0.IS.10.04 perl parser and diff for Subunit streams\nii python-subunit 0.0.6-1~bazaar1.0.IS.10.04 unit testing protocol - Python bindings to g\nii subunit 0.0.6-1~bazaar1.0.IS.10.04 command line tools for processing Subunit st\n(pqm-amd64-new)pqm@cupuasso:~/pqm-workdir/bzr+ssh/new-pqm-test$ dpkg -l | grep testtools\nii python-testtools 0.9.6-0~bazaar1.0.IS.8.04 Extensions to the Python unittest library",
        item['description'])
    self.assertEqual(1, item['people_involved'])
    self.assertEqual('vila', item['submitter_username'])
    self.assertEqual('Vincent Ladeuil', item['submitter_realname'])
    return item
def test_top_to_bottom(self):
    self.setup_class()
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [self.tm.__dict__]
    url2filename = {
        'http://mercurial.selenic.com/bts/issue?@action=export_csv&@columns=id,activity,title,creator,status&@sort=-activity&@group=priority&@filter=status,assignedto&@pagesize=50&@startwith=0&status=-1,1,2,3,4,5,6,7,9,10':
            os.path.join(HERE, 'sample-data', 'fake-mercurial-csv.csv'),
        'http://mercurial.selenic.com/bts/issue1550':
            os.path.join(HERE, 'sample-data', 'closed-mercurial-bug.html'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 1
    item = items[0]
    assert item['canonical_bug_link'] == (
        'http://mercurial.selenic.com/bts/issue1550')
def test_top_to_bottom_closed(self):
    spider = bugimporters.main.BugImportSpider()
    self.tm.bugimporter = 'github.GitHubBugImporter'
    self.tm.tracker_name = 'openhatch tests'
    self.tm.github_name = 'openhatch'
    self.tm.github_repo = 'tests'
    self.tm.bitesized_tag = 'lowfruit'
    self.tm.documentation_tag = 'docs'
    self.tm.queries = [
        'https://api.github.com/repos/openhatch/tests/issues?state=closed',
    ]
    spider.input_data = [self.tm.__dict__]
    url2filename = {
        'https://api.github.com/repos/openhatch/tests/issues?state=closed':
            os.path.join(HERE, 'sample-data', 'github',
                         'issue-list-closed'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    bugs = ar.respond_recursively(spider.start_requests())
    assert len(bugs) == 1
    bug = bugs[0]
    self.assertEqual(bug['title'], 'yo dawg')
    self.assertEqual(bug['description'],
                     'this issue be all up in ya biz-nass.')
    self.assertEqual(bug['status'], 'closed')
    self.assertEqual(bug['people_involved'], 2)
    self.assertEqual(
        bug['date_reported'],
        printable_datetime(datetime.datetime(2012, 3, 12, 19, 24, 42)))
    self.assertEqual(
        bug['last_touched'],
        printable_datetime(datetime.datetime(2012, 3, 16, 21, 39, 42)))
    self.assertEqual(bug['submitter_username'], 'openhatch')
    self.assertEqual(bug['submitter_realname'], '')
    self.assertEqual(bug['canonical_bug_link'],
                     'https://github.com/openhatch/tests/issues/42')
    self.assertEqual(bug['good_for_newcomers'], True)
    self.assertEqual(bug['concerns_just_documentation'], False)
    self.assertEqual(bug['looks_closed'], True)
def test_provide_existing_bug_urls(self):
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [dict(self.tm)]
    # Add some existing bug URLs to the story
    spider.input_data[0]['existing_bug_urls'] = [
        'http://bugzilla.pculture.org/show_bug.cgi?id=9415',
        'http://bugzilla.pculture.org/show_bug.cgi?id=9569',
        'http://bugzilla.pculture.org/show_bug.cgi?id=15672',
        'http://bugzilla.pculture.org/show_bug.cgi?id=11882',
        'http://bugzilla.pculture.org/show_bug.cgi?id=2374',
        'http://bugzilla.pculture.org/show_bug.cgi?id=4763',
    ]
    spider.input_data[0]['queries'] = []
    url2filename = {
        'http://bugzilla.pculture.org/show_bug.cgi?ctype=xml&excludefield=attachmentdata&id=2374&id=4763&id=9415&id=9569&id=11882&id=15672&':
            sample_data_path('fewer-pculture-bugs.xml'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    assert len(items) == 6
    assert set([item['canonical_bug_link'] for item in items]) == set(
        spider.input_data[0]['existing_bug_urls'])
def test_top_to_bottom_with_bigger_project(self):
    # For this project, we found that some bugs from the past were not
    # getting refreshed.
    #
    # This is because of a subtlety of import from the Google Code bug
    # tracker.
    #
    # The get_older_bug_data query gives us all updates to bugs that have
    # taken place since that date. So if one of the bugs in
    # existing_bug_urls has been updated, we get notified of those
    # updates.
    #
    # But if one of those bugs has *not* been updated, then Google Code
    # tells us nothing. The old behavior was that we would, therefore,
    # leave no information about that bug in the output of the crawl.
    # Therefore, consumers of the data would conclude that the bug has
    # not been polled. But actually, we *do* have some information we
    # can report. Namely, since there was no update to the bug since
    # its last_polled, it has stayed the same until now.
    #
    # Therefore, this test verifies that we report on existing_bug_urls
    # to indicate there is no change.
    spider = bugimporters.main.BugImportSpider()
    spider.input_data = [{
        'bitesized_text': u'Effort-Minimal,Effort-Easy,Effort-Fair',
        'bitesized_type': u'label',
        'bugimporter': 'google',
        'custom_parser': u'',
        'documentation_text': u'Component-Docs',
        'documentation_type': u'label',
        'existing_bug_urls': [
            # No data in the feed
            u'http://code.google.com/p/soc/issues/detail?id=1461',
            # Has data in the feed
            u'http://code.google.com/p/soc/issues/detail?id=1618',
        ],
        'get_older_bug_data':
            u'https://code.google.com/feeds/issues/p/soc/issues/full?max-results=10000&can=all&updated-min=2012-05-22T19%3A52%3A10',
        'google_name': u'soc',
        'queries': [],
        'tracker_name': u'Melange',
    }]
    url2filename = {
        'https://code.google.com/feeds/issues/p/soc/issues/full?max-results=10000&can=all&updated-min=2012-05-22T19%3A52%3A10':
            os.path.join(HERE, 'sample-data', 'google',
                         'soc-date-query.atom'),
    }
    ar = autoresponse.Autoresponder(url2filename=url2filename,
                                    url2errors={})
    items = ar.respond_recursively(spider.start_requests())
    # Make sure bugs that actually have data come back, clear and true
    bug_with_data = [
        x for x in items
        if (x['canonical_bug_link'] ==
            'http://code.google.com/p/soc/issues/detail?id=1618')][0]
    assert bug_with_data['title']
    assert not bug_with_data.get('_no_update', False)
    # Verify (here's the new bit) that we report on bugs that are not
    # represented in the feed.
    bug_without_data = [
        x for x in items
        if (x['canonical_bug_link'] ==
            'http://code.google.com/p/soc/issues/detail?id=1461')][0]
    assert bug_without_data['_no_update']
    assert ('http://code.google.com/p/soc/issues/detail?id=1461' in
            [x['canonical_bug_link'] for x in items])
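
# A minimal sketch (not part of the original suite) of how a consumer
# might honor the '_no_update' marker verified above: a bug absent from
# the date-based feed is unchanged since last_polled, so only its poll
# timestamp needs refreshing rather than a full re-save. 'save_bug' and
# 'mark_polled' are hypothetical persistence callbacks.
def apply_crawl_results(items, save_bug, mark_polled):
    for item in items:
        if item.get('_no_update'):
            # No update since last_polled; just record that we checked.
            mark_polled(item['canonical_bug_link'])
        else:
            save_bug(item)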