def test_add_twitter_resource_processor_nonentity(self, mock_api, mock_auth):
    '''Test twitter processor handles non-entities properly (neither a user nor hashtag).'''
    # input arguments used by our mock `ingest`
    datapackage = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    params = {
        'entity': 'non-entity',
        'project_id': 'my-project'
    }

    # Path to the processor we want to test
    processor_dir = \
        os.path.dirname(datapackage_pipelines_measure.processors.__file__)
    processor_path = os.path.join(processor_dir, 'add_twitter_resource.py')

    # An entity that is neither an @account, a #hashtag, nor a url: search
    # term must make the processor raise a ValueError with this message.
    # (Removed a stale commented-out copy of an older message that no
    # longer matched the assertion below.)
    error_msg = 'Entity, "non-entity", must be an @account, #hashtag, ' \
                'or url:url-search'
    with self.assertRaises(ValueError) as cm:
        mock_processor_test(processor_path, (params, datapackage, []))
    self.assertEqual(str(cm.exception), error_msg)
def test_add_facebook_resource_processor_page_no_data(self, mock_api):
    '''The facebook processor must raise if the api returns no data.'''
    mock_api.return_value = my_mock_api_no_response

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {'entity': 'MyPage', 'project_id': 'my-project'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_facebook_resource.py')

    # Running the processor should surface the empty-response error.
    error_msg = 'Facebook request returned no data.'
    with self.assertRaises(ValueError) as cm:
        mock_processor_test(processor_path, (processor_params, dp, []))
    self.assertEqual(str(cm.exception), error_msg)
def test_add_facebook_resource_processor_nopagetoken(self):
    '''The facebook processor must fail when no page token is configured.'''
    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {'entity': 'NoPage', 'project_id': 'my-project'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_facebook_resource.py')

    # A page without a configured access token triggers a KeyError whose
    # message names the offending page.
    error_msg = '\'No Facebook Page Access Token found for page ' \
                '"NoPage" in settings\''
    with self.assertRaises(KeyError) as cm:
        mock_processor_test(processor_path, (processor_params, dp, []))
    self.assertEqual(str(cm.exception), error_msg)
def test_add_ckan_resource_processor_api_key(self, mock_request):
    '''The ckan api key from params is sent as an Authorization header.'''
    mock_request.get('https://demo.ckan.org/api/3/action/resource_show',
                     json=MOCK_CKAN_RESPONSE)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'ckan-host': 'https://demo.ckan.org',
        'resource-id': 'd51c9bd4-8256-4289-bdd7-962f8572efb0',
        'ckan-api-key': 'my-api-key'
    }

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_ckan.processors.__file__),
        'add_ckan_resource.py')

    # Run the processor; we only care about the HTTP traffic it produced.
    mock_processor_test(processor_path, (processor_params, dp, []))

    # The recorded request must carry our api key as its Authorization
    # header.
    first_request = mock_request.request_history[0]
    assert first_request.headers['Authorization'] == 'my-api-key'
def test_add_rubygems_resource_not_json(self, mock_request):
    '''A non-json rubygems response raises once rows are consumed.'''
    # Mocked rubygems endpoint returning a non-json body.
    mock_request.get('https://rubygems.org/api/v1/gems/mygem404.json',
                     text="This is not json.")

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []  # nothing here
    }
    processor_params = {'gem_id': 'mygem404'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_rubygems_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([])))

    # The decode error only surfaces when the lazy resource iterator is
    # actually consumed.
    with self.assertRaises(simplejson.scanner.JSONDecodeError):
        list(spew_args[1])
def test_add_rubygems_resource_bad_status(self, mock_request):
    '''A non-ok rubygems status surfaces the response body as the error.'''
    # Mocked rubygems endpoint returning an unauthorized status.
    error_msg = 'Hi, there was a problem with your request.'
    mock_request.get('https://rubygems.org/api/v1/gems/mygem401.json',
                     text=error_msg,
                     status_code=401)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []  # nothing here
    }
    processor_params = {'gem_id': 'mygem401'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_rubygems_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([])))

    # The failure only surfaces when the lazy resource iterator is
    # actually consumed.
    with self.assertRaises(Exception) as cm:
        list(spew_args[1])
    self.assertEqual(str(cm.exception), error_msg)
def test_add_discourse_resource_not_json(self, m):
    '''Response isn't json error.'''
    # Mocked discourse endpoint returning a 200 with a non-json body.
    m.get('https://discourse.example.com/admin/users/list/active.json',
          text="bad response",
          status_code=200)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []  # nothing here
    }
    processor_params = {'domain': 'discourse.example.com'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__),
        'add_discourse_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([])))

    # The ValueError only surfaces when the lazy resource iterator is
    # actually consumed.
    with self.assertRaises(ValueError) as cm:
        list(spew_args[1])
    error_msg = "Expected JSON in response from: https://discourse.example.com/admin/users/list/active.json?api_key=myfakediscoursetoken&page=1"  # noqa
    self.assertEqual(str(cm.exception), error_msg)
def test_add_github_resource_processor_badstatus(self, mock_request):
    '''Github response returns bad status'''
    # Simulate Github's rate-limit error payload with a 403 status.
    rate_limit_body = {
        "message": "API rate limit exceeded for user.",
        "documentation_url": "https://developer.github.com/v3/#rate-limiting"
    }
    mock_request.get('https://api.github.com/repos/org/my_github_repo',
                     json=rate_limit_body,
                     status_code=403)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'name': 'hello',
        'repo': 'org/my_github_repo'
    }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_github_resource.py')

    # A non-ok Github status must abort the processor with a RuntimeError.
    with self.assertRaises(RuntimeError):
        mock_processor_test(processor_path, (processor_params, dp, []))
def test_add_github_resource_processor_notjson(self, mock_request):
    '''Github response isn't json'''
    # Simulate Github answering with a plain-text body.
    mock_request.get('https://api.github.com/repos/org/my_github_repo',
                     text="Hi")

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'name': 'hello',
        'repo': 'org/my_github_repo'
    }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_github_resource.py')

    # A body that fails json decoding must abort the processor.
    with self.assertRaises(simplejson.scanner.JSONDecodeError):
        mock_processor_test(processor_path, (processor_params, dp, []))
def test_validate_processor_valid_resource(self):
    '''A valid resource passes validation and a report file is written.'''
    # Arguments consumed by the mocked `ingest`: one resource whose rows
    # match its schema.
    fields = [{'name': 'id', 'type': 'integer'},
              {'name': 'name', 'type': 'string'}]
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': [{
            'name': 'my-resource',
            'schema': {'fields': fields}
        }]
    }
    report_dir = '{}'.format(self.get_base_path())
    processor_params = {
        'reports_path': report_dir,
        'datapackage_reports_path': 'reports',
    }

    def row_yielder():
        yield {'id': 1, 'name': 'english'}
        yield {'id': 2, 'name': 'german'}

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_goodtables.processors.__file__),
        'validate.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([row_yielder()])))
    spew_dp = spew_args[0]

    # Consuming the rows is what triggers the validation run.
    streamed = list(spew_args[1])
    list(streamed[0])

    # The datapackage gains exactly one goodtables report entry.
    reports = spew_dp['reports']
    assert len(reports) == 1
    assert reports[0]['resource'] == 'my-resource'
    assert reports[0]['reportType'] == 'goodtables'
    assert reports[0]['path'] == 'reports/my-resource.json'

    # ...and the report written to disk says the resource is valid.
    with io.open('{}/my-resource.json'.format(report_dir), 'r') as f:
        report_json = json.loads(f.read())
    assert report_json['valid'] is True
def test_validate_processor_no_resources_with_params(self):
    '''With no resources, validation produces no reports even with params.'''
    # Arguments consumed by the mocked `ingest`: an empty datapackage.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {'fail_on_error': True,
                        'datapackage_reports_path': 'reports'}

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_goodtables.processors.__file__),
        'validate.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))
    spew_dp = spew_args[0]

    # Nothing to validate: no resources and no reports in the output.
    assert len(spew_dp['resources']) == 0
    assert len(spew_dp['reports']) == 0
def test_add_ckan_resource_processor_misc_error(self, mock_request):
    '''A ckan error response makes the processor raise.'''
    mock_request.get('https://demo.ckan.org/api/3/action/resource_show',
                     json=MOCK_CKAN_ERROR)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'ckan-host': 'https://demo.ckan.org',
        'resource-id': 'd51c9bd4-8256-4289-bdd7-962f8572efb0'
    }

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_ckan.processors.__file__),
        'add_ckan_resource.py')

    # The error payload must abort the processor with an exception.
    with self.assertRaises(Exception):
        mock_processor_test(processor_path, (processor_params, dp, []))
def test_achanges_acl_handles_non_existing_keys(self):
    '''change_acl must run cleanly when the path matches no keys.'''
    # Should be in setup but requires mock
    s3 = boto3.client('s3', endpoint_url=os.environ['S3_ENDPOINT_URL'])
    bucket = 'my.private.bucket'
    try:
        s3.create_bucket(ACL='public-read', Bucket=bucket)
    except Exception:
        # Bucket may already exist from a previous run; any other failure
        # will surface later when the processor touches the bucket.
        # (Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt.)
        pass

    params = {
        'bucket': bucket,
        'path': 'my/non-existing/datasets',
        'acl': 'private'
    }
    processor_dir = os.path.dirname(
        datapackage_pipelines_aws.processors.__file__)
    processor_path = os.path.join(processor_dir, 'change_acl.py')

    spew_args, _ = mock_processor_test(processor_path, (params, {
        'name': 'test',
        'resources': []
    }, [[]]))
    dp = spew_args[0]

    # Just make sure processor executed without errors (must not do anything)
    self.assertEqual(dp['name'], 'test')
def test_add_github_resource_processor(self, mock_request):
    '''Github stats are mapped onto the configured resource fields.'''
    # Any Github request gets this canned repository payload.
    canned_repo = {
        'name': 'my-repository',
        'subscribers_count': 4,
        'stargazers_count': 1
    }
    mock_request.get(requests_mock.ANY, json=canned_repo)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'name': 'hello',
        'repo': 'my_github_repo',
        'map_fields': {
            'repository': 'name',
            'watchers': 'subscribers_count',
            'stars': 'stargazers_count'
        }
    }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_github_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))
    spew_dp = spew_args[0]

    # One resource is added, named after the `name` param, with the mapped
    # fields plus the bookkeeping columns.
    added = spew_dp['resources']
    assert len(added) == 1
    assert added[0]['name'] == 'hello'
    schema_names = [f['name'] for f in added[0]['schema']['fields']]
    assert schema_names == [
        'repository', 'watchers', 'stars', 'source', 'date'
    ]

    # The single streamed row carries today's mapped values.
    streamed = list(spew_args[1])
    assert len(streamed) == 1
    assert list(streamed[0]) == \
        [{
            'repository': 'my-repository',
            'watchers': 4,
            'stars': 1,
            'source': 'github',
            'date': datetime.date.today()
        }]
def test_validate_processor_invalid_resource_fail_on_error_no_report(self):
    '''Fail on error but don't write report.'''
    # Arguments consumed by the mocked `ingest`: schema says `id` is a
    # string, but the yielded row has an int, so validation must fail.
    fields = [{'name': 'id', 'type': 'string'},
              {'name': 'name', 'type': 'string'}]
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': [{
            'name': 'resource-fail-no-write',
            'schema': {'fields': fields}
        }]
    }
    report_dir = '{}'.format(self.get_base_path())
    processor_params = {
        'reports_path': report_dir,
        'fail_on_error': True,
        'write_report': False
    }

    def row_yielder():
        # id not a string causes goodtables error
        yield {'id': 1, 'name': 'english'}

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_goodtables.processors.__file__),
        'validate.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([row_yielder()])))

    streamed = list(spew_args[1])

    # Consuming the rows triggers validation, which must raise because
    # `fail_on_error` is set.
    error_msg = 'Datapackage resource \'resource-fail-no-write\' ' \
                'failed Goodtables validation.'
    with self.assertRaises(RuntimeError) as cm:
        list(streamed[0])
    self.assertEqual(str(cm.exception), error_msg)

    # With `write_report` False, no report file may be written.
    assert not os.path.isfile(
        '{}/resource-fail-no-write.json'.format(report_dir))
def test_add_rubygems_resource_processor(self, mock_request):
    '''No latest in database. Get today's data.'''
    # Canned rubygems payload for the gem under test.
    canned_gem = {
        'name': 'mygem',
        'downloads': 271,
        'version': '0.3.1',
        'version_downloads': 170
    }
    mock_request.get('https://rubygems.org/api/v1/gems/mygem.json',
                     json=canned_gem)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {'gem_id': 'mygem'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_rubygems_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))
    spew_dp = spew_args[0]

    # One resource is added, named after the gem, with the expected schema.
    added = spew_dp['resources']
    assert len(added) == 1
    assert added[0]['name'] == 'mygem'
    schema_names = [f['name'] for f in added[0]['schema']['fields']]
    assert schema_names == [
        'source', 'date', 'package', 'downloads', 'total_downloads'
    ]

    # The single streamed row reports today's totals (no `downloads`
    # delta, since there is no previous entry to diff against).
    streamed = list(spew_args[1])
    assert len(streamed) == 1
    assert list(streamed[0]) == \
        [{
            'package': 'mygem',
            'source': 'rubygems',
            'total_downloads': 271,
            'date': datetime.date.today()
        }]
def test_add_twitter_resource_processor_account(
    self,
    mock_api,
    mock_auth,
    mock_cursor,
):
    '''Test twitter processor handles user account (@myuser) properties.'''
    # Mock the twitter api: auth succeeds, and the cursor serves first the
    # search results, then the user timeline.
    mock_auth.return_value = 'authed'
    mock_api.return_value = my_mock_api
    mock_cursor.return_value.items.side_effect = [
        get_cursor_items_iter(my_mock_api.search()),
        get_cursor_items_iter(my_mock_api.user_timeline())
    ]

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'entity': '@myuser',
        'project_id': 'my-project'
    }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_twitter_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))

    streamed = list(spew_args[1])
    # A single row in the first resource.
    assert len(streamed[0]) == 1
    first_row = list(streamed[0])[0]

    # The row is dated yesterday...
    assert first_row['date'] == \
        datetime.date.today() - datetime.timedelta(days=1)
    # ...and carries the aggregated account metrics.
    assert first_row['mentions'] == 2
    assert first_row['interactions'] == 15
    assert first_row['followers'] == 5
def test_add_ga_resource_processor_no_latest(self):
    '''No latest in db, so populate from GA request.'''
    # input arguments used by our mock `ingest`
    datapackage = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []  # nothing here
    }
    params = {
        'domain': {
            'url': 'sub.example.com',
            'viewid': '123456'
        }
    }

    # Path to the processor we want to test
    processor_dir = \
        os.path.dirname(datapackage_pipelines_measure.processors.__file__)
    processor_path = os.path.join(processor_dir, 'add_ga_resource.py')

    # Trigger the processor with our mock `ingest` and capture what it will
    # return to `spew`.
    spew_args, _ = mock_processor_test(processor_path,
                                       (params, datapackage, iter([])))
    spew_res_iter = spew_args[1]

    # one resource
    resources = list(spew_res_iter)
    assert len(resources) == 1

    # rows in resource (index the list directly rather than copying it
    # with `list(resources)[0]` as before)
    rows = resources[0]
    assert len(rows) == 4

    # first row asserts
    assert rows[0] == {
        'date': dateutil.parser.parse('2017-05-15').date(),
        'page_path': '/',
        'visitors': 1,
        'unique_visitors': 1,
        'avg_time_spent': 2,
        'domain': 'sub.example.com',
        'source': 'ga'
    }

    # last row asserts (negative indexing instead of `rows[len(rows)-1]`)
    assert rows[-1]['visitors'] == 55
    assert rows[-1]['unique_visitors'] == 89
    assert rows[-1]['avg_time_spent'] == 144
    assert rows[-1]['date'] == dateutil.parser.parse('2017-05-18').date()
def test_validate_processor_valid_resource_no_report(self):
    '''Don't write report if `write_report` is False.'''
    # Arguments consumed by the mocked `ingest`: one valid resource.
    fields = [{'name': 'id', 'type': 'integer'},
              {'name': 'name', 'type': 'string'}]
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': [{
            'name': 'valid-no-report',
            'schema': {'fields': fields}
        }]
    }
    report_dir = '{}'.format(self.get_base_path())
    processor_params = {'reports_path': report_dir, 'write_report': False}

    def row_yielder():
        yield {'id': 1, 'name': 'english'}
        yield {'id': 2, 'name': 'german'}

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_goodtables.processors.__file__),
        'validate.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([row_yielder()])))
    spew_dp = spew_args[0]

    # Consuming the rows is what triggers the validation run.
    streamed = list(spew_args[1])
    list(streamed[0])

    # With `write_report` False there is neither a `reports` property on
    # the datapackage nor a report file on disk.
    assert 'reports' not in spew_dp
    assert not os.path.isfile('{}/valid-no-report.json'.format(report_dir))
def test_puts_datapackage_on_s3(self):
    '''Datapackage and csv data end up on s3 with the expected content.'''
    # Should be in setup but requires mock
    s3 = boto3.resource('s3', endpoint_url=os.environ['S3_ENDPOINT_URL'])
    bucket = s3.Bucket(self.bucket)

    class TempList(list):
        pass

    res = TempList([{
        'Date': datetime.datetime(2001, 2, 3),
        'Name': 'Name'
    }])
    res.spec = self.resources[0]
    res_iter = [res]

    spew_args, _ = mock_processor_test(
        self.processor_path, (self.params, self.datapackage, res_iter))
    spew_res_iter = spew_args[1]

    # We need to actually read the rows to execute the iterator(s)
    rows = [list(res) for res in spew_res_iter]

    keys = [key.key for key in bucket.objects.all()]
    dp_path = 'my/test/path/me/my-datapackage/latest/datapackage.json'
    csv_path = 'my/test/path/me/my-datapackage/latest/data/test.csv'
    assert dp_path in keys
    assert csv_path in keys

    # Check datapackage.json content
    # (deprecated `assertEquals` alias replaced with `assertEqual`;
    # the alias was removed in Python 3.12)
    dpjson = s3.Object(self.bucket, dp_path).get()
    content = dpjson['Body'].read().decode("utf-8")
    self.assertEqual(json.loads(content)['owner'], 'me')
    self.assertEqual(json.loads(content)['name'], 'my-datapackage')
    self.assertEqual(dpjson['ContentType'], self.params['content_type'])

    # Check csv content
    obj = s3.Object(self.bucket, csv_path).get()
    content = obj['Body'].read().decode("utf-8")
    expected_csv = 'Date,Name\r\n2001-02-03,Name\r\n'
    self.assertEqual(content, expected_csv)
    self.assertEqual(obj['ContentType'], self.params['content_type'])
def test_add_facebook_resource_processor_page(self, mock_api):
    '''Test facebook processor handles page properties.'''
    mock_api.return_value = my_mock_api

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {'entity': 'MyPage', 'project_id': 'my-project'}

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_facebook_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))

    streamed = list(spew_args[1])
    # Exactly one resource with exactly one row.
    assert len(streamed) == 1
    assert len(streamed[0]) == 1

    row = list(streamed[0])[0]
    log.debug(row)
    # Page metrics come from the mocked api, dated yesterday.
    assert row['followers'] == 16689
    assert row['mentions'] == 13
    assert row['interactions'] == 5
    assert row['impressions'] == 2
    assert row['date'] == \
        datetime.date.today() - datetime.timedelta(days=1)
def test_add_ckan_resource_processor(self, mock_request):
    '''A ckan resource is added to the datapackage without a schema.'''
    mock_request.get('https://demo.ckan.org/api/3/action/resource_show',
                     json=MOCK_CKAN_RESPONSE)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'ckan-host': 'https://demo.ckan.org',
        'resource-id': 'd51c9bd4-8256-4289-bdd7-962f8572efb0'
    }

    # Locate the processor under test.
    processor_path = os.path.join(
        os.path.dirname(datapackage_pipelines_ckan.processors.__file__),
        'add_ckan_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))
    spew_dp = spew_args[0]

    # The resource metadata mirrors the mocked ckan response.
    added = spew_dp['resources']
    assert len(added) == 1
    assert added[0]['name'] == 'january-2012'
    assert added[0]['format'] == 'csv'
    assert added[0]['dpp:streamedFrom'] == \
        MOCK_CKAN_RESPONSE['result']['url']
    assert 'schema' not in added[0]

    # No rows are streamed by this processor.
    assert len(list(spew_args[1])) == 0
def test_add_ga_resource_processor_no_row_returned(self, mock_discovery):
    '''No rows returned from GA response.'''
    # A GA report with an empty `data` section yields no rows.
    ga_response = {'reports': [{'data': {}}]}
    mock_discovery \
        .build.return_value \
        .reports.return_value \
        .batchGet.return_value \
        .execute.return_value = ga_response

    # input arguments used by our mock `ingest`
    datapackage = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    params = {'domain': {'url': 'sub.example.com', 'viewid': '123456'}}

    # Path to the processor we want to test
    processor_dir = \
        os.path.dirname(datapackage_pipelines_measure.processors.__file__)
    processor_path = os.path.join(processor_dir, 'add_ga_resource.py')

    # Trigger the processor with our mock `ingest` and capture what it will
    # return to `spew`.
    spew_args, _ = \
        mock_processor_test(processor_path, (params, datapackage, iter([])))
    spew_res_iter = spew_args[1]

    # one resource
    resources = list(spew_res_iter)
    assert len(resources) == 1

    # ...containing no rows (index the list directly rather than copying
    # it with `list(resources)[0]` as before)
    rows = resources[0]
    assert len(rows) == 0
def test_add_mailchimp_resource_bad_status(self, m):
    '''A non-ok mailchimp status surfaces the error detail on consumption.'''
    # Mocked mailchimp endpoint returning an unauthorized error payload.
    error_msg = 'Hi, there was a problem with your request.'
    m.get('https://dc1.api.mailchimp.com/3.0/lists/123456',
          json={'detail': error_msg},
          status_code=401)

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []  # nothing here
    }
    processor_params = {
        'list_id': '123456'
    }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__),
        'add_mailchimp_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, iter([])))

    # The failure only surfaces when the lazy resource iterator is
    # actually consumed.
    with self.assertRaises(Exception) as cm:
        list(spew_args[1])
    self.assertEqual(str(cm.exception), error_msg)
def test_index(self):
    '''Rows streamed through the processor end up indexed in ES.'''
    # Should be in setup but requires mock
    class TempList(list):
        pass

    res = TempList([{
        'Date': datetime.datetime(2001, 2, 3),
        'Name': 'Name'
    }])
    res.spec = self.resources[0]
    res_iter = [res]

    # Unpack the return value like the sibling tests do instead of
    # indexing the raw tuple with `spew_args[0][1]`.
    spew_args, _ = mock_processor_test(
        self.processor_path, (self.params, self.datapackage, res_iter))
    spew_res_iter = spew_args[1]

    # We need to actually read the rows to execute the iterator(s)
    rows = [list(res) for res in spew_res_iter]

    # Reuse a single client rather than constructing Elasticsearch()
    # twice.
    es = Elasticsearch()
    es.indices.flush()
    records = es.search(index='dummy')
    records = [r['_source'] for r in records['hits']['hits']]

    assert records == [{'Date': '2001-02-03T00:00:00', 'Name': 'Name'}]
def test_add_rubygems_resource_processor_latest_yesterday(
        self, mock_request):
    '''Latest was yesterday. Get today's data, and add `downloads`.'''
    # Canned rubygems payload for the gem under test.
    canned_gem = {
        'name': 'mygem',
        'downloads': 271,
        'version': '0.3.1',
        'version_downloads': 170
    }
    mock_request.get('https://rubygems.org/api/v1/gems/mygem.json',
                     json=canned_gem)

    # Arguments consumed by the mocked `ingest`: a datapackage already
    # carrying a latest-project-entries resource.
    latest_fields = [{'name': 'source', 'type': 'string'},
                     {'name': 'date', 'type': 'date'},
                     {'name': 'package', 'type': 'string'},
                     {'name': 'downloads', 'type': 'int'},
                     {'name': 'total_downloads', 'type': 'int'}]
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': [{
            'name': 'latest-project-entries',
            'schema': {'fields': latest_fields}
        }]
    }
    processor_params = {'gem_id': 'mygem'}

    # latest is yest
    def latest_entries_res():
        yield {
            'date': datetime.date.today() - datetime.timedelta(days=1),
            'downloads': 7,
            'total_downloads': 265,
            'package': 'mygem',
            'source': 'rubygems'
        }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_rubygems_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path,
        (processor_params, dp, iter([latest_entries_res()])))
    spew_dp = spew_args[0]

    # Both the pre-existing resource and the new gem resource are present.
    added = spew_dp['resources']
    assert len(added) == 2
    assert added[0]['name'] == 'latest-project-entries'
    assert added[1]['name'] == 'mygem'
    schema_names = [f['name'] for f in added[0]['schema']['fields']]
    assert schema_names == [
        'source', 'date', 'package', 'downloads', 'total_downloads'
    ]

    # The new resource's row holds today's totals plus the `downloads`
    # delta against yesterday's entry (271 - 265 = 6).
    streamed = list(spew_args[1])
    assert len(streamed) == 2
    assert list(streamed[1]) == \
        [{
            'package': 'mygem',
            'source': 'rubygems',
            'downloads': 6,
            'total_downloads': 271,
            'date': datetime.date.today()
        }]
def test_add_twitter_resource_processor_url(self, mock_api, mock_auth,
                                            mock_cursor):
    '''Test twitter processor handles url entities (url:<term>).'''
    # Mock the twitter api: auth succeeds and the cursor serves
    # yesterday's statuses mentioning the url.
    mock_auth.return_value = 'authed'
    mock_api.return_value = my_mock_api
    yesterday = \
        datetime.datetime.now() - datetime.timedelta(days=1)
    yesterdays_statuses = [
        Status('okfnlabs', 1, 5, yesterday),
        Status('anonymous', 3, 0, yesterday),
        Status('anonymous', 3, 8, yesterday)
    ]
    mock_cursor.return_value.items.side_effect = [
        get_cursor_items_iter(yesterdays_statuses)
    ]

    # Arguments consumed by the mocked `ingest`.
    dp = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []
    }
    processor_params = {
        'entity': 'url:example.com',
        'project_id': 'my-project'
    }

    # Locate the processor under test.
    processors_pkg = datapackage_pipelines_measure.processors
    processor_path = os.path.join(
        os.path.dirname(processors_pkg.__file__), 'add_twitter_resource.py')

    spew_args, _ = mock_processor_test(
        processor_path, (processor_params, dp, []))
    spew_dp = spew_args[0]

    # One resource is added, slugified from the url entity.
    added = spew_dp['resources']
    assert len(added) == 1
    assert added[0]['name'] == 'url-example-com'
    schema_names = [f['name'] for f in added[0]['schema']['fields']]
    assert schema_names == ['entity', 'entity_type', 'source', 'date',
                            'mentions', 'interactions', 'followers']

    # The single row aggregates yesterday's statuses; url entities have
    # no follower count.
    streamed = list(spew_args[1])
    assert len(streamed[0]) == 1
    first_row = streamed[0][0]
    assert first_row == \
        {
            'entity': 'url:example.com',
            'entity_type': 'url',
            'source': 'twitter',
            'followers': None,
            'mentions': 3,
            'interactions': 20,
            'date': datetime.date.today() - datetime.timedelta(days=1)
        }
def test_add_pypi_resource_processor_no_latest(self, mock_discovery):
    '''No latest in db, so populate from big query request.'''
    # Canned BigQuery response: three days of download counts.
    bq_response = {
        'rows': [{
            'f': [{'v': 'my_package'}, {'v': '2017-05-14'}, {'v': '6'}]
        }, {
            'f': [{'v': 'my_package'}, {'v': '2017-05-15'}, {'v': '12'}]
        }, {
            'f': [{'v': 'my_package'}, {'v': '2017-05-16'}, {'v': '24'}]
        }]
    }
    mock_discovery \
        .build.return_value \
        .jobs.return_value \
        .query.return_value \
        .execute.return_value = bq_response

    # input arguments used by our mock `ingest`
    datapackage = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': []  # nothing here
    }
    params = {
        'name': 'hello',
        'package': 'my_package',
        'project_id': 'my-project'
    }

    # Path to the processor we want to test
    processor_dir = \
        os.path.dirname(datapackage_pipelines_measure.processors.__file__)
    processor_path = os.path.join(processor_dir, 'add_pypi_resource.py')

    # Trigger the processor with our mock `ingest` and capture what it will
    # return to `spew`.
    spew_args, _ = \
        mock_processor_test(processor_path, (params, datapackage, iter([])))
    spew_res_iter = spew_args[1]

    # one resource
    resources = list(spew_res_iter)
    assert len(resources) == 1

    # rows in resource (index the list directly rather than copying it
    # with `list(resources)[0]` as before)
    rows = resources[0]
    assert len(rows) == 3

    # first row asserts
    assert rows[0] == {
        'date': dateutil.parser.parse('2017-05-14').date(),
        'downloads': 6,
        'package': 'my_package',
        'source': 'pypi'
    }

    # last row asserts (negative indexing instead of `rows[len(rows)-1]`)
    assert rows[-1]['downloads'] == 24
    assert rows[-1]['date'] == dateutil.parser.parse('2017-05-16').date()
def test_add_pypi_resource_processor_latest_week_old(self, mock_discovery):
    '''Latest in db is a week old, so fetch new data.'''
    # Canned BigQuery response: three days of download counts.
    bq_response = {
        'rows': [{
            'f': [{'v': 'my_package'}, {'v': '2017-05-14'}, {'v': '6'}]
        }, {
            'f': [{'v': 'my_package'}, {'v': '2017-05-15'}, {'v': '12'}]
        }, {
            'f': [{'v': 'my_package'}, {'v': '2017-05-16'}, {'v': '24'}]
        }]
    }
    mock_discovery \
        .build.return_value \
        .jobs.return_value \
        .query.return_value \
        .execute.return_value = bq_response

    # input arguments used by our mock `ingest`; an existing
    # latest-project-entries resource is present this time
    datapackage = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': [{
            'name': 'latest-project-entries',
            'schema': {
                'fields': [
                    {'name': 'date', 'type': 'date'},
                    {'name': 'downloads', 'type': 'int'},
                    {'name': 'package', 'type': 'string'},
                    {'name': 'source', 'type': 'string'},
                ]
            }
        }]
    }
    params = {
        'name': 'hello',
        'package': 'my_package',
        'project_id': 'my-project'
    }

    def latest_entries_res():
        # One stored entry, a day older than the mocked BigQuery data.
        yield {
            'date': dateutil.parser.parse('2017-05-13').date(),
            'downloads': 3,
            'package': 'my_package',
            'source': 'pypi'
        }

    # Path to the processor we want to test
    processor_dir = \
        os.path.dirname(datapackage_pipelines_measure.processors.__file__)
    processor_path = os.path.join(processor_dir, 'add_pypi_resource.py')

    # Trigger the processor with our mock `ingest` and capture what it will
    # returned to `spew`.
    spew_args, _ = \
        mock_processor_test(processor_path,
                            (params, datapackage,
                             iter([latest_entries_res()])))

    spew_res_iter = spew_args[1]

    # two resources
    resources = list(spew_res_iter)
    assert len(resources) == 2

    # first resource will look like latest_entries_res
    assert list(resources[0])[0] == next(latest_entries_res())

    # rows in second resource (`resources` is already a list; index it
    # directly rather than re-wrapping it in list())
    rows = resources[1]
    assert len(rows) == 3

    # first row asserts
    assert rows[0] == {
        'date': dateutil.parser.parse('2017-05-14').date(),
        'downloads': 6,
        'package': 'my_package',
        'source': 'pypi'
    }

    # last row asserts (negative indexing instead of rows[len(rows) - 1])
    assert rows[-1]['downloads'] == 24
    assert rows[-1]['date'] == dateutil.parser.parse('2017-05-16').date()
def test_add_npm_resource_processor_empty_latest(self, mock_request):
    '''latest-project-entries is present, but empty.'''
    day_range = 5
    now = datetime.datetime.now()

    # package created five days ago
    created = (now - datetime.timedelta(days=day_range)).strftime("%Y-%m-%d")
    mock_registry = {'time': {'created': created}}

    # One mocked npm downloads-api response per day, oldest day first
    # (day=5 downloads=5 ... day=1 downloads=1).
    mock_api_responses = []
    for day in reversed(range(1, day_range + 1)):
        start = (now - datetime.timedelta(days=day)).strftime("%Y-%m-%d")
        mock_api_responses.append({
            'json': {
                'downloads': day,
                'start': start,
                'end': start,
                'package': 'my_package'
            },
            'status_code': 200
        })
    mock_request.get('https://registry.npmjs.org/my_package',
                     json=mock_registry)
    matcher = re.compile('api.npmjs.org/downloads/point/')
    mock_request.get(matcher, mock_api_responses)

    # input arguments used by our mock `ingest`
    datapackage = {
        'name': 'my-datapackage',
        'project': 'my-project',
        'resources': [{
            'name': 'latest-project-entries',
            'schema': {
                'fields': [
                    {'name': 'date', 'type': 'date'},
                    {'name': 'downloads', 'type': 'int'},
                    {'name': 'package', 'type': 'string'},
                    {'name': 'source', 'type': 'string'},
                ]
            }
        }]
    }
    params = {
        'name': 'hello',
        'package': 'my_package',
        'project_id': 'my-project'
    }

    # Path to the processor we want to test
    processor_dir = \
        os.path.dirname(datapackage_pipelines_measure.processors.__file__)
    processor_path = os.path.join(processor_dir, 'add_npm_resource.py')

    # Trigger the processor with our mock `ingest` and capture what it will
    # returned to `spew`.
    spew_args, _ = \
        mock_processor_test(processor_path, (params, datapackage, iter([{}])))

    spew_res_iter = spew_args[1]

    # two resources
    resources = list(spew_res_iter)
    assert len(resources) == 2

    # No rows in first resource
    assert len(list(resources[0])) == 0

    # rows in second resource (`resources` is already a list; index it
    # directly rather than re-wrapping it in list())
    rows = resources[1]
    assert len(rows) == 5

    # row asserts
    assert rows[0] == {
        'date': datetime.date.today() - datetime.timedelta(days=5),
        'downloads': 5,
        'package': 'my_package',
        'source': 'npm'
    }
    # last row asserts (negative indexing instead of rows[len(rows) - 1])
    assert rows[-1]['downloads'] == 1
    assert rows[-1]['date'] == \
        datetime.date.today() - datetime.timedelta(days=1)