def _test_harvest_twice(self, content_first_harvest, content_second_harvest):
    '''Harvest the same mocked DCAT JSON source twice and return the
    resources seen after each run.

    Based on _test_harvest_update_resources. The second harvest serves
    ``content_second_harvest`` with the dataset title changed so the update
    is observable.

    :returns: tuple ``(existing_resources, new_resources)`` — resource lists
        from the first and second harvest respectively.
    '''
    url = self.json_mock_url
    content_type = self.json_content_type
    # Mock the GET request to get the file
    httpretty.register_uri(httpretty.GET, url, body=content_first_harvest, content_type=content_type)
    # The harvester will try to do a HEAD request first so we need to mock
    # this as well (405 makes it fall back to GET)
    httpretty.register_uri(httpretty.HEAD, url, status=405, content_type=content_type)
    kwargs = {'source_type': 'dcat_json'}
    harvest_source = self._create_harvest_source(url, **kwargs)
    # First run, create the dataset with the resource
    self._run_full_job(harvest_source['id'], num_objects=1)
    # Run the jobs to mark the previous one as Finished
    self._run_jobs()
    # get the created dataset (filter the search by this harvest source)
    fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id'])
    results = h.call_action('package_search', {}, fq=fq)
    eq_(results['count'], 1)
    existing_dataset = results['results'][0]
    existing_resources = existing_dataset.get('resources')
    # Mock an update in the remote dataset.
    # Change title just to be sure we harvest ok
    content_second_harvest = \
        content_second_harvest.replace('Example dataset 1', 'Example dataset 1 (updated)')
    httpretty.register_uri(httpretty.GET, url, body=content_second_harvest, content_type=content_type)
    # Run a second job
    self._run_full_job(harvest_source['id'])
    # get the updated dataset
    new_results = h.call_action('package_search', {}, fq=fq)
    eq_(new_results['count'], 1)
    new_dataset = new_results['results'][0]
    new_resources = new_dataset.get('resources')
    eq_(existing_dataset['title'], 'Example dataset 1')
    eq_(new_dataset['title'], 'Example dataset 1 (updated)')
    return (existing_resources, new_resources)
def test_invalid_unknown_type(self):
    """A source with an unrecognised harvester type fails validation."""
    data_dict = self._get_source_dict()
    data_dict['source_type'] = 'unknown'
    with pytest.raises(ValidationError) as err:
        helpers.call_action(self.action, **data_dict)
    message = err.value.error_dict['source_type'][0]
    assert u'Unknown harvester type' in message
def test_invalid_unknown_frequency(self):
    """An unrecognised update frequency is rejected with a clear message."""
    bad_frequency = 'ANNUALLY'
    data_dict = self._get_source_dict()
    data_dict['frequency'] = bad_frequency
    with pytest.raises(ValidationError) as err:
        helpers.call_action(self.action, **data_dict)
    expected = u'Frequency {0} not recognised'.format(bad_frequency)
    assert expected in err.value.error_dict['frequency'][0]
def test_remote_groups_create(self):
    """With remote_groups=create, missing remote groups are created locally."""
    harvest_config = json.dumps({'remote_groups': 'create'})
    results_by_guid = run_harvest(
        url='http://localhost:%s' % mock_ckan.PORT,
        harvester=CKANHarvester(),
        config=harvest_config)
    assert 'dataset1-id' in results_by_guid
    # group_show raises if the remote group was not created locally
    call_action('group_show', {}, id=mock_ckan.GROUPS[0]['id'])
def _test_harvest_twice(self, content_first_harvest, content_second_harvest):
    """Harvest one source twice and return the resources from both runs.

    Based on _test_harvest_update_resources. The second response body gets
    its title tweaked so the update is visible in the search results.
    """
    source_url = self.json_mock_url
    mime = self.json_content_type
    # Serve the first response body for the source URL
    httpretty.register_uri(httpretty.GET, source_url, body=content_first_harvest, content_type=mime)
    # The harvester issues a HEAD request first; 405 makes it fall back to GET
    httpretty.register_uri(httpretty.HEAD, source_url, status=405, content_type=mime)
    harvest_source = self._create_harvest_source(source_url, source_type='dcat_json')
    # First run creates the dataset together with its resource
    self._run_full_job(harvest_source['id'], num_objects=1)
    # Flush the queues so the first job is marked Finished
    self._run_jobs()
    fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id'])
    first_search = h.call_action('package_search', {}, fq=fq)
    eq_(first_search['count'], 1)
    dataset_before = first_search['results'][0]
    resources_before = dataset_before.get('resources')
    # Simulate a remote update: change the title so we can see the change
    content_second_harvest = content_second_harvest.replace(
        'Example dataset 1', 'Example dataset 1 (updated)')
    httpretty.register_uri(httpretty.GET, source_url, body=content_second_harvest, content_type=mime)
    # Second run picks up the updated remote content
    self._run_full_job(harvest_source['id'])
    second_search = h.call_action('package_search', {}, fq=fq)
    eq_(second_search['count'], 1)
    dataset_after = second_search['results'][0]
    resources_after = dataset_after.get('resources')
    eq_(dataset_before['title'], 'Example dataset 1')
    eq_(dataset_after['title'], 'Example dataset 1 (updated)')
    return (resources_before, resources_after)
def test_invalid_missing_values(self):
    """Omitting the mandatory source fields reports each one as missing."""
    full_dict = self._get_source_dict()
    # Keep only the id (when present) so every required field is absent
    source_dict = {}
    if 'id' in full_dict:
        source_dict['id'] = full_dict['id']
    with pytest.raises(ValidationError) as err:
        helpers.call_action(self.action, **source_dict)
    for field in ('name', 'title', 'url', 'source_type'):
        assert err.value.error_dict[field] == [u'Missing value']
def setup(self):
    """Reset harvest tables, queues and the search index, then create the
    test user and organisation used by the tests."""
    harvest_model.setup()
    queue.purge_queues()
    # Wipe the Solr index so earlier runs cannot leak into searches
    requests.get(clear_solr_url)
    user_dict = h.call_action(
        'user_create',
        name='testuser',
        email='*****@*****.**',
        password='******')
    context = {'user': user_dict['name'], 'return_id_only': True}
    self.org_id = h.call_action(
        'organization_create', context, name='geocat_org')
def test_invalid_wrong_configuration(self):
    """Config must be valid JSON and custom_option must be a list."""
    source_dict = self._get_source_dict()
    # Not JSON at all
    source_dict['config'] = 'not_json'
    with pytest.raises(ValidationError) as err:
        helpers.call_action(self.action, **source_dict)
    assert u'Error parsing the configuration options' in err.value.error_dict['config'][0]
    # Valid JSON but a wrongly-typed option
    source_dict['config'] = json.dumps({'custom_option': 'not_a_list'})
    with pytest.raises(ValidationError) as err:
        helpers.call_action(self.action, **source_dict)
    assert u'Error parsing the configuration options: custom_option must be a list' in err.value.error_dict['config'][0]
def test_resource_form_create(self, app):
    """A repeating-subfield value posted through the resource form is
    stored as a list of dicts on the resource."""
    dataset = Dataset(
        type="test-subfields",
        citation=[{'originator': 'na'}])
    env, response = _get_resource_new_page_as_sysadmin(app, dataset["id"])
    url = ckantoolkit.h.url_for("test-subfields_resource.new", id=dataset["id"])
    if not url.startswith('/'):
        # ckan < 2.9 has no blueprint route for new resources
        url = '/dataset/new_resource/' + dataset["id"]
    form_data = {
        "id": "",
        "save": "",
        "schedule-0-impact": "P",
    }
    # Newer CKAN test clients accept flask-style kwargs; older ones raise
    # TypeError and expect the webtest signature instead.
    try:
        app.post(url, environ_overrides=env, data=form_data, follow_redirects=False)
    except TypeError:
        app.post(url.encode('ascii'), params=form_data, extra_environ=env)
    dataset = call_action("package_show", id=dataset["id"])
    assert dataset["resources"][0]["schedule"] == [{"impact": "P"}]
def test_dataset_form_update(self):
    """The JSON field renders indented in the form and survives an edit
    round-trip."""
    value = {'a': 1, 'b': 2}
    dataset = Dataset(type='test-schema', a_json_field=value)
    app = self._get_test_app()
    env, response = _get_package_update_page_as_sysadmin(app, dataset['id'])
    form = response.forms['dataset-edit']
    # The stored dict is shown pretty-printed in the textarea
    assert_equals(form['a_json_field'].value, json.dumps(value, indent=2))
    value = {'a': 1, 'b': 2, 'c': 3}
    form['a_json_field'] = json.dumps(value)
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['a_json_field'], value)
def test_package_create_with_group_permission(self):
    """An org editor who is also a group member may create a dataset in
    that group."""
    user = factories.User(name='user2')
    owner_org = factories.Organization(
        users=[{'name': user['name'], 'capacity': 'editor'}])
    factories.Group(
        name='economy',
        users=[{'name': user['name'], 'capacity': 'member'}])
    context = {'user': user['name'], 'ignore_auth': False}
    params = {
        'name': 'test-dataset-2',
        'notes': 'This is a test',
        'tag_string': 'geography',
        'accessLevel': 'public',
        'contact_name': 'John Smith',
        'contact_email': '*****@*****.**',
        'rights': 'No restrictions on public use',
        'accrualPeriodicity': 'R/P1W',
        'owner_org': owner_org['id'],
        'group': 'economy',
    }
    dataset = helpers.call_action('package_create', context=context, **params)
    assert dataset['groups'][0]['name'] == 'economy'
def _test_harvest_update(self, num_objects, mock_dropzone, dropzone_path, meta_xml_path, **kwargs): harvest_source = self._create_harvest_source(**kwargs) # First run, will create datasets as previously tested self._run_full_job(harvest_source['id'], num_objects=num_objects) # Run the jobs to mark the previous one as Finished self._run_jobs() # change data in source with open(meta_xml_path, 'r') as meta_file: meta = meta_file.read() meta = meta.replace('</titel>', ' (updated)</titel>') with open(meta_xml_path, 'w') as meta_file: meta_file.write(meta) # add new file to dropzone dataset_path = os.path.join(__location__, 'fixtures', mock_dropzone, 'test_dataset') shutil.copytree(dataset_path, os.path.join(dropzone_path, 'test_dataset')) # Run a second job self._run_full_job(harvest_source['id'], num_objects=num_objects + 1) # Check that we still have two datasets fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id']) results = h.call_action('package_search', {}, fq=fq) eq_(results['count'], num_objects + 1) return results
def _test_harvest_create(
        self, url, content, content_type,
        num_datasets=2, exp_num_datasets=2, exp_titles=None,
        **kwargs):
    """Harvest a mocked DCAT JSON endpoint and verify the created datasets.

    :param url: endpoint URL to mock
    :param content: response body served for GET requests
    :param content_type: MIME type of the mocked response
    :param num_datasets: number of harvest objects expected during the run
    :param exp_num_datasets: number of datasets expected in the search index
    :param exp_titles: optional list of titles the results must be drawn from
    """
    # Default to None instead of a mutable [] so the default list is not
    # shared between calls (classic mutable-default-argument pitfall).
    if exp_titles is None:
        exp_titles = []
    # Mock the GET request to get the file
    httpretty.register_uri(httpretty.GET, url, body=content, content_type=content_type)
    # The harvester will try to do a HEAD request first so we need to mock
    # this as well (405 makes it fall back to GET)
    httpretty.register_uri(httpretty.HEAD, url, status=405, content_type=content_type)
    kwargs['source_type'] = 'dcat_json'
    harvest_source = self._create_harvest_source(url, **kwargs)
    self._run_full_job(harvest_source['id'], num_objects=num_datasets)
    # Only count datasets belonging to this harvest source
    fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id'])
    results = h.call_action('package_search', {}, fq=fq)
    eq_(results['count'], exp_num_datasets)
    if exp_titles:
        for result in results['results']:
            assert result['title'] in exp_titles
def test_resource_form_create(self, app):
    """A JSON value posted through the resource form is stored as a dict."""
    dataset = Dataset(type="test-schema")
    env, response = _get_resource_new_page_as_sysadmin(app, dataset["id"])
    url = ckantoolkit.h.url_for("test-schema_resource.new", id=dataset["id"])
    if not url.startswith('/'):
        # ckan < 2.9 has no blueprint route for new resources
        url = '/dataset/new_resource/' + dataset["id"]
    expected = {"a": 1, "b": 2}
    payload = {
        "id": "",
        "save": "",
        "url": "http://example.com/data.csv",
        "a_resource_json_field": json.dumps(expected),
        "name": dataset["name"],
    }
    # Newer CKAN test clients accept flask-style kwargs; older ones raise
    # TypeError and expect the webtest signature instead.
    try:
        app.post(url, environ_overrides=env, data=payload, follow_redirects=False)
    except TypeError:
        app.post(url.encode('ascii'), params=payload, extra_environ=env)
    dataset = call_action("package_show", id=dataset["id"])
    assert dataset["resources"][0]["a_resource_json_field"] == expected
def test_resource_form_update(self):
    """An existing JSON resource field is shown indented and can be edited."""
    original = {'a': 1, 'b': 2}
    dataset = Dataset(
        type='test-schema',
        resources=[{
            'url': 'http://example.com/data.csv',
            'a_resource_json_field': original,
        }])
    app = self._get_test_app()
    env, response = _get_resource_update_page_as_sysadmin(
        app, dataset['id'], dataset['resources'][0]['id'])
    form = response.forms['resource-edit']
    # Stored value is pretty-printed into the textarea
    assert_equals(form['a_resource_json_field'].value,
                  json.dumps(original, indent=2))
    updated = {'a': 1, 'b': 2, 'c': 3}
    form['a_resource_json_field'] = json.dumps(updated)
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['resources'][0]['a_resource_json_field'], updated)
def test_dataset_form_update(self, app):
    """The dataset edit form renders the JSON field indented and saves
    an edited value back as a dict."""
    initial = {"a": 1, "b": 2}
    dataset = Dataset(type="test-schema", a_json_field=initial)
    env, response = _get_package_update_page_as_sysadmin(
        app, dataset["id"])
    form = BeautifulSoup(response.body).select_one("#dataset-edit")
    textarea = form.select_one("textarea[name=a_json_field]")
    # Stored dict is rendered pretty-printed into the textarea
    assert textarea.text == json.dumps(initial, indent=2)
    changed = {"a": 1, "b": 2, "c": 3}
    payload = {
        "save": "",
        "a_json_field": json.dumps(changed),
        "name": dataset["name"],
    }
    url = '/dataset/edit/' + dataset["id"]
    # flask-style client first, webtest fallback for older CKAN
    try:
        app.post(url, environ_overrides=env, data=payload, follow_redirects=False)
    except TypeError:
        app.post(url.encode('ascii'), params=payload, extra_environ=env)
    dataset = call_action("package_show", id=dataset["id"])
    assert dataset["a_json_field"] == changed
def _test_harvest_create(self, url, content, content_type,
                         num_datasets=2, exp_num_datasets=2,
                         exp_titles=None, **kwargs):
    """Harvest a mocked DCAT JSON endpoint (via ``responses``) and verify
    the created datasets.

    :param url: endpoint URL to mock
    :param content: response body served for GET requests
    :param content_type: MIME type of the mocked response
    :param num_datasets: number of harvest objects expected during the run
    :param exp_num_datasets: number of datasets expected in the search index
    :param exp_titles: optional list of titles the results must be drawn from
    """
    # Default to None instead of a mutable [] so the default list is not
    # shared between calls (classic mutable-default-argument pitfall).
    if exp_titles is None:
        exp_titles = []
    # Mock the GET request to get the file
    responses.add(responses.GET, url, body=content, content_type=content_type)
    # The harvester will try to do a HEAD request first so we need to mock
    # this as well (405 makes it fall back to GET)
    responses.add(responses.HEAD, url, status=405, content_type=content_type)
    kwargs['source_type'] = 'dcat_json'
    harvest_source = self._create_harvest_source(url, **kwargs)
    self._run_full_job(harvest_source['id'], num_objects=num_datasets)
    # Only count datasets belonging to this harvest source
    fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id'])
    results = h.call_action('package_search', {}, fq=fq)
    eq_(results['count'], exp_num_datasets)
    if exp_titles:
        for result in results['results']:
            assert result['title'] in exp_titles
def test_resource_form_update(self):
    """Round-trip a JSON resource field through the resource edit form."""
    value = {'a': 1, 'b': 2}
    resource = {
        'url': 'http://example.com/data.csv',
        'a_resource_json_field': value,
    }
    dataset = Dataset(type='test-schema', resources=[resource])
    app = self._get_test_app()
    resource_id = dataset['resources'][0]['id']
    env, response = _get_resource_update_page_as_sysadmin(
        app, dataset['id'], resource_id)
    form = response.forms['resource-edit']
    # The form shows the stored dict pretty-printed
    assert_equals(form['a_resource_json_field'].value,
                  json.dumps(value, indent=2))
    value = {'a': 1, 'b': 2, 'c': 3}
    form['a_resource_json_field'] = json.dumps(value)
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['resources'][0]['a_resource_json_field'], value)
def test_resource_form_update_upload(self):
    """Uploading a schema file through the resource form replaces the
    stored schema."""
    schema_value = {'fields': [{'name': 'code'}, {'name': 'department'}]}
    dataset = Dataset(resources=[{
        'url': 'https://example.com/data.csv',
        'schema': schema_value,
    }])
    app = self._get_test_app()
    env, response = _get_resource_update_page_as_sysadmin(
        app, dataset['id'], dataset['resources'][0]['id'])
    form = response.forms['resource-edit']
    # Existing schema appears pretty-printed in the JSON textarea
    assert_equals(form['schema_json'].value, json.dumps(schema_value, indent=2))
    schema_value = {
        'fields': [
            {'name': 'code'},
            {'name': 'department'},
            {'name': 'date'},
        ]
    }
    upload = ('schema_upload', 'schema.json', json.dumps(schema_value))
    form['url'] = 'https://example.com/data.csv'
    webtest_submit(form, 'save', upload_files=[upload], extra_environ=env)
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['resources'][0]['schema'], schema_value)
def _test_harvest_create(self, num_objects, **kwargs):
    """Run a full harvest job and assert ``num_objects`` datasets were
    indexed for the new source.

    :returns: the ``package_search`` result dict
    """
    source = self._create_harvest_source(**kwargs)
    self._run_full_job(source['id'], num_objects=num_objects)
    # Search only for datasets belonging to this harvest source
    fq = "+type:dataset harvest_source_id:{0}".format(source['id'])
    results = h.call_action('package_search', {}, fq=fq)
    eq_(results['count'], num_objects)
    return results
def _get_or_create_harvest_source(self, **kwargs):
    """Return the geocat harvest source, creating it if the lookup fails.

    :param kwargs: overrides merged into the default source dict
    :returns: the harvest source dict
    """
    source_dict = {
        'title': 'Geocat harvester',
        'name': 'geocat-harvester',
        'url': mock_url,
        'source_type': 'geocat_harvester',
        'owner_org': self.org_id,
    }
    source_dict.update(**kwargs)
    try:
        harvest_source = h.call_action('harvest_source_show', {}, **source_dict)
    except Exception:
        # Deliberately broad best-effort fallback: any failure to look the
        # source up (most commonly "not found") means we create it instead.
        # The unused `as e` binding was removed.
        harvest_source = h.call_action('harvest_source_create', {}, **source_dict)
    return harvest_source
def _create(cls, target_class, *args, **kwargs):
    """Factory hook: create an event via the ``event_create`` action."""
    assert not args, "Positional args aren't supported, use keyword args."
    # Resolve the acting user (site user unless kwargs overrides it)
    context = {'user': _get_action_user_name(kwargs)}
    return helpers.call_action('event_create', context=context, **kwargs)
def _create(cls, target_class, *args, **kwargs):
    """Factory hook: create a powerview through ``powerview_create``."""
    if args:
        assert False, "Positional args aren't supported, use keyword args."
    # Resolve the acting user (site user unless kwargs overrides it)
    user_name = _get_action_user_name(kwargs)
    result = helpers.call_action(
        'powerview_create', context={'user': user_name}, **kwargs)
    return result
def test_remote_groups_only_local(self):
    """With remote_groups=only_local, existing groups are linked but new
    remote groups are not created."""
    # Pre-create a group that also exists on the mocked remote instance
    Group(id='group1-id', name='group1')
    results_by_guid = run_harvest(
        url='http://localhost:%s' % mock_ckan.PORT,
        harvester=CKANHarvester(),
        config=json.dumps({'remote_groups': 'only_local'}))
    assert 'dataset1-id' in results_by_guid
    # The harvested dataset joins the pre-existing local group
    dataset = call_action('package_show', {}, id=mock_ckan.DATASETS[0]['id'])
    expected_group_id = mock_ckan.DATASETS[0]['groups'][0]['id']
    assert dataset['groups'][0]['id'] == expected_group_id
    # A remote-only group must not have been created locally
    with pytest.raises(toolkit.ObjectNotFound):
        call_action('group_show', {}, id='remote-group')
def test_harvest_info_in_package_show(self):
    """Harvest metadata extras are attached to packages shown for_view."""
    results_by_guid = run_harvest(
        url='http://localhost:%s' % mock_ckan.PORT,
        harvester=CKANHarvester())
    assert 'dataset1-id' in results_by_guid
    dataset = call_action(
        'package_show', {"for_view": True}, id=mock_ckan.DATASETS[0]['id'])
    extras_dict = {e['key']: e['value'] for e in dataset['extras']}
    # All three harvest bookkeeping extras must be present
    for key in ('harvest_object_id', 'harvest_source_id', 'harvest_source_title'):
        assert key in extras_dict
def test_pages_saves_custom_schema_fields(self, app):
    """Fields added by a patched pages schema are persisted and listed."""
    sysadmin = factories.Sysadmin()
    context = {'user': sysadmin['name']}
    # Extend the default schema with an extra optional field
    patched_schema = schema.default_pages_schema()
    patched_schema['new_field'] = [toolkit.get_validator('ignore_missing')]
    with mock.patch('ckanext.pages.actions.update_pages_schema',
                    return_value=patched_schema):
        helpers.call_action(
            'ckanext_pages_update',
            context=context,
            title='Page Title',
            name='page_name',
            page='page_name',
            new_field='new_field_value',
            content='test',
        )
    pages = helpers.call_action('ckanext_pages_list', context)
    assert pages[0]['new_field'] == 'new_field_value'
def _create_harvest_source(self, **kwargs):
    """Create a stadtzh harvest source, applying any keyword overrides.

    :returns: the created harvest source dict
    """
    source_dict = dict(
        title='Stadt ZH Source',
        name='test-stadtzh-source',
        url='http://stadthzh',
        source_type='stadtzh_harvester',
    )
    source_dict.update(**kwargs)
    return h.call_action('harvest_source_create', {}, **source_dict)
def test_create(self):
    """harvest_source_create persists the source and rejects duplicate URLs."""
    source_dict = self._get_source_dict()
    result = helpers.call_action('harvest_source_create', **source_dict)
    # Every submitted field comes back unchanged
    for key in source_dict.keys():
        assert source_dict[key] == result[key]
    # Check that source was actually created
    source = harvest_model.HarvestSource.get(result['id'])
    assert source.url == source_dict['url']
    assert source.type == source_dict['source_type']
    # Trying to create a source with the same URL fails, even under a new
    # name. (The previous `result =` rebinding inside the raises block was
    # dead code — the call raises, so nothing is ever assigned.)
    source_dict = self._get_source_dict()
    source_dict['name'] = 'test-source-action-new'
    with pytest.raises(ValidationError) as e:
        helpers.call_action('harvest_source_create', **source_dict)
    assert u'There already is a Harvest Source for this URL' in e.value.error_dict[
        'url'][0]
def _test_harvest_create(self, all_results_filename, single_results_filenames,
                         num_objects, expected_packages, mocker, **kwargs):
    """Mock the CSW responses, run a full harvest and assert the number of
    datasets indexed for the source.

    :returns: the ``package_search`` result dict
    """
    self._mock_csw_results(all_results_filename, single_results_filenames, mocker)
    source = self._get_or_create_harvest_source(**kwargs)
    self._run_full_job(source['id'], num_objects=num_objects)
    # Count only datasets attached to this harvest source
    fq = "+type:dataset harvest_source_id:{0}".format(source['id'])
    results = h.call_action('package_search', {}, fq=fq)
    eq_(results['count'], expected_packages)
    return results
def test_resource_form_create_url(self):
    """A plain URL in schema_json is stored verbatim as the resource schema."""
    dataset = Dataset()
    app = self._get_test_app()
    env, response = _get_resource_new_page_as_sysadmin(app, dataset['id'])
    form = response.forms['resource-edit']
    schema_url = 'https://example.com/schemas.json'
    form['url'] = 'https://example.com/data.csv'
    form['schema_json'] = schema_url
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['resources'][0]['schema'], schema_url)
def _get_action_user_name(kwargs): '''Return the name of the user in kwargs, defaulting to the site user It can be overriden by explictly setting {'user': None} in the keyword arguments. In that case, this method will return None. ''' if 'user' in kwargs: user = kwargs['user'] else: user = helpers.call_action('get_site_user') if user is None: user_name = None else: user_name = user['name'] return user_name
def test_resource_form_update(self, app):
    """Round-trip an existing JSON resource field through the resource
    edit form: it renders pretty-printed and an edited value is stored
    back as a dict."""
    value = {"a": 1, "b": 2}
    dataset = Dataset(
        type="test-schema",
        resources=[{
            "url": "http://example.com/data.csv",
            "a_resource_json_field": value,
        }],
    )
    env, response = _get_resource_update_page_as_sysadmin(
        app, dataset["id"], dataset["resources"][0]["id"])
    form = BeautifulSoup(response.body).select_one("#resource-edit")
    # Stored dict is rendered pretty-printed into the textarea
    assert form.select_one(
        "textarea[name=a_resource_json_field]").text == json.dumps(
            value, indent=2)
    url = ckantoolkit.h.url_for(
        "test-schema_resource.edit",
        id=dataset["id"],
        resource_id=dataset["resources"][0]["id"],
    )
    if not url.startswith('/'):
        # ckan < 2.9 has no blueprint route for resource edit
        url = '/dataset/{ds}/resource_edit/{rs}'.format(
            ds=dataset["id"], rs=dataset["resources"][0]["id"])
    value = {"a": 1, "b": 2, "c": 3}
    json_value = json.dumps(value)
    data = {
        "id": dataset["resources"][0]["id"],
        "save": "",
        "a_resource_json_field": json_value,
        "name": dataset["name"],
    }
    # Newer CKAN test clients accept flask-style kwargs; older ones raise
    # TypeError and expect the webtest signature instead.
    try:
        app.post(url, environ_overrides=env, data=data, follow_redirects=False)
    except TypeError:
        app.post(url.encode('ascii'), params=data, extra_environ=env)
    dataset = call_action("package_show", id=dataset["id"])
    assert dataset["resources"][0]["a_resource_json_field"] == value
def _get_action_user_name(kwargs): '''Return the name of the user in kwargs, defaulting to the site user It can be overriden by explictly setting {'user': None} in the keyword arguments. In that case, this method will return None. ''' if 'user' in kwargs: user = kwargs['user'] else: user = helpers.call_action('get_site_user') if user is None: user_name = None else: user_name = user['name'] return user_name
def test_resource_form_create_json(self):
    """JSON typed into schema_json is parsed and stored as the resource
    schema dict."""
    dataset = Dataset()
    app = self._get_test_app()
    env, response = _get_resource_new_page_as_sysadmin(app, dataset['id'])
    form = response.forms['resource-edit']
    schema_value = {'fields': [{'name': 'code'}, {'name': 'department'}]}
    form['url'] = 'https://example.com/data.csv'
    form['schema_json'] = json.dumps(schema_value)
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['resources'][0]['schema'], schema_value)
def test_dataset_show_without_format(self):
    """dcat_dataset_show with no format returns a parseable serialization."""
    dataset = factories.Dataset(notes='Test dataset')
    content = helpers.call_action('dcat_dataset_show', id=dataset['id'])
    # Feed the output back through the parser to prove it is valid RDF
    parser = RDFParser()
    parser.parse(content)
    parsed = list(parser.datasets())
    eq_(len(parsed), 1)
    eq_(parsed[0]['title'], dataset['title'])
    eq_(parsed[0]['notes'], dataset['notes'])
def test_dataset_form_create(self):
    """A JSON value typed into the dataset form is stored as a dict."""
    app = self._get_test_app()
    env, response = _get_package_new_page_as_sysadmin(app)
    form = response.forms['dataset-edit']
    value = {'a': 1, 'b': 2}
    form['name'] = 'json_dataset_1'
    form['a_json_field'] = json.dumps(value)
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id='json_dataset_1')
    assert_equals(dataset['a_json_field'], value)
def test_dataset_show_without_format(self):
    """dcat_dataset_show output must be a real serialization of exactly
    one dataset."""
    source = factories.Dataset(notes='Test dataset')
    serialized = helpers.call_action('dcat_dataset_show', id=source['id'])
    # Round-trip through the RDF parser to prove it is a valid serialization
    parser = RDFParser()
    parser.parse(serialized)
    datasets = list(parser.datasets())
    eq_(len(datasets), 1)
    roundtripped = datasets[0]
    eq_(roundtripped['title'], source['title'])
    eq_(roundtripped['notes'], source['notes'])
def test_resource_form_create(self):
    """A JSON resource field submitted via the form is saved as a dict."""
    dataset = Dataset(type='test-schema')
    app = self._get_test_app()
    env, response = _get_resource_new_page_as_sysadmin(app, dataset['id'])
    form = response.forms['resource-edit']
    field_value = {'a': 1, 'b': 2}
    form['url'] = 'http://example.com/data.csv'
    form['a_resource_json_field'] = json.dumps(field_value)
    submit_and_follow(app, form, env, 'save')
    dataset = call_action('package_show', id=dataset['id'])
    assert_equals(dataset['resources'][0]['a_resource_json_field'], field_value)