def setup(self):
    """Populate the database with the ``resourcetest`` package used by the tests.

    The package receives one resource per entry in ``self.urls`` (each with an
    ``alt_url`` and a ``size`` extra), followed by a ``no_extra`` resource that
    has neither. The database must not already hold this package or any
    resource; both conditions are asserted before anything is created.
    """
    self.pkgname = u'resourcetest'

    # Refuse to run on top of data left behind by something else.
    assert not model.Package.by_name(self.pkgname)
    assert model.Session.query(model.Resource).count() == 0

    # Reference values kept on the instance.
    self.urls = [u'http://somewhere.com/', u'http://elsewhere.com/']
    self.format = u'csv'
    self.description = u'Important part.'
    self.hash = u'abc123'
    self.alt_url = u'http://alturl'
    self.size = 200
    self.label = 'labeltest'
    self.sort_order = '1'

    # The revision object returned here is not needed afterwards.
    model.repo.new_revision()
    package = model.Package(name=self.pkgname)
    model.Session.add(package)

    def attach_resource(url, **specific):
        # package.id is read again for every resource that gets built.
        resource = model.Resource(
            url=url,
            format=self.format,
            description=self.description,
            hash=self.hash,
            package_id=package.id,
            **specific
        )
        package.resources_all.append(resource)

    for url in self.urls:
        attach_resource(url, alt_url=self.alt_url, extras={u'size': self.size})
    attach_resource("no_extra")

    model.repo.commit_and_remove()
def initial_data(self, clean_db):
    """Create the package named ``self.pkgname`` together with its resources.

    One resource is added per entry in ``self.urls`` (with ``alt_url`` and a
    ``size`` extra), then a final ``no_extra`` resource without them. The
    attributes read from ``self`` must already be set by the caller.

    ``clean_db`` is not used in the body; presumably it is a fixture requested
    only for its side effect -- confirm against the test configuration.
    """
    # The database is expected to be empty of this package and of resources.
    assert not model.Package.by_name(self.pkgname)
    assert model.Session.query(model.Resource).count() == 0

    package = model.Package(name=self.pkgname)
    model.Session.add(package)

    def build_resource(url, **specific):
        # Fields common to every resource; package.id is re-read on each call.
        return model.Resource(
            url=url,
            format=self.format,
            description=self.description,
            hash=self.hash,
            package_id=package.id,
            **specific
        )

    for url in self.urls:
        with_extras = build_resource(
            url, alt_url=self.alt_url, extras={u"size": self.size}
        )
        package.resources_all.append(with_extras)

    without_extras = build_resource("no_extra")
    package.resources_all.append(without_extras)

    model.repo.commit_and_remove()
def create_updating_revision(self, name, **kwds):
    """Commit a new revision that updates the package called ``name``.

    :param name: passed to ``self.get_package`` to fetch the package to edit
    :param kwds: the changes to apply:

        * ``resources`` -- list of dicts; every resource currently in the
          package's first resource group is marked ``'deleted'`` and one new
          ``model.Resource`` per dict is appended to that group
        * ``extras`` -- dict whose items are written into ``package.extras``
        * anything else -- set as a plain attribute on the package

    :raises Exception: if the first entry of ``model.repo.history()`` has no
        packages attached after the commit
    """
    rev = model.repo.new_revision()
    rev.author = "Test Revision Updating"
    package = self.get_package(name)

    if 'resources' in kwds:
        resource_dicts = kwds.pop('resources')
        resource_group = package.resource_groups_all[0]
        # Retire what is there before adding the replacements.
        for resource in resource_group.resources_all:
            resource.state = 'deleted'
        for resource_dict in resource_dicts:
            resource = model.Resource(**resource_dict)
            model.Session.add(resource)
            resource_group.resources_all.append(resource)

    if 'extras' in kwds:
        for key, value in kwds.pop('extras').items():
            package.extras[key] = value

    # Whatever is left is treated as a simple package attribute. A distinct
    # loop variable keeps the ``name`` parameter intact.
    for attr_name, value in kwds.items():
        setattr(package, attr_name, value)

    model.Session.add(package)
    model.Session.commit()
    model.Session.remove()

    if not model.repo.history()[0].packages:
        # Call syntax is valid on both Python 2 and 3; the old
        # ``raise Exception, "..."`` form is a syntax error on Python 3.
        raise Exception("Didn't set up revision right.")
def update_resources(self, res_dicts, autoflush=True):
    '''Change this package's resources.

    @param res_dicts - ordered list of dicts, each detailing a resource
    @param autoflush - handed to the query that looks resources up by id

    The resource dictionaries contain 'url', 'format' etc. Optionally they
    can also provide the 'id' of the Resource, to help matching res_dicts to
    existing Resources. Otherwise, it searches for an otherwise exactly
    matching Resource.

    The dictionaries passed in are not modified: 'id' and 'position' are
    ignored by working on a filtered copy.

    The caller is responsible for creating a revision and committing.'''
    from ckan import model
    assert isinstance(res_dicts, (list, tuple))

    # Keys that say which resource / where it goes, not what it contains.
    non_content_keys = ('id', 'position')

    def without_non_content_keys(res_dict):
        # Filtered copy, so the caller's dict is left untouched.
        return dict((key, value) for key, value in res_dict.items()
                    if key not in non_content_keys)

    def get_resource_identity(resource_obj_or_dict):
        if isinstance(resource_obj_or_dict, dict):
            # Convert dict into a Resource object, since that ensures
            # all columns exist when you redictize it. This object is
            # garbage collected as it isn't added to the Session.
            resource = model.Resource(
                **without_non_content_keys(resource_obj_or_dict))
        else:
            resource = resource_obj_or_dict
        return resource.as_dict(core_columns_only=True)

    # Map the incoming res_dicts (by index) to existing resources
    index_to_res = {}
    existing_res_identities = [get_resource_identity(res)
                               for res in self.resources]
    for i, res_dict in enumerate(res_dicts):
        assert isinstance(res_dict, dict)
        res_id = res_dict.get('id')
        if res_id:
            # Match up by id
            res = Session.query(model.Resource) \
                .autoflush(autoflush).get(res_id)
            if res:
                index_to_res[i] = res
        else:
            # Match up by content
            res_identity = get_resource_identity(res_dict)
            try:
                matching_res_index = \
                    existing_res_identities.index(res_identity)
            except ValueError:
                continue
            index_to_res[i] = self.resources[matching_res_index]

    # Edit resources and create the new ones
    new_res_list = []
    for i, res_dict in enumerate(res_dicts):
        content = without_non_content_keys(res_dict)
        if i in index_to_res:
            res = index_to_res[i]
            for col, value in content.items():
                setattr(res, col, value)
        else:
            # 'id' and 'position' disrupt creation of a new resource, so
            # only the content keys are passed on.
            res = model.Resource(**content)
            model.Session.add(res)
        new_res_list.append(res)
    self.resource_groups[0].resources = new_res_list
def _download_file(self, res, label):
    """Serve, or redirect to, the stored file ``label`` of resource ``res``.

    :param res: mapping of resource fields; every key is copied onto a
        throw-away ``model.Resource`` that is used for the access check
    :param label: key of the file inside ``BUCKET`` of ``self.ofs``
    :returns: a response built by ``generate_response`` (303 when the
        ``resource_show`` check is refused, 404 when the file is missing and
        the label has no trailing slash), or the result of calling a
        ``FileApp`` for the file. The remaining paths end in
        ``h.redirect_to`` and return nothing explicitly.
    """
    # We need this as a resource object to check access so create a dummy
    # obj and trick CKAN
    resource = model.Resource()

    for k in res.keys():
        setattr(resource, k, res[k])

    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'for_view': True,
               'auth_user_obj': c.userobj, 'resource': resource}
    data_dict = {'id': resource.id}
    try:
        logic.check_access('resource_show', context, data_dict)
    except logic.NotAuthorized:
        # Send the user to the login page, coming back to the resource url.
        redirect_url = h.url_for(controller='user', action='login',
                                 came_from=resource.url)
        r = generate_response(303, u'Not authorized to read file ' + resource.id,
                              other_headers={'Location': redirect_url,
                                             'X-CKAN-Error': '403 Access Denied'})
        return r

    exists = self.ofs.exists(BUCKET, label)
    if not exists:
        # handle erroneous trailing slash by redirecting to url w/o slash
        if label.endswith('/'):
            label = label[:-1]
            # This may be best being cached_url until we have moved it into
            # permanent storage
            file_url = h.url_for('storage_file', label=label)
            # NOTE(review): unless h.redirect_to aborts the request
            # (presumably it raises -- confirm), execution continues with
            # the get_url lookup further down.
            h.redirect_to(file_url)
        else:
            # abort(404)
            r = generate_response(404, u'File not found')
            return r

    file_url = self.ofs.get_url(BUCKET, label)
    if file_url.startswith("file://") or file_url.endswith('xlsx'):
        metadata = self.ofs.get_metadata(BUCKET, label)
        # NOTE(review): for a url that only matches the 'xlsx' suffix this
        # slice still drops the first len("file://") characters -- confirm
        # that is intended.
        filepath = file_url[len("file://"):]
        headers = {
            # 'Content-Disposition':'attachment; filename="%s"' % label,
            'Pragma': 'no-cache',
            'Cache-Control': 'max-age=0, no-store, no-cache',
            'Content-Type': metadata.get('_format', 'text/plain')}
        if resource.name:
            # Double quotes would break the quoted filename in the header.
            res_name = resource.name.replace('"', '_')
            res_name_encoded = res_name.encode('utf-8', 'ignore')
            file_name, file_extension = os.path.splitext(res_name_encoded)
            # Only a name without an extension gets the resource format
            # appended; otherwise the encoded name is used as it is.
            if file_extension == '' and resource.format:
                file_name = file_name + '.' + resource.format
            else:
                file_name = res_name_encoded
            headers[
                'Content-Disposition'] = 'inline; filename="{}"'.format(file_name)
        fapp = FileApp(filepath, headers=None, **headers)
        return fapp(request.environ, self.start_response)
    else:
        h.redirect_to(file_url.encode('ascii', 'ignore'))
def test_04_insert_resource(self):
    """A resource inserted at position 0 pushes the existing ones down by one."""
    package = model.Package.by_name(self.pkgname)
    inserted = model.Resource(url=u'http://xxxxxxxxxxxxxxx')
    package.resources_all.insert(0, inserted)
    model.repo.commit_and_remove()

    # Reload after the session was removed.
    reloaded = model.Package.by_name(self.pkgname)
    assert len(reloaded.resources) == 4, reloaded.resources
    # The previously first resource now sits in second place.
    former_first = reloaded.resources_all[1]
    assert former_first.url == self.urls[0]
def test_revision__resource_addition(self):
    """Adding a resource to package 'latest' shows up in the last revision."""
    # The revision object itself is not needed, only the call.
    model.repo.new_revision()
    added = model.Resource(
        description="April to September 2010",
        format="CSV",
        url="http://www.barnsley.nhs.uk/spend.csv",
    )
    model.Session.add(added)
    latest = model.Package.by_name('latest')
    latest.resource_groups_all[0].resources_all.append(added)
    model.repo.commit_and_remove()

    self._assert_revision_created()
    last_revision = self._last_revision()
    first_dataset = last_revision['datasets'][0]
    assert_equal(first_dataset['name'], 'latest')
def get_resource_identity(resource_obj_or_dict):
    """Return the core-column dict used to compare resources for equality.

    A dict argument is first turned into a ``model.Resource`` (leaving out
    'id' and 'position'), since that ensures all columns exist when it is
    dictized again. That temporary object is never added to the Session.
    Any other argument is used as the resource directly.
    """
    resource = resource_obj_or_dict
    if isinstance(resource_obj_or_dict, dict):
        content = {}
        for key in resource_obj_or_dict.keys():
            if key in ('id', 'position'):
                continue
            content[key] = resource_obj_or_dict[key]
        resource = model.Resource(**content)
    return resource.as_dict(core_columns_only=True)
def test_04_insert_resource(self):
    """Insert a resource at the front of the first resource group.

    Afterwards the package has four resources, the former first resource is
    second in the group, and that resource has two revisions.
    """
    package = model.Package.by_name(self.pkgname)
    # Only the call matters; the revision object is not used again.
    model.repo.new_revision()
    inserted = model.Resource(url=u'http://xxxxxxxxxxxxxxx')
    group = package.resource_groups_all[0]
    group.resources_all.insert(0, inserted)
    model.repo.commit_and_remove()

    # Reload after the session was removed.
    group = model.Package.by_name(self.pkgname).resource_groups_all[0]
    assert len(group.package.resources) == 4, group.resources
    former_first = group.resources_all[1]
    assert former_first.url == self.urls[0]
    assert len(former_first.all_revisions) == 2
def resource_dict_save(res_dict: dict[str, Any], context: Context) -> 'model.Resource':
    """Create or update a resource from ``res_dict`` and add it to the session.

    :param res_dict: resource fields. It is modified in place: the url of an
        'upload' resource is cut down to its last path segment, and the keys
        'extras', 'revision_timestamp' and 'tracking_summary' are removed.
    :param context: must provide ``"model"`` and ``"session"``
    :returns: the resource object, existing (looked up by ``id``) or new,
        with ``state`` set to active and ``extras`` set to the fields that
        ``from_dict`` reported as skipped
    """
    model = context["model"]
    session = context["session"]

    # Reuse the stored resource when an id is given and it can be found.
    resource_id = res_dict.get("id")
    obj = session.query(model.Resource).get(resource_id) if resource_id else None
    new = not obj
    if new:
        obj = model.Resource()

    # Strip the full url for resources of type 'upload'
    is_upload = res_dict.get('url_type') == u'upload'
    if is_upload and res_dict.get('url'):
        res_dict['url'] = res_dict['url'].rsplit('/')[-1]

    # unconditionally ignored fields
    for ignored_key in ('extras', 'revision_timestamp', 'tracking_summary'):
        res_dict.pop(ignored_key, None)

    changed, skipped = obj.from_dict(res_dict)

    # A changed last_modified only counts as a url change on existing objects.
    modified_existing = 'last_modified' in changed and not new
    if 'url' in changed or modified_existing:
        obj.url_changed = True

    if changed or obj.extras != skipped:
        obj.metadata_modified = datetime.datetime.utcnow()

    obj.state = u'active'
    obj.extras = skipped
    session.add(obj)
    return obj
def create(cls, auth_profile="", package_type=None):
    """Create the standard test data set and commit it.

    Builds the packages 'annakarenina' (with two resources, extras, notes and
    three tags) and 'warandpeace' (two tags), the groups 'david' and 'roger'
    with package memberships, and five users, one of which is a sysadmin.
    The created names are recorded on ``cls.pkg_names``, ``cls.tag_names``,
    ``cls.group_names`` and ``cls.user_refs``.

    :param auth_profile: when equal to ``"publisher"`` an
        'organization_group' group is created and assigned to both packages;
        when non-empty it is also used as the ``type`` of 'david' and 'roger'
        (otherwise ``'group'``)
    :param package_type: passed as ``type`` to both packages
    """
    # Start from a fresh session.
    model.Session.remove()
    rev = model.repo.new_revision()
    # same name as user we create below
    rev.author = cls.author
    rev.message = u'''Creating test data. * Package: annakarenina * Package: warandpeace * Associated tags, etc etc '''
    if auth_profile == "publisher":
        organization_group = model.Group(name=u"organization_group",
                                         type="organization")

    cls.pkg_names = [u'annakarenina', u'warandpeace']
    pkg1 = model.Package(name=cls.pkg_names[0], type=package_type)
    if auth_profile == "publisher":
        pkg1.group = organization_group
    model.Session.add(pkg1)
    pkg1.title = u'A Novel By Tolstoy'
    pkg1.version = u'0.7a'
    pkg1.url = u'http://datahub.io'
    # put an & in the url string to test escaping
    # 'alt_url' is only passed to the resources when it is one of the
    # columns reported by get_extra_columns().
    if 'alt_url' in model.Resource.get_extra_columns():
        configured_extras = ({
            'alt_url': u'alt123'
        }, {
            'alt_url': u'alt345'
        })
    else:
        configured_extras = ({}, {})
    pr1 = model.Resource(
        url=u'http://datahub.io/download/x=1&y=2',
        format=u'plain text',
        description=u'Full text. Needs escaping: " Umlaut: \xfc',
        hash=u'abc123',
        extras={'size_extra': u'123'},
        **configured_extras[0])
    pr2 = model.Resource(url=u'http://datahub.io/index.json',
                         format=u'JSON',
                         description=u'Index of the novel',
                         hash=u'def456',
                         extras={'size_extra': u'345'},
                         **configured_extras[1])
    model.Session.add(pr1)
    model.Session.add(pr2)
    pkg1.resources_all.append(pr1)
    pkg1.resources_all.append(pr2)
    pkg1.notes = u'''Some test notes ### A 3rd level heading **Some bolded text.** *Some italicized text.* Foreign characters: u with umlaut \xfc 66-style quote \u201c foreign word: th\xfcmb Needs escaping: left arrow < <http://ckan.net/> '''
    pkg2 = model.Package(name=cls.pkg_names[1], type=package_type)
    tag1 = model.Tag(name=u'russian')
    tag2 = model.Tag(name=u'tolstoy')

    if auth_profile == "publisher":
        pkg2.group = organization_group

    # Flexible tag, allows spaces, upper-case,
    # and all punctuation except commas
    tag3 = model.Tag(name=u'Flexible \u30a1')

    for obj in [pkg2, tag1, tag2, tag3]:
        model.Session.add(obj)
    pkg1.add_tags([tag1, tag2, tag3])
    pkg2.add_tags([tag1, tag3])
    cls.tag_names = [t.name for t in (tag1, tag2, tag3)]
    pkg1.license_id = u'other-open'
    pkg2.license_id = u'cc-nc'  # closed license
    pkg2.title = u'A Wonderful Story'
    pkg1.extras = {u'genre': 'romantic novel', u'original media': 'book'}
    # group
    david = model.Group(name=u'david',
                        title=u'Dave\'s books',
                        description=u'These are books that David likes.',
                        type=auth_profile or 'group')
    roger = model.Group(name=u'roger',
                        title=u'Roger\'s books',
                        description=u'Roger likes these books.',
                        type=auth_profile or 'group')

    for obj in [david, roger]:
        model.Session.add(obj)

    cls.group_names.add(u'david')
    cls.group_names.add(u'roger')

    # Flush before the Member rows below read pkg1.id / pkg2.id.
    model.Session.flush()

    model.Session.add(
        model.Member(table_id=pkg1.id, table_name='package', group=david))
    model.Session.add(
        model.Member(table_id=pkg2.id, table_name='package', group=david))
    model.Session.add(
        model.Member(table_id=pkg1.id, table_name='package', group=roger))
    # authz
    sysadmin = model.User(name=u'testsysadmin', password=u'testsysadmin')
    sysadmin.sysadmin = True
    model.Session.add_all([
        model.User(name=u'tester', apikey=u'tester', password=u'tester'),
        model.User(name=u'joeadmin', password=u'joeadmin'),
        model.User(
            name=u'annafan',
            about=
            u'I love reading Annakarenina. My site: http://datahub.io',
            password=u'annafan'),
        model.User(name=u'russianfan', password=u'russianfan'),
        sysadmin,
    ])
    cls.user_refs.extend([
        u'tester', u'joeadmin', u'annafan', u'russianfan', u'testsysadmin'
    ])
    model.repo.commit_and_remove()
def setup_class(self):
    """Create the standard test data plus package ``difftest`` and its history.

    The package is created in one revision and then edited in six further
    revisions (notes, extras, tags, a first resource, an edited and a second
    resource, notes again). The reloaded package and its two resources are
    stored as ``self.pkg1``, ``self.res1`` and ``self.res2``.
    """
    CreateTestData.create()
    model.Session.remove()
    self.name = u'difftest'

    # create pkg - PackageRevision
    model.repo.new_revision()
    self.pkg1 = model.Package(name=self.name)
    model.Session.add(self.pkg1)
    self.pkg1.version = u'First version'
    model.repo.commit_and_remove()

    def commit_edit(message, apply_change):
        # One revision per edit: open it, reload the package, change it,
        # label the revision, commit.
        revision = model.repo.new_revision()
        apply_change(model.Package.by_name(self.name))
        revision.message = message
        model.repo.commit_and_remove()

    # edit pkg - PackageRevision
    def add_notes(package):
        package.notes = u'New notes'
    commit_edit(u'Added notes', add_notes)

    # edit pkg - PackageExtraRevision
    def add_extras(package):
        package.extras = {u'a':u'b', u'c':u'd'}
    commit_edit(u'Added extras', add_extras)

    # edit pkg - PackageTagRevision
    def add_tags(package):
        package.add_tag_by_name(u'geo')
        package.add_tag_by_name(u'scientific')
    commit_edit(u'Added tags', add_tags)

    # edit pkg - ResourceRevision
    def add_first_resource(package):
        package.resources.append(model.Resource(url=u'http://url1.com',
                                                format=u'xls',
                                                description=u'It is.',
                                                hash=u'abc123'))
    commit_edit(u'Added resource', add_first_resource)

    # edit pkg - ResourceRevision
    def edit_and_add_resource(package):
        package.resources[0].url = u'http://url1.com/edited'
        package.resources.append(model.Resource(url=u'http://url2.com'))
    commit_edit(u'Added resource', edit_and_add_resource)

    # edit pkg - PackageRevision
    def change_notes(package):
        package.notes = u'Changed notes'
    commit_edit(u'Changed notes', change_notes)

    # Keep fresh handles on the final state.
    self.pkg1 = model.Package.by_name(self.name)
    resource_query = model.Session.query(model.Resource)
    self.res1 = resource_query.filter_by(url=u'http://url1.com/edited').one()
    self.res2 = model.Session.query(model.Resource).filter_by(url=u'http://url2.com').one()
    assert self.pkg1
def command(dry_run=False):
    """Tidy the fields of every active package and log a report.

    For each active package, in name order:

    * for every ``(existing_fields, destination_field)`` pair in the
      module-level ``field_map`` whose destination extra is still empty, the
      first non-empty value among the existing fields (package attribute, or
      else extra) is written to the destination extra and the existing
      fields are cleared;
    * a 'documentation' resource pointing at ``pkg.url`` is added when the
      package has a url;
    * the extras listed in the module-level ``delete_fields`` are removed.

    :param dry_run: when true nothing is modified or committed; only the
        statistics are collected and logged
    """
    from ckan import model

    # Register a translator in this thread so that
    # the _() functions in logic layer can work
    from ckan.lib.cli import MockTranslator
    registry = Registry()
    registry.prepare()
    translator_obj = MockTranslator()
    registry.register(translator, translator_obj)

    global_log.info('Tidying package fields')

    stats = StatsList()

    if not dry_run:
        rev = model.repo.new_revision()
        rev.message = 'Package fields migration'

    for pkg in model.Session.query(model.Package)\
            .filter_by(state='active')\
            .order_by(model.Package.name):
        # field map
        for existing_fields, destination_field in field_map.items():
            value = pkg.extras.get(destination_field)
            if value:
                # The destination is already filled in; leave it alone.
                continue
            for existing_field in existing_fields:
                if hasattr(pkg, existing_field):
                    value = getattr(pkg, existing_field)
                else:
                    value = pkg.extras.get(existing_field)
                if value:
                    value = value.strip()
                if value:
                    # take the first hit. This has to leave the loop: a
                    # ``continue`` here was a no-op, so a later (possibly
                    # empty) field replaced the hit, which was then lost
                    # when the existing fields were cleared below.
                    break
            if not dry_run:
                pkg.extras[destination_field] = value or ''
                # delete existing field values
                for existing_field in existing_fields:
                    if hasattr(pkg, existing_field):
                        setattr(pkg, existing_field, '')
                    elif existing_field in pkg.extras:
                        del pkg.extras[existing_field]
            if value:
                stats.add('Merged to field "%s"' % destination_field, pkg.name)
            else:
                stats.add('Not merged to field "%s"' % destination_field, pkg.name)

        # move url to additional resource
        if pkg.url:
            # NOTE(review): ``value`` here is whatever the field-map loop
            # left behind (and is unbound if ``field_map`` is empty); this
            # looks like a leftover of the entry recorded below -- confirm
            # before removing.
            stats.add('Url moved to additional resource', value)
            if not dry_run:
                if not pkg.resource_groups:
                    res_group = model.ResourceGroup(label="default")
                    pkg.resource_groups.append(res_group)
                res_group = pkg.resource_groups[0]
                res = model.Resource(format='HTML',
                                     resource_type='documentation',
                                     url=pkg.url,
                                     description='Web page about the data')
                res_group.resources.append(res)
                model.Session.add(res)
                #pkg.url = ''
            stats.add('URL moved to additional resource', pkg.name)
        else:
            stats.add('No URL to move to additional resource', pkg.name)

        # delete fields
        for field in delete_fields:
            if field in pkg.extras:
                if not dry_run:
                    del pkg.extras[field]
                stats.add('Deleted field "%s"' % field, pkg.name)
            else:
                stats.add('No field to delete "%s"' % field, pkg.name)

    if not dry_run:
        model.repo.commit_and_remove()

    global_log.info(stats.report())
def create_package_from_data(self, package_data, package=None):
    '''Create, or update, a package from a plain dictionary and commit it.

    :param package_data: dict of package fields. The keys 'tags',
        'resources' and 'extras' are handled as relationships; every other
        key is set as an attribute on the package.
    :param package: existing package to update; a new ``model.Package`` is
        created when omitted
    :returns: the saved package

    When ``package_data`` supplies resources they replace all resources the
    package had; without any, the existing resources are left as they are.

    Example of ``package_data``::

        {"extras": {"INSPIRE": "True",
                    "bbox-east-long": "-3.12442",
                    "bbox-north-lat": "54.218407",
                    "bbox-south-lat": "54.039634",
                    "bbox-west-long": "-3.32485",
                    "constraint": "conditions unknown; (e) intellectual property rights;",
                    "dataset-reference-date": [{"type": "creation",
                                                "value": "2008-10-10"},
                                               {"type": "revision",
                                                "value": "2009-10-08"}],
                    "guid": "00a743bf-cca4-4c19-a8e5-e64f7edbcadd",
                    "metadata-date": "2009-10-16",
                    "metadata-language": "eng",
                    "published_by": 0,
                    "resource-type": "dataset",
                    "spatial-reference-system": "wee",
                    "temporal_coverage-from": "1977-03-10T11:45:30",
                    "temporal_coverage-to": "2005-01-15T09:10:00"},
         "name": "council-owned-litter-bins",
         "notes": "Location of Council owned litter bins within Borough.",
         "resources": [{"description": "Resource locator",
                        "format": "Unverified",
                        "url": "http://www.barrowbc.gov.uk"}],
         "tags": ["Utility and governmental services"],
         "title": "Council Owned Litter Bins"}
    '''
    if not package:
        package = model.Package()

    rev = model.repo.new_revision()

    relationship_attr = ['extras', 'resources', 'tags']

    # ``items()`` instead of ``iteritems()``: works on Python 2 and 3.
    for key, value in package_data.items():
        if key not in relationship_attr:
            setattr(package, key, value)

    for tag in package_data.get('tags', []):
        package.add_tag_by_name(tag, autoflush=False)

    resource_dicts = package_data.get("resources", [])
    if resource_dicts:
        # Clear once, up front. Clearing inside the loop (as before) threw
        # away every resource except the last one.
        package.resources[:] = []
    for resource_dict in resource_dicts:
        package.resources.append(model.Resource(**resource_dict))

    for key, value in package_data.get("extras", {}).items():
        package._extras[key] = model.PackageExtra(key=key, value=value)

    model.Session.add(package)
    # Flush before the default roles are set up for the package.
    model.Session.flush()

    model.setup_default_user_roles(package, [])

    model.Session.add(rev)
    model.Session.commit()

    return package
def create(cls, auth_profile="", package_type=None):
    """Create the standard test data set, including authorization set-up.

    Builds the packages 'annakarenina' (with two resources, extras, notes and
    three tags) and 'warandpeace' (two tags), the groups 'david' and 'roger'
    with package memberships, and five users. It then assigns default user
    roles, makes the visitor pseudo-user admin of 'warandpeace' and 'roger',
    makes 'testsysadmin' admin of the system, and creates two authorization
    groups with roles. The created names are recorded on ``cls.pkg_names``,
    ``cls.tag_names``, ``cls.group_names`` and ``cls.user_refs``.

    :param auth_profile: when equal to ``"publisher"`` a 'publisher_group'
        group is created and assigned to both packages; when non-empty it is
        also used as the ``type`` of 'david' and 'roger' (otherwise
        ``'group'``)
    :param package_type: passed as ``type`` to both packages
    """
    import ckan.model as model
    # Start from a fresh session.
    model.Session.remove()
    rev = model.repo.new_revision()
    # same name as user we create below
    rev.author = cls.author
    rev.message = u'''Creating test data. * Package: annakarenina * Package: warandpeace * Associated tags, etc etc '''
    if auth_profile == "publisher":
        publisher_group = model.Group(name=u"publisher_group", type="publisher")

    cls.pkg_names = [u'annakarenina', u'warandpeace']
    pkg1 = model.Package(name=cls.pkg_names[0], type=package_type)
    if auth_profile == "publisher":
        pkg1.group = publisher_group
    model.Session.add(pkg1)
    pkg1.title = u'A Novel By Tolstoy'
    pkg1.version = u'0.7a'
    pkg1.url = u'http://www.annakarenina.com'
    # put an & in the url string to test escaping
    # 'alt_url' is only passed to the resources when it is one of the
    # columns reported by get_extra_columns().
    if 'alt_url' in model.Resource.get_extra_columns():
        configured_extras = ({'alt_url': u'alt123'},
                             {'alt_url': u'alt345'})
    else:
        configured_extras = ({}, {})
    pr1 = model.Resource(
        url=u'http://www.annakarenina.com/download/x=1&y=2',
        format=u'plain text',
        description=u'Full text. Needs escaping: " Umlaut: \xfc',
        hash=u'abc123',
        extras={'size_extra': u'123'},
        **configured_extras[0]
        )
    pr2 = model.Resource(
        url=u'http://www.annakarenina.com/index.json',
        format=u'json',
        description=u'Index of the novel',
        hash=u'def456',
        extras={'size_extra': u'345'},
        **configured_extras[1]
        )
    model.Session.add(pr1)
    model.Session.add(pr2)
    pkg1.resources.append(pr1)
    pkg1.resources.append(pr2)
    pkg1.notes = u'''Some test notes ### A 3rd level heading **Some bolded text.** *Some italicized text.* Foreign characters: u with umlaut \xfc 66-style quote \u201c foreign word: th\xfcmb Needs escaping: left arrow < <http://ckan.net/> '''
    pkg2 = model.Package(name=cls.pkg_names[1], type=package_type)
    tag1 = model.Tag(name=u'russian')
    tag2 = model.Tag(name=u'tolstoy')

    if auth_profile == "publisher":
        pkg2.group = publisher_group

    # Flexible tag, allows spaces, upper-case,
    # and all punctuation except commas
    tag3 = model.Tag(name=u'Flexible \u30a1')

    for obj in [pkg2, tag1, tag2, tag3]:
        model.Session.add(obj)
    pkg1.add_tags([tag1, tag2, tag3])
    pkg2.add_tags([ tag1, tag3 ])
    cls.tag_names = [ t.name for t in (tag1, tag2, tag3) ]
    pkg1.license_id = u'other-open'
    pkg2.license_id = u'cc-nc' # closed license
    pkg2.title = u'A Wonderful Story'
    pkg1.extras = {u'genre':'romantic novel',
                   u'original media':'book'}
    # group
    david = model.Group(name=u'david',
                        title=u'Dave\'s books',
                        description=u'These are books that David likes.',
                        type=auth_profile or 'group')
    roger = model.Group(name=u'roger',
                        title=u'Roger\'s books',
                        description=u'Roger likes these books.',
                        type=auth_profile or 'group')
    for obj in [david, roger]:
        model.Session.add(obj)

    cls.group_names.add(u'david')
    cls.group_names.add(u'roger')

    # Flush before the Member rows below read pkg1.id / pkg2.id.
    model.Session.flush()

    model.Session.add(model.Member(table_id=pkg1.id, table_name='package', group=david))
    model.Session.add(model.Member(table_id=pkg2.id, table_name='package', group=david))
    model.Session.add(model.Member(table_id=pkg1.id, table_name='package', group=roger))
    # authz
    model.Session.add_all([
        model.User(name=u'tester', apikey=u'tester', password=u'tester'),
        model.User(name=u'joeadmin', password=u'joeadmin'),
        model.User(name=u'annafan', about=u'I love reading Annakarenina. My site: <a href="http://anna.com">anna.com</a>', password=u'annafan'),
        model.User(name=u'russianfan', password=u'russianfan'),
        model.User(name=u'testsysadmin', password=u'testsysadmin'),
        ])
    cls.user_refs.extend([u'tester', u'joeadmin', u'annafan', u'russianfan', u'testsysadmin'])
    model.repo.commit_and_remove()

    # The session was removed above, so everything is looked up again by
    # name before roles are assigned.
    visitor = model.User.by_name(model.PSEUDO_USER__VISITOR)
    anna = model.Package.by_name(u'annakarenina')
    war = model.Package.by_name(u'warandpeace')
    annafan = model.User.by_name(u'annafan')
    russianfan = model.User.by_name(u'russianfan')
    model.setup_default_user_roles(anna, [annafan])
    model.setup_default_user_roles(war, [russianfan])
    model.add_user_to_role(visitor, model.Role.ADMIN, war)
    david = model.Group.by_name(u'david')
    roger = model.Group.by_name(u'roger')
    model.setup_default_user_roles(david, [russianfan])
    model.setup_default_user_roles(roger, [russianfan])
    model.add_user_to_role(visitor, model.Role.ADMIN, roger)
    testsysadmin = model.User.by_name(u'testsysadmin')
    model.add_user_to_role(testsysadmin, model.Role.ADMIN, model.System())

    model.repo.commit_and_remove()

    # Create a couple of authorization groups
    for ag_name in [u'anauthzgroup', u'anotherauthzgroup']:
        ag=model.AuthorizationGroup.by_name(ag_name)
        if not ag: #may already exist, if not create
            ag=model.AuthorizationGroup(name=ag_name)
            model.Session.add(ag)

    model.repo.commit_and_remove()

    # and give them a range of roles on various things
    ag = model.AuthorizationGroup.by_name(u'anauthzgroup')
    aag = model.AuthorizationGroup.by_name(u'anotherauthzgroup')
    pkg = model.Package.by_name(u'warandpeace')
    g = model.Group.by_name('david')

    model.add_authorization_group_to_role(ag, u'editor', model.System())
    model.add_authorization_group_to_role(ag, u'reader', pkg)
    model.add_authorization_group_to_role(ag, u'admin', aag)
    model.add_authorization_group_to_role(aag, u'editor', ag)
    model.add_authorization_group_to_role(ag, u'editor', g)

    model.repo.commit_and_remove()