def moderate_change_apply(self, change):
    """Interactively ask the operator whether to apply *change*.

    Prints a summary of the change and prompts on stdin.  Returns True if
    the operator accepts.  If declined, offers to set a changemask on the
    change's ref so further changes to it are ignored; falls through
    (returning None, i.e. falsy / "not applied") in that case.
    """
    print "Change summary:"
    self.print_changes([change])
    print ""
    # Empty answer defaults to "yes"; only the first character counts.
    answer = raw_input("Do you want to apply this change? [Y/n] ").strip() or "y"
    answer = answer[0].lower()
    print ""
    if answer == 'y':
        return True
    else:
        print
        answer = raw_input("Do you want to mask changes to this ref? [Y/n] ").strip() or "y"
        answer = answer[0].lower()
        print ""
        if answer == 'y':
            # Imported lazily — presumably to avoid a circular import at
            # module load time (TODO confirm).
            from ckan.model.changeset import ChangemaskRegister, Session
            register = ChangemaskRegister()
            mask = register.create_entity(change.ref)
            Session.add(mask)
            Session.commit()
            print "Mask has been set for ref: %s" % change.ref
            print ""
        else:
            print "Warning: Not setting a mask after not applying changes may lead to conflicts."
            import time
            # Pause so the operator actually sees the warning.
            time.sleep(5)
            print ""
def get_or_create_doi(package_id):
    '''Create or retrieve the unique identifier for this package_id.

    If no DOI row exists, mint a random 7-digit identifier under the
    configured prefix, ensuring it is unused both in the local DOI table
    and at the DataCite service, then persist and return it.

    :param package_id: id of the package to look up / mint a DOI for
    :return: the DOI model instance for the package
    '''
    datacite_api = DOIDataCiteAPI()
    doi = get_doi(package_id)
    if doi is None:
        while True:
            # NOTE(review): os.path.join builds "<prefix>/NNNNNNN" — assumes
            # a POSIX path separator; verify on non-POSIX deployments.
            identifier = os.path.join(
                get_prefix(), u'{0:07}'.format(random.randint(1, 100000)))
            # Check this identifier doesn't exist in the table
            if Session.query(DOI).filter(DOI.identifier == identifier).count():
                continue
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                # Lookup failure is treated as "not registered".
                pass
            else:
                if datacite_doi.text:
                    # Already registered at DataCite; try another number.
                    continue
            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            break
    return doi
def create(cls, term, label, lang):
    """Persist and return a new translation record for term/label/lang."""
    record = cls(term=term, label=label, lang=lang)
    Session.add(record)
    Session.flush()
    return record
def test_auth_publisher_profile_different_publisher(self):
    """A publisher user may browse/create, but not manage another
    publisher's harvest sources (expects 401 on read/edit/refresh)."""
    # Create a source for publisher 1
    source = HarvestSource(url=u'http://test-source.com', type='ckan',
                           publisher_id=self.publisher1.id)
    Session.add(source)
    Session.commit()
    # Act as a user belonging to the *other* publisher.
    extra_environ = {'REMOTE_USER': self.publisher2_user.name.encode('utf8')}
    # List (Publihsers can see the sources list)
    res = self.app.get('/harvest', extra_environ=extra_environ)
    assert 'Harvesting Sources' in res
    # Create
    res = self.app.get('/harvest/new', extra_environ=extra_environ)
    assert 'New harvest source' in res
    assert 'publisher_id' in res
    # Check that this publihser is not allowed to manage sources from other publishers
    status = 401
    # Read
    res = self.app.get('/harvest/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Edit
    res = self.app.get('/harvest/edit/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Refresh
    res = self.app.get('/harvest/refresh/%s' % source.id, status=status,
                       extra_environ=extra_environ)
def moderate_change_apply(self, change):
    """Interactively ask the operator whether to apply *change*.

    Prints a summary of the change and prompts on stdin.  Returns True if
    the operator accepts.  If declined, offers to set a changemask on the
    change's ref so further changes to it are ignored; falls through
    (returning None, i.e. falsy / "not applied") in that case.
    """
    print "Change summary:"
    self.print_changes([change])
    print ""
    # Empty answer defaults to "yes"; only the first character counts.
    answer = raw_input("Do you want to apply this change? [Y/n] ").strip() or "y"
    answer = answer[0].lower()
    print ""
    if answer == 'y':
        return True
    else:
        print
        answer = raw_input("Do you want to mask changes to this ref? [Y/n] ").strip() or "y"
        answer = answer[0].lower()
        print ""
        if answer == 'y':
            # Imported lazily — presumably to avoid a circular import at
            # module load time (TODO confirm).
            from ckan.model.changeset import ChangemaskRegister, Session
            register = ChangemaskRegister()
            mask = register.create_entity(change.ref)
            Session.add(mask)
            Session.commit()
            print "Mask has been set for ref: %s" % change.ref
            print ""
        else:
            print "Warning: Not setting a mask after not applying changes may lead to conflicts."
            import time
            # Pause so the operator actually sees the warning.
            time.sleep(5)
            print ""
def _refresh_harvest_objects(self, harvest_object, package_id): """ Perform harvester housekeeping: - Flag the other objects of the source as not current - Set a refernce to the package in the harvest object - Flag it as current - And save the changes """ # Flag the other objects of this source as not current from ckanext.harvest.model import harvest_object_table u = update(harvest_object_table) \ .where(harvest_object_table.c.package_id == bindparam('pkg_id')) \ .values(current=False) Session.execute(u, params={'pkg_id': package_id}) Session.commit() # Refresh current object from session, otherwise the # import paster command fails # (Copied from the Gemini harvester--not sure if necessary) Session.remove() Session.add(harvest_object) Session.refresh(harvest_object) # Set reference to package in the HarvestObject and flag it as # the current one if not harvest_object.package_id: harvest_object.package_id = package_id harvest_object.current = True harvest_object.save()
def handle_submit(self, id):
    """Email a publish-dataset support request for dataset *id* and record
    a DoiRequest row linking the package and the current user."""
    data = clean_dict(dict_fns.unflatten(tuplize_dict(parse_params(
        request.params))))
    data['dataset_url'] = toolkit.url_for(
        controller='package', action='read', id=id, qualified=True
    )
    package = get_action('package_show')(None, {'id': id})
    # Refuse to proceed for private datasets.
    self.fail_if_private(package, data['dataset_url'])

    # Comma separated config var
    to_addrs = config['ckanext.ands.support_emails'].split(',')
    subject = 'DataPortal Support: Request to publish dataset'
    body = base.render(
        'package/doi_email.text', extra_vars=data)
    for email in to_addrs:
        mail_recipient('Dataportal support', email, subject, body)

    data['package_id'] = package['id']
    data['user_id'] = c.userobj.id
    doi_request = DoiRequest(**data)
    Session.add(doi_request)
    Session.commit()
    h.flash_success("DOI Request sent")
    return toolkit.redirect_to(data['dataset_url'])
def test_harvest_update_records(self):
    """Re-running a harvest job must not update the package unless the
    source document changed (here simulated with force_import=True)."""
    # Create source
    source_fixture = {"url": u"http://127.0.0.1:8999/single/dataset1.xml",
                      "type": u"gemini-single"}
    source, first_job = self._create_source_and_job(source_fixture)
    first_obj = self._run_job_for_single_document(first_job)
    first_package_dict = get_action("package_show_rest")(
        self.context, {"id": first_obj.package_id})

    # Package was created
    assert first_package_dict
    assert first_obj.current == True
    assert first_obj.package

    # Create and run a second job, the package should not be updated
    second_job = self._create_job(source.id)
    second_obj = self._run_job_for_single_document(second_job)

    # Re-attach and refresh the objects so they reflect actual DB state.
    Session.remove()
    Session.add(first_obj)
    Session.add(second_obj)
    Session.refresh(first_obj)
    Session.refresh(second_obj)

    second_package_dict = get_action("package_show_rest")(
        self.context, {"id": first_obj.package_id})

    # Package was not updated.
    # BUG FIX: the original used `assert cond1, cond2` — the comma makes
    # cond2 the assertion *message*, so it was never checked.  Each
    # condition is now asserted separately.
    assert second_package_dict
    assert first_package_dict["id"] == second_package_dict["id"]
    assert first_package_dict["metadata_modified"] == \
        second_package_dict["metadata_modified"]
    assert not second_obj.package
    assert not second_obj.package_id
    assert second_obj.current == False
    assert first_obj.current == True

    # Create and run a third job, forcing the importing to simulate an
    # update in the package
    third_job = self._create_job(source.id)
    third_obj = self._run_job_for_single_document(third_job, force_import=True)

    # For some reason first_obj does not get updated after the import_stage,
    # and we have to force a refresh to get the actual DB values.
    Session.remove()
    Session.add(first_obj)
    Session.add(second_obj)
    Session.add(third_obj)
    Session.refresh(first_obj)
    Session.refresh(second_obj)
    Session.refresh(third_obj)

    third_package_dict = get_action("package_show_rest")(
        self.context, {"id": third_obj.package_id})

    # Package was updated (same fix as above: split comma-asserts).
    assert third_package_dict
    assert first_package_dict["id"] == third_package_dict["id"]
    assert third_package_dict["metadata_modified"] > \
        second_package_dict["metadata_modified"]
    assert third_obj.package
    assert third_obj.package_id == first_package_dict["id"]
    assert third_obj.current == True
    assert second_obj.current == False
    assert first_obj.current == False
def create_doi_from_identifier(package_id, identifier):
    """Can be called when an identifier has already been created elsewhere.

    Does not ensure the identifier is unique."""
    new_doi = DOI(package_id=package_id, identifier=identifier)
    Session.add(new_doi)
    Session.commit()
    return new_doi
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the datacite repository
    All unique identifiers are created with
    @return: the newly persisted DOI record
    """
    datacite_api = DOIDataCiteAPI()
    while True:
        identifier = os.path.join(get_prefix(),
                                  '{0:07}'.format(random.randint(1, 100000)))
        # Check this identifier doesn't exist in the table
        if not Session.query(DOI).filter(DOI.identifier == identifier).count():
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                # Lookup failure is treated as "not registered".
                pass
            else:
                if datacite_doi.text:
                    # Already registered at DataCite — try another number.
                    continue
            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            return doi
def create_application(data_dict, image):
    """Validate *data_dict*, create an Application with tags and an
    optional image, and save it.

    :param data_dict: application fields (a stray 'image' key is dropped)
    :param image: uploaded file object (needs .filename and .file) or None
    :raises ValidationError: if schema validation fails
    :return: the saved Application
    """
    if 'image' in data_dict:
        del data_dict['image']
    data, errors = validate(data_dict, application_schema())
    if errors:
        raise ValidationError(errors, error_summary(errors))
    application = Application(
        name=Application.generate_name(data.get('title')),
        title=data.get('title'),
        url=data.get('url'),
        description=data.get('description'),
        featured=data.get('featured'),
        submitter=data.get('submitter'),
        developer=data.get('developer'),
        developer_url=data.get('developer_url'),
        license=data.get('license'),
        code_url=data.get('code_url'),
        api_url=data.get('api_url'),
    )
    # Tags arrive as a single space-separated string.
    tags = data.get('tags', '').split(' ')
    application.update_tags(tags)
    if image and image.filename and image.file:
        image = ApplicationImage(name=image.filename,
                                 data=image.file.read(),
                                 application=application)
        Session.add(image)
    application.save()
    return application
def record_existing_unique_identifier(package_id, identifier):
    """
    Based on a provided identifier, checks datacite for an existing DOI

    Saves to local CKAN database

    :param package_id: string
    :param identifier: string
    :return DOI object if saved, None/falsy if it didn't exist in datacite
        or was already recorded locally
    """
    datacite_api = DOIDataCiteAPI()
    # Check this identifier doesn't exist in the table
    existing_doi = Session.query(DOI).filter(DOI.identifier == identifier).first()
    if not existing_doi:
        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
            if datacite_doi.text:
                # Determine whether or not we need to delete a doi that
                # points to the current dataset
                doi_for_this_pkg = Session.query(DOI).filter(
                    DOI.package_id == package_id).first()
                if doi_for_this_pkg:
                    # BUG: the original body here was the bare expression
                    # statement `datacite_api` — a no-op.  The stale DOI row
                    # pointing at this package is never removed or
                    # deactivated.  TODO(review): decide what should happen
                    # to the old DOI before relying on this branch.
                    pass
                doi = DOI(package_id=package_id, identifier=identifier)
                Session.add(doi)
                Session.commit()
                return doi
        except HTTPError:
            # DataCite lookup failed — fall through and return None.
            pass
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the datacite repository
    All unique identifiers are created with
    @return: the newly persisted DOI record
    """
    datacite_api = DOIDataCiteAPI()
    while True:
        identifier = os.path.join(get_prefix(),
                                  '{0:07}'.format(random.randint(1, 100000)))
        # Check this identifier doesn't exist in the table
        if not Session.query(DOI).filter(DOI.identifier == identifier).count():
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                # Lookup failure is treated as "not registered".
                pass
            else:
                if datacite_doi.text:
                    # Already registered at DataCite — try another number.
                    continue
            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            return doi
def test_auth_publisher_profile_different_publisher(self):
    """A publisher user may browse/create, but not manage another
    publisher's harvest sources (expects 401 on read/edit/refresh)."""
    # Create a source for publisher 1
    source = HarvestSource(url=u'http://test-source.com', type='ckan',
                           publisher_id=self.publisher1.id)
    Session.add(source)
    Session.commit()
    # Act as a user belonging to the *other* publisher.
    extra_environ = {
        'REMOTE_USER': self.publisher2_user.name.encode('utf8')
    }
    # List (Publihsers can see the sources list)
    res = self.app.get('/harvest', extra_environ=extra_environ)
    assert 'Harvesting Sources' in res
    # Create
    res = self.app.get('/harvest/new', extra_environ=extra_environ)
    assert 'New harvest source' in res
    assert 'publisher_id' in res
    # Check that this publihser is not allowed to manage sources from other publishers
    status = 401
    # Read
    res = self.app.get('/harvest/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Edit
    res = self.app.get('/harvest/edit/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Refresh
    res = self.app.get('/harvest/refresh/%s' % source.id, status=status,
                       extra_environ=extra_environ)
def _test_auth_not_allowed(self, user_name=None, source=None, status=401):
    """Assert every harvest UI page returns *status* for the given user.

    :param user_name: acting user (anonymous when None)
    :param source: existing HarvestSource; a test source is created when None
    :param status: expected HTTP status (default 401)
    """
    if not source:
        # Create harvest source
        source = HarvestSource(url=u'http://test-source.com', type='ckan')
        Session.add(source)
        Session.commit()

    if user_name:
        extra_environ = {'REMOTE_USER': user_name.encode('utf8')}
    else:
        extra_environ = {}

    # List
    res = self.app.get('/harvest', status=status,
                       extra_environ=extra_environ)
    # Create
    res = self.app.get('/harvest/new', status=status,
                       extra_environ=extra_environ)
    # Read
    res = self.app.get('/harvest/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Edit
    res = self.app.get('/harvest/edit/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Refresh
    res = self.app.get('/harvest/refresh/%s' % source.id, status=status,
                       extra_environ=extra_environ)
def handle_submit(self, id):
    """Email a publish-dataset support request for dataset *id* and record
    a DoiRequest row linking the package and the current user."""
    data = clean_dict(
        dict_fns.unflatten(tuplize_dict(parse_params(request.params))))
    data['dataset_url'] = toolkit.url_for(controller='package',
                                          action='read',
                                          id=id,
                                          qualified=True)
    package = get_action('package_show')(None, {'id': id})
    # Refuse to proceed for private datasets.
    self.fail_if_private(package, data['dataset_url'])

    # Comma separated config var
    to_addrs = config['ckanext.ands.support_emails'].split(',')
    subject = 'DataPortal Support: Request to publish dataset'
    body = base.render('package/doi_email.text', extra_vars=data)
    for email in to_addrs:
        mail_recipient('Dataportal support', email, subject, body)

    data['package_id'] = package['id']
    data['user_id'] = c.userobj.id
    doi_request = DoiRequest(**data)
    Session.add(doi_request)
    Session.commit()
    h.flash_success("DOI Request sent")
    return toolkit.redirect_to(data['dataset_url'])
def _create_pkg(self):
    """Rename the 'annakarenina' fixture package, attach a resource, and
    return the committed package."""
    model.repo.new_revision()
    pkg = Package.get("annakarenina")
    pkg.name = "fookarenina"
    pkg.add_resource("www.google.com", description="foo", name="foo")
    Session.add(pkg)
    Session.commit()
    return pkg
def _create_pkg(self):
    """Rename the 'annakarenina' fixture package, attach a resource, and
    return the committed package."""
    model.repo.new_revision()
    pkg = Package.get('annakarenina')
    pkg.name = "fookarenina"
    pkg.add_resource('www.google.com', description='foo', name="foo")
    Session.add(pkg)
    Session.commit()
    return pkg
def save(self, commit=True):
    """Persist this object, deriving a slug from its name when unset.

    :param commit: commit the session immediately (default True)
    """
    if not getattr(self, 'slug', None):
        self.slug = slugify(self.name)
    db_session = Session()
    log.debug(self)
    db_session.add(self)
    if commit:
        db_session.commit()
def create(cls, **kwargs):
    '''
    Create, persist and return a new record of type :class:`~m`.
    '''
    attrs = cls._columns(**kwargs)
    record = cls.m(**attrs)
    Session.add(record)
    Session.commit()
    return record
def set_names(self, langs):
    """Replace the translated license names from a {lang: label} mapping."""
    self.names = []
    for code, text in langs.items():
        Session.add(LocalizedLicenseName(license_id=self.id,
                                         lang=code,
                                         label=text))
def save_package_extent(package_id, geometry=None, srid=None):
    '''Adds, updates or deletes the package extent geometry.

    package_id: Package unique identifier
    geometry: a Python object implementing the Python Geo Interface
        (i.e a loaded GeoJSON object)
    srid: The spatial reference in which the geometry is provided.
        If None, it defaults to the DB srid.

    Will throw ValueError if the geometry object does not provide a geo
    interface.

    The responsibility for calling model.Session.commit() is left to the
    caller.

    :param package_id:
    :param geometry: (Default value = None)
    :param srid: (Default value = None)
    '''
    db_srid = int(toolkit.config.get(u'ckan.spatial.srid', u'4326'))

    existing_package_extent = Session.query(PackageExtent).filter(
        PackageExtent.package_id == package_id).first()

    if geometry:
        # asShape raises ValueError when geometry has no geo interface.
        shape = asShape(geometry)
        if not srid:
            srid = db_srid
        package_extent = PackageExtent(package_id=package_id,
                                       the_geom=WKTElement(shape.wkt, srid))

    # Check if extent exists
    if existing_package_extent:
        # If extent exists but we received no geometry, we'll delete the existing one
        if not geometry:
            existing_package_extent.delete()
            log.debug(u'Deleted extent for package %s' % package_id)
        else:
            # Check if extent changed
            if not compare_geometry_fields(package_extent.the_geom,
                                           existing_package_extent.the_geom):
                # Update extent
                existing_package_extent.the_geom = package_extent.the_geom
                existing_package_extent.save()
                log.debug(u'Updated extent for package %s' % package_id)
            else:
                log.debug(u'Extent for package %s unchanged' % package_id)
    elif geometry:
        # Insert extent
        Session.add(package_extent)
        log.debug(u'Created new extent for package %s' % package_id)
def setup(self):
    """Create a sysadmin 'harvest' user and a v2 API action context."""
    # Add sysadmin user
    harvest_user = model.User(name=u'harvest', password=u'test',
                              sysadmin=True)
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {'model': model,
                    'session': Session,
                    'user': u'harvest',
                    'schema': package_schema,
                    'api_version': '2'}
def _create_harvester_info(self, config=True):
    """Build an OAI-PMH harvester plus a job wired to a test source.

    :param config: include a JSON config on the source when True
    :return: (harvest_job, harvester) tuple
    """
    rev = model.repo.new_revision()
    harv = OAIPMHHarvester()
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
    if config:
        harvest_job.source.config = '{"query": ""}'
    harvest_job.source.type = "OAI-PMH"
    Session.add(harvest_job)
    return harvest_job, harv
def setup(self):
    """Create a sysadmin 'harvest' user and a v2 API action context."""
    # Add sysadmin user
    harvest_user = model.User(name=u'harvest', password=u'test',
                              sysadmin=True)
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {'model': model,
                    'session': Session,
                    'user': u'harvest',
                    'schema': package_schema,
                    'api_version': '2'}
def test_00duplicate_authz_group(self):
    """identify() must raise AssertionError when two authz groups share a
    name."""
    group1 = AuthorizationGroup(name="1234567 My group")
    Session.add(group1)
    Session.commit()
    group2 = AuthorizationGroup(name="1234567 My group")
    Session.add(group2)
    Session.commit()
    plugin = self._makeOne()
    environ = {"REQUEST_METHOD": "GET",
               "QUERY_STRING": "oauth_token=foo",
               "ckan.who.oauth.challenge": "1"}
    self.assertRaises(AssertionError, plugin.identify, environ)
    # Clean up the fixtures so later tests start from a clean table.
    group1.delete()
    group2.delete()
    Session.commit()
def create(cls, identifier, package_id, published=None):
    '''
    Create a new record in the DOI table.

    :param identifier: a new DOI string
    :param package_id: the id of the package this DOI represents
    :param published: when this DOI was published (datetime, nullable)
    :return: the newly created record object
    '''
    record = DOI(identifier=identifier, package_id=package_id,
                 published=published)
    Session.add(record)
    Session.commit()
    return record
def setup(self):
    """Create publisher-profile fixtures: test users, two publisher groups
    and admin memberships for each."""
    model.Session.remove()
    CreateTestData.create(auth_profile='publisher')
    self.sysadmin_user = model.User.get('testsysadmin')
    self.normal_user = model.User.get(
        'annafan')  # Does not belong to a publisher
    self.publisher1_user = model.User.by_name('russianfan')
    self.publisher2_user = model.User.by_name('tester')

    # Create two Publishers
    rev = model.repo.new_revision()
    self.publisher1 = model.Group(name=u'test-publisher1',
                                  title=u'Test Publihser 1',
                                  type=u'publisher')
    Session.add(self.publisher1)
    self.publisher2 = model.Group(name=u'test-publisher2',
                                  title=u'Test Publihser 2',
                                  type=u'publisher')
    Session.add(self.publisher2)
    member1 = model.Member(table_name='user',
                           table_id=self.publisher1_user.id,
                           group=self.publisher1,
                           capacity='admin')
    Session.add(member1)
    member2 = model.Member(table_name='user',
                           table_id=self.publisher2_user.id,
                           group=self.publisher2,
                           capacity='admin')
    Session.add(member2)
    Session.commit()
def update_resource_stats(resource_id, total_visits, total_downloads):
    """Upsert the PiwikResource stats row for *resource_id*."""
    q_result = Session.query(PiwikResource).filter(
        PiwikResource.resource_id == resource_id).first()
    if q_result:
        # update if resource is already in table
        q_result.total_visits = total_visits
        q_result.total_downloads = total_downloads
    else:
        # add new entry for resource
        rt = PiwikResource()
        rt.resource_id = resource_id
        rt.total_visits = total_visits
        rt.total_downloads = total_downloads
        Session.add(rt)
    # NOTE(review): a single commit appears to cover both branches here,
    # unlike the package-stats variant which commits per branch — confirm.
    Session.commit()
def update_resource_stats(resource_id, total_visits, total_downloads):
    """Upsert the PiwikResource stats row for *resource_id*."""
    q_result = Session.query(PiwikResource).filter(
        PiwikResource.resource_id == resource_id).first()
    if q_result:
        # update if resource is already in table
        q_result.total_visits = total_visits
        q_result.total_downloads = total_downloads
    else:
        # add new entry for resource
        rt = PiwikResource()
        rt.resource_id = resource_id
        rt.total_visits = total_visits
        rt.total_downloads = total_downloads
        Session.add(rt)
    # NOTE(review): a single commit appears to cover both branches here,
    # unlike the package-stats variant which commits per branch — confirm.
    Session.commit()
def setup(self):
    """Create a 'harvest' admin user, a v2 API action context, and — under
    the publisher auth profile — a publisher user/group fixture."""
    # Add sysadmin user
    harvest_user = model.User(name=u"harvest", password=u"test")
    model.add_user_to_role(harvest_user, model.Role.ADMIN, model.System())
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {
        "model": model,
        "session": Session,
        "user": u"harvest",
        "schema": package_schema,
        "api_version": "2",
    }
    if config.get("ckan.harvest.auth.profile") == u"publisher":
        # Create a publisher user
        rev = model.repo.new_revision()
        self.publisher_user = model.User(name=u"test-publisher-user",
                                         password=u"test")
        self.publisher = model.Group(name=u"test-publisher",
                                     title=u"Test Publihser",
                                     type=u"publisher")
        Session.add(self.publisher_user)
        Session.add(self.publisher)
        Session.commit()
        member = model.Member(
            table_name="user", table_id=self.publisher_user.id,
            group=self.publisher, capacity="admin"
        )
        Session.add(member)
        Session.commit()
def setup(self):
    """Create a 'harvest' admin user, a v2 API action context, and — under
    the publisher auth profile — a publisher user/group fixture."""
    # Add sysadmin user
    harvest_user = model.User(name=u'harvest', password=u'test')
    model.add_user_to_role(harvest_user, model.Role.ADMIN, model.System())
    Session.add(harvest_user)
    Session.commit()

    package_schema = default_update_package_schema()
    self.context = {'model': model,
                    'session': Session,
                    'user': u'harvest',
                    'schema': package_schema,
                    'api_version': '2'}
    if config.get('ckan.harvest.auth.profile') == u'publisher':
        # Create a publisher user
        rev = model.repo.new_revision()
        self.publisher_user = model.User(name=u'test-publisher-user',
                                         password=u'test')
        self.publisher = model.Group(name=u'test-publisher',
                                     title=u'Test Publihser',
                                     type=u'publisher')
        Session.add(self.publisher_user)
        Session.add(self.publisher)
        Session.commit()
        member = model.Member(table_name='user',
                              table_id=self.publisher_user.id,
                              group=self.publisher,
                              capacity='admin')
        Session.add(member)
        Session.commit()
def set_parent(self, parent_uri):
    """
    Set parent for given license

    :raises ValueError: if *parent_uri* does not resolve to a License
    """
    parent = License.get(parent_uri)
    if not parent:
        raise ValueError("No parent %s object" % parent_uri)
    self.parent_id = parent.id
    Session.add(self)
    try:
        # Session.revision only exists under CKAN's versioned-session
        # setup; tolerate its absence.  NOTE(review): `rev` is never used
        # afterwards — presumably vestigial.
        rev = Session.revision
    except AttributeError:
        rev = None
    Session.flush()
def create_package_doi(pkg_dict):
    """Mint and persist a DOI for the given package dict.

    Builds a unique identifier (prefix + random number, e.g. 10.5072/0044634)
    that is checked against both the local table and the DataCite repository.

    :return: the new CeonPackageDOI record
    """
    _validate_package(pkg_dict)
    doi_string = _create_unique_identifier()
    record = CeonPackageDOI(package_id=pkg_dict['id'], identifier=doi_string)
    Session.add(record)
    Session.commit()
    log.debug(u"Created DOI {} for package {}".format(record.identifier,
                                                      pkg_dict['id']))
    return record
def setup(self):
    """Create publisher-profile fixtures: test users, two publisher groups
    and admin memberships for each."""
    model.Session.remove()
    CreateTestData.create(auth_profile='publisher')
    self.sysadmin_user = model.User.get('testsysadmin')
    self.normal_user = model.User.get('annafan')  # Does not belong to a publisher
    self.publisher1_user = model.User.by_name('russianfan')
    self.publisher2_user = model.User.by_name('tester')

    # Create two Publishers
    rev = model.repo.new_revision()
    self.publisher1 = model.Group(name=u'test-publisher1',
                                  title=u'Test Publihser 1',
                                  type=u'publisher')
    Session.add(self.publisher1)
    self.publisher2 = model.Group(name=u'test-publisher2',
                                  title=u'Test Publihser 2',
                                  type=u'publisher')
    Session.add(self.publisher2)
    member1 = model.Member(table_name='user',
                           table_id=self.publisher1_user.id,
                           group=self.publisher1,
                           capacity='admin')
    Session.add(member1)
    member2 = model.Member(table_name='user',
                           table_id=self.publisher2_user.id,
                           group=self.publisher2,
                           capacity='admin')
    Session.add(member2)
    Session.commit()
def _create_harvester(self, config=True):
    """Build a DDI harvester and a job wired to a test source.

    :param config: when True the source config is '' (empty string),
        otherwise None — lets tests exercise both config states
    :return: (harvester, harvest_job) tuple
    """
    harv = DDIHarvester()
    harv.config = "{}"
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    if config:
        harvest_job.source.config = ''
    else:
        harvest_job.source.config = None
    harvest_job.source.type = "DDI"
    Session.add(harvest_job)
    return harv, harvest_job
def save_package_extent(package_id, geometry=None, srid=None):
    '''Adds, updates or deletes the package extent geometry.

    package_id: Package unique identifier
    geometry: a Python object implementing the Python Geo Interface
        (i.e a loaded GeoJSON object)
    srid: The spatial reference in which the geometry is provided.
        If None, it defaults to the DB srid.

    Will throw ValueError if the geometry object does not provide a geo
    interface.

    The responsibility for calling model.Session.commit() is left to the
    caller.
    '''
    db_srid = int(config.get('ckan.spatial.srid', '4326'))

    existing_package_extent = Session.query(PackageExtent).filter(
        PackageExtent.package_id == package_id).first()

    if geometry:
        # asShape raises ValueError when geometry has no geo interface.
        shape = asShape(geometry)
        if not srid:
            srid = db_srid
        package_extent = PackageExtent(package_id=package_id,
                                       the_geom=WKTElement(shape.wkt, srid))

    # Check if extent exists
    if existing_package_extent:
        # If extent exists but we received no geometry, we'll delete the existing one
        if not geometry:
            existing_package_extent.delete()
            log.debug('Deleted extent for package %s' % package_id)
        else:
            # Check if extent changed
            if not compare_geometry_fields(package_extent.the_geom,
                                           existing_package_extent.the_geom):
                # Update extent
                existing_package_extent.the_geom = package_extent.the_geom
                existing_package_extent.save()
                log.debug('Updated extent for package %s' % package_id)
            else:
                log.debug('Extent for package %s unchanged' % package_id)
    elif geometry:
        # Insert extent
        Session.add(package_extent)
        log.debug('Created new extent for package %s' % package_id)
def update_package_stats(package_name, total_visits, recent_visits):
    """Upsert the PiwikPackage stats row for *package_name*."""
    q_result = Session.query(PiwikPackage).filter(
        PiwikPackage.package_name == package_name).first()
    if q_result:
        # update if package is already in table
        q_result.total_visits = total_visits
        q_result.recent_visits = recent_visits
        Session.commit()
    else:
        # add new entry for package
        pt = PiwikPackage()
        pt.package_name = package_name
        pt.total_visits = total_visits
        pt.recent_visits = recent_visits
        Session.add(pt)
        Session.commit()
def setup(self):
    """Reset the mock FIS-Broker server and create a sysadmin 'harvest'
    user plus a v2 API action context."""
    super(FisbrokerTestBase, self).setup()
    reset_mock_server()
    # Add sysadmin user
    user_name = u'harvest'
    harvest_user = model.User(name=user_name, password=u'test',
                              sysadmin=True)
    Session.add(harvest_user)
    Session.commit()
    package_schema = default_update_package_schema()
    self.context = {
        'model': model,
        'session': Session,
        'user': user_name,
        'schema': package_schema,
        'api_version': '2'
    }
def create_package_doi(pkg_dict):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the datacite repository
    All unique identifiers are created with
    @return: the new CeonPackageDOI record
    """
    _validate_package(pkg_dict)
    identifier = _create_unique_identifier()
    package_doi = CeonPackageDOI(package_id=pkg_dict['id'],
                                 identifier=identifier)
    Session.add(package_doi)
    Session.commit()
    log.info(u"Created DOI {} for package {}".format(package_doi.identifier,
                                                     pkg_dict['id']))
    return package_doi
def update_package_stats(package_name, total_visits, recent_visits):
    """Upsert the PiwikPackage stats row for *package_name*."""
    q_result = Session.query(PiwikPackage).filter(
        PiwikPackage.package_name == package_name).first()
    if q_result:
        # update if package is already in table
        q_result.total_visits = total_visits
        q_result.recent_visits = recent_visits
        Session.commit()
    else:
        # add new entry for package
        pt = PiwikPackage()
        pt.package_name = package_name
        pt.total_visits = total_visits
        pt.recent_visits = recent_visits
        Session.add(pt)
        Session.commit()
def create(cls, vocab, name, labels=None, parent=None, properties=None):
    """Create a term *name* in *vocab* (a Vocabulary or its identifier).

    depth is parent.depth + 1 (0 for a root term).  Optional *labels* are
    applied via set_labels and the term's path is refreshed before the
    instance is flushed and returned.
    """
    if not isinstance(vocab, Vocabulary):
        # Allow callers to pass a vocabulary identifier instead.
        vocab = Vocabulary.get(vocab)
    inst = cls(vocabulary=vocab,
               name=name,
               depth=parent.depth + 1 if parent else 0,
               parent=parent)
    inst.properties = properties or {}
    if labels:
        inst.set_labels(labels)
    inst.update_path()
    Session.add(inst)
    Session.flush()
    return inst
def save_package_extent(package_id, geometry=None, srid=None):
    '''Adds, updates or deletes the package extent geometry.

    package_id: Package unique identifier
    geometry: a Python object implementing the Python Geo Interface
        (i.e a loaded GeoJSON object)
    srid: The spatial reference in which the geometry is provided.
        If None, it defaults to the DB srid.

    Will throw ValueError if the geometry object does not provide a geo
    interface.
    '''
    db_srid = int(config.get('ckan.spatial.srid', '4326'))

    existing_package_extent = Session.query(PackageExtent).filter(
        PackageExtent.package_id == package_id).first()

    if geometry:
        # asShape raises ValueError when geometry has no geo interface.
        shape = asShape(geometry)
        if not srid:
            srid = db_srid
        package_extent = PackageExtent(package_id=package_id,
                                       the_geom=WKTSpatialElement(shape.wkt, srid))

    # Check if extent exists
    if existing_package_extent:
        # If extent exists but we received no geometry, we'll delete the existing one
        if not geometry:
            existing_package_extent.delete()
            log.debug('Deleted extent for package %s' % package_id)
        else:
            # Check if extent changed (WKT comparison via DB scalar;
            # `<>` is the Python 2 inequality operator)
            if Session.scalar(package_extent.the_geom.wkt) <> Session.scalar(existing_package_extent.the_geom.wkt):
                # Update extent
                existing_package_extent.the_geom = package_extent.the_geom
                existing_package_extent.save()
                log.debug('Updated extent for package %s' % package_id)
            else:
                log.debug('Extent for package %s unchanged' % package_id)
    elif geometry:
        # Insert extent
        Session.add(package_extent)
        log.debug('Created new extent for package %s' % package_id)
def create_resource_doi(pkg_dict, res_dict):
    """Mint and persist a DOI for the given resource dict.

    Builds a unique identifier (prefix + random number, e.g. 10.5072/0044634)
    derived from the package's DOI, checked against both the local table and
    the DataCite repository.  Creates the package DOI first if missing.

    :return: the new CeonResourceDOI record
    """
    _validate_resource(res_dict)
    resource_id = res_dict['id']
    package_doi = CeonPackageDOI.get(pkg_dict['id'])
    if not package_doi:
        # BUG FIX: the original called create_package_doi() without binding
        # the result, leaving package_doi None and crashing on
        # `package_doi.identifier` below.
        package_doi = create_package_doi(pkg_dict)
    identifier = _create_unique_identifier(package_doi.identifier)
    resource_doi = CeonResourceDOI(resource_id=resource_id,
                                   identifier=identifier)
    Session.add(resource_doi)
    Session.commit()
    log.debug(u"Created DOI {} for resource {}".format(resource_doi.identifier,
                                                       res_dict['id']))
    return resource_doi
def map_nonconformant_groups(harvest_object):
    """
    Adds themes to fetched data

    Maps the harvested object's group names (and display names) through the
    theme mapping data and writes the resulting themes into the object's
    'theme' extra and top-level 'theme' key, then re-serialises the content.
    """
    themes_data = _load_mapping_data()
    if not themes_data:
        return

    data = json.loads(harvest_object.content)
    _groups = data.get('groups')
    if not _groups:
        return

    groups = [g['name'] for g in _groups]
    groups.extend([g['display_name'] for g in _groups if 'display_name' in g])

    new_themes = _get_new_themes(groups, themes_data, add_existing=False)
    if not new_themes:
        return

    # ensure themes are upper-case, otherwise will be discarded
    # by validators
    tdata = {'key': 'theme', 'value': _encode_list(new_themes).upper()}

    # Update an existing 'theme' extra in place, or append a new one.
    existing = False
    extra = data.get('extras') or []
    for eitem in extra:
        if eitem['key'] == 'theme':
            existing = True
            eitem['value'] = tdata['value']
            break
    if not existing:
        extra.append(tdata)
    data['extras'] = extra
    data['theme'] = tdata['value']

    harvest_object.content = json.dumps(data)
    Session.add(harvest_object)
    try:
        # Preserve the versioned-session revision across the flush, if any.
        rev = Session.revision
    except AttributeError:
        rev = None
    Session.flush()
    Session.revision = rev
def updateValidationJobStatus(self, session=None, resource_id=None,
                              status=None, report=None, error=None,
                              validationRecord=None):
    # type: (object, Session, str, str, object, object) -> model.Validation
    """
    Update a validation job's status.  Terminal statuses (success, failure,
    error) also set the record's `finished` timestamp to now.

    :param session: Session
    :param resource_id:
    :param status:
    :param report:
    :param error:
    :param validationRecord: existing record; looked up by resource_id
        when None
    :raises ValidationJobDoesNotExist: no record found for resource_id
    :raises ValidationJobAlreadyRunning: job already marked running within
        the last hour
    :return: the updated validation record
    """
    log.debug("updateValidationJobStatus: %s status: %s", resource_id, status)
    if validationRecord is None:
        validationRecord = self.getValidationJob(session, resource_id)
    if validationRecord is None:
        log.error("record not found to update statues: %s", resource_id)
        raise ValidationJobDoesNotExist()
    # Handle already running status in in last hour
    if status == StatusTypes.running and validationRecord.status == status:
        if self.getHoursSince(validationRecord.created) < 1:
            raise ValidationJobAlreadyRunning()
    validationRecord.status = status
    validationRecord.report = report
    validationRecord.error = error
    if status in (StatusTypes.success, StatusTypes.failure, StatusTypes.error):
        # Terminal states record completion time.
        validationRecord.finished = datetime.datetime.utcnow()
    Session.add(validationRecord)
    Session.commit()
    # Flush so other transactions are not waiting
    Session.flush()
    return validationRecord
def init_db():
    """ Create board, app, mark tables in the database.
    Prepopulate category table with default data.
    """
    # Core CKAN tables absent (e.g. a partially initialised test DB): bail out.
    if not model.package_table.exists():  # during tests?
        return
    session = Session()
    # Create this extension's tables on first run.
    for table in [board_table, app_table, mark_table]:
        if not table.exists():
            table.create(checkfirst=True)
            log.debug("Apps {} have been created".format(table.name))
    # Seed the default boards, skipping any whose slug already exists.
    for board_name, board_desc in DEFAULT_BOARDS.iteritems():
        if not Board.get_by_slug(slugify(board_name)):
            board = Board()
            board.name = board_name
            board.slug = slugify(board_name)
            board.description = board_desc
            session.add(board)
            log.debug("Add {0} to {1} table".format(board_name, board_table.name))
    session.commit()
    if not migration_table.exists():
        migration_table.create(checkfirst=True)
        session.commit()
    # Apply only migrations beyond the number already recorded in the table.
    migration_number = session.query(migration_table).count()
    log.debug('Migration number: %s', migration_number)
    # NOTE(review): the list is empty, so the loop below is currently a no-op;
    # presumably migration SQL statements get appended here over time.
    migration_sql_list = []
    for counter, sql in enumerate(migration_sql_list, start=1):
        if migration_number < counter:
            try:
                session.execute(sql)
            except ProgrammingError:
                # e.g. the change already exists in the schema; skip it.
                session.rollback()
            finally:
                # Record the migration as processed either way.
                session.execute(migration_table.insert())
                session.commit()
    session.close()
def authenticate(self, environ, identity):
    """Resolve a repoze.who OpenID identity to a CKAN user name.

    Creates the user on first login. Returns the user name, or None when
    the identity carries no OpenID userid.
    """
    if 'repoze.who.plugins.openid.userid' not in identity:
        return None

    openid = identity.get('repoze.who.plugins.openid.userid')
    user = User.by_openid(openid)
    if user is None:
        # TODO: Implement a mask to ask for an alternative user
        # name instead of just using the OpenID identifier.
        name = identity.get('repoze.who.plugins.openid.nickname')
        if not User.check_name_valid(name):
            name = openid
        if not User.check_name_available(name):
            name = openid
        user = User(openid=openid,
                    name=name,
                    fullname=identity.get('repoze.who.plugins.openid.fullname'),
                    email=identity.get('repoze.who.plugins.openid.email'))
        Session.add(user)
        Session.commit()
        Session.remove()
    return user.name
def create_resource_doi(pkg_dict, res_dict):
    """Create and persist a DOI for a resource.

    Ensures the parent package has a DOI first (creating one when missing),
    then derives a unique resource identifier from the package identifier,
    e.g. 10.5072/0044634. The random part is checked against the local table
    and the DataCite repository by `_create_unique_identifier`.

    :param pkg_dict: package dict the resource belongs to.
    :param res_dict: resource dict; must pass `_validate_resource`.
    :return: the newly created CeonResourceDOI.
    """
    _validate_resource(res_dict)
    resource_id = res_dict['id']
    package_doi = CeonPackageDOI.get(pkg_dict['id'])
    if not package_doi:
        # BUG FIX: the original called create_package_doi() but left
        # `package_doi` as None, so `package_doi.identifier` below raised
        # AttributeError. Re-fetch the freshly created package DOI instead.
        create_package_doi(pkg_dict)
        package_doi = CeonPackageDOI.get(pkg_dict['id'])
    identifier = _create_unique_identifier(package_doi.identifier)
    resource_doi = CeonResourceDOI(resource_id=resource_id, identifier=identifier)
    Session.add(resource_doi)
    Session.commit()
    log.info(u"Created DOI {} for resource {}".format(resource_doi.identifier, res_dict['id']))
    return resource_doi
def _test_auth_not_allowed(self, user_name=None, source=None, status=401):
    """Assert that `user_name` receives `status` on every harvest UI page.

    When no source is given, a throwaway harvest source is created so the
    per-source pages have something to point at.
    """
    if not source:
        # Create harvest source
        source = HarvestSource(url=u'http://test-source.com', type='ckan')
        Session.add(source)
        Session.commit()

    if user_name:
        extra_environ = {'REMOTE_USER': user_name.encode('utf8')}
    else:
        extra_environ = {}

    # List, Create, Read, Edit, Refresh — all must return `status`.
    pages = [
        '/harvest',
        '/harvest/new',
        '/harvest/%s' % source.id,
        '/harvest/edit/%s' % source.id,
        '/harvest/refresh/%s' % source.id,
    ]
    for url in pages:
        self.app.get(url, status=status, extra_environ=extra_environ)
def from_data(cls, license_type, version, uri, path, document_uri, rank_order, names, default_lang=None, parent=None):
    """Create and persist a License instance from raw catalogue data.

    `names` maps language codes to localized names; `default_lang` selects
    the default name (falls back to 'it'). When `parent` matches the URI of
    an existing License, its database id is used as the parent id.
    """
    if default_lang is None:
        default_lang = 'it'
    default_name = names[default_lang]

    # Resolve a parent URI to its database id when that license exists.
    if parent is not None:
        existing_parent = Session.query(License).filter_by(
            uri=str(parent)).first()
        if existing_parent:
            parent = existing_parent.id

    inst = cls(license_type=license_type,
               version=version,
               uri=uri,
               path=path,
               document_uri=document_uri,
               rank_order=rank_order,
               parent_id=parent,
               default_name=default_name)
    Session.add(inst)

    # Preserve the session's revision attribute across flushes (vdm quirk).
    try:
        rev = Session.revision
    except AttributeError:
        rev = None
    Session.flush()
    Session.revision = rev

    inst.set_names(names)
    Session.flush()
    Session.revision = rev
    return inst
def test_zaincremental_harvester(self):
    """Incremental OAI-PMH harvest: gather and fetch against a mocked server."""
    # Build an in-process OAI-PMH server and point the oaipmh client at it.
    client = CKANServer()
    metadata_registry = metadata.MetadataRegistry()
    metadata_registry.registerReader('oai_dc', oai_dc_reader)
    metadata_registry.registerWriter('oai_dc', oai_dc_writer)
    serv = BatchingServer(client, metadata_registry=metadata_registry)
    oaipmh.client.Client = mock.Mock(return_value=ServerClient(serv, metadata_registry))
    harv = OAIPMHHarvester()
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
    # gather_started is set in the future so the incremental window below
    # includes the package revision created afterwards.
    harvest_job.gather_started = ((datetime.now() + timedelta(days=1)))
    harvest_job.source.config = '{"incremental":"True"}'
    harvest_job.source.type = "OAI-PMH"
    Session.add(harvest_job)
    # Create a package in a revision dated after gather_started and put it
    # in the 'roger' group so the harvester can pick it up.
    rev = model.repo.new_revision()
    rev.timestamp = ((datetime.now() + timedelta(days=2)))
    pkg = Package(name='footest', revision=rev)
    Session.add(pkg)
    pkg.save()
    roger = Group.get('roger')
    roger.add_package_by_name('footest')
    Session.add(roger)
    roger.save()
    gathered = harv.gather_stage(harvest_job)
    harvest_object = HarvestObject.get(gathered[0])
    harv.fetch_stage(harvest_object)
    harvobj = json.loads(harvest_object.content)
    # The fetched content must contain harvested records.
    self.assert_(harvobj['records'])
def _get_or_create_user(self, env):
    """Return the CKAN user for the Shibboleth mail attribute, creating one if absent.

    Example WSGI variables provided by Shibboleth:
      Shib-Application-ID 'default'
      Shib-Authentication-Instant '2012-08-13T12:04:22.492Z'
      Shib-Authentication-Method 'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport'
      Shib-AuthnContext-Class 'urn:oasis:names:tc:SAML:2.0:ac:classes:PasswordProtectedTransport'
      Shib-Identity-Provider 'https://idp.example.com/idp/shibboleth'
      Shib-Session-ID '_7ec5a681e6dbae627c1cefcc7cb4d56a'
      Shib-Session-Index '39dafd8477850f5e0b968e3561570197f2109948c1d374a7a2b4c9a7adbf8628'
      cn 'My Other Self'
      givenName 'My Other Self'
      mail '*****@*****.**'
    """
    email = env.get(self.mail, None)
    fullname = env.get(self.name, None)
    if not email or not fullname:
        log.debug("Environ does not contain mail or cn attributes, user not loaded.")
        return None

    user = meta.Session.query(User).autoflush(False).filter_by(openid=email).first()
    if user is not None:
        return user

    log.debug("User does not exists, creating new one.")
    import re
    # Derive a CKAN-safe user name from the mail address.
    username = re.sub("[.@]", "_", email)
    user = User(name=username, fullname=fullname, email=email, openid=email)
    Session.add(user)
    Session.commit()
    Session.remove()
    log.debug("Created new user %s" % fullname)
    return user
def preauthenticate(self, environ, identity):
    """Exchange an OAuth identity for a CKAN user and stamp the repoze userid.

    Fetches the user profile from the OAuth provider, creates or updates the
    matching CKAN user, synchronizes the user's auth groups, and sets
    'repoze.who.userid' on the identity. Returns None when the identity
    lacks usable token data.
    """
    # turn the oauth identity into a CKAN one; set it in our identity
    import oauth2 as oauth
    try:
        token_parts = dict(urlparse.parse_qsl(identity['userdata']))
        token_key = token_parts['oauth_token']
        token_secret = token_parts['oauth_token_secret']
    except KeyError:
        # Missing userdata or token fields: nothing to authenticate with.
        return None

    token = oauth.Token(token_key, token_secret)
    client = oauth.Client(self.consumer, token)
    resp, content = client.request(self.user_url, "GET")
    data = json.loads(content)
    user_id = data['id']
    logging.info("Preauth: Got oauth user data for user %s" % user_id)

    user = User.by_openid(user_id)
    if user is None:
        user = User(openid=user_id, name=data['id'], fullname=data['name'], email=data['mail'])
        Session.add(user)
    else:
        user.fullname = data['name']  # if the name is updated
    Session.commit()
    Session.remove()
    logging.info("Preauth: Created new/updated user %s" % user_id)

    # deal with groups
    _sync_auth_groups(user, data['groups'])

    name = user.name.encode("utf8")
    logging.info("Preauth: Returning user identifier %s" % name)
    identity['repoze.who.userid'] = name
    return identity
def test_harvest_different_sources_same_document(self):
    """Harvesting the same unchanged document from a second source must not
    update the package; a forced re-import from source2 must update it."""
    # Create source1
    source1_fixture = {"url": u"http://127.0.0.1:8999/single/source1/same_dataset.xml", "type": u"gemini-single"}
    source1, first_job = self._create_source_and_job(source1_fixture)
    first_obj = self._run_job_for_single_document(first_job)
    first_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Package was created
    assert first_package_dict
    assert first_package_dict["state"] == u"active"
    assert first_obj.current == True
    # Harvest the same document, unchanged, from another source, the package
    # is not updated.
    # (As of https://github.com/okfn/ckanext-inspire/commit/9fb67
    # we are no longer throwing an exception when this happens)
    source2_fixture = {"url": u"http://127.0.0.1:8999/single/source2/same_dataset.xml", "type": u"gemini-single"}
    source2, second_job = self._create_source_and_job(source2_fixture)
    second_obj = self._run_job_for_single_document(second_job)
    second_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Package was not updated
    # NOTE(review): in these asserts the comma makes the right-hand
    # expression an assert *message*, not an additional check.
    assert second_package_dict, first_package_dict["id"] == second_package_dict["id"]
    assert first_package_dict["metadata_modified"] == second_package_dict["metadata_modified"]
    assert not second_obj.package, not second_obj.package_id
    assert second_obj.current == False, first_obj.current == True
    # Inactivate source1 and reharvest from source2, package should be updated
    third_job = self._create_job(source2.id)
    third_obj = self._run_job_for_single_document(third_job, force_import=True)
    # Re-attach and reload the harvest objects to observe their final state.
    Session.remove()
    Session.add(first_obj)
    Session.add(second_obj)
    Session.add(third_obj)
    Session.refresh(first_obj)
    Session.refresh(second_obj)
    Session.refresh(third_obj)
    third_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Package was updated
    assert third_package_dict, first_package_dict["id"] == third_package_dict["id"]
    assert third_package_dict["metadata_modified"] > second_package_dict["metadata_modified"]
    assert third_obj.package, third_obj.package_id == first_package_dict["id"]
    assert third_obj.current == True
    assert second_obj.current == False
    assert first_obj.current == False
def test_zzcomplete(self):
    """Full DDI harvest against the live FSD service (manual-only; very slow)."""
    raise SkipTest('Takes ages, do not run')
    # Everything below is only reachable when the SkipTest above is removed.
    # Restore the real opener (other tests replace urllib2.urlopen with a mock).
    urllib2.urlopen = realopen
    harv = DDIHarvester()
    harv.config = "{}"
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://www.fsd.uta.fi/fi/aineistot/luettelo/fsd-ddi-records-uris-fi.txt"
    harvest_job.source.config = ''
    harvest_job.source.type = "DDI"
    Session.add(harvest_job)
    gathered = harv.gather_stage(harvest_job)
    diffs = []
    for gath in gathered:
        harvest_object = HarvestObject.get(gath)
        print json.loads(harvest_object.content)['url']
        # Time the fetch+import of each document.
        before = datetime.now()
        harv.fetch_stage(harvest_object)
        harv.import_stage(harvest_object)
        diff = datetime.now() - before
        print diff
        diffs.append(diff)
    # NOTE(review): this passes the timedelta *class* as the start value;
    # it looks like it should be timedelta() — confirm before un-skipping.
    print sum(diffs, timedelta)
def test_harvest_import_command(self):
    """harvest_objects_import must update the package while keeping the
    first harvest object current."""
    # Create source
    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
        'source_type': u'gemini-single'
    }
    source, first_job = self._create_source_and_job(source_fixture)
    first_obj = self._run_job_for_single_document(first_job)
    before_package_dict = get_action('package_show_rest')(self.context,{'id':first_obj.package_id})
    # Package was created
    assert before_package_dict
    assert first_obj.current == True
    assert first_obj.package
    # Create and run two more jobs, the package should not be updated
    second_job = self._create_job(source.id)
    second_obj = self._run_job_for_single_document(second_job)
    third_job = self._create_job(source.id)
    third_obj = self._run_job_for_single_document(third_job)
    # Run the import command manually
    imported_objects = get_action('harvest_objects_import')(self.context,{'source_id':source.id})
    # Re-attach and reload the harvest objects to observe post-import state.
    Session.remove()
    Session.add(first_obj)
    Session.add(second_obj)
    Session.add(third_obj)
    Session.refresh(first_obj)
    Session.refresh(second_obj)
    Session.refresh(third_obj)
    after_package_dict = get_action('package_show_rest')(self.context,{'id':first_obj.package_id})
    # Package was updated, and the current object remains the same
    # NOTE(review): the comma makes the right-hand expression an assert
    # *message*, not an additional check.
    assert after_package_dict, before_package_dict['id'] == after_package_dict['id']
    assert after_package_dict['metadata_modified'] > before_package_dict['metadata_modified']
    assert third_obj.current == False
    assert second_obj.current == False
    assert first_obj.current == True
    source_dict = get_action('harvest_source_show')(self.context,{'id':source.id})
    assert source_dict['status']['total_datasets'] == 1
def test_harvest_import_command(self):
    """harvest_objects_import must update the package while keeping the
    first harvest object current (single-dataset fixture variant)."""
    # Create source
    source_fixture = {"url": u"http://127.0.0.1:8999/single/dataset1.xml", "type": u"gemini-single"}
    source, first_job = self._create_source_and_job(source_fixture)
    first_obj = self._run_job_for_single_document(first_job)
    before_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Package was created
    assert before_package_dict
    assert first_obj.current == True
    assert first_obj.package
    # Create and run two more jobs, the package should not be updated
    second_job = self._create_job(source.id)
    second_obj = self._run_job_for_single_document(second_job)
    third_job = self._create_job(source.id)
    third_obj = self._run_job_for_single_document(third_job)
    # Run the import command manually
    imported_objects = get_action("harvest_objects_import")(self.context, {"source_id": source.id})
    # Re-attach and reload the harvest objects to observe post-import state.
    Session.remove()
    Session.add(first_obj)
    Session.add(second_obj)
    Session.add(third_obj)
    Session.refresh(first_obj)
    Session.refresh(second_obj)
    Session.refresh(third_obj)
    after_package_dict = get_action("package_show_rest")(self.context, {"id": imported_objects[0]["package_id"]})
    # Package was updated, and the current object remains the same
    # NOTE(review): the comma makes the right-hand expression an assert
    # *message*, not an additional check.
    assert after_package_dict, before_package_dict["id"] == after_package_dict["id"]
    assert after_package_dict["metadata_modified"] > before_package_dict["metadata_modified"]
    assert third_obj.current == False
    assert second_obj.current == False
    assert first_obj.current == True
    source_dict = get_action("harvest_source_show")(self.context, {"id": source.id})
    assert len(source_dict["status"]["packages"]) == 1
def test_harvest_deleted_record(self):
    """A deleted package stays deleted on an unchanged re-harvest and is
    reactivated when a newer document is harvested."""
    # Create source
    source_fixture = {"url": u"http://127.0.0.1:8999/single/service1.xml", "type": u"gemini-single"}
    source, first_job = self._create_source_and_job(source_fixture)
    first_obj = self._run_job_for_single_document(first_job)
    first_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Package was created
    assert first_package_dict
    assert first_package_dict["state"] == u"active"
    assert first_obj.current == True
    # Delete package
    first_package_dict["state"] = u"deleted"
    self.context.update({"id": first_package_dict["id"]})
    updated_package_dict = get_action("package_update_rest")(self.context, first_package_dict)
    # Create and run a second job, the date has not changed, so the package should not be updated
    # and remain deleted
    first_job.status = u"Finished"
    first_job.save()
    second_job = self._create_job(source.id)
    second_obj = self._run_job_for_single_document(second_job)
    second_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Package was not updated
    # NOTE(review): in these asserts the comma makes the right-hand
    # expression an assert *message*, not an additional check.
    assert second_package_dict, updated_package_dict["id"] == second_package_dict["id"]
    assert not second_obj.package, not second_obj.package_id
    assert second_obj.current == False, first_obj.current == True
    # Harvest an updated document, with a more recent modified date, package should be
    # updated and reactivated
    source.url = u"http://127.0.0.1:8999/single/service1_newer.xml"
    source.save()
    third_job = self._create_job(source.id)
    third_obj = self._run_job_for_single_document(third_job)
    third_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})
    # Re-attach and reload the harvest objects to observe their final state.
    Session.remove()
    Session.add(first_obj)
    Session.add(second_obj)
    Session.add(third_obj)
    Session.refresh(first_obj)
    Session.refresh(second_obj)
    Session.refresh(third_obj)
    # Package was updated
    assert third_package_dict, third_package_dict["id"] == second_package_dict["id"]
    assert third_obj.package, third_obj.package
    assert third_obj.current == True, second_obj.current == False
    assert first_obj.current == False
    assert "NEWER" in third_package_dict["title"]
    assert third_package_dict["state"] == u"active"