def latest_post():
    '''Return the most recent blog post.

    Returns None if there are no blog posts.

    :rtype: ckanext.sweden.blog.model.post.Post or None
    '''
    try:
        from ckanext.sweden.blog.model.post import Post
        # Most recent visible post. NOTE(review): textual
        # order_by('created desc') is removed in SQLAlchemy 1.4+ —
        # confirm the pinned SQLAlchemy version supports it.
        post = Session.query(Post).\
            filter(Post.visible == True).\
            order_by('created desc').\
            first()
    except NoResultFound:
        return None
    if post is None:
        return None
    # Truncate to 320 chars and render as markdown for preview display.
    post.content_markdown = markdown(
        unicode(truncate(post.content, length=320, indicator='...',
                         whole_word=True)))
    # Resolve the author; fall back to a direct query if User.get misses.
    post.post_author = (model.User.get(post.user_id) or
                        Session.query(model.User).filter_by(
                            id=post.user_id).first())
    return post
def get_or_create_doi(package_id):
    '''Create or retrieve the unique identifier for this package_id.

    :param package_id: id of the package to look up / mint a DOI for
    :returns: the existing or newly created DOI record
    '''
    datacite_api = DOIDataCiteAPI()
    doi = get_doi(package_id)
    if doi is None:
        # Keep drawing random suffixes until one is unused both locally
        # and at the DataCite service.
        while True:
            identifier = os.path.join(
                get_prefix(),
                u'{0:07}'.format(random.randint(1, 100000)))
            # Check this identifier doesn't exist in the table
            if Session.query(DOI).filter(
                    DOI.identifier == identifier).count():
                continue
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                # Not found at DataCite (or request failed): treated as free.
                pass
            else:
                if datacite_doi.text:
                    continue
            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            break
    return doi
def setUp(self):
    """Load the example licenses RDF fixture into the session."""
    base_dir = os.path.dirname(__file__)
    examples_dir = os.path.join(base_dir, '..', '..', '..', 'examples')
    licenses_path = os.path.join(examples_dir, 'licenses.rdf')
    load_from_graph(path=licenses_path)
    Session.flush()
def test_create_extent(self):
    """A saved PackageExtent round-trips the point geometry and SRID."""
    package = factories.Dataset()
    geojson = json.loads(self.geojson_examples["point"])
    shape = asShape(geojson)
    package_extent = PackageExtent(
        package_id=package["id"],
        the_geom=WKTElement(shape.wkt, self.db_srid),
    )
    package_extent.save()
    assert (package_extent.package_id == package["id"])
    if legacy_geoalchemy:
        # GeoAlchemy 1.x: coordinates are read back via Session.scalar.
        assert (Session.scalar(
            package_extent.the_geom.x) == geojson["coordinates"][0])
        assert (Session.scalar(
            package_extent.the_geom.y) == geojson["coordinates"][1])
        assert (Session.scalar(
            package_extent.the_geom.srid) == self.db_srid)
    else:
        # GeoAlchemy2: read coordinates via PostGIS ST_X/ST_Y functions.
        from sqlalchemy import func
        assert (Session.query(func.ST_X(package_extent.the_geom)).first()
                [0] == geojson["coordinates"][0])
        assert (Session.query(func.ST_Y(package_extent.the_geom)).first()
                [0] == geojson["coordinates"][1])
        assert (package_extent.the_geom.srid == self.db_srid)
def moderate_change_apply(self, change):
    """Interactively ask whether to apply *change*.

    Returns True when the user accepts the change (otherwise returns
    None). When the change is rejected, offers to create a Changemask
    for the ref so that future changes to it are masked.
    """
    print "Change summary:"
    self.print_changes([change])
    print ""
    # Empty input defaults to "y"; only the first letter is inspected.
    answer = raw_input("Do you want to apply this change? [Y/n] ").strip() or "y"
    answer = answer[0].lower()
    print ""
    if answer == 'y':
        return True
    else:
        print
        answer = raw_input("Do you want to mask changes to this ref? [Y/n] ").strip() or "y"
        answer = answer[0].lower()
        print ""
        if answer == 'y':
            from ckan.model.changeset import ChangemaskRegister, Session
            register = ChangemaskRegister()
            mask = register.create_entity(change.ref)
            Session.add(mask)
            Session.commit()
            print "Mask has been set for ref: %s" % change.ref
            print ""
        else:
            # Pause so the user has time to read the warning.
            print "Warning: Not setting a mask after not applying changes may lead to conflicts."
            import time
            time.sleep(5)
            print ""
def test_zfaulty_xml_unknown_errors(self):
    """Harvest two local XML fixtures end-to-end and check both become
    findable via package text search."""
    harv, job = self._create_harvester()
    res = "http://www.fsd.uta.fi/fi/aineistot/luettelo/FSD0115/FSD0115.xml"
    # gather_stage reads the mocked URL body; fetch/import read local files.
    urllib2.urlopen = mock.Mock(return_value=StringIO(res))
    gathered = harv.gather_stage(job)
    urllib2.urlopen = mock.Mock(return_value=open("FSD2355.xml"))
    harvest_obj = HarvestObject.get(gathered[0])
    self.assert_(harv.fetch_stage(harvest_obj))
    self.assert_(harv.import_stage(harvest_obj))
    print Package.text_search(\
        Session.query(Package), 'Kansalaiskeskustelu ydinvoimasta 2006').all()
    self.assert_(len(Package.text_search(\
        Session.query(Package), 'Kansalaiskeskustelu ydinvoimasta 2006').all()) >= 1)
    # Second document: repeat the whole pipeline with another fixture.
    res = "http://www.fsd.uta.fi/fi/aineistot/luettelo/FSD0115/FSD0115.xml"
    urllib2.urlopen = mock.Mock(return_value=StringIO(res))
    gathered = harv.gather_stage(job)
    urllib2.urlopen = mock.Mock(return_value=open("FSD2362.xml"))
    harvest_obj = HarvestObject.get(gathered[0])
    self.assert_(harv.fetch_stage(harvest_obj))
    self.assert_(harv.import_stage(harvest_obj))
    self.assert_(len(Package.text_search(\
        Session.query(Package), 'Energia-asennetutkimus 2004').all()) >= 1)
def load_themes():
    """Load the EU themes vocabulary and its subthemes into the DB."""
    vocab_file_path = _get_path('data-theme-skos.rdf', 'vocabularies')

    # Minimal stand-in for the CLI options object the loader expects.
    class Opts(object):
        def __init__(self, filename, name, format):
            self.filename = filename
            self.url = None  # loader accepts a URL; unused here
            self.name = name
            self.format = format

    themes_loader.options = Opts(vocab_file_path, 'eu_themes', None)
    themes_loader.load()
    tag_localized = interfaces.get_localized_tag_name('ECON')
    Session.flush()
    assert tag_localized
    q = Session.query(Vocabulary).filter_by(name='eu_themes')
    vocab = q.first()
    assert vocab
    # Load the subtheme mapping against the EUROVOC thesaurus.
    map_f = _get_path(MAPPING_FILE)
    voc_f = _get_path(EUROVOC_FILE)
    clear_subthemes()
    load_subthemes(map_f, voc_f)
    assert Subtheme.q().first()
def test_update_extent(self):
    """Updating an extent's geometry changes its type, keeps id and SRID."""
    package = Package.get('annakarenina')
    geojson = json.loads(self.geojson_examples['point'])
    shape = asShape(geojson)
    package_extent = PackageExtent(package_id=package.id,
                                   the_geom=WKTSpatialElement(
                                       shape.wkt, self.db_srid))
    package_extent.save()
    assert Session.scalar(
        package_extent.the_geom.geometry_type) == 'ST_Point'
    # Update the geometry (Point -> Polygon)
    geojson = json.loads(self.geojson_examples['polygon'])
    shape = asShape(geojson)
    package_extent.the_geom = WKTSpatialElement(shape.wkt, self.db_srid)
    package_extent.save()
    assert package_extent.package_id == package.id
    assert Session.scalar(
        package_extent.the_geom.geometry_type) == 'ST_Polygon'
    assert Session.scalar(package_extent.the_geom.srid) == self.db_srid
def setup(self):
    """Create test data plus two publisher groups with one admin each."""
    model.Session.remove()
    CreateTestData.create(auth_profile='publisher')
    self.sysadmin_user = model.User.get('testsysadmin')
    self.normal_user = model.User.get(
        'annafan')  # Does not belong to a publisher
    self.publisher1_user = model.User.by_name('russianfan')
    self.publisher2_user = model.User.by_name('tester')
    # Create two Publishers
    rev = model.repo.new_revision()
    self.publisher1 = model.Group(name=u'test-publisher1',
                                  title=u'Test Publihser 1',
                                  type=u'publisher')
    Session.add(self.publisher1)
    self.publisher2 = model.Group(name=u'test-publisher2',
                                  title=u'Test Publihser 2',
                                  type=u'publisher')
    Session.add(self.publisher2)
    # Each publisher user administers its own group.
    member1 = model.Member(table_name='user',
                           table_id=self.publisher1_user.id,
                           group=self.publisher1,
                           capacity='admin')
    Session.add(member1)
    member2 = model.Member(table_name='user',
                           table_id=self.publisher2_user.id,
                           group=self.publisher2,
                           capacity='admin')
    Session.add(member2)
    Session.commit()
def test_tokenizer(self):
    """License.find_by_token resolves several token spellings."""
    load_from_graph(path=self.licenses)
    Session.flush()
    tokens = License.get_as_tokens()
    self.assertTrue(len(tokens.keys()) > 0)
    # Hyphenated token resolves to the CC-BY-SA license.
    from_token, default = License.find_by_token('cc-by-sa')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('ccbysa' in from_token.uri.lower())
    # cc-zero maps to a PublicDomain license type.
    from_token, default = License.find_by_token('cc-zero')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('PublicDomain' in from_token.license_type)
    # A localized human-readable name also resolves.
    from_token, default = License.find_by_token('Creative Commons Attribuzione')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('Attribution' in from_token.license_type)
    # A long free-text blob still matches ODbL (with 'other' as fallback).
    odbl = """["Open Data Commons Open Database License / OSM (ODbL/OSM): You are free to copy, distribute, transmit and adapt our data, as long as you credit OpenStreetMap and its contributors\nIf you alter or build upon our data, you may distribute the result only under the same licence. (http://www.openstreetmap.org/copyright)"]"""
    from_token, default = License.find_by_token(odbl, 'other')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertTrue('odbl' in from_token.default_name.lower())
def _run_job_for_single_document(
    self, job, force_import=False, expect_gather_errors=False,
    expect_obj_errors=False
):
    """Run gather/fetch/import for a job expected to yield one object.

    Asserts presence/absence of gather and object errors according to
    the expect_* flags and returns the imported HarvestObject.
    """
    harvester = GeminiDocHarvester()
    harvester.force_import = force_import
    object_ids = harvester.gather_stage(job)
    # NOTE(review): the second operand here is the assert *message*
    # (`len(object_ids) == 1` evaluates to a bool), so only truthiness
    # of object_ids is actually checked — confirm the intent.
    assert object_ids, len(object_ids) == 1
    if expect_gather_errors:
        assert len(job.gather_errors) > 0
    else:
        assert len(job.gather_errors) == 0
    assert harvester.fetch_stage(object_ids) == True
    obj = HarvestObject.get(object_ids[0])
    assert obj, obj.content
    harvester.import_stage(obj)
    # Re-read DB state mutated by import_stage.
    Session.refresh(obj)
    if expect_obj_errors:
        assert len(obj.errors) > 0
    else:
        assert len(obj.errors) == 0
    job.status = u"Finished"
    job.save()
    return obj
def persist_tag_multilang(tag: model.Tag, lang, label, vocab):
    """Create or update the localized label for *tag* in *lang*.

    :returns: a (DBAction, localization id) tuple describing the outcome.
    :raises: re-raises any persistence error after rolling back.
    """
    log.debug('DCAT-AP_IT: persisting tag multilang for tag %r ...', tag.name)

    existing = TagLocalization.by_tag_id(tag.id, lang)

    if not existing:
        # No localization yet for this (tag, lang): create one.
        created = TagLocalization.persist(tag, label, lang)
        return DBAction.CREATED, created.id

    if not label:
        # Never overwrite an existing localization with an empty label.
        log.warning(
            f'Skipping empty label V:{vocab.name} T:{tag.name} L:{lang}')
        return DBAction.ERROR, existing.id

    if label == existing.text:
        # Already up to date.
        return DBAction.NONE, existing.id

    try:
        existing.text = label
        existing.save()
    except Exception as err:
        # on rollback, the same closure of state
        # as that of commit proceeds.
        Session.rollback()
        log.error('Exception occurred while persisting DB objects: %s', err)
        raise
    return DBAction.UPDATED, existing.id
def make_package_name(self, title, exclude_existing_package):
    '''
    Creates a URL friendly name from a title

    If the name already exists, it will add some random characters at the end

    :param title: package title to derive the slug from
    :param exclude_existing_package: id of the package being updated, whose
        current name must not count as a collision
    '''
    name = munge_title_to_name(title).replace('_', '-')
    while '--' in name:
        name = name.replace('--', '-')
    name = name[0:90]  # max length is 100
    # Is this slug already in use (and if we're updating a package, is it in
    # use by a different package?).
    pkg_obj = Session.query(Package).filter(Package.name == name).filter(Package.id != exclude_existing_package).first()
    if not pkg_obj:
        # The name is available, so use it. Note that if we're updating an
        # existing package we will be updating this package's URL, so incoming
        # links may break.
        return name
    if exclude_existing_package:
        # The name is not available, and we're updating a package. Chances
        # are the package's name already had some random string attached
        # to it last time. Prevent spurious updates to the package's URL
        # (choosing new random text) by just reusing the existing package's
        # name.
        pkg_obj = Session.query(Package).filter(Package.id == exclude_existing_package).first()
        if pkg_obj:
            # the package may not exist yet because we may be passed the
            # desired package GUID before a new package is instantiated
            return pkg_obj.name
    # Append some random text to the URL. Hope that with five characters
    # there will be no collision.
    return name + "-" + str(uuid.uuid4())[:5]
def _test_auth_not_allowed(self, user_name=None, source=None, status=401):
    """Assert every harvest UI endpoint answers *status* for this user.

    Creates a throwaway harvest source when none is supplied; an
    anonymous request is made when user_name is None.
    """
    if not source:
        # Create harvest source
        source = HarvestSource(url=u'http://test-source.com', type='ckan')
        Session.add(source)
        Session.commit()
    if user_name:
        extra_environ = {'REMOTE_USER': user_name.encode('utf8')}
    else:
        extra_environ = {}
    # List
    res = self.app.get('/harvest', status=status,
                       extra_environ=extra_environ)
    # Create
    res = self.app.get('/harvest/new', status=status,
                       extra_environ=extra_environ)
    # Read
    res = self.app.get('/harvest/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Edit
    res = self.app.get('/harvest/edit/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Refresh
    res = self.app.get('/harvest/refresh/%s' % source.id, status=status,
                       extra_environ=extra_environ)
def create_doi_from_identifier(package_id, identifier):
    """Persist a DOI record for an identifier minted elsewhere.

    Note: performs no uniqueness check on *identifier*.
    """
    record = DOI(package_id=package_id, identifier=identifier)
    Session.add(record)
    Session.commit()
    return record
def test_auth_publisher_profile_different_publisher(self):
    """A publisher must not manage another publisher's harvest sources."""
    # Create a source for publisher 1
    source = HarvestSource(url=u'http://test-source.com',
                           type='ckan',
                           publisher_id=self.publisher1.id)
    Session.add(source)
    Session.commit()
    extra_environ = {
        'REMOTE_USER': self.publisher2_user.name.encode('utf8')
    }
    # List (publishers can see the sources list)
    res = self.app.get('/harvest', extra_environ=extra_environ)
    assert 'Harvesting Sources' in res
    # Create
    res = self.app.get('/harvest/new', extra_environ=extra_environ)
    assert 'New harvest source' in res
    assert 'publisher_id' in res
    # Check that this publisher is not allowed to manage sources from
    # other publishers
    status = 401
    # Read
    res = self.app.get('/harvest/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Edit
    res = self.app.get('/harvest/edit/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Refresh
    res = self.app.get('/harvest/refresh/%s' % source.id, status=status,
                       extra_environ=extra_environ)
def setup(srid=None):
    """Create the package_extent table once PostGIS is available.

    :param srid: spatial reference id for the geometry column; None
        lets define_spatial_tables pick its default.
    :raises Exception: when PostGIS is not installed in the database.
    """
    if package_extent_table is None:
        define_spatial_tables(srid)
        log.debug('Spatial tables defined in memory')
    if model.package_table.exists():
        # PostGIS installs these metadata tables; their absence means the
        # extension is missing from this database.
        if not Table('geometry_columns', meta.metadata).exists() or \
                not Table('spatial_ref_sys', meta.metadata).exists():
            raise Exception(
                'The spatial extension is enabled, but PostGIS '
                'has not been set up in the database. '
                'Please refer to the "Setting up PostGIS" section in the README.'
            )
        if not package_extent_table.exists():
            try:
                package_extent_table.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if package_extent_table.exists():
                    Session.execute('DROP TABLE package_extent')
                    Session.commit()
                raise e
            log.debug('Spatial tables created')
        else:
            log.debug('Spatial tables already exist')
            # Future migrations go here
    else:
        log.debug('Spatial tables creation deferred')
def _refresh_harvest_objects(self, harvest_object, package_id): """ Perform harvester housekeeping: - Flag the other objects of the source as not current - Set a refernce to the package in the harvest object - Flag it as current - And save the changes """ # Flag the other objects of this source as not current from ckanext.harvest.model import harvest_object_table u = update(harvest_object_table) \ .where(harvest_object_table.c.package_id == bindparam('pkg_id')) \ .values(current=False) Session.execute(u, params={'pkg_id': package_id}) Session.commit() # Refresh current object from session, otherwise the # import paster command fails # (Copied from the Gemini harvester--not sure if necessary) Session.remove() Session.add(harvest_object) Session.refresh(harvest_object) # Set reference to package in the HarvestObject and flag it as # the current one if not harvest_object.package_id: harvest_object.package_id = package_id harvest_object.current = True harvest_object.save()
def setup(srid=None):
    """Create the package_extent table once PostGIS is available.

    :param srid: spatial reference id for the geometry column; None
        lets define_spatial_tables pick its default.
    :raises Exception: when PostGIS is not installed in the database.
    """
    if package_extent_table is None:
        define_spatial_tables(srid)
        log.debug('Spatial tables defined in memory')
    if model.package_table.exists():
        # PostGIS installs these metadata tables; their absence means the
        # extension is missing from this database.
        if not Table('geometry_columns', meta.metadata).exists() or \
                not Table('spatial_ref_sys', meta.metadata).exists():
            raise Exception('The spatial extension is enabled, but PostGIS '
                            'has not been set up in the database. '
                            'Please refer to the "Setting up PostGIS" section in the README.')
        if not package_extent_table.exists():
            try:
                package_extent_table.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if package_extent_table.exists():
                    Session.execute('DROP TABLE package_extent')
                    Session.commit()
                raise e
            log.debug('Spatial tables created')
        else:
            log.debug('Spatial tables already exist')
def setup_class(cls):
    """ Remove any initial sessions. """
    # Drop any session left over from a previous test class, then build
    # the standard CKAN test fixtures.
    Session.remove()
    # TODO: Should also remove test data
    CreateTestData.create()
def delete_tests(self):
    """Remove every DOI whose identifier contains the test prefix."""
    print 'Deleting all test DOIs'
    # Bulk delete; synchronize_session=False skips per-object session
    # bookkeeping (no loaded instances need syncing here).
    Session.query(DOI).filter(
        DOI.identifier.like('%' + TEST_PREFIX + '%')).delete(synchronize_session=False)
    Session.commit()
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number:
    10.5072/0044634

    Checks the random number doesn't exist in the table or the datacite
    repository.

    :param package_id: package the new DOI is associated with
    :return: the newly created (and committed) DOI object
    """
    datacite_api = DOIDataCiteAPI()
    while True:
        identifier = os.path.join(get_prefix(),
                                  '{0:07}'.format(random.randint(1, 100000)))
        # Check this identifier doesn't exist in the table
        if not Session.query(DOI).filter(DOI.identifier == identifier).count():
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                # Unknown at DataCite: identifier is free to use.
                pass
            else:
                if datacite_doi.text:
                    continue
            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            return doi
def create_application(data_dict, image):
    """Validate *data_dict*, create an Application (optionally with an
    uploaded image), persist it and return it.

    :raises ValidationError: when the input fails the application schema.
    """
    # The uploaded file is handled separately from the validated fields.
    data_dict.pop('image', None)

    data, errors = validate(data_dict, application_schema())
    if errors:
        raise ValidationError(errors, error_summary(errors))

    field_names = (
        'title', 'url', 'description', 'featured', 'submitter',
        'developer', 'developer_url', 'license', 'code_url', 'api_url',
    )
    fields = {key: data.get(key) for key in field_names}
    application = Application(
        name=Application.generate_name(data.get('title')), **fields)

    # Tags arrive as a single space-separated string.
    application.update_tags(data.get('tags', '').split(' '))

    if image and image.filename and image.file:
        Session.add(ApplicationImage(name=image.filename,
                                     data=image.file.read(),
                                     application=application))

    application.save()
    return application
def create(cls, term, label, lang):
    """Instantiate, stage and flush a new record; return it."""
    record = cls(term=term, label=label, lang=lang)
    Session.add(record)
    # Flush (not commit) so the caller's transaction controls durability.
    Session.flush()
    return record
def test_spatial_extra_base(self, app):
    """Posting a 'spatial' extra via the edit form creates a PackageExtent."""
    user = factories.User()
    env = {"REMOTE_USER": user["name"].encode("ascii")}
    dataset = factories.Dataset(user=user)
    offset = url_for("dataset.edit", id=dataset["id"])
    res = app.get(offset, extra_environ=env)
    data = {
        "name": dataset['name'],
        "extras__0__key": u"spatial",
        "extras__0__value": self.geojson_examples["point"]
    }
    res = app.post(offset, environ_overrides=env, data=data)
    assert "Error" not in res, res
    package_extent = (Session.query(PackageExtent).filter(
        PackageExtent.package_id == dataset["id"]).first())
    geojson = json.loads(self.geojson_examples["point"])
    assert package_extent.package_id == dataset["id"]
    # GeoAlchemy2 path: read coordinates back via PostGIS functions.
    from sqlalchemy import func
    assert (Session.query(func.ST_X(
        package_extent.the_geom)).first()[0] == geojson["coordinates"][0])
    assert (Session.query(func.ST_Y(
        package_extent.the_geom)).first()[0] == geojson["coordinates"][1])
    assert package_extent.the_geom.srid == self.db_srid
def _create_unique_identifier(package_doi_identifier=None):
    """Generate a DOI identifier unused both locally and at DataCite.

    With *package_doi_identifier* given, a 3-digit resource suffix is
    appended to it; otherwise a fresh 7-digit package identifier is
    built from the configured prefix.
    """
    datacite_api = DOIDataCiteAPI()
    while True:
        if package_doi_identifier:
            identifier = os.path.join(package_doi_identifier,
                                      '{0:03}'.format(random.randint(1, 999)))
            query = Session.query(CeonResourceDOI)
            query = query.filter(CeonResourceDOI.identifier == identifier)
            exists = query.count()
        else:
            identifier = os.path.join(
                get_doi_prefix(), '{0:07}'.format(random.randint(1, 9999999)))
            query = Session.query(CeonPackageDOI)
            query = query.filter(CeonPackageDOI.identifier == identifier)
            exists = query.count()
        # Check this identifier doesn't exist in the table
        if not exists:
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            # TODO remove the nest 2 lines (ConnectionError) ignoring
            except ConnectionError:
                pass
            else:
                if datacite_doi.text:
                    continue
            return identifier
def authenticate(self, environ, identity):
    """
    Mimic most of UsernamePasswordAuthenticator.authenticate
    but add account lockout after 10 failed attempts.

    :returns: the user name on success, otherwise None.
    """
    if 'login' not in identity or 'password' not in identity:
        return None
    login = identity.get('login')
    user = User.by_name(login)
    if user is None:
        LOG.debug('Login failed - username %r not found', login)
        return None
    qgov_user = Session.query(QGOVUser).filter_by(name=login).first()
    if qgov_user is None:
        # Missing lockout record: treat as a failed login instead of
        # crashing with AttributeError on the attribute access below.
        LOG.debug('Login as %r failed - no lockout record', login)
        return None
    if qgov_user.login_attempts >= 10:
        LOG.debug('Login as %r failed - account is locked', login)
    elif user.validate_password(identity.get('password')):
        # reset attempt count to 0
        qgov_user.login_attempts = 0
        Session.commit()
        return user.name
    else:
        LOG.debug('Login as %r failed - password not valid', login)
        # Count the failure towards the 10-attempt lockout.
        qgov_user.login_attempts += 1
        Session.commit()
    return None
def test_create_extent(self):
    """A saved PackageExtent round-trips the point geometry and SRID."""
    package = factories.Dataset()
    geojson = json.loads(self.geojson_examples['point'])
    shape = asShape(geojson)
    package_extent = PackageExtent(package_id=package['id'],
                                   the_geom=WKTElement(shape.wkt,
                                                       self.db_srid))
    package_extent.save()
    assert_equals(package_extent.package_id, package['id'])
    if legacy_geoalchemy:
        # GeoAlchemy 1.x: coordinates are read back via Session.scalar.
        assert_equals(Session.scalar(package_extent.the_geom.x),
                      geojson['coordinates'][0])
        assert_equals(Session.scalar(package_extent.the_geom.y),
                      geojson['coordinates'][1])
        assert_equals(Session.scalar(package_extent.the_geom.srid),
                      self.db_srid)
    else:
        # GeoAlchemy2: read coordinates via PostGIS ST_X/ST_Y functions.
        from sqlalchemy import func
        assert_equals(
            Session.query(func.ST_X(package_extent.the_geom)).first()[0],
            geojson['coordinates'][0])
        assert_equals(
            Session.query(func.ST_Y(package_extent.the_geom)).first()[0],
            geojson['coordinates'][1])
        assert_equals(package_extent.the_geom.srid, self.db_srid)
def test_new(self):
    """Creating a dataset with a 'spatial' extra creates its PackageExtent."""
    name = 'test-spatial-dataset-1'
    offset = url_for(controller='package', action='new')
    res = self.app.get(offset, extra_environ=self.extra_environ)
    assert 'Add - Datasets' in res
    fv = res.forms['dataset-edit']
    prefix = ''
    fv[prefix + 'name'] = name
    fv[prefix + 'extras__0__key'] = u'spatial'
    fv[prefix + 'extras__0__value'] = self.geojson_examples['point']
    res = fv.submit('save', extra_environ=self.extra_environ)
    assert not 'Error' in res, res
    package = Package.get(name)
    # Check that a PackageExtent object has been created
    package_extent = Session.query(PackageExtent).filter(
        PackageExtent.package_id == package.id).first()
    geojson = json.loads(self.geojson_examples['point'])
    assert package_extent
    assert package_extent.package_id == package.id
    # Legacy GeoAlchemy accessors go through Session.scalar.
    assert Session.scalar(
        package_extent.the_geom.x) == geojson['coordinates'][0]
    assert Session.scalar(
        package_extent.the_geom.y) == geojson['coordinates'][1]
    assert Session.scalar(package_extent.the_geom.srid) == self.db_srid
def _run_job_for_single_document(self, job, force_import=False,
                                 expect_gather_errors=False,
                                 expect_obj_errors=False):
    """Run gather/fetch/import for a job expected to produce one object.

    Asserts presence/absence of gather and object errors according to
    the expect_* flags and returns the imported HarvestObject.
    """
    harvester = GeminiDocHarvester()
    harvester.force_import = force_import
    object_ids = harvester.gather_stage(job)
    # NOTE(review): the second operand is the assert *message*, so only
    # truthiness of object_ids is checked here — confirm intent.
    assert object_ids, len(object_ids) == 1
    if expect_gather_errors:
        assert len(job.gather_errors) > 0
    else:
        assert len(job.gather_errors) == 0
    assert harvester.fetch_stage(object_ids) == True
    obj = HarvestObject.get(object_ids[0])
    assert obj, obj.content
    harvester.import_stage(obj)
    # Re-read DB state mutated by import_stage.
    Session.refresh(obj)
    if expect_obj_errors:
        assert len(obj.errors) > 0
    else:
        assert len(obj.errors) == 0
    job.status = u'Finished'
    job.save()
    return obj
def handle_submit(self, id):
    """Email a DOI publish request for dataset *id* and record it.

    Rejects private datasets, mails each configured support address,
    stores a DoiRequest row and redirects back to the dataset page.
    """
    data = clean_dict(dict_fns.unflatten(tuplize_dict(parse_params(
        request.params))))
    data['dataset_url'] = toolkit.url_for(
        controller='package',
        action='read',
        id=id,
        qualified=True
    )
    package = get_action('package_show')(None, {'id': id})
    # Private datasets cannot be submitted for DOI publication.
    self.fail_if_private(package, data['dataset_url'])
    # Comma separated config var
    to_addrs = config['ckanext.ands.support_emails'].split(',')
    subject = 'DataPortal Support: Request to publish dataset'
    body = base.render(
        'package/doi_email.text',
        extra_vars=data)
    for email in to_addrs:
        mail_recipient('Dataportal support', email, subject, body)
    data['package_id'] = package['id']
    data['user_id'] = c.userobj.id
    doi_request = DoiRequest(**data)
    Session.add(doi_request)
    Session.commit()
    h.flash_success("DOI Request sent")
    return toolkit.redirect_to(data['dataset_url'])
def handle_submit(self, id):
    """Email a DOI publish request for dataset *id* and record it.

    Rejects private datasets, mails each configured support address,
    stores a DoiRequest row and redirects back to the dataset page.
    """
    data = clean_dict(
        dict_fns.unflatten(tuplize_dict(parse_params(request.params))))
    data['dataset_url'] = toolkit.url_for(controller='package',
                                          action='read',
                                          id=id,
                                          qualified=True)
    package = get_action('package_show')(None, {'id': id})
    # Private datasets cannot be submitted for DOI publication.
    self.fail_if_private(package, data['dataset_url'])
    # Comma separated config var
    to_addrs = config['ckanext.ands.support_emails'].split(',')
    subject = 'DataPortal Support: Request to publish dataset'
    body = base.render('package/doi_email.text', extra_vars=data)
    for email in to_addrs:
        mail_recipient('Dataportal support', email, subject, body)
    data['package_id'] = package['id']
    data['user_id'] = c.userobj.id
    doi_request = DoiRequest(**data)
    Session.add(doi_request)
    Session.commit()
    h.flash_success("DOI Request sent")
    return toolkit.redirect_to(data['dataset_url'])
def authenticate(self, environ, identity):
    """Validate a login/password pair with a 10-attempt lockout.

    :returns: the user name on success, otherwise None.
    """
    if 'login' not in identity or 'password' not in identity:
        return None
    login = identity.get('login')
    user = User.by_name(login)
    if user is None:
        log.debug('Login failed - username %r not found', login)
        return None
    seedUser = Session.query(SEEDUser).filter_by(name=login).first()
    if seedUser is None:
        # Missing lockout record: treat as a failed login instead of
        # crashing with AttributeError on the attribute access below.
        log.debug('Login as %r failed - no lockout record', login)
        return None
    if seedUser.login_attempts >= 10:
        log.debug('Login as %r failed - account is locked', login)
    elif user.validate_password(identity.get('password')):
        # reset attempt count to 0
        seedUser.login_attempts = 0
        Session.commit()
        return user.name
    else:
        log.debug('Login as %r failed - password not valid', login)
        # Count the failure towards the 10-attempt lockout.
        seedUser.login_attempts += 1
        Session.commit()
    return None
def bbox_query_ordered(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box. Returns packages in order
    of how similar the data's bounding box is to the search box (best first).

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''
    input_geometry = _bbox_2_wkt(bbox, srid)
    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}
    # First get the area of the query box.
    # ST_GeomFromText replaces the non-prefixed GeomFromText alias,
    # which was removed in PostGIS 2.0.
    sql = "SELECT ST_Area(ST_GeomFromText(:query_bbox, :query_srid));"
    params['search_area'] = Session.execute(sql, params).fetchone()[0]
    # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom, POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking, package_extent.package_id AS package_id FROM package_extent, package WHERE package_extent.package_id = package.id AND ST_Intersects(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid)) AND package.state = 'active' ORDER BY spatial_ranking desc"""
    extents = Session.execute(sql, params).fetchall()
    log.debug('Spatial results: %r',
              [('%.2f' % extent.spatial_ranking, extent.package_id)
               for extent in extents[:20]])
    return extents
def test_create_extent(self):
    """A saved PackageExtent round-trips the point geometry and SRID."""
    package = factories.Dataset()
    geojson = json.loads(self.geojson_examples['point'])
    shape = asShape(geojson)
    package_extent = PackageExtent(package_id=package['id'],
                                   the_geom=WKTElement(
                                       shape.wkt, self.db_srid))
    package_extent.save()
    assert_equals(package_extent.package_id, package['id'])
    if legacy_geoalchemy:
        # GeoAlchemy 1.x: coordinates are read back via Session.scalar.
        assert_equals(Session.scalar(package_extent.the_geom.x),
                      geojson['coordinates'][0])
        assert_equals(Session.scalar(package_extent.the_geom.y),
                      geojson['coordinates'][1])
        assert_equals(Session.scalar(package_extent.the_geom.srid),
                      self.db_srid)
    else:
        # GeoAlchemy2: read coordinates via PostGIS ST_X/ST_Y functions.
        from sqlalchemy import func
        assert_equals(
            Session.query(func.ST_X(package_extent.the_geom)).first()[0],
            geojson['coordinates'][0])
        assert_equals(
            Session.query(func.ST_Y(package_extent.the_geom)).first()[0],
            geojson['coordinates'][1])
        assert_equals(package_extent.the_geom.srid, self.db_srid)
def update_extents():
    """Regenerate package_extent rows from each package's 'spatial' extra.

    Prints a summary and any JSON decoding errors when done.
    """
    from ckan.model import PackageExtra, Package, Session
    packages = [extra.package
                for extra in
                Session.query(PackageExtra).filter(
                    PackageExtra.key == 'spatial').all()]
    errors = []
    count = 0
    for package in packages:
        try:
            value = package.extras['spatial']
            log.debug('Received: %r' % value)
            geometry = json.loads(value)
            count += 1
        except (ValueError, TypeError) as e:
            errors.append(u'Package %s - Error decoding JSON object: %s' %
                          (package.id, six.text_type(e)))
            # Skip this package: without a freshly parsed geometry we
            # must not save (the old code fell through and saved an
            # unbound or stale geometry from a previous iteration).
            continue
        save_package_extent(package.id, geometry)
    Session.commit()
    if errors:
        msg = 'Errors were found:\n%s' % '\n'.join(errors)
        print(msg)
    msg = "Done. Extents generated for %i out of %i packages" % (
        count, len(packages))
    print(msg)
def _save_object_error(self, message, obj, stage=u"Fetch", line=None):
    """Record a HarvestObjectError, retrying once after a rollback.

    The first save can fail with InvalidRequestError when the session
    is in a failed transaction state; roll back and retry once.
    """
    err = HarvestObjectError(message=message, object=obj,
                             stage=stage, line=line)
    try:
        err.save()
    except InvalidRequestError:
        # Clear the broken transaction, then retry the save.
        # (Py2-only `except X, e` syntax replaced; the binding was unused.)
        Session.rollback()
        err.save()
def test_new(self):
    """Creating a dataset with a 'spatial' extra creates its PackageExtent."""
    name = "test-spatial-dataset-1"
    offset = url_for(controller="package", action="new")
    res = self.app.get(offset, extra_environ=self.extra_environ)
    assert "Add - Datasets" in res
    fv = res.forms["dataset-edit"]
    prefix = ""
    fv[prefix + "name"] = name
    fv[prefix + "extras__0__key"] = u"spatial"
    fv[prefix + "extras__0__value"] = self.geojson_examples["point"]
    res = fv.submit("save", extra_environ=self.extra_environ)
    assert not "Error" in res, res
    package = Package.get(name)
    # Check that a PackageExtent object has been created
    package_extent = Session.query(PackageExtent).filter(PackageExtent.package_id == package.id).first()
    geojson = json.loads(self.geojson_examples["point"])
    assert package_extent
    assert package_extent.package_id == package.id
    # Legacy GeoAlchemy accessors go through Session.scalar.
    assert Session.scalar(package_extent.the_geom.x) == geojson["coordinates"][0]
    assert Session.scalar(package_extent.the_geom.y) == geojson["coordinates"][1]
    assert Session.scalar(package_extent.the_geom.srid) == self.db_srid
def bbox_query_ordered(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box. Returns packages in order
    of how similar the data's bounding box is to the search box (best first).

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''
    input_geometry = _bbox_2_wkt(bbox, srid)
    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}
    # First get the area of the query box
    sql = "SELECT ST_Area(ST_GeomFromText(:query_bbox, :query_srid));"
    params['search_area'] = Session.execute(sql, params).fetchone()[0]
    # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom, POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking, package_extent.package_id AS package_id FROM package_extent, package WHERE package_extent.package_id = package.id AND ST_Intersects(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid)) AND package.state = 'active' ORDER BY spatial_ranking desc"""
    extents = Session.execute(sql, params).fetchall()
    log.debug('Spatial results: %r',
              [('%.2f' % extent.spatial_ranking, extent.package_id)
               for extent in extents[:20]])
    return extents
def _create_unique_identifier(package_doi_identifier=None):
    """Generate a DOI identifier unused both locally and at DataCite.

    With *package_doi_identifier* given, a 3-digit resource suffix is
    appended to it; otherwise a fresh 7-digit package identifier is
    built from the configured prefix.
    """
    datacite_api = DOIDataCiteAPI()
    while True:
        if package_doi_identifier:
            identifier = os.path.join(package_doi_identifier,
                                      '{0:03}'.format(random.randint(1, 999)))
            query = Session.query(CeonResourceDOI)
            query = query.filter(CeonResourceDOI.identifier == identifier)
            exists = query.count()
        else:
            identifier = os.path.join(get_doi_prefix(),
                                      '{0:07}'.format(random.randint(1, 9999999)))
            query = Session.query(CeonPackageDOI)
            query = query.filter(CeonPackageDOI.identifier == identifier)
            exists = query.count()
        # Check this identifier doesn't exist in the table
        if not exists:
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            # TODO remove the nest 2 lines (ConnectionError) ignoring
            except ConnectionError:
                pass
            else:
                if datacite_doi.text:
                    continue
            return identifier
def test_auth_publisher_profile_different_publisher(self):
    """A publisher must not manage another publisher's harvest sources."""
    # Create a source for publisher 1
    source = HarvestSource(url=u'http://test-source.com', type='ckan',
                           publisher_id=self.publisher1.id)
    Session.add(source)
    Session.commit()
    extra_environ = {'REMOTE_USER': self.publisher2_user.name.encode('utf8')}
    # List (publishers can see the sources list)
    res = self.app.get('/harvest', extra_environ=extra_environ)
    assert 'Harvesting Sources' in res
    # Create
    res = self.app.get('/harvest/new', extra_environ=extra_environ)
    assert 'New harvest source' in res
    assert 'publisher_id' in res
    # Check that this publisher is not allowed to manage sources from
    # other publishers
    status = 401
    # Read
    res = self.app.get('/harvest/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Edit
    res = self.app.get('/harvest/edit/%s' % source.id, status=status,
                       extra_environ=extra_environ)
    # Refresh
    res = self.app.get('/harvest/refresh/%s' % source.id, status=status,
                       extra_environ=extra_environ)
def setup(self):
    """Build two publisher groups, each with one admin user."""
    model.Session.remove()
    CreateTestData.create(auth_profile='publisher')

    self.sysadmin_user = model.User.get('testsysadmin')
    # 'annafan' does not belong to any publisher.
    self.normal_user = model.User.get('annafan')
    self.publisher1_user = model.User.by_name('russianfan')
    self.publisher2_user = model.User.by_name('tester')

    # Create two Publishers and make each user an admin of one.
    rev = model.repo.new_revision()
    self.publisher1 = model.Group(name=u'test-publisher1',
                                  title=u'Test Publihser 1',
                                  type=u'publisher')
    self.publisher2 = model.Group(name=u'test-publisher2',
                                  title=u'Test Publihser 2',
                                  type=u'publisher')
    Session.add(self.publisher1)
    Session.add(self.publisher2)
    Session.add(model.Member(table_name='user',
                             table_id=self.publisher1_user.id,
                             group=self.publisher1,
                             capacity='admin'))
    Session.add(model.Member(table_name='user',
                             table_id=self.publisher2_user.id,
                             group=self.publisher2,
                             capacity='admin'))
    Session.commit()
def test_ckan_duplicated_name(self):
    """Harvesting two datasets with the same name mangles the second one.

    Both imports share title/name 'duplicated title'/'duplicated-title' but
    have different 'identifier' values, so both succeed; the second gets a
    numeric suffix appended to its name.
    """
    # Base fields shared by both harvested datasets.
    dataset0 = {
        'owner_org': self.org['id'],
        'holder_name': 'test holder',
        'holder_identifier': 'abcdef',
        'notes': 'some notes',
        'modified': '2000-01-01',
        'theme': 'AGRI',
        'frequency': 'UNKNOWN',
        'publisher_name': 'publisher',
        'identifier': 'aasdfa',
        'publisher_identifier': 'publisher',
        'resources': [],
        'extras': [],
    }
    dataset1 = {
        'owner_org': self.org['id'],
        'title': 'duplicated title',
        'name': 'duplicated-title',
        'id': 'dummyid'
    }
    # NOTE: update(dataset0) overwrites owner_org and sets identifier etc.
    dataset1.update(dataset0)
    data = json.dumps(dataset1)
    # First import: should create the package under the requested name.
    harvest_dict = self._create_harvest_obj('http://mock/source/',
                                            name='dupname1',
                                            owner_org=self.org['id'])
    harvest_obj = HarvestObject.get(harvest_dict['id'])
    harvest_obj.content = data
    h = DCATRDFHarvester()
    import_successful = h.import_stage(harvest_obj)
    self.assertTrue(import_successful, harvest_obj.errors)
    Session.flush()
    dataset1['_id'] = harvest_obj.package_id

    # Second dataset: same title/name, different identifier → new package.
    dataset2 = {'title': 'duplicated title',
                'name': 'duplicated-title',
                'id': 'dummyid2'}
    dataset2.update(dataset0)
    dataset2['identifier'] = 'otherid'
    data = json.dumps(dataset2)
    harvest_dict = self._create_harvest_obj('http://mock/source/',
                                            name='dupname2',
                                            owner_org=self.org['id'])
    harvest_obj = HarvestObject.get(harvest_dict['id'])
    harvest_obj.content = data
    h = DCATRDFHarvester()
    import_successful = h.import_stage(harvest_obj)
    self.assertTrue(import_successful, harvest_obj.errors)
    Session.flush()
    dataset2['_id'] = harvest_obj.package_id

    # duplicated names are mangled, one should have numeric suffix
    pkg_dict = helpers.call_action('package_show', context={},
                                   name_or_id=dataset1['_id'])
    self.assertEqual(pkg_dict['title'], dataset1['title'])
    self.assertEqual(pkg_dict['name'], 'duplicated-title')

    pkg_dict = helpers.call_action('package_show', context={},
                                   name_or_id=dataset2['_id'])
    self.assertEqual(pkg_dict['title'], dataset2['title'])
    self.assertEqual(pkg_dict['name'], 'duplicated-title1')
def setup_class(self):
    """Drop any leftover session state and create the standard fixtures."""
    Session.remove()  # TODO: Should also remove test data
    CreateTestData.create()
    self.sysadmin = model.User.get('testsysadmin')
def get(cls, reference):
    """Return the IssueCategory referenced by its id or name.

    :param reference: an ``int`` id, or a name string
    :returns: the matching IssueCategory, or None if there is no match
    """
    query = Session.query(cls)
    # isinstance, not `type(...) is int`, so int subclasses also work.
    if isinstance(reference, int):
        # numeric reference: look up by primary key
        return query.filter(cls.id == reference).first()
    # otherwise treat the reference as a name
    return query.filter(cls.name == reference).first()
def _execute_sql(cls, script):
    """Bind the session to the configured engine and run *script* on it."""
    Session.bind = create_engine(cls.sqlalchemy_url)
    conn = Session.connection()
    conn.execute(script)
    Session.commit()
def _create_pkg(self):
    """Rename the 'annakarenina' fixture, attach a resource, return it."""
    model.repo.new_revision()
    package = Package.get("annakarenina")
    package.name = "fookarenina"
    package.add_resource("www.google.com", description="foo", name="foo")
    Session.add(package)
    Session.commit()
    return package
def _create_pkg(self):
    """Rename the 'annakarenina' fixture, attach a resource, return it."""
    model.repo.new_revision()
    dataset = Package.get('annakarenina')
    dataset.name = "fookarenina"
    dataset.add_resource('www.google.com', description='foo', name="foo")
    Session.add(dataset)
    Session.commit()
    return dataset
def import_stage(self, harvest_object):
    """Import a harvested Swisstopo layer as a CKAN package.

    Returns False when no harvest object is given; otherwise creates or
    updates the package and commits. Any exception is logged and re-raised.
    (Note: the success path has no explicit return value.)
    """
    log.debug('In SwisstopoHarvester import_stage')
    if not harvest_object:
        log.error('No harvest object received')
        return False
    try:
        package_dict = json.loads(harvest_object.content)
        # The harvest GUID doubles as the package id; the name is derived
        # from the layer name.
        package_dict['id'] = harvest_object.guid
        package_dict['name'] = munge_title_to_name(
            package_dict['layer_name']
        )
        user = model.User.get(self.HARVEST_USER)
        context = {
            'model': model,
            'session': Session,
            'user': self.HARVEST_USER
        }
        # Find or create group the dataset should get assigned to
        package_dict['groups'] = self._find_or_create_groups(context)
        # Find or create the organization
        # the dataset should get assigned to
        package_dict['owner_org'] = self._find_or_create_organization(
            context,
            package_dict
        )
        # Save license url in extras
        extras = []
        if 'license_url' in package_dict:
            extras.append(('license_url', package_dict['license_url']))
        package_dict['extras'] = extras
        # NOTE(review): Package.get may return None when the package does
        # not exist yet — confirm PackageRole tolerates that on first run.
        package = model.Package.get(package_dict['id'])
        model.PackageRole(
            package=package,
            user=user,
            role=model.Role.ADMIN
        )
        log.debug(
            'Save or update package %s (%s)' %
            (package_dict['name'], package_dict['id'])
        )
        self._create_or_update_package(package_dict, harvest_object)
        log.debug('Save or update term translations')
        self._submit_term_translations(context, package_dict)
        Session.commit()
    except Exception, e:
        log.exception(e)
        raise
def update_stat_metameta_table(org_id, metameta_id, state):
    """Update the state (and modified_date) of one metameta row.

    :param org_id: organisation id the row belongs to
    :param metameta_id: id of the metameta row to update
    :param state: new state value to store
    """
    # Use bound parameters instead of string interpolation so the values
    # cannot break or inject into the SQL (same style as the other raw-SQL
    # helpers in this codebase).
    update_sql = ("UPDATE metameta SET state=:state, modified_date=:modified "
                  "WHERE id=:metameta_id AND org_id=:org_id;")
    Session.execute(update_sql, {
        'state': state,
        'modified': datetime.datetime.utcnow().isoformat(),
        'metameta_id': metameta_id,
        'org_id': org_id,
    })
    Session.commit()
def setup_class(cls):
    """Reset the session, build fixtures, and fetch the sitemap once."""
    Session.remove()
    CreateTestData.create()
    sitemap_url = url_for(
        controller="ckanext.sitemap.controller:SitemapController",
        action="view")
    cls.cont = cls.app.get(sitemap_url)
    cls.content_file = StringIO(cls.cont.body)
def _save_gather_error(self, message, job):
    """Persist a HarvestGatherError for *job*, always logging the message.

    A failed flush leaves the session unusable, so on InvalidRequestError
    the session is rolled back and the save retried once.
    """
    error = HarvestGatherError(message=message, job=job)
    try:
        error.save()
    except InvalidRequestError:
        Session.rollback()
        error.save()
    finally:
        log.error(message)
def _execute_script(script_path):
    """Execute the SQL script at *script_path* statement by statement.

    Strips '--' comments and newlines/tabs from each ';'-separated chunk
    and skips empty chunks, then commits.

    :param script_path: filesystem path of the SQL script to run
    """
    conn = Session.connection()
    # `with` ensures the script file is closed (the original leaked the
    # file handle).
    with open(script_path, 'r') as f:
        script = f.read()
    for cmd in script.split(';'):
        # Drop SQL line comments and whitespace control characters.
        cmd = re.sub(r'--(.*)|[\n\t]', '', cmd)
        if len(cmd):
            conn.execute(cmd)
    Session.commit()
def insert_metameta_options(org_id, key, option_value):
    """Insert one row into the metameta_options table for an organisation.

    :param org_id: organisation the option belongs to
    :param key: option key
    :param option_value: option value to store
    """
    # Bound parameters instead of string interpolation: the original
    # %-formatting was vulnerable to SQL injection / broken by quotes.
    sql = ("INSERT INTO metameta_options"
           "(id, org_id, key, option_value, modified_date) "
           "VALUES (:id, :org_id, :key, :option_value, :modified);")
    Session.execute(sql, {
        'id': str(uuid.uuid4()),
        'org_id': org_id,
        'key': key,
        'option_value': option_value,
        'modified': datetime.datetime.utcnow().isoformat(),
    })
    Session.commit()
    return
def populate_harvest_job(self, harvest_job, set_ids, config, client):
    """Build the HarvestObjects for one gather run.

    Collects package identifiers (optionally only those changed since the
    previous successful job), drops ones whose packages already exist
    unchanged, re-queues identifiers that errored last time, and saves one
    HarvestObject per remaining identifier.

    :returns: list of HarvestObject ids, or None when nothing was received
    """
    # Check if this source has been harvested before
    previous_job = Session.query(HarvestJob) \
        .filter(HarvestJob.source == harvest_job.source) \
        .filter(HarvestJob.gather_finished != None) \
        .filter(HarvestJob.id != harvest_job.id) \
        .order_by(HarvestJob.gather_finished.desc()) \
        .limit(1).first()

    # Only harvest incrementally when the source package has not been
    # modified since the previous gather started.
    last_time = None
    if previous_job and previous_job.finished and model.Package.get(harvest_job.source.id).metadata_modified < previous_job.gather_started:
        last_time = previous_job.gather_started.isoformat()

    # Collect package ids
    package_ids = list(self.get_package_ids(set_ids, config, last_time, client))
    log.debug('Identifiers: %s', package_ids)

    if not self._recreate(harvest_job) and package_ids:
        # Map CKAN-safe names back to raw identifiers; identifiers ending
        # in 'm' also register their 's' variant (name aliasing).
        converted_identifiers = {}
        for identifier in package_ids:
            converted_identifiers[datapid_to_name(identifier)] = identifier
            if identifier.endswith(u'm'):
                converted_identifiers[datapid_to_name(u"%ss" % identifier[0:-1])] = identifier

        # Remove identifiers whose package already exists under either name.
        for package in model.Session.query(model.Package).filter(model.Package.name.in_(converted_identifiers.keys())).all():
            converted_name = package.name
            if converted_identifiers[converted_name] not in package_ids:
                converted_name = "%sm" % converted_name[0:-1]
            package_ids.remove(converted_identifiers[converted_name])

    # Retry every identifier that errored in the previous job.
    if previous_job:
        for previous_error in [error.guid for error in Session.query(HarvestObject).
                               filter(HarvestObject.harvest_job_id == previous_job.id).
                               filter(HarvestObject.state == 'ERROR').all()]:
            if previous_error not in package_ids:
                package_ids.append(previous_error)

    try:
        object_ids = []
        if len(package_ids):
            # Honour an optional 'limit' from the source configuration.
            for package_id in islice(package_ids, config['limit']) if 'limit' in config else package_ids:
                # Create a new HarvestObject for this identifier
                obj = HarvestObject(guid=package_id, job=harvest_job)
                obj.save()
                object_ids.append(obj.id)
            log.debug('Object ids: {i}'.format(i=object_ids))
            return object_ids
        else:
            self._save_gather_error('No packages received for URL: {u}'.format(
                u=harvest_job.source.url), harvest_job)
            return None
    except Exception as e:
        self._save_gather_error('Gather: {e}'.format(e=e), harvest_job)
        raise
def setup_postgis_tables():
    """Create the PostGIS tables by running scripts/postgis.sql.

    Each ';'-separated statement is stripped of '--' comments and
    newlines/tabs before execution; the transaction is then committed.
    """
    conn = Session.connection()
    script_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'scripts', 'postgis.sql')
    # `with` ensures the script file is closed (the original leaked the
    # file handle).
    with open(script_path, 'r') as f:
        script = f.read()
    for cmd in script.split(';'):
        # Drop SQL line comments and whitespace control characters.
        cmd = re.sub(r'--(.*)|[\n\t]', '', cmd)
        if len(cmd):
            conn.execute(cmd)
    Session.commit()
def init_db():
    """Create the custom fields table for locales, cleaning up on failure."""
    # Setting up custom fields table for locales
    try:
        custom_field_table.create()
    except Exception,e:
        # Make sure the table does not remain incorrectly created
        if custom_field_table.exists():
            # NOTE(review): drops the literal table name
            # 'custom_field_table' — confirm this matches the name the
            # Table object was actually declared with.
            Session.execute('DROP TABLE custom_field_table')
            Session.commit()
        raise e