def purge_queues():
    """Empty the harvest queues of the configured message-queue backend.

    With the 'ampq' backend the gather and fetch queues are purged through
    a channel. With the 'redis' backend ``flushall()`` is called on the
    connection. Any other configured value leaves the connection untouched.
    """
    connection = get_connection()
    mq_type = config.get('ckan.harvest.mq.type')

    if mq_type == 'redis':
        # NOTE(review): flushall presumably clears every key on the Redis
        # server, not only the harvest ones - confirm before sharing a server.
        connection.flushall()
    elif mq_type == 'ampq':
        channel = connection.channel()
        for queue_name in ('ckan.harvest.gather', 'ckan.harvest.fetch'):
            channel.queue_purge(queue=queue_name)
def get_ofs():
    """Instantiate the OFS storage backend described by the ``ofs.*`` settings.

    ``ofs.impl`` names the implementation; every other ``ofs.``-prefixed
    setting is passed to its constructor as a keyword argument with the
    ``ofs.`` prefix removed.
    """
    backend_name = config.get('ofs.impl')
    options = dict(
        (key[4:], config.get(key))
        for key in config.keys()
        if key.startswith('ofs.') and key != 'ofs.impl'
    )
    backend_factory = get_impl(backend_name)
    return backend_factory(**options)
def _redis_connection(self):
    """Build a Redis client from the harvest message-queue settings.

    Host, port and database index come from the ``ckan.harvest.mq.*``
    configuration keys, falling back to ``HOSTNAME``, ``REDIS_PORT`` and
    ``REDIS_DB``.

    Returns:
        The ``redis.StrictRedis`` client, or ``None`` when it could not be
        built (an error line is printed in that case).
    """
    try:
        return redis.StrictRedis(
            host=config.get('ckan.harvest.mq.hostname', HOSTNAME),
            port=int(config.get('ckan.harvest.mq.port', REDIS_PORT)),
            db=int(config.get('ckan.harvest.mq.redis_db', REDIS_DB))
        )
    # Catch ordinary errors only: the previous bare ``except`` also
    # swallowed KeyboardInterrupt and SystemExit.
    except Exception:
        print(str(datetime.datetime.now()) + ' PUBLISH_OGC: Error Connecting to Redis while building publish_ogc_queue')
        return None
def resubmit_jobs():
    """Put stale harvest objects and jobs back on their Redis queues.

    Does nothing unless the configured queue type is 'redis'. Each
    ``harvest_object_id:<id>`` / ``harvest_job_id:<id>`` key holds a
    timestamp; when that timestamp is older than the limit for its queue
    the id is pushed back onto the queue and the key is deleted.
    """
    if config.get('ckan.harvest.mq.type') != 'redis':
        return
    redis = get_connection()
    log.debug('_create_or_update_package')

    # (queue name, maximum age in seconds). The queue name doubles as the
    # key prefix and as the field name of the JSON payload.
    queues = (
        ('harvest_object_id', 180),   # 3 minutes for fetch and import max
        ('harvest_job_id', 7200),     # 2 hours for a gather
    )
    for queue, max_age in queues:
        for key in redis.keys(queue + ':*'):
            stamp = redis.get(key)
            if stamp is None:
                # The key disappeared between keys() and get(); there is
                # nothing left to resubmit for it.
                continue
            started = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S.%f")
            # total_seconds() rather than .seconds: the latter wraps around
            # every 24 hours, so very old keys could look fresh.
            age = (datetime.datetime.now() - started).total_seconds()
            if age > max_age:
                redis.rpush(queue, json.dumps({queue: key.split(':')[-1]}))
                redis.delete(key)
def as_dict(self, only_active_children=True):
    """Return this comment as a dictionary, including its child comments.

    Args:
        only_active_children: when ``True`` (the default) only children
            whose state is 'active' are included. The same value is passed
            down to every nested level.

    Returns:
        dict describing the comment. ``username`` and ``user_email_hash``
        are ``None`` when the author's user record cannot be found.
    """
    user = model.User.get(self.user_id)
    name = (user.fullname or user.name) if user else 'anonymous'
    # Hack: never show the internal site user (or an empty name).
    if name == config.get('ckan.site_id', 'ckan_site_user') or not name:
        name = 'anonymous'

    d = {
        'id': self.id,
        'user_id': self.user_id,
        'user_display_name': name,
        # The user may no longer exist; do not dereference None.
        'username': user.name if user else None,
        'user_email_hash': user.email_hash if user else None,
        'subject': self.subject,
        'content': self.comment,
        'state': self.state,
        'thread_id': self.thread_id,
        'creation_date': self.creation_date.isoformat(),
    }
    if self.modified_date:
        d['modified_date'] = self.modified_date.isoformat()

    if only_active_children is True:
        children = [c for c in self.children if c.state == 'active']
    else:
        children = self.children
    # Propagate the flag so nested replies follow the same filtering rule.
    d['comments'] = [c.as_dict(only_active_children) for c in children]
    return d
def setup_class(self):
    """Prepare shared fixtures: harvest model, test app and sample XML.

    The sample file path is read from the ``tests`` section of
    ``tests_config.cfg`` next to this module and resolved relative to it.
    Its raw bytes end up in ``self.contentDataset``.
    """
    print("")
    harvest_model()

    # Paste TestApp used to simulate HTTP requests to CKAN.
    self.app = paste.fixture.TestApp(pylons.test.pylonsapp)

    # Read the test configuration options.
    here = os.path.dirname(os.path.abspath(__file__))
    defaults = {
        'ckan_host': '0.0.0.0',
    }
    parser = ConfigParser.RawConfigParser(defaults)
    parser.read(os.path.join(here, 'tests_config.cfg'))

    self.SampleDatasetFile = parser.get('tests', 'sample_file_path')
    if not self.SampleDatasetFile:
        raise Exception('You must add the sample xml path to the tests '
                        ' configuration file')

    sample_path = os.path.join(here, self.SampleDatasetFile)
    if not os.path.exists(sample_path):
        raise Exception('The file %s not found' % sample_path)

    with open(sample_path, 'rb') as handle:
        self.contentDataset = handle.read()
def setup(self):
    """Create the 'harvest' sysadmin, the test context and, for the
    publisher auth profile, a publisher group with an admin member."""
    # Sysadmin user the tests act as.
    sysadmin = model.User(name=u'harvest', password=u'test')
    model.add_user_to_role(sysadmin, model.Role.ADMIN, model.System())
    Session.add(sysadmin)
    Session.commit()

    self.context = {
        'model': model,
        'session': Session,
        'user': u'harvest',
        'schema': default_update_package_schema(),
        'api_version': '2',
    }

    if config.get('ckan.harvest.auth.profile') != u'publisher':
        return

    # Publisher profile: create a publisher user and group.
    model.repo.new_revision()
    self.publisher_user = model.User(name=u'test-publisher-user', password=u'test')
    self.publisher = model.Group(name=u'test-publisher', title=u'Test Publihser', type=u'publisher')
    Session.add(self.publisher_user)
    Session.add(self.publisher)
    Session.commit()

    membership = model.Member(table_name='user',
                              table_id=self.publisher_user.id,
                              group=self.publisher,
                              capacity='admin')
    Session.add(membership)
    Session.commit()
def get_ga_code():
    """
    Return the Google Analytics code defined in the settings (ckan.ini)
    under ``google_analytics_code``, or an empty string when it is not set.

    usage: {{ h.get_ga_code() }}
    """
    ga_code = config.get('google_analytics_code', '')
    return ga_code
def __init__(self, **kwargs):
    """Set every keyword argument as an attribute, then derive
    ``approval_status`` from the moderation settings.

    With ``ckan.comments.moderation`` enabled the comment is always
    pending. Otherwise it is pending only when
    ``ckan.comments.moderation.first_only`` is enabled and the author has
    no approved comment yet; in every other case it is approved.
    """
    for attr_name, attr_value in kwargs.items():
        setattr(self, attr_name, attr_value)

    # Auto-set some values based on configuration
    from pylons import config

    moderate_everything = toolkit.asbool(
        config.get('ckan.comments.moderation', 'true'))
    if moderate_everything:
        self.approval_status = COMMENT_PENDING
        return

    moderate_first_only = toolkit.asbool(
        config.get('ckan.comments.moderation.first_only', 'true'))
    # The count is only queried when first-comment moderation is on.
    if moderate_first_only and Comment.count_for_user(self.user, COMMENT_APPROVED) == 0:
        self.approval_status = COMMENT_PENDING
    else:
        self.approval_status = COMMENT_APPROVED
def setup(self):
    """Create the "harvest" sysadmin, the test context and, when the
    publisher auth profile is configured, a publisher with an admin member."""
    # Sysadmin user the tests act as.
    admin = model.User(name=u"harvest", password=u"test")
    model.add_user_to_role(admin, model.Role.ADMIN, model.System())
    Session.add(admin)
    Session.commit()

    schema = default_update_package_schema()
    self.context = {
        "model": model,
        "session": Session,
        "user": u"harvest",
        "schema": schema,
        "api_version": "2",
    }

    uses_publisher_profile = config.get("ckan.harvest.auth.profile") == u"publisher"
    if uses_publisher_profile:
        # Create a publisher user and the publisher group.
        model.repo.new_revision()
        self.publisher_user = model.User(name=u"test-publisher-user", password=u"test")
        self.publisher = model.Group(name=u"test-publisher", title=u"Test Publihser", type=u"publisher")
        for entity in (self.publisher_user, self.publisher):
            Session.add(entity)
        Session.commit()

        membership = model.Member(
            table_name="user",
            table_id=self.publisher_user.id,
            group=self.publisher,
            capacity="admin",
        )
        Session.add(membership)
        Session.commit()
def __init__(self, app, app_conf):
    """Store the wrapped app and read the WordPress cookie check interval.

    The interval comes from ``minutes_between_checking_wordpress_cookie``
    (default 30) and is stored in seconds. ``app_conf`` is accepted but
    not used here.
    """
    self.app = app
    self.wordpress_client = None
    self._user_name_prefix = 'user_wp'

    interval_minutes = int(
        config.get('minutes_between_checking_wordpress_cookie', 30))
    self.seconds_between_checking_wordpress_cookie = interval_minutes * 60
def get_connection():
    """Open a connection to the configured harvest message queue.

    The backend is read from ``ckan.harvest.mq.type`` (default
    ``MQ_TYPE``). An Exception is raised for any value other than
    'ampq' or 'redis'.
    """
    backend = config.get('ckan.harvest.mq.type', MQ_TYPE)
    if backend == 'redis':
        return get_connection_redis()
    if backend == 'ampq':
        return get_connection_ampq()
    raise Exception('not a valid queue type %s' % backend)
def resubmit_jobs():
    '''
    Examines the fetch and gather queues for items that are suspiciously
    old. These are removed from the queues and placed back on them afresh,
    to ensure the fetch & gather consumers are triggered to process them.

    Does nothing unless the configured queue type is 'redis'.
    '''
    if config.get('ckan.harvest.mq.type') != 'redis':
        return
    redis = get_connection()

    # (routing key, payload field name, maximum age in seconds)
    queues = (
        # fetch queue: 3 minutes for fetch and import max
        (get_fetch_routing_key(), 'harvest_object_id', 180),
        # gather queue: 2 hours for a gather
        (get_gather_routing_key(), 'harvest_job_id', 7200),
    )
    for routing_key, field, max_age in queues:
        for key in redis.keys(routing_key + ':*'):
            stamp = redis.get(key)
            if stamp is None:
                # The key disappeared between keys() and get(); there is
                # nothing left to resubmit for it.
                continue
            started = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S.%f")
            # total_seconds() rather than .seconds: the latter wraps around
            # every 24 hours, so very old keys could look fresh.
            age = (datetime.datetime.now() - started).total_seconds()
            if age > max_age:
                redis.rpush(routing_key,
                            json.dumps({field: key.split(':')[-1]}))
                redis.delete(key)
def get_connection():
    """Open a connection to the configured harvest message queue.

    The backend is read from ``ckan.harvest.mq.type`` (default
    ``MQ_TYPE``). Both 'amqp' and the legacy misspelling 'ampq' select
    AMQP. An Exception is raised for unknown values.
    """
    backend = config.get('ckan.harvest.mq.type', MQ_TYPE)
    if backend in ('amqp', 'ampq'):  # "ampq" is for compat with old typo
        connection = get_connection_amqp()
    elif backend == 'redis':
        connection = get_connection_redis()
    else:
        raise Exception('not a valid queue type %s' % backend)
    return connection
def wordpress_url():
    """
    Return the WordPress URL defined in the settings (ckan.ini) under
    ``wordpress_url``, defaulting to 'www.hri.fi'. The URL is used to
    build links to the right places (notably in dev and staging
    environments).

    usage: {{ h.wordpress_url() }}
    """
    url = config.get('wordpress_url', 'www.hri.fi')
    return url
def __before__(self):
    """Connect to the configured Google Docs spreadsheet.

    Reads the ``pdeu.gdocs.*`` settings, aborts with HTTP 500 when the
    username, password or document key is missing, then stores the client
    in ``self.client`` and the first matching table in ``self.table``.
    """
    # Check Google Docs parameters
    username = config.get('pdeu.gdocs.username', None)
    password = config.get('pdeu.gdocs.password', None)
    dockey = config.get('pdeu.gdocs.dockey', None)
    sheet = config.get('pdeu.gdocs.sheet', 'Sheet1')

    settings_complete = username and password and dockey
    if not settings_complete:
        log.error('Google Docs connection settings not specified')
        abort(500)

    # Setup connection
    self.client = gdata.spreadsheet.text_db.DatabaseClient(
        username=username, password=password)
    databases = self.client.GetDatabases(dockey)
    tables = databases[0].GetTables(name=sheet)
    self.table = tables[0]
    self.table.LookupFields()
def _log_out(self, environ, new_headers):
    '''
    "Log out" the current user by forcibly removing login cookies.

    The removal is two-fold:
    * new_headers is populated with expired cookies
    ** the CKAN cookie is always expired
    ** the WordPress cookie is expired only if we're about to log out
    * the present login cookies are removed from environ['HTTP_COOKIE']

    For logout - and logout only - expiring the WordPress cookie is the
    vital part: if it is simply removed, browsers will generally ignore
    the removal and reset the cookie. Once reset, the cookie's presence
    triggers the CKAN cookie to regenerate, which nullifies the logout.
    '''
    # Don't progress the user info for this request.
    environ['REMOTE_USER'] = None
    environ['repoze.who.identity'] = None

    # Expire CKAN cookies so they will be deleted: once with the current
    # HTTP_HOST, then again with the WordPress host.
    identity = {}
    auth_tkt = environ['repoze.who.plugins']['hri_auth_tkt']

    def expire_ckan_cookies():
        expired = auth_tkt.forget(environ, identity)
        if expired:
            new_headers.extend(expired)

    expire_ckan_cookies()
    environ['HTTP_HOST'] = config.get('wordpress_url', 'www.hri.fi')
    expire_ckan_cookies()

    # Expire WordPress cookies so they too will be deleted - this is the
    # more important one, as presence of WordPress cookies retriggers the
    # generation of CKAN cookies!
    request_cookies = environ.get('HTTP_COOKIE', '').split('; ')
    logging_out = '_logout' in str(environ['REQUEST_URI'])
    if logging_out:
        for cookie in request_cookies:
            if 'wordpress' in cookie:
                new_headers.append(('Set-Cookie', cookie + '="INVALID"; Path=/; Max-Age=0; Expires=Thu, 01 Jan 1970 02:00:00'))

    # Remove the cookies from the request, so that a login happening later
    # in this same request is aware of the cookie removal.
    remaining = [cookie for cookie in request_cookies
                 if 'auth_tkt' not in cookie and 'wordpress' not in cookie]
    environ['HTTP_COOKIE'] = '; '.join(remaining)
    log.debug('Logged out WordPress user')

    # Redirect using headers rather than force, so that the headers set
    # here actually get set...
    if logging_out:
        new_headers.append(('Location', 'http://' + config.get('wordpress_url', 'www.hri.fi') + '/fi/wp-login.php'))
def update_config(self, config):
    """
    IConfigurer hook: prepend this extension's publisher_form
    ``templates`` directory to ``extra_template_paths`` so CKAN looks
    there when searching for the group_form().
    """
    package_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    template_dir = os.path.join(package_root, "ckanext", "publisher_form", "templates")
    existing_paths = config.get("extra_template_paths", "")
    config["extra_template_paths"] = ",".join((template_dir, existing_paths))
def setup(self):
    """Build the per-test fixtures: a sysadmin user, a harvest source,
    a job for it, one harvest object and the package context."""
    print("")
    print("TestUM:setup() before each test method")

    # Add sysadmin user
    self.harvestUser = model.User(name=u'harvest', password=u'test', sysadmin=True)
    model.Session.add(self.harvestUser)
    model.Session.commit()

    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'xml/sample.xml',
        'source_type': u'ngds',
    }
    user_context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
    }

    publisher_profile = config.get('ckan.harvest.auth.profile') == u'publisher'
    if publisher_profile and 'publisher_id' not in source_fixture:
        source_fixture['publisher_id'] = self.publisher.id

    # Harvest source and a job for it, created as the 'harvest' user.
    created_source = get_action('harvest_source_create')(user_context, source_fixture)
    self.oHarvestSource = HarvestSource.get(created_source['id'])

    created_job = get_action('harvest_job_create')(
        user_context, {'source_id': self.oHarvestSource.id})
    self.oHarvestJob = HarvestJob.get(created_job['id'])

    # The harvest object is created with authorization checks disabled.
    unchecked_context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
    }
    object_data = {
        'guid': 'guid',
        'content': self.contentDataset,
        'job_id': self.oHarvestJob.id,
        'extras': {'a key': 'a value'},
    }
    created_object = toolkit.get_action('harvest_object_create')(unchecked_context, object_data)
    self.oHarvestObject = HarvestObject.get(created_object['id'])

    self.context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
        'schema': default_update_package_schema(),
        'api_version': '2',
    }
def snoobi_url():
    """
    Return the ``snoobi_url`` setting from the settings (ckan.ini), or an
    empty string when it is not set.

    usage:
    <!-- BEGIN Snoobi v1.4 -->
    <script type="text/javascript" src="{{ h.snoobi_url() }}"></script>
    <!-- END Snoobi v1.4 -->
    """
    url = config.get('snoobi_url', '')
    return url
def define_spatial_tables(db_srid=None):
    """Set up the module-level ``package_extent_table``.

    Args:
        db_srid: SRID to use. When falsy, the ``ckan.spatial.srid``
            setting (default ``DEFAULT_SRID``) is used instead. The
            chosen value is converted with ``int()``.
    """
    global package_extent_table

    srid_source = db_srid or config.get('ckan.spatial.srid', DEFAULT_SRID)
    db_srid = int(srid_source)
    package_extent_table = setup_spatial_table(PackageExtent, db_srid)
def get_connection_amqp():
    """Open a blocking pika connection from the ``ckan.harvest.mq.*``
    settings.

    A port setting that cannot be converted to an integer falls back to
    ``PORT``. The other settings fall back to ``USERID``, ``PASSWORD``,
    ``HOSTNAME`` and ``VIRTUAL_HOST``.
    """
    try:
        port = int(config.get('ckan.harvest.mq.port', PORT))
    except ValueError:
        port = PORT

    credentials = pika.PlainCredentials(
        config.get('ckan.harvest.mq.user_id', USERID),
        config.get('ckan.harvest.mq.password', PASSWORD))
    parameters = pika.ConnectionParameters(
        host=config.get('ckan.harvest.mq.hostname', HOSTNAME),
        port=port,
        virtual_host=config.get('ckan.harvest.mq.virtual_host', VIRTUAL_HOST),
        credentials=credentials,
        frame_max=10000)

    log.debug("pika connection using %s" % parameters.__dict__)
    return pika.BlockingConnection(parameters)
def purge_queues():
    """Empty the harvest queues of the configured message-queue backend.

    For AMQP ('amqp' or the legacy spelling 'ampq') the gather and fetch
    queues are purged through a channel. For 'redis', ``flushall()`` is
    called on the connection.
    """
    backend = config.get('ckan.harvest.mq.type', MQ_TYPE)
    connection = get_connection()

    if backend == 'redis':
        # NOTE(review): flushall presumably clears every key on the Redis
        # server, not only the harvest ones - confirm before sharing a server.
        connection.flushall()
    elif backend in ('amqp', 'ampq'):
        channel = connection.channel()
        for queue_name in ('ckan.harvest.gather', 'ckan.harvest.fetch'):
            channel.queue_purge(queue=queue_name)
def snoobi_is_enabled():
    """
    Report whether the ``snoobi_enabled`` setting (ckan.ini) is switched
    on. The values 'true', 'True' and anything equal to ``True`` count as
    enabled; everything else, including a missing setting, does not.

    usage: {% if h.snoobi_is_enabled() %}...{% endif %}
    """
    setting = config.get('snoobi_enabled', False)
    return setting in ('true', 'True', True)
def update_config(self, config):
    """
    IConfigurer hook: put this extension's publisher_form ``templates``
    directory in front of the existing ``extra_template_paths`` so CKAN
    looks there when searching for the group_form().
    """
    module_dir = os.path.dirname(__file__)
    root_dir = os.path.dirname(os.path.dirname(module_dir))
    path_parts = (root_dir, 'ckanext', 'publisher_form', 'templates')
    template_dir = os.path.join(*path_parts)

    search_paths = [template_dir, config.get('extra_template_paths', '')]
    config['extra_template_paths'] = ','.join(search_paths)
def index(self, context=None):
    """Render the contact form and e-mail a valid submission.

    When request parameters are present they are validated: the
    ``contact_us.nochange`` field must still hold "http://", name, e-mail
    and message must be filled in, and the e-mail must pass
    ``validate_email``. A valid submission is mailed to every address in
    the comma-separated ``contact_us.email`` setting and the form is
    cleared.

    ``context`` is accepted but not used. Mailer errors propagate to the
    caller.
    NOTE(review): a missing ``contact_us.email`` setting presumably still
    fails on ``.split`` as before - confirm it is always configured.
    """
    import ckan.lib.mailer

    data = request.params or {}
    errors = {}
    error_summary = {}

    if data:
        if data.get("contact_us.nochange") != "http://":
            errors["contact_us.nochange"] = [_("The value was edited")]
        if not data.get("contact_us.name"):
            errors["contact_us.name"] = [_("Missing value")]
        if not data.get("contact_us.email"):
            errors["contact_us.email"] = [_("Missing value")]
        elif not validate_email(data.get("contact_us.email")):
            errors["contact_us.email"] = [_("Invalid email")]
        if not data.get("contact_us.message"):
            errors["contact_us.message"] = [_("Missing value")]

        if not errors:
            recipients = config.get("contact_us.email")
            for recipient in recipients.split(","):
                # Tolerate "a@x, b@y" and trailing commas in the setting.
                recipient = recipient.strip()
                if not recipient:
                    continue
                ckan.lib.mailer._mail_recipient(
                    "Admin",
                    recipient,
                    data.get("contact_us.name"),
                    data.get("contact_us.email"),
                    "Contact form",
                    data.get("contact_us.message"),
                )
            h.flash_success(_("Email sent"))
            data = {}

    vars = {"data": data, "errors": errors, "error_summary": error_summary}
    return render("ckanext/contact_us/index.html", extra_vars=vars)
def get_publisher(routing_key):
    """Return a publisher for ``routing_key`` on the configured backend.

    AMQP ('amqp' or legacy 'ampq') declares the durable exchange and
    returns a ``Publisher``; 'redis' returns a ``RedisPublisher``. Any
    other backend value yields ``None``.
    """
    connection = get_connection()
    backend = config.get('ckan.harvest.mq.type', MQ_TYPE)

    if backend == 'redis':
        return RedisPublisher(connection, routing_key)
    if backend in ('amqp', 'ampq'):
        channel = connection.channel()
        channel.exchange_declare(exchange=EXCHANGE_NAME, durable=True)
        return Publisher(connection,
                         channel,
                         EXCHANGE_NAME,
                         routing_key=routing_key)
    return None
def update_config(self, config):
    """
    IConfigurer hook: register this extension's template directory
    (``templates_legacy`` when ``ckan.legacy_templates`` is enabled,
    otherwise ``templates``) and its ``public`` directory, and make
    'organization' the default group type.
    """
    use_legacy = p.toolkit.asbool(config.get('ckan.legacy_templates', False))
    template_dir = 'templates_legacy' if use_legacy else 'templates'

    p.toolkit.add_template_directory(config, template_dir)
    p.toolkit.add_public_directory(config, 'public')

    # Override /group/* as the default groups urls
    config['ckan.default.group_type'] = 'organization'
def _create_source_and_job(self, source_fixture):
    """Create a harvest source from ``source_fixture`` plus a job for it.

    Under the publisher auth profile a missing ``publisher_id`` is filled
    in on ``source_fixture`` (the dict is modified in place).

    Returns:
        ``(source, job)`` tuple.
    """
    context = {"model": model, "session": Session, "user": u"harvest"}

    publisher_profile = config.get("ckan.harvest.auth.profile") == u"publisher"
    if publisher_profile and "publisher_id" not in source_fixture:
        source_fixture["publisher_id"] = self.publisher.id

    created = get_action("harvest_source_create")(context, source_fixture)
    source = HarvestSource.get(created["id"])
    assert source

    return source, self._create_job(source.id)
def get_consumer(queue_name, routing_key):
    """Return a consumer for the configured message-queue backend.

    AMQP ('amqp' or legacy 'ampq') declares the durable exchange and
    queue, binds them with ``routing_key`` and returns the channel.
    'redis' returns a ``RedisConsumer``. Any other backend value yields
    ``None``.
    """
    connection = get_connection()
    backend = config.get('ckan.harvest.mq.type', MQ_TYPE)

    if backend == 'redis':
        return RedisConsumer(connection, routing_key)
    if backend in ('amqp', 'ampq'):
        channel = connection.channel()
        channel.exchange_declare(exchange=EXCHANGE_NAME, durable=True)
        channel.queue_declare(queue=queue_name, durable=True)
        channel.queue_bind(queue=queue_name,
                           exchange=EXCHANGE_NAME,
                           routing_key=routing_key)
        return channel
    return None
def __before__(self, action, **params):
    """Run the parent hook, then flag on ``c.publisher_auth`` whether the
    harvest auth profile is 'publisher'."""
    super(ViewController, self).__before__(action, **params)
    auth_profile = config.get('ckan.harvest.auth.profile', None)
    c.publisher_auth = (auth_profile == 'publisher')
from sqlalchemy import Column, ForeignKey, Index, and_, or_, orm, types
from sqlalchemy.exc import SQLAlchemyError as SAError, IntegrityError
from sqlalchemy.ext.declarative import declarative_base, declared_attr

from ckanext.dcat.profiles import DCT

log = logging.getLogger(__name__)

__all__ = ['Subtheme', 'SubthemeLabel', 'clear_subthemes']

# Declarative base bound to the shared ``meta.metadata``.
DeclarativeBase = declarative_base(metadata=meta.metadata)

# Name of the setting listing the subtheme languages, separated by spaces.
CONFIG_THEME_LANGS = 'ckan.dcatapit.subthemes.langs'
# Split on single spaces; an unset or empty setting yields [''].
THEME_LANGS = (config.get(CONFIG_THEME_LANGS) or '').split(' ')
# Default language: ``ckan.locale_default`` or 'it' when not configured.
DEFAULT_LANG = config.get('ckan.locale_default', 'it')


class ThemeToSubtheme(DeclarativeBase, DomainObject):
    """Association row linking a tag to a subtheme
    (table ``dcatapit_theme_to_subtheme``)."""
    __tablename__ = 'dcatapit_theme_to_subtheme'

    # NOTE(review): presumably the vocabulary the linked tags belong to -
    # confirm against the code that reads VOCAB_NAME.
    VOCAB_NAME = 'eu_themes'

    id = Column(types.Integer, primary_key=True)
    # Required foreign keys to the tag and to the subtheme.
    tag_id = Column(types.Unicode, ForeignKey(Tag.id), nullable=False)
    subtheme_id = Column(types.Integer, ForeignKey('dcatapit_subtheme.id'), nullable=False)

    # ORM relationships for the two foreign keys above.
    subtheme = orm.relationship('Subtheme')
    tag = orm.relationship(Tag)
def setup_class(cls):
    """Skip the whole class unless the harvest auth profile is
    'publisher', then run the parent class setup."""
    active_profile = config.get('ckan.harvest.auth.profile')
    if active_profile != 'publisher':
        raise SkipTest('Skipping publisher auth profile tests. Set ckan.harvest.auth.profile = \'publisher\' to run them')

    super(TestAuthPublisherProfile, cls).setup_class()
from ckanext.dgulocal.model import OrganizationExtent if not orgid: log.error('No organization provided') return shape = asShape(geojson) extent = Session.query(OrganizationExtent)\ .filter(OrganizationExtent.organization_id == orgid).first() if not extent: extent = OrganizationExtent(organization_id=orgid) extent.the_geom = WKTSpatialElement(shape.wkt, db_srid) extent.save() db_srid = int(config.get('ckan.spatial.srid', DEFAULT_SRID)) organization_extent_table = Table( 'organization_extent', meta.metadata, Column('organization_id', types.UnicodeText, primary_key=True), GeometryExtensionColumn('the_geom', Geometry(2, srid=db_srid))) meta.mapper(OrganizationExtent, organization_extent_table, properties={ 'the_geom': GeometryColumn(organization_extent_table.c.the_geom, comparator=PGComparator) }) # enable the DDL extension
def setup_class(cls):
    """Skip the whole class unless the harvest auth profile is unset or
    empty, then run the parent class setup."""
    active_profile = config.get('ckan.harvest.auth.profile', '')
    if active_profile != '':
        raise SkipTest('Skipping default auth profile tests. Set ckan.harvest.auth.profile = \'\' to run them')

    super(TestAuthDefaultProfile, cls).setup_class()
def get_gather_queue_name():
    """Return the gather queue name for this site, built from
    ``ckan.site_id`` ('default' when unset)."""
    site_id = config.get('ckan.site_id', 'default')
    return 'ckan.harvest.{0}.gather'.format(site_id)
def test_harvest_fields_service(self):
    """Harvest a single Gemini service document and check the resulting
    package fields, extras and first resource."""
    # Create source
    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'http://127.0.0.1:8999/gemini2.1/service1.xml',
        'source_type': u'gemini-single'
    }

    source, job = self._create_source_and_job(source_fixture)

    harvester = GeminiDocHarvester()

    object_ids = harvester.gather_stage(job)
    # These used to be written as ``assert x, <check>``, which turned the
    # second check into the failure message and never evaluated it.
    assert object_ids, 'gather stage returned no harvest objects'
    assert len(object_ids) == 1, object_ids

    # No gather errors
    assert len(job.gather_errors) == 0

    # Fetch stage always returns True for Single Doc harvesters
    assert harvester.fetch_stage(object_ids) == True

    obj = HarvestObject.get(object_ids[0])
    assert obj, 'harvest object not found'
    assert obj.content, 'harvest object has no content'
    assert obj.guid == u'test-service-1'

    harvester.import_stage(obj)

    # No object errors
    assert len(obj.errors) == 0

    package_dict = get_action('package_show_rest')(self.context, {'id': obj.package_id})

    assert package_dict

    expected = {
        'name': u'one-scotland-address-gazetteer-web-map-service-wms',
        'title': u'One Scotland Address Gazetteer Web Map Service (WMS)',
        'tags': [u'Addresses', u'Scottish National Gazetteer'],
        'notes': u'This service displays its contents at larger scale than 1:10000. [edited]',
    }

    for key, value in expected.items():
        if package_dict[key] != value:
            raise AssertionError('Unexpected value for %s: %s (was expecting %s)' %
                                 (key, package_dict[key], value))

    if config.get('ckan.harvest.auth.profile') == u'publisher':
        assert package_dict['groups'] == [self.publisher.id]

    expected_extras = {
        # Basic
        'harvest_object_id': obj.id,
        'guid': obj.guid,
        'UKLP': u'True',
        'resource-type': u'service',
        'access_constraints': u'["No restriction on public access"]',
        'responsible-party': u'The Improvement Service (owner)',
        'provider': u'The Improvement Service',
        'contact-email': u'*****@*****.**',
        # Spatial
        'bbox-east-long': u'0.5242365625',
        'bbox-north-lat': u'61.0243',
        'bbox-south-lat': u'54.4764484375',
        'bbox-west-long': u'-9.099786875',
        'spatial': u'{"type": "Polygon", "coordinates": [[[0.5242365625, 54.4764484375], [-9.099786875, 54.4764484375], [-9.099786875, 61.0243], [0.5242365625, 61.0243], [0.5242365625, 54.4764484375]]]}',
        # Other
        'coupled-resource': u'[{"href": ["http://scotgovsdi.edina.ac.uk/srv/en/csw?service=CSW&request=GetRecordById&version=2.0.2&outputSchema=http://www.isotc211.org/2005/gmd&elementSetName=full&id=250ea276-48e2-4189-8a89-fcc4ca92d652"], "uuid": ["250ea276-48e2-4189-8a89-fcc4ca92d652"], "title": []}]',
        'dataset-reference-date': u'[{"type": "publication", "value": "2011-09-08"}]',
        'frequency-of-update': u'daily',
        'licence': u'["Use of the One Scotland Gazetteer data used by this this service is available to any organisation that is a member of the One Scotland Mapping Agreement. It is not currently commercially available", "http://www.test.gov.uk/licenseurl"]',
        'licence_url': u'http://www.test.gov.uk/licenseurl',
        'metadata-date': u'2011-09-08T16:07:32',
        'metadata-language': u'eng',
        'spatial-data-service-type': u'other',
        'spatial-reference-system': u'OSGB 1936 / British National Grid (EPSG:27700)',
        'temporal_coverage-from': u'["1904-06-16"]',
        'temporal_coverage-to': u'["2004-06-16"]',
    }

    for key, value in expected_extras.items():
        if key not in package_dict['extras']:
            raise AssertionError('Extra %s not present in package' % key)

        if package_dict['extras'][key] != value:
            raise AssertionError('Unexpected value for extra %s: %s (was expecting %s)' %
                                 (key, package_dict['extras'][key], value))

    expected_resource = {
        'ckan_recommended_wms_preview': 'True',
        'description': 'Link to the GetCapabilities request for this service',
        'name': 'Web Map Service (WMS)',
        'resource_locator_function': 'download',
        'resource_locator_protocol': 'OGC:WMS-1.3.0-http-get-capabilities',
        'resource_type': None,
        'size': None,
        'url': u'http://127.0.0.1:8999/wms/capabilities.xml',
        'verified': 'True',
    }

    resource = package_dict['resources'][0]
    for key, value in expected_resource.items():
        if resource[key] != value:
            raise AssertionError('Unexpected value in resource for %s: %s (was expecting %s)' %
                                 (key, resource[key], value))
    # The resource must have been verified today.
    assert datetime.strptime(
        resource['verified_date'], '%Y-%m-%dT%H:%M:%S.%f').date() == date.today()
    assert resource['format'].lower() == 'wms'
def _get_config(self):
    """Copy the ``pdeu.map.*`` settings (start colour, end colour, number
    of groups) onto the template context ``c``, applying defaults."""
    # (attribute on c, setting name, default value)
    map_settings = (
        ('startColor', 'pdeu.map.start_color', '#FFFFFF'),
        ('endColor', 'pdeu.map.end_color', '#045A8D'),
        ('num_groups', 'pdeu.map.groups', 5),
    )
    for attribute, setting, default in map_settings:
        setattr(c, attribute, config.get(setting, default))
def test_harvest_fields_dataset(self):
    """Harvest a single Gemini dataset document and check the resulting
    package fields, extras and first resource."""
    # Create source
    source_fixture = {
        'url': u'http://127.0.0.1:8999/single/dataset1.xml',
        'type': u'gemini-single'
    }

    source, job = self._create_source_and_job(source_fixture)

    harvester = GeminiDocHarvester()

    object_ids = harvester.gather_stage(job)
    # These used to be written as ``assert x, <check>``, which turned the
    # second check into the failure message and never evaluated it.
    assert object_ids, 'gather stage returned no harvest objects'
    assert len(object_ids) == 1, object_ids

    # No gather errors
    assert len(job.gather_errors) == 0

    # Fetch stage always returns True for Single Doc harvesters
    assert harvester.fetch_stage(object_ids) == True

    obj = HarvestObject.get(object_ids[0])
    assert obj, 'harvest object not found'
    assert obj.content, 'harvest object has no content'
    assert obj.guid == u'test-dataset-1'

    harvester.import_stage(obj)

    # No object errors
    assert len(obj.errors) == 0

    package_dict = get_action('package_show_rest')(self.context, {'id': obj.package_id})

    assert package_dict

    expected = {
        'name': u'country-parks-scotland',
        'title': u'Country Parks (Scotland)',
        'tags': [u'Nature conservation'],
        'notes': u'Parks are set up by Local Authorities to provide open-air recreation facilities close to towns and cities. [edited]'
    }

    for key, value in expected.items():
        if package_dict[key] != value:
            raise AssertionError('Unexpected value for %s: %s (was expecting %s)' %
                                 (key, package_dict[key], value))

    if config.get('ckan.harvest.auth.profile') == u'publisher':
        assert package_dict['groups'] == [self.publisher.id]

    expected_extras = {
        # Basic
        'harvest_object_id': obj.id,
        'guid': obj.guid,
        'resource-type': u'dataset',
        'responsible-party': u'Scottish Natural Heritage (custodian, distributor)',
        'access_constraints': u'["Copyright Scottish Natural Heritage"]',
        'contact-email': u'*****@*****.**',
        'provider': '',
        # Spatial
        'bbox-east-long': u'0.205857204',
        'bbox-north-lat': u'61.06066944',
        'bbox-south-lat': u'54.529947158',
        'bbox-west-long': u'-8.97114288',
        'spatial': u'{"type":"Polygon","coordinates":[[[0.205857204, 54.529947158],[0.205857204, 61.06066944], [-8.97114288, 61.06066944], [-8.97114288, 54.529947158], [0.205857204, 54.529947158]]]}',
        # Other
        'coupled-resource': u'[]',
        'dataset-reference-date': u'[{"type": "creation", "value": "2004-02"}, {"type": "revision", "value": "2006-07-03"}]',
        'frequency-of-update': u'irregular',
        'licence': u'["Reference and PSMA Only", "http://www.test.gov.uk/licenseurl"]',
        'licence_url': u'http://www.test.gov.uk/licenseurl',
        'metadata-date': u'2011-09-23T10:06:08',
        'metadata-language': u'eng',
        'spatial-reference-system': u'urn:ogc:def:crs:EPSG::27700',
        'temporal_coverage-from': u'["1998"]',
        'temporal_coverage-to': u'["2010"]',
    }

    for key, value in expected_extras.items():
        if key not in package_dict['extras']:
            raise AssertionError('Extra %s not present in package' % key)

        if package_dict['extras'][key] != value:
            raise AssertionError('Unexpected value for extra %s: %s (was expecting %s)' %
                                 (key, package_dict['extras'][key], value))

    expected_resource = {
        'description': 'Test Resource Description',
        'format': u'',
        'name': 'Test Resource Name',
        'resource_locator_function': 'download',
        'resource_locator_protocol': 'test-protocol',
        'resource_type': None,
        'size': None,
        'url': u'https://gateway.snh.gov.uk/pls/apex_ddtdb2/f?p=101',
    }

    resource = package_dict['resources'][0]
    for key, value in expected_resource.items():
        if resource[key] != value:
            raise AssertionError('Unexpected value in resource for %s: %s (was expecting %s)' %
                                 (key, resource[key], value))
def get_fetch_routing_key():
    """Return the fetch routing key for this site, built from
    ``ckan.site_id`` ('default' when unset)."""
    site_id = config.get('ckan.site_id', 'default')
    return 'ckanext-harvest:{0}:harvest_object_id'.format(site_id)
def get_gather_routing_key():
    """Return the gather routing key for this site, built from
    ``ckan.site_id`` ('default' when unset)."""
    site_id = config.get('ckan.site_id', 'default')
    return 'ckanext-harvest:{0}:harvest_job_id'.format(site_id)
def get_fetch_queue_name():
    """Return the fetch queue name for this site, built from
    ``ckan.site_id`` ('default' when unset)."""
    site_id = config.get('ckan.site_id', 'default')
    return 'ckan.harvest.{0}.fetch'.format(site_id)
def do_load(g, vocab_name: str):
    """Load the concepts of graph ``g`` as tags of vocabulary ``vocab_name``.

    The vocabulary is created when it does not exist yet. Each concept
    becomes a tag (created if missing) and its labels are stored through
    ``interfaces.persist_tag_multilang``. Tags of the vocabulary that were
    not handled in this run are deleted unless they are used by packages.

    Returns:
        The value of ``cnt.get()``, i.e. the collected counters.
    """

    def _update_label_counter(cnt, action):
        # Translate the persistence outcome into a counter name.
        action_mapping = {
            DBAction.CREATED: 'label_added',
            DBAction.UPDATED: 'label_updated',
            DBAction.NONE: 'label_exists',
            DBAction.ERROR: 'label_skipped',
        }
        try:
            action_mapped = action_mapping[action]
            cnt.incr(action_mapped)
        except KeyError:
            log.error(f'Unknown action {action}')

    # For the language vocabulary, only report offered locales that are
    # missing from LANGUAGE_IMPORT_FILTER; nothing is changed here.
    if vocab_name == LANGUAGE_THEME_NAME:
        for offered_language in config.get('ckan.locales_offered', 'it').split(' '):
            if offered_language not in LANGUAGE_IMPORT_FILTER:
                log.info(
                    f"'{offered_language}' language is fitlered out in this plugin "
                    f"and will be skipped during the import stage (vocabulary '{vocab_name}')"
                )

    # Loading the RDF vocabulary
    log.debug(f'Loading graph for {vocab_name}')

    # Regions use their own loader.
    if vocab_name == REGIONS_NAME:
        vocab_load = do_load_regions
    else:
        vocab_load = do_load_vocab

    ids = []  # ids of the tags handled in this run
    cnt = Counter()

    concepts = vocab_load(g, vocab_name)

    user = toolkit.get_action('get_site_user')({'ignore_auth': True}, {})
    # NOTE(review): ``context`` is built but not used below.
    context = {'user': user['name'], 'ignore_auth': True}

    log.debug("Using site user '%s'", user['name'])

    vocab = Vocabulary.get(vocab_name)
    if vocab:
        log.info(f'Vocabulary "{vocab_name}" already exists, skipping...')
    else:
        log.info(f'Creating vocabulary "{vocab_name}"')
        vocab = Vocabulary(vocab_name)
        vocab.save()

    for concept in concepts:
        tag_name = concept['name']
        # Names shorter than two characters are skipped.
        if len(tag_name) < 2:
            log.error(
                f"Tag too short: skipping tag '{tag_name}' for vocabulary '{vocab_name}'"
            )
            cnt.incr('tag_skipped')
            continue

        tag = model.Tag.by_name(tag_name, vocab)
        if tag is None:
            log.info(f"Adding tag {vocab_name}::{tag_name}")
            tag = model.Tag(name=tag_name, vocabulary_id=vocab.id)
            tag.save()
            cnt.incr('tag_added')
        else:
            cnt.incr('tag_exists')

        log.debug(f'Creating multilang labels for tag {vocab_name}:{tag_name}')

        for pref_label in concept['labels']:
            # Only languages with a SKOS-to-CKAN mapping are stored.
            if pref_label['lang'] not in LANG_MAPPING_SKOS_TO_CKAN:
                cnt.incr('label_skipped')
                continue
            tag_lang = LANG_MAPPING_SKOS_TO_CKAN[pref_label['lang']]
            tag_text = pref_label['text']

            try:
                log.debug('Storing tag: name[%s] lang[%s] label[%s]', tag_name, tag_lang, tag_text)
            except UnicodeEncodeError:
                # Fall back to a message without the label text.
                log.error(f'Storing tag: name[{tag_name}] lang[{tag_lang}]')

            action, tl_id = interfaces.persist_tag_multilang(
                tag, tag_lang, tag_text, vocab)

            _update_label_counter(cnt, action)

        ids.append(tag.id)

    # delete from DB old tags not found in input graph
    tag_not_in_voc = model.Session.query(model.Tag)\
        .filter(model.Tag.id.notin_(ids))\
        .filter(model.Tag.vocabulary_id==vocab.id)\
        .all()

    for tag_to_delete in tag_not_in_voc:
        pkg_cnt = len(tag_to_delete.packages)
        # Only tags that no package uses are removed.
        if pkg_cnt == 0:
            tag_to_delete.delete()
            Session.commit()
            log.info(
                f"Deleting tag {tag_to_delete} from vocabulary '{vocab_name}'")
            cnt.incr('tag_deleted')
        else:
            log.info(
                f"Cannot delete tag {tag_to_delete} from vocabulary '{vocab_name}' used in {pkg_cnt} packages"
            )
            cnt.incr('tag_notdeletable')

    log.info(f'Vocabulary successfully loaded ({vocab_name})')

    return cnt.get()
def populate_theme_groups(instance, clean_existing=False):
    """
    For given instance, it finds groups from mapping corresponding to
    Dataset's themes, and will assign dataset to those groups.

    Existing groups will be removed, if clean_existing is set to True.

    This utilizes `ckanext.dcatapit.theme_group_mapping.add_new_groups`
    configuration option. If it's set to true, and mapped group doesn't exist,
    new group will be created.

    Returns the same ``instance`` in every case.
    """
    add_new = toolkit.asbool(config.get(DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS))
    themes = []
    # Collect theme names from every 'theme' extra.
    for ex in (instance.get('extras') or []):
        if ex['key'] == 'theme':
            _t = ex['value']
            if isinstance(_t, list):
                themes.extend(_t)
            else:
                try:
                    # Expected form: JSON list of dicts with a 'theme' key.
                    tval = json.loads(_t)
                except Exception:
                    # Not JSON: decode the value with _decode_list and wrap
                    # each entry in the same dict shape.
                    tval = [{'theme': t, 'subthemes': []} for t in _decode_list(_t)]
                for tv in tval:
                    themes.append(tv['theme'])
    if not themes:
        log.debug("no theme from %s", instance)
        return instance
    theme_map = get_theme_to_groups()

    if not theme_map:
        log.warning("Theme to group map is empty")
        return instance
    # ``themes`` is built as a list above, so this is only a safeguard.
    if not isinstance(themes, list):
        themes = [themes]
    # Union of the group names mapped from every theme.
    all_groups = set()
    for theme in themes:
        _groups = theme_map.get(theme)
        if not _groups:
            continue
        all_groups = all_groups.union(set(_groups))

    if clean_existing:
        _clean_groups(instance)
    groups = []
    for gname in all_groups:
        gname = gname.strip()
        if not gname:
            continue
        # Look the group up first, then among objects found by
        # _get_group_from_session.
        group = Group.get(gname) or _get_group_from_session(gname)
        if add_new and group is None:
            group = Group(name=gname)
            Session.add(group)
        if group:
            groups.append(group)

    if Session.new:
        # flush to db, refresh with ids
        rev = Session.revision
        Session.flush()
        Session.revision = rev
        # Re-read groups that still have no id.
        groups = [(Group.get(g.name) if g.id is None else g) for g in groups]
    _add_groups(instance['id'], set(groups))

    # preserve revision, since it's not a commit yet
    rev = Session.revision
    Session.flush()
    Session.revision = rev

    return instance
def leaderboard(self, id=None):
    """Render the stats leaderboard page.

    The Solr core URL is read from the ``ckanext.stats.solr_core_url``
    setting (default ``http://solr.okfn.org/solr/ckan``) and stored on the
    template context before ``ckanext/stats/leaderboard.html`` is rendered.
    ``id`` is accepted but not used by this method.
    """
    template_context = p.toolkit.c
    solr_core_url = config.get('ckanext.stats.solr_core_url',
                               'http://solr.okfn.org/solr/ckan')
    template_context.solr_core_url = solr_core_url
    return p.toolkit.render('ckanext/stats/leaderboard.html')
def get_connection_redis():
    """Return a ``redis.StrictRedis`` client built from the harvest settings.

    Host, port and database index come from the ``ckan.harvest.mq.hostname``,
    ``ckan.harvest.mq.port`` and ``ckan.harvest.mq.redis_db`` settings, with
    ``HOSTNAME``, ``REDIS_PORT`` and ``REDIS_DB`` as fallbacks. Port and
    database index are converted to ``int``.
    """
    # imported lazily, inside the function, as in the original
    import redis

    connection_kwargs = {
        'host': config.get('ckan.harvest.mq.hostname', HOSTNAME),
        'port': int(config.get('ckan.harvest.mq.port', REDIS_PORT)),
        'db': int(config.get('ckan.harvest.mq.redis_db', REDIS_DB)),
    }
    return redis.StrictRedis(**connection_kwargs)
def populate_theme_groups(instance, clean_existing=False):
    """
    Assign a dataset to the groups mapped from its themes.

    Themes are read from the extras of ``instance``: the aggregate extra
    (``FIELD_THEMES_AGGREGATE``) ends the scan as soon as it is found, while
    plain ``theme`` extras met before it are collected as well. The themes
    are then translated into group names through the theme-to-groups
    mapping. When ``clean_existing`` is True, ``_clean_groups`` is run on
    the instance before the mapped groups are added.

    This utilizes `ckanext.dcatapit.theme_group_mapping.add_new_groups`
    configuration option: if it's set to true and a mapped group doesn't
    exist, a new group is created.

    Returns the same ``instance`` that was passed in.
    """
    add_new = toolkit.asbool(
        config.get(DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS))

    themes = []
    for extra in (instance.get('extras') or []):
        extra_key = extra['key']
        if extra_key == FIELD_THEMES_AGGREGATE:
            _t = extra['value']
            if isinstance(_t, list):
                themes.extend(_t)
            else:
                try:
                    aggregates = json.loads(_t)
                except Exception:
                    log.warning(f'Trying old themes format for {_t}')
                    aggregates = [{'theme': name, 'subthemes': []}
                                  for name in _t.strip('{}').split(',')]
                themes.extend(entry['theme'] for entry in aggregates)
            # we don't need any other info - once the aggregate is found
            # the remaining extras can be bypassed
            break
        if extra_key == 'theme':
            _t = extra['value']
            if isinstance(_t, list):
                themes.extend(_t)
            else:
                try:
                    plain_themes = json.loads(_t)
                except Exception:
                    log.warning(f'Trying old themes format for {_t}')
                    plain_themes = _t.strip('{}').split(',')
                themes.extend(plain_themes)
            # don't break the loop here: the aggregate extra may still follow

    if not themes:
        log.debug('no theme from %s', instance)
        return instance

    theme_map = get_theme_to_groups()
    if not theme_map:
        log.warning('Theme to group map is empty')
        return instance

    if not isinstance(themes, list):
        themes = [themes]

    # union of the group names mapped from every theme
    all_groups = set()
    for theme in themes:
        mapped = theme_map.get(theme)
        if mapped:
            all_groups.update(mapped)

    if clean_existing:
        _clean_groups(instance)

    groups = []
    for raw_name in all_groups:
        group_name = raw_name.strip()
        if not group_name:
            continue
        group = Group.get(group_name) or _get_group_from_session(group_name)
        if group is None and add_new:
            group = Group(name=group_name)
            Session.add(group)
        if group:
            groups.append(group)

    if Session.new:
        # flush to db, refresh with ids
        Session.flush()
        groups = [(g if g.id is not None else Group.get(g.name))
                  for g in groups]

    _add_groups(instance['id'], set(groups))
    Session.flush()
    return instance
from ckanext.dcatapit.dcat.profiles import (DCATAPIT) from ckanext.dcatapit import validators from ckanext.dcat.profiles import (DCAT, DCT, FOAF, OWL) from ckanext.dcatapit.mapping import DCATAPIT_THEMES_MAP, map_nonconformant_groups from ckanext.dcatapit.mapping import DCATAPIT_THEME_TO_MAPPING_SOURCE, DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS from ckanext.dcatapit.harvesters.ckanharvester import CKANMappingHarvester from ckanext.harvest.model import HarvestObject from ckanext.dcatapit.plugin import DCATAPITGroupMapper from ckanext.dcatapit.tests.utils import load_themes from ckanext.dcatapit.model.license import _get_graph, load_from_graph, License DEFAULT_LANG = config.get('ckan.locale_default', 'en') eq_ = nose.tools.eq_ ok_ = nose.tools.ok_ assert_true = nose.tools.assert_true class BaseParseTest(object): def _extras(self, dataset): extras = {} for extra in dataset.get('extras'): extras[extra['key']] = extra['value'] return extras def _get_file_contents(self, file_name): path = os.path.join(os.path.dirname(__file__), '..', '..', '..',
from routes.mapper import SubMapper, Mapper as _Mapper log = logging.getLogger(__file__) try: from ckan.lib.plugins import DefaultTranslation except ImportError: class DefaultTranslation(): pass LOCALIZED_RESOURCES_KEY = 'ckanext.dcatapit.localized_resources' LOCALIZED_RESOURCES_ENABLED = toolkit.asbool( config.get(LOCALIZED_RESOURCES_KEY, "False")) MLR = None if LOCALIZED_RESOURCES_ENABLED: from ckanext.multilang.plugin import MultilangResourcesAux MLR = MultilangResourcesAux() # admin chose to enable the localized resource, so let the ImportError out class DCATAPITPackagePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm, DefaultTranslation): # IDatasetForm plugins.implements(plugins.IDatasetForm) # IConfigurer plugins.implements(plugins.IConfigurer)
def cmd_import_agrovoc(self, in_file, *args, **kwargs):
    """
    Import AGROVOC terms from RDF file

    syntax: import_agrovoc rdf_file

    The file is parsed as N-Triples. Every ``skos:Concept`` becomes one row
    holding its id, its preferred labels in the offered languages and its
    broader (parent) concepts. The rows are written to an in-memory CSV and
    passed to ``load_vocabulary``; afterwards the datasets reported by
    ``find_unused_terms`` are printed.

    :param in_file: N-Triples source handed to ``Graph.parse``
    """
    # split() without argument ignores repeated/leading/trailing whitespace;
    # split(' ') would yield empty language codes and bogus 'lang:' columns.
    offered_langs = (config.get('ckan.locales_offered')
                     or 'en es fr de it').lower().split()
    lang_columns = tuple('lang:{}'.format(lang) for lang in offered_langs)
    header = ('parent', 'term',) + lang_columns + ('property:parents',)
    rdata = [header]

    g = Graph()
    g.parse(in_file, format='nt')

    # triples are (subject, predicate, object); only the subject is needed here
    for concept, _pred, _obj in g.triples((None, RDF.type, SKOS.Concept)):
        cid = str(concept).split('/')[-1]
        row = {'term': cid}

        for _s, _p, label in g.triples((concept, SKOS.prefLabel, None)):
            if label.language not in offered_langs:
                continue
            row['lang:{}'.format(label.language)] = label.value

        parents = [str(parent).split('/')[-1]
                   for _s, _p, parent in g.triples((concept, SKOS.broader, None))]
        # 'parent' keeps the last broader concept found (None for top-level
        # terms); the complete list goes into 'property:parents'
        row['parent'] = parents[-1] if parents else None
        row['property:parents'] = ','.join(parents)

        row_data = []
        for col in header:
            if col.startswith('lang'):
                # missing translation: fall back to English, then French
                val = row.get(col) or row.get('lang:en') or row.get('lang:fr')
            else:
                val = row[col]
            # Python 2 text values are written to the CSV as UTF-8 bytes
            row_data.append(val.encode('utf-8') if isinstance(val, unicode) else val)

        if row['parent']:
            rdata.append(row_data)
        else:
            # top-level should be first (right after the header)
            rdata.insert(1, row_data)

    # rdata[0] is the header row, which is not a term
    log.info('AGROVOC terms parsed: %s', len(rdata) - 1)

    csvdata = StringIO()
    csv.writer(csvdata).writerows(rdata)
    csvdata.seek(0)

    voc_name = Vocabulary.VOCABULARY_AGROVOC
    # make sure the vocabulary exists before terms are loaded into it
    try:
        Vocabulary.get(voc_name)
    except ValueError:
        Vocabulary.create(voc_name, has_relations=True)

    count = load_vocabulary(voc_name, csvdata)
    log.info('AGROVOC terms imported: %s', count)

    cleanup_stats = find_unused_terms(voc_name, 'fao_agrovoc')
    if cleanup_stats['datasets']:
        print("Following dataset have terms not present in vocabulary:")
        for dname, tvals in sorted(cleanup_stats['datasets'].items()):
            print(' dataset', dname,':', ','.join(tvals))
def do_wordpress_login_logout(self, environ, new_headers):
    '''Looks at cookies and auth_tkt and may tell auth_tkt to log-in or
    log-out to a WordPress user.

    Two situations are handled:

    * a WordPress session cookie is present: the user is logged in from it
      when auth_tkt has no user, re-logged in when the session id stored in
      auth_tkt differs from the cookie, or re-checked when the auth_tkt
      cookie is considered too old;
    * no WordPress session cookie but a CKAN cookie is present: the user is
      logged out and redirected to the WordPress logout URL.

    In any other case nothing is done.

    :param environ: WSGI environ; ``repoze.who.identity`` and
        ``REMOTE_USER`` are read from it
    :param new_headers: passed through unchanged to ``_do_wordpress_login``
        and ``_log_out`` (presumably response headers they append to -
        verify against those helpers)
    '''
    is_ckan_cookie, wordpress_session_id = self._parse_cookies(environ)
    # Is there a WordPress cookie? We may want to do a log-in for it.
    if wordpress_session_id:
        #log.error('Got this:' + wordpress_session_id)
        # Look at any authtkt logged in user details
        authtkt_identity = environ.get('repoze.who.identity')
        if authtkt_identity:
            authtkt_user_name = authtkt_identity[
                'repoze.who.userid']  # same as environ.get('REMOTE_USER', '')
            # the identity's 'userdata' is compared against the WordPress
            # session id below
            authtkt_wordpress_session_id = authtkt_identity['userdata']
        else:
            authtkt_user_name = ''
            authtkt_wordpress_session_id = ''
        if not authtkt_user_name:
            # authtkt not logged in, so log-in with the WordPress cookie
            self._do_wordpress_login(environ, wordpress_session_id,
                                     new_headers)
            return
        #elif authtkt_user_name.startswith(self._user_name_prefix):
        else:
            # A user is logged in with authtkt.
            # See if the authtkt matches the WordPress cookie's session
            if authtkt_wordpress_session_id != wordpress_session_id:
                # WordPress cookie session has changed, so tell authtkt to
                # forget the old one before we do the new login.
                log.error(
                    'WordPress cookie session has changed from %r to %r.',
                    authtkt_wordpress_session_id, wordpress_session_id)
                self._log_out(environ, new_headers)
                self._do_wordpress_login(environ, wordpress_session_id,
                                         new_headers)
                return
            else:
                log.debug('WordPress cookie session stayed the same.')
                # WordPress cookie session matches the authtkt - leave user
                # logged in. Just check that authtkt cookie is not too old -
                # in the mean-time, WordPress may have invalidated the user,
                # for example.
                if self.is_authtkt_cookie_too_old(authtkt_identity):
                    log.info('Rechecking WordPress cookie')
                    self._log_out(environ, new_headers)
                    self._do_wordpress_login(environ, wordpress_session_id,
                                             new_headers)
                return
        #else:
            # There's a WordPress cookie, but user is logged in as a normal
            # CKAN user. Ignore the WordPress cookie.
            #return
    elif not wordpress_session_id and is_ckan_cookie:
        # Deal with the case where the user is logged out of WordPress,
        # i.e. the user WAS logged in with WordPress and the cookie was
        # deleted (probably because WordPress logged out).
        # Is the logged in user a WordPress user? (only affects the message
        # that is logged; the logout below happens in both cases)
        user_name = environ.get('REMOTE_USER', '')
        if user_name and user_name.startswith(self._user_name_prefix):
            log.error(
                'Was logged in as WordPress user %r but WordPress cookie no longer there. Forcing logout.',
                user_name)
        else:
            log.error(
                'User %r was logged into CKAN but not WordPress. Forcing logout.',
                user_name)
        # Force log out.
        self._log_out(environ, new_headers)
        # Redirect so that no more cookies get set up and our forced log out
        # comes into effect.
        # NOTE(review): the scheme is hard-coded to http:// and the host
        # falls back to 'www.hri.fi' - confirm this is intended.
        #redirect_to('http://' + config.get('wordpress_url', 'www.hri.fi') + environ['REQUEST_URI'])
        redirect_to('http://' + config.get('wordpress_url', 'www.hri.fi') + '/fi/wp-login.php?action=logout')