def bbox_query_ordered(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box. Returns packages in order
    of how similar the data's bounding box is to the search box (best first).

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''

    input_geometry = _bbox_2_wkt(bbox, srid)

    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}

    # First get the area of the query box
    sql = "SELECT ST_Area(ST_GeomFromText(:query_bbox, :query_srid));"
    params['search_area'] = Session.execute(sql, params).fetchone()[0]

    # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area AS spatial_ranking,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_Intersects(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'
             ORDER BY spatial_ranking DESC"""
    extents = Session.execute(sql, params).fetchall()
    log.debug('Spatial results: %r',
              [('%.2f' % extent.spatial_ranking, extent.package_id)
               for extent in extents[:20]])
    return extents
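
# The spatial_ranking computed in the SQL above follows Lanfear's method
# (USGS Open-File Report 2006-1279): the squared intersection area divided
# by the areas of the dataset extent and the search box. A minimal sketch of
# the same arithmetic in plain Python (illustrative only; the real
# computation happens in PostGIS):
def spatial_ranking(intersection_area, extent_area, search_area):
    # Ranges from 0 (no overlap) to 1 (extent and search box coincide)
    return intersection_area ** 2 / (extent_area * search_area)

# e.g. a dataset extent of area 4 lying fully inside a search box of area 10:
# spatial_ranking(4.0, 4.0, 10.0) == 0.4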
def setup(srid=None):

    if package_extent_table is None:
        define_spatial_tables(srid)
        log.debug('Spatial tables defined in memory')

    if model.package_table.exists():
        if not Table('geometry_columns', meta.metadata).exists() or \
           not Table('spatial_ref_sys', meta.metadata).exists():
            raise Exception(
                'The spatial extension is enabled, but PostGIS '
                'has not been set up in the database. '
                'Please refer to the "Setting up PostGIS" section in the README.'
            )

        if not package_extent_table.exists():
            try:
                package_extent_table.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if package_extent_table.exists():
                    Session.execute('DROP TABLE package_extent')
                    Session.commit()
                raise e
            log.debug('Spatial tables created')
        else:
            log.debug('Spatial tables already exist')
            # Future migrations go here
    else:
        log.debug('Spatial tables creation deferred')
def _refresh_harvest_objects(self, harvest_object, package_id):
    """
    Perform harvester housekeeping:
        - Flag the other objects of the source as not current
        - Set a reference to the package in the harvest object
        - Flag it as current
        - And save the changes
    """
    # Flag the other objects of this source as not current
    from ckanext.harvest.model import harvest_object_table
    u = update(harvest_object_table) \
        .where(harvest_object_table.c.package_id == bindparam('pkg_id')) \
        .values(current=False)
    Session.execute(u, params={'pkg_id': package_id})
    Session.commit()

    # Refresh current object from session, otherwise the
    # import paster command fails
    # (Copied from the Gemini harvester--not sure if necessary)
    Session.remove()
    Session.add(harvest_object)
    Session.refresh(harvest_object)

    # Set reference to package in the HarvestObject and flag it as
    # the current one
    if not harvest_object.package_id:
        harvest_object.package_id = package_id

    harvest_object.current = True
    harvest_object.save()
def init_db():
    # Setting up custom fields table for locales
    try:
        custom_field_table.create()
    except Exception as e:
        # Make sure the table does not remain incorrectly created
        if custom_field_table.exists():
            Session.execute('DROP TABLE custom_field_table')
            Session.commit()
        raise e
def bbox_query(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box.

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''
    input_geometry = _bbox_2_wkt(bbox, srid)

    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}

    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_CoveredBy(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'"""

    extents = Session.execute(sql, params).fetchall()
    # extents = Session.query(PackageExtent) \
    #     .filter(PackageExtent.package_id == Package.id) \
    #     .filter(PackageExtent.the_geom.intersects(input_geometry)) \
    #     .filter(Package.state == u'active')
    return extents
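
# Note the semantics: ST_CoveredBy only matches extents that lie entirely
# inside the search box, whereas bbox_query_ordered above uses ST_Intersects
# and also matches partial overlaps (the commented-out SQLAlchemy query hints
# at the same intersects-based idea). A minimal sketch of an intersects-based
# variant of the same query, assuming the same package_extent table
# (illustrative, not part of the original module):
def bbox_query_intersects(bbox, srid=None):
    input_geometry = _bbox_2_wkt(bbox, srid)
    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}
    sql = """SELECT package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_Intersects(package_extent.the_geom,
                                  ST_GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'"""
    return Session.execute(sql, params).fetchall()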
def setup():
    log.debug('Multilingual tables defined in memory')

    # Setting up package multilang table
    if not package_multilang_table.exists():
        try:
            package_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if package_multilang_table.exists():
                Session.execute('DROP TABLE package_multilang')
                Session.commit()
            raise e
        log.info('Package Multilingual table created')
def setup():
    log.debug('DCAT_AP-IT tables defined in memory')

    # Setting up tag multilang table
    if not dcatapit_vocabulary_table.exists():
        try:
            dcatapit_vocabulary_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if dcatapit_vocabulary_table.exists():
                Session.execute('DROP TABLE dcatapit_vocabulary')
                Session.commit()
            raise e
        log.info('DCATAPIT Tag Vocabulary table created')
def migrate(self):
    ''' Migrate filestore over in our very HDXish way :) '''
    results = Session.execute(
        "select id, revision_id, url from resource "
        "where resource_type = 'file.upload' "
        "and (url_type <> 'upload' or url_type is null) "
        "and url like '%storage%'")
    for id, revision_id, url in results:
        # Give it a second, would you?
        time.sleep(0.7)
        url_parts = urlparse(url)
        url_parts = url_parts.path.split("/")
        filename = url_parts[len(url_parts) - 1]
        response = requests.get(url, stream=True)
        if response.status_code != 200:
            print("failed to fetch %s (code %s)" % (url, response.status_code))
            continue
        resource_upload = ResourceUpload({'id': id})
        assert resource_upload.storage_path, "no storage configured aborting"
        directory = resource_upload.get_directory(id)
        filepath = resource_upload.get_path(id)
        try:
            os.makedirs(directory)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise
        with open(filepath, 'wb+') as out:
            for chunk in response.iter_content(1024):
                if chunk:
                    out.write(chunk)
        Session.execute("update resource set url_type = 'upload', "
                        "url = :url where id = :id",
                        {'url': filename, 'id': id})
        Session.execute("update resource_revision set url_type = 'upload', "
                        "url = :url where id = :id and "
                        "revision_id = :revision_id",
                        {'url': filename, 'id': id,
                         'revision_id': revision_id})
        Session.commit()
        print("Saved url %s" % url)
def update_stat_metameta_table(org_id, metameta_id, state):
    """
    update state on the metameta table with the new status
    """
    update_sql = ("UPDATE metameta SET state=:state, modified_date=:modified_date "
                  "WHERE id=:metameta_id AND org_id=:org_id;")
    result = Session.execute(update_sql, {
        'state': state,
        'modified_date': datetime.datetime.utcnow().isoformat(),
        'metameta_id': metameta_id,
        'org_id': org_id,
    })
    Session.commit()
def init_db(): """ Create board, app, mark tables in the database. Prepopulate category table with default data. """ if not model.package_table.exists(): # during tests? return session = Session() for table in [board_table, app_table, mark_table]: if not table.exists(): table.create(checkfirst=True) log.debug("Apps {} have been created".format(table.name)) for board_name, board_desc in DEFAULT_BOARDS.iteritems(): if not Board.get_by_slug(slugify(board_name)): board = Board() board.name = board_name board.slug = slugify(board_name) board.description = board_desc session.add(board) log.debug("Add {0} to {1} table".format(board_name, board_table.name)) session.commit() if not migration_table.exists(): migration_table.create(checkfirst=True) session.commit() migration_number = session.query(migration_table).count() log.debug('Migration number: %s', migration_number) migration_sql_list = [] for counter, sql in enumerate(migration_sql_list, start=1): if migration_number < counter: try: session.execute(sql) except ProgrammingError: session.rollback() finally: session.execute(migration_table.insert()) session.commit() session.close()
def insert_metameta_table(org_id, data):
    """
    creates entries in metameta table for the organisation.
    value fields are null
    """
    errors = {}

    # key
    if not ('key' in data and len(data['key'].strip())):
        # TODO validation
        errors.update({'key': _('%s field is required.' % ('key'))})
        return errors
    key = data['key'].strip()

    # label: if not set, insert key
    if 'label' in data and len(data['label'].strip()):
        label = data['label'].strip()
    else:
        label = key.title()

    # convert non-alphanumeric to underscore
    key = create_search_key(key)
    if org_has_defaults(org_id, key):
        # TODO validation
        errors.update({
            'key': _('%s has already been used by the organization.' % (key))
        })
        return errors

    # readonly attribute
    readonly = str(data.get('readonly', False)).lower() == 'true'

    # validator
    validator = data.get('validator', validator_not_empty)
    # default_value
    default_value = data.get('default_value', '')
    # field_type
    field_type = data.get('field_type', field_type_text)

    min_value = max_value = 0.0
    if field_type == field_type_text and validator == validator_range:
        min_value = data.get('min_value', 0.0)
        max_value = data.get('max_value', 0.0)

    sql = ("INSERT INTO metameta(id, org_id, key, default_value, label, validator, "
           "state, custom, presettable, readonly, field_type, min_value, max_value, "
           "sort_order, modified_date) "
           "VALUES (:id, :org_id, :key, :default_value, :label, :validator, :state, "
           ":custom, :presettable, :readonly, :field_type, :min_value, :max_value, "
           "(CASE WHEN (SELECT MAX(sort_order)+1 FROM metameta WHERE org_id=:org_id)>101 "
           "THEN (SELECT MAX(sort_order)+1 FROM metameta WHERE org_id=:org_id) "
           "ELSE 101 END), :modified_date);")
    result = Session.execute(sql, {
        'id': str(uuid.uuid4()),
        'org_id': org_id,
        'key': key,
        'default_value': default_value,
        'label': label,
        'validator': validator,
        'state': state_active,
        'custom': True,
        'presettable': True,
        'readonly': readonly,
        'field_type': field_type,
        'min_value': float(min_value),
        'max_value': float(max_value),
        'modified_date': datetime.datetime.utcnow().isoformat(),
    })
    Session.commit()
    return errors
def version():
    u'''Return current version'''
    log.info(u"Returning current DB version")
    try:
        from ckan.model import Session
        ver = Session.execute(u'select version from '
                              u'migrate_version;').fetchall()
        click.secho(u"Latest data schema version: {0}".format(ver[0][0]),
                    bold=True)
    except Exception as e:
        error_shout(e)
def select_metametadata_for_org(org_id, metameta_id=None, active_only=True,
                                withPrefix=False, sort_order_from=None,
                                sort_order_to=None):
    """
    parameter: string org_id
    parameter: metameta_id
    parameter: boolean withPrefix
    returns list of metameta dicts
    """
    records = []
    if org_id is None or len(org_id.strip()) == 0:
        return records

    params = {'org_id': org_id}
    sql = ("SELECT id, key, default_value, label, revision_id, state, validator, "
           "custom, presettable, readonly, field_type, min_value, max_value, "
           "sort_order, modified_date FROM metameta WHERE org_id = :org_id ")
    if metameta_id is not None:
        sql = sql + " AND id = :metameta_id"
        params['metameta_id'] = metameta_id
    if active_only:
        sql = sql + " AND state = :state"
        params['state'] = state_active
    if sort_order_from and sort_order_to:
        sql = sql + " AND sort_order BETWEEN :sort_from AND :sort_to "
        params['sort_from'] = int(sort_order_from)
        params['sort_to'] = int(sort_order_to)
    elif sort_order_from:
        sql = sql + " AND sort_order >= :sort_from "
        params['sort_from'] = int(sort_order_from)
    elif sort_order_to:
        sql = sql + " AND sort_order <= :sort_to "
        params['sort_to'] = int(sort_order_to)
    sql = sql + "ORDER BY sort_order, presettable, custom, key ASC;"

    rows = Session.execute(sql, params)
    for row in rows:
        key = row['key']
        if not withPrefix:
            key = remove_prefix_key(key)
        records.append({
            'id': row['id'],
            'key': key,
            'default_value': row['default_value'],
            'label': row['label'],
            'revision_id': row['revision_id'],
            'state': row['state'],
            'validator': row['validator'],
            'custom': row['custom'],
            'presettable': row['presettable'],
            'readonly': row['readonly'],
            'field_type': row['field_type'],
            'min_value': row['min_value'],
            'max_value': row['max_value'],
            'sort_order': row['sort_order'],
            'modified_date': row['modified_date'],
        })
    return records
def init_tables(engine):
    if not Table('geometry_columns', meta.metadata).exists() or \
       not Table('spatial_ref_sys', meta.metadata).exists():
        raise Exception('PostGIS has not been set up in the database. Please '
                        'refer to the "Setting up PostGIS" section in the '
                        'ckanext-spatial README.')

    if not organization_extent_table.exists():
        try:
            organization_extent_table.create()
        except:
            # Make sure the table does not remain incorrectly created
            # (eg without geom column or constraints)
            if organization_extent_table.exists():
                Session.execute('DROP TABLE organization_extent')
                Session.commit()
            raise
        log.debug('organization_extent table created in the db')
    else:
        log.debug('organization_extent table already exists in the db')
def insert_metameta_options(org_id, key, option_value):
    """
    creates entries in metameta options table for the organisation.
    value fields are null
    """
    sql = ("INSERT INTO metameta_options(id, org_id, key, option_value, modified_date) "
           "VALUES (:id, :org_id, :key, :option_value, :modified_date);")
    result = Session.execute(sql, {
        'id': str(uuid.uuid4()),
        'org_id': org_id,
        'key': key,
        'option_value': option_value,
        'modified_date': datetime.datetime.utcnow().isoformat(),
    })
    Session.commit()
    return
def update_metameta_table(org_id, metameta_id, data):
    """
    update metameta table with new values
    """
    errors = {}

    # key
    if not ('key' in data and len(data['key'].strip())):
        # TODO validation
        errors.update({'key': _('%s field is required.' % ('key'))})
        return errors
    key = data['key'].strip()

    # label: if not set, insert key
    if 'label' in data and len(data['label'].strip()):
        label = data['label'].strip()
    else:
        label = key.title()

    # convert non-alphanumeric to underscore
    key = create_search_key(key)
    if org_has_defaults(org_id, key, metameta_id):
        # TODO validation
        errors.update({
            'key': _('%s has already been used by the organization.' % (key))
        })
        return errors

    # readonly attribute
    readonly = str(data.get('readonly', False)).lower() == 'true'

    # validator
    validator = data.get('validator', validator_not_empty)
    # default_value
    default_value = data.get('default_value', '')
    # field_type
    field_type = data.get('field_type', field_type_text)

    min_value = max_value = 0.0
    if field_type == field_type_text and validator == validator_range:
        min_value = data.get('min_value', 0.0)
        max_value = data.get('max_value', 0.0)

    update_sql = ("UPDATE metameta SET key=:key, default_value=:default_value, "
                  "label=:label, validator=:validator, readonly=:readonly, "
                  "field_type=:field_type, min_value=:min_value, max_value=:max_value, "
                  "modified_date=:modified_date WHERE id=:metameta_id AND org_id=:org_id;")
    result = Session.execute(update_sql, {
        'key': key,
        'default_value': default_value,
        'label': label,
        'validator': validator,
        'readonly': readonly,
        'field_type': field_type,
        'min_value': float(min_value),
        'max_value': float(max_value),
        'modified_date': datetime.datetime.utcnow().isoformat(),
        'metameta_id': metameta_id,
        'org_id': org_id,
    })
    Session.commit()
    return errors
def remove_metameta_options(org_id, key, option_value=None):
    """
    removes entries from the metameta options table for the organisation,
    optionally restricted to a single option_value
    """
    params = {'org_id': org_id, 'key': key}
    sql = "DELETE FROM metameta_options WHERE org_id = :org_id AND key = :key "
    if option_value:
        sql = sql + " AND option_value = :option_value "
        params['option_value'] = option_value
    sql = sql + ";"
    result = Session.execute(sql, params)
    Session.commit()
    return
def setup():
    if lb_dataset_reports is None or lb_dataset_actions is None:
        define_tables()
        log.debug('Lightbase report tables defined in memory')

    if model.repo.are_tables_created():
        if not lb_dataset_actions.exists():
            try:
                lb_dataset_actions.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if lb_dataset_actions.exists():
                    Session.execute('DROP TABLE lb_dataset_actions')
                    Session.commit()
                raise e
            log.debug('Dataset actions tables created')
        else:
            log.debug('Dataset actions tables already exist')
            # Future migrations go here

        if not lb_dataset_reports.exists():
            try:
                lb_dataset_reports.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if lb_dataset_reports.exists():
                    Session.execute('DROP TABLE lb_dataset_reports')
                    Session.commit()
                raise e
            log.debug('Lightbase report tables created')
def migrate_v2():
    log.debug('Migrating apps tables to v2. This may take a while...')

    statements = '''
    CREATE TABLE application_tag_temp AS SELECT * FROM application_tag;
    CREATE TABLE idea_tag_temp AS SELECT * FROM idea_tag;
    '''
    Session.execute(statements)
    Session.commit()

    application_tag_table.drop()
    idea_tag_table.drop()
    application_tag_table.create()
    idea_tag_table.create()
    Session.commit()

    apps_tags = Session.execute('SELECT application_id, tag_id FROM application_tag_temp')
    ideas_tags = Session.execute('SELECT idea_id, tag_id FROM idea_tag_temp')

    for app_tag in apps_tags:
        Session.execute('INSERT INTO application_tag (id, application_id, tag_id) '
                        'VALUES (:id, :application_id, :tag_id)',
                        {'id': make_uuid(),
                         'application_id': app_tag[0],
                         'tag_id': app_tag[1]})
    for idea_tag in ideas_tags:
        Session.execute('INSERT INTO idea_tag (id, idea_id, tag_id) '
                        'VALUES (:id, :idea_id, :tag_id)',
                        {'id': make_uuid(),
                         'idea_id': idea_tag[0],
                         'tag_id': idea_tag[1]})

    statements = '''
    DROP TABLE application_tag_temp;
    DROP TABLE idea_tag_temp;
    '''
    Session.execute(statements)
    Session.commit()

    log.info('Apps tables migrated to v2')
def init_metadata(org_id):
    # retrieve metameta records for org_id
    records = select_metametadata_for_org(org_id, metameta_id=None,
                                          active_only=False, withPrefix=True)

    sql = ("INSERT INTO metameta(id, org_id, key, default_value, label, validator, "
           "state, custom, presettable, readonly, field_type, min_value, max_value, "
           "sort_order, modified_date) VALUES ")
    update_count = 0
    for key, values in PRESET_METAMETADATA.items():
        if not key or len(key) == 0:
            continue
        print('********key=%s*******' % (key))
        metameta_key = add_prefix_key(key)

        # check whether key exists or not
        key_exists = False
        for record in records:
            if record['key'] == metameta_key:
                key_exists = True
                break

        if not key_exists:
            # check field type
            if values.get('field_type') == field_type_single_choice:
                presets = get_metameta_options_preset(metameta_key)
                update_metameta_options(org_id, metameta_key, presets)
            update_count += 1
            # create a record for a text field
            # (values come from the PRESET_METAMETADATA constant, not user input)
            sql = sql + " ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %r, %r, %r, '%s', %f, %f, %d, '%s')," % (
                str(uuid.uuid4()), org_id, metameta_key,
                values.get('default_value'), values.get('label'),
                values.get('validator'), values.get('state'),
                str(values.get('custom')), str(values.get('presettable')),
                str(values.get('readonly')), values.get('field_type'),
                0.0, 0.0, int(values.get('sort_order')),
                datetime.datetime.utcnow().isoformat())

    if update_count > 0:
        sql = sql[:-1]
        sql = sql + ';'
        result = Session.execute(sql)
        Session.commit()
def select_metameta_options(org_id, key, option_value=None):
    """
    parameter: string org_id
    parameter: key
    parameter: option_value
    returns list of metameta option values
    """
    records = []
    params = {'org_id': org_id, 'key': key}
    sql = "SELECT option_value FROM metameta_options WHERE org_id = :org_id AND key = :key "
    if option_value:
        sql = sql + " AND option_value = :option_value "
        params['option_value'] = option_value
    sql = sql + " ORDER BY option_value ASC;"
    rows = Session.execute(sql, params)
    for row in rows:
        records.append(row['option_value'])
    return records
def get_package_id_by_pid(pid, pid_type):
    """
    Find pid by id and type.

    :param pid: id of the pid
    :param pid_type: type of the pid (primary, relation)
    :return: id of the package
    """
    query = select(['key', 'package_id']).where(
        and_(model.PackageExtra.value == pid,
             model.PackageExtra.key.like('pids_%_id'),
             model.PackageExtra.state == 'active'))

    for key, package_id in [('pids_%s_type' % key.split('_')[1], package_id)
                            for key, package_id in Session.execute(query)]:
        query = select(['package_id']).where(
            and_(model.PackageExtra.value == pid_type,
                 model.PackageExtra.key == key,
                 model.PackageExtra.state == 'active',
                 model.PackageExtra.package_id == package_id))
        for package_id, in Session.execute(query):
            return package_id

    return None
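
# The lookup above relies on PIDs being flattened into package extras as
# indexed key pairs. A minimal sketch of the assumed layout (illustrative
# values, inferred from the 'pids_%_id' / 'pids_<n>_type' keys used above):
#
#   extras = {
#       'pids_0_id': 'urn:nbn:fi:csc-example',   # the PID itself
#       'pids_0_type': 'primary',                # its type
#       'pids_1_id': 'doi:10.1234/example',
#       'pids_1_type': 'relation',
#   }
#
# so get_package_id_by_pid('urn:nbn:fi:csc-example', 'primary') would match
# a package carrying the extras above.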
def org_has_defaults(org_id, key=None, metameta_id=None):
    """
    check if org has any entries in metameta table
    (it won't if no defaults have been set before)
    parameter: key
    """
    params = {'org_id': org_id}
    sql = "SELECT 1 FROM metameta WHERE org_id = :org_id"
    if key is not None:
        sql = sql + " AND key = :key"
        params['key'] = key
    if metameta_id is not None:
        sql = sql + " AND id <> :metameta_id"
        params['metameta_id'] = metameta_id
    sql = sql + " LIMIT 1;"
    result = Session.execute(sql, params)
    return result.first() is not None
def get_package_contacts(pkg_id):
    """
    Returns contact information for the dataset with the given id.

    :param pkg_id: the id of the package whose contact information to get
    :return: a list of contact information dicts
    :rtype: list of dicts
    """
    contacts_regex = r'^(contact)_(\d+)_(.+)$'
    query = select(['id', 'key', 'value', 'state']).where(
        and_(
            model.PackageExtra.package_id == pkg_id,
            model.PackageExtra.key.like('contact_%_%'),
            model.PackageExtra.state == 'active'
        )
    )
    extras = Session.execute(query)
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    contacts_by_index = {}
    for extra in extras:
        key = extra['key']
        value = extra['value']
        match = re.match(contacts_regex, key)
        if match:
            index = match.group(2)
            field = match.group(3)
            contact = contacts_by_index.get(index, {})
            contact[u'index'] = index
            contact[field] = value
            if field == 'email':
                contact[u'id'] = extra['id']
            contacts_by_index[index] = contact

    contacts = [c for c in contacts_by_index.values()]
    return sorted(contacts, key=lambda c: int(c['index']))
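
# A minimal sketch of the transformation performed above, with illustrative
# extras (the 'contact_<index>_<field>' key convention comes from the regex
# in get_package_contacts):
#
#   extras: contact_0_name='Ada', contact_0_email='ada@example.org',
#           contact_1_name='Grace'
#
# would yield:
#
#   [{'index': '0', 'name': 'Ada', 'email': 'ada@example.org', 'id': <extra id>},
#    {'index': '1', 'name': 'Grace'}]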
def get_package_id_by_data_pids(data_dict):
    '''
    Try if the provided data PIDs match exactly one dataset.

    :param data_dict:
    :return: Package id or None if not found.
    '''
    data_pids = get_pids_by_type('data', data_dict)
    if len(data_pids) == 0:
        return None
    pid_list = [pid.get('id') for pid in data_pids]

    # Get package ID's with matching PIDS
    query = Session.query(model.PackageExtra.package_id.distinct()).\
        filter(model.PackageExtra.value.in_(pid_list))
    pkg_ids = query.all()

    if len(pkg_ids) != 1:
        return None  # Nothing to do if we get many or zero datasets

    # Get extras with the received package ID's
    query = select(['key', 'value', 'state']).where(
        and_(model.PackageExtra.package_id.in_(pkg_ids),
             model.PackageExtra.key.like('pids_%')))
    extras = Session.execute(query)

    # Dictize the results
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    # Check that matching PIDS are type 'data'.
    for extra in extras:
        key = extra['key'].split('_')  # eg. ['pids', '0', 'id']
        if key[2] == 'id' and extra['value'] in pid_list:
            type_key = '_'.join(key[:2] + ['type'])
            if not any(x['key'] == type_key and x['value'] == 'data'
                       for x in extras):
                return None  # Found a hit with wrong type of PID

    return pkg_ids[0]  # No problems found, so use this
def get_package_id_by_primary_pid(data_dict):
    '''
    Try if the provided primary PID matches exactly one dataset.

    NOTE: this method was previously get_package_id_by_data_pids; is the
    below correct, or should relation PIDs also be used?

    :param data_dict:
    :return: Package id or None if not found.
    '''
    primary_pid = get_primary_pid(data_dict)
    if not primary_pid:
        return None
    pid_list = [primary_pid]

    # Get package ID's with matching PIDS
    query = Session.query(model.PackageExtra.package_id.distinct()).\
        filter(model.PackageExtra.value.in_(pid_list))
    pkg_ids = query.all()

    if len(pkg_ids) != 1:
        return None  # Nothing to do if we get many or zero datasets

    # Get extras with the received package ID's
    query = select(['key', 'value', 'state']).where(
        and_(model.PackageExtra.package_id.in_(pkg_ids),
             model.PackageExtra.key.like('pids_%')))
    extras = Session.execute(query)

    # Dictize the results
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    # Check that matching PIDS are type 'primary'.
    for extra in extras:
        key = extra['key'].split('_')  # eg. ['pids', '0', 'id']
        if key[2] == 'id' and extra['value'] in pid_list:
            type_key = '_'.join(key[:2] + ['type'])
            if not any(x['key'] == type_key and x['value'] == 'primary'
                       for x in extras):
                return None  # Found a hit with wrong type of PID

    return pkg_ids[0]  # No problems found, so use this
def get_metameta_minmax(org_id, key):
    """
    parameter: org_id
    parameter: key
    returns list of min/max dicts for the key
    """
    records = []
    if org_id is None or len(org_id.strip()) == 0:
        return records
    sql = ("SELECT min_value, max_value FROM metameta "
           "WHERE org_id = :org_id AND key = :key AND state = :state LIMIT 1;")
    rows = Session.execute(sql, {'org_id': org_id, 'key': key,
                                 'state': state_active})
    for row in rows:
        records.append({
            'min_value': row['min_value'],
            'max_value': row['max_value'],
        })
    return records
def postgis_version():
    result = Session.execute('SELECT PostGIS_Lib_Version()')
    return result.scalar()
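
# postgis_version() is useful for feature gating: the non-ST_-prefixed
# aliases such as GeomFromText were removed in PostGIS 2.0, while
# ST_GeomFromText works on both old and new versions. A minimal sketch of
# picking the function name by version (illustrative; modern code can simply
# always use ST_GeomFromText):
def geom_from_text_function():
    major = int(postgis_version().split('.')[0])
    return 'ST_GeomFromText' if major >= 2 else 'GeomFromText'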
def import_stage(self, harvest_object):
    log = logging.getLogger(__name__ + '.import')
    log.debug('Import stage for harvest object: %s', harvest_object.id)

    if not harvest_object:
        log.error('No harvest object received')
        return False

    self._set_source_config(harvest_object.source.config)

    status = self._get_object_extra(harvest_object, 'status')

    # Get the last harvested object (if any)
    previous_object = Session.query(HarvestObject) \
        .filter(HarvestObject.guid == harvest_object.guid) \
        .filter(HarvestObject.current == True) \
        .first()

    if status == 'delete':
        # Delete package
        context = {'model': model, 'session': model.Session,
                   'user': self._get_user_name()}
        p.toolkit.get_action('package_delete')(
            context, {'id': harvest_object.package_id})
        log.info('Deleted package {0} with guid {1}'.format(
            harvest_object.package_id, harvest_object.guid))
        return True

    # Parse ISO document
    # try:
    #     iso_values = ISODocument(harvest_object.content).read_values()
    # except Exception as e:
    #     self._save_object_error('Error parsing ISO document for object {0}: {1}'.format(harvest_object.id, str(e)),
    #                             harvest_object, 'Import')
    #     return False

    # Flag previous object as not current anymore
    if previous_object:
        previous_object.current = False
        previous_object.add()

    # Generate GUID if not present (i.e. it's a manual import)
    if not harvest_object.guid:
        self._save_object_error('Missing GUID for object {0}'
                                .format(harvest_object.id),
                                harvest_object, 'Import')
        return False

    # Get document modified date
    # try:
    #     metadata_modified_date = dateutil.parser.parse(iso_values['metadata-date'], ignoretz=True)
    # except ValueError:
    #     self._save_object_error('Could not extract reference date for object {0} ({1})'
    #                             .format(harvest_object.id, iso_values['metadata-date']),
    #                             harvest_object, 'Import')
    #     return False
    # harvest_object.metadata_modified_date = metadata_modified_date
    harvest_object.metadata_modified_date = datetime.date.today()
    harvest_object.add()

    # Build the package dict
    package_dict = self.get_package_dict(harvest_object)
    if not package_dict:
        log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
        return False

    # Create / update the package
    context = {'model': model,
               'session': model.Session,
               'user': self._get_user_name(),
               'extras_as_string': True,
               'api_version': '2',
               'return_id_only': True}
    if context['user'] == self._site_user['name']:
        context['ignore_auth'] = True

    # The default package schema does not like Upper case tags
    tag_schema = logic.schema.default_tags_schema()
    tag_schema['name'] = [not_empty, unicode]

    # Flag this object as the current one
    harvest_object.current = True
    harvest_object.add()

    if status == 'new':
        package_schema = logic.schema.default_create_package_schema()
        package_schema['tags'] = tag_schema
        context['schema'] = package_schema

        # We need to explicitly provide a package ID, otherwise ckanext-spatial
        # won't be able to link the extent to the package.
        package_dict['id'] = unicode(uuid.uuid4())
        package_schema['id'] = [unicode]

        # Save reference to the package on the object
        harvest_object.package_id = package_dict['id']
        harvest_object.add()

        # Defer constraints and flush so the dataset can be indexed with
        # the harvest object id (on the after_show hook from the harvester
        # plugin)
        Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
        model.Session.flush()

        try:
            package_id = p.toolkit.get_action('package_create')(context, package_dict)
            log.info('Created new package %s with guid %s', package_id, harvest_object.guid)
        except p.toolkit.ValidationError as e:
            self._save_object_error('Validation Error: %s' % str(e.error_summary),
                                    harvest_object, 'Import')
            return False

    elif status == 'change':
        # Check if the document has changed
        m = hashlib.md5()
        m.update(previous_object.content)
        old_md5 = m.hexdigest()

        m = hashlib.md5()
        m.update(harvest_object.content)
        new_md5 = m.hexdigest()

        if old_md5 == new_md5:
            # Assign the previous job id to the new object to
            # avoid losing history
            harvest_object.harvest_job_id = previous_object.job.id
            harvest_object.add()
            harvest_object.metadata_modified_date = previous_object.metadata_modified_date

            # Delete the previous object to avoid cluttering the object table
            previous_object.delete()
            log.info('Document with GUID %s unchanged, skipping...', harvest_object.guid)
        else:
            package_schema = logic.schema.default_update_package_schema()
            package_schema['tags'] = tag_schema
            context['schema'] = package_schema

            package_dict['id'] = harvest_object.package_id
            try:
                package_id = p.toolkit.get_action('package_update')(context, package_dict)
                log.info('Updated package %s with guid %s', package_id, harvest_object.guid)
            except p.toolkit.ValidationError as e:
                self._save_object_error('Validation Error: %s' % str(e.error_summary),
                                        harvest_object, 'Import')
                return False

    model.Session.commit()
    return True
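
# A note on the 'SET CONSTRAINTS ... DEFERRED' trick used above: the new
# package row and the harvest_object row that points at it are written in the
# same transaction, so the foreign key from harvest_object.package_id to
# package.id would fail if checked immediately. Deferring moves the check to
# commit time. This only works if the constraint was declared deferrable; a
# sketch of the assumed DDL (illustrative, the actual definition lives in
# ckanext-harvest's migrations):
#
#   ALTER TABLE harvest_object
#       ADD CONSTRAINT harvest_object_package_id_fkey
#       FOREIGN KEY (package_id) REFERENCES package(id)
#       DEFERRABLE INITIALLY IMMEDIATE;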
def version(self):
    from ckan.model import Session
    print(Session.execute('select version from migrate_version;').fetchall())
def import_stage(self, harvest_object):
    log = logging.getLogger(__name__ + '.import')
    log.debug('Import stage for harvest object: %s', harvest_object.id)

    if not harvest_object:
        log.error('No harvest object received')
        return False

    self._set_source_config(harvest_object.source.config)

    status = self._get_object_extra(harvest_object, 'status')

    # Get the last harvested object (if any)
    previous_object = Session.query(HarvestObject) \
        .filter(HarvestObject.guid == harvest_object.guid) \
        .filter(HarvestObject.current == True) \
        .first()

    if status == 'delete':
        # Delete package
        context = {'model': model, 'session': model.Session,
                   'user': self._get_user_name()}
        p.toolkit.get_action('package_delete')(
            context, {'id': harvest_object.package_id})
        log.info('Deleted package {0} with guid {1}'.format(
            harvest_object.package_id, harvest_object.guid))
        return True

    if previous_object:
        # Flag previous object as not current anymore
        previous_object.current = False
        previous_object.add()

        # Check if metadata was modified
        # GeoNode does not offer a "latest modified date".
        # Let's compare if any value changed
        content_old = previous_object.content
        content_new = harvest_object.content
        is_modified = content_old != content_new
        prev_job_id = previous_object.job.id
    else:
        is_modified = True
        prev_job_id = None

    # Error if GUID not present
    if not harvest_object.guid:
        self._save_object_error('Missing GUID for object {0}'
                                .format(harvest_object.id),
                                harvest_object, 'Import')
        return False

    log.info('Object GUID:%s is modified: %s', harvest_object.guid, is_modified)

    # Let's set the metadata date according to the import time. Not the best
    # choice, since we'd like to set the original metadata date. If GeoNode
    # provided this info, we could rely on it to find out if a dataset needs
    # to be updated.
    harvest_object.metadata_modified_date = datetime.now()
    harvest_object.add()

    # Build the package dict
    package_dict = self.get_package_dict(harvest_object)
    if not package_dict:
        log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
        return False

    # Create / update the package
    context = {'model': model,
               'session': model.Session,
               'user': self._get_user_name(),
               'extras_as_string': True,
               'api_version': '2',
               'return_id_only': True}
    if context['user'] == self._site_user['name']:
        context['ignore_auth'] = True

    # The default package schema does not like Upper case tags
    tag_schema = logic.schema.default_tags_schema()
    tag_schema['name'] = [not_empty, unicode]

    # Flag this object as the current one
    harvest_object.current = True
    harvest_object.add()

    if status == 'new':
        package_schema = logic.schema.default_create_package_schema()
        package_schema['tags'] = tag_schema
        context['schema'] = package_schema

        # We need to explicitly provide a package ID, otherwise ckanext-spatial
        # won't be able to link the extent to the package.
        package_dict['id'] = unicode(uuid.uuid4())
        package_schema['id'] = [unicode]

        # Save reference to the package on the object
        harvest_object.package_id = package_dict['id']
        harvest_object.add()

        # Defer constraints and flush so the dataset can be indexed with
        # the harvest object id (on the after_show hook from the harvester
        # plugin)
        Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
        model.Session.flush()

        try:
            # package_id = p.toolkit.get_action('package_create')(context, package_dict)
            package_id = self._create_package(context, package_dict, harvest_object)
            log.info('Created new package %s with guid %s', package_id, harvest_object.guid)
            self._post_package_create(package_id, harvest_object)
        except p.toolkit.ValidationError as e:
            self._save_object_error('Validation Error: %s' % str(e.error_summary),
                                    harvest_object, 'Import')
            return False

    elif status == 'change':
        # Check if the document has changed
        if not is_modified:
            # Assign the previous job id to the new object to
            # avoid losing history
            harvest_object.harvest_job_id = prev_job_id
            harvest_object.add()
            harvest_object.metadata_modified_date = previous_object.metadata_modified_date

            # Delete the previous object to avoid cluttering the object table
            previous_object.delete()
            log.info('Document with GUID %s unchanged, skipping...', harvest_object.guid)
        else:
            package_schema = logic.schema.default_update_package_schema()
            package_schema['tags'] = tag_schema
            context['schema'] = package_schema

            package_dict['id'] = harvest_object.package_id
            try:
                # package_id = p.toolkit.get_action('package_update')(context, package_dict)
                package_id = self._update_package(context, package_dict, harvest_object)
                log.info('Updated package %s with guid %s', package_id, harvest_object.guid)
                self._post_package_update(package_id, harvest_object)
            except p.toolkit.ValidationError as e:
                self._save_object_error('Validation Error: %s' % str(e.error_summary),
                                        harvest_object, 'Import')
                return False

    model.Session.commit()
    return True
                Session.commit()
            raise e
        log.info('Package Multilingual table created')
    else:
        log.info('Package Multilingual table already exists')

    # Setting up group multilang table
    if not group_multilang_table.exists():
        try:
            group_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if group_multilang_table.exists():
                Session.execute('DROP TABLE group_multilang')
                Session.commit()
            raise e
        log.info('Group Multilingual table created')
    else:
        log.info('Group Multilingual table already exists')

    # Setting up resource multilang table
    if not resource_multilang_table.exists():
        try:
            resource_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if resource_multilang_table.exists():
                Session.execute('DROP TABLE resource_multilang')
                Session.commit()
            raise e
        log.info('Resource Multilingual table created')
    else:
        log.info('Resource Multilingual table already exists')
def init_db(): """ Create boards, threads and posts tables in the database. Prepopulate board table with default data. """ if not model.package_table.exists(): # during tests? return session = Session() if not board_table.exists(): board_table.create(checkfirst=True) thread_table.create(checkfirst=True) post_table.create(checkfirst=True) log.debug("Forum tables have been created") for board_name, board_desc in DEFAULT_BOARDS.iteritems(): board = Board() board.name = board_name board.slug = slugify(board_name) board.description = board_desc session.add(board) if session.new: log.debug('Default boards created') session.commit() if not migration_table.exists(): migration_table.create(checkfirst=True) session.commit() if not banned_table.exists(): banned_table.create(checkfirst=True) session.commit() if not unsubscription_table.exists(): unsubscription_table.create(checkfirst=True) session.commit() migration_number = session.query(migration_table).count() log.debug('Migration number: %s', migration_number) migration_sql_list = [ "ALTER TABLE forum_post ADD COLUMN active boolean DEFAULT TRUE", "ALTER TABLE forum_thread ADD COLUMN active boolean DEFAULT TRUE", "ALTER TABLE forum_board ADD COLUMN active boolean DEFAULT TRUE", "ALTER TABLE forum_thread DROP COLUMN slug", "ALTER TABLE forum_thread ADD COLUMN can_post boolean DEFAULT TRUE", "ALTER TABLE forum_board ADD COLUMN can_post boolean DEFAULT TRUE", u"INSERT INTO forum_board(\"id\", \"name\", \"slug\", \"description\", \"active\", \"can_post\") " + u"VALUES(DEFAULT, 'Запропонувати набір', 'zaproponuvati-nabir', '', true, false)" ] for counter, sql in enumerate(migration_sql_list, start=1): if migration_number < counter: try: log.debug(sql) session.execute(sql) except ProgrammingError as e: print(e) log.debug('Migration have been rolled back.') session.rollback() finally: session.execute(migration_table.insert()) session.commit() session.close()
def import_stage(self, harvest_object):
    log = logging.getLogger(__name__ + '.import')
    log.debug('%s: Import stage for harvest object: %s',
              self.harvester_name(), harvest_object.id)

    if not harvest_object:
        log.error('No harvest object received')
        return False

    if not harvest_object.content:
        log.error('Harvest object has no content')
        self._save_object_error(
            'Empty content for object %s' % harvest_object.id,
            harvest_object, 'Import')
        return False

    self._set_source_config(harvest_object.source.config)

    status = self._get_object_extra(harvest_object, 'status')

    # Get the last harvested object (if any)
    previous_object = Session.query(HarvestObject) \
        .filter(HarvestObject.guid == harvest_object.guid) \
        .filter(HarvestObject.current == True) \
        .first()

    context = {
        'model': model,
        'session': model.Session,
        'user': self._get_user_name()
    }

    if status == 'delete':
        # Delete package
        p.toolkit.get_action('package_delete')(
            context, {'id': harvest_object.package_id})
        log.info('Deleted package {0} with guid {1}'.format(
            harvest_object.package_id, harvest_object.guid))
        return True

    # Flag previous object as not current anymore
    if previous_object:
        previous_object.current = False
        previous_object.add()

    # Flag this object as the current one
    harvest_object.current = True
    harvest_object.add()

    # Generate GUID if not present (i.e. it's a manual import)
    if not harvest_object.guid:
        self._save_object_error(
            'Missing GUID for object {0}'.format(harvest_object.id),
            harvest_object, 'Import')
        return False

    # pre-check to skip resource logic in case no changes occurred remotely
    if status == 'change':

        # Check if the document has changed
        m = hashlib.md5()
        m.update(previous_object.content.encode())
        old_md5 = m.hexdigest()

        m = hashlib.md5()
        m.update(harvest_object.content.encode())
        new_md5 = m.hexdigest()

        if old_md5 == new_md5:

            # Assign the previous job id to the new object to
            # avoid losing history
            harvest_object.harvest_job_id = previous_object.job.id
            harvest_object.add()

            harvest_object.metadata_modified_date = previous_object.metadata_modified_date
            harvest_object.add()

            # Delete the previous object to avoid cluttering the object table
            previous_object.delete()

            # Reindex the corresponding package to update the reference to
            # the harvest object
            context.update({'validate': False, 'ignore_auth': True})
            try:
                package_dict = logic.get_action('package_show')(
                    context, {'id': harvest_object.package_id})
            except p.toolkit.ObjectNotFound:
                pass
            else:
                for extra in package_dict.get('extras', []):
                    if extra['key'] == 'harvest_object_id':
                        extra['value'] = harvest_object.id
                if package_dict:
                    package_index = PackageSearchIndex()
                    package_index.index_package(package_dict)

            log.info('%s document with GUID %s unchanged, skipping...',
                     self.harvester_name(), harvest_object.guid)
            model.Session.commit()

            return "unchanged"

    # Build the package dict
    package_dict, metadata = self.create_package_dict(
        harvest_object.guid, harvest_object.content)

    if not package_dict:
        log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
        return False

    package_dict['name'] = self._gen_new_name(package_dict['title'])

    # We need to get the owner organization (if any) from the harvest
    # source dataset
    source_dataset = model.Package.get(harvest_object.source.id)
    if source_dataset.owner_org:
        package_dict['owner_org'] = source_dataset.owner_org

    self.attach_resources(metadata, package_dict, harvest_object)

    # Create / update the package
    context = {
        'model': model,
        'session': model.Session,
        'user': self._get_user_name(),
        'extras_as_string': True,
        'api_version': '2',
        'return_id_only': True
    }
    if context['user'] == self._site_user['name']:
        context['ignore_auth'] = True

    # The default package schema does not like Upper case tags
    tag_schema = logic.schema.default_tags_schema()
    tag_schema['name'] = [not_empty]

    if status == 'new':
        package_schema = logic.schema.default_create_package_schema()
        package_schema['tags'] = tag_schema
        context['schema'] = package_schema

        # We need to explicitly provide a package ID, otherwise
        # ckanext-spatial won't be able to link the extent to the package.
        package_dict['id'] = uuid.uuid4().hex
        package_schema['id'] = []

        # Save reference to the package on the object
        harvest_object.package_id = package_dict['id']
        harvest_object.add()

        # Defer constraints and flush so the dataset can be indexed with
        # the harvest object id (on the after_show hook from the harvester
        # plugin)
        Session.execute(
            'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
        model.Session.flush()

        try:
            package_id = p.toolkit.get_action('package_create')(
                context, package_dict)
            log.info('%s: Created new package %s with guid %s',
                     self.harvester_name(), package_id, harvest_object.guid)
        except p.toolkit.ValidationError as e:
            self._save_object_error(
                'Validation Error: %s' % str(e.error_summary),
                harvest_object, 'Import')
            return False

    elif status == 'change':
        # we know the internal document did change, because of the md5 hash
        # comparison done above
        package_schema = logic.schema.default_update_package_schema()
        package_schema['tags'] = tag_schema
        context['schema'] = package_schema

        package_dict['id'] = harvest_object.package_id
        try:
            package_id = p.toolkit.get_action('package_update')(
                context, package_dict)
            log.info('%s updated package %s with guid %s',
                     self.harvester_name(), package_id, harvest_object.guid)
        except p.toolkit.ValidationError as e:
            self._save_object_error(
                'Validation Error: %s' % str(e.error_summary),
                harvest_object, 'Import')
            return False

    model.Session.commit()

    return True
def write_package_from_gemini_string(self, content): '''Create or update a Package based on some content that has come from a URL. Returns the package_dict of the result. If there is an error, it returns None or raises Exception. ''' log = logging.getLogger(__name__ + '.import') package = None gemini_document = GeminiDocument(content) gemini_values = gemini_document.read_values() gemini_guid = gemini_values['guid'] # Save the metadata reference date in the Harvest Object try: metadata_modified_date = datetime.strptime( gemini_values['metadata-date'], '%Y-%m-%d') except ValueError: try: metadata_modified_date = datetime.strptime( gemini_values['metadata-date'], '%Y-%m-%dT%H:%M:%S') except: raise Exception('Could not extract reference date for GUID %s (%s)' \ % (gemini_guid,gemini_values['metadata-date'])) self.obj.metadata_modified_date = metadata_modified_date self.obj.save() last_harvested_object = Session.query(HarvestObject) \ .filter(HarvestObject.guid==gemini_guid) \ .filter(HarvestObject.current==True) \ .all() if len(last_harvested_object) == 1: last_harvested_object = last_harvested_object[0] elif len(last_harvested_object) > 1: raise Exception( 'Application Error: more than one current record for GUID %s' % gemini_guid) reactivate_package = False if last_harvested_object: # We've previously harvested this (i.e. it's an update) # Use metadata modified date instead of content to determine if the package # needs to be updated if last_harvested_object.metadata_modified_date is None \ or last_harvested_object.metadata_modified_date < self.obj.metadata_modified_date \ or self.force_import \ or (last_harvested_object.metadata_modified_date == self.obj.metadata_modified_date and last_harvested_object.source.active is False): if self.force_import: log.info('Import forced for object %s with GUID %s' % (self.obj.id, gemini_guid)) else: log.info( 'Package for object with GUID %s needs to be created or updated' % gemini_guid) package = last_harvested_object.package # If the package has a deleted state, we will only update it and reactivate it if the # new document has a more recent modified date if package.state == u'deleted': if last_harvested_object.metadata_modified_date < self.obj.metadata_modified_date: log.info( 'Package for object with GUID %s will be re-activated' % gemini_guid) reactivate_package = True else: log.info( 'Remote record with GUID %s is not more recent than a deleted package, skipping... ' % gemini_guid) return None else: if last_harvested_object.content != self.obj.content and \ last_harvested_object.metadata_modified_date == self.obj.metadata_modified_date: diff_generator = difflib.unified_diff( last_harvested_object.content.split('\n'), self.obj.content.split('\n')) diff = '\n'.join([line for line in diff_generator]) raise Exception( 'The contents of document with GUID %s changed, but the metadata date has not been updated.\nDiff:\n%s' % (gemini_guid, diff)) else: # The content hasn't changed, no need to update the package log.info('Document with GUID %s unchanged, skipping...' 
        else:
            log.info('No package with GEMINI guid %s found, let\'s create '
                     'one' % gemini_guid)

        extras = {'UKLP': 'True', 'harvest_object_id': self.obj.id}

        # Just add some of the metadata as extras, not the whole lot
        for name in [
            # Essentials
            'spatial-reference-system',
            'guid',
            # Usefuls
            'dataset-reference-date',
            'metadata-language',  # Language
            'metadata-date',  # Released
            'coupled-resource',
            'contact-email',
            'frequency-of-update',
            'spatial-data-service-type',
        ]:
            extras[name] = gemini_values[name]

        if len(gemini_values.get('progress', [])):
            extras['progress'] = gemini_values['progress'][0]
        else:
            extras['progress'] = ''

        extras['resource-type'] = gemini_values['resource-type'][0]

        # Use-constraints can contain values which are:
        #  * free text
        #  * licence URL
        # Store all values in extras['licence'] and, if there is a URL in
        # there, store that in extras['licence_url']
        extras['licence'] = gemini_values.get('use-constraints', '')
        if len(extras['licence']):
            licence_url_extracted = self._extract_first_licence_url(
                extras['licence'])
            if licence_url_extracted:
                extras['licence_url'] = licence_url_extracted

        extras['access_constraints'] = gemini_values.get(
            'limitations-on-public-access', '')

        if 'temporal-extent-begin' in gemini_values:
            #gemini_values['temporal-extent-begin'].sort()
            extras['temporal_coverage-from'] = gemini_values[
                'temporal-extent-begin']
        if 'temporal-extent-end' in gemini_values:
            #gemini_values['temporal-extent-end'].sort()
            extras['temporal_coverage-to'] = gemini_values[
                'temporal-extent-end']

        # Save responsible organization roles
        provider, responsible_parties = \
            self._process_responsible_organisation(
                gemini_values['responsible-organisation'])
        extras['provider'] = provider
        extras['responsible-party'] = '; '.join(responsible_parties)

        if len(gemini_values['bbox']) > 0:
            extras['bbox-east-long'] = gemini_values['bbox'][0]['east']
            extras['bbox-north-lat'] = gemini_values['bbox'][0]['north']
            extras['bbox-south-lat'] = gemini_values['bbox'][0]['south']
            extras['bbox-west-long'] = gemini_values['bbox'][0]['west']

            # Construct a GeoJSON extent so ckanext-spatial can register the
            # extent geometry (xmin/xmax map to the west/east longitudes)
            extent_string = self.extent_template.substitute(
                xmin=extras['bbox-west-long'],
                ymin=extras['bbox-south-lat'],
                xmax=extras['bbox-east-long'],
                ymax=extras['bbox-north-lat'])

            extras['spatial'] = extent_string.strip()

        tags = []
        for tag in gemini_values['tags']:
            tag = tag[:50] if len(tag) > 50 else tag
            tags.append({'name': tag})

        package_dict = {
            'title': gemini_values['title'],
            'notes': gemini_values['abstract'],
            'tags': tags,
            'resources': []
        }

        if self.obj.source.publisher_id:
            package_dict['groups'] = [{'id': self.obj.source.publisher_id}]

        if reactivate_package:
            package_dict['state'] = u'active'

        if package is None or package.title != gemini_values['title']:
            name = self.gen_new_name(gemini_values['title'])
            if not name:
                name = self.gen_new_name(six.text_type(gemini_guid))
            if not name:
                raise Exception(
                    'Could not generate a unique name from the title or the '
                    'GUID. Please choose a more unique title.')
            package_dict['name'] = name
        else:
            package_dict['name'] = package.name

        resource_locators = gemini_values.get('resource-locator', [])

        if len(resource_locators):
            for resource_locator in resource_locators:
                url = resource_locator.get('url', '')
                if url:
                    resource_format = ''
                    resource = {}
                    if extras['resource-type'] == 'service':
                        # Check if the service is a view service
                        test_url = url.split('?')[0] if '?' in url else url
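                        # _is_wms presumably probes the URL (e.g. via a WMS
                        # GetCapabilities request), so it costs one HTTP
                        # round trip per candidate; only service-type
                        # records reach this point.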
                        if self._is_wms(test_url):
                            resource['verified'] = True
                            resource['verified_date'] = \
                                datetime.now().isoformat()
                            resource_format = 'WMS'

                    resource.update({
                        'url': url,
                        'name': resource_locator.get('name', ''),
                        'description': resource_locator.get('description') or
                            'Resource locator',
                        'format': resource_format or None,
                        'resource_locator_protocol':
                            resource_locator.get('protocol', ''),
                        'resource_locator_function':
                            resource_locator.get('function', '')
                    })
                    package_dict['resources'].append(resource)

            # Guess the best view service to use in WMS preview
            verified_view_resources = [
                r for r in package_dict['resources']
                if 'verified' in r and r['format'] == 'WMS'
            ]
            if len(verified_view_resources):
                verified_view_resources[0][
                    'ckan_recommended_wms_preview'] = True
            else:
                view_resources = [
                    r for r in package_dict['resources']
                    if r['format'] == 'WMS'
                ]
                if len(view_resources):
                    view_resources[0]['ckan_recommended_wms_preview'] = True

        extras_as_dict = []
        for key, value in extras.items():
            if isinstance(value, six.string_types + (Number,)):
                extras_as_dict.append({'key': key, 'value': value})
            else:
                extras_as_dict.append({'key': key,
                                       'value': json.dumps(value)})

        package_dict['extras'] = extras_as_dict

        if package is None:
            # Create a new package from the data
            package = self._create_package_from_data(package_dict)
            log.info('Created new package ID %s with GEMINI guid %s',
                     package['id'], gemini_guid)
        else:
            package = self._create_package_from_data(package_dict,
                                                     package=package)
            log.info('Updated existing package ID %s with existing GEMINI '
                     'guid %s', package['id'], gemini_guid)

        # Flag the other objects of this source as no longer current
        from ckanext.harvest.model import harvest_object_table
        u = update(harvest_object_table) \
            .where(harvest_object_table.c.package_id ==
                   bindparam('b_package_id')) \
            .values(current=False)
        Session.execute(u, params={'b_package_id': package['id']})
        Session.commit()

        # Refresh the current object from the session, otherwise the
        # import paster command fails
        Session.remove()
        Session.add(self.obj)
        Session.refresh(self.obj)

        # Set the reference to the package in the HarvestObject and flag it
        # as the current one
        if not self.obj.package_id:
            self.obj.package_id = package['id']

        self.obj.current = True
        self.obj.save()

        return package
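    # A minimal usage sketch (hypothetical names; as the code above requires,
    # `self.obj` must already be set to the current HarvestObject by the
    # import stage before calling this method):
    #
    #   harvester.obj = harvest_object
    #   harvester.force_import = False
    #   package = harvester.write_package_from_gemini_string(gemini_xml)
    #   if package is None:
    #       log.info('Nothing to do: record unchanged or skipped')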