Example #1
def bbox_query_ordered(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box. Returns packages in order
    of how similar the data's bounding box is to the search box (best first).

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''

    input_geometry = _bbox_2_wkt(bbox, srid)

    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}

    # First get the area of the query box
    sql = "SELECT ST_Area(ST_GeomFromText(:query_bbox, :query_srid));"
    params['search_area'] = Session.execute(sql, params).fetchone()[0]

    # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_Intersects(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'
             ORDER BY spatial_ranking desc"""
    extents = Session.execute(sql, params).fetchall()
    log.debug('Spatial results: %r',
              [('%.2f' % extent.spatial_ranking, extent.package_id) for extent in extents[:20]])
    return extents
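A minimal usage sketch for the function above, assuming the ckanext-spatial convention that the bbox dict carries minx/miny/maxx/maxy keys (the call site and values below are hypothetical):

# Hypothetical call site; keys follow the ckanext-spatial bbox convention.
bbox = {'minx': -10.0, 'miny': 49.0, 'maxx': 2.0, 'maxy': 61.0}
extents = bbox_query_ordered(bbox, srid=4326)
for extent in extents[:5]:
    # Each row carries the ranking computed in the SQL query.
    print(extent.package_id, extent.spatial_ranking)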
Example #2
def bbox_query_ordered(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box. Returns packages in order
    of how similar the data's bounding box is to the search box (best first).

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''

    input_geometry = _bbox_2_wkt(bbox, srid)

    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}

    # First get the area of the query box
    sql = "SELECT ST_Area(GeomFromText(:query_bbox, :query_srid));"
    params['search_area'] = Session.execute(sql, params).fetchone()[0]

    # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear)
    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    POWER(ST_Area(ST_Intersection(package_extent.the_geom, GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_Intersects(package_extent.the_geom, GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'
             ORDER BY spatial_ranking desc"""
    extents = Session.execute(sql, params).fetchall()
    log.debug('Spatial results: %r',
              [('%.2f' % extent.spatial_ranking, extent.package_id) for extent in extents[:20]])
    return extents
Example #3
def setup(srid=None):

    if package_extent_table is None:
        define_spatial_tables(srid)
        log.debug('Spatial tables defined in memory')

    if model.package_table.exists():
        if not Table('geometry_columns', meta.metadata).exists() or \
           not Table('spatial_ref_sys', meta.metadata).exists():
            raise Exception(
                'The spatial extension is enabled, but PostGIS '
                'has not been set up in the database. '
                'Please refer to the "Setting up PostGIS" section in the README.'
            )

        if not package_extent_table.exists():
            try:
                package_extent_table.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if package_extent_table.exists():
                    Session.execute('DROP TABLE package_extent')
                    Session.commit()

                raise e

            log.debug('Spatial tables created')
        else:
            log.debug('Spatial tables already exist')
            # Future migrations go here

    else:
        log.debug('Spatial tables creation deferred')
Example #4
 def _refresh_harvest_objects(self, harvest_object, package_id):
     """
     Perform harvester housekeeping:
         - Flag the other objects of the source as not current
         - Set a reference to the package in the harvest object
         - Flag it as current
         - And save the changes
     """
     # Flag the other objects of this source as not current
     from ckanext.harvest.model import harvest_object_table
     u = update(harvest_object_table) \
         .where(harvest_object_table.c.package_id == bindparam('pkg_id')) \
         .values(current=False)
     Session.execute(u, params={'pkg_id': package_id})
     Session.commit()
     # Refresh current object from session, otherwise the
     # import paster command fails
     # (Copied from the Gemini harvester--not sure if necessary)
     Session.remove()
     Session.add(harvest_object)
     Session.refresh(harvest_object)
     # Set reference to package in the HarvestObject and flag it as
     # the current one
     if not harvest_object.package_id:
         harvest_object.package_id = package_id
     harvest_object.current = True
     harvest_object.save()
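A typical call site, as a sketch, would sit in a harvester's import_stage once the package has been created; everything here except the method itself is an assumption:

# Hypothetical harvester code: create the package, then link it to the
# harvest object and flag that object as current.
package_id = p.toolkit.get_action('package_create')(context, package_dict)
self._refresh_harvest_objects(harvest_object, package_id)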
Example #5
def setup(srid=None):

    if package_extent_table is None:
        define_spatial_tables(srid)
        # log.debug('Spatial tables defined in memory')

    if model.package_table.exists():
        if not Table('geometry_columns',meta.metadata).exists() or \
            not Table('spatial_ref_sys',meta.metadata).exists():
            raise Exception('The spatial extension is enabled, but PostGIS ' + \
                    'has not been set up in the database. ' + \
                    'Please refer to the "Setting up PostGIS" section in the README.')


        if not package_extent_table.exists():
            try:
                package_extent_table.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if package_extent_table.exists():
                    Session.execute('DROP TABLE package_extent')
                    Session.commit()

                raise e

            log.debug('Spatial tables created')
        else:
            log.debug('Spatial tables already exist')
Example #6
def init_db():
    # Setting up custom fields table for locales
    try:
        custom_field_table.create()
    except Exception as e:
        # Make sure the table does not remain incorrectly created
        if custom_field_table.exists():
            Session.execute('DROP TABLE custom_field_table')
            Session.commit()

        raise e
Example #7
def init_db():
    # Setting up custom fields table for locales
    try:
        custom_field_table.create()
    except Exception as e:
        # Make sure the table does not remain incorrectly created
        if custom_field_table.exists():
            Session.execute('DROP TABLE custom_field_table')
            Session.commit()

        raise e
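The create-then-clean-up idiom above recurs throughout these examples. A generic sketch of the same pattern (the helper name and drop statement are illustrative, not from any of the projects):

from ckan.model import Session

def create_table_safely(table, drop_sql):
    # Create the table; if creation fails half-way, drop whatever was
    # left behind so a later run can start from a clean state.
    try:
        table.create()
    except Exception:
        if table.exists():
            Session.execute(drop_sql)  # e.g. 'DROP TABLE custom_field'
            Session.commit()
        raise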
Example #8
def bbox_query(bbox, srid=None):
    '''
    Performs a spatial query of a bounding box.

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''

    input_geometry = _bbox_2_wkt(bbox, srid)

    params = {
        'query_bbox': str(input_geometry),
        'query_srid': input_geometry.srid
    }

    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_CoveredBy(package_extent.the_geom, GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'"""
    extents = Session.execute(sql, params).fetchall()

    #extents = Session.query(PackageExtent) \
    #          .filter(PackageExtent.package_id==Package.id) \
    #          .filter(PackageExtent.the_geom.intersects(input_geometry)) \
    #          .filter(Package.state==u'active')

    return extents
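Unlike the ranked query in Example #1, this one uses ST_CoveredBy, so only extents lying entirely inside the search box match. A sketch of the looser variant that also returns partial overlaps swaps the predicate in the WHERE clause:

# Sketch: same query with an intersection test instead of containment.
sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                package_extent.package_id AS package_id
         FROM package_extent, package
         WHERE package_extent.package_id = package.id
            AND ST_Intersects(package_extent.the_geom, GeomFromText(:query_bbox, :query_srid))
            AND package.state = 'active'"""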
Example #9
def bbox_query(bbox,srid=None):
    '''
    Performs a spatial query of a bounding box.

    bbox - bounding box dict

    Returns a query object of PackageExtents, which each reference a package
    by ID.
    '''

    input_geometry = _bbox_2_wkt(bbox, srid)

    params = {'query_bbox': str(input_geometry),
              'query_srid': input_geometry.srid}

    sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom,
                    package_extent.package_id AS package_id
             FROM package_extent, package
             WHERE package_extent.package_id = package.id
                AND ST_CoveredBy(package_extent.the_geom, GeomFromText(:query_bbox, :query_srid))
                AND package.state = 'active'"""
    extents = Session.execute(sql, params).fetchall()

    #extents = Session.query(PackageExtent) \
    #          .filter(PackageExtent.package_id==Package.id) \
    #          .filter(PackageExtent.the_geom.intersects(input_geometry)) \
    #          .filter(Package.state==u'active')

    return extents
Example #10
def setup():
    log.debug('Multilingual tables defined in memory')

    #Setting up package multilang table
    if not package_multilang_table.exists():
        try:
            package_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if package_multilang_table.exists():
                Session.execute('DROP TABLE package_multilang')
                Session.commit()

            raise e

        log.info('Package Multilingual table created')
Example #11
def setup():
    log.debug('DCAT_AP-IT tables defined in memory')

    #Setting up tag multilang table
    if not dcatapit_vocabulary_table.exists():
        try:
            dcatapit_vocabulary_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if dcatapit_vocabulary_table.exists():
                Session.execute('DROP TABLE dcatapit_vocabulary')
                Session.commit()

            raise e

        log.info('DCATAPIT Tag Vocabulary table created')
Example #12
def setup():
    log.debug('Multilingual tables defined in memory')

    #Setting up package multilang table
    if not package_multilang_table.exists():
        try:
            package_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if package_multilang_table.exists():
                Session.execute('DROP TABLE package_multilang')
                Session.commit()

            raise e

        log.info('Package Multilingual table created')
Example #13
    def migrate(self):
        '''
        Migrate filestore over in our very HDXish way :)
        '''
        results = Session.execute(
            "select id, revision_id, url from resource "
            "where resource_type = 'file.upload' "
            "and (url_type <> 'upload' or url_type is null)"
            "and url like '%storage%'")
        for id, revision_id, url in results:
            # Give it a second, would you?
            time.sleep(0.7)
            url_parts = urlparse(url)
            url_parts = url_parts.path.split("/")
            filename = url_parts[len(url_parts) - 1]
            response = requests.get(url, stream=True)
            if response.status_code != 200:
                print "failed to fetch %s (code %s)" % (url,
                                                        response.status_code)
                continue
            resource_upload = ResourceUpload({'id': id})
            assert resource_upload.storage_path, "no storage configured, aborting"

            directory = resource_upload.get_directory(id)
            filepath = resource_upload.get_path(id)
            try:
                os.makedirs(directory)
            except OSError as e:
                ## errno 17 is file already exists
                if e.errno != 17:
                    raise

            with open(filepath, 'wb+') as out:
                for chunk in response.iter_content(1024):
                    if chunk:
                        out.write(chunk)

            Session.execute("update resource set url_type = 'upload', "
                            "url = '%s' where id = '%s'" % (filename, id))
            Session.execute(
                "update resource_revision set url_type = 'upload', "
                "url = '%s' where id = '%s' and "
                "revision_id = '%s'" % (filename, id, revision_id))
            Session.commit()
            print "Saved url %s" % url
Example #14
def update_stat_metameta_table(org_id, metameta_id, state):
    """
     update stat on metameta table with new status
    """

    update_sql = "UPDATE metameta SET state='%s', modified_date='%s' WHERE id ='%s' AND org_id='%s';"%(state, datetime.datetime.utcnow().isoformat(), metameta_id, org_id)

    result = Session.execute(update_sql)
    Session.commit()
Example #15
def insert_metameta_options(org_id, key, option_value):
    """
     creates entries in metameta options table for the organisation.
     value fields are null
    """
    sql = "INSERT INTO metameta_options(id, org_id, key, option_value, modified_date) VALUES ('%s', '%s', '%s', '%s', '%s');"%(str(uuid.uuid4()), org_id, key, option_value, datetime.datetime.utcnow().isoformat())

    result = Session.execute(sql)
    Session.commit()
    return
Example #16
def update_stat_metameta_table(org_id, metameta_id, state):
    """
     update stat on metameta table with new status
    """

    update_sql = "UPDATE metameta SET state='%s', modified_date='%s' WHERE id ='%s' AND org_id='%s';" % (
        state, datetime.datetime.utcnow().isoformat(), metameta_id, org_id)

    result = Session.execute(update_sql)
    Session.commit()
Example #17
def init_db():
    """
    Create board, app, mark tables in the database.
    Prepopulate category table with default data.
    """
    if not model.package_table.exists():
        # during tests?
        return
    session = Session()
    for table in [board_table, app_table, mark_table]:
        if not table.exists():
            table.create(checkfirst=True)
            log.debug("Apps {} have been created".format(table.name))

    for board_name, board_desc in DEFAULT_BOARDS.items():
        if not Board.get_by_slug(slugify(board_name)):
            board = Board()
            board.name = board_name
            board.slug = slugify(board_name)
            board.description = board_desc
            session.add(board)
            log.debug("Add {0} to {1} table".format(board_name,
                                                    board_table.name))
            session.commit()

    if not migration_table.exists():
        migration_table.create(checkfirst=True)
        session.commit()
    migration_number = session.query(migration_table).count()
    log.debug('Migration number: %s', migration_number)
    migration_sql_list = []
    for counter, sql in enumerate(migration_sql_list, start=1):
        if migration_number < counter:
            try:
                session.execute(sql)
            except ProgrammingError:
                session.rollback()
            finally:
                session.execute(migration_table.insert())
                session.commit()

    session.close()
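migration_sql_list is empty here; a future migration would be appended as a plain SQL string and applied by the counter logic above. A purely hypothetical entry might look like:

# Hypothetical migration entry; the column is illustrative only.
migration_sql_list = [
    "ALTER TABLE board ADD COLUMN closed BOOLEAN DEFAULT FALSE;",
]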
Example #18
def insert_metameta_table(org_id, data):
    """
     creates entries in metameta table for the organisation.
     value fields are null
    """
    errors = {}
    #key
    if not ('key' in data and len(data['key'].strip())):
        #TODO validation
        errors.update({'key': _('%s field is required.' % ('key'))})
        return errors
    key = data['key'].strip()

    #label: if not set, insert key
    if 'label' in data and len(data['label'].strip()):
        label = data['label'].strip()
    else:
        label = key.title()

    #convert non-alphanumeric to underscore
    key = create_search_key(key)
    if org_has_defaults(org_id, key):
        #TODO validation
        errors.update({
            'key':
            _('%s has been already used by the organization.' % (key))
        })
        return errors

    #readonly attribute
    readonly = str(data.get('readonly', False)).lower() == 'true'

    #validator
    validator = data.get('validator', validator_not_empty)

    #default_value
    default_value = data.get('default_value', '')

    #field_type
    field_type = data.get('field_type', field_type_text)
    min_value = max_value = 0.0
    if field_type == field_type_text and validator == validator_range:
        min_value = data.get('min_value', 0.0)
        max_value = data.get('max_value', 0.0)

    sql = "INSERT INTO metameta(id, org_id, key, default_value, label, validator, state, custom, presettable, readonly, field_type, min_value, max_value, sort_order, modified_date) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %r, %r, %r, '%s', %f, %f, (CASE WHEN (SELECT MAX(sort_order)+1 FROM metameta WHERE org_id='%s')>101 THEN (SELECT MAX(sort_order)+1 FROM metameta WHERE org_id='%s') ELSE 101 END), '%s');" % (
        str(uuid.uuid4()), org_id, key, default_value, label,
        validator, state_active, True, True, readonly, field_type,
        float(min_value), float(max_value), org_id, org_id,
        datetime.datetime.utcnow().isoformat())

    result = Session.execute(sql)
    Session.commit()
    return errors
Example #19
def version():
    u'''Return current version'''
    log.info(u"Returning current DB version")
    try:
        from ckan.model import Session
        ver = Session.execute(u'select version from '
                              u'migrate_version;').fetchall()
        click.secho(u"Latest data schema version: {0}".format(ver[0][0]),
                    bold=True)
    except Exception as e:
        error_shout(e)
Example #20
def select_metametadata_for_org(org_id,
                                metameta_id=None,
                                active_only=True,
                                withPrefix=False,
                                sort_order_from=None,
                                sort_order_to=None):
    """
     parameter: string org_id
     parameter: metameta_id
     parameter: boolean withPrefix
     returns dict of metameta data
    """
    records = []

    if org_id is None or len(org_id.strip()) == 0:
        return records

    sql = "SELECT id, key, default_value, label, revision_id, state, validator, custom, presettable, readonly, field_type, min_value, max_value, sort_order, modified_date FROM metameta WHERE org_id = '" + org_id + "' "
    if metameta_id is not None:
        sql = sql + " AND id = '" + metameta_id + "'"
    if active_only:
        sql = sql + " AND state = '" + state_active + "'"
    if sort_order_from and sort_order_to:
        sql = sql + " AND sort_order BETWEEN %d and %d " % (
            int(sort_order_from), int(sort_order_to))
    elif sort_order_from:
        sql = sql + " AND sort_order >= %d " % (int(sort_order_from))
    elif sort_order_to:
        sql = sql + " AND sort_order <= %d " % (int(sort_order_to))
    sql = sql + "ORDER BY sort_order, presettable, custom, key ASC;"
    rows = Session.execute(sql)
    for row in rows:
        key = row['key']
        if not withPrefix:
            key = remove_prefix_key(key)
        records.append({
            'id': row['id'],
            'key': key,
            'default_value': row['default_value'],
            'label': row['label'],
            'revision_id': row['revision_id'],
            'state': row['state'],
            'validator': row['validator'],
            'custom': row['custom'],
            'presettable': row['presettable'],
            'readonly': row['readonly'],
            'field_type': row['field_type'],
            'min_value': row['min_value'],
            'max_value': row['max_value'],
            'sort_order': row['sort_order'],
            'modified_date': row['modified_date'],
        })
    return records
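The WHERE clause above is built by concatenating raw values into the SQL. A sketch of the same incremental query construction with bound parameters, keeping the Session.execute style used elsewhere on this page:

# Sketch: build the filter incrementally, binding values instead of
# concatenating them into the string.
sql = ("SELECT id, key, default_value, label, state, sort_order "
       "FROM metameta WHERE org_id = :org_id")
params = {'org_id': org_id}
if metameta_id is not None:
    sql += " AND id = :metameta_id"
    params['metameta_id'] = metameta_id
if active_only:
    sql += " AND state = :state"
    params['state'] = state_active
sql += " ORDER BY sort_order, presettable, custom, key ASC;"
rows = Session.execute(sql, params)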
Example #21
def init_tables(engine):
    if not Table('geometry_columns', meta.metadata).exists() or \
       not Table('spatial_ref_sys', meta.metadata).exists():
        raise Exception('PostGIS has not been set up in the database. Please '
                        'refer to the "Setting up PostGIS" section in the '
                        'ckanext-spatial README.')

    if not organization_extent_table.exists():
        try:
            organization_extent_table.create()
        except Exception:
            # Make sure the table does not remain incorrectly created
            # (eg without geom column or constraints)
            if organization_extent_table.exists():
                Session.execute('DROP TABLE organization_extent')
                Session.commit()
            raise

        log.debug('organization_extent table created in the db')
    else:
        log.debug('organization_extent table already exists in the db')
Example #22
def init_tables(engine):
    if not Table('geometry_columns', meta.metadata).exists() or \
       not Table('spatial_ref_sys', meta.metadata).exists():
        raise Exception('PostGIS has not been set up in the database. Please '
                        'refer to the "Setting up PostGIS" section in the '
                        'ckanext-spatial README.')

    if not organization_extent_table.exists():
        try:
            organization_extent_table.create()
        except Exception:
            # Make sure the table does not remain incorrectly created
            # (eg without geom column or constraints)
            if organization_extent_table.exists():
                Session.execute('DROP TABLE organization_extent')
                Session.commit()
            raise

        log.debug('organization_extent table created in the db')
    else:
        log.debug('organization_extent table already exists in the db')
Example #23
def remove_metameta_options(org_id, key, option_value=None):
    """
     removes entries from the metameta options table for the organisation
    """
    sql = "DELETE FROM metameta_options WHERE org_id = '%s' AND key = '%s' "%(org_id, key)
    if option_value:
        sql = sql + " AND option_value='%s' "%(option_value)
    sql = sql + ";"
    result = Session.execute(sql)
    Session.commit()
    return
Example #24
def insert_metameta_options(org_id, key, option_value):
    """
     creates entries in metameta options table for the organisation.
     value fields are null
    """
    sql = "INSERT INTO metameta_options(id, org_id, key, option_value, modified_date) VALUES ('%s', '%s', '%s', '%s', '%s');" % (
        str(uuid.uuid4()), org_id, key, option_value,
        datetime.datetime.utcnow().isoformat())

    result = Session.execute(sql)
    Session.commit()
    return
Example #25
def update_metameta_table(org_id, metameta_id, data):
    """
     update metameta table with new values
    """

    errors = {}
    #key
    if not ('key' in data and len(data['key'].strip())):
        #TODO validation
        errors.update({'key': _('%s field is required.' % ('key'))})
        return errors
    key = data['key'].strip()

    #label: if not set, insert key
    if 'label' in data and len(data['label'].strip()):
        label = data['label'].strip()
    else:
        label = key.title()

    #convert non-alphanumeric to underscore
    key = create_search_key(key)
    if org_has_defaults(org_id, key, metameta_id):
        #TODO validation
        errors.update({
            'key':
            _('%s has been already used by the organization.' % (key))
        })
        return errors

    #readonly attribute
    readonly = str(data.get('readonly', False)).lower() == 'true'

    #validator
    validator = data.get('validator', validator_not_empty)

    #default_value
    default_value = data.get('default_value', '')

    #field_type
    field_type = data.get('field_type', field_type_text)
    min_value = max_value = 0.0
    if field_type == field_type_text and validator == validator_range:
        min_value = data.get('min_value', 0.0)
        max_value = data.get('max_value', 0.0)

    update_sql = "UPDATE metameta SET key='%s', default_value='%s', label='%s', validator='%s', readonly=%r, field_type='%s', min_value=%f, max_value=%f, modified_date='%s' WHERE id ='%s' AND org_id='%s';" % (
        key, default_value, label, validator, readonly, field_type,
        float(min_value), float(max_value),
        datetime.datetime.utcnow().isoformat(), metameta_id, org_id)

    result = Session.execute(update_sql)
    Session.commit()
    return errors
Example #26
def remove_metameta_options(org_id, key, option_value=None):
    """
     removes entries from the metameta options table for the organisation
    """
    sql = "DELETE FROM metameta_options WHERE org_id = '%s' AND key = '%s' " % (
        org_id, key)
    if option_value:
        sql = sql + " AND option_value='%s' " % (option_value)
    sql = sql + ";"
    result = Session.execute(sql)
    Session.commit()
    return
Example #27
def setup():

    if lb_dataset_reports is None or lb_dataset_actions is None:
        define_tables()
        log.debug('Lightbase report tables defined in memory')

    if model.repo.are_tables_created():

        if not lb_dataset_actions.exists():
            try:
                lb_dataset_actions.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if lb_dataset_actions.exists():
                    Session.execute('DROP TABLE lb_dataset_actions')
                    Session.commit()

                raise e

            log.debug('Dataset actions tables created')
        else:
            log.debug('Dataset actions tables already exist')
            # Future migrations go here

        if not lb_dataset_reports.exists():
            try:
                lb_dataset_reports.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if lb_dataset_reports.exists():
                    Session.execute('DROP TABLE lb_dataset_reports')
                    Session.commit()

                raise e

            log.debug('Lightbase report tables created')
Example #28
def setup():

    if lb_dataset_reports is None or lb_dataset_actions is None:
        define_tables()
        log.debug('Lightbase report tables defined in memory')

    if model.repo.are_tables_created():

        if not lb_dataset_actions.exists():
            try:
                lb_dataset_actions.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if lb_dataset_actions.exists():
                    Session.execute('DROP TABLE lb_dataset_actions')
                    Session.commit()

                raise e

            log.debug('Dataset actions tables created')
        else:
            log.debug('Dataset actions tables already exist')
            # Future migrations go here

        if not lb_dataset_reports.exists():
            try:
                lb_dataset_reports.create()
            except Exception as e:
                # Make sure the table does not remain incorrectly created
                # (eg without geom column or constraints)
                if lb_dataset_reports.exists():
                    Session.execute('DROP TABLE lb_dataset_reports')
                    Session.commit()

                raise e

            log.debug('Lightbase report tables created')
Example #29
def migrate_v2():

    log.debug('Migrating apps tables to v2. This may take a while...')

    statements='''
        CREATE TABLE application_tag_temp AS SELECT * FROM application_tag;
        CREATE TABLE idea_tag_temp AS SELECT * FROM idea_tag;
        '''
    Session.execute(statements)
    Session.commit()

    application_tag_table.drop()
    idea_tag_table.drop()

    application_tag_table.create()
    idea_tag_table.create()
    Session.commit()
    apps_tags = Session.execute('SELECT application_id,tag_id from application_tag_temp')
    ideas_tags = Session.execute('SELECT idea_id,tag_id from idea_tag_temp')

    for app_tag in apps_tags:
        Session.execute('''INSERT INTO application_tag (id,application_id,tag_id) VALUES ('%s','%s','%s')''' %
                        (make_uuid(), app_tag[0],app_tag[1]))

    for idea_tag in ideas_tags:
        Session.execute('''INSERT INTO idea_tag (id,idea_id,tag_id) VALUES ('%s','%s','%s')''' %
                        (make_uuid(), idea_tag[0],idea_tag[1]))

    statements='''
        DROP TABLE application_tag_temp;
        DROP TABLE idea_tag_temp;
        '''
    Session.execute(statements)
    Session.commit()

    log.info('Apps tables migrated to v2')
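Executing several statements from a single string, as above, depends on the database driver accepting multi-statement strings (psycopg2 does for plain SQL). The row-by-row inserts could also bind their values instead of interpolating them; a sketch:

# Sketch: the same inserts with bound parameters.
for app_tag in apps_tags:
    Session.execute(
        'INSERT INTO application_tag (id, application_id, tag_id) '
        'VALUES (:id, :app_id, :tag_id)',
        {'id': make_uuid(), 'app_id': app_tag[0], 'tag_id': app_tag[1]})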
Example #30
def select_metameta_options(org_id, key, option_value=None):
    """
     parameter: string org_id
     parameter: key
     parameter: option_value
     returns dict of metameta options
    """
    records = []
    sql = "SELECT option_value FROM metameta_options WHERE org_id = '%s' and key = '%s' "%(org_id, key)
    if option_value:
        sql = sql + "option_value = '%s' "%(option_value)
    sql = sql + " order by option_value ASC;"
    rows = Session.execute(sql)
    for row in rows:
        records.append(row['option_value'])
    return records
Example #31
def select_metametadata_for_org(org_id, metameta_id=None, active_only=True, withPrefix=False, sort_order_from=None, sort_order_to=None):
    """
     parameter: string org_id
     parameter: metameta_id
     parameter: boolean withPrefix
     returns dict of metameta data
    """
    records = []

    if org_id is None or len(org_id.strip())==0:
        return records

    sql = "SELECT id, key, default_value, label, revision_id, state, validator, custom, presettable, readonly, field_type, min_value, max_value, sort_order, modified_date FROM metameta WHERE org_id = '" + org_id + "' "
    if metameta_id is not None:
        sql = sql + " AND id = '" + metameta_id +"'"
    if active_only:
        sql = sql + " AND state = '" + state_active +"'"
    if sort_order_from and sort_order_to:
        sql = sql + " AND sort_order BETWEEN %d and %d "%(int(sort_order_from), int(sort_order_to))
    elif sort_order_from:
        sql = sql + " AND sort_order >= %d "%(int(sort_order_from))
    elif sort_order_to:
        sql = sql + " AND sort_order <= %d "%(int(sort_order_to))
    sql = sql + "ORDER BY sort_order, presettable, custom, key ASC;"
    rows = Session.execute(sql)
    for row in rows:
        key = row['key']
        if not withPrefix:
            key = remove_prefix_key(key)
        records.append({
            'id': row['id'],
            'key': key,
            'default_value': row['default_value'],
            'label': row['label'],
            'revision_id': row['revision_id'],
            'state': row['state'],
            'validator': row['validator'],
            'custom': row['custom'],
            'presettable': row['presettable'],
            'readonly' :row['readonly'],
            'field_type': row['field_type'],
            'min_value': row['min_value'],
            'max_value': row['max_value'],
            'sort_order': row['sort_order'],
            'modified_date': row['modified_date'],
        })
    return records
Example #32
def update_metameta_table(org_id, metameta_id, data):
    """
     update metameta table with new values
    """

    errors = {}
    #key
    if not ('key' in data and len(data['key'].strip())):
        #TODO validation
        errors.update({'key': _('%s field is required.'%('key'))})
        return errors
    key = data['key'].strip()

    #label: if not set, insert key
    if 'label' in data and len(data['label'].strip()):
        label = data['label'].strip()
    else:
        label = key.title()

    #convert non-alphanumeric to underscore
    key = create_search_key(key)
    if org_has_defaults(org_id, key, metameta_id):
        #TODO validation
        errors.update({'key': _('%s has been already used by the organization.'%(key))})
        return errors

    #readonly attribute
    readonly = str(data.get('readonly', False)).lower() == 'true'

    #validator
    validator = data.get('validator', validator_not_empty)

    #default_value
    default_value = data.get('default_value', '')

    #field_type
    field_type = data.get('field_type', field_type_text)
    min_value = max_value = 0.0
    if field_type == field_type_text and validator == validator_range:
        min_value = data.get('min_value', 0.0)
        max_value = data.get('max_value', 0.0)

    update_sql = "UPDATE metameta SET key='%s', default_value='%s', label='%s', validator='%s', readonly=%r, field_type='%s', min_value=%f, max_value=%f, modified_date='%s' WHERE id ='%s' AND org_id='%s';"%(key, default_value, label, validator, readonly, field_type, float(min_value), float(max_value), datetime.datetime.utcnow().isoformat(), metameta_id, org_id)

    result = Session.execute(update_sql)
    Session.commit()
    return errors
Example #33
def insert_metameta_table(org_id, data):
    """
     creates entries in metameta table for the organisation.
     value fields are null
    """
    errors = {}
    #key
    if not ('key' in data and len(data['key'].strip())):
        #TODO validation
        errors.update({'key': _('%s field is required.'%('key'))})
        return errors
    key = data['key'].strip()

    #label: if not set, insert key
    if 'label' in data and len(data['label'].strip()):
        label = data['label'].strip()
    else:
        label = key.title()

    #convert non-alphanumeric to underscore
    key = create_search_key(key)
    if org_has_defaults(org_id, key):
        #TODO validation
        errors.update({'key': _('%s has been already used by the organization.'%(key))})
        return errors

    #readonly attribute
    readonly = str(data.get('readonly', False)).lower() == 'true'

    #validator
    validator = data.get('validator', validator_not_empty)

    #default_value
    default_value = data.get('default_value', '')

    #field_type
    field_type = data.get('field_type', field_type_text)
    min_value = max_value = 0.0
    if field_type == field_type_text and validator == validator_range:
        min_value = data.get('min_value', 0.0)
        max_value = data.get('max_value', 0.0)

    sql = "INSERT INTO metameta(id, org_id, key, default_value, label, validator, state, custom, presettable, readonly, field_type, min_value, max_value, sort_order, modified_date) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %r, %r, %r, '%s', %f, %f, (CASE WHEN (SELECT MAX(sort_order)+1 FROM metameta WHERE org_id='%s')>101 THEN (SELECT MAX(sort_order)+1 FROM metameta WHERE org_id='%s') ELSE 101 END), '%s');"%(str(uuid.uuid4()), org_id, key, default_value, label, validator, state_active, True, True, readonly, field_type, float(min_value), float(max_value), org_id, org_id, datetime.datetime.utcnow().isoformat())

    result = Session.execute(sql)
    Session.commit()
    return errors
Example #34
def init_metadata(org_id):

    #retrieve metameta records for org_id
    records = select_metametadata_for_org(org_id,
                                          metameta_id=None,
                                          active_only=False,
                                          withPrefix=True)
    sql = "INSERT INTO metameta(id, org_id, key, default_value, label, validator, state, custom, presettable, readonly, field_type, min_value, max_value, sort_order, modified_date) VALUES "
    update_count = 0
    for key, values in PRESET_METAMETADATA.items():
        if not key or len(key) == 0:
            continue
        print('********key=%s*******' % (key))
        metameta_key = add_prefix_key(key)

        #check whether key exists or not
        key_exists = False
        for record in records:
            if record['key'] == metameta_key:
                key_exists = True
                break

        if not key_exists:

            #check field type
            if values.get('field_type') == field_type_single_choice:

                presets = get_metameta_options_preset(metameta_key)
                update_metameta_options(org_id, metameta_key, presets)
            update_count += 1

            #create a record for a text field
            sql = sql + " ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %r, %r, %r, '%s', %f, %f, %d, '%s')," % (
                str(uuid.uuid4()), org_id, metameta_key,
                values.get('default_value'), values.get('label'),
                values.get('validator'), values.get('state'),
                str(values.get('custom')), str(values.get('presettable')),
                str(values.get('readonly')), values.get('field_type'),
                0.0, 0.0, int(values.get('sort_order')),
                datetime.datetime.utcnow().isoformat())

    if update_count > 0:
        sql = sql[:-1]
        sql = sql + ';'

        result = Session.execute(sql)
        Session.commit()
Example #35
def select_metameta_options(org_id, key, option_value=None):
    """
     parameter: string org_id
     parameter: key
     parameter: option_value
     returns dict of metameta options
    """
    records = []
    sql = "SELECT option_value FROM metameta_options WHERE org_id = '%s' and key = '%s' " % (
        org_id, key)
    if option_value:
        sql = sql + "option_value = '%s' " % (option_value)
    sql = sql + " order by option_value ASC;"
    rows = Session.execute(sql)
    for row in rows:
        records.append(row['option_value'])
    return records
Example #36
def get_package_id_by_pid(pid, pid_type):
    """ Find pid by id and type.

    :param pid: id of the pid
    :param pid_type: type of the pid (primary, relation)
    :return: id of the package
    """
    query = select(['key', 'package_id']).where(and_(model.PackageExtra.value == pid, model.PackageExtra.key.like('pids_%_id'),
                                                     model.PackageExtra.state == 'active'))

    for key, package_id in [('pids_%s_type' % key.split('_')[1], package_id) for key, package_id in Session.execute(query)]:
        query = select(['package_id']).where(and_(model.PackageExtra.value == pid_type, model.PackageExtra.key == key,
                                                  model.PackageExtra.state == 'active', model.PackageExtra.package_id == package_id))
        for package_id, in Session.execute(query):
            return package_id

    return None
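A usage sketch; the PID value is invented:

# Hypothetical lookup of a package by a primary PID.
package_id = get_package_id_by_pid('urn:nbn:fi:example-pid', 'primary')
if package_id is None:
    print('No package found for that PID')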
Example #37
def get_package_id_by_pid(pid, pid_type):
    """ Find pid by id and type.

    :param pid: id of the pid
    :param pid_type: type of the pid
    :return: id of the package
    """
    query = select(['key', 'package_id']).where(and_(model.PackageExtra.value == pid, model.PackageExtra.key.like('pids_%_id'),
                                                     model.PackageExtra.state == 'active'))

    for key, package_id in [('pids_%s_type' % key.split('_')[1], package_id) for key, package_id in Session.execute(query)]:
        query = select(['package_id']).where(and_(model.PackageExtra.value == pid_type, model.PackageExtra.key == key,
                                                  model.PackageExtra.state == 'active', model.PackageExtra.package_id == package_id))
        for package_id, in Session.execute(query):
            return package_id

    return None
Example #38
def org_has_defaults(org_id, key=None, metameta_id=None):
    """
     check if org has any entries in metameta table
     (it won't if no defaults have been set before)
     parameter: key
    """
    sql = "SELECT 1 FROM metameta WHERE org_id = '" + org_id + "'"
    if key is not None:
        sql = sql + " AND key = '" + key +"'"
    if metameta_id is not None:
        sql = sql + " AND id <> '" + metameta_id +"'"
    sql = sql + " LIMIT 1;"
    result = Session.execute(sql)

    if result.first():
        return True
    else:
        return False
Example #39
def org_has_defaults(org_id, key=None, metameta_id=None):
    """
     check if org has any entries in metameta table
     (it won't if no defaults have been set before)
     parameter: key
    """
    sql = "SELECT 1 FROM metameta WHERE org_id = '" + org_id + "'"
    if key is not None:
        sql = sql + " AND key = '" + key + "'"
    if metameta_id is not None:
        sql = sql + " AND id <> '" + metameta_id + "'"
    sql = sql + " LIMIT 1;"
    result = Session.execute(sql)

    if result.first():
        return True
    else:
        return False
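The same check can be written with bound parameters and collapsed into a single boolean expression; a sketch:

def org_has_defaults(org_id, key=None, metameta_id=None):
    # Bound-parameter variant of the existence check above.
    sql = "SELECT 1 FROM metameta WHERE org_id = :org_id"
    params = {'org_id': org_id}
    if key is not None:
        sql += " AND key = :key"
        params['key'] = key
    if metameta_id is not None:
        sql += " AND id <> :metameta_id"
        params['metameta_id'] = metameta_id
    return Session.execute(sql + " LIMIT 1;", params).first() is not None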
Example #40
def get_metameta_minmax(org_id, key):
    """
     parameter: org_id
     parameter: key
     returns dict of meta metadata
    """
    records = []

    if org_id is None or len(org_id.strip())==0:
        return records

    sql = "SELECT min_value, max_value FROM metameta WHERE org_id = '%s' AND key = '%s' AND state = '%s' LIMIT 1;"%(org_id, key, state_active)
    rows = Session.execute(sql)
    for row in rows:
        records.append({
            'min_value': row['min_value'],
            'max_value': row['max_value'],
        })
    return records
Example #41
def get_package_contacts(pkg_id):
    """
    Returns contact information for the dataset with the given id.

    :param pkg_id: the id of the package whose contact information to get
    :return: a list of contact information dicts
    :rtype: list of dicts
    """

    contacts_regex = r'^(contact)_(\d+)_(.+)$'

    query = select(['id', 'key', 'value', 'state']).where(
        and_(
            model.PackageExtra.package_id == pkg_id,
            model.PackageExtra.key.like('contact_%_%'),
            model.PackageExtra.state == 'active'
        )
    )

    extras = Session.execute(query)
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    contacts_by_index = {}
    for extra in extras:
        key = extra['key']
        value = extra['value']

        match = re.match(contacts_regex, key)
        if match:
            index = match.group(2)
            type = match.group(3)

            contact = contacts_by_index.get(index, {})
            contact[u'index'] = index
            contact[type] = value

            if type == 'email':
                contact[u'id'] = extra['id']

            contacts_by_index[index] = contact

    contacts = list(contacts_by_index.values())
    return sorted(contacts, key=lambda c: int(c['index']))
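The keys parsed above follow the contact_<index>_<field> pattern. As an illustration (values invented), package extras such as:

extras = [
    {'id': 'e1', 'key': 'contact_0_name',  'value': 'Jane Doe',         'state': 'active'},
    {'id': 'e2', 'key': 'contact_0_email', 'value': 'jane@example.org', 'state': 'active'},
]
# ...would be folded into:
# [{'index': '0', 'name': 'Jane Doe', 'email': 'jane@example.org', 'id': 'e2'}]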
Example #42
def get_package_contacts(pkg_id):
    """
    Returns contact information for the dataset with the given id.

    :param pkg_id: the id of the package whose contact information to get
    :return: a list of contact information dicts
    :rtype: list of dicts
    """

    contacts_regex = r'^(contact)_(\d+)_(.+)$'

    query = select(['id', 'key', 'value', 'state']).where(
        and_(
            model.PackageExtra.package_id == pkg_id,
            model.PackageExtra.key.like('contact_%_%'),
            model.PackageExtra.state == 'active'
        )
    )

    extras = Session.execute(query)
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    contacts_by_index = {}
    for extra in extras:
        key = extra['key']
        value = extra['value']

        match = re.match(contacts_regex, key)
        if match:
            index = match.group(2)
            type = match.group(3)

            contact = contacts_by_index.get(index, {})
            contact[u'index'] = index
            contact[type] = value

            if type == 'email':
                contact[u'id'] = extra['id']

            contacts_by_index[index] = contact

    contacts = list(contacts_by_index.values())
    return sorted(contacts, key=lambda c: int(c['index']))
Example #43
def get_package_id_by_data_pids(data_dict):
    '''
    Try if the provided data PIDs match exactly one dataset.

    :param data_dict:
    :return: Package id or None if not found.
    '''
    data_pids = get_pids_by_type('data', data_dict)

    if len(data_pids) == 0:
        return None

    pid_list = [pid.get('id') for pid in data_pids]

    # Get package ID's with matching PIDS
    query = Session.query(model.PackageExtra.package_id.distinct()).\
        filter(model.PackageExtra.value.in_(pid_list))
    pkg_ids = query.all()

    if len(pkg_ids) != 1:
        return None              # Nothing to do if we get many or zero datasets

    # Get extras with the received package ID's
    query = select(['key', 'value', 'state']).where(
        and_(model.PackageExtra.package_id.in_(pkg_ids), model.PackageExtra.key.like('pids_%')))

    extras = Session.execute(query)

    # Dictize the results
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    # Check that matching PIDS are type 'data'.
    for extra in extras:
        key = extra['key'].split('_')   # eg. ('pids', '0', 'id')

        if key[2] == 'id' and extra['value'] in pid_list:
            type_key = '_'.join(key[:2] + ['type'])

            if not any(x['key'] == type_key and x['value'] == 'data' for x in extras):
                return None      # Found a hit with wrong type of PID

    return pkg_ids[0]    # No problems found, so use this
Example #44
def get_package_id_by_primary_pid(data_dict):
    '''
    Try if the provided primary PID matches exactly one dataset.

    THIS METHOD WAS PREVIOUSLY GET_PACKAGE_ID_BY_DATA_PIDS, is the below correct, or should relation pids also be used?

    :param data_dict:
    :return: Package id or None if not found.
    '''
    primary_pid = get_primary_pid(data_dict)
    if not primary_pid:
        return None

    pid_list = [primary_pid]

    # Get package ID's with matching PIDS
    query = Session.query(model.PackageExtra.package_id.distinct()).\
        filter(model.PackageExtra.value.in_(pid_list))
    pkg_ids = query.all()
    if len(pkg_ids) != 1:
        return None              # Nothing to do if we get many or zero datasets

    # Get extras with the received package ID's
    query = select(['key', 'value', 'state']).where(
        and_(model.PackageExtra.package_id.in_(pkg_ids), model.PackageExtra.key.like('pids_%')))

    extras = Session.execute(query)

    # Dictize the results
    extras = model_dictize.extras_list_dictize(extras, {'model': PackageExtra})

    # Check that matching PIDS are type 'primary'.
    for extra in extras:
        key = extra['key'].split('_')   # eg. ['pids', '0', 'id']

        if key[2] == 'id' and extra['value'] in pid_list:
            type_key = '_'.join(key[:2] + ['type'])

            if not any(x['key'] == type_key and x['value'] == 'primary' for x in extras):
                return None      # Found a hit with wrong type of PID

    return pkg_ids[0]    # No problems found, so use this
Example #45
def get_metameta_minmax(org_id, key):
    """
     parameter: org_id
     parameter: key
     returns dict of meta metadata
    """
    records = []

    if org_id is None or len(org_id.strip()) == 0:
        return records

    sql = "SELECT min_value, max_value FROM metameta WHERE org_id = '%s' AND key = '%s' AND state = '%s' LIMIT 1;" % (
        org_id, key, state_active)
    rows = Session.execute(sql)
    for row in rows:
        records.append({
            'min_value': row['min_value'],
            'max_value': row['max_value'],
        })
    return records
Example #46
def init_metadata(org_id):

    #retrieve metameta records for org_id
    records = select_metametadata_for_org(org_id, metameta_id=None, active_only=False, withPrefix=True)
    sql = "INSERT INTO metameta(id, org_id, key, default_value, label, validator, state, custom, presettable, readonly, field_type, min_value, max_value, sort_order, modified_date) VALUES "
    update_count = 0
    for key, values in PRESET_METAMETADATA.items():
        if not key or len(key) == 0:
            continue
        print('********key=%s*******' % (key))
        metameta_key = add_prefix_key(key)

        #check whether key exists or not
        key_exists = False
        for record in records:
            if record['key']==metameta_key:
                key_exists = True
                break

        if not key_exists:

            #check field type
            if values.get('field_type') == field_type_single_choice:

                presets = get_metameta_options_preset(metameta_key)
                update_metameta_options(org_id, metameta_key, presets)
            update_count += 1

            #create a record for a text field
            sql = sql + " ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %r, %r, %r, '%s', %f, %f, %d, '%s'),"%(str(uuid.uuid4()), org_id, metameta_key, values.get('default_value'), values.get('label'), values.get('validator'), values.get('state'), str(values.get('custom')), str(values.get('presettable')), str(values.get('readonly')), values.get('field_type'), 0.0, 0.0, int(values.get('sort_order')), datetime.datetime.utcnow().isoformat())

    if update_count > 0:
        sql = sql[:-1]
        sql = sql + ';'

        result = Session.execute(sql)
        Session.commit()
Example #47
def postgis_version():

    result = Session.execute('SELECT PostGIS_Lib_Version()')

    return result.scalar()
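A quick check built on this helper; treating the result as a 'major.minor.patch' string is an assumption about PostGIS_Lib_Version() output:

version = postgis_version()            # e.g. '2.5.4'
postgis_major = int(version.split('.')[0])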
Example #48
    def import_stage(self, harvest_object):

        log = logging.getLogger(__name__ + '.import')
        log.debug('Import stage for harvest object: %s', harvest_object.id)

        if not harvest_object:
            log.error('No harvest object received')
            return False

        self._set_source_config(harvest_object.source.config)

        status = self._get_object_extra(harvest_object, 'status')

        # Get the last harvested object (if any)
        previous_object = Session.query(HarvestObject) \
                          .filter(HarvestObject.guid == harvest_object.guid) \
                          .filter(HarvestObject.current == True) \
                          .first()

        if status == 'delete':
            # Delete package
            context = {'model': model, 'session': model.Session, 'user': self._get_user_name()}

            p.toolkit.get_action('package_delete')(context, {'id': harvest_object.package_id})
            log.info('Deleted package {0} with guid {1}'.format(harvest_object.package_id, harvest_object.guid))

            return True

        # Parse ISO document
        ##try:
            ##iso_values = ISODocument(harvest_object.content).read_values()
        ##except Exception, e:
            ##self._save_object_error('Error parsing ISO document for object {0}: {1}'.format(harvest_object.id, str(e)),
                                    ##harvest_object, 'Import')
            ##return False

        # Flag previous object as not current anymore
        if previous_object:
            previous_object.current = False
            previous_object.add()

        # Generate GUID if not present (i.e. it's a manual import)
        if not harvest_object.guid:
            self._save_object_error('Missing GUID for object {0}'
                        .format(harvest_object.id), harvest_object, 'Import')
            return False

        # Get document modified date
        #try:
            #metadata_modified_date = dateutil.parser.parse(iso_values['metadata-date'], ignoretz=True)
        #except ValueError:
            #self._save_object_error('Could not extract reference date for object {0} ({1})'
                        #.format(harvest_object.id, iso_values['metadata-date']), harvest_object, 'Import')
            #return False

        #harvest_object.metadata_modified_date = metadata_modified_date
        harvest_object.metadata_modified_date = datetime.date.today()
        harvest_object.add()

        # Build the package dict
        package_dict = self.get_package_dict(harvest_object)
        if not package_dict:
            log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
            return False

        # Create / update the package

        context = {'model': model,
                   'session': model.Session,
                   'user': self._get_user_name(),
                   'extras_as_string': True,
                   'api_version': '2',
                   'return_id_only': True}
        if context['user'] == self._site_user['name']:
            context['ignore_auth'] = True

        # The default package schema does not like Upper case tags
        tag_schema = logic.schema.default_tags_schema()
        tag_schema['name'] = [not_empty, unicode]

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        if status == 'new':
            package_schema = logic.schema.default_create_package_schema()
            package_schema['tags'] = tag_schema
            context['schema'] = package_schema

            # We need to explicitly provide a package ID, otherwise ckanext-spatial
            # won't be able to link the extent to the package.
            package_dict['id'] = unicode(uuid.uuid4())
            package_schema['id'] = [unicode]

            # Save reference to the package on the object
            harvest_object.package_id = package_dict['id']
            harvest_object.add()
            # Defer constraints and flush so the dataset can be indexed with
            # the harvest object id (on the after_show hook from the harvester
            # plugin)
            Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
            model.Session.flush()

            try:
                package_id = p.toolkit.get_action('package_create')(context, package_dict)
                log.info('Created new package %s with guid %s', package_id, harvest_object.guid)
            except p.toolkit.ValidationError as e:
                self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
                return False

        elif status == 'change':

            # Check if the document has changed
            m = hashlib.md5()
            m.update(previous_object.content)
            old_md5 = m.hexdigest()

            m = hashlib.md5()
            m.update(harvest_object.content)
            new_md5 = m.hexdigest()

            if old_md5 == new_md5:

                # Assign the previous job id to the new object to
                # avoid losing history
                harvest_object.harvest_job_id = previous_object.job.id
                harvest_object.add()

                harvest_object.metadata_modified_date = previous_object.metadata_modified_date

                # Delete the previous object to avoid cluttering the object table
                previous_object.delete()

                log.info('Document with GUID %s unchanged, skipping...' % (harvest_object.guid))
            else:
                package_schema = logic.schema.default_update_package_schema()
                package_schema['tags'] = tag_schema
                context['schema'] = package_schema

                package_dict['id'] = harvest_object.package_id
                try:
                    package_id = p.toolkit.get_action('package_update')(context, package_dict)
                    log.info('Updated package %s with guid %s', package_id, harvest_object.guid)
                except p.toolkit.ValidationError as e:
                    self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
                    return False

        model.Session.commit()

        return True
Example #49
File: cli.py Project: arkka/ckan
 def version(self):
     from ckan.model import Session
     print(Session.execute('select version from migrate_version;').fetchall())
Example #50
    def import_stage(self, harvest_object):

        log = logging.getLogger(__name__ + '.import')
        log.debug('Import stage for harvest object: %s' % harvest_object.id)

        if not harvest_object:
            log.error('No harvest object received')
            return False

        self._set_source_config(harvest_object.source.config)

        status = self._get_object_extra(harvest_object, 'status')

        # Get the last harvested object (if any)
        previous_object = Session.query(HarvestObject) \
                          .filter(HarvestObject.guid == harvest_object.guid) \
                          .filter(HarvestObject.current == True) \
                          .first()

        if status == 'delete':
            # Delete package
            context = {'model': model, 'session': model.Session, 'user': self._get_user_name()}

            p.toolkit.get_action('package_delete')(context, {'id': harvest_object.package_id})
            log.info('Deleted package {0} with guid {1}'.format(harvest_object.package_id, harvest_object.guid))

            return True

        if previous_object:
            # Flag previous object as not current anymore
            previous_object.current = False
            previous_object.add()

            # Check if metadata was modified
            # GeoNode does not provide a "last modified" date,
            # so compare the full content instead.
            content_old = previous_object.content
            content_new = harvest_object.content

            is_modified = content_old != content_new
            prev_job_id = previous_object.job.id
        else:
            is_modified = True
            prev_job_id = None

        # Error if GUID not present
        if not harvest_object.guid:
            self._save_object_error('Missing GUID for object {0}'
                        .format(harvest_object.id), harvest_object, 'Import')
            return False

        log.info('Object GUID: %s is modified: %s', harvest_object.guid, is_modified)

        # Set the metadata date to the import time. Not ideal, since we'd
        # prefer the original metadata date; if GeoNode provided it, we could
        # use it to decide whether a dataset needs updating.
        harvest_object.metadata_modified_date = datetime.now()
        harvest_object.add()

        # Build the package dict
        package_dict = self.get_package_dict(harvest_object)
        if not package_dict:
            log.error('No package dict returned, aborting import for object {0}'.format(harvest_object.id))
            return False

        # Create / update the package

        context = {'model': model,
                   'session': model.Session,
                   'user': self._get_user_name(),
                   'extras_as_string': True,
                   'api_version': '2',
                   'return_id_only': True}
        if context['user'] == self._site_user['name']:
            context['ignore_auth'] = True

        # The default package schema does not like Upper case tags
        tag_schema = logic.schema.default_tags_schema()
        tag_schema['name'] = [not_empty, unicode]

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        if status == 'new':
            package_schema = logic.schema.default_create_package_schema()
            package_schema['tags'] = tag_schema
            context['schema'] = package_schema

            # We need to explicitly provide a package ID, otherwise ckanext-spatial
            # won't be able to link the extent to the package.
            package_dict['id'] = unicode(uuid.uuid4())
            package_schema['id'] = [unicode]
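            # (The default create schema rejects a caller-supplied id, hence
            # the validator override above.)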

            # Save reference to the package on the object
            harvest_object.package_id = package_dict['id']
            harvest_object.add()
            # Defer constraints and flush so the dataset can be indexed with
            # the harvest object id (on the after_show hook from the harvester
            # plugin)
            Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
            model.Session.flush()

            try:
                # package_id = p.toolkit.get_action('package_create')(context, package_dict)
                package_id = self._create_package(context, package_dict, harvest_object)
                log.info('Created new package %s with guid %s', package_id, harvest_object.guid)
                self._post_package_create(package_id, harvest_object)
            except p.toolkit.ValidationError as e:
                self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
                return False

        elif status == 'change':

            # Check if the document has changed

            if not is_modified:

                # Assign the previous job id to the new object to
                # avoid losing history
                harvest_object.harvest_job_id = prev_job_id
                harvest_object.add()

                harvest_object.metadata_modified_date = previous_object.metadata_modified_date

                # Delete the previous object to avoid cluttering the object table
                previous_object.delete()

                log.info('Document with GUID %s unchanged, skipping...', harvest_object.guid)
            else:
                package_schema = logic.schema.default_update_package_schema()
                package_schema['tags'] = tag_schema
                context['schema'] = package_schema

                package_dict['id'] = harvest_object.package_id
                try:
                    #package_id = p.toolkit.get_action('package_update')(context, package_dict)
                    package_id = self._update_package(context, package_dict, harvest_object)
                    log.info('Updated package %s with guid %s', package_id, harvest_object.guid)
                    self._post_package_update(package_id, harvest_object)
                except p.toolkit.ValidationError as e:
                    self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
                    return False

        model.Session.commit()

        return True
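    # Hedged sketch of the create/update hooks called above; they are
    # harvester-specific. A minimal version could delegate straight to the
    # core CKAN actions, keeping the _post_* callbacks as extension points
    # for subclasses (bodies here are illustrative, not the original):
    def _create_package(self, context, package_dict, harvest_object):
        return p.toolkit.get_action('package_create')(context, package_dict)

    def _update_package(self, context, package_dict, harvest_object):
        return p.toolkit.get_action('package_update')(context, package_dict)

    def _post_package_create(self, package_id, harvest_object):
        pass  # e.g. attach GeoNode-specific resources or permissions

    def _post_package_update(self, package_id, harvest_object):
        pass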
    # Setting up package multilang table
    if not package_multilang_table.exists():
        try:
            package_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if package_multilang_table.exists():
                Session.execute('DROP TABLE package_multilang')
                Session.commit()

            raise e

        log.info('Package Multilingual table created')
    else:
        log.info('Package Multilingual table already exists')

    # Setting up group multilang table
    if not group_multilang_table.exists():
        try:
            group_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if group_multilang_table.exists():
                Session.execute('DROP TABLE group_multilang')
                Session.commit()

            raise e

        log.info('Group Multilingual table created')
    else:
        log.info('Group Multilingual table already exists')

    # Setting up resource multilang table
    if not resource_multilang_table.exists():
        try:
            resource_multilang_table.create()
        except Exception as e:
            # Make sure the table does not remain incorrectly created
            if resource_multilang_table.exists():
                Session.execute('DROP TABLE resource_multilang')
                Session.commit()

            raise e

        log.info('Resource Multilingual table created')
    else:
        log.info('Resource Multilingual table already exists')

def postgis_version():

    result = Session.execute('SELECT postgis_lib_version()')

    return result.scalar()
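# Example use (hedged sketch): postgis_lib_version() returns a version string
# such as '2.5.4', so callers can branch on the major version (PostGIS 2.x
# dropped the old unprefixed function aliases):
#
#     if int(postgis_version().split('.')[0]) >= 2:
#         ...  # safe to rely on the ST_-prefixed functions only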
Example #53
def init_db():
    """
    Create boards, threads and posts tables in the database.
    Prepopulate board table with default data.
    """
    if not model.package_table.exists():
        # during tests?
        return

    session = Session()
    if not board_table.exists():
        board_table.create(checkfirst=True)
        thread_table.create(checkfirst=True)
        post_table.create(checkfirst=True)
        log.debug("Forum tables have been created")

        for board_name, board_desc in DEFAULT_BOARDS.items():
            board = Board()
            board.name = board_name
            board.slug = slugify(board_name)
            board.description = board_desc
            session.add(board)

        if session.new:
            log.debug('Default boards created')
            session.commit()

    if not migration_table.exists():
        migration_table.create(checkfirst=True)
        session.commit()
    if not banned_table.exists():
        banned_table.create(checkfirst=True)
        session.commit()
    if not unsubscription_table.exists():
        unsubscription_table.create(checkfirst=True)
        session.commit()
    migration_number = session.query(migration_table).count()
    log.debug('Migration number: %s', migration_number)
    migration_sql_list = [
        "ALTER TABLE forum_post ADD COLUMN active boolean DEFAULT TRUE",
        "ALTER TABLE forum_thread ADD COLUMN active boolean DEFAULT TRUE",
        "ALTER TABLE forum_board ADD COLUMN active boolean DEFAULT TRUE",
        "ALTER TABLE forum_thread DROP COLUMN slug",
        "ALTER TABLE forum_thread ADD COLUMN can_post boolean DEFAULT TRUE",
        "ALTER TABLE forum_board ADD COLUMN can_post boolean DEFAULT TRUE",
        u"INSERT INTO forum_board(\"id\", \"name\", \"slug\", \"description\", \"active\", \"can_post\") "
        +
        u"VALUES(DEFAULT, 'Запропонувати набір', 'zaproponuvati-nabir', '', true, false)"
    ]
    for counter, sql in enumerate(migration_sql_list, start=1):
        if migration_number < counter:
            try:
                log.debug(sql)
                session.execute(sql)
            except ProgrammingError as e:
                print(e)
                log.debug('Migration has been rolled back.')
                session.rollback()
            finally:
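                # Record the attempt either way, so a failed statement is not
                # retried on every init_db() run.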
                session.execute(migration_table.insert())
                session.commit()

    session.close()
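# To add a schema change (hedged sketch): add one more statement to the
# migration_sql_list literal above. Applied migrations are tracked by the row
# count in migration_table, so only the new tail entries run on the next
# init_db() call, e.g. (hypothetical column):
#
#     "ALTER TABLE forum_post ADD COLUMN sticky boolean DEFAULT FALSE"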
    def import_stage(self, harvest_object):

        log = logging.getLogger(__name__ + '.import')
        log.debug('%s: Import stage for harvest object: %s',
                  self.harvester_name(), harvest_object.id)

        if not harvest_object:
            log.error('No harvest object received')
            return False

        if not harvest_object.content:
            log.error('Harvest object has no content')
            self._save_object_error(
                'Empty content for object %s' % harvest_object.id,
                harvest_object, 'Import')
            return False

        self._set_source_config(harvest_object.source.config)

        status = self._get_object_extra(harvest_object, 'status')

        # Get the last harvested object (if any)
        previous_object = Session.query(HarvestObject) \
                          .filter(HarvestObject.guid == harvest_object.guid) \
                          .filter(HarvestObject.current == True) \
                          .first()

        context = {
            'model': model,
            'session': model.Session,
            'user': self._get_user_name()
        }

        if status == 'delete':
            # Delete package
            p.toolkit.get_action('package_delete')(
                context, {
                    'id': harvest_object.package_id
                })
            log.info('Deleted package {0} with guid {1}'.format(
                harvest_object.package_id, harvest_object.guid))

            return True

        # Flag previous object as not current anymore
        if previous_object:
            previous_object.current = False
            previous_object.add()

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        # Error out if the GUID is missing
        if not harvest_object.guid:
            self._save_object_error(
                'Missing GUID for object {0}'.format(harvest_object.id),
                harvest_object, 'Import')
            return False

        # pre-check to skip resource logic in case no changes occurred remotely
        if status == 'change':

            # Check if the document has changed
            m = hashlib.md5()
            m.update(previous_object.content.encode())
            old_md5 = m.hexdigest()

            m = hashlib.md5()
            m.update(harvest_object.content.encode())
            new_md5 = m.hexdigest()

            if old_md5 == new_md5:

                # Assign the previous job id to the new object
                # to avoid losing history
                harvest_object.harvest_job_id = previous_object.job.id
                harvest_object.add()

                harvest_object.metadata_modified_date = previous_object.metadata_modified_date
                harvest_object.add()

                # Delete the previous object to avoid cluttering the object table
                previous_object.delete()

                # Reindex the corresponding package to update the reference to the harvest object
                context.update({'validate': False, 'ignore_auth': True})
                try:
                    package_dict = logic.get_action('package_show')(
                        context, {
                            'id': harvest_object.package_id
                        })
                except p.toolkit.ObjectNotFound:
                    pass
                else:
                    for extra in package_dict.get('extras', []):
                        if extra['key'] == 'harvest_object_id':
                            extra['value'] = harvest_object.id
                    if package_dict:
                        package_index = PackageSearchIndex()
                        package_index.index_package(package_dict)

                log.info('%s document with GUID %s unchanged, skipping...',
                         self.harvester_name(), harvest_object.guid)
                model.Session.commit()

                return "unchanged"

        # Build the package dict
        package_dict, metadata = self.create_package_dict(
            harvest_object.guid, harvest_object.content)

        if not package_dict:
            log.error(
                'No package dict returned, aborting import for object {0}'.
                format(harvest_object.id))
            return False

        package_dict['name'] = self._gen_new_name(package_dict['title'])

        # We need to get the owner organization (if any) from the harvest source dataset
        source_dataset = model.Package.get(harvest_object.source.id)
        if source_dataset.owner_org:
            package_dict['owner_org'] = source_dataset.owner_org

        self.attach_resources(metadata, package_dict, harvest_object)

        # Create / update the package

        context = {
            'model': model,
            'session': model.Session,
            'user': self._get_user_name(),
            'extras_as_string': True,
            'api_version': '2',
            'return_id_only': True
        }
        if context['user'] == self._site_user['name']:
            context['ignore_auth'] = True

        # The default package schema does not like Upper case tags
        tag_schema = logic.schema.default_tags_schema()
        tag_schema['name'] = [not_empty]

        if status == 'new':
            package_schema = logic.schema.default_create_package_schema()
            package_schema['tags'] = tag_schema
            context['schema'] = package_schema

            # We need to explicitly provide a package ID, otherwise ckanext-spatial
            # won't be able to link the extent to the package.
            package_dict['id'] = uuid.uuid4().hex
            package_schema['id'] = []

            # Save reference to the package on the object
            harvest_object.package_id = package_dict['id']
            harvest_object.add()
            # Defer constraints and flush so the dataset can be indexed with
            # the harvest object id (on the after_show hook from the harvester
            # plugin)
            Session.execute(
                'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
            model.Session.flush()

            try:
                package_id = p.toolkit.get_action('package_create')(
                    context, package_dict)
                log.info('%s: Created new package %s with guid %s',
                         self.harvester_name(), package_id,
                         harvest_object.guid)
            except p.toolkit.ValidationError as e:
                self._save_object_error(
                    'Validation Error: %s' % str(e.error_summary),
                    harvest_object, 'Import')
                return False

        elif status == 'change':
            # We know the remote document changed, because of the MD5 hash
            # comparison done above

            package_schema = logic.schema.default_update_package_schema()
            package_schema['tags'] = tag_schema
            context['schema'] = package_schema

            package_dict['id'] = harvest_object.package_id
            try:
                package_id = p.toolkit.get_action('package_update')(
                    context, package_dict)
                log.info('%s updated package %s with guid %s',
                         self.harvester_name(), package_id,
                         harvest_object.guid)
            except p.toolkit.ValidationError as e:
                self._save_object_error(
                    'Validation Error: %s' % str(e.error_summary),
                    harvest_object, 'Import')
                return False

        model.Session.commit()

        return True
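    # Hedged sketch of the abstract methods this import stage expects from a
    # concrete harvester; signatures are inferred from the calls above and the
    # bodies are placeholders only:
    def harvester_name(self):
        return 'my-harvester'  # used only as a log prefix above

    def create_package_dict(self, guid, content):
        # Parse the raw harvested document into (package_dict, metadata).
        raise NotImplementedError

    def attach_resources(self, metadata, package_dict, harvest_object):
        # Append resource dicts to package_dict['resources'].
        raise NotImplementedError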
Example #55
    def write_package_from_gemini_string(self, content):
        '''Create or update a Package based on some content that has
        come from a URL.

        Returns the package_dict of the result.
        If there is an error, it returns None or raises Exception.
        '''
        log = logging.getLogger(__name__ + '.import')
        package = None
        gemini_document = GeminiDocument(content)
        gemini_values = gemini_document.read_values()
        gemini_guid = gemini_values['guid']

        # Save the metadata reference date in the Harvest Object
        try:
            metadata_modified_date = datetime.strptime(
                gemini_values['metadata-date'], '%Y-%m-%d')
        except ValueError:
            try:
                metadata_modified_date = datetime.strptime(
                    gemini_values['metadata-date'], '%Y-%m-%dT%H:%M:%S')
            except ValueError:
                raise Exception('Could not extract reference date for GUID %s (%s)' \
                        % (gemini_guid,gemini_values['metadata-date']))

        self.obj.metadata_modified_date = metadata_modified_date
        self.obj.save()

        last_harvested_object = Session.query(HarvestObject) \
                            .filter(HarvestObject.guid==gemini_guid) \
                            .filter(HarvestObject.current==True) \
                            .all()

        if len(last_harvested_object) == 1:
            last_harvested_object = last_harvested_object[0]
        elif len(last_harvested_object) > 1:
            raise Exception(
                'Application Error: more than one current record for GUID %s' %
                gemini_guid)

        reactivate_package = False
        if last_harvested_object:
            # We've previously harvested this (i.e. it's an update)

            # Use metadata modified date instead of content to determine if the package
            # needs to be updated
            if last_harvested_object.metadata_modified_date is None \
                or last_harvested_object.metadata_modified_date < self.obj.metadata_modified_date \
                or self.force_import \
                or (last_harvested_object.metadata_modified_date == self.obj.metadata_modified_date and
                    last_harvested_object.source.active is False):
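                # i.e. update when no previous date was recorded, the remote
                # record is newer, the import is forced, or the date is equal
                # but the record came from a now-inactive source.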

                if self.force_import:
                    log.info('Import forced for object %s with GUID %s' %
                             (self.obj.id, gemini_guid))
                else:
                    log.info(
                        'Package for object with GUID %s needs to be created or updated'
                        % gemini_guid)

                package = last_harvested_object.package

                # If the package has a deleted state, we will only update it and reactivate it if the
                # new document has a more recent modified date
                if package.state == u'deleted':
                    if last_harvested_object.metadata_modified_date < self.obj.metadata_modified_date:
                        log.info(
                            'Package for object with GUID %s will be re-activated'
                            % gemini_guid)
                        reactivate_package = True
                    else:
                        log.info(
                            'Remote record with GUID %s is not more recent than a deleted package, skipping... '
                            % gemini_guid)
                        return None

            else:
                if last_harvested_object.content != self.obj.content and \
                 last_harvested_object.metadata_modified_date == self.obj.metadata_modified_date:
                    diff_generator = difflib.unified_diff(
                        last_harvested_object.content.split('\n'),
                        self.obj.content.split('\n'))
                    diff = '\n'.join(diff_generator)
                    raise Exception(
                        'The contents of document with GUID %s changed, but the metadata date has not been updated.\nDiff:\n%s'
                        % (gemini_guid, diff))
                else:
                    # The content hasn't changed, no need to update the package
                    log.info('Document with GUID %s unchanged, skipping...' %
                             (gemini_guid))
                return None
        else:
            log.info(
                'No package with GEMINI guid %s found, let\'s create one' %
                gemini_guid)

        extras = {'UKLP': 'True', 'harvest_object_id': self.obj.id}

        # Just add some of the metadata as extras, not the whole lot
        for name in [
                # Essentials
                'spatial-reference-system',
                'guid',
                # Usefuls
                'dataset-reference-date',
                'metadata-language',  # Language
                'metadata-date',  # Released
                'coupled-resource',
                'contact-email',
                'frequency-of-update',
                'spatial-data-service-type',
        ]:
            extras[name] = gemini_values[name]

        if len(gemini_values.get('progress', [])):
            extras['progress'] = gemini_values['progress'][0]
        else:
            extras['progress'] = ''

        extras['resource-type'] = gemini_values['resource-type'][0]

        # Use-constraints can contain values which are:
        #  * free text
        #  * licence URL
        # Store all values in extras['licence'] and if there is a
        # URL in there, store that in extras['licence_url']
        extras['licence'] = gemini_values.get('use-constraints', '')
        if len(extras['licence']):
            licence_url_extracted = self._extract_first_licence_url(
                extras['licence'])
            if licence_url_extracted:
                extras['licence_url'] = licence_url_extracted

        extras['access_constraints'] = gemini_values.get(
            'limitations-on-public-access', '')
        if 'temporal-extent-begin' in gemini_values:
            #gemini_values['temporal-extent-begin'].sort()
            extras['temporal_coverage-from'] = gemini_values[
                'temporal-extent-begin']
        if 'temporal-extent-end' in gemini_values:
            #gemini_values['temporal-extent-end'].sort()
            extras['temporal_coverage-to'] = gemini_values[
                'temporal-extent-end']

        # Save responsible organization roles
        provider, responsible_parties = self._process_responsible_organisation(
            gemini_values['responsible-organisation'])
        extras['provider'] = provider
        extras['responsible-party'] = '; '.join(responsible_parties)

        if len(gemini_values['bbox']) > 0:
            extras['bbox-east-long'] = gemini_values['bbox'][0]['east']
            extras['bbox-north-lat'] = gemini_values['bbox'][0]['north']
            extras['bbox-south-lat'] = gemini_values['bbox'][0]['south']
            extras['bbox-west-long'] = gemini_values['bbox'][0]['west']

            # Construct a GeoJSON extent so ckanext-spatial can register the extent geometry
            extent_string = self.extent_template.substitute(
                xmin=extras['bbox-west-long'],
                ymin=extras['bbox-south-lat'],
                xmax=extras['bbox-east-long'],
                ymax=extras['bbox-north-lat'])

            extras['spatial'] = extent_string.strip()
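            # (extent_template is assumed to be a string.Template producing a
            # GeoJSON Polygon from the $xmin/$ymin/$xmax/$ymax placeholders;
            # the exact template lives elsewhere in the harvester.)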

        tags = []
        for tag in gemini_values['tags']:
            tag = tag[:50] if len(tag) > 50 else tag
            tags.append({'name': tag})

        package_dict = {
            'title': gemini_values['title'],
            'notes': gemini_values['abstract'],
            'tags': tags,
            'resources': []
        }

        if self.obj.source.publisher_id:
            package_dict['groups'] = [{'id': self.obj.source.publisher_id}]

        if reactivate_package:
            package_dict['state'] = u'active'

        if package is None or package.title != gemini_values['title']:
            name = self.gen_new_name(gemini_values['title'])
            if not name:
                name = self.gen_new_name(six.text_type(gemini_guid))
            if not name:
                raise Exception(
                    'Could not generate a unique name from the title or the GUID. Please choose a more unique title.'
                )
            package_dict['name'] = name
        else:
            package_dict['name'] = package.name

        resource_locators = gemini_values.get('resource-locator', [])

        if len(resource_locators):
            for resource_locator in resource_locators:
                url = resource_locator.get('url', '')
                if url:
                    resource_format = ''
                    resource = {}
                    if extras['resource-type'] == 'service':
                        # Check if the service is a view service
                        test_url = url.split('?')[0] if '?' in url else url
                        if self._is_wms(test_url):
                            resource['verified'] = True
                            resource['verified_date'] = datetime.now(
                            ).isoformat()
                            resource_format = 'WMS'
                    resource.update({
                        'url': url,
                        'name': resource_locator.get('name', ''),
                        'description': resource_locator.get('description') or 'Resource locator',
                        'format': resource_format or None,
                        'resource_locator_protocol': resource_locator.get('protocol', ''),
                        'resource_locator_function': resource_locator.get('function', '')
                    })
                    package_dict['resources'].append(resource)

            # Guess the best view service to use in WMS preview
            verified_view_resources = [
                r for r in package_dict['resources']
                if 'verified' in r and r['format'] == 'WMS'
            ]
            if len(verified_view_resources):
                verified_view_resources[0][
                    'ckan_recommended_wms_preview'] = True
            else:
                view_resources = [
                    r for r in package_dict['resources']
                    if r['format'] == 'WMS'
                ]
                if len(view_resources):
                    view_resources[0]['ckan_recommended_wms_preview'] = True

        extras_as_dict = []
        for key, value in extras.items():
            if isinstance(value, six.string_types + (Number, )):
                extras_as_dict.append({'key': key, 'value': value})
            else:
                extras_as_dict.append({'key': key, 'value': json.dumps(value)})

        package_dict['extras'] = extras_as_dict

        if package is None:
            # Create new package from data.
            package = self._create_package_from_data(package_dict)
            log.info('Created new package ID %s with GEMINI guid %s',
                     package['id'], gemini_guid)
        else:
            package = self._create_package_from_data(package_dict,
                                                     package=package)
            log.info(
                'Updated existing package ID %s with existing GEMINI guid %s',
                package['id'], gemini_guid)

        # Flag the other objects of this source as not current anymore
        from ckanext.harvest.model import harvest_object_table
        u = update(harvest_object_table) \
                .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \
                .values(current=False)
        Session.execute(u, params={'b_package_id': package['id']})
        Session.commit()

        # Refresh current object from session, otherwise the
        # import paster command fails
        Session.remove()
        Session.add(self.obj)
        Session.refresh(self.obj)

        # Set reference to package in the HarvestObject and flag it as
        # the current one
        if not self.obj.package_id:
            self.obj.package_id = package['id']

        self.obj.current = True
        self.obj.save()

        return package
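    # Hedged sketch of the licence-URL helper used above: return the first
    # entry that parses as an absolute URL, else None. The real
    # _extract_first_licence_url may differ.
    def _extract_first_licence_url(self, licences):
        from six.moves.urllib.parse import urlparse
        for licence in licences:
            parts = urlparse(licence)
            if parts.scheme and parts.netloc:
                return licence
        return None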