Esempio n. 1
0
def get_all(db_url=None, connection_func=None):
    """
    Utility function, returning association dictionaries for all table
    relations.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of dicts, one per selected row, keyed by the SELECT column labels
      ('parent', 'parent_column', 'table', 'column', 'type'). Every dict is
      passed through validate() before being collected.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.
    Anything raised by validate() or by the query propagates to the caller;
      the connection is closed either way.

    >>> get_all()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # obtain the connection first (fails fast when no arguments were given)
    connection = dto_util.get_connection(db_url, connection_func)
    # identifiers substituted into the statement come from module constants
    sql_identifiers = {
        'schema': dto_util.SCHEMA,
        'association_table': TABLE,
        'table_table': table.TABLE,
        'association_type_table': association_type.TABLE,
    }
    statement_template = (
        'SELECT \n'
        '   t_par.name AS parent \n'
        '  ,a.parent_table_col AS parent_column \n'
        '  ,t.name AS table \n'
        '  ,a.table_col AS column \n'
        '  ,at.type_name AS type \n'
        'FROM {schema}.{association_table} a \n'
        '  INNER JOIN {schema}.{association_type_table} at \n'
        '    ON a.table_relation_type_id = at.table_relation_type_id \n'
        '  INNER JOIN {schema}.{table_table} t \n'
        '    ON a.table_id = t.table_id \n'
        '  INNER JOIN {schema}.{table_table} t_par \n'
        '    ON a.parent_table_id = t_par.table_id \n')
    select_statement = statement_template.format(**sql_identifiers)
    try:
        collected = []
        for record in connection.execute(select_statement):
            # copy the row into a plain dict (label -> value)
            as_dict = dict(zip(record.keys(), record.values()))
            validate(as_dict)
            collected.append(as_dict)
        return collected
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 2
0
def get_all(db_url=None, connection_func=None):
    """
    Retrieve the current list of authorizations from the db.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of dicts, one per selected row, keyed by the SELECT column labels
      ('user_id', 'project', 'table'). Every dict is passed through
      validate() before being collected.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.
    ValidateException -- raised when a problem is encountered validating a dto

    >>> get_all()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # obtain the connection first (fails fast when no arguments were given)
    connection = dto_util.get_connection(db_url, connection_func)
    # identifiers substituted into the statement come from module constants
    sql_identifiers = {
        'auth_schema': SCHEMA,
        'authorization_table': TABLE,
        'support_schema': dto_util.SCHEMA,
        'project_table': project.TABLE,
        'table_table': table.TABLE,
    }
    statement_template = (
        'SELECT \n'
        '   a.user_id \n'
        '  ,p.name AS "project" \n'
        '  ,t.name AS "table" \n'
        'FROM {auth_schema}.{authorization_table} a \n'
        '   INNER JOIN {support_schema}.{table_table} t \n'
        '     ON a.table_id = t.table_id \n'
        '   INNER JOIN {support_schema}.{project_table} p \n'
        '     ON t.project_id = p.project_id \n')
    select_statement = statement_template.format(**sql_identifiers)
    try:
        collected = []
        for record in connection.execute(select_statement):
            # plain dict copy of the row, so it can be pprint()ed etc.
            as_dict = dict(record)
            validate(as_dict)
            collected.append(as_dict)
        return collected
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 3
0
def get_all(db_url=None, connection_func=None):
    """
    Retrieve the current list of projects from the warehouse support schema.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of dicts, one per selected row, keyed by the SELECT column labels
      ('name', 'title', 'inport_id', 'uuid'). Every dict is passed through
      validate() before being collected.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.
    ValidateException -- raised when a problem is encountered validating a dto

    >>> get_all()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # obtain the connection first (fails fast when no arguments were given)
    connection = dto_util.get_connection(db_url, connection_func)
    # schema/table identifiers come from module constants
    statement_template = (
        'SELECT \n'
        '   p.name \n'
        '  ,p.title \n'
        '  ,p.inport_data_set_id AS inport_id \n'
        '  ,p.csw_uuid AS uuid \n'
        'FROM {schema}.{table} p \n')
    select_statement = statement_template.format(
        schema=dto_util.SCHEMA,
        table=TABLE)
    try:
        collected = []
        for record in connection.execute(select_statement):
            as_dict = dict(record)  # plain dict copy of the row
            validate(as_dict)
            collected.append(as_dict)
        return collected
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 4
0
def get(table_name=None, db_url=None, connection_func=None):
    """
    Retrieve the current list of variables from the warehouse support schema.
    (Optionally filtered to just 1x table)

    Keyword Parameters:
    table_name  -- String, representing the name of a Warehoused table
      (Optional; when None the statement's CASE filter lets every row
      through)
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of dicts, one per selected row, keyed by the SELECT column labels.
      Every dict is passed through validate() before being collected.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.
    ValidateException -- raised when a problem is encountered validating a dto

    >>> get()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # obtain the connection first (fails fast when no arguments were given)
    connection = dto_util.get_connection(db_url, connection_func)
    # identifiers substituted into the statement come from module constants;
    # the table name itself is passed as the bound parameter %(name)s
    sql_identifiers = {
        'schema': dto_util.SCHEMA,
        'variable_table': TABLE,
        'table_table': table.TABLE,
        'python_type_table': variable_python_type.TABLE,
    }
    statement_template = (
        'SELECT \n'
        '   v.column_name AS column \n'
        '  ,v.title \n'
        '  ,v.description \n'
        '  ,t.name AS table\n'
        '  ,pt.constructor_name AS python_type \n'
        '  ,v.column_type AS physical_type \n'
        '  ,v.units \n'
        '  ,v.max_length \n'
        '  ,v.precision \n'
        '  ,v.allowed_values \n'
        'FROM {schema}.{variable_table} v \n'
        '  INNER JOIN {schema}.{python_type_table} pt \n'
        '    ON v.variable_python_type_id = pt.variable_python_type_id \n'
        '  INNER JOIN {schema}.{table_table} t \n'
        '    ON v.table_id = t.table_id \n'
        'WHERE \n'
        '  (CASE WHEN %(name)s IS NOT NULL AND t.name = %(name)s \n'
        '      THEN 1 --if name specified, return row only if name matches \n'
        '    WHEN %(name)s IS NULL \n'
        '      THEN 1 --if no name specified,return all variables(i.e.: 1=1)\n'
        '    ELSE 0 --dont return row if name specified but doesnt match row\n'
        '   END) = 1 \n')
    select_statement = statement_template.format(**sql_identifiers)
    try:
        collected = []
        for record in connection.execute(select_statement, name=table_name):
            as_dict = dict(record)  # plain dict copy of the row
            validate(as_dict)
            collected.append(as_dict)
        return collected
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 5
0
def get_by_lookup(table_names, db_url=None, connection_func=None):
    """
    Utility function, returning variable dictionaries associated with named
    tables.

    The columns of each named table are discovered from the database
    catalog; columns taking part in a foreign key (on either side) are
    excluded by the query. Columns whose Python type lookup raises
    LookupNullType are logged at INFO level and skipped.

    Keyword Parameters:
    table_names  --  A collection of table names, for which the table
      variables are to be retrieved.
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of variable dicts; only 'table', 'column' and 'python_type' are
      populated, every other field is None. Each dict is passed through
      validate() before being collected.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.

    >>> any_list = ['any_thing']
    >>> get_by_lookup( any_list)
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection #FIXME: connection should really just be an Engine
    discover_connection = dto_util.get_connection(db_url, connection_func)
    # field order of the variable dto (kept stable for consumers)
    dto_fields = ('table', 'column', 'title', 'description', 'python_type',
                  'physical_type', 'units', 'max_length', 'precision',
                  'allowed_values')
    # statement listing the non-key columns of one table (bound via %s)
    select_statement = (
        'SELECT \n'
        '   DISTINCT c.table_name as table \n'
        '  ,c.column_name as column \n'
        'FROM \n'
        '  information_schema.tables t \n'
        '  INNER JOIN information_schema.columns c \n'
        '    ON t.table_schema = c.table_schema \n'
        '    AND t.table_name = c.TABLE_NAME \n'
        '  INNER JOIN pg_catalog.pg_attribute a  --Identify fields used in Foreign key lookups \n'
        '''    ON a.attrelid = (t.table_schema||'."'||t.table_name||'"')::regclass \n'''
        '    AND a.attname = c.column_name \n'
        '  left outer join pg_catalog.pg_constraint con -- match table attribute # to any lookup \n'
        '''    ON con.conrelid = (t.table_schema||'."'||t.table_name||'"')::regclass \n'''
        '''    AND con.contype = 'f' -- only FOREIGN key constraints \n'''
        '    AND a.attnum = ANY(con.conkey) \n'
        '  left outer join pg_catalog.pg_constraint con_f -- match table attribute # to any lookup \n'
        '''    ON con_f.confrelid = (t.table_schema||'."'||t.table_name||'"')::regclass \n'''
        '''    AND con_f.contype = 'f' -- only FOREIGN key constraints \n'''
        '    AND a.attnum = ANY(con_f.confkey) \n'
        'WHERE \n'
        '      t.table_name = %s \n'
        '  AND con.conkey IS Null --a result with no constraint Keys arent used in a fk lookup \n'
        '  AND con_f.confkey IS Null --a result with no constraint Keys isnt used by a fk lookup \n'
    )
    try:
        discovered = []
        for requested_name in table_names:
            rows = discover_connection.execute(select_statement,
                                               requested_name)
            for row in rows:
                found_table = row['table']
                found_column = row['column']
                try:
                    constructor_name = variable_python_type.get_by_lookup(
                        found_table, found_column, db_url, connection_func)
                except variable_python_type.LookupNullType as e:
                    # type could not be determined: log it & skip the field
                    logging.info(e, exc_info=True)
                    continue
                # start with every field None, then fill in what is known
                variable = dict.fromkeys(dto_fields)
                variable.update(table=found_table,
                                column=found_column,
                                python_type=constructor_name)
                validate(variable)
                discovered.append(variable)
        return discovered
    finally:
        # always release the connection, on success or failure
        discover_connection.close()
Esempio n. 6
0
def lookup_tables(table_names,
                  table_type='fact',
                  lookup_type='dimension',
                  db_url=None,
                  connection_func=None):
    """
    Utility function, returning table dictionaries associated with named
    tables.

    For each named table, the tables referenced by its FOREIGN KEY
    constraints are reported (once per named table). Afterwards the table
    associations are inspected via dto_util.get_roles(); when replacement
    associations are detected, the role tables are appended as well.

    Keyword Parameters:
    table_names  --  A collection of Strings representing tables for which
      lists of associated tables are to be retrieved.
    table_type  -- String (Default: 'fact'); not referenced by this
      function's body.
    lookup_type  -- String stored as the 'type' of every referenced table
      dict that is built (Default: 'dimension')
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of table dicts; only 'name' and 'type' are populated for the
      referenced tables built here, the remaining fields are None.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.

    >>> any_list = ['any_thing']
    >>> lookup_tables( any_list)
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # statement listing the foreign key references of one table (bound via %s)
    select_statement = """
SELECT
   t_base.table_name as "table" --fact table name
  --,c.conkey
  ,a_base.attname as "table_field" --fact table field containing keys to be looked up
  ,t_ref.table_schema as "ref_table_schema" --schema of referenced dimension table
  ,t_ref.table_name as "ref_table"  --referenced dimension table name
  --,c.confkey
  ,a_ref.attname as "ref_table_field" --dimension column containing the keys
  ,pg_catalog.pg_get_constraintdef(c.oid, true) as condef  --pretty constraint text
FROM pg_catalog.pg_constraint c
  inner join information_schema.tables t_base
    on c.conrelid = (t_base.table_schema||'."'||t_base.table_name||'"')::regclass
  inner join pg_attribute a_base
    on c.conrelid = a_base.attrelid
    AND a_base.attnum = ANY(c.conkey)
  inner join information_schema.tables t_ref
    on c.confrelid = (t_ref.table_schema||'."'||t_ref.table_name||'"')::regclass
  inner join pg_attribute a_ref
    on c.confrelid = a_ref.attrelid
    AND a_ref.attnum = ANY(c.confkey)
WHERE c.contype = 'f' --Get only FOREIGN key constraints
  and t_base.table_name = %s
    """
    try:
        found_tables = []
        for requested_name in table_names:
            # referenced tables already reported for this requested table
            already_reported = set()
            for row in connection.execute(select_statement, requested_name):
                referenced = row['ref_table']
                if referenced in already_reported:
                    continue  # only build 1x dict per referenced table
                table_dto = {
                    'name': referenced,
                    'type': lookup_type,
                    'updated': None,
                    'rows': None,
                    'years': None,
                    'project': None,
                    'contact': None
                }
                table.validate(table_dto)
                found_tables.append(table_dto)
                already_reported.add(referenced)
        # check for Dimensional aliases (Roles)
        detected_associations = lookup_associations(
            table_names,
            db_url,
            connection_func=connection_func,
            lookup_roles=False)
        (role_tables,
         replacement_associations,
         role_associations) = dto_util.get_roles(detected_associations)
        if replacement_associations:
            # "role" associations detected: report the roles as tables too
            found_tables.extend(role_tables)
        return found_tables
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 7
0
def lookup_associations(table_names,
                        db_url=None,
                        connection_func=None,
                        default_type='fact dimension',
                        lookup_roles=True):
    """
    Utility function, returning association dictionaries associated with
    named tables.

    One association is built per FOREIGN KEY column pair found for each
    named table. When lookup_roles is set, the associations are passed to
    dto_util.get_roles(); any replacement associations it reports take the
    place of the detected ones sharing the same (table, column) key, and
    the reported role associations are appended.

    Keyword Parameters:
    table_names  --  A collection of table names, for which the table
      associations are to be retrieved.
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)
    default_type  -- String representing the association_type to be
      used for items found to be associated with one of the input tables
    lookup_roles  -- Boolean flag, indicating if the detected associations should
      be inspected for Dimensional aliases (Default: True)

    Returns:
    list of association dicts with keys 'parent', 'parent_column', 'table',
      'column' and 'type'.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.

    >>> any_list = ['any_thing']
    >>> lookup_associations( any_list)
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # statement listing the foreign key references of one table (bound via %s)
    select_statement = (
        'SELECT \n'
        '   t_base.table_name as "table" --table name \n'
        '  --,c.conkey \n'
        '  ,a_base.attname as "table_field" --table field containing keys to be looked up \n'
        '  ,t_ref.table_schema as "ref_table_schema" --schema of referenced table \n'
        '  ,t_ref.table_name as "ref_table"  --referenced table name \n'
        '  --,c.confkey \n'
        '  ,a_ref.attname as "ref_table_field" --referenced table column containing the keys \n'
        '  ,pg_catalog.pg_get_constraintdef(c.oid, true) as condef  --pretty constraint text \n'
        'FROM pg_catalog.pg_constraint c \n'
        '  inner join information_schema.tables t_base \n'
        '''    on c.conrelid = (t_base.table_schema||'."'||t_base.table_name||'"')::regclass \n'''
        '  inner join pg_attribute a_base \n'
        '    on c.conrelid = a_base.attrelid \n'
        '    AND a_base.attnum = ANY(c.conkey) \n'
        '  inner join information_schema.tables t_ref \n'
        '''    on c.confrelid = (t_ref.table_schema||'."'||t_ref.table_name||'"')::regclass \n'''
        '  inner join pg_attribute a_ref \n'
        '    on c.confrelid = a_ref.attrelid \n'
        '    AND a_ref.attnum = ANY(c.confkey) \n'
        '''WHERE c.contype = 'f' --Get only FOREIGN key constraints \n'''
        '  and t_base.table_name = %s \n')
    try:
        detected = []
        for requested_name in table_names:
            for row in connection.execute(select_statement, requested_name):
                found = {
                    'parent': row['ref_table'],
                    'parent_column': row['ref_table_field'],
                    'table': row['table'],
                    'column': row['table_field'],
                    'type': default_type
                }
                association.validate(found)
                detected.append(found)
        if not lookup_roles:
            return detected
        # check for Dimensional aliases (Roles)
        (role_tables,
         replacement_associations,
         role_associations) = dto_util.get_roles(detected)
        if not replacement_associations:
            return detected
        # index the naive associations by (table, column) ...
        by_table_column = {(item['table'], item['column']): item
                           for item in detected}
        # ... so the Dimension "role"-aware versions can take their place
        by_table_column.update(replacement_associations)
        role_aware = list(by_table_column.values())
        # add additional associations, relating the detected Dimension
        # "roles" back to their base dimensions.
        role_aware.extend(role_associations)
        return role_aware
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 8
0
def get(db_url=None, connection_func=None):
    """
    Retrieve the current list of tables from the warehouse support schema.

    The spatial bound columns named in sql_bounds_fields are condensed into
    a single 'bounds' string on each returned dict and then removed.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection_func' is not provided).
    connection_func  -- function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Returns:
    list of dicts, one per selected row. Every dict is passed through
      validate() before being collected.

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection_func or
      db_url parameter is specified.
    ValidateException -- raised when a problem is encountered validating a dto

    >>> get()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # obtain the connection first (fails fast when no arguments were given)
    connection = dto_util.get_connection(db_url, connection_func)
    # identifiers substituted into the statement come from module constants
    sql_identifiers = {
        'schema': dto_util.SCHEMA,
        'table': TABLE,
        'contact_table': contact.TABLE,
        'type_table': table_type.TABLE,
        'project_table': project.TABLE,
    }
    statement_template = (
        'SELECT \n'
        '   t.name \n'
        '  ,tt.type_name as type \n'
        '  ,t.num_rows as rows \n'
        '  ,t.aud_beg_dtm as updated \n'
        '  ,t.data_years as years \n'
        '  ,pt.name as project \n'
        '  ,ct.info as contact \n'
        '  ,t.is_selectable as selectable \n'
        '  ,t.inport_entity_id as inport_id \n'
        '  ,t.inport_replacement_project_id \n'
        '  ,t.description \n'
        '  ,t.title \n'
        '  ,t.csw_uuid AS uuid \n'
        '  ,tuf.iso_maintenance_update_code AS update_frequency \n'
        '  ,tuc.gmd_code AS restriction \n'
        '  ,t.usage_notice \n'
        '  ,t.keywords \n'
        '  ,t.north_bound \n'
        '  ,t.east_bound \n'
        '  ,t.south_bound \n'
        '  ,t.west_bound \n'
        '  ,t.is_sensitive as confidential \n'
        'FROM {schema}.{table} t \n'
        '   INNER JOIN {schema}.{type_table} tt \n'
        '     ON t.table_type_id = tt.table_type_id \n'
        '   INNER JOIN {schema}.{project_table} pt \n'
        '     ON t.project_id = pt.project_id \n'
        '   INNER JOIN {schema}.{contact_table} ct \n'
        '     ON t.contact_id = ct.contact_id \n'
        '   INNER JOIN {schema}.table_update_frequency tuf \n'
        '     ON t.table_update_frequency_id = tuf.table_update_frequency_id \n'
        '   INNER JOIN {schema}.table_use_constraint tuc \n'
        '     ON t.table_use_constraint_id = tuc.table_use_constraint_id \n')
    select_statement = statement_template.format(**sql_identifiers)
    try:
        collected = []
        for record in connection.execute(select_statement):
            # plain dict copy of the row, so it can be pprint()ed etc.
            as_dict = dict(record)
            # condense SQL spatial bounds fields into a simple string
            bound_values = [as_dict[name] for name in sql_bounds_fields]
            as_dict['bounds'] = '{}, {}, {}, {}'.format(*bound_values)
            # the individual SQL bound fields are no longer needed
            for name in sql_bounds_fields:
                del as_dict[name]
            validate(as_dict)
            collected.append(as_dict)
        return collected
    finally:
        # always release the connection, on success or failure
        connection.close()
Esempio n. 9
0
def get_by_lookup(table_name, column_name, db_url=None, connection=None):
    """
    Returns the name of the Python type constructor, that corresponds to the
    referenced table & column.

    The table is reflected from the database (schema discover.SCHEMA) and
    the Python type reported by SQLAlchemy for the column is mapped onto
    one of the constructor names used internally: 'int', 'float', 'str'
    or 'datetime.datetime'.

    Keyword Parameters:
    table_name  --  String representing name of the warehouse table, where
      column is located
    column_name  --  String representing name of the warehouse column whose
      Python type constructor name is to be returned.
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided).
    connection  -- forwarded as the second argument of
      dto_util.get_connection (Optional, if provided, will override db_url)
      NOTE(review): sibling modules pass a connection-returning function in
      this position -- confirm which of the two is expected here.

    Returns:
    String, the constructor name (already passed through validate()).

    Exceptions:
    ConnectionMissingArgument  -- raised if neither connection or db_url
      parameter is specified.
    LookupNullType  -- raised if the column's database type is unknown to
      SQLAlchemy (reflected as NullType).
    Exception  -- raised if the reflected Python type has no Warehouse
      equivalent, or if the table has no column named column_name.

    >>> get_by_lookup( 'any_table', 'some_field')
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection)
    try:
        # obtain python type of column via SQLAlchemy reflection; any needed
        # lookup Tables will be listed in metadata.tables
        metadata = sqlalchemy.MetaData()
        table = sqlalchemy.Table(
            table_name,
            metadata,
            autoload=True,  # reflect columns, as defined in the db
            autoload_with=connection,
            schema=discover.SCHEMA)
        for column in table.columns:
            if column.name != column_name:
                continue
            try:
                constructor = column.type.python_type
            except NotImplementedError:
                if isinstance(column.type,
                              sqlalchemy.sql.sqltypes.NullType):
                    msg = ("Unknown database type [maybe PostGIS?]."
                           " Table$Column: '{}${}'").format(
                               table_name, column_name)
                    raise LookupNullType(msg)
                raise  # some other type without a Python equivalent
            # match SQLAlchemy constructor, to the one we use internally
            # (independent checks: when several match, the last one wins)
            python_type_constructor = None
            if issubclass(constructor, int):
                python_type_constructor = 'int'
            #FIXME: transition away from float.. Decimal is more user friendly
            if issubclass(constructor, (float, decimal.Decimal)):
                python_type_constructor = 'float'
            if issubclass(constructor, str):
                python_type_constructor = 'str'
            if issubclass(constructor, (datetime.datetime, datetime.time)):
                python_type_constructor = 'datetime.datetime'
            if python_type_constructor is None:
                msg = "Unable to map '{}' to Warehouse type".format(
                    constructor)
                raise Exception(msg)  #TODO: refactor into custom class
            validate(python_type_constructor)
            return python_type_constructor
        # every reflected column was inspected (or there were none) and
        # none carried the requested name
        msg = "Column '{}' not defined, for table '{}'".format(
            column_name, table_name)
        raise Exception(msg)  #TODO: refactor into custom class
    finally:
        # always release the connection, on success or failure
        connection.close()