Example #1
from sqlalchemy import (BIGINT, Boolean, Column, Date, Float, Integer, JSON,
                        REAL, SMALLINT, String, TEXT, TIMESTAMP)


def get_sqlalchemy_col(column_name: str, column_type_name: str,
                       primary_key_col_name: str) -> Column:
    """
    Helper function that returns the sqlalchemy Column object to be used for the Table definition.
    """
    if column_type_name == "timestamp without time zone":
        col = Column(column_name, TIMESTAMP(timezone=False))
    elif column_type_name == "timestamp with time zone":
        col = Column(column_name, TIMESTAMP(timezone=True))
    elif column_type_name == "date":
        col = Column(column_name, Date)
    elif column_type_name == "real":
        col = Column(column_name, REAL)
    elif column_type_name == "integer":
        col = Column(column_name, Integer)
    elif column_type_name == "smallint":
        col = Column(column_name, SMALLINT)
    elif column_type_name == "text":
        col = Column(column_name, TEXT)
    elif column_type_name == "bigint":
        col = Column(column_name, BIGINT)
    elif column_type_name == "float":
        col = Column(column_name, Float)
    elif column_type_name == "boolean":
        col = Column(column_name, Boolean)
    elif column_type_name == "json":
        col = Column(column_name, JSON)
    else:
        col = Column(column_name, String)

    if column_name == primary_key_col_name:
        col.primary_key = True
    return col
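For context, here is a minimal usage sketch showing how a helper like this could feed a Table definition; the metadata object, table name, and (name, type) pairs below are illustrative assumptions, not part of the original snippet.

from sqlalchemy import MetaData, Table

metadata = MetaData()
# Hypothetical (column name, type name) pairs, e.g. as read from
# a database's information_schema
column_specs = [
    ("id", "integer"),
    ("created_at", "timestamp with time zone"),
    ("payload", "json"),
]
events = Table(
    "events",
    metadata,
    *[get_sqlalchemy_col(name, type_name, primary_key_col_name="id")
      for name, type_name in column_specs]
)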
Example #2
    def tables_md(self, metadata=None, force=False):
        if not metadata:
            metadata = self.metadata

        if not self._md_run or force:

            for schema_table in self.table_list():
                table = Table(schema_table, metadata)
                last_f = schema_table[-4:]
                if last_f == 'fact' or schema_table in DIM_TABLES_WITHOUT_ID:
                    # Add pseudo primary key
                    column = Column('{}_rowid'.format(schema_table),
                                    # BIGINT,
                                    BigInteger().with_variant(sqlite.INTEGER(), 'sqlite'),
                                    primary_key=True,
                                    autoincrement=True)
                    table.append_column(column)

                col_num = 0
                for col in self.table_columns(schema_table):
                    col_num += 1
                    col_type = COLUMN_TYPE_MAPPING[col['type']]
                    if col_type == String and col.get('length'):
                        col_type = col_type(col['length'], convert_unicode=True)

                    column = Column(col['name'], col_type)

                    do_primary_key = False
                    autoincrement = True

                    comp_key = DIM_TABLES_WITH_COMPOSITE_KEY.get(schema_table, None)
                    if comp_key and col['name'] in comp_key['keys']:
                        do_primary_key = True
                        # autoincrement not compatible with composite keys
                        autoincrement = False

                    elif col_num == 1 and col['name'] == 'id':
                        do_primary_key = True
                        if schema_table == 'requests':
                            autoincrement = False

                    if col.get('dimension') and 'role' in col['dimension']:
                        if col['dimension']['role'] in self.table_list():
                            foreign_key = '{role}.{id}'.format(**col['dimension'])
                            column = Column(col['name'], col_type, ForeignKey(foreign_key), nullable=True)

                    column.primary_key = do_primary_key
                    if do_primary_key:
                        column.autoincrement = autoincrement

                    table.append_column(column)

            self.base = automap_base(metadata=metadata)
            self.base.prepare()
            self._md_run = True
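The pseudo primary key above relies on SQLAlchemy's type variants: SQLite only treats an INTEGER primary key as an autoincrementing rowid, so BIGINT is swapped for plain INTEGER on that dialect. A standalone sketch of the same pattern, with an illustrative table name:

from sqlalchemy import BigInteger, Column, MetaData, Table
from sqlalchemy.dialects import sqlite

metadata = MetaData()
# BIGINT everywhere, plain INTEGER on SQLite so autoincrement works there
rowid_type = BigInteger().with_variant(sqlite.INTEGER(), 'sqlite')
fact = Table(
    'my_fact', metadata,  # table name is illustrative
    Column('my_fact_rowid', rowid_type, primary_key=True, autoincrement=True)
)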
Example #3
from sqlalchemy import Column, ForeignKey


def ge_suite_to_sqla_columns(suite: dict) -> list:
    expectations = suite["expectations"]

    table_name = suite["expectation_suite_name"].split(".")[0]
    column_names = get_column_names(expectations)

    sqla_columns = []
    for column_name in column_names:
        column = Column(name=column_name)

        all_columns_type_expectations = filter(
            lambda x: x["expectation_type"] ==
            "expect_column_values_to_be_of_type", expectations)
        column_type_expectations = filter(
            lambda x: x["kwargs"]["column"] == column_name,
            all_columns_type_expectations)
        ge_type = list(column_type_expectations)[0]["kwargs"]["type_"]
        kwargs = {}

        if ge_type == "str":
            all_columns_length_expectations = filter(
                lambda x: x["expectation_type"] ==
                "expect_column_value_lengths_to_be_between", expectations)
            column_length_expectations = list(
                filter(lambda x: x["kwargs"]["column"] == column_name,
                       all_columns_length_expectations))

            if len(column_length_expectations) == 1:
                length = column_length_expectations[0]["kwargs"]["max_value"]
            else:
                length = 100

            kwargs = {"length": length}

        column.type = ge_to_sqla_types(ge_type, **kwargs)

        if column_name == "id" or column_name.endswith("_id"):
            column.primary_key = True

        if column_name.endswith(
                "_id") and column_name.split("_")[0] != table_name:
            foreign_table = column_name.split("_")[0]
            column.foreign_keys = [ForeignKey(f"{foreign_table}.id")]

        sqla_columns.append(column)

    return sqla_columns
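For orientation, a sketch of the suite dictionary shape this function reads, with illustrative column names and values; the call assumes the helpers get_column_names and ge_to_sqla_types referenced above are in scope.

suite = {
    "expectation_suite_name": "users.warning",  # table name comes from the first segment
    "expectations": [
        {"expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": "id", "type_": "int"}},
        {"expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": "name", "type_": "str"}},
        {"expectation_type": "expect_column_value_lengths_to_be_between",
         "kwargs": {"column": "name", "min_value": 1, "max_value": 50}},
    ],
}
columns = ge_suite_to_sqla_columns(suite)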
Example #4
 def createTable(self, tableJson):
     """
     根据json数据创建数据库表
     :param tableJson: json 表数据
     :return: int, 0-成功, -1-失败
     """
     r = 0
     try:
         t = Table(tableJson['name'], self.metadata)
         import sqlalchemy
         for e in tableJson['columns']:
             type = getattr(sqlalchemy, e['type'])
             if 'length' in e and int(
                     e['length']) > 0 and 'scale' in e and int(
                         e['scale']) > 0:
                 type = type(int(e['length']), int(e['scale']))
             elif 'length' in e and int(e['length']) > 0:
                 type = type(int(e['length']))
             else:
                 type = type()
             c = Column(
                 name=e['name'],
                 type_=type,
                 nullable=e['nullable'],
                 server_default=e['default'] if 'default' in e else None)
             if 'primary_key' in e:
                 c.primary_key = e['primary_key']
             if 'autoincrement' in e:
                 c.autoincrement = 'auto' if e['autoincrement'] else False
             t.append_column(c)
         self.metadata.drop_all(self.engine)
         self.metadata.create_all(self.engine)
     except Exception as err:
         logger = logging.getLogger('rear')
         logger.error(self.__class__.__name__ + '.' +
                      sys._getframe().f_code.co_name + ': ' + str(err))
         r = -1
     return r
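An illustrative tableJson payload matching the keys createTable reads: 'type' must name an attribute of the sqlalchemy module, length/scale are only applied when present and positive, and primary_key, autoincrement, and default are optional.

table_json = {
    'name': 'users',  # all names and types below are made up
    'columns': [
        {'name': 'id', 'type': 'Integer', 'nullable': False,
         'primary_key': True, 'autoincrement': True},
        {'name': 'name', 'type': 'String', 'length': 50, 'nullable': True},
        {'name': 'balance', 'type': 'Numeric', 'length': 10, 'scale': 2,
         'nullable': True, 'default': '0'},
    ],
}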
def create_tiger_table(shp_metadata, product, drop_existing=False):
    """shp_metadata parameter must be a fiona metadata object"""

    # handle cases where the table already exists
    schema = gv.metadata.schema
    table_name = TIGER_PRODUCT[product].lower()
    if not drop_existing:
        engine = gv.engine
        if engine.dialect.has_table(engine.connect(), table_name, schema):
            full_name = '{0}.{1}'.format(schema, table_name)
            print 'Table {} already exists, ' \
                  'using existing table...'.format(full_name)
            print 'to recreate the table use the "drop_existing" flag'

            return gv.metadata.tables[full_name]

    fiona2db = {
        'int': Integer,
        'float': Float,
        'str': Text
    }

    # it's not possible to make a distinction between polygons and
    # multipolygons within shapefiles, so we must assume geoms of
    # that type are multi's or postgis may throw an error, fiona's
    # metadata always assumes single geoms so multi is appended
    geom_type = shp_metadata['schema']['geometry'].upper()
    if geom_type == 'POLYGON':
        geom_type = 'MULTI{}'.format(geom_type)

    columns = list()
    geom_col = Column(
        name='geom',
        type_=Geometry(
            geometry_type=geom_type,
            srid=gv.epsg))
    columns.append(geom_col)

    for f_name, f_type in shp_metadata['schema']['properties'].items():
        col_name = f_name.lower()
        attr_col = Column(
            name=col_name,
            type_=fiona2db[f_type.split(':')[0]])

        # blocks have a primary key of 'GEOID10' while all others have
        # 'GEOID' as a pk, thus the slicing
        if f_name[:5] == TIGER_PK.upper():
            attr_col.primary_key = True
            pk_col = col_name

        columns.append(attr_col)

    # add a foreign key to the ACS data unless options indicate not to, blocks
    # (pk of 'geoid10') aren't in the ACS so can't have the constraint
    if gv.foreign_key and pk_col == TIGER_PK:
        meta_tables = gv.metadata.tables
        geoheaders = [meta_tables[t] for t in meta_tables if GEOHEADER in t]

        for gh in geoheaders:
            foreign_col = gh.columns[TIGER_GEOID]
            fk = ForeignKeyConstraint([pk_col], [foreign_col])
            columns.append(fk)

    table = Table(
        table_name,
        gv.metadata,
        *columns)
    table.create()

    return table
def create_geoheader():
    """"""

    geo_xls = '{yr}_SFGeoFileTemplate.xls'.format(yr=gv.acs_year)
    geo_schema = join(gv.data_dir, geo_xls)
    book = xlrd.open_workbook(geo_schema)
    sheet = book.sheet_by_index(0)

    columns = []
    blank_counter = 1
    for cx in xrange(sheet.ncols):
        # there are multiple fields called 'blank' that are reserved
        # for future use, but columns in the same table cannot have
        # the same name
        col_name = sheet.cell_value(0, cx).lower()
        if col_name == 'blank':
            col_name += str(blank_counter)
            blank_counter += 1

        cur_col = Column(
            name=col_name,
            type_=Text,
            doc=sheet.cell_value(1, cx).encode('utf8')
        )
        if cur_col.name.lower() in ACS_PRIMARY_KEY:
            cur_col.primary_key = True
        else:
            cur_col.primary_key = False

        columns.append(cur_col)

    # The 'geoid' field that exists within tiger shapefiles is a
    # truncated version of the full census geoid, this column will hold
    # the truncated version
    tiger_geoid = Column(
        name=TIGER_GEOID,
        type_=Text,
        doc='Truncated version of geoid used to join with '
            'tables derived from TIGER shapefiles',
        unique=True,
        index=True
    )
    columns.append(tiger_geoid)

    tbl_name = GEOHEADER
    tbl_comment = 'Intermediary table used to join ACS and TIGER data'
    table = Table(
        tbl_name,
        gv.metadata,
        *columns,
        info=tbl_comment)

    print '\ncreating geoheader...'
    table.create()
    add_database_comments(table)

    # prep to populate tiger geoid column
    field_names = [c.name for c in columns]
    geoid_ix = field_names.index(GEOID)
    component_ix = field_names.index('component')
    sumlevel_ix = field_names.index('sumlevel')
    tiger_ix = field_names.index(TIGER_GEOID)

    # these summary levels are excluded from the tiger_geoid because
    # their values are not unique from each other, sumlevels 050 and 160
    # also conflict with these, but remain unique if these are excluded
    sumlev_exclude = [
        '320', '610', '612',
        '620', '622', '795',
        '950', '960', '970'
    ]

    # more info on summary levels here:
    # http://www2.census.gov/programs-surveys/acs/summary_file/2014/
    # documentation/tech_docs/ACS_2014_SF_5YR_Appendices.xls

    geog_dir = join(gv.data_dir, ACS_GEOGRAPHY[0].lower())
    for st in gv.states:
        geo_csv = 'g{yr}{span}{state}.csv'.format(
            yr=gv.acs_year, span=gv.span, state=st.lower()
        )
        with open(join(geog_dir, geo_csv)) as geo_data:
            reader = csv.reader(geo_data)
            for row in reader:
                # a component value of '00' means total population, all
                # other values are subsets of the population
                tiger = None
                comp, sumlev = row[component_ix], row[sumlevel_ix]
                if comp == '00' and sumlev not in sumlev_exclude:
                    tiger = (re.match(r'\w*US(\w*)', row[geoid_ix]).group(1))

                row.insert(tiger_ix, tiger)

                # null values come in from the csv as empty strings
                # this converts them such that they will be NULL in
                # the database
                null_row = [None if v == '' else v for v in row]
                table.insert(null_row).execute()
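The tiger_geoid values written above strip the summary-level prefix from the full ACS geoid; a small illustration of what the regex extracts (the geoid value is made up for the example):

import re

full_geoid = '05000US41005'  # illustrative full ACS geoid
tiger_geoid = re.match(r'\w*US(\w*)', full_geoid).group(1)
# tiger_geoid == '41005', the truncated form stored in TIGER shapefiles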
def create_tiger_table(shp_metadata, product, drop_existing=False):
    """metadata parameter must be a fiona metadata object"""

    # handle cases where the table already exists
    schema = ops.metadata.schema
    table_name = TIGER_PRODUCT[product].lower()
    if not drop_existing:
        engine = ops.engine
        if engine.dialect.has_table(engine.connect(), table_name, schema):
            full_name = '{0}.{1}'.format(schema, table_name)
            print 'Table {} already exists, ' \
                  'using existing table...'.format(full_name)
            print 'to recreate the table use the "drop_existing" flag'

            return ops.metadata.tables[full_name]

    fiona2db = {
        'int': Integer,
        'float': Float,
        'str': Text
    }

    # it's not possible to make a distinction between polygons and
    # multipolygons within shapefiles, so we must assume geoms of
    # that type are multi's or postgis may throw an error, fiona's
    # metadata always assumes single geoms so multi is appended
    geom_type = shp_metadata['schema']['geometry'].upper()
    if geom_type == 'POLYGON':
        geom_type = 'MULTI{}'.format(geom_type)

    columns = list()
    geom_col = Column(
        name='geom',
        type_=Geometry(
            geometry_type=geom_type,
            srid=int(shp_metadata['crs']['init'].split(':')[1])))
    columns.append(geom_col)

    for f_name, f_type in shp_metadata['schema']['properties'].items():
        col_name = f_name.lower()
        attr_col = Column(
            name=col_name,
            type_=fiona2db[f_type.split(':')[0]])

        if f_name in TIGER_PRIMARY_KEY:
            attr_col.primary_key = True
            pk_col = col_name

        columns.append(attr_col)

    # add a foreign key to the ACS data unless options indicate not to, blocks
    # (pk of 'GEOID10') aren't in the ACS so can't have the constraint
    if ops.foreign_key and pk_col == TIGER_PRIMARY_KEY[0]:
        meta_tables = ops.metadata.tables
        geoheaders = [meta_tables[t] for t in meta_tables if GEOHEADER in t]
        for gh in geoheaders:
            foreign_col = gh.columns[TIGER_GEOID]
            fk = ForeignKeyConstraint([pk_col], [foreign_col])
            columns.append(fk)

    table = Table(
        table_name,
        ops.metadata,
        *columns)
    table.create()

    return table
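Both create_tiger_table variants read the same fiona metadata layout; here is a sketch of the relevant keys with illustrative values. Property types carry a width suffix (hence the split(':')), and this second variant derives the srid from the crs entry.

shp_metadata = {
    'schema': {
        'geometry': 'Polygon',
        'properties': {  # field names and widths are illustrative
            'GEOID': 'str:11',
            'NAME': 'str:100',
            'ALAND': 'int:14',
        },
    },
    'crs': {'init': 'epsg:4269'},
}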