def get_sqlalchemy_col(column_name: str, column_type_name: str,
                       primary_key_col_name: str) -> Column:
    """
    Helper method that returns the sqlalchemy Column object to be used for Table def.
    """
    if column_type_name == "timestamp without time zone":
        col = Column(column_name, TIMESTAMP(timezone=False))
    elif column_type_name == "timestamp with time zone":
        col = Column(column_name, TIMESTAMP(timezone=True))
    elif column_type_name == "date":
        col = Column(column_name, Date)
    elif column_type_name == "real":
        col = Column(column_name, REAL)
    elif column_type_name == "integer":
        col = Column(column_name, Integer)
    elif column_type_name == "smallint":
        col = Column(column_name, SMALLINT)
    elif column_type_name == "text":
        col = Column(column_name, TEXT)
    elif column_type_name == "bigint":
        col = Column(column_name, BIGINT)
    elif column_type_name == "float":
        col = Column(column_name, Float)
    elif column_type_name == "boolean":
        col = Column(column_name, Boolean)
    elif column_type_name == "json":
        col = Column(column_name, JSON)
    else:
        col = Column(column_name, String)

    if column_name == primary_key_col_name:
        col.primary_key = True
    return col
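# Usage sketch for the helper above (hypothetical table and column names). It assumes
# the SQLAlchemy type names the helper references (Column, TIMESTAMP, BIGINT, JSON, ...)
# are already imported in the defining module.
from sqlalchemy import MetaData, Table

metadata = MetaData()
events = Table(
    "events",
    metadata,
    get_sqlalchemy_col("id", "bigint", primary_key_col_name="id"),
    get_sqlalchemy_col("created_at", "timestamp with time zone", primary_key_col_name="id"),
    get_sqlalchemy_col("payload", "json", primary_key_col_name="id"),
)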
def tables_md(self, metadata=None, force=False):
    if not metadata:
        metadata = self.metadata
    if not self._md_run or force:
        for schema_table in self.table_list():
            table = Table(schema_table, metadata)
            last_f = schema_table[-4:]
            if last_f == 'fact' or schema_table in DIM_TABLES_WITHOUT_ID:
                # Add pseudo primary_key
                column = Column('{}_rowid'.format(schema_table),
                                # BIGINT,
                                BigInteger().with_variant(sqlite.INTEGER(), 'sqlite'),
                                primary_key=True,
                                autoincrement=True)
                table.append_column(column)
            col_num = 0
            for col in self.table_columns(schema_table):
                col_num += 1
                col_type = COLUMN_TYPE_MAPPING[col['type']]
                if col_type == String and col.get('length'):
                    col_type = col_type(col['length'], convert_unicode=True)
                column = Column(col['name'], col_type)

                do_primary_key = False
                autoincrement = True
                comp_key = DIM_TABLES_WITH_COMPOSITE_KEY.get(schema_table, None)
                if comp_key and col['name'] in comp_key['keys']:
                    do_primary_key = True
                    # autoincrement not compatible with composite keys
                    autoincrement = False
                elif col_num == 1 and col['name'] == 'id':
                    do_primary_key = True
                    if schema_table == 'requests':
                        autoincrement = False

                if col.get('dimension') and 'role' in col['dimension']:
                    if col['dimension']['role'] in self.table_list():
                        foreign_key = '{role}.{id}'.format(**col['dimension'])
                        column = Column(col['name'], col_type,
                                        ForeignKey(foreign_key), nullable=True)

                column.primary_key = do_primary_key
                if do_primary_key:
                    column.autoincrement = autoincrement
                table.append_column(column)

        self.base = automap_base(metadata=metadata)
        self.base.prepare()
        self._md_run = True
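# Minimal sketch of the with_variant() pattern used above: SQLite only autoincrements
# a column declared INTEGER PRIMARY KEY (it becomes the rowid), so the generic
# BigInteger type is given a SQLite-specific INTEGER variant. Table and column names
# here are hypothetical.
from sqlalchemy import BigInteger, Column, MetaData, Table
from sqlalchemy.dialects import sqlite

md = MetaData()
rowid_type = BigInteger().with_variant(sqlite.INTEGER(), 'sqlite')
demo_fact = Table(
    'demo_fact', md,
    Column('demo_fact_rowid', rowid_type, primary_key=True, autoincrement=True),
)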
def ge_suite_to_sqla_columns(suite: dict) -> list:
    expectations = suite["expectations"]
    table_name = suite["expectation_suite_name"].split(".")[0]
    column_names = get_column_names(expectations)
    sqla_columns = []
    for column_name in column_names:
        column = Column(name=column_name)

        all_columns_type_expectations = filter(
            lambda x: x["expectation_type"] == "expect_column_values_to_be_of_type",
            expectations)
        column_type_expectations = filter(
            lambda x: x["kwargs"]["column"] == column_name,
            all_columns_type_expectations)
        ge_type = list(column_type_expectations)[0]["kwargs"]["type_"]

        kwargs = {}
        if ge_type == "str":
            all_columns_length_expectations = filter(
                lambda x: x["expectation_type"] == "expect_column_value_lengths_to_be_between",
                expectations)
            column_length_expectations = list(
                filter(lambda x: x["kwargs"]["column"] == column_name,
                       all_columns_length_expectations))
            if len(column_length_expectations) == 1:
                length = column_length_expectations[0]["kwargs"]["max_value"]
            else:
                length = 100
            kwargs = {"length": length}

        column.type = ge_to_sqla_types(ge_type, **kwargs)

        if column_name == "id" or column_name.endswith("_id"):
            column.primary_key = True
        if column_name.endswith("_id") and column_name.split("_")[0] != table_name:
            foreign_table = column_name.split("_")[0]
            column.foreign_keys = [ForeignKey(f"{foreign_table}.id")]

        sqla_columns.append(column)
    return sqla_columns
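# Hypothetical Great Expectations suite, sketched only to show the input shape the
# converter above reads; get_column_names() and ge_to_sqla_types() are assumed to be
# the project's own helpers and are not defined here.
demo_suite = {
    "expectation_suite_name": "orders.warning",
    "expectations": [
        {"expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": "id", "type_": "int"}},
        {"expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": "customer_id", "type_": "int"}},
        {"expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": "note", "type_": "str"}},
        {"expectation_type": "expect_column_value_lengths_to_be_between",
         "kwargs": {"column": "note", "min_value": 1, "max_value": 255}},
    ],
}
# columns = ge_suite_to_sqla_columns(demo_suite)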
def createTable(self, tableJson):
    """
    Create a database table from JSON data.
    :param tableJson: json table definition
    :return: int, 0 on success, -1 on failure
    """
    r = 0
    try:
        t = Table(tableJson['name'], self.metadata)
        import sqlalchemy
        for e in tableJson['columns']:
            type = getattr(sqlalchemy, e['type'])
            if 'length' in e and int(e['length']) > 0 and 'scale' in e and int(e['scale']) > 0:
                type = type(int(e['length']), int(e['scale']))
            elif 'length' in e and int(e['length']) > 0:
                type = type(int(e['length']))
            else:
                type = type()
            c = Column(
                name=e['name'],
                type_=type,
                nullable=e['nullable'],
                server_default=e['default'] if 'default' in e else None)
            if 'primary_key' in e:
                c.primary_key = e['primary_key']
            if 'autoincrement' in e:
                c.autoincrement = 'auto' if e['autoincrement'] else False
            t.append_column(c)
        self.metadata.drop_all(self.engine)
        self.metadata.create_all(self.engine)
    except Exception as err:
        logger = logging.getLogger('rear')
        logger.error(self.__class__.__name__ + '.' +
                     sys._getframe().f_code.co_name + ': ' + str(err))
        r = -1
    return r
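# Hypothetical JSON payload, only to illustrate the shape createTable() consumes;
# the keys follow those read in the loop above ('name', 'type', 'length', 'scale',
# 'nullable', 'default', 'primary_key', 'autoincrement'), and 'type' must name an
# attribute of the sqlalchemy module (Integer, String, Numeric, ...).
demo_table_json = {
    "name": "users",
    "columns": [
        {"name": "id", "type": "Integer", "nullable": False,
         "primary_key": True, "autoincrement": True},
        {"name": "username", "type": "String", "length": 64, "nullable": False},
        {"name": "balance", "type": "Numeric", "length": 10, "scale": 2,
         "nullable": True, "default": "0"},
    ],
}
# result = some_db_helper.createTable(demo_table_json)  # hypothetical instance; 0 on success, -1 on failure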
def create_tiger_table(shp_metadata, product, drop_existing=False):
    """shp_metadata parameter must be a fiona metadata object"""

    # handle cases where the table already exists
    schema = gv.metadata.schema
    table_name = TIGER_PRODUCT[product].lower()
    if not drop_existing:
        engine = gv.engine
        if engine.dialect.has_table(engine.connect(), table_name, schema):
            full_name = '{0}.{1}'.format(schema, table_name)
            print 'Table {} already exists, ' \
                  'using existing table...'.format(full_name)
            print 'to recreate the table use the "drop_existing" flag'
            return gv.metadata.tables[full_name]

    fiona2db = {
        'int': Integer,
        'float': Float,
        'str': Text
    }

    # it's not possible to make a distinction between polygons and
    # multipolygons within shapefiles, so we must assume geoms of
    # that type are multi's or postgis may throw an error, fiona's
    # metadata always assumes single geoms so multi is appended
    geom_type = shp_metadata['schema']['geometry'].upper()
    if geom_type == 'POLYGON':
        geom_type = 'MULTI{}'.format(geom_type)

    columns = list()
    geom_col = Column(
        name='geom',
        type_=Geometry(
            geometry_type=geom_type,
            srid=gv.epsg))
    columns.append(geom_col)

    for f_name, f_type in shp_metadata['schema']['properties'].items():
        col_name = f_name.lower()
        attr_col = Column(
            name=col_name,
            type_=fiona2db[f_type.split(':')[0]])

        # blocks have a primary key of 'GEOID10' while all others have
        # 'GEOID' as a pk, thus the slicing
        if f_name[:5] == TIGER_PK.upper():
            attr_col.primary_key = True
            pk_col = col_name

        columns.append(attr_col)

    # add a foreign key to the ACS data unless options indicate not to, blocks
    # (pk of 'geoid10') aren't in the ACS so can't have the constraint
    if gv.foreign_key and pk_col == TIGER_PK:
        meta_tables = gv.metadata.tables
        geoheaders = [meta_tables[t] for t in meta_tables if GEOHEADER in t]

        for gh in geoheaders:
            foreign_col = gh.columns[TIGER_GEOID]
            fk = ForeignKeyConstraint([pk_col], [foreign_col])
            columns.append(fk)

    table = Table(
        table_name, gv.metadata,
        *columns)
    table.create()
    return table
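# Hypothetical fiona metadata, sketched only to show the pieces create_tiger_table()
# reads: a 'schema' dict with 'geometry' and 'properties' (fiona encodes field types
# as e.g. 'str:11', hence the split on ':'). Running the function also needs the
# project's gv globals and a live PostGIS connection, so the call is left commented.
demo_shp_metadata = {
    'schema': {
        'geometry': 'Polygon',
        'properties': {'GEOID': 'str:11', 'NAME': 'str:100', 'ALAND': 'int:14'},
    },
}
# tracts = create_tiger_table(demo_shp_metadata, 'tracts')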
def create_geoheader():
    """Create and populate the ACS geoheader table."""

    geo_xls = '{yr}_SFGeoFileTemplate.xls'.format(yr=gv.acs_year)
    geo_schema = join(gv.data_dir, geo_xls)
    book = xlrd.open_workbook(geo_schema)
    sheet = book.sheet_by_index(0)

    columns = []
    blank_counter = 1
    for cx in xrange(sheet.ncols):
        # there are multiple fields called 'blank' that are reserved
        # for future use, but columns in the same table cannot have
        # the same name
        col_name = sheet.cell_value(0, cx).lower()
        if col_name == 'blank':
            col_name += str(blank_counter)
            blank_counter += 1

        cur_col = Column(
            name=col_name,
            type_=Text,
            doc=sheet.cell_value(1, cx).encode('utf8')
        )
        if cur_col.name.lower() in ACS_PRIMARY_KEY:
            cur_col.primary_key = True
        else:
            cur_col.primary_key = False

        columns.append(cur_col)

    # The 'geoid' field that exists within tiger shapefiles is a
    # truncated version of the full census geoid, this column will hold
    # the truncated version
    tiger_geoid = Column(
        name=TIGER_GEOID,
        type_=Text,
        doc='Truncated version of geoid used to join to '
            'tables derived from TIGER shapefiles',
        unique=True,
        index=True
    )
    columns.append(tiger_geoid)

    tbl_name = GEOHEADER
    tbl_comment = 'Intermediary table used to join ACS and TIGER data'
    table = Table(
        tbl_name, gv.metadata,
        *columns,
        info=tbl_comment)

    print '\ncreating geoheader...'
    table.create()
    add_database_comments(table)

    # prep to populate tiger geoid column
    field_names = [c.name for c in columns]
    geoid_ix = field_names.index(GEOID)
    component_ix = field_names.index('component')
    sumlevel_ix = field_names.index('sumlevel')
    tiger_ix = field_names.index(TIGER_GEOID)

    # these summary levels are excluded from the tiger_geoid because
    # their values are not unique from each other, sumlevels 050 and 160
    # also conflict with these, but remain unique if these are excluded
    sumlev_exclude = [
        '320', '610', '612', '620', '622',
        '795', '950', '960', '970'
    ]
    # more info on summary levels here:
    # http://www2.census.gov/programs-surveys/acs/summary_file/2014/
    # documentation/tech_docs/ACS_2014_SF_5YR_Appendices.xls

    geog_dir = join(gv.data_dir, ACS_GEOGRAPHY[0].lower())
    for st in gv.states:
        geo_csv = 'g{yr}{span}{state}.csv'.format(
            yr=gv.acs_year,
            span=gv.span,
            state=st.lower()
        )
        with open(join(geog_dir, geo_csv)) as geo_data:
            reader = csv.reader(geo_data)
            for row in reader:
                # a component value of '00' means total population, all
                # other values are subsets of the population
                tiger = None
                comp, sumlev = row[component_ix], row[sumlevel_ix]
                if comp == '00' and sumlev not in sumlev_exclude:
                    tiger = (re.match('\w*US(\w*)',
                                      row[geoid_ix]).group(1))

                row.insert(tiger_ix, tiger)

                # null values come in from the csv as empty strings
                # this converts them such that they will be NULL in
                # the database
                null_row = [None if v == '' else v for v in row]
                table.insert(null_row).execute()
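# Small illustration of the geoid truncation above: the regex keeps everything after
# the 'US' separator in a full ACS geoid, which is the part that matches the TIGER
# 'geoid' attribute. The sample value is hypothetical but follows that prefix'US'suffix
# layout.
import re

full_geoid = '05000US41005'
tiger_geoid = re.match(r'\w*US(\w*)', full_geoid).group(1)
print(tiger_geoid)  # -> '41005'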
def create_tiger_table(shp_metadata, product, drop_existing=False):
    """metadata parameter must be a fiona metadata object"""

    # handle cases where the table already exists
    schema = ops.metadata.schema
    table_name = TIGER_PRODUCT[product].lower()
    if not drop_existing:
        engine = ops.engine
        if engine.dialect.has_table(engine.connect(), table_name, schema):
            full_name = '{0}.{1}'.format(schema, table_name)
            print 'Table {} already exists, ' \
                  'using existing table...'.format(full_name)
            print 'to recreate the table use the "drop_existing" flag'
            return ops.metadata.tables[full_name]

    fiona2db = {
        'int': Integer,
        'float': Float,
        'str': Text
    }

    # it's not possible to make a distinction between polygons and
    # multipolygons within shapefiles, so we must assume geoms of
    # that type are multi's or postgis may throw an error, fiona's
    # metadata always assumes single geoms so multi is appended
    geom_type = shp_metadata['schema']['geometry'].upper()
    if geom_type == 'POLYGON':
        geom_type = 'MULTI{}'.format(geom_type)

    columns = list()
    geom_col = Column(
        name='geom',
        type_=Geometry(
            geometry_type=geom_type,
            srid=int(shp_metadata['crs']['init'].split(':')[1])))
    columns.append(geom_col)

    for f_name, f_type in shp_metadata['schema']['properties'].items():
        col_name = f_name.lower()
        attr_col = Column(
            name=col_name,
            type_=fiona2db[f_type.split(':')[0]])

        if f_name in TIGER_PRIMARY_KEY:
            attr_col.primary_key = True
            pk_col = col_name

        columns.append(attr_col)

    # add a foreign key to the ACS data unless options indicate not to, blocks
    # (pk of 'GEOID10') aren't in the ACS so can't have the constraint
    if ops.foreign_key and pk_col == TIGER_PRIMARY_KEY[0]:
        meta_tables = ops.metadata.tables
        geoheaders = [meta_tables[t] for t in meta_tables if GEOHEADER in t]

        for gh in geoheaders:
            foreign_col = gh.columns[TIGER_GEOID]
            fk = ForeignKeyConstraint([pk_col], [foreign_col])
            columns.append(fk)

    table = Table(
        table_name, ops.metadata,
        *columns)
    table.create()
    return table