def push(self, engine=None, schema=None):
    """Validate ``self.data`` and write it to the SQL table ``self.name``.

    Parameters
    ----------
    engine : sqlalchemy Engine, optional
        If given, replaces ``self.engine`` before pushing.
    schema : str, optional
        If given, replaces ``self.schema`` before pushing.

    Raises
    ------
    AttributeError
        If the DataFrame index or the DataFrame columns are not unique.

    Notes
    -----
    If the target table already exists but has no primary key, the whole
    table is replaced; if it has a primary key it is updated in place via
    ``update_sql_with_df``.  The instance is re-initialised at the end so
    its state reflects the table as actually stored.
    """
    if not self.data.index.is_unique:
        raise AttributeError(
            f'Table({self.name}) data index must have unique values')
    if not self.data.columns.is_unique:
        raise AttributeError(
            f'Table({self.name}) data columns must have unique values')
    if engine is not None:
        self.engine = engine
    if schema is not None:
        self.schema = schema
    if self.name in self.engine.table_names(schema=self.schema):
        # check if sql table has primary key
        if primary_key(self.name, self.engine, self.schema) is None:
            # Work out which column/index should serve as the key.
            if self.data.index.name is None:
                if self.key is None:
                    if 'id' in self.data.columns:
                        self.data.set_index('id', inplace=True)
                        self.data.index.name = 'id'
                        self.key = 'id'
                else:
                    self.data.index.name = self.key
            else:
                if self.key is None:
                    self.key = self.data.index.name
                else:
                    # BUGFIX: was ``self.index.name = self.key`` — this object
                    # has no ``index`` attribute; the index lives on self.data
                    # (mirrors the assignment in the branch above).
                    self.data.index.name = self.key
            # Without a primary key, we cannot do anything efficiently
            # Current solution is to completely replace old table
            to_sql_k(self.data, self.name, self.engine, index=True,
                     if_exists='replace', keys=self.key, schema=self.schema)
        else:
            update_sql_with_df(self.data, self.name, self.engine, self.schema)
    else:
        # Table does not exist yet: create it from scratch.
        self.key = self.data.index.name
        # BUGFIX: use self.engine (updated above when the ``engine`` argument
        # is provided) rather than the local ``engine``, which may be None.
        if self.key is None:
            to_sql_k(self.data, self.name, self.engine, keys='id',
                     schema=self.schema)
        else:
            to_sql_k(self.data, self.name, self.engine, keys=self.key,
                     schema=self.schema)
    # Re-initialise so in-memory state matches what was just pushed.
    self.__init__(self.name, engine=self.engine, schema=self.schema)
def __repr__(self):
    """Return a developer-oriented string describing this DataBase.

    In lazy mode the tables are summarised by querying the engine for
    column names and primary keys; otherwise each cached Table object is
    rendered via its own ``repr``.
    """
    if self.lazy:
        descriptions = []
        for tbl_name in self.table_names:
            col_list = ', '.join(get_col_names(tbl_name, self.engine))
            tbl_key = primary_key(tbl_name, self.engine)
            descriptions.append(
                f"Table(name={tbl_name}, cols=[{col_list}], key={tbl_key})\n")
        return f'DataBase({" , ".join(descriptions)}, lazy=True, url={self.engine.url})'
    table_reprs = ", ".join(repr(tbl) for tbl in self.db.values())
    return f'DataBase({table_reprs}, url={self.engine.url})'
def __init__(self, name, data=None, key=None, f_keys=None, types=None,
             engine=None, db=None, schema=None):
    """Initialise a Table, optionally pulling its data from a database.

    Parameters
    ----------
    name : str
        SQL table name.
    data : DataFrame, dict, or Engine, optional
        Table data.  Passing an Engine here is treated as if it were
        passed via ``engine`` (convenience positional shortcut).
    key : str, optional
        Primary-key column; inferred from an existing table or from the
        DataFrame index when not given.
    f_keys : list, optional
        Foreign keys.  Defaults to an empty list.
    types : dict, optional
        Column type overrides.  Defaults to an empty dict.
    engine : sqlalchemy Engine, optional
    db : optional
        Parent DataBase mapping, if any.
    schema : str, optional

    Raises
    ------
    TypeError
        If ``data`` is neither a DataFrame, a dict, nor an Engine.
    """
    self.name = name
    self.key = key
    # BUGFIX: ``f_keys=[]`` and ``types=dict()`` were mutable default
    # arguments, shared (and mutable) across every instance created with
    # the defaults.  Use None sentinels and build fresh containers here.
    self.f_keys = [] if f_keys is None else f_keys
    self.types = {} if types is None else types
    self.engine = engine
    self.data = data
    self.db = db
    self.schema = schema
    # Convenience: Table(name, engine) with the engine in the data slot.
    if isinstance(self.data, Engine):
        self.engine = self.data
        self.data = None
    if isinstance(self.engine, Engine):
        # If engine provided and no key: set key to existing table key
        if self.key is None and self.name in self.engine.table_names(self.schema):
            self.key = primary_key(self.name, self.engine, self.schema)
        # If engine provided but no data: pull data down from the table.
        # (If data was provided too, the table probably doesn't exist yet.)
        if self.data is None:
            self.data = from_sql_keyindex(self.name, self.engine, self.schema)
    # If no engine provided but data is:
    elif self.data is not None:
        if isinstance(self.data, dict):
            self.data = DataFrame(self.data)
        if isinstance(self.data, DataFrame):
            self.key = self.data.index.name
        else:
            raise TypeError('data can only be DataFrame or dict')
def update_sql_with_df(df, name, engine, schema=None, index_is_key=True,
                       key=None):
    """Drops all rows then push DataFrame to add data back.

    Creates any new columns and deletes any missing columns, then restores
    the primary key if it was missing or was dropped along the way.

    Parameters
    ----------
    df : DataFrame
        New table contents.
    name : str
        SQL table name.
    engine : sqlalchemy Engine
    schema : str, optional
    index_is_key : bool, default True
        If True, the DataFrame index is materialised as the key column
        (named after the index, falling back to 'id').
    key : str, optional
        Key column name used when ``index_is_key`` is False.
    """
    df = df.copy()
    if index_is_key:
        key = df.index.name
        if key is None:
            key = 'id'
        # Materialise the index as a regular column so to_sql writes it.
        df[key] = df.index
    with engine.begin() as conn:
        start_key = primary_key(name, conn, schema=schema)
        start_key_deleted = False
        tbl = get_table(name, conn, schema=schema)
        # Delete data, leave table columns
        conn.execute(tbl.delete(None))
        # Reconcile the table's column set with the DataFrame's.
        old_names = set(tbl.columns.keys())
        new_names = set(df.columns)
        # Add any new columns
        for col_name in new_names - old_names:
            add_column(get_table(name, conn, schema=schema), col_name,
                       get_type(df, col_name))
        # Delete any missing columns
        for col_name in old_names - new_names:
            if col_name == start_key:
                # Track whether we just destroyed the existing primary key.
                start_key_deleted = True
            delete_column(get_table(name, conn, schema=schema), col_name)
        if not has_primary_key(name, conn, schema=schema) or start_key_deleted:
            # BUGFIX: was ``schema=None`` — the primary key must be added in
            # the same schema every other call in this function targets.
            add_primary_key(name, conn, key, schema=schema)
        df.to_sql(name, conn, index=False, if_exists='append', schema=schema)