Example #1
    def push(self, engine=None, schema=None):
        """Check data for sql table rules
        """
        if not self.data.index.is_unique:
            raise AttributeError(
                f'Table({self.name}) data index must have unique values')
        if not self.data.columns.is_unique:
            raise AttributeError(
                f'Table({self.name}) data columns must have unique values')

        if engine is not None:
            self.engine = engine

        if schema is not None:
            self.schema = schema

        if self.name in self.engine.table_names(schema=self.schema):
            # check if sql table has primary key
            if primary_key(self.name, self.engine, self.schema) is None:
                # Resolve a key so the replacement table can be given a primary key
                if self.data.index.name is None:
                    if self.key is None:
                        # Fall back to an 'id' column (or name the index 'id')
                        if 'id' in self.data.columns:
                            self.data.set_index('id', inplace=True)
                        self.data.index.name = 'id'
                        self.key = 'id'
                    else:
                        self.data.index.name = self.key
                else:
                    if self.key is None:
                        self.key = self.data.index.name
                    else:
                        self.data.index.name = self.key
                # Without a primary key, we cannot do anything efficiently
                # Current solution is to completely replace old table
                to_sql_k(self.data,
                         self.name,
                         self.engine,
                         index=True,
                         if_exists='replace',
                         keys=self.key,
                         schema=self.schema)
            else:
                update_sql_with_df(self.data, self.name, self.engine,
                                   self.schema)
        else:
            # Table does not exist yet: create it and give it a primary key
            self.key = self.data.index.name
            if self.key is None:
                to_sql_k(self.data,
                         self.name,
                         self.engine,
                         keys='id',
                         schema=self.schema)
            else:
                to_sql_k(self.data,
                         self.name,
                         self.engine,
                         keys=self.key,
                         schema=self.schema)

        # Re-initialize from SQL so self.data reflects what was actually written
        self.__init__(self.name, engine=self.engine, schema=self.schema)
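
A minimal usage sketch for push(), added for illustration and not taken from the source project: it assumes the Table class above is importable; the module name mytables and the table name measurements are placeholders.

import pandas as pd
from sqlalchemy import create_engine

from mytables import Table  # assumed import path, not the project's real module name

engine = create_engine('sqlite:///:memory:')

# push() requires a unique index and unique column names
df = pd.DataFrame({'value': ['a', 'b', 'c']},
                  index=pd.Index([1, 2, 3], name='id'))

tbl = Table('measurements', data=df)  # key is taken from the index name ('id')
tbl.push(engine=engine)               # table does not exist yet -> created via to_sql_k

df.loc[2, 'value'] = 'updated'
tbl.data = df
tbl.push()                            # table now has a primary key -> update_sql_with_df path
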
Example #2
    def __repr__(self):
        if self.lazy:
            names = self.table_names
            cols = [
                ', '.join(get_col_names(name, self.engine)) for name in names
            ]
            keys = [primary_key(name, self.engine) for name in names]
            tables = [
                f"Table(name={name}, cols=[{c_names}], key={key})\n"
                for name, c_names, key in zip(names, cols, keys)
            ]
            return f'DataBase({"       , ".join(tables)}, lazy=True, url={self.engine.url})'
        return f'DataBase({", ".join(repr(tbl) for tbl in self.db.values())}, url={self.engine.url})'
Example #3
    def __init__(self,
                 name,
                 data=None,
                 key=None,
                 f_keys=None,
                 types=None,
                 engine=None,
                 db=None,
                 schema=None):
        self.name = name
        self.key = key
        # Avoid sharing mutable default arguments across instances
        self.f_keys = f_keys if f_keys is not None else []
        self.types = types if types is not None else {}
        self.engine = engine
        self.data = data
        self.db = db
        self.schema = schema

        if isinstance(self.data, Engine):
            self.engine = self.data
            self.data = None

        if isinstance(self.engine, Engine):
            # If an engine is provided but no key: use the existing table's primary key
            if self.key is None:
                if self.name in self.engine.table_names(self.schema):
                    self.key = primary_key(self.name, self.engine, self.schema)
            # If engine and data are both provided, keep the data as given
            # (the table probably doesn't exist yet); otherwise pull it from the table
            if self.data is None:
                self.data = from_sql_keyindex(self.name, self.engine,
                                              self.schema)
        # If no engine is provided but data is:
        elif self.data is not None:
            if isinstance(self.data, dict):
                self.data = DataFrame(self.data)

            if isinstance(self.data, DataFrame):
                self.key = self.data.index.name
            else:
                raise TypeError('data can only be DataFrame or dict')
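
A constructor usage sketch, added for illustration; the import path is an assumption. The calls below exercise the branches shown above.

import pandas as pd
from sqlalchemy import create_engine

from mytables import Table  # assumed import path

engine = create_engine('sqlite:///:memory:')

# dict data, no engine: converted to a DataFrame; key stays None (unnamed index)
t1 = Table('colors', data={'name': ['red', 'blue'], 'hex': ['#f00', '#00f']})

# DataFrame data with a named index: the index name becomes the key
df = pd.DataFrame({'name': ['red', 'blue']}, index=pd.Index([1, 2], name='id'))
t2 = Table('colors', data=df)

# engine only: key and data are pulled from the existing SQL table
# t3 = Table('colors', engine=engine)  # requires the 'colors' table to already exist
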
Example #4
def update_sql_with_df(df, name, engine, schema=None, index_is_key=True, key=None):
    """Drops all rows then push DataFrame to add data back
       Creates any new columns and deletes any missing columns
    """
    df = df.copy()

    if index_is_key:
        key = df.index.name
        if key is None:
            key = 'id'
        df[key] = df.index

    with engine.begin() as conn:
        start_key = primary_key(name, conn, schema=schema)
        start_key_deleted = False
        tbl = get_table(name, conn, schema=schema)
        # Delete data, leave table columns
        conn.execute(tbl.delete(None))
        # get old column names
        old_names = set(tbl.columns.keys())
        # get new column names
        new_names = set(df.columns)
        # Add any new columns
        new_to_add = new_names - old_names
        for col_name in new_to_add:
            add_column(get_table(name, conn, schema=schema),
                       col_name, get_type(df, col_name))

        # Delete any missing columns
        old_to_delete = old_names - new_names
        if len(old_to_delete) > 0:
            for col_name in old_to_delete:
                if col_name == start_key:
                    start_key_deleted = True
                delete_column(get_table(name, conn, schema=schema), col_name)
        
        # Restore the primary key if the table never had one or if the old key column was dropped
        if not has_primary_key(name, conn, schema=schema) or start_key_deleted:
            add_primary_key(name, conn, key, schema=schema)

        df.to_sql(name, conn, index=False, if_exists='append', schema=schema)
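
A usage sketch for update_sql_with_df, added for illustration; the import path, database file, and table name are assumptions.

import pandas as pd
from sqlalchemy import create_engine

from mytables import update_sql_with_df  # assumed import path

engine = create_engine('sqlite:///readings.db')  # placeholder database

# New snapshot of the table; the index is treated as the primary key column
df = pd.DataFrame({'temperature': [21.5, 22.1], 'station': ['A', 'B']},
                  index=pd.Index([101, 102], name='id'))

# Deletes all existing rows, adds the 'station' column if it is new, drops any
# columns no longer present in df, restores the primary key if needed, then appends df
update_sql_with_df(df, 'readings', engine)
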