예제 #1
0
def diff_df(df1: pd.DataFrame, df2: pd.DataFrame, keys: List[Col],
            ignores: List[Col]) -> Tuple[DiffDict, Status]:
    """Find diffs as a DiffDict of changes from df1 to df2."""
    dd: DiffDict = {'adds': None, 'mods': [], 'retires': None}

    dim_status = compare_dims(df1, df2, True, False)
    if dim_status != OK():
        return dd, dim_status

    df1_, df2_, retire_df, new_df = symm_diff_df(df1, df2, keys)
    mods, dim_status = find_mods(df1_, df2_, keys, ignores)
    if dim_status != OK():
        return dd, dim_status

    return {'adds': new_df, 'mods': mods, 'retires': retire_df}, OK()
예제 #2
0
def gen_pk_uniq(pk: List[Col], uniq: List[Col]) -> Tuple[str, Status]:
    """Generate Primary Key or Unique substring."""
    s_pk = '' if not pk else 'PRIMARY KEY({})'.format(', '.join(pk))
    s_uniq = '' if not uniq else 'UNIQUE({})'.format(', '.join(uniq))
    s = ('' if not s_pk and not s_uniq else f'{s_pk}\n' if s_pk and not s_uniq
         else f'{s_uniq}\n' if s_uniq and not s_pk else f'{s_pk},\n{s_uniq}\n')
    return s, OK()
예제 #3
0
def query_df(cur: Cursor, q: str) -> Tuple[pd.DataFrame, Status]:
    """Execute SQL query string and return result as DataFrame."""
    rows, status = query(cur, q, True)
    if status != OK() or not rows:
        logger.error(f'Query {q} returned nvalid status or no data')
        return pd.DataFrame(), status
    df = pd.DataFrame(rows[1:], columns=rows[0])
    return df, status
예제 #4
0
def gen_cols(cols: List[SchemaCol]) -> Tuple[str, Status]:
    """Generate columns substring."""
    status: Status
    if not cols: return '', OK()

    s, status = '', OK()
    for col in cols:
        name, dtype, pk, uniq, not_null = col
        if pk and uniq:
            s, status = '', Error(f'Col {name} specified as both PK and Uniq')
            break
        dtype_ = dtype_to_str(dtype)
        s += f'{name} {dtype_}'
        s += ' PRIMARY KEY' if pk else ' UNIQUE' if uniq else ''
        s += ' NOT NULL,\n' if not_null else ',\n'

    return s, status
예제 #5
0
def gen_create_stmt(td: TableDef) -> Tuple[str, Status]:
    """Translate TableDef into Create Table statement."""
    status: Status
    create, s1 = gen_create(td['if_not_exists'], td['name'])
    cols, s2 = gen_cols(td['cols'])
    fks, s3 = gen_fks(td['fks'])
    pk_uniq, s4 = gen_pk_uniq(td['pk'], td['uniq'])

    if all(s == OK() for s in (s1, s2, s3, s4)):
        spec = strip_comma(f'{cols}{fks}{pk_uniq}')
        stmt = f'{create}({spec});'
        status = OK()
    else:
        stmt = ''
        msg = ' | '.join(s.msg for s in (s1, s2, s3, s4) if s != OK())
        status = Error(msg)

    return stmt, status
예제 #6
0
def create(cur: Cursor, stmt: str) -> Status:
    """Create table."""
    status: Status
    try:
        cur.execute(stmt)
        status = OK()
        logger.info(f'Create statement executed: {stmt}')
    except Exception as e:
        logger.error(f'Create statement exception: {stmt}; {e}')
        status = Error(str(e))
    return status
예제 #7
0
def validate_insert(cur: Cursor, table: str, rows: Rows,
                    schema_cast: bool) -> Tuple[SqliteSchema, RowsPair]:
    """Validate insertion cols and optionally try casting to schema dtypes."""
    q = f'SELECT sql FROM sqlite_master WHERE type="table" and name="{table}"'
    ret, status = query(cur, q)
    if not ret or status != OK():
        msg = f'Schema validation query {q} failed'
        logger.error(msg)
        return [], ([], Error(msg))

    schema = parse_schema(ret[0][0])

    if len(schema) != len(rows[0]):
        msg = f'Insertion validation error: {table} has '
        msg += f'{len(schema)} cols vs input {len(rows[0])} cols'
        logger.error(msg)
        return [], ([], Error(msg))

    return (schema, apply_schema(schema, rows) if schema_cast else
            (rows, OK()))
예제 #8
0
 def __connect__(self) -> None:
     """Establish DB connection."""
     if self.db_type is DB_Type.SQLITE:
         self.conn = sqlite3.connect(self.db_name)
         self.cur = self.conn.cursor()
         self.status = OK()
         self.conn.execute('PRAGMA foreign_keys = 1')
         logger.info('Connected to Sqlite DB: {}'.format(self.db_name))
     else:
         self.status = self.INVALID_STATUS
         logger.error('DB conn failed: {}'.format(self.INVALID_STATUS.msg))
예제 #9
0
def close(conn) -> Status:
    """Close DB connection object."""
    status: Status
    try:
        conn.close()
        status = OK()
        logger.info('Closed DB conn.')
    except Exception as e:
        status = Error(str(e))
        logger.error('Failed to close DB conn: {}'.format(str(e)))
    return status
예제 #10
0
def compare_dims(df1: pd.DataFrame,
                 df2: pd.DataFrame,
                 cols: bool = True,
                 rows: bool = False) -> Status:
    """Compare DF dimensions for compatibility."""
    status: Status
    cols1, cols2 = len(df1.columns), len(df2.columns)
    rows1, rows2 = len(df1), len(df2)

    if not cols and not rows:
        status = OK()
    elif cols and not rows:
        status = (OK() if cols1 == cols2 else
                  Error(f'Cols mismatch: {cols1} vs {cols2}'))
    elif rows and not cols:
        status = (OK() if rows1 == rows2 else
                  Error(f'Rows mismatch: {rows1} vs {rows2}'))
    else:
        status = (OK() if rows1 == rows2 and cols1 == cols2 else
                  Error('Matrix mismatch: ' +
                        f'{rows1} x {cols1} vs {rows2} x {cols2}'))
    return status
예제 #11
0
def find_mods(df1: pd.DataFrame,
              df2: pd.DataFrame,
              keys: List[Col],
              ignores: List[Col] = []) -> Tuple[List[Mod], Status]:
    """Find all changes as Mods from df1 to df2."""
    diff_cols = [
        col for col in df1.columns if col not in keys and col not in ignores
    ]
    dim_status = compare_dims(df1, df2, True, True)
    if dim_status != OK():
        return [], dim_status

    f = lambda x: x.sort_values(by=keys).reset_index(drop=True)
    df1_, df2_ = f(df1), f(df2)

    mask = (df1_ != df2_).any(axis=1)
    df = df1_[mask].merge(df2_[mask], on=keys, suffixes=['_old', '_new'])
    pairs = [
        gen_mod(t._asdict(), keys, diff_cols)
        for t in df.itertuples(index=False)
    ]

    return [pair for pair in pairs if pair[1]], OK()
예제 #12
0
def insert(conn: Conn,
           cur: Cursor,
           table: str,
           rows: Rows,
           schema_cast: bool = True) -> Status:
    """Attempt to execute SQL insertion into specified table."""
    status: Status
    schema, (rows_, v) = validate_insert(cur, table, rows, schema_cast)
    if v != OK(): return v

    try:
        cols = ','.join(name for name, _ in schema)
        vals = ','.join('?' * len(schema))
        i = f'INSERT INTO {table}({cols}) VALUES ({vals})'
        cur.executemany(i, rows_)
        conn.commit()
        status = OK()
        logger.info('Insertion to {} executed: {}'.format(table, i))
    except Exception as e:
        status = Error(str(e))
        logger.error('Insertion exception for {}: {}'.format(i, str(e)))

    return status
예제 #13
0
def apply_schema(schema: SqliteSchema, rows: Rows) -> RowsPair:
    """Attempt to cast rows to primitive types in schema."""
    status: Status = OK()

    try:
        rows_ = []
        for row in rows:
            row_ = [cast(elem) for elem, (_, cast) in zip(row, schema)]
            rows_.append(row_)
    except Exception as e:
        msg = f'Insertion validation error: exception while casting {row}: {e}'
        status = Error(msg)
        logger.error(msg)

    return rows_, status
예제 #14
0
def query(cur: Cursor, q: str, hdr: bool = False) -> RowsPair:
    """Execute SQL query string."""
    status: Status
    try:
        result = cur.execute(q)
        if hdr:
            cols = [d[0] for d in result.description]
            rows = [cols] + [list(row) for row in result.fetchall()]
        else:
            rows = [list(row) for row in result.fetchall()]
        status = OK()
        logger.info(f'Query executed: {q}')
    except Exception as e:
        logger.error(f'Query exception: {q}; {e}')
        rows, status = [], Error(str(e))
    return rows, status
예제 #15
0
def gen_fks(fks: List[SchemaForeignKey]) -> Tuple[str, Status]:
    """Generate Foreign Keys substring."""
    status: Status
    s, status = '', OK()
    for fk in fks:
        fk_cols, fk_ref_cols = fk['cols'], fk['ref_cols']
        if len(fk_cols) != len(fk_ref_cols):
            s, status = '', Error(
                f'Length mismatch {fk_cols} vs {fk_ref_cols}')
            break

        cols, ref_cols = ', '.join(fk_cols), ', '.join(fk_ref_cols)
        ref_table = fk['ref_table']
        s += f'FOREIGN KEY({cols}) REFERENCES {ref_table}({ref_cols}),\n'

    return f'{s}', status
예제 #16
0
def gen_create(if_not_exists: bool, name: str) -> Tuple[str, Status]:
    """Generate Create substring."""
    s = (f'CREATE TABLE {name}'
         if if_not_exists is False else f'CREATE TABLE IF NOT EXISTS {name}')
    return s, OK()