def _does_table_exist(cursor: redshift_connector.Cursor, schema: Optional[str], table: str) -> bool: schema_str = f"TABLE_SCHEMA = '{schema}' AND" if schema else "" cursor.execute(f"SELECT true WHERE EXISTS (" f"SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE " f"{schema_str} TABLE_NAME = '{table}'" f");") return len(cursor.fetchall()) > 0
def _get_primary_keys(cursor: redshift_connector.Cursor, schema: str, table: str) -> List[str]: cursor.execute( f"SELECT indexdef FROM pg_indexes WHERE schemaname = '{schema}' AND tablename = '{table}'" ) result: str = cursor.fetchall()[0][0] rfields: List[str] = result.split("(")[1].strip(")").split(",") fields: List[str] = [field.strip().strip('"') for field in rfields] return fields
def _copy(
    cursor: redshift_connector.Cursor,
    path: str,
    table: str,
    iam_role: str,
    schema: Optional[str] = None,
) -> None:
    """Issue a Redshift COPY loading Parquet files from *path* into *table*.

    The target is schema-qualified only when *schema* is provided; the load
    authenticates via *iam_role*.
    """
    # Qualify the destination with the schema only when one was supplied.
    target = table if schema is None else f"{schema}.{table}"
    sql: str = f"COPY {target} FROM '{path}'\nIAM_ROLE '{iam_role}'\nFORMAT AS PARQUET"
    _logger.debug("copy query:\n%s", sql)
    cursor.execute(sql)
def _upsert(
    cursor: redshift_connector.Cursor,
    table: str,
    temp_table: str,
    schema: str,
    primary_keys: Optional[List[str]] = None,
) -> None:
    """Merge *temp_table* into *schema*.*table*.

    Deletes target rows whose primary-key values match a staged row, inserts
    everything from the staging table, then drops the staging table. Raises
    ``exceptions.InvalidRedshiftPrimaryKeys`` when no primary keys can be
    determined.
    """
    if not primary_keys:
        # Fall back to the keys declared on the target table itself.
        primary_keys = _get_primary_keys(cursor=cursor, schema=schema, table=table)
    _logger.debug("primary_keys: %s", primary_keys)
    if not primary_keys:
        raise exceptions.InvalidRedshiftPrimaryKeys()
    # One equality term per key column, ANDed together.
    join_clause: str = " AND ".join(
        f"{table}.{pk} = {temp_table}.{pk}" for pk in primary_keys
    )
    sql: str = f"DELETE FROM {schema}.{table} USING {temp_table} WHERE {join_clause}"
    _logger.debug(sql)
    cursor.execute(sql)
    sql = f"INSERT INTO {schema}.{table} SELECT * FROM {temp_table}"
    _logger.debug(sql)
    cursor.execute(sql)
    # Staging table is single-use; clean it up.
    _drop_table(cursor=cursor, schema=schema, table=temp_table)
def _drop_table(cursor: redshift_connector.Cursor, schema: Optional[str], table: str) -> None:
    """Execute ``DROP TABLE IF EXISTS`` for *table*, schema-qualified when given."""
    if schema:
        qualified = f"{schema}.{table}"
    else:
        qualified = table
    sql = f"DROP TABLE IF EXISTS {qualified}"
    _logger.debug("Drop table query:\n%s", sql)
    cursor.execute(sql)
def _create_table(
    df: Optional[pd.DataFrame],
    path: Optional[Union[str, List[str]]],
    cursor: redshift_connector.Cursor,
    table: str,
    schema: str,
    mode: str,
    index: bool,
    dtype: Optional[Dict[str, str]],
    diststyle: str,
    sortstyle: str,
    distkey: Optional[str],
    sortkey: Optional[List[str]],
    primary_keys: Optional[List[str]],
    varchar_lengths_default: int,
    varchar_lengths: Optional[Dict[str, int]],
    parquet_infer_sampling: float = 1.0,
    use_threads: bool = True,
    boto3_session: Optional[boto3.Session] = None,
    s3_additional_kwargs: Optional[Dict[str, str]] = None,
) -> Tuple[str, Optional[str]]:
    """Ensure the destination table for a load exists, honoring *mode*.

    Returns ``(table, schema)`` for the table to load into. Behavior by mode:

    - ``"overwrite"``: drop the existing table, then recreate it below.
    - ``"upsert"`` on an existing table: create a session-scoped temporary
      staging table ``temp_redshift_<uuid>`` shaped LIKE the target and
      return ``(temp_table, None)`` — ``None`` schema because temp tables
      are not schema-qualified.
    - any other mode on an existing table: reuse it unchanged.

    Column types are inferred from *df* when given, otherwise sampled from
    the Parquet files at *path*; exactly one of the two must be non-None.

    Raises
    ------
    ValueError
        If both *df* and *path* are None.
    """
    if mode == "overwrite":
        _drop_table(cursor=cursor, schema=schema, table=table)
    elif _does_table_exist(cursor=cursor, schema=schema, table=table) is True:
        if mode == "upsert":
            # Stage upserts into a uniquely-named temp table cloned from the target.
            guid: str = uuid.uuid4().hex
            temp_table: str = f"temp_redshift_{guid}"
            sql: str = f"CREATE TEMPORARY TABLE {temp_table} (LIKE {schema}.{table})"
            _logger.debug(sql)
            cursor.execute(sql)
            return temp_table, None
        # Table already exists and mode is not overwrite/upsert: leave it as-is.
        return table, schema
    # Normalize style keywords; Redshift defaults per upstream convention.
    diststyle = diststyle.upper() if diststyle else "AUTO"
    sortstyle = sortstyle.upper() if sortstyle else "COMPOUND"
    if df is not None:
        # Derive Redshift column types from the DataFrame's pyarrow schema.
        redshift_types: Dict[
            str, str] = _data_types.database_types_from_pandas(
            df=df,
            index=index,
            dtype=dtype,
            varchar_lengths_default=varchar_lengths_default,
            varchar_lengths=varchar_lengths,
            converter_func=_data_types.pyarrow2redshift,
        )
    elif path is not None:
        # No DataFrame: sample the Parquet files on S3 to infer column types.
        redshift_types = _redshift_types_from_path(
            path=path,
            varchar_lengths_default=varchar_lengths_default,
            varchar_lengths=varchar_lengths,
            parquet_infer_sampling=parquet_infer_sampling,
            use_threads=use_threads,
            boto3_session=boto3_session,
            s3_additional_kwargs=s3_additional_kwargs,
        )
    else:
        raise ValueError("df and path are None.You MUST pass at least one.")
    # Validates distkey/sortkey against the inferred columns before building DDL.
    _validate_parameters(
        redshift_types=redshift_types,
        diststyle=diststyle,
        distkey=distkey,
        sortstyle=sortstyle,
        sortkey=sortkey,
    )
    # "name type,\n" per column; [:-2] trims the trailing ",\n".
    cols_str: str = "".join([f"{k} {v},\n" for k, v in redshift_types.items()])[:-2]
    primary_keys_str: str = f",\nPRIMARY KEY ({', '.join(primary_keys)})" if primary_keys else ""
    # DISTKEY is only valid when DISTSTYLE is KEY.
    distkey_str: str = f"\nDISTKEY({distkey})" if distkey and diststyle == "KEY" else ""
    sortkey_str: str = f"\n{sortstyle} SORTKEY({','.join(sortkey)})" if sortkey else ""
    sql = (f"CREATE TABLE IF NOT EXISTS {schema}.{table} (\n"
           f"{cols_str}"
           f"{primary_keys_str}"
           f")\nDISTSTYLE {diststyle}"
           f"{distkey_str}"
           f"{sortkey_str}")
    _logger.debug("Create table query:\n%s", sql)
    cursor.execute(sql)
    return table, schema
def execute_ddl_2(cursor: redshift_connector.Cursor) -> None:
    """Execute the module-level ``ddl2`` statement on *cursor*.

    NOTE(review): the previous body first executed ``xddl2`` — a name not
    defined anywhere visible in this file and almost certainly a typo for
    ``ddl2`` (it would raise ``NameError`` at call time). The spurious call
    has been removed; confirm ``xddl2`` is not defined elsewhere in the
    module before merging.
    """
    cursor.execute(ddl2)