import logging
import uuid
from typing import Dict, List, Optional, Tuple, Union

import boto3
import pandas as pd
import redshift_connector

# Package-internal helpers used below; _redshift_types_from_path and
# _validate_parameters are defined elsewhere in the same module (omitted here).
from awswrangler import _data_types, exceptions

_logger: logging.Logger = logging.getLogger(__name__)


def _does_table_exist(cursor: redshift_connector.Cursor, schema: Optional[str],
                      table: str) -> bool:
    # Probe INFORMATION_SCHEMA for the table; the schema filter is optional.
    # Values are interpolated directly into the SQL string, so the identifiers
    # must come from trusted callers.
    schema_str = f"TABLE_SCHEMA = '{schema}' AND" if schema else ""
    cursor.execute(f"SELECT true WHERE EXISTS ("
                   f"SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE "
                   f"{schema_str} TABLE_NAME = '{table}'"
                   f");")
    return len(cursor.fetchall()) > 0
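
# Hypothetical usage sketch (not part of the original module): check for a
# table before writing to it. Connection handling and names are illustrative.
def _example_table_check(conn: redshift_connector.Connection) -> bool:
    cursor = conn.cursor()
    try:
        return _does_table_exist(cursor=cursor, schema="public", table="sales")
    finally:
        cursor.close()
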

def _get_primary_keys(cursor: redshift_connector.Cursor, schema: str,
                      table: str) -> List[str]:
    # Derive the primary-key columns by parsing the indexdef string that
    # pg_indexes reports for the table.
    cursor.execute(
        f"SELECT indexdef FROM pg_indexes WHERE schemaname = '{schema}' AND tablename = '{table}'"
    )
    rows = cursor.fetchall()
    if not rows:  # no index row means no declared primary key
        return []
    # indexdef looks like 'CREATE ... (col1, "col2")'; keep only the column names.
    result: str = rows[0][0]
    rfields: List[str] = result.split("(")[1].strip(")").split(",")
    fields: List[str] = [field.strip().strip('"') for field in rfields]
    return fields
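
# Standalone illustration of the parsing above, assuming an indexdef of the
# usual 'CREATE ... (col1, "col2")' shape; runnable without a cluster.
def _example_parse_indexdef() -> List[str]:
    indexdef = 'CREATE UNIQUE INDEX sales_pkey ON public.sales USING btree (id, "name")'
    rfields = indexdef.split("(")[1].strip(")").split(",")
    return [field.strip().strip('"') for field in rfields]  # -> ["id", "name"]
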

def _copy(
    cursor: redshift_connector.Cursor,
    path: str,
    table: str,
    iam_role: str,
    schema: Optional[str] = None,
) -> None:
    # Load Parquet files from S3 into the (optionally schema-qualified) table
    # with Redshift's COPY command, authorized by the given IAM role.
    if schema is None:
        table_name: str = table
    else:
        table_name = f"{schema}.{table}"
    sql: str = f"COPY {table_name} FROM '{path}'\nIAM_ROLE '{iam_role}'\nFORMAT AS PARQUET"
    _logger.debug("copy query:\n%s", sql)
    cursor.execute(sql)
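
# Hypothetical sketch: render the COPY statement _copy would execute, without
# touching a cluster. The path and role ARN are illustrative placeholders.
def _example_copy_sql() -> str:
    table_name = "public.sales"
    path = "s3://my-bucket/staging/sales/"
    iam_role = "arn:aws:iam::123456789012:role/redshift-copy-role"
    return f"COPY {table_name} FROM '{path}'\nIAM_ROLE '{iam_role}'\nFORMAT AS PARQUET"
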

def _upsert(
    cursor: redshift_connector.Cursor,
    table: str,
    temp_table: str,
    schema: str,
    primary_keys: Optional[List[str]] = None,
) -> None:
    # Emulate an upsert: delete target rows that match the staging table on the
    # primary key, then insert every staged row.
    if not primary_keys:
        primary_keys = _get_primary_keys(cursor=cursor, schema=schema, table=table)
    _logger.debug("primary_keys: %s", primary_keys)
    if not primary_keys:
        raise exceptions.InvalidRedshiftPrimaryKeys()
    join_clause: str = " AND ".join(f"{table}.{pk} = {temp_table}.{pk}"
                                    for pk in primary_keys)
    sql: str = f"DELETE FROM {schema}.{table} USING {temp_table} WHERE {join_clause}"
    _logger.debug(sql)
    cursor.execute(sql)
    sql = f"INSERT INTO {schema}.{table} SELECT * FROM {temp_table}"
    _logger.debug(sql)
    cursor.execute(sql)
    _drop_table(cursor=cursor, schema=schema, table=temp_table)
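
# Hypothetical sketch: the DELETE/INSERT pair _upsert issues for a one-column
# key, rendered without a live connection. All names are illustrative.
def _example_upsert_sql() -> List[str]:
    schema, table, temp_table = "public", "sales", "temp_redshift_abc123"
    join_clause = " AND ".join(f"{table}.{pk} = {temp_table}.{pk}" for pk in ["id"])
    return [
        f"DELETE FROM {schema}.{table} USING {temp_table} WHERE {join_clause}",
        f"INSERT INTO {schema}.{table} SELECT * FROM {temp_table}",
    ]
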

def _drop_table(cursor: redshift_connector.Cursor, schema: Optional[str],
                table: str) -> None:
    # Drop the table if it exists; schema qualification is optional.
    schema_str = f"{schema}." if schema else ""
    sql = f"DROP TABLE IF EXISTS {schema_str}{table}"
    _logger.debug("Drop table query:\n%s", sql)
    cursor.execute(sql)

def _create_table(
    df: Optional[pd.DataFrame],
    path: Optional[Union[str, List[str]]],
    cursor: redshift_connector.Cursor,
    table: str,
    schema: str,
    mode: str,
    index: bool,
    dtype: Optional[Dict[str, str]],
    diststyle: str,
    sortstyle: str,
    distkey: Optional[str],
    sortkey: Optional[List[str]],
    primary_keys: Optional[List[str]],
    varchar_lengths_default: int,
    varchar_lengths: Optional[Dict[str, int]],
    parquet_infer_sampling: float = 1.0,
    use_threads: bool = True,
    boto3_session: Optional[boto3.Session] = None,
    s3_additional_kwargs: Optional[Dict[str, str]] = None,
) -> Tuple[str, Optional[str]]:
    if mode == "overwrite":
        _drop_table(cursor=cursor, schema=schema, table=table)
    elif _does_table_exist(cursor=cursor, schema=schema, table=table):
        if mode == "upsert":
            # Stage the upsert in a session-scoped temporary table that clones
            # the target's structure; the caller loads it and then merges.
            guid: str = uuid.uuid4().hex
            temp_table: str = f"temp_redshift_{guid}"
            sql: str = f"CREATE TEMPORARY TABLE {temp_table} (LIKE {schema}.{table})"
            _logger.debug(sql)
            cursor.execute(sql)
            return temp_table, None
        return table, schema
    diststyle = diststyle.upper() if diststyle else "AUTO"
    sortstyle = sortstyle.upper() if sortstyle else "COMPOUND"
    if df is not None:
        redshift_types: Dict[str, str] = _data_types.database_types_from_pandas(
            df=df,
            index=index,
            dtype=dtype,
            varchar_lengths_default=varchar_lengths_default,
            varchar_lengths=varchar_lengths,
            converter_func=_data_types.pyarrow2redshift,
        )
    elif path is not None:
        redshift_types = _redshift_types_from_path(
            path=path,
            varchar_lengths_default=varchar_lengths_default,
            varchar_lengths=varchar_lengths,
            parquet_infer_sampling=parquet_infer_sampling,
            use_threads=use_threads,
            boto3_session=boto3_session,
            s3_additional_kwargs=s3_additional_kwargs,
        )
    else:
        raise ValueError("df and path are both None. You must pass at least one.")
    _validate_parameters(
        redshift_types=redshift_types,
        diststyle=diststyle,
        distkey=distkey,
        sortstyle=sortstyle,
        sortkey=sortkey,
    )
    cols_str: str = ",\n".join(f"{k} {v}" for k, v in redshift_types.items())
    primary_keys_str: str = f",\nPRIMARY KEY ({', '.join(primary_keys)})" if primary_keys else ""
    distkey_str: str = f"\nDISTKEY({distkey})" if distkey and diststyle == "KEY" else ""
    sortkey_str: str = f"\n{sortstyle} SORTKEY({','.join(sortkey)})" if sortkey else ""
    sql = (f"CREATE TABLE IF NOT EXISTS {schema}.{table} (\n"
           f"{cols_str}"
           f"{primary_keys_str}"
           f")\nDISTSTYLE {diststyle}"
           f"{distkey_str}"
           f"{sortkey_str}")
    _logger.debug("Create table query:\n%s", sql)
    cursor.execute(sql)
    return table, schema
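
# Hypothetical end-to-end sketch tying the helpers together: create or reuse
# the target table, COPY staged Parquet into whatever _create_table returned,
# and merge when a staging table was used. All literals are placeholders.
def _example_load(df: pd.DataFrame, cursor: redshift_connector.Cursor) -> None:
    created_table, created_schema = _create_table(
        df=df,
        path=None,
        cursor=cursor,
        table="sales",
        schema="public",
        mode="upsert",
        index=False,
        dtype=None,
        diststyle="AUTO",
        sortstyle="COMPOUND",
        distkey=None,
        sortkey=None,
        primary_keys=["id"],
        varchar_lengths_default=256,
        varchar_lengths=None,
    )
    _copy(
        cursor=cursor,
        path="s3://my-bucket/staging/sales/",
        table=created_table,
        iam_role="arn:aws:iam::123456789012:role/redshift-copy-role",
        schema=created_schema,
    )
    if created_table != "sales":  # upsert path: merge the staging table in
        _upsert(cursor=cursor, table="sales", temp_table=created_table, schema="public")
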
Example #7
def execute_ddl_2(cursor: redshift_connector.Cursor) -> None:
    # xddl2 and ddl2 are SQL statement strings defined elsewhere in this
    # example's source (not shown in the excerpt).
    cursor.execute(xddl2)
    cursor.execute(ddl2)