Ejemplo n.º 1
0
 def test_get_credentials_from_login(self, mock_get_connection):
     """A connection's login/password become access_key/secret_key, with
     no session token."""
     # Fix: the scraped source had login/password masked as '******',
     # which contradicts the assertions below; restored the values the
     # assertions expect.
     mock_connection = Connection(login='aws_access_key_id',
                                  password='aws_secret_access_key')
     mock_get_connection.return_value = mock_connection
     hook = AwsHook()
     credentials_from_hook = hook.get_credentials()
     self.assertEqual(credentials_from_hook.access_key, 'aws_access_key_id')
     self.assertEqual(credentials_from_hook.secret_key, 'aws_secret_access_key')
     self.assertIsNone(credentials_from_hook.token)
Ejemplo n.º 2
0
 def test_get_credentials_from_extra(self, mock_get_connection):
     """Credentials supplied via the connection's JSON `extra` field are
     picked up; no session token is produced."""
     extra_json = ('{"aws_access_key_id": "aws_access_key_id",'
                   '"aws_secret_access_key": "aws_secret_access_key"}')
     mock_get_connection.return_value = Connection(extra=extra_json)
     credentials_from_hook = AwsHook().get_credentials()
     self.assertEqual(credentials_from_hook.access_key, 'aws_access_key_id')
     self.assertEqual(credentials_from_hook.secret_key, 'aws_secret_access_key')
     self.assertIsNone(credentials_from_hook.token)
    def _inject_aws_credentials(self):
        """Inject AWS access/secret keys into the transfer spec's S3 data
        source section, when the request body has one."""
        # Nothing to do unless the body describes an S3 data source.
        if TRANSFER_SPEC not in self.body:
            return
        if AWS_S3_DATA_SOURCE not in self.body[TRANSFER_SPEC]:
            return

        credentials = AwsHook(self.aws_conn_id).get_credentials()
        self.body[TRANSFER_SPEC][AWS_S3_DATA_SOURCE][AWS_ACCESS_KEY] = {
            ACCESS_KEY_ID: credentials.access_key,
            SECRET_ACCESS_KEY: credentials.secret_key,
        }
Ejemplo n.º 4
0
 def test_get_credentials_from_role_arn(self, mock_get_connection):
     """Assuming a role via `role_arn` in extras yields STS temporary
     credentials including a session token."""
     mock_get_connection.return_value = Connection(
         extra='{"role_arn":"arn:aws:iam::123456:role/role_arn"}')
     credentials_from_hook = AwsHook().get_credentials()
     expected_token = (
         'BQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh'
         '3c/LTo6UDdyJwOOvEVPvLXCrrrUtdnniCEXAMPLE/IvU1dYUg2RVAJBanLiHb4I'
         'gRmpRV3zrkuWJOgQs8IZZaIv2BXIa2R4OlgkBN9bkUDNCJiBeb/AXlzBBko7b15'
         'fjrBs2+cTQtpZ3CYWFXG8C5zqx37wnOE49mRl/+OtkIKGO7fAE')
     self.assertEqual(credentials_from_hook.access_key,
                      'AKIAIOSFODNN7EXAMPLE')
     self.assertEqual(credentials_from_hook.secret_key,
                      'aJalrXUtnFEMI/K7MDENG/bPxRfiCYzEXAMPLEKEY')
     self.assertEqual(credentials_from_hook.token, expected_token)
Ejemplo n.º 5
0
def insert_tweets_sentiment(redshift_conn_id: str, aws_conn_id: str, **kwargs):
    """COPY one day's tweet-sentiment JSONL file into Redshift.

    The day is taken from the task's `execution_date`; AWS credentials come
    from the given Airflow connection.
    """
    execution_date = kwargs['execution_date']
    year = execution_date.year
    month = execution_date.month
    day = execution_date.day

    aws_credentials = AwsHook(aws_conn_id=aws_conn_id).get_credentials()
    redshift_hook = PostgresHook(postgres_conn_id=redshift_conn_id,
                                 autocommit=True)

    # NOTE(review): Redshift COPY normally requires a remote (e.g. S3)
    # source; this relative local path looks suspicious — confirm upstream.
    sql = f"""
        COPY tweets_sentiment
        FROM './../../../data/tweets-sentiment/{year:04d}-{month:02d}-{day:02d}.jsonl'
        ACCESS_KEY_ID '{aws_credentials.access_key}'
        SECRET_ACCESS_KEY '{aws_credentials.secret_key}'
        FORMAT AS JSON 'auto'
    """
    redshift_hook.run(sql)
    def execute(self, context):
        """Stage S3 data into a Redshift table via a rendered COPY statement."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info(
            f"Copying data from S3 to Redshift staging {self.table} table")
        # The key is templated; render it against the task context.
        rendered_key = self.s3_key.format(**context)
        self.log.info(f"Rendered Key: {rendered_key}")
        s3_path = f"s3://{self.s3_bucket}/{rendered_key}"

        copy_stmt = StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.region, self.extra_params)

        self.log.info(
            f"Executing query to copy data from '{s3_path}' to '{self.table}'")
        redshift.run(copy_stmt)
    def execute(self, context):
        """Clear the target table's rows if it has any, then COPY from S3."""
        # Fix: removed the stale "not implemented yet" log message — this
        # operator is clearly implemented.
        aws_hook = AwsHook(self.aws_credentials_id)
        credentials = aws_hook.get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        # Clear existing rows (the table itself is kept) so a re-run does
        # not duplicate data.  The previous "#Delete the table" comment was
        # misleading — only rows are deleted.
        records = redshift.get_records(f"SELECT COUNT(*) FROM {self.table}")
        num_records = records[0][0]
        if num_records > 0:
            redshift.run("Delete from {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        formatted_sql = StageToRedshiftOperator.copy_sql.format(
            self.table, self.s3_path, credentials.access_key,
            credentials.secret_key, self.json, self.region)

        redshift.run(formatted_sql)
    def execute(self, context):
        """Delete the destination table's rows, then COPY the rendered S3 key."""
        # Fix: removed the stale "not implemented yet" log message — this
        # operator is clearly implemented.
        aws_hook = AwsHook(self.aws_credentials_id)
        credentials = aws_hook.get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Clearing data from destination Redshift table")
        redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")

        # Render templated fields (e.g. execution-date partitions) in the key.
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        formatted_sql = StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.ignore_headers, self.delimiter)
        redshift.run(formatted_sql)
Ejemplo n.º 9
0
    def execute(self, context):
        """Load a dimension table: optionally clear it, ensure it exists,
        then run the configured insert query."""
        self.log.info("Data insertion in Dimension table - {}".format(
            self.table))

        # Fix: the original fetched AWS credentials here but never used
        # them — every statement below runs through the Redshift
        # connection — so the dead lookup was removed.
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        if not self.append_only:
            self.log.info("Delete existing {} Dimension table".format(
                self.table))
            redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Creating New table")
        redshift.run(self.creation_query)

        self.log.info("Executing data insert query")
        redshift.run(self.data_insertion_query)
    def execute(self, context):
        """COPY s3://<bucket>/<prefix> into the target Redshift table using
        key-based credentials from the 'aws_credentials' connection."""
        credentials = AwsHook("aws_credentials").get_credentials()
        redshift_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        copy_query = """
                    COPY {table}
                    FROM 's3://{s3_bucket}/{s3_prefix}'
                    with credentials
                    'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                    {copy_options};
                """.format(
            table=self.table,
            s3_bucket=self.s3_bucket,
            s3_prefix=self.s3_prefix,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            copy_options=self.copy_options,
        )
        redshift_hook.run(copy_query)
Ejemplo n.º 11
0
    def execute(self, context):
        """Truncate the destination table and COPY JSON data from S3."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Truncating data from destination Redshift table")
        redshift.run("TRUNCATE TABLE {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        # "auto" passes straight through; anything else names a JSONPaths
        # file in the same bucket.
        if self.json == "auto":
            s3_json = self.json
        else:
            s3_json = "s3://{}/{}".format(self.s3_bucket, self.json)
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, s3_json, self.timeformat))
Ejemplo n.º 12
0
    def execute(self, context):
        """Clear a staging table and reload it from S3; the log_data key
        gets an explicit JSONPaths file, everything else uses 'auto'."""
        credentials = AwsHook(self.aws_credentials).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Clearing data from Staging tables")
        redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        s3_path = "s3://{}/{}".format(self.s3_bucket, self.s3_key)
        json_path = ("s3://{}/{}".format(self.s3_bucket, self.log_json_path)
                     if self.s3_key == "log_data" else 'auto')
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, json_path))
        self.log.info(f"Staging table {self.table} created successfully")
Ejemplo n.º 13
0
    def execute(self, context):
        """Run the configured SQL statement to load the fact table on the
        AWS Redshift cluster.

        Uses (set on the operator from the DAG):
            redshift_conn_id -- Redshift connection parameters
            aws_credentials  -- AWS credentials connection id
            sql_statement    -- SQL statement to execute
            table            -- target table name (logging only)
        """
        self.log.info(f"load table: {self.table}")

        # The credentials are fetched but not referenced below; the lookup
        # is kept to preserve the original behavior (it fails fast when the
        # AWS connection is misconfigured).
        aws_hook = AwsHook(self.aws_credentials)
        credentials = aws_hook.get_credentials()

        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        redshift.run(self.sql_statement)
Ejemplo n.º 14
0
    def execute(self, context):
        """Create the staging table, then COPY from S3 — parquet input loads
        via the IAM role, any other format via key-based credentials."""
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        redshift.run(self.create_stmt)

        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)

        if self.file_format == 'parquet':
            redshift.run(StageToRedshiftOperator.copy_sql_parquet.format(
                self.table, s3_path, self.IAM_ROLE))
            return

        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.ignore_headers))
Ejemplo n.º 15
0
    def execute(self, context):
        """Copy data from an S3 bucket into a Redshift staging table."""
        # Fix: removed the stale "not implemented yet" log message — this
        # operator is clearly implemented.
        aws_hook = AwsHook(self.aws_credentials_id)
        credentials = aws_hook.get_credentials()

        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Copying data from S3 bucket to Redshift")

        # The key is templated and rendered against the task context
        # (e.g. execution-date partitioning).
        rendered_key = self.s3_key.format(**context)

        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)

        formatted_sql = StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.json)
        redshift.run(formatted_sql)
Ejemplo n.º 16
0
 def test_get_credentials_from_role_arn_with_external_id(
         self, mock_get_connection):
     """Role assumption with an external_id also yields STS temporary
     credentials including a session token."""
     mock_get_connection.return_value = Connection(
         extra='{"role_arn":"arn:aws:iam::123456:role/role_arn",'
         ' "external_id":"external_id"}')
     credentials_from_hook = AwsHook().get_credentials()
     expected_token = (
         'BQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh'
         '3c/LTo6UDdyJwOOvEVPvLXCrrrUtdnniCEXAMPLE/IvU1dYUg2RVAJBanLiHb4I'
         'gRmpRV3zrkuWJOgQs8IZZaIv2BXIa2R4OlgkBN9bkUDNCJiBeb/AXlzBBko7b15'
         'fjrBs2+cTQtpZ3CYWFXG8C5zqx37wnOE49mRl/+OtkIKGO7fAE')
     self.assertEqual(credentials_from_hook.access_key,
                      'AKIAIOSFODNN7EXAMPLE')
     self.assertEqual(credentials_from_hook.secret_key,
                      'aJalrXUtnFEMI/K7MDENG/bPxRfiCYzEXAMPLEKEY')
     self.assertEqual(credentials_from_hook.token, expected_token)
Ejemplo n.º 17
0
    def execute(self, context):
        """Insert into a dimension table using the helper query that matches
        self.query_table."""
        aws_hook = AwsHook(self.aws_credentials_id)
        credentials = aws_hook.get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        # Map each dimension to its insert statement from the SQL helper.
        # Bug fix: artists_table previously reused song_table_insert
        # (copy-paste error) — assumes SqlQueries.artist_table_insert
        # exists; confirm against the helper module.
        query_map = {
            "users_table": SqlQueries.user_table_insert,
            "songs_table": SqlQueries.song_table_insert,
            "artists_table": SqlQueries.artist_table_insert,
            "time_table": SqlQueries.time_table_insert,
        }
        try:
            query_content = query_map[self.query_table]
        except KeyError:
            # Previously an unrecognized table fell through to a NameError
            # on query_content; fail with an explicit message instead.
            raise ValueError(f"Unknown query_table: {self.query_table}")

        formatted_sql = LoadDimensionOperator.insert_sql.format(
            self.table, self.columns, query_content)
        redshift.run(formatted_sql)
Ejemplo n.º 18
0
    def execute(self, context):
        """Clear the staging table, then reload it from the rendered S3 key."""
        self._log_formatted("Settings connections")
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self._log_formatted(f"Clearing data from staging table : {self.table}")
        redshift.run(f"DELETE FROM {self.table}")

        rendered_key = self.s3_key.format(**context)
        s3_path = f"s3://{self.s3_bucket}/{rendered_key}"
        copy_stmt = StageToRedshiftOperator.copy_query.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.region, self.json_format,
            self.time_format)
        self._log_formatted(
            f"Copying data from {s3_path} to table {self.table}")
        redshift.run(copy_stmt)
Ejemplo n.º 19
0
    def execute(self, context):
        """Drop the target table if present, then rebuild it from S3 via COPY."""
        aws_hook = AwsHook(self.aws_cred)
        credentials = aws_hook.get_credentials()
        aws_access_key = credentials.access_key
        aws_secret_key = credentials.secret_key

        postgrs_hook = PostgresHook(postgres_conn_id=self.redshift_id)
        # Bug fix: "DROP TABLE {} IF EXISTS" is invalid SQL — the
        # IF EXISTS clause must precede the table name.
        postgrs_hook.run("DROP TABLE IF EXISTS {}".format(self.table_name),
                         self.autocommit)

        aws_s3_path = "s3://{}/{}".format(self.aws_s3_bucket,
                                          self.aws_s3_key.format(**context))

        formatted_sql = StageToRedshiftOperator.copy_sql.format(
            self.table_name, aws_s3_path, aws_access_key, aws_secret_key,
            self.ignore_header, self.json)

        postgrs_hook.run(formatted_sql, self.autocommit)
Ejemplo n.º 20
0
    def execute(self, context):
        """Stage data from S3 into Redshift: clear the target table, then COPY."""
        self.log.info('Starting StageToRedshiftOperator..')

        self.log.info('Creating hooks for S3 and Redshift..')
        # Fix: `aws_creds` was an undefined bare name — presumably the
        # operator attribute holding the credentials connection id
        # (TODO confirm the attribute name on this class).
        aws_hook = AwsHook(self.aws_creds)
        credentials = aws_hook.get_credentials()
        # Fix: PostgresHook takes `postgres_conn_id`, not `postgres_conn`.
        redshift_hook = PostgresHook(postgres_conn_id=self.redshift_conn)

        self.log.info('Clearing data from Redshift tables..')
        # Fix: `redshift` was undefined; use the hook created above.
        redshift_hook.run(f'DELETE FROM {self.target_table}')

        self.log.info('Moving data to Redshift tables..')
        key = self.s3_key.format(**context)
        # Fix: the double slash ('//') produced an empty path segment in
        # the object key.
        path = f's3://{self.s3_bucket}/{key}'

        query = StageToRedshiftOperator.copy_sql.format(
            self.target_table, path, credentials.access_key,
            credentials.secret_key, self.json_path)
        # Fix: the COPY statement was built but never executed.
        redshift_hook.run(query)
    def execute(self, context):
        """Empty the destination table, then COPY self.s3_path into it."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Clearing data from destination Redshift table")
        redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        copy_stmt = StageToRedshiftOperator.copy_sql.format(
            self.table, self.s3_path, credentials.access_key,
            credentials.secret_key, self.region, self.json_path)
        redshift.run(copy_stmt)
Ejemplo n.º 22
0
    def execute(self, context):
        """Truncate the target table and repopulate it from the rendered
        S3 key."""
        self.log.info("Connect to AWS")
        credentials = AwsHook(self.aws_credentials_id).get_credentials()

        self.log.info("Connecting to Redshift")
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)

        self.log.info("Truncating table {}...".format(self.table))
        redshift.run("TRUNCATE {}".format(self.table))

        self.log.info("Creating table {}...".format(self.table))
        copy_stmt = StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key)
        redshift.run(copy_stmt)
Ejemplo n.º 23
0
def create_spark_context(aws_conn_id):
    """Build a SparkSession wired for S3A access and return a SQLContext.

    AWS credentials are pulled from the given Airflow connection and written
    into the Hadoop configuration so s3a:// paths resolve.
    """
    spark = (SparkSession.builder
             .config("spark.jars.packages",
                     "org.apache.hadoop:hadoop-aws:2.7.0")
             .getOrCreate())
    spark.sparkContext.setLogLevel("INFO")

    sql_context = pyspark.SQLContext(spark.sparkContext)

    credentials = AwsHook(aws_conn_id).get_credentials()
    hadoop_conf = spark.sparkContext._jsc.hadoopConfiguration()
    hadoop_conf.set("fs.s3a.access.key", credentials.access_key)
    hadoop_conf.set("fs.s3a.secret.key", credentials.secret_key)

    return sql_context
 def execute(self, context):
     """Download every object in the configured S3 bucket to local disk.

     redshift_conn_id: redshift cluster connection info.
     aws_credentials_id: necessary info needed to make AWS connection
     s3_bucket: source data in S3 bucket that has the files we want to copy from.
     """
     # Fix: removed the stale "not implemented yet" log message — this
     # operator is clearly implemented.
     hook = S3Hook(self.aws_credentials_id)
     bucket = self.s3bucket
     keys = hook.list_keys(bucket)
     aws_hook = AwsHook(self.aws_credentials_id)
     credentials = aws_hook.get_credentials()
     session = Session(aws_access_key_id=credentials.access_key,
                       aws_secret_access_key=credentials.secret_key)
     # Fix: build the bucket resource once instead of re-creating the
     # boto3 resource on every loop iteration.
     s3_bucket = session.resource('s3').Bucket(bucket)
     for key in keys:
         logging.info(f"--------------- s3://{bucket}/{key} -----------")
         s3_bucket.download_file(key, '/home/workspace/uk-data/' + key)
    def execute(self, context):
        """Stage a table from S3 into Redshift using access keys plus an
        IAM ARN read from an Airflow Variable."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        aws_arn = Variable.get(self.aws_arn_id)
        redshift_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info(f"Staging {self.destination_table} from S3 to Redshift")
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        staging_stmt = StageToRedshiftOperator.staging_sql.format(
            credentials.access_key,
            credentials.secret_key,
            aws_arn,
            destination_table=self.destination_table,
            s3_path=s3_path,
            json_format=self.json_format)
        redshift_hook.run(staging_stmt)
        self.log.info(f"Staging of {self.destination_table} complete!")
Ejemplo n.º 26
0
    def execute(self, context):
        """Delete existing rows from the destination table, then COPY the
        rendered S3 key into it."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info(
            f"Clearing data from destination Redshift table {self.table}")
        redshift.run("DELETE FROM {}".format(self.table))

        rendered_key = self.s3_key.format(**context)
        self.log.info(
            f"Copying data from S3 file {rendered_key} to Redshift table {self.table}"
        )
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.json_copy_mode, self.aws_region))
Ejemplo n.º 27
0
 def execute(self, context):
     """COPY the rendered S3 key into the staging table."""
     credentials = AwsHook(self.aws_credentials_id).get_credentials()
     redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

     self.log.info("Copying data from S3 to Redshift")
     rendered_key = self.s3_key.format(**context)
     s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
     copy_stmt = StageToRedshiftOperator.copy_query.format(
         self.table, s3_path, credentials.access_key,
         credentials.secret_key, self.json_path)
     redshift.run(copy_stmt)
    def execute(self, context):
        """Create and load a staging table from S3, accepting JSON or CSV
        input formats."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        logging.info(f"Copying table {self.table} to Redshift ....")

        # Only json/csv are supported; anything else is a configuration error.
        if self.format not in ("json", "csv"):
            raise ValueError(" The file format should be JSON or CSV !.")
        if self.format == 'json':
            self.file_format = "format json '{}' ".format(self.extract_format)
        else:
            self.file_format = "format CSV"

        redshift.run(StageToRedshiftOperator.copy_sql.format(
            self.create_sql_stmt, self.table, self.data_path, self.region,
            self.file_format, credentials.access_key, credentials.secret_key))
    def execute(self, context):
        """Optionally clear the target table, then COPY the rendered S3 key
        into it."""
        self.log.info('Loading data from Amazon S3 to Redshift...')

        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        if self.clear_table:
            self.log.info(f'Clearing data from {self.table}')
            redshift.run('DELETE FROM {}'.format(self.table))

        self.log.info(f'Copying data from S3 to table {self.table} Redshift')
        rendered_key = self.s3_key.format(**context)
        s3_path = 's3://{}/{}'.format(self.s3_bucket, rendered_key)
        copy_stmt = StageToRedshiftOperator.copy_sql.format(
            self.table, s3_path, credentials.access_key,
            credentials.secret_key, self.extra_info_sql)
        self.log.info(f"Executing {copy_stmt}")
        redshift.run(copy_stmt)
def load_songs_to_redshift(*args, **kwargs):
    """Stage songs data from S3 into Redshift.

    Connection ids and the source location are supplied through the calling
    Airflow task's `params`.
    """
    params = kwargs["params"]

    aws_hook = AwsHook(params["aws_hook"])
    credentials = aws_hook.get_credentials()
    redshift_hook = PostgresHook(params["redshift_hook"])

    redshift_hook.run(sql.stage_songs.format(params["songs_data_location"],
                                             credentials.access_key,
                                             credentials.secret_key))
Ejemplo n.º 31
0
def load_trip_data_to_redshift(*args, **kwargs):
    """Load one month of trip data into Redshift via COPY.

    The month comes from the task's execution_date in the Airflow context,
    so backfills load the correct historical partition.
    """
    aws_hook = AwsHook("aws_credentials")
    credentials = aws_hook.get_credentials()
    redshift_hook = PostgresHook("redshift")

    # execution_date is injected by Airflow into the task context (this
    # resolves the original TODO; the utcnow() fallback was dead code).
    execution_date=kwargs["execution_date"]

    sql_stmt = sql_statements.COPY_MONTHLY_TRIPS_SQL.format(
        credentials.access_key,
        credentials.secret_key,
        year=execution_date.year,
        month=execution_date.month
    )
    redshift_hook.run(sql_stmt)
    def execute(self, context):
        """Stage self.s3_source into self.table through a credentialed COPY."""
        redshift_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        aws_hook = AwsHook(aws_conn_id=self.aws_conn_id)
        credentials = aws_hook.get_credentials()

        # NOTE(review): FROM {s3_source} is not quoted in this template —
        # that only works if self.s3_source carries its own quotes; confirm
        # against the DAG that sets it.
        stage_sql = """
                    COPY {table}
                    FROM {s3_source}
                    with credentials
                    'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                    {json_format};
                """.format(
            table=self.table,
            s3_source=self.s3_source,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            json_format=self.json_format,
        )
        self.log.info("Staging data to Redshift")
        redshift_hook.run(stage_sql)
    def execute(self, context):
        """COPY JSON data from S3 into self.table in the us-west-2 region."""
        # AWS credentials and the Redshift connection.
        credentials = AwsHook(self.aws_credentials).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info('Copying data from s3 to Redshift')
        copy_stmt = """COPY {}
                        FROM '{}'
                        ACCESS_KEY_ID '{}'
                        SECRET_ACCESS_KEY '{}'
                        REGION 'us-west-2'
                        FORMAT AS JSON '{}'""".format(self.table,
                                                      self.s3_bucket,
                                                      credentials.access_key,
                                                      credentials.secret_key,
                                                      self.copy_json_option)
        redshift.run(copy_stmt)
    def execute(self, context):
        """Clear the destination table, then COPY s3://<bucket>/<key> into it."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Clearing data from destination Redshift table")
        redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        s3_path = "s3://{}/{}".format(self.s3_bucket, self.s3_key)
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            table=self.table,
            s3_path=s3_path,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            json_format=self.json_format))
        self.log.info('StageToRedshiftOperator executed')