コード例 #1
0
 def test_poke(self):
     # Build a sensor for 'test_cluster' whose target status is 'available',
     # using the 'aws_default' connection id.
     sensor = AwsRedshiftClusterSensor(
         task_id='test_cluster_sensor',
         poke_interval=1,
         timeout=5,
         aws_conn_id='aws_default',
         cluster_identifier='test_cluster',
         target_status='available',
     )

     # A single poke (called with None as its only argument) must be truthy.
     poke_result = sensor.poke(None)
     self.assertTrue(poke_result)
コード例 #2
0
 def test_poke(self):
     # Keyword arguments for a sensor that watches 'test_cluster' and targets
     # the 'available' status.
     sensor_kwargs = {
         'task_id': 'test_cluster_sensor',
         'poke_interval': 1,
         'timeout': 5,
         'aws_conn_id': 'aws_default',
         'cluster_identifier': 'test_cluster',
         'target_status': 'available',
     }
     sensor = AwsRedshiftClusterSensor(**sensor_kwargs)

     # One poke, called with None as its only argument, is expected to be
     # truthy.
     self.assertTrue(sensor.poke(None))
    def test_poke_cluster_not_found(self):
        # Run the class's _create_cluster helper first (defined outside this
        # snippet), then point the sensor at the identifier
        # 'test_cluster_not_found' with 'cluster_not_found' as target status.
        self._create_cluster()
        sensor = AwsRedshiftClusterSensor(
            task_id='test_cluster_sensor',
            poke_interval=1,
            timeout=5,
            aws_conn_id='aws_default',
            cluster_identifier='test_cluster_not_found',
            target_status='cluster_not_found',
        )

        # A single poke (called with None as its only argument) must be truthy.
        poke_result = sensor.poke(None)
        self.assertTrue(poke_result)
コード例 #4
0
    def test_poke_cluster_not_found(self):
        # The class's _create_cluster helper (defined outside this snippet)
        # runs before the sensor is built.
        self._create_cluster()

        # Sensor aimed at the identifier 'test_cluster_not_found', with
        # 'cluster_not_found' as the status it waits for.
        sensor_kwargs = {
            'task_id': 'test_cluster_sensor',
            'poke_interval': 1,
            'timeout': 5,
            'aws_conn_id': 'aws_default',
            'cluster_identifier': 'test_cluster_not_found',
            'target_status': 'cluster_not_found',
        }
        sensor = AwsRedshiftClusterSensor(**sensor_kwargs)

        # One poke, called with None as its only argument, is expected to be
        # truthy.
        self.assertTrue(sensor.poke(None))
コード例 #5
0
    schedule_interval=None,
    description="Create Redshift cluster and tables.",
    start_date=datetime.utcnow(),
)

# Task declarations for the DAG object `dag` built above. No ordering between
# the tasks is set in these lines; any dependencies are declared elsewhere.

# First task, by its task_id; a DummyOperator, so presumably a no-op marker.
start_dag_task = DummyOperator(task_id="start_dag", dag=dag)

# Runs the CreateRedshiftClusterOperator with the shared redshift_config.
# NOTE(review): the operator is defined outside this snippet; presumably it
# provisions the cluster described by the config -- confirm.
create_redshift_task = CreateRedshiftClusterOperator(
    task_id="create_redshift_cluster",
    dag=dag,
    config=redshift_config,
)

# Sensor on the cluster named by redshift_cluster_id. No target_status,
# aws_conn_id, poke_interval or timeout is passed, so whatever defaults the
# sensor defines are used -- TODO confirm they match the intended wait.
wait_for_redshift_task = AwsRedshiftClusterSensor(
    task_id="wait_for_redshift_cluster",
    cluster_identifier=redshift_cluster_id,
    dag=dag,
)

# Runs the SaveRedshiftHostOperator for the same cluster identifier and config.
# NOTE(review): judging by the task_id it presumably records the cluster
# endpoint for later tasks -- verify against the operator's implementation.
save_redshift_endpoint_task = SaveRedshiftHostOperator(
    task_id="save_redshift_endpoint",
    cluster_identifier=redshift_cluster_id,
    dag=dag,
    config=redshift_config,
)

create_schemas_task = PostgresOperator(
    task_id="create_schemas",
    sql=[
        "create schema if not exists stage;",
        "create schema if not exists analytics;",
コード例 #6
0
# Create a Redshift cluster
# NOTE(review): the cluster_identifier f-string calls datetime.now(local_tz)
# when this module is executed, not when the task runs, so the minute-resolution
# name is fixed at parse time and can differ between parses of this file --
# confirm this is intended.
create_redshift_cluster = AWSRedshiftOperator(
    task_id="create_redshift_cluster",
    dag=dag,
    conn_id=AWS_CONN_ID,
    redshift_conn_id=AWS_REDSHIFT_CONN_ID,
    time_zone=local_tz,
    cluster_identifier=
    f"news-nlp-redshift-{datetime.now(local_tz).strftime('%Y-%m-%d-%H-%M')}",
)
# Wait for Redshift cluster to be ready
# The identifier is a Jinja template string that pulls element [0] of the
# 'return_value' XCom pushed by the 'create_redshift_cluster' task. This
# presumably relies on cluster_identifier being a templated field of the
# sensor -- TODO confirm, otherwise the literal template text is used as the id.
redshift_ready_sensor = AwsRedshiftClusterSensor(
    task_id="sense_redshift_cluster",
    dag=dag,
    cluster_identifier=
    "{{ task_instance.xcom_pull('create_redshift_cluster', key='return_value')[0] }}",
    target_status='available',
    aws_conn_id=AWS_CONN_ID,
)

# Load the data in star schema format from S3 to Redshift
tables = ['dim_date', 'dim_title', 'dim_ner', 'fact_news']
table_load_ops = [
    S3ToRedshiftTransfer(task_id=f"upload_{table}_to_redshift",
                         dag=dag,
                         redshift_conn_id=AWS_REDSHIFT_CONN_ID,
                         aws_conn_id=AWS_CONN_ID,
                         schema=os.environ.get('AWS_REDSHIFT_SCHEMA'),
                         table=table,
                         s3_bucket=os.environ.get('AWS_S3_BUCKET'),
                         s3_key=f'{table}.csv',