Example #1
 def test_file_present(self, sftp_hook_mock):
     sftp_hook_mock.return_value.get_mod_time.return_value = '19700101000000'
     sftp_sensor = SFTPSensor(task_id='unit_test',
                              path='/path/to/file/1970-01-01.txt')
     context = {'ds': '1970-01-01'}
     output = sftp_sensor.poke(context)
     sftp_hook_mock.return_value.get_mod_time.assert_called_with(
         '/path/to/file/1970-01-01.txt')
     self.assertTrue(output)
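
Each of these test methods takes an sftp_hook_mock argument, which means the SFTPHook used by the sensor is replaced by a mock via a patch decorator that the excerpt does not show. Below is a minimal sketch of the scaffolding the methods assume; the dotted patch target follows the Airflow 1.10 contrib layout and may differ in other versions:

import unittest
from unittest import mock

# Airflow 1.10 contrib import path; newer releases ship the sensor as
# airflow.providers.sftp.sensors.sftp.SFTPSensor instead.
from airflow.contrib.sensors.sftp_sensor import SFTPSensor

# SFTP protocol status codes (the same numeric values paramiko defines).
SFTP_NO_SUCH_FILE = 2
SFTP_FAILURE = 4


class SFTPSensorTestCase(unittest.TestCase):
    # Every test method that takes sftp_hook_mock is decorated like this,
    # so the sensor talks to a mock instead of a real SFTP server.
    @mock.patch('airflow.contrib.sensors.sftp_sensor.SFTPHook')
    def test_file_present(self, sftp_hook_mock):
        sftp_hook_mock.return_value.get_mod_time.return_value = '19700101000000'
        sensor = SFTPSensor(task_id='unit_test',
                            path='/path/to/file/1970-01-01.txt')
        self.assertTrue(sensor.poke({'ds': '1970-01-01'}))
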
Example #2
 def test_sftp_failure(self, sftp_hook_mock):
     sftp_hook_mock.return_value.get_mod_time.side_effect = OSError(
         SFTP_FAILURE, 'SFTP failure')
     sftp_sensor = SFTPSensor(task_id='unit_test',
                              path='/path/to/file/1970-01-01.txt')
     context = {'ds': '1970-01-01'}
     with self.assertRaises(OSError):
         sftp_sensor.poke(context)
     sftp_hook_mock.return_value.get_mod_time.assert_called_with(
         '/path/to/file/1970-01-01.txt')
Example #3
 def test_file_absent(self, sftp_hook_mock):
     sftp_hook_mock.return_value.get_mod_time.side_effect = OSError(
         SFTP_NO_SUCH_FILE, 'File missing')
     sftp_sensor = SFTPSensor(task_id='unit_test',
                              path='/path/to/file/1970-01-01.txt')
     context = {'ds': '1970-01-01'}
     output = sftp_sensor.poke(context)
     sftp_hook_mock.return_value.get_mod_time.assert_called_with(
         '/path/to/file/1970-01-01.txt')
     self.assertFalse(output)
Example #4
default_args = {
    # ... (other entries, including 'start_date', are not shown in this excerpt)
    'email': [my_email_address],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(dag_name,
          catchup=False,
          default_args=default_args,
          schedule_interval="30 8 * * * *")

# the following tasks are created by instantiating operators
detect_file = SFTPSensor(task_id='detect_file',
                         poke_interval=10,
                         timeout=3600,
                         sftp_conn_id='sftp_default',
                         path=source_path + filename,
                         dag=dag)

update_nb_of_chunks = SFTPUpdateNbOfChunksOperator(
    task_id='update_nb_of_chunks',
    conn_id='sftp_default',
    file_path=source_path + filename,
    master_variable=dag_name,
    chunks_variable_name="number_of_chunks",
    chunk_size=chunk_size,
    dag=dag)

# These are passed in via default_args, but they do not seem to be propagated (Airflow bug).
dag.start_date = default_args['start_date']
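
SFTPUpdateNbOfChunksOperator is not a stock Airflow operator; it is a custom one used only by this example. A plausible minimal implementation is sketched below, assuming it measures the remote file's size over SFTP and records the resulting chunk count in a JSON Airflow Variable named by master_variable; this behaviour is inferred from the argument names, not taken from the original source:

import json
import math

from airflow.contrib.hooks.sftp_hook import SFTPHook
from airflow.models import BaseOperator, Variable
from airflow.utils.decorators import apply_defaults


class SFTPUpdateNbOfChunksOperator(BaseOperator):
    """Hypothetical reconstruction: store how many chunk_size-sized chunks
    a remote file splits into inside a JSON Airflow Variable."""

    @apply_defaults
    def __init__(self, conn_id, file_path, master_variable,
                 chunks_variable_name, chunk_size, *args, **kwargs):
        super(SFTPUpdateNbOfChunksOperator, self).__init__(*args, **kwargs)
        self.conn_id = conn_id
        self.file_path = file_path
        self.master_variable = master_variable
        self.chunks_variable_name = chunks_variable_name
        self.chunk_size = chunk_size

    def execute(self, context):
        hook = SFTPHook(ftp_conn_id=self.conn_id)
        # stat() the remote file to get its size in bytes
        size = hook.get_conn().stat(self.file_path).st_size
        nb_of_chunks = int(math.ceil(size / float(self.chunk_size)))
        # Keep the count under the per-DAG "master" Variable
        state = Variable.get(self.master_variable,
                             default_var={}, deserialize_json=True)
        state[self.chunks_variable_name] = nb_of_chunks
        Variable.set(self.master_variable, json.dumps(state))
        return nb_of_chunks

Under this reading, downstream tasks could read the count back with Variable.get(dag_name, deserialize_json=True) and decide how many chunk-processing tasks to fan out.
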
Example #5
default_args = {
    # ... (entries not shown in this excerpt)
}

dag = DAG(
    'sftpSensorTest',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    dagrun_timeout=timedelta(minutes=5),
)

start = DummyOperator(task_id='run_this_first', dag=dag)

sftp = SFTPSensor(
    task_id='sftp_check',
    path='data/filelist.txt',
    sftp_conn_id='sftp_beefy',
    poke_interval=10,
    mode='poke',
    soft_fail=False,
    dag=dag,
)

nextStep = KubernetesPodOperator(
    namespace='airflow',
    image="python:3.6-stretch",
    image_pull_policy="Always",
    cmds=["python", "-c"],
    arguments=["print('hello world')"],
    name="python",
    task_id="startPython",
    is_delete_operator_pod=True,
    hostnetwork=False,
)
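
The excerpt stops inside the KubernetesPodOperator call, so the pod task is never attached to the DAG and no dependencies are declared. A plausible completion, assuming the three tasks simply run in sequence (neither the dag assignment nor the ordering appears in the source):

# Assumed completion of the truncated snippet above.
nextStep.dag = dag
start >> sftp >> nextStep
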
Example #6
def load_file_subdag(parent_dag_name, child_dag_name, sftp_conn_id, args):
    """This dag will iteratively call the listed tasks."""
    dag_subdag_subdag = DAG(
        dag_id='{0}.{1}'.format(parent_dag_name, child_dag_name),
        default_args=args,
    )
    with dag_subdag_subdag:
        file_check = \
            SFTPSensor(
                task_id='file_check',
                sftp_conn_id=sftp_conn_id,
                poke_interval=60,
                timeout=600,
                soft_fail=False,
                path='{}{}{}{}{}'.format(
                    SFTP_PATH_DICT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    SFTP_FILE_NAME[child_dag_name], '%s',
                    SFTP_FILE_EXTN[child_dag_name]) % (DATE_STR_FORMATTED)
            )
        file_transfer_raw = \
            SFTPToS3Operator(
                task_id='file_transfer_raw',
                sftp_conn_id=sftp_conn_id,
                sftp_path='{}{}{}{}{}'.format(
                    SFTP_PATH_DICT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    SFTP_FILE_NAME[child_dag_name], '%s',
                    SFTP_FILE_EXTN[child_dag_name]) % (DATE_STR_FORMATTED),
                s3_conn_id=JOB_ARGS['s3_conn_id'],
                s3_bucket=BUCKET_NAME_RAW[parent_dag_name.split(".")[1]],
                s3_key='{}{}{}{}{}'.format(
                    S3_KEY_DICT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    SFTP_FILE_NAME[child_dag_name],
                    '%s', SFTP_FILE_EXTN[child_dag_name]) % (DATE_STR_FORMATTED)
            )
        abc_validations = \
            SFTPS3FileSizeOperator(
                task_id='abc_validations',
                sftp_conn_id=sftp_conn_id,
                sftp_path='{}{}{}{}{}'.format(
                    SFTP_PATH_DICT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    SFTP_FILE_NAME[child_dag_name],
                    '%s', SFTP_FILE_EXTN[child_dag_name]) % (DATE_STR_FORMATTED),
                s3_conn_id=JOB_ARGS['s3_conn_id'],
                s3_bucket=BUCKET_NAME_RAW[parent_dag_name.split(".")[1]],
                s3_key='{}{}{}{}{}'.format(
                    S3_KEY_DICT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    SFTP_FILE_NAME[child_dag_name],
                    '%s', SFTP_FILE_EXTN[child_dag_name]) % (DATE_STR_FORMATTED)
            )
        file_stage_copy = \
            SSHOperator(
                task_id='file_stage_copy',
                ssh_conn_id=ADSALES_EMR,
                command='{}{}{}'.format(
                    JOB_ARGS['spark_submit'],
                    JOB_ARGS['spark_jars'],
                    FW_STAGE_CODE_PATH
                ) + '{}{}{}{}{}{}'.format(
                    S3_RAW_BUCKET[parent_dag_name.split(".")[1]],
                    S3_KEY_DICT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    SFTP_FILE_NAME[child_dag_name], '%s',
                    STAGE_SFTP_FILE_EXTN[child_dag_name]
                ) % (DATE_STR_FORMATTED) + '{}{}{}{}{}'.format(
                    S3_STAGE_BUCKET[parent_dag_name.split(".")[1]],
                    S3_KEY_DICT_STAGE[child_dag_name],
                    parent_dag_name.split(".")[1],
                    STAGE_SFTP_FILE_NAME[child_dag_name],
                    '/%s/ ') % (DATE_STR_FORMATTED
                                ) + SRC_SYS_ID[parent_dag_name.split(".")[1]]
            )
        dq_check = \
            SSHOperator(
                task_id='dq_check',
                ssh_conn_id=ADSALES_EMR,
                command='{}{}{}'.format(
                    JOB_ARGS['spark_submit'],
                    JOB_ARGS['spark_jars'],
                    FW_STAGE_DQ_CODE_PATH
                ) + ' ' + '{}{}{}{}{}'.format(
                    S3_STAGE_BUCKET[parent_dag_name.split(".")[1]],
                    S3_KEY_DICT_STAGE[child_dag_name],
                    parent_dag_name.split(".")[1],
                    STAGE_SFTP_FILE_NAME[child_dag_name],
                    '/%s/'
                ) % (DATE_STR_FORMATTED) + ' ' + '{}{}{}{}{}'.format(
                    S3_STAGE_BUCKET[parent_dag_name.split(".")[1]],
                    S3_KEY_DICT_STAGE[child_dag_name],
                    parent_dag_name.split(".")[1],
                    STAGE_SFTP_FILE_NAME[child_dag_name],
                    '/%s/'
                ) % (DATE_STR_FORMATTED) + ' ' + '{}{}{}{}{}'.format(
                    S3_STAGE_BUCKET[parent_dag_name.split(".")[1]],
                    S3_KEY_DICT_STAGE_OUT[child_dag_name],
                    parent_dag_name.split(".")[1],
                    STAGE_SFTP_FILE_NAME[child_dag_name],
                    '/%s/'
                ) % (DATE_STR_FORMATTED) + ' ' + '{}{}{}{}'.format(
                    DUPLICATE_COLUMN_LIST[child_dag_name],
                    NULL_COLUMN_LIST[child_dag_name],
                    str(JOB_ARGS['stage_dup_check']),
                    str(JOB_ARGS['stage_null_check'])
                )
            )
        file_check >> file_transfer_raw >> abc_validations >> file_stage_copy >> dq_check
    return dag_subdag_subdag
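
load_file_subdag is a factory: it returns a DAG whose dag_id has the form '<parent>.<child>', which is exactly what SubDagOperator expects of a subdag. A wiring sketch follows; the parent dag_id 'adsales.vendor_x', the 'impressions' key and the default_args dict are placeholders, not values from the original source:

from datetime import datetime

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator

default_args = {'owner': 'airflow', 'start_date': datetime(2019, 1, 1)}

# Placeholder parent DAG; note that in the code above parent_dag_name itself
# contains a dot, since it is indexed with parent_dag_name.split(".")[1].
parent_dag = DAG('adsales.vendor_x',
                 default_args=default_args,
                 schedule_interval='@daily')

load_impressions = SubDagOperator(
    task_id='impressions',                       # must equal child_dag_name
    subdag=load_file_subdag('adsales.vendor_x',  # must equal parent_dag.dag_id
                            'impressions',
                            sftp_conn_id='sftp_default',
                            args=default_args),
    dag=parent_dag,
)
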
Example #7
 def test_hook_not_created_during_init(self):
     sftp_sensor = SFTPSensor(task_id='unit_test',
                              path='/path/to/file/1970-01-01.txt')
     self.assertIsNone(sftp_sensor.hook)
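
Taken together, these tests pin down the sensor's contract: the hook is created lazily inside poke (Example #7), poke returns True when get_mod_time succeeds (Example #1), returns False when the server reports a missing file (Example #3), and re-raises any other OSError (Example #2). Below is a simplified poke implementation consistent with that contract, written against the Airflow 1.10 contrib import paths; it is an illustrative sketch, not the verbatim Airflow source:

from paramiko import SFTP_NO_SUCH_FILE

from airflow.contrib.hooks.sftp_hook import SFTPHook
from airflow.sensors.base_sensor_operator import BaseSensorOperator
from airflow.utils.decorators import apply_defaults


class SFTPSensor(BaseSensorOperator):
    """Simplified reconstruction for illustration only."""

    template_fields = ('path',)

    @apply_defaults
    def __init__(self, path, sftp_conn_id='sftp_default', *args, **kwargs):
        super(SFTPSensor, self).__init__(*args, **kwargs)
        self.path = path
        self.sftp_conn_id = sftp_conn_id
        self.hook = None  # created lazily, as Example #7 asserts

    def poke(self, context):
        self.hook = SFTPHook(self.sftp_conn_id)
        self.log.info('Poking for %s', self.path)
        try:
            # Example #1: a successful call means the file is present.
            self.hook.get_mod_time(self.path)
        except OSError as e:
            if e.errno != SFTP_NO_SUCH_FILE:
                # Example #2: unexpected errors propagate.
                raise e
            # Example #3: the file is simply not there yet.
            return False
        self.hook.close_conn()
        return True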