def test_upload_file(self, mock_lib, mock_uploader):
    from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook

    hook = AzureDataLakeHook(azure_data_lake_conn_id='adl_test_key')
    hook.upload_file(local_path='tests/hooks/test_adl_hook.py',
                     remote_path='/test_adl_hook.py',
                     nthreads=64, overwrite=True,
                     buffersize=4194304, blocksize=4194304)
    mock_uploader.assert_called_once_with(hook.connection,
                                          lpath='tests/hooks/test_adl_hook.py',
                                          rpath='/test_adl_hook.py',
                                          nthreads=64, overwrite=True,
                                          buffersize=4194304,
                                          blocksize=4194304)
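
# For context, a minimal sketch of the hook method exercised by the test above,
# assuming AzureDataLakeHook.upload_file delegates to azure-datalake-store's
# multithread.ADLUploader (the object the patched mock_uploader stands in for).
# The real hook may differ; this only illustrates what the
# assert_called_once_with expectation corresponds to.
from azure.datalake.store import multithread


def upload_file(self, local_path, remote_path, nthreads=64, overwrite=True,
                buffersize=4194304, blocksize=4194304):
    """Upload a local file to the configured Azure Data Lake Store path."""
    # self.connection is the authenticated ADL filesystem client held by the hook.
    multithread.ADLUploader(self.connection,
                            lpath=local_path,
                            rpath=remote_path,
                            nthreads=nthreads,
                            overwrite=overwrite,
                            buffersize=buffersize,
                            blocksize=blocksize)
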
def execute(self, context):
    oracle_hook = OracleHook(oracle_conn_id=self.oracle_conn_id)
    azure_data_lake_hook = AzureDataLakeHook(
        azure_data_lake_conn_id=self.azure_data_lake_conn_id)

    self.log.info("Dumping Oracle query results to local file")
    conn = oracle_hook.get_conn()
    cursor = conn.cursor()
    cursor.execute(self.sql, self.sql_params)

    with TemporaryDirectory(prefix='airflow_oracle_to_azure_op_') as temp:
        self._write_temp_file(cursor, os.path.join(temp, self.filename))
        self.log.info("Uploading local file to Azure Data Lake")
        azure_data_lake_hook.upload_file(
            os.path.join(temp, self.filename),
            os.path.join(self.azure_data_lake_path, self.filename))
    cursor.close()
    conn.close()
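
# The _write_temp_file helper called above is not shown here. A plausible
# sketch, assuming the operator dumps the cursor's result set to CSV with the
# csv module, taking column names from cursor.description. The attribute names
# self.delimiter and self.encoding are assumptions for illustration only.
import csv


def _write_temp_file(self, cursor, path_to_save):
    """Write the rows of an open cursor to a local CSV file."""
    with open(path_to_save, 'w', newline='', encoding=self.encoding) as csvfile:
        writer = csv.writer(csvfile, delimiter=self.delimiter)
        # Header row from the cursor metadata, then the data rows.
        writer.writerow([column[0] for column in cursor.description])
        writer.writerows(cursor)
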
def execute(self, context):
    source_hook = WasbHook(wasb_conn_id=self.azure_blob_conn_id)

    # Assumption: there is sufficient disk space to download the blob in question
    with NamedTemporaryFile(mode='wb', delete=True) as f:
        source_hook.get_file(file_path=f.name,
                             container_name=self.src_blob_container,
                             blob_name=self.src_blob)
        f.flush()
        self.log.info("Saving file to %s", f.name)

        if self.adls_gen == 1:
            self.log.info("Uploading to ADLS Gen 1")
            adls_hook = AzureDataLakeHook(
                azure_data_lake_conn_id=self.azure_data_lake_conn_id)
            adls_hook.upload_file(local_path=f.name, remote_path=f.name)
        else:
            self.log.info("Uploading to ADLS Gen 2")
            adls_hook = WasbHook(wasb_conn_id=self.azure_data_lake_conn_id)
            adls_hook.load_file(f.name,
                                container_name=self.dest_adls_container,
                                blob_name=self.dest_adls)

    self.log.info("All done, uploaded files to Azure Data Lake Store")
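
# A hedged usage sketch for the operator whose execute() appears above. The
# class name AzureBlobToADLSOperator is a placeholder (the real name is not
# shown in this listing); the constructor arguments mirror the self.* attributes
# the method reads, and the connection IDs are assumed to be configured in Airflow.
from datetime import datetime

from airflow import DAG

with DAG(dag_id='blob_to_adls_example',
         start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    copy_blob = AzureBlobToADLSOperator(  # placeholder class name
        task_id='copy_blob_to_adls',
        azure_blob_conn_id='wasb_default',
        azure_data_lake_conn_id='adl_default',
        src_blob_container='source-container',
        src_blob='data/input.csv',
        adls_gen=2,
        dest_adls_container='dest-container',
        dest_adls='data/input.csv',
    )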