def test_execute_with_failing_transform_script(self, mock_popen):
    # A process reporting return code 42 must make execute() raise an
    # AirflowException whose message carries that code.
    # NOTE(review): mock_popen is presumably a patched subprocess.Popen; the
    # decorator is outside this chunk -- confirm.
    fake_proc = mock_popen.return_value
    fake_proc.stdout.readline.side_effect = []  # readline has nothing to yield
    fake_proc.wait.return_value = None
    fake_proc.returncode = 42

    # Upload a small source object for the operator to read.
    bucket, input_key, output_key = "bucket", "foo", "bar"
    payload = io.BytesIO(b"input")
    s3_client = boto3.client('s3')
    s3_client.create_bucket(Bucket=bucket)
    s3_client.upload_fileobj(Bucket=bucket, Key=input_key, Fileobj=payload)

    url_template = "s3://{0}/{1}"
    source_url = url_template.format(bucket, input_key)
    dest_url = url_template.format(bucket, output_key)
    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        transform_script=self.transform_script,
        replace=True,
        task_id="task_id",
    )

    with self.assertRaises(AirflowException) as raised:
        operator.execute(None)

    # Checked after the context manager exits so the assertion really runs.
    self.assertEqual('Transform script failed: 42', str(raised.exception))
def test_execute_with_transform_script(self, mock_log, mock_popen):
    # Every line the (mocked) process emits on stdout must be forwarded to
    # the log via info(), decoded with the interpreter's default encoding.
    # NOTE(review): mock_log / mock_popen are injected by decorators that are
    # outside this chunk -- confirm what they patch.
    process_output = [b"Foo", b"Bar", b"Baz"]
    fake_proc = mock_popen.return_value
    fake_proc.stdout.readline.side_effect = process_output
    fake_proc.wait.return_value = None
    fake_proc.returncode = 0

    # Upload a small source object for the operator to read.
    bucket, input_key, output_key = "bucket", "foo", "bar"
    payload = io.BytesIO(b"input")
    s3_client = boto3.client('s3')
    s3_client.create_bucket(Bucket=bucket)
    s3_client.upload_fileobj(Bucket=bucket, Key=input_key, Fileobj=payload)

    url_template = "s3://{0}/{1}"
    source_url = url_template.format(bucket, input_key)
    dest_url = url_template.format(bucket, output_key)
    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        transform_script=self.transform_script,
        replace=True,
        task_id="task_id",
    )
    operator.execute(None)

    encoding = sys.getdefaultencoding()
    expected_calls = [mock.call(raw.decode(encoding)) for raw in process_output]
    mock_log.info.assert_has_calls(expected_calls)
def test_execute_with_select_expression(self, mock_select_key):
    # With a select_expression configured, execute() must call the mocked
    # select_key exactly once with the source key and that expression.
    select_expression = "SELECT * FROM s3object s"
    # s3_paths() is a helper defined elsewhere on the test class; it yields
    # the (source, destination) pair used below.
    source_url, dest_url = self.s3_paths()

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        select_expression=select_expression,
        replace=True,
        task_id="task_id",
    )
    operator.execute(None)

    mock_select_key.assert_called_once_with(
        key=source_url,
        expression=select_expression,
    )
def test_execute_with_transform_script_args(self, mock_popen):
    # script_args handed to the operator must show up in the command list
    # passed to the mocked Popen, after its first three entries.
    # NOTE(review): the first three entries are presumably the script path
    # plus the source and destination files -- confirm against the operator.
    emitted_lines = [b"Foo", b"Bar", b"Baz"]
    self.mock_process(mock_popen, process_output=emitted_lines)
    source_url, dest_url = self.s3_paths()
    script_args = ['arg1', 'arg2']

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        transform_script=self.transform_script,
        script_args=script_args,
        replace=True,
        task_id="task_id",
    )
    operator.execute(None)

    # call_args[0] holds the positional arguments of the last call; its
    # first element is the command list.
    positional_args = mock_popen.call_args[0]
    command = positional_args[0]
    self.assertEqual(script_args, command[3:])
def test_execute_with_failing_transform_script(self, mock_popen):
    # A mocked process configured with return code 42 must make execute()
    # raise an AirflowException whose message carries that code.
    self.mock_process(mock_popen, return_code=42)
    source_url, dest_url = self.s3_paths()

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        transform_script=self.transform_script,
        replace=True,
        task_id="task_id",
    )

    with self.assertRaises(AirflowException) as raised:
        operator.execute(None)

    # Checked after the context manager exits so the assertion really runs.
    self.assertEqual('Transform script failed: 42', str(raised.exception))
def test_execute_with_failing_transform_script(self, mock_popen):
    # A mocked process configured with return code 42 must make execute()
    # raise an AirflowException whose message carries that code.
    self.mock_process(mock_popen, return_code=42)
    source_url, dest_url = self.s3_paths()

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        transform_script=self.transform_script,
        replace=True,
        task_id="task_id",
    )

    with pytest.raises(AirflowException) as exc_info:
        operator.execute(None)

    # Inspect the captured exception only after the context has exited.
    message = str(exc_info.value)
    assert 'Transform script failed: 42' == message
def test_execute_with_transform_script(self, mock_log, mock_popen):
    # Every line the (mocked) process emits must be forwarded to the log via
    # info(), decoded with the interpreter's default encoding.
    process_output = [b"Foo", b"Bar", b"Baz"]
    self.mock_process(mock_popen, process_output=process_output)
    source_url, dest_url = self.s3_paths()

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        transform_script=self.transform_script,
        replace=True,
        task_id="task_id",
    )
    operator.execute(None)

    encoding = sys.getdefaultencoding()
    expected_calls = [mock.call(raw.decode(encoding)) for raw in process_output]
    mock_log.info.assert_has_calls(expected_calls)
def test_execute_with_select_expression(self, mock_select_key):
    # With a select_expression configured, execute() must call the mocked
    # select_key once with the source key and expression, and the object at
    # the destination key must then hold self.content.
    select_expression = "SELECT * FROM s3object s"
    source_url, dest_url = self.s3_paths()

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        select_expression=select_expression,
        replace=True,
        task_id="task_id",
    )
    operator.execute(None)

    mock_select_key.assert_called_once_with(
        key=source_url,
        expression=select_expression,
    )

    # Read the destination object back and compare its bytes.
    s3_client = boto3.client('s3')
    stored = s3_client.get_object(Bucket=self.bucket, Key=self.output_key)
    stored_bytes = stored['Body'].read()
    self.assertEqual(self.content, stored_bytes)
def test_execute_with_select_expression(self, mock_select_key):
    # With a select_expression configured, execute() must call the mocked
    # select_key exactly once with the source key and that expression.
    # Upload a small source object for the operator to read.
    bucket, input_key, output_key = "bucket", "foo", "bar"
    payload = io.BytesIO(b"input")
    s3_client = boto3.client('s3')
    s3_client.create_bucket(Bucket=bucket)
    s3_client.upload_fileobj(Bucket=bucket, Key=input_key, Fileobj=payload)

    url_template = "s3://{0}/{1}"
    source_url = url_template.format(bucket, input_key)
    dest_url = url_template.format(bucket, output_key)
    select_expression = "SELECT * FROM S3Object s"

    operator = S3FileTransformOperator(
        source_s3_key=source_url,
        dest_s3_key=dest_url,
        select_expression=select_expression,
        replace=True,
        task_id="task_id",
    )
    operator.execute(None)

    mock_select_key.assert_called_once_with(
        key=source_url,
        expression=select_expression,
    )