Beispiel #1
0
    def test_execute_with_failing_transform_script(self, mock_popen):
        """Check that a non-zero transform script exit code fails the task.

        ``mock_popen.return_value`` stands in for the spawned process
        (presumably ``mock_popen`` is a patched ``subprocess.Popen``; the
        patch decorator is not visible here -- confirm). The fake process
        yields no stdout lines and reports return code 42, and ``execute``
        is expected to raise ``AirflowException`` with the message
        ``'Transform script failed: 42'``.
        """
        # Fake process: nothing to read from stdout, exit status 42.
        fake_proc = mock_popen.return_value
        fake_proc.returncode = 42
        fake_proc.wait.return_value = None
        fake_proc.stdout.readline.side_effect = []

        # Seed the bucket with the object referenced by ``source_s3_key``.
        bucket_name, src_key, dst_key = "bucket", "foo", "bar"
        s3_client = boto3.client('s3')
        s3_client.create_bucket(Bucket=bucket_name)
        s3_client.upload_fileobj(
            Bucket=bucket_name, Key=src_key, Fileobj=io.BytesIO(b"input"))

        url_template = "s3://{0}/{1}"
        source_url = url_template.format(bucket_name, src_key)
        dest_url = url_template.format(bucket_name, dst_key)
        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            transform_script=self.transform_script,
            replace=True)

        with self.assertRaises(AirflowException) as raised:
            operator.execute(None)

        self.assertEqual('Transform script failed: 42', str(raised.exception))
Beispiel #2
0
    def test_execute_with_transform_script(self, mock_log, mock_popen):
        """Check that every line emitted by the transform script is logged.

        The fake process behind ``mock_popen.return_value`` returns three
        byte strings from ``stdout.readline`` and exits with code 0. After
        ``execute`` runs, ``mock_log.info`` must have been called, in order,
        with each of those lines decoded using the interpreter's default
        encoding. (``mock_log`` / ``mock_popen`` are injected mocks; what
        they patch is not visible here -- confirm against the decorators.)
        """
        emitted = [b"Foo", b"Bar", b"Baz"]

        # Fake process: replays ``emitted`` on stdout, then exits cleanly.
        fake_proc = mock_popen.return_value
        fake_proc.returncode = 0
        fake_proc.wait.return_value = None
        fake_proc.stdout.readline.side_effect = emitted

        # Seed the bucket with the object referenced by ``source_s3_key``.
        bucket_name, src_key, dst_key = "bucket", "foo", "bar"
        s3_client = boto3.client('s3')
        s3_client.create_bucket(Bucket=bucket_name)
        s3_client.upload_fileobj(
            Bucket=bucket_name, Key=src_key, Fileobj=io.BytesIO(b"input"))

        url_template = "s3://{0}/{1}"
        source_url = url_template.format(bucket_name, src_key)
        dest_url = url_template.format(bucket_name, dst_key)
        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            transform_script=self.transform_script,
            replace=True)
        operator.execute(None)

        encoding = sys.getdefaultencoding()
        expected_calls = [mock.call(raw.decode(encoding)) for raw in emitted]
        mock_log.info.assert_has_calls(expected_calls)
    def test_execute_with_select_expression(self, mock_select_key):
        """Check that a select expression is forwarded with the source key.

        The source and destination URLs come from ``self.s3_paths()`` (a
        helper defined outside this snippet). After ``execute`` runs,
        ``mock_select_key`` must have been called exactly once with the
        source URL as ``key`` and the query as ``expression``.
        """
        source_url, dest_url = self.s3_paths()
        query = "SELECT * FROM s3object s"

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            select_expression=query,
            replace=True,
        )
        operator.execute(None)

        mock_select_key.assert_called_once_with(
            key=source_url, expression=query)
    def test_execute_with_transform_script_args(self, mock_popen):
        """Check that ``script_args`` are appended to the spawned command.

        The process mock is prepared through ``self.mock_process`` and the
        S3 URLs through ``self.s3_paths`` (both helpers are defined outside
        this snippet). The assertion inspects the first positional argument
        of the last ``mock_popen`` call and requires everything after its
        first three elements to equal ``script_args``.
        """
        self.mock_process(mock_popen, process_output=[b"Foo", b"Bar", b"Baz"])
        source_url, dest_url = self.s3_paths()
        script_args = ['arg1', 'arg2']

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            transform_script=self.transform_script,
            script_args=script_args,
            replace=True,
        )
        operator.execute(None)

        # call_args[0] holds the positional args; the command list is first.
        positional = mock_popen.call_args[0]
        command = positional[0]
        self.assertEqual(script_args, command[3:])
    def test_execute_with_failing_transform_script(self, mock_popen):
        """Check that a transform script return code of 42 fails the task.

        ``self.mock_process`` (defined outside this snippet) configures
        ``mock_popen`` with ``return_code=42``. ``execute`` is then expected
        to raise ``AirflowException`` whose message is
        ``'Transform script failed: 42'``.
        """
        self.mock_process(mock_popen, return_code=42)
        source_url, dest_url = self.s3_paths()

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            transform_script=self.transform_script,
            replace=True,
        )

        with self.assertRaises(AirflowException) as raised:
            operator.execute(None)

        self.assertEqual('Transform script failed: 42', str(raised.exception))
    def test_execute_with_failing_transform_script(self, mock_popen):
        """Check that a transform script return code of 42 fails the task.

        ``self.mock_process`` (defined outside this snippet) configures
        ``mock_popen`` with ``return_code=42``. ``execute`` must raise
        ``AirflowException``, and the string form of the raised exception
        must be ``'Transform script failed: 42'``.
        """
        self.mock_process(mock_popen, return_code=42)
        source_url, dest_url = self.s3_paths()

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            transform_script=self.transform_script,
            replace=True,
        )

        with pytest.raises(AirflowException) as excinfo:
            operator.execute(None)

        message = str(excinfo.value)
        assert message == 'Transform script failed: 42'
    def test_execute_with_transform_script(self, mock_log, mock_popen):
        """Check that every line emitted by the transform script is logged.

        ``self.mock_process`` (defined outside this snippet) is given three
        byte strings as ``process_output``. After ``execute`` runs,
        ``mock_log.info`` must have been called, in order, with each of
        those lines decoded using the interpreter's default encoding.
        """
        emitted = [b"Foo", b"Bar", b"Baz"]
        self.mock_process(mock_popen, process_output=emitted)
        source_url, dest_url = self.s3_paths()

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            transform_script=self.transform_script,
            replace=True,
        )
        operator.execute(None)

        encoding = sys.getdefaultencoding()
        expected_calls = [mock.call(raw.decode(encoding)) for raw in emitted]
        mock_log.info.assert_has_calls(expected_calls)
    def test_execute_with_select_expression(self, mock_select_key):
        """Check select-expression forwarding and the written destination.

        After ``execute`` runs, ``mock_select_key`` must have been called
        exactly once with the source URL as ``key`` and the query as
        ``expression``. The object stored at ``self.bucket`` /
        ``self.output_key`` is then fetched and its body must equal
        ``self.content`` (these attributes and ``self.s3_paths`` are set up
        outside this snippet).
        """
        source_url, dest_url = self.s3_paths()
        query = "SELECT * FROM s3object s"

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            select_expression=query,
            replace=True,
        )
        operator.execute(None)

        mock_select_key.assert_called_once_with(
            key=source_url, expression=query)

        # Read back the destination object and compare its payload.
        s3_client = boto3.client('s3')
        stored = s3_client.get_object(Bucket=self.bucket, Key=self.output_key)
        self.assertEqual(self.content, stored['Body'].read())
Beispiel #9
0
    def test_execute_with_select_expression(self, mock_select_key):
        """Check that a select expression is forwarded with the source key.

        A bucket containing one input object is created through a boto3 S3
        client, then the operator is run with a ``select_expression`` instead
        of a transform script. ``mock_select_key`` must have been called
        exactly once with the source ``s3://`` URL as ``key`` and the query
        as ``expression``.
        """
        # Seed the bucket with the object referenced by ``source_s3_key``.
        bucket_name, src_key, dst_key = "bucket", "foo", "bar"
        s3_client = boto3.client('s3')
        s3_client.create_bucket(Bucket=bucket_name)
        s3_client.upload_fileobj(
            Bucket=bucket_name, Key=src_key, Fileobj=io.BytesIO(b"input"))

        url_template = "s3://{0}/{1}"
        source_url = url_template.format(bucket_name, src_key)
        dest_url = url_template.format(bucket_name, dst_key)
        query = "SELECT * FROM S3Object s"

        operator = S3FileTransformOperator(
            task_id="task_id",
            source_s3_key=source_url,
            dest_s3_key=dest_url,
            select_expression=query,
            replace=True)
        operator.execute(None)

        mock_select_key.assert_called_once_with(
            key=source_url, expression=query)