Example #1
0
def test_sql_select_csv_no_header(client, log_output):
    """Run a positional-column query against CSV data without a header row.

    The object content is two CSV records and the input serialization sets
    ``file_header_info="NONE"``, so the query addresses columns by position:
    ``s._2`` is expected to yield the second field of each record.

    :param client: client object passed through to
        ``test_sql_expressions_custom_input_output``.
    :param log_output: test log object; its ``json_report()`` is printed once
        the helper returns without raising.
    :raises Exception: whatever ``test_sql_expressions_custom_input_output``
        raises is propagated unchanged.
    """
    csv_testcontent = """val1,val2,val3
val4,val5,val6
"""
    tests = [
        ("select_1", "SELECT s._2 FROM S3Object as s", b'val2\nval5\n'),
    ]

    input_serialization = InputSerialization(csv=CSVInput(
        file_header_info="NONE",
        allow_quoted_record_delimiter="FALSE",
    ), )

    output_serialization = OutputSerialization(csv=CSVOutput())

    # No try/except here: the previous handler only re-raised the very same
    # exception, so letting it propagate directly is equivalent and clearer.
    test_sql_expressions_custom_input_output(client, csv_testcontent,
                                             input_serialization,
                                             output_serialization, tests,
                                             log_output)

    # Test passes
    print(log_output.json_report())
Example #2
0
def test_sql_select_json(client, log_output):
    """Run path and projection queries against a two-document JSON object.

    The object content holds two JSON documents, one per line: the first has
    a ``Rules`` array, the second has ``created``/``modified`` keys. Each
    entry in ``tests`` is ``(name, sql_expression, expected_output_bytes)``.

    :param client: client object passed through to
        ``test_sql_expressions_custom_input_output``.
    :param log_output: test log object; its ``json_report()`` is printed once
        the helper returns without raising.
    :raises Exception: whatever ``test_sql_expressions_custom_input_output``
        raises is propagated unchanged.
    """
    json_testcontent = """{ "Rules": [ {"id": "1"}, {"expr": "y > x"}, {"id": "2", "expr": "z = DEBUG"} ]}
{ "created": "June 27", "modified": "July 6" }
"""
    tests = [
        ("select_1", "SELECT id FROM S3Object[*].Rules[*].id",
         b'{"id":"1"}\n{}\n{"id":"2"}\n{}\n'),
        ("select_2",
         "SELECT id FROM S3Object[*].Rules[*].id WHERE id IS NOT MISSING",
         b'{"id":"1"}\n{"id":"2"}\n'),
        ("select_3", "SELECT d.created, d.modified FROM S3Object[*] d",
         b'{}\n{"created":"June 27","modified":"July 6"}\n'),
        ("select_4", "SELECT _1.created, _1.modified FROM S3Object[*]",
         b'{}\n{"created":"June 27","modified":"July 6"}\n'),
        ("select_5", "Select s.rules[1].expr from S3Object s",
         b'{"expr":"y > x"}\n{}\n'),
    ]

    input_serialization = InputSerialization(json=JSONInput(
        json_type="DOCUMENT"))
    output_serialization = OutputSerialization(json=JSONOutput())

    # No try/except here: the previous handler only re-raised the very same
    # exception, so letting it propagate directly is equivalent and clearer.
    test_sql_expressions_custom_input_output(client, json_testcontent,
                                             input_serialization,
                                             output_serialization, tests,
                                             log_output)

    # Test passes
    print(log_output.json_report())
Example #3
0
def test_csv_output_custom_quote_char(client, log_output):
    """Exercise CSV output with varying quote and quote-escape characters.

    Every case is ``(quote_char, escape_char, input_data, expected_output)``.
    The input side is always parsed with ``"`` as both quote and escape
    character; only the output ``QuoteCharacter``/``QuoteEscapeCharacter``
    change from case to case, and output fields are always quoted.

    :param client: client used to create/remove the bucket and passed on to
        ``test_sql_api``.
    :param log_output: test log object; receives the bucket name in ``args``
        and has its ``json_report()`` printed at the end.
    """
    # One fresh bucket is shared by all cases below.
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    cases = [
        # Two-character quote string: an Exception instance is the expected
        # result (presumably meaning the request must be rejected).
        ("''", "''", b'col1,col2,col3\n', Exception()),
        ("'", "'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        # Empty quote character: fields are expected wrapped in NUL bytes.
        ("", '"', b'col1,col2,col3\n',
         b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', '"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', '"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', '"', b'""""\n', b'""""\n'),
        ('"', '"', b'\n', b''),
        ("'", "\\", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("'", "\\", b'col""1,col2,col3\n', b"'col\"\"1','col2','col3'\n"),
        ("'", "\\", b'col\'1,col2,col3\n', b"'col\\'1','col2','col3'\n"),
        ("'", "\\", b'"col\'1","col2","col3"\n', b"'col\\'1','col2','col3'\n"),
        ("'", "\\", b'col\'\n', b"'col\\''\n"),
        # Two consecutive escaped quotes
        ("'", "\\", b'"a"""""\n', b"'a\"\"'\n"),
    ]

    client.make_bucket(bucket_name)

    try:
        for case_no, case in enumerate(cases):
            out_quote, out_escape, object_data, wanted = case

            # Fixed input format: header-less CSV using '"' for quoting.
            csv_in = InputSerialization(
                compression_type="NONE",
                csv=CSVInput(
                    FileHeaderInfo="NONE",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter='"',
                    QuoteEscapeCharacter='"',
                    Comments="#",
                    AllowQuotedRecordDelimiter="FALSE",
                ),
            )
            # Output format under test: quote/escape come from the case.
            csv_out = OutputSerialization(csv=CSVOutput(
                QuoteFields="ALWAYS",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter=out_quote,
                QuoteEscapeCharacter=out_escape,
            ))
            progress = RequestProgress(enabled="False")

            select_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=csv_in,
                output_serialization=csv_out,
                request_progress=progress,
            )

            test_sql_api(f'test_{case_no}', client, bucket_name, object_data,
                         select_opts, wanted)
    finally:
        # Always drop the bucket, even when a case fails.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Example #4
0
    def test_xml_marshal_select(self):
        """Compare ``xml_marshal_select`` output with a hand-written request.

        The options describe an uncompressed CSV input with a header row, a
        CSV output quoted as needed, and request progress enabled. The
        expected document is assembled from its individual elements below.
        """
        expected_parts = [
            b'<SelectObjectContentRequest>',
            b'<Expression>select * from s3object</Expression>',
            b'<ExpressionType>SQL</ExpressionType>',
            # Input serialization section.
            b'<InputSerialization>',
            b'<CompressionType>NONE</CompressionType>',
            b'<CSV>',
            b'<FileHeaderInfo>USE</FileHeaderInfo>',
            b'<RecordDelimiter>\n</RecordDelimiter>',
            b'<FieldDelimiter>,</FieldDelimiter>',
            b'<QuoteCharacter>"</QuoteCharacter>',
            b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>',
            b'<Comments>#</Comments>',
            b'<AllowQuotedRecordDelimiter>false</AllowQuotedRecordDelimiter>',
            b'</CSV>',
            b'</InputSerialization>',
            # Output serialization section.
            b'<OutputSerialization>',
            b'<CSV>',
            b'<QuoteFields>ASNEEDED</QuoteFields>',
            b'<RecordDelimiter>\n</RecordDelimiter>',
            b'<FieldDelimiter>,</FieldDelimiter>',
            b'<QuoteCharacter>"</QuoteCharacter>',
            b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>',
            b'</CSV>',
            b'</OutputSerialization>',
            # Progress reporting section.
            b'<RequestProgress>',
            b'<Enabled>true</Enabled>',
            b'</RequestProgress>',
            b'</SelectObjectContentRequest>',
        ]
        expected_string = b''.join(expected_parts)

        csv_in = InputSerialization(
            compression_type="NONE",
            csv=CSVInput(FileHeaderInfo="USE",
                         RecordDelimiter="\n",
                         FieldDelimiter=",",
                         QuoteCharacter='"',
                         QuoteEscapeCharacter='"',
                         Comments="#",
                         AllowQuotedRecordDelimiter="FALSE"),
        )
        csv_out = OutputSerialization(
            csv=CSVOutput(QuoteFields="ASNEEDED",
                          RecordDelimiter="\n",
                          FieldDelimiter=",",
                          QuoteCharacter='"',
                          QuoteEscapeCharacter='"')
        )
        progress = RequestProgress(enabled="TRUE")

        options = SelectObjectOptions(
            expression="select * from s3object",
            input_serialization=csv_in,
            output_serialization=csv_out,
            request_progress=progress,
        )

        actual_string = xml_marshal_select(options)
        eq_(expected_string, actual_string)
Example #5
0
def test_sql_expressions(client, input_json_bytes, tests, log_output):
    """Run *tests* with JSON-document input and as-needed-quoted CSV output.

    Thin wrapper that fixes the serialization pair and delegates everything
    else to ``test_sql_expressions_custom_input_output``.

    :param client: client object, passed through unchanged.
    :param input_json_bytes: object content to query, passed through.
    :param tests: test cases, passed through.
    :param log_output: test log object, passed through.
    """
    test_sql_expressions_custom_input_output(
        client,
        input_json_bytes,
        # Input: uncompressed JSON in DOCUMENT mode.
        InputSerialization(
            compression_type="NONE",
            json=JSONInput(json_type="DOCUMENT"),
        ),
        # Output: CSV, quoting fields only when needed.
        OutputSerialization(csv=CSVOutput(quote_fields="ASNEEDED")),
        tests,
        log_output,
    )
Example #6
0
def test_csv_input_custom_quote_char(client, log_output):
    """Exercise CSV input parsing with varying quote and escape characters.

    Every case is ``(quote_char, escape_char, data, expected_output)``. The
    case's quote/escape characters configure the CSV *input*; results are
    requested as JSON records, one per line.

    :param client: client used to create/remove the bucket and passed on to
        ``test_sql_api``.
    :param log_output: test log object; receives the bucket name in ``args``
        and has its ``json_report()`` printed at the end.
    """
    # One fresh bucket is shared by all cases below.
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    cases = [
        # Invalid quote character, should fail
        ('""', '"', b'col1,col2,col3\n', Exception()),
        # UTF-8 quote character
        ('ع', '"', 'عcol1ع,عcol2ع,عcol3ع\n'.encode(),
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        # Only one field is quoted
        ('"', '"', b'"col1",col2,col3\n',
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        ('"', '"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', '"', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        # NOTE(review): the next two cases are identical.
        ('', '"', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', '"', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', '"', b'"col1","col2","col3"\n',
         b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', '"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
        ('"', '"', b'A",B\n', b'{"_1":"A\\"","_2":"B"}\n'),
        ('"', '"', b'A"",B\n', b'{"_1":"A\\"\\"","_2":"B"}\n'),
        ('"', '\\', b'A\\B,C\n', b'{"_1":"A\\\\B","_2":"C"}\n'),
        ('"', '"', b'"A""B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
        ('"', '\\', b'"A\\B","CD"\n', b'{"_1":"AB","_2":"CD"}\n'),
        ('"', '\\', b'"A\\,","CD"\n', b'{"_1":"A,","_2":"CD"}\n'),
        ('"', '\\', b'"A\\"B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
        ('"', '\\', b'"A\\""\n', b'{"_1":"A\\""}\n'),
        ('"', '\\', b'"A\\"\\"B"\n', b'{"_1":"A\\"\\"B"}\n'),
        ('"', '\\', b'"A\\"","\\"B"\n', b'{"_1":"A\\"","_2":"\\"B"}\n'),
    ]

    client.make_bucket(bucket_name)

    try:
        for case_no, case in enumerate(cases):
            in_quote, in_escape, object_data, wanted = case

            # Input format under test: quote/escape come from the case.
            csv_in = InputSerialization(
                compression_type="NONE",
                csv=CSVInput(
                    FileHeaderInfo="NONE",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=in_quote,
                    QuoteEscapeCharacter=in_escape,
                    Comments="#",
                    AllowQuotedRecordDelimiter="FALSE",
                ),
            )
            # Fixed output format: newline-delimited JSON records.
            json_out = OutputSerialization(
                json=JsonOutput(RecordDelimiter="\n", ))
            progress = RequestProgress(enabled="False")

            select_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=csv_in,
                output_serialization=json_out,
                request_progress=progress,
            )

            test_sql_api(f'test_{case_no}', client, bucket_name, object_data,
                         select_opts, wanted)
    finally:
        # Always drop the bucket, even when a case fails.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Example #7
0
def test_csv_output_quote_char(client, log_output):
    """Check CSV output quoting for several output quote characters.

    Every case is ``(quote_char, object_content, expected_output)``.
    ``quote_char`` is used as both ``QuoteCharacter`` and
    ``QuoteEscapeCharacter`` of the CSV output, and all output fields are
    quoted. ``expected_output`` is either the exact bytes ``exec_select``
    must return, or an ``Exception`` instance meaning the select must fail.

    :param client: client used to create/remove the bucket and passed on to
        ``exec_select``.
    :param log_output: test log object; receives the bucket name in ``args``,
        is passed to ``exec_select`` and has its ``json_report()`` printed
        at the end.
    :raises ValueError: when a case fails unexpectedly, succeeds although a
        failure was expected, or returns data that differs from the
        expectation. (Previously re-wrapped in a bare ``Exception``;
        ``ValueError`` is still caught by ``except Exception``.)
    """
    # Get a unique bucket_name and object_name
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # Invalid (two-character) quote character, should fail
        ("''", b'col1,col2,col3\n', Exception()),
        ("'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("", b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', b'\n', b''),
    ]

    # Create the bucket before entering the try block: if creation fails
    # there is nothing to clean up, and the original error must not be
    # masked by a failing remove_bucket() in the finally clause.
    client.make_bucket(bucket_name)

    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter='"',
                        QuoteEscapeCharacter='"',
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(csv=CSVOutput(
                    QuoteFields="ALWAYS",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=quote_char,
                    QuoteEscapeCharacter=quote_char,
                )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is fine only when the case expects one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}. Received: {}.'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors propagate with their original type and traceback; the
        # bucket is removed regardless of the outcome.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Example #8
0
def test_csv_input_quote_char(client, log_output):
    """Check CSV input parsing for several input quote characters.

    Every case is ``(quote_char, object_content, expected_output)``.
    ``quote_char`` is used as both ``QuoteCharacter`` and
    ``QuoteEscapeCharacter`` of the CSV input; results are requested as
    newline-delimited JSON. ``expected_output`` is either the exact bytes
    ``exec_select`` must return, or an ``Exception`` instance meaning the
    select must fail.

    :param client: client used to create/remove the bucket and passed on to
        ``exec_select``.
    :param log_output: test log object; receives the bucket name in ``args``,
        is passed to ``exec_select`` and has its ``json_report()`` printed
        at the end.
    :raises ValueError: when a case fails unexpectedly, succeeds although a
        failure was expected, or returns data that differs from the
        expectation. (Previously re-wrapped in a bare ``Exception``;
        ``ValueError`` is still caught by ``except Exception``.)
    """
    # Get a unique bucket_name and object_name
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # Invalid quote character, should fail
        ('""', b'col1,col2,col3\n', Exception()),
        # UTF-8 quote character
        ('ع',
         b'\xd8\xb9col1\xd8\xb9,\xd8\xb9col2\xd8\xb9,\xd8\xb9col3\xd8\xb9\n',
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        # Only one field is quoted
        ('"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'
         ),
        ('"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1","col2","col3"\n',
         b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
    ]

    # Create the bucket before entering the try block: if creation fails
    # there is nothing to clean up, and the original error must not be
    # masked by a failing remove_bucket() in the finally clause.
    client.make_bucket(bucket_name)

    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter=quote_char,
                        QuoteEscapeCharacter=quote_char,
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(
                    json=JsonOutput(RecordDelimiter="\n", )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is fine only when the case expects one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}, Received {}'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors propagate with their original type and traceback; the
        # bucket is removed regardless of the outcome.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Example #9
0
                                  InputSerialization, OutputSerialization,
                                  CSVOutput, JsonOutput)

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEY',
               secret_key='YOUR-SECRETKEY')

options = SelectObjectOptions(
    expression="select * from s3object",
    input_serialization=InputSerialization(
        compression_type="NONE",
        csv=CSVInput(
            FileHeaderInfo="USE",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
            Comments="#",
            AllowQuotedRecordDelimiter="FALSE",
        ),
        # If input is JSON
        # json=JSONInput(Type="DOCUMENT",)
    ),
    output_serialization=OutputSerialization(
        csv=CSVOutput(
            QuoteFields="ASNEEDED",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
        )
Example #10
0
# from minio.select.options import JsonInput
# from minio.select.options import ParquetInput

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEY',
               secret_key='YOUR-SECRETKEY')

options = SelectObjectOptions(
    expression="select * from s3object",
    input_serialization=InputSerialization(
        compression_type="NONE",
        csv=CSVInput(
            file_header_info="USE",
            record_delimiter="\n",
            field_delimiter=",",
            quote_character='"',
            quote_escape_character='"',
            comments="#",
            allow_quoted_record_delimiter="FALSE",
        ),
        # If input is JSON
        # json=JSONInput(json_type="DOCUMENT")
    ),
    output_serialization=OutputSerialization(
        csv=CSVOutput(
            quote_fields="ASNEEDED",
            record_delimiter="\n",
            field_delimiter=",",
            quote_character='"',
            quote_escape_character='"',
        ),