Exemple #1
0
def test_csv_output_custom_quote_char(client, log_output):
    """Exercise CSV output serialization with custom quote/escape characters.

    A bucket with a generated name is created, every case is handed to
    ``test_sql_api`` together with a ``select *`` request whose CSV output
    uses that case's quote and escape characters, and the bucket is removed
    again whether or not a case raised. The JSON report of ``log_output``
    is printed once all cases have run.
    """
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    plain_row = b'col1,col2,col3\n'
    single_quoted_row = b"'col1','col2','col3'\n"
    backslash = "\\"

    # Each case: (output quote char, output escape char, object content,
    #             expected result -- bytes, or an Exception instance).
    cases = [
        # Two-character quote string: an Exception is expected
        ("''", "''", plain_row, Exception()),
        ("'", "'", plain_row, single_quoted_row),
        # Empty quote character: fields are expected wrapped in NUL bytes
        ("", '"', plain_row,
         b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', '"', plain_row, b'"col1","col2","col3"\n'),
        ('"', '"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', '"', b'""""\n', b'""""\n'),
        ('"', '"', b'\n', b''),
        ("'", backslash, plain_row, single_quoted_row),
        ("'", backslash, b'col""1,col2,col3\n',
         b"'col\"\"1','col2','col3'\n"),
        ("'", backslash, b'col\'1,col2,col3\n',
         b"'col\\'1','col2','col3'\n"),
        ("'", backslash, b'"col\'1","col2","col3"\n',
         b"'col\\'1','col2','col3'\n"),
        ("'", backslash, b'col\'\n', b"'col\\''\n"),
        # Two consecutive escaped quotes
        ("'", backslash, b'"a"""""\n', b"'a\"\"'\n"),
    ]

    client.make_bucket(bucket_name)

    try:
        for case_no, case in enumerate(cases):
            quote_char, escape_char, input_data, expected_output = case

            # The input side is always plain double-quoted CSV; only the
            # output quoting varies from case to case.
            input_side = InputSerialization(
                compression_type="NONE",
                csv=CSVInput(
                    FileHeaderInfo="NONE",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter='"',
                    QuoteEscapeCharacter='"',
                    Comments="#",
                    AllowQuotedRecordDelimiter="FALSE",
                ),
            )
            output_side = OutputSerialization(
                csv=CSVOutput(
                    QuoteFields="ALWAYS",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=quote_char,
                    QuoteEscapeCharacter=escape_char,
                ),
            )
            select_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=input_side,
                output_serialization=output_side,
                request_progress=RequestProgress(enabled="False"),
            )

            test_sql_api(f'test_{case_no}', client, bucket_name, input_data,
                         select_opts, expected_output)
    finally:
        client.remove_bucket(bucket_name)

    # Reaching this point means no case raised
    print(log_output.json_report())
Exemple #2
0
    def test_xml_marshal_select(self):
        """Compare xml_marshal_select output with a fixed XML document.

        The request uses CSV for both input and output serialization and
        has request progress enabled; the marshalled bytes must equal the
        expected document exactly.
        """
        csv_in = CSVInput(
            FileHeaderInfo="USE",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
            Comments="#",
            AllowQuotedRecordDelimiter="FALSE",
        )
        csv_out = CSVOutput(
            QuoteFields="ASNEEDED",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
        )
        select_opts = SelectObjectOptions(
            expression="select * from s3object",
            input_serialization=InputSerialization(
                compression_type="NONE",
                csv=csv_in,
            ),
            output_serialization=OutputSerialization(csv=csv_out),
            request_progress=RequestProgress(enabled="TRUE"),
        )

        # The fragments are joined with no separator, so the expected
        # document contains no whitespace other than the "\n" delimiters.
        expected_xml = b''.join([
            b'<SelectObjectContentRequest>',
            b'<Expression>select * from s3object</Expression>',
            b'<ExpressionType>SQL</ExpressionType>',
            b'<InputSerialization>',
            b'<CompressionType>NONE</CompressionType>',
            b'<CSV><FileHeaderInfo>USE</FileHeaderInfo>',
            b'<RecordDelimiter>\n</RecordDelimiter>',
            b'<FieldDelimiter>,</FieldDelimiter>',
            b'<QuoteCharacter>"</QuoteCharacter>',
            b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>',
            b'<Comments>#</Comments>',
            b'<AllowQuotedRecordDelimiter>false',
            b'</AllowQuotedRecordDelimiter></CSV>',
            b'</InputSerialization>',
            b'<OutputSerialization><CSV>',
            b'<QuoteFields>ASNEEDED</QuoteFields>',
            b'<RecordDelimiter>\n</RecordDelimiter>',
            b'<FieldDelimiter>,</FieldDelimiter>',
            b'<QuoteCharacter>"</QuoteCharacter>',
            b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>',
            b'</CSV></OutputSerialization>',
            b'<RequestProgress>',
            b'<Enabled>true</Enabled>',
            b'</RequestProgress>',
            b'</SelectObjectContentRequest>',
        ])

        marshalled = xml_marshal_select(select_opts)
        eq_(expected_xml, marshalled)
Exemple #3
0
def test_sql_expressions_custom_input_output(client, input_bytes, sql_input,
                                             sql_output, tests, log_output):
    """Run a batch of select expressions against one uploaded object.

    The content is uploaded to a freshly created bucket, each expression in
    ``tests`` is executed against it, and the object and bucket are removed
    afterwards.

    Args:
        client: object providing ``make_bucket``, ``put_object``,
            ``select_object_content``, ``remove_object`` and
            ``remove_bucket``.
        input_bytes: object content as a ``str`` (despite the name); it is
            UTF-8 encoded before upload.
        sql_input: value passed as ``input_serialization``.
        sql_output: value passed as ``output_serialization``.
        tests: iterable of ``(test_name, select_expression,
            expected_output)`` tuples. Entries with an empty expression are
            skipped. When ``expected_output`` is a ``datetime``, the result
            only has to parse as ``%Y-%m-%dT%H:%M:%S.%f%z``.
        log_output: object whose ``args`` mapping receives the
            ``total_tests`` and ``total_success`` counters.

    Failures of individual expressions are swallowed; they are visible only
    as a difference between ``total_tests`` and ``total_success``.
    """
    bucket_name = generate_bucket_name()
    object_name = generate_object_name()

    log_output.args['total_tests'] = 0
    log_output.args['total_success'] = 0

    client.make_bucket(bucket_name)
    try:
        # Size the upload from the encoded payload: len() of the str counts
        # characters, which undercounts the bytes of any non-ASCII input.
        payload = bytes(input_bytes, 'utf-8')
        client.put_object(bucket_name, object_name, io.BytesIO(payload),
                          len(payload))

        for idx, (_test_name, select_expression,
                  expected_output) in enumerate(tests):
            if select_expression == '':
                continue
            try:
                log_output.args['total_tests'] += 1
                options = SelectObjectOptions(
                    expression=select_expression,
                    input_serialization=sql_input,
                    output_serialization=sql_output,
                    request_progress=RequestProgress(enabled="False"))

                data = client.select_object_content(bucket_name, object_name,
                                                    options)

                # Collect the streamed records into a single bytes value
                got_output = b''.join(
                    chunk.encode('utf-8')
                    for chunk in data.stream(10 * 1024))

                if got_output != expected_output:
                    if isinstance(expected_output, datetime):
                        # A datetime expectation only requires the output to
                        # parse as a timestamp; strptime raises otherwise.
                        datetime.strptime(
                            got_output.decode("utf-8").strip(),
                            '%Y-%m-%dT%H:%M:%S.%f%z')
                    else:
                        raise ValueError(
                            'Test {}: data mismatch. Expected : {}. Received: {}.'
                            .format(idx + 1, expected_output, got_output))

                log_output.args['total_success'] += 1
            except Exception:
                # Best-effort: a failing expression is counted in
                # total_tests but not in total_success.
                # TODO: raise instead of silently continuing.
                continue
    finally:
        client.remove_object(bucket_name, object_name)
        client.remove_bucket(bucket_name)
Exemple #4
0
def test_csv_input_custom_quote_char(client, log_output):
    """Exercise CSV input parsing with custom quote/escape characters.

    A bucket with a generated name is created, every case is handed to
    ``test_sql_api`` together with a ``select *`` request whose CSV input
    uses that case's quote and escape characters and whose output is JSON,
    and the bucket is removed again whether or not a case raised. The JSON
    report of ``log_output`` is printed once all cases have run.
    """
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    first_quoted_csv = b'"col1",col2,col3\n'
    plain_json = b'{"_1":"col1","_2":"col2","_3":"col3"}\n'
    literal_quotes_json = b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'

    # Each case: (input quote char, input escape char, object content,
    #             expected result -- bytes, or an Exception instance).
    cases = [
        # Two-character quote string: an Exception is expected
        ('""', '"', b'col1,col2,col3\n', Exception()),
        # Multi-byte (UTF-8) quote character
        ('ع', '"', 'عcol1ع,عcol2ع,عcol3ع\n'.encode(), plain_json),
        # Only the first field is quoted
        ('"', '"', first_quoted_csv, plain_json),
        ('"', '"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', '"', first_quoted_csv, literal_quotes_json),
        # The empty-quote case appears twice; both entries are kept so the
        # test_{idx} numbering of the later cases is unchanged.
        ('', '"', first_quoted_csv, literal_quotes_json),
        ('', '"', first_quoted_csv, literal_quotes_json),
        ('', '"', b'"col1","col2","col3"\n',
         b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', '"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
        ('"', '"', b'A",B\n', b'{"_1":"A\\"","_2":"B"}\n'),
        ('"', '"', b'A"",B\n', b'{"_1":"A\\"\\"","_2":"B"}\n'),
        ('"', '\\', b'A\\B,C\n', b'{"_1":"A\\\\B","_2":"C"}\n'),
        ('"', '"', b'"A""B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
        ('"', '\\', b'"A\\B","CD"\n', b'{"_1":"AB","_2":"CD"}\n'),
        ('"', '\\', b'"A\\,","CD"\n', b'{"_1":"A,","_2":"CD"}\n'),
        ('"', '\\', b'"A\\"B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
        ('"', '\\', b'"A\\""\n', b'{"_1":"A\\""}\n'),
        ('"', '\\', b'"A\\"\\"B"\n', b'{"_1":"A\\"\\"B"}\n'),
        ('"', '\\', b'"A\\"","\\"B"\n', b'{"_1":"A\\"","_2":"\\"B"}\n'),
    ]

    client.make_bucket(bucket_name)

    try:
        for case_no, case in enumerate(cases):
            quote_char, escape_char, data, expected_output = case

            input_side = InputSerialization(
                compression_type="NONE",
                csv=CSVInput(
                    FileHeaderInfo="NONE",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=quote_char,
                    QuoteEscapeCharacter=escape_char,
                    Comments="#",
                    AllowQuotedRecordDelimiter="FALSE",
                ),
            )
            output_side = OutputSerialization(
                json=JsonOutput(RecordDelimiter="\n"),
            )
            select_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=input_side,
                output_serialization=output_side,
                request_progress=RequestProgress(enabled="False"),
            )

            test_sql_api(f'test_{case_no}', client, bucket_name, data,
                         select_opts, expected_output)
    finally:
        client.remove_bucket(bucket_name)

    # Reaching this point means no case raised
    print(log_output.json_report())
Exemple #5
0
def test_csv_output_quote_char(client, log_output):
    """Check CSV output serialization for several quote characters.

    Each case is ``(quote_char, object_content, expected_output)``; the
    quote character is used as both ``QuoteCharacter`` and
    ``QuoteEscapeCharacter`` of the CSV output. ``expected_output`` is
    either the exact bytes ``exec_select`` must return or an ``Exception``
    instance, meaning the select is expected to fail.

    Raises:
        ValueError: a select failed unexpectedly, succeeded when a failure
            was expected, or returned data different from the expectation.
    """
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # Two-character quote string: the select is expected to fail
        ("''", b'col1,col2,col3\n', Exception()),
        ("'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        # Empty quote character: fields are expected wrapped in NUL bytes
        ("", b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', b'\n', b''),
    ]

    # Create the bucket before entering the try block: if creation fails
    # there is nothing to clean up, and a failing remove_bucket() in the
    # cleanup path would otherwise mask the original error.
    client.make_bucket(bucket_name)

    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter='"',
                        QuoteEscapeCharacter='"',
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(csv=CSVOutput(
                    QuoteFields="ALWAYS",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=quote_char,
                    QuoteEscapeCharacter=quote_char,
                )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is fine only when the case expects one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}. Received: {}.'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors propagate with their original type; re-wrapping them in a
        # bare Exception would only discard that information.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Exemple #6
0
def test_csv_input_quote_char(client, log_output):
    """Check CSV input parsing for several quote characters.

    Each case is ``(quote_char, object_content, expected_output)``; the
    quote character is used as both ``QuoteCharacter`` and
    ``QuoteEscapeCharacter`` of the CSV input, and the output is JSON.
    ``expected_output`` is either the exact bytes ``exec_select`` must
    return or an ``Exception`` instance, meaning the select is expected to
    fail.

    Raises:
        ValueError: a select failed unexpectedly, succeeded when a failure
            was expected, or returned data different from the expectation.
    """
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # Invalid quote character, should fail
        ('""', b'col1,col2,col3\n', Exception()),
        # UTF-8 quote character
        ('ع',
         b'\xd8\xb9col1\xd8\xb9,\xd8\xb9col2\xd8\xb9,\xd8\xb9col3\xd8\xb9\n',
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        # Only one field is quoted
        ('"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'
         ),
        ('"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1","col2","col3"\n',
         b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
    ]

    # Create the bucket before entering the try block: if creation fails
    # there is nothing to clean up, and a failing remove_bucket() in the
    # cleanup path would otherwise mask the original error.
    client.make_bucket(bucket_name)

    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter=quote_char,
                        QuoteEscapeCharacter=quote_char,
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(
                    json=JsonOutput(RecordDelimiter="\n", )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is fine only when the case expects one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}, Received {}'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors propagate with their original type; re-wrapping them in a
        # bare Exception would only discard that information.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Exemple #7
0
        # json=JSONInput(Type="DOCUMENT",)
    ),
    output_serialization=OutputSerialization(
        csv=CSVOutput(
            QuoteFields="ASNEEDED",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
        )

        # json = JsonOutput(
        #     RecordDelimiter="\n",
        #     )
    ),
    request_progress=RequestProgress(enabled="False"))

# Run the select request built above and save its results locally.
# Only SelectMessageError is handled here; any other exception propagates.
try:
    data = client.select_object_content('your-bucket', 'your-object', options)

    # Get the records: each item yielded by stream() is written to the
    # file as-is. The file is opened in text mode, so the items are
    # presumably str -- TODO confirm against the client library.
    with open('my-record-file', 'w') as record_data:
        # 10 * 1024 is passed straight to stream(); presumably a chunk
        # size in bytes -- verify against the API documentation.
        for d in data.stream(10 * 1024):
            record_data.write(d)

    # Get the stats
    print(data.stats())

except SelectMessageError as err:
    # Print the select error instead of letting it terminate the script
    print(err)