Esempio n. 1
0
    def test_simple_case(self):
        """Run a filtering RBQL query over a small CSV table and check the result.

        The table is serialized with a randomized CSV layout and stream
        encoding, queried through `rbql.generic_run`, and the writer's output
        is parsed back and compared with the expected rows.
        """
        separator = ','
        quoting = 'quoted'

        # First row holds the column names referenced by the query (a.name, a.value).
        source_rows = [
            ['name', 'value'],
            ['abc', '1234'],
            ['abc', '1234'],
            ['efg', '100'],
            ['abc', '100'],
            ['cde', '12999'],
            ['aaa', '2000'],
            ['abc', '100'],
        ]
        # Only the "abc" rows survive, with their value multiplied by 10.
        wanted_rows = [
            ['abc', '12340'],
            ['abc', '12340'],
            ['abc', '1000'],
            ['abc', '1000'],
        ]

        csv_text = table_to_csv_string_random(source_rows, separator, quoting)
        src_stream, encoding = string_to_randomly_encoded_stream(csv_text)
        reader = rbql_csv.CSVRecordIterator(src_stream, True, encoding, delim=separator, policy=quoting)

        # Use a binary buffer when an encoding was chosen, a text buffer otherwise.
        if encoding is None:
            dst_stream = io.StringIO()
        else:
            dst_stream = io.BytesIO()
        sink = rbql_csv.CSVWriter(dst_stream, False, encoding, separator, quoting)

        query = 'select a.name, int(a.value) * 10 where NR > 1 and a.name == "abc"'
        error_info, warnings = rbql.generic_run(query, reader, sink)
        self.assertEqual(error_info, None)
        self.assertEqual(warnings, [])

        # Rewind and parse what the writer produced.
        dst_stream.seek(0)
        result_reader = rbql_csv.CSVRecordIterator(dst_stream, True, encoding, delim=separator, policy=quoting)
        parsed_rows = result_reader.get_all_records()
        self.assertEqual(wanted_rows, parsed_rows)
Esempio n. 2
0
def write_and_parse_back(table, encoding, delim, policy):
    """Write `table` through CSVWriter and return the records parsed back from it.

    A line separator is picked at random from `line_separators` for the
    writer. The function asserts that writing produced no warnings, then
    rewinds the buffer and reads it with CSVRecordIterator using the same
    encoding, delimiter and policy.
    """
    # Binary buffer when an encoding is given, text buffer otherwise.
    if encoding is None:
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    eol = random.choice(line_separators)
    csv_writer = rbql_csv.CSVWriter(buffer, False, encoding, delim, policy, eol)
    csv_writer._write_all(table)
    assert len(csv_writer.get_warnings()) == 0
    buffer.seek(0)
    reader = rbql_csv.CSVRecordIterator(buffer, True, encoding, delim=delim, policy=policy)
    return reader.get_all_records()
Esempio n. 3
0
 def test_monocolumn_write_failure(self):
     """Writing two-field records with the 'monocolumn' policy must raise.

     The exception message is expected to contain
     'some records have more than one field'.
     """
     encoding = None
     writer_stream = io.StringIO()
     delim = None
     policy = 'monocolumn'
     # Every record here has two fields.
     table = [["this will not", "work"], ["as monocolumn", "table"]]
     writer = rbql_csv.CSVWriter(writer_stream, True, encoding, delim, policy, '\n')
     with self.assertRaises(Exception) as cm:
         writer._write_all(table)
     # assertIn shows the actual message when the check fails, whereas
     # assertTrue(... .find(...) != -1) would only report "False is not true".
     self.assertIn('some records have more than one field', str(cm.exception))
Esempio n. 4
0
 def test_output_warnings(self):
     """Check the text and the warnings produced for a None field and a field containing the delimiter.

     The table is written with the 'simple' policy and ',' as delimiter; the
     first record holds a field with a comma in it and a None value.
     """
     rows = [["hello,world", None], ["hello", "world"]]
     sink = io.StringIO()
     csv_writer = rbql_csv.CSVWriter(sink, False, None, ',', 'simple', '\n')
     csv_writer._write_all(rows)
     sink.seek(0)

     # The None field is written as an empty string after the trailing comma.
     self.assertEqual('hello,world,\nhello,world\n', sink.getvalue())

     self.assertEqual(
         ['None values in output were replaced by empty strings', 'Some output fields contain separator'],
         csv_writer.get_warnings()
     )
Esempio n. 5
0
    def _do_test_random_headers(self):
        """Query a randomly generated table through header-derived column names.

        Builds a table whose header consists of unique random column names,
        picks two distinct columns to drive the `where` clause, and checks
        that the query output equals the header followed by exactly the rows
        that satisfy both conditions.
        """
        num_rows = natural_random(0, 10)
        # Two distinct query columns are chosen below, hence the lower bound of 2.
        num_cols = natural_random(2, 10)

        header_row = list()
        for _ in range(num_cols):
            # Keep generating until the name is not already used in the header.
            while True:
                if random.choice([True, False]):
                    field_name_len = natural_random(1, 10)
                    # Code points 32..126, decoded as ASCII into the column name.
                    field_name_bytes = [
                        random.randint(32, 126) for _ in range(field_name_len)
                    ]
                    field_name = bytes(
                        bytearray(field_name_bytes)).decode('ascii')
                else:
                    field_name = random.choice(
                        ['_foo', 'bar', 'Bar', '__foo', 'a', 'b', 'A', 'B'])
                if field_name not in header_row:
                    header_row.append(field_name)
                    break

        # The header is expected in the output as well as in the input.
        input_table = [header_row[:]]
        expected_table = [header_row[:]]

        # Choose two different column indices for the query conditions.
        all_col_nums = list(range(num_cols))
        query_col_1 = random.choice(all_col_nums)
        all_col_nums.remove(query_col_1)
        query_col_2 = random.choice(all_col_nums)

        for _ in range(num_rows):
            # A row is "good" only if both query columns hold the matching value.
            is_good_row = True
            row = list()
            for col_id in range(num_cols):
                if col_id == query_col_1:
                    field_value = random.choice(
                        ['foo bar good', 'foo bar bad'])
                    if field_value != 'foo bar good':
                        is_good_row = False
                elif col_id == query_col_2:
                    field_value = random.choice(['10', '0'])
                    if field_value != '10':
                        is_good_row = False
                else:
                    field_value = make_random_decoded_binary_csv_entry(
                        0, 10, restricted_chars=['\r', '\n'])
                row.append(field_value)
            input_table.append(row[:])
            if is_good_row:
                expected_table.append(row[:])

        query_col_name_1 = make_column_variable(header_row[query_col_1])
        query_col_name_2 = make_column_variable(header_row[query_col_2])
        query = 'select * where ({}.endswith("good") and int({}) * 2 == 20)'.format(
            query_col_name_1, query_col_name_2)

        delim = ','
        policy = 'quoted'
        csv_data = table_to_csv_string_random(input_table, delim, policy)
        input_stream, encoding = string_to_randomly_encoded_stream(csv_data)

        input_iterator = rbql_csv.CSVRecordIterator(input_stream,
                                                    encoding,
                                                    delim=delim,
                                                    policy=policy,
                                                    has_header=True)

        # Binary buffer when an encoding was chosen, text buffer otherwise.
        output_stream = io.BytesIO() if encoding is not None else io.StringIO()
        output_writer = rbql_csv.CSVWriter(output_stream, False, encoding,
                                           delim, policy)

        warnings = []
        rbql.query(query, input_iterator, output_writer, warnings)
        input_stream.close()
        self.assertEqual(warnings, [])

        # Rewind and parse everything the writer produced.
        output_stream.seek(0)
        output_iterator = rbql_csv.CSVRecordIterator(output_stream,
                                                     encoding,
                                                     delim=delim,
                                                     policy=policy)
        output_table = output_iterator.get_all_records()
        output_stream.close()
        self.assertEqual(expected_table, output_table)