Esempio n. 1
0
    def test_table_run_wrong_from(self):
        """Check that a FROM clause naming an unregistered table is rejected.

        The registry only contains 'input_table', while the query selects
        from 'input_table_nonexistent'. rbql.query() must raise, and the
        exception text must name the table that could not be found.
        """
        input_table = [('Roosevelt', 1858), ('Napoleon', 1769),
                       ('Confucius', -551)]
        query_text = 'select a2 // 10, "name " + a1 from input_table_nonexistent order by a2'
        output_table = []
        warnings = []

        # None is passed as the input iterator; the query names its input
        # through the FROM clause instead.
        input_iterator = None
        output_writer = rbql_engine.TableWriter(output_table)
        tables_registry = rbql_engine.ListTableRegistry(
            [rbql_engine.ListTableInfo('input_table', input_table, None)],
            normalize_column_names=True)
        with self.assertRaises(Exception) as cm:
            rbql.query(query_text,
                       input_iterator,
                       output_writer,
                       warnings,
                       tables_registry,
                       user_init_code='')
        e = cm.exception
        self.assertEqual(
            'Unable to find input table: "input_table_nonexistent"', str(e))
Esempio n. 2
0
    def test_table_run_simple_join(self):
        """Join two registered in-memory tables and verify the ordered result.

        'my_input_table' is joined with 'my_join_table' on a3 == b1; the
        query output must equal the expected rows and no warnings may be
        reported. A third, empty table is registered but never referenced.
        """
        people = [('Roosevelt', 1858, 'USA'),
                  ('Napoleon', 1769, 'France'),
                  ('Confucius', -551, 'China')]
        countries = [('China', 1386), ('France', 67), ('USA', 327),
                     ('Russia', 140)]
        registered_tables = [
            rbql_engine.ListTableInfo('my_input_table', people, None),
            rbql_engine.ListTableInfo('my_join_table', countries, None),
            rbql_engine.ListTableInfo('unused_table', [], None),
        ]
        registry = rbql_engine.ListTableRegistry(
            registered_tables, normalize_column_names=True)

        result_rows = []
        reported_warnings = []
        writer = rbql_engine.TableWriter(result_rows)

        # The input iterator is None: the query names its tables itself.
        rbql.query(
            'select a2 // 10, b2, "name " + a1 FROM my_input_table order by a2 JOIN my_join_table on a3 == b1',
            None,
            writer,
            reported_warnings,
            registry,
            user_init_code='')

        self.assertEqual(reported_warnings, [])
        wanted_rows = [
            [-56, 1386, 'name Confucius'],
            [176, 67, 'name Napoleon'],
            [185, 327, 'name Roosevelt'],
        ]
        self.assertEqual(wanted_rows, result_rows)
Esempio n. 3
0
    def test_table_run_simple(self):
        """Query a registered in-memory table by name and verify the output.

        The query selects from 'input_table' ordered by a2; the rows written
        to the output table must equal the expected rows and the warnings
        list must stay empty. An extra empty table is registered but unused.
        """
        people = [('Roosevelt', 1858), ('Napoleon', 1769),
                  ('Confucius', -551)]
        registered_tables = [
            rbql_engine.ListTableInfo('input_table', people, None),
            rbql_engine.ListTableInfo('unused_table', [], None),
        ]
        registry = rbql_engine.ListTableRegistry(
            registered_tables, normalize_column_names=True)

        result_rows = []
        reported_warnings = []
        writer = rbql_engine.TableWriter(result_rows)

        # The input iterator is None: the FROM clause names the input table.
        rbql.query(
            'select a2 // 10, "name " + a1 from input_table order by a2',
            None,
            writer,
            reported_warnings,
            registry,
            user_init_code='')

        self.assertEqual(reported_warnings, [])
        wanted_rows = [
            [-56, 'name Confucius'],
            [176, 'name Napoleon'],
            [185, 'name Roosevelt'],
        ]
        self.assertEqual(wanted_rows, result_rows)
Esempio n. 4
0
    def test_simple_case(self):
        """Round-trip a small CSV table through a header-aware query.

        The input table is serialized to CSV, wrapped in a randomly encoded
        stream, filtered with a query that addresses columns by header name,
        and the written output is parsed back and compared with the expected
        table. The query must not report any warnings.
        """
        source_rows = [
            ['name', 'value'],
            ['abc', '1234'],
            ['abc', '1234'],
            ['efg', '100'],
            ['abc', '100'],
            ['cde', '12999'],
            ['aaa', '2000'],
            ['abc', '100'],
        ]
        wanted_rows = [
            ['name', 'col2'],
            ['abc', '12340'],
            ['abc', '12340'],
            ['abc', '1000'],
            ['abc', '1000'],
        ]

        delim, policy = ',', 'quoted'
        csv_text = table_to_csv_string_random(source_rows, delim, policy)
        src_stream, encoding = string_to_randomly_encoded_stream(csv_text)
        reader = rbql_csv.CSVRecordIterator(src_stream,
                                            encoding,
                                            delim=delim,
                                            policy=policy,
                                            has_header=True)

        # Use a byte sink when an encoding was chosen, a text sink otherwise.
        if encoding is not None:
            dst_stream = io.BytesIO()
        else:
            dst_stream = io.StringIO()
        writer = rbql_csv.CSVWriter(dst_stream, False, encoding, delim,
                                    policy)

        reported_warnings = []
        rbql.query('select a.name, int(a.value) * 10 where a.name == "abc"',
                   reader, writer, reported_warnings)
        src_stream.close()
        self.assertEqual(reported_warnings, [])

        # Rewind the sink and parse what the writer produced.
        dst_stream.seek(0)
        result_reader = rbql_csv.CSVRecordIterator(dst_stream,
                                                   encoding,
                                                   delim=delim,
                                                   policy=policy)
        result_rows = result_reader.get_all_records()
        dst_stream.close()
        self.assertEqual(wanted_rows, result_rows)
Esempio n. 5
0
    def _do_test_random_headers(self):
        """Run one randomized round of a query that uses header-derived names.

        A table with 2..10 columns and 0..10 data rows is generated. Header
        names are unique and are either random printable-ASCII strings or
        picked from a fixed list. Two distinct columns are chosen as query
        columns: the first holds 'foo bar good' / 'foo bar bad', the second
        holds '10' / '0'. The expected table is the header row plus every row
        where the first column ends with "good" and the second equals '10'.

        The table is serialized to CSV, wrapped in a randomly encoded stream,
        queried, and the written output is parsed back and compared with the
        expected table. The query must not report any warnings.
        """
        num_rows = natural_random(0, 10)
        num_cols = natural_random(2, 10)

        # Keep drawing a candidate name for each column until it is one that
        # has not been used yet, so the header has no duplicates.
        header_row = []
        for _ in range(num_cols):
            while True:
                if random.choice([True, False]):
                    # Random name made of printable ASCII code points 32..126.
                    field_name_len = natural_random(1, 10)
                    field_name_bytes = [
                        random.randint(32, 126) for _ in range(field_name_len)
                    ]
                    field_name = bytes(
                        bytearray(field_name_bytes)).decode('ascii')
                else:
                    field_name = random.choice(
                        ['_foo', 'bar', 'Bar', '__foo', 'a', 'b', 'A', 'B'])
                if field_name not in header_row:
                    header_row.append(field_name)
                    break
        input_table = [header_row[:]]
        expected_table = [header_row[:]]

        # Pick two different columns for the WHERE condition.
        all_col_nums = list(range(num_cols))
        query_col_1 = random.choice(all_col_nums)
        all_col_nums.remove(query_col_1)
        query_col_2 = random.choice(all_col_nums)

        for _ in range(num_rows):
            is_good_row = True
            row = []
            for col_id in range(num_cols):
                if col_id == query_col_1:
                    field_value = random.choice(
                        ['foo bar good', 'foo bar bad'])
                    if field_value != 'foo bar good':
                        is_good_row = False
                elif col_id == query_col_2:
                    field_value = random.choice(['10', '0'])
                    if field_value != '10':
                        is_good_row = False
                else:
                    # Filler column: random content without line breaks.
                    field_value = make_random_decoded_binary_csv_entry(
                        0, 10, restricted_chars=['\r', '\n'])
                row.append(field_value)
            input_table.append(row[:])
            if is_good_row:
                expected_table.append(row[:])

        query_col_name_1 = make_column_variable(header_row[query_col_1])
        query_col_name_2 = make_column_variable(header_row[query_col_2])
        query = 'select * where ({}.endswith("good") and int({}) * 2 == 20)'.format(
            query_col_name_1, query_col_name_2)

        delim = ','
        policy = 'quoted'
        csv_data = table_to_csv_string_random(input_table, delim, policy)
        # The stream and its encoding (possibly None) both come from the
        # helper; no separate pre-encoded stream is needed.
        input_stream, encoding = string_to_randomly_encoded_stream(csv_data)

        input_iterator = rbql_csv.CSVRecordIterator(input_stream,
                                                    encoding,
                                                    delim=delim,
                                                    policy=policy,
                                                    has_header=True)

        # Use a byte sink when an encoding was chosen, a text sink otherwise.
        output_stream = io.BytesIO() if encoding is not None else io.StringIO()
        output_writer = rbql_csv.CSVWriter(output_stream, False, encoding,
                                           delim, policy)

        warnings = []
        rbql.query(query, input_iterator, output_writer, warnings)
        input_stream.close()
        self.assertEqual(warnings, [])

        # Rewind the sink and parse what the writer produced.
        output_stream.seek(0)
        output_iterator = rbql_csv.CSVRecordIterator(output_stream,
                                                     encoding,
                                                     delim=delim,
                                                     policy=policy)
        output_table = output_iterator.get_all_records()
        output_stream.close()
        self.assertEqual(expected_table, output_table)