コード例 #1
0
  def test_aggregation(self, vector):
    exec_option = vector.get_value('exec_option')
    disable_codegen = exec_option['disable_codegen']
    data_type, agg_func = (vector.get_value('data_type'), vector.get_value('agg_func'))

    query = 'select %s(%s_col) from alltypesagg where day is not null' % (agg_func,
        data_type)
    result = self.execute_query(query, exec_option,
       table_format=vector.get_value('table_format'))
    assert len(result.data) == 1
    self.verify_agg_result(agg_func, data_type, False, result.data[0]);

    if not disable_codegen:
      # Verify codegen was enabled for the preaggregation.
      # It is deliberately disabled for the merge aggregation.
      assert_codegen_enabled(result.runtime_profile, [1])

    query = 'select %s(DISTINCT(%s_col)) from alltypesagg where day is not null' % (
        agg_func, data_type)
    result = self.execute_query(query, vector.get_value('exec_option'))
    assert len(result.data) == 1
    self.verify_agg_result(agg_func, data_type, True, result.data[0]);

    if not disable_codegen:
      # Verify codegen was enabled for all stages of the aggregation.
      assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
コード例 #2
0
  def test_group_concat(self, vector):
    """group_concat distinct tests
       Required to run directly in python because the order in which results will be
       merged at the final, single-node aggregation step is non-deterministic (if the
       first phase is running on multiple nodes). Need to pull the result apart and
   compare the actual items)"""
    exec_option = vector.get_value('exec_option')
    disable_codegen = exec_option['disable_codegen']
    table_format = vector.get_value('table_format')
    # Test group_concat distinct with other aggregate function and groupings.
    # expected result is the row: 2010,'1, 2, 3, 4','1-2-3-4','2|3|1|4',40,4
    query = """select year, group_concat(distinct string_col),
    group_concat(distinct string_col, '-'), group_concat(distinct string_col, '|'),
    count(string_col), count(distinct string_col)
    from alltypesagg where int_col < 5 and year = 2010 group by year"""
    result = self.execute_query(query, exec_option, table_format=table_format)
    row = (result.data)[0].split("\t")
    assert(len(row) == 6)
    assert(row[0] == '2010')
    delimiter = [', ', '-', '|']
    for i in range(1, 4):
      assert(set(row[i].split(delimiter[i-1])) == set(['1', '2', '3', '4']))
    assert(row[4] == '40')
    assert(row[5] == '4')
    check_codegen_enabled = not disable_codegen and not USING_OLD_AGGS_JOINS
    if check_codegen_enabled:
      # Verify codegen was enabled for all three stages of the aggregation.
      assert_codegen_enabled(result.runtime_profile, [1, 2, 4])

    # Test group_concat distinct with arrow delimiter, with multiple rows
    query = """select day, group_concat(distinct string_col, "->")
    from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
    group by day order by day"""
    result = self.execute_query(query, exec_option, table_format=table_format)
    string_col = []
    string_col.append(set(['1','101','201','301','401','501','601','701','801','901']))
    string_col.append(set(['2','102','202','302','402','502','602','702','802','902']))
    string_col.append(set(['3','103','203','303','403','503','603','703','803','903']))
    string_col.append(set(['4','104','204','304','404','504','604','704','804','904']))
    string_col.append(set(['5','105','205','305','405','505','605','705','805','905']))
    string_col.append(set(['6','106','206','306','406','506','606','706','806','906']))
    string_col.append(set(['7','107','207','307','407','507','607','707','807','907']))
    string_col.append(set(['8','108','208','308','408','508','608','708','808','908']))
    string_col.append(set(['9','109','209','309','409','509','609','709','809','909']))
    string_col.append(set(['10','110','210','310','410','510','610','710','810','910']))
    assert(len(result.data) == 10)
    for i in range(10):
      row = (result.data)[i].split("\t")
      assert(len(row) == 2)
      assert(row[0] == str(i+1))
      assert(set(row[1].split("->")) == string_col[i])

    # Test group_concat distinct with merge node
    query = """select group_concat(distinct string_col, ' ') from alltypesagg
    where int_col < 10"""
    result = self.execute_query(query, exec_option, table_format=table_format)
    assert(set((result.data)[0].split(" ")) == set(['1','2','3','4','5','6','7','8','9']))
    if check_codegen_enabled:
      # Verify codegen was enabled for all four stages of the aggregation.
      assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
コード例 #3
0
  def test_aggregation(self, vector):
    exec_option = vector.get_value('exec_option')
    disable_codegen = exec_option['disable_codegen']
    # The old aggregation node does not support codegen for all aggregate functions.
    check_codegen_enabled = not disable_codegen and not USING_OLD_AGGS_JOINS
    data_type, agg_func = (vector.get_value('data_type'), vector.get_value('agg_func'))

    query = 'select %s(%s_col) from alltypesagg where day is not null' % (agg_func,
        data_type)
    result = self.execute_query(query, exec_option,
       table_format=vector.get_value('table_format'))
    assert len(result.data) == 1
    self.verify_agg_result(agg_func, data_type, False, result.data[0]);

    if check_codegen_enabled:
      # Verify codegen was enabled for the preaggregation.
      # It is deliberately disabled for the merge aggregation.
      assert_codegen_enabled(result.runtime_profile, [1])

    query = 'select %s(DISTINCT(%s_col)) from alltypesagg where day is not null' % (
        agg_func, data_type)
    result = self.execute_query(query, vector.get_value('exec_option'))
    assert len(result.data) == 1
    self.verify_agg_result(agg_func, data_type, True, result.data[0]);

    if check_codegen_enabled:
      # Verify codegen was enabled for all stages of the aggregation.
      assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
コード例 #4
0
  def test_group_concat(self, vector):
    """group_concat distinct tests
       Required to run directly in python because the order in which results will be
       merged at the final, single-node aggregation step is non-deterministic (if the
       first phase is running on multiple nodes). Need to pull the result apart and
   compare the actual items)"""
    exec_option = vector.get_value('exec_option')
    disable_codegen = exec_option['disable_codegen']
    table_format = vector.get_value('table_format')
    # Test group_concat distinct with other aggregate function and groupings.
    # expected result is the row: 2010,'1, 2, 3, 4','1-2-3-4','2|3|1|4',40,4
    query = """select year, group_concat(distinct string_col),
    group_concat(distinct string_col, '-'), group_concat(distinct string_col, '|'),
    count(string_col), count(distinct string_col)
    from alltypesagg where int_col < 5 and year = 2010 group by year"""
    result = self.execute_query(query, exec_option, table_format=table_format)
    row = (result.data)[0].split("\t")
    assert(len(row) == 6)
    assert(row[0] == '2010')
    delimiter = [', ', '-', '|']
    for i in range(1, 4):
      assert(set(row[i].split(delimiter[i-1])) == set(['1', '2', '3', '4']))
    assert(row[4] == '40')
    assert(row[5] == '4')
    if not disable_codegen:
      # Verify codegen was enabled for all three stages of the aggregation.
      assert_codegen_enabled(result.runtime_profile, [1, 2, 4])

    # Test group_concat distinct with arrow delimiter, with multiple rows
    query = """select day, group_concat(distinct string_col, "->")
    from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
    group by day order by day"""
    result = self.execute_query(query, exec_option, table_format=table_format)
    string_col = []
    string_col.append(set(['1','101','201','301','401','501','601','701','801','901']))
    string_col.append(set(['2','102','202','302','402','502','602','702','802','902']))
    string_col.append(set(['3','103','203','303','403','503','603','703','803','903']))
    string_col.append(set(['4','104','204','304','404','504','604','704','804','904']))
    string_col.append(set(['5','105','205','305','405','505','605','705','805','905']))
    string_col.append(set(['6','106','206','306','406','506','606','706','806','906']))
    string_col.append(set(['7','107','207','307','407','507','607','707','807','907']))
    string_col.append(set(['8','108','208','308','408','508','608','708','808','908']))
    string_col.append(set(['9','109','209','309','409','509','609','709','809','909']))
    string_col.append(set(['10','110','210','310','410','510','610','710','810','910']))
    assert(len(result.data) == 10)
    for i in range(10):
      row = (result.data)[i].split("\t")
      assert(len(row) == 2)
      assert(row[0] == str(i+1))
      assert(set(row[1].split("->")) == string_col[i])

    # Test group_concat distinct with merge node
    query = """select group_concat(distinct string_col, ' ') from alltypesagg
    where int_col < 10"""
    result = self.execute_query(query, exec_option, table_format=table_format)
    assert(set((result.data)[0].split(" ")) == set(['1','2','3','4','5','6','7','8','9']))
    if not disable_codegen:
      # Verify codegen was enabled for all four stages of the aggregation.
      assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
コード例 #5
0
 def test_select_node_codegen(self, vector):
   """Test that select node is codegened"""
   result = self.execute_query('select * from (select * from functional.alltypes '
       'limit 1000000) t1 where int_col > 10 limit 10')
   exec_options = get_node_exec_options(result.runtime_profile, 1)
   # Make sure test fails if there are no exec options in the profile for the node
   assert len(exec_options) > 0
   assert_codegen_enabled(result.runtime_profile, [1])
コード例 #6
0
 def test_select_node_codegen(self, vector):
   """Test that select node is codegened"""
   result = self.execute_query('select * from (select * from functional.alltypes '
       'limit 1000000) t1 where int_col > 10 limit 10')
   exec_options = get_node_exec_options(result.runtime_profile, 1)
   # Make sure test fails if there are no exec options in the profile for the node
   assert len(exec_options) > 0
   assert_codegen_enabled(result.runtime_profile, [1])