Esempio n. 1
0
 def create_random_table(self,
     table_name,
     min_number_of_cols,
     max_number_of_cols,
     allowed_storage_formats):
   '''Create and return a Table holding a random number of randomly typed cols.

   The number of cols is drawn with randint() from min_number_of_cols through
   max_number_of_cols and the storage format is drawn with choice() from
   allowed_storage_formats. Each col is named '<type name in lowercase>_col_<N>'
   where N starts at 1.
   '''
   def with_random_size(exact_type):
     # VarChar and Char types (other than String subclasses) and Decimal types are
     # replaced by a class built from randomly drawn size parameters.
     if issubclass(exact_type, VarChar) and not issubclass(exact_type, String):
       return get_varchar_class(randint(1, VarChar.MAX))
     if issubclass(exact_type, Char) and not issubclass(exact_type, String):
       return get_char_class(randint(1, Char.MAX))
     if issubclass(exact_type, Decimal):
       digit_count = randint(1, Decimal.MAX_DIGITS)
       return get_decimal_class(digit_count, randint(1, digit_count))
     return exact_type

   number_of_cols = randint(min_number_of_cols, max_number_of_cols)
   chosen_format = choice(allowed_storage_formats)
   table = Table(table_name)
   table.storage_format = chosen_format
   for ordinal in xrange(1, number_of_cols + 1):
     general_type = choice(TYPES)
     # Narrow the general type down to one of its exact types.
     candidates = [exact for exact in EXACT_TYPES if issubclass(exact, general_type)]
     col_type = with_random_size(choice(candidates))
     col_name = '%s_col_%s' % (col_type.__name__.lower(), ordinal)
     col = Column(table, col_name, col_type)
     # NOTE(review): assumes 'table.cols' is the table's live col list and not a
     # copy -- confirm against the Table implementation.
     table.cols.append(col)
   return table
Esempio n. 2
0
 def _create_random_table(self, table_name, min_col_count, max_col_count,
                          allowed_storage_formats):
     '''Create and return a Table holding a random number of randomly typed cols.

     The number of cols is drawn with randint() from min_col_count through
     max_col_count and the storage format is drawn with choice() from
     allowed_storage_formats. Each col is named '<type name in lowercase>_col_<N>'
     where N starts at 1.
     '''
     def with_random_size(exact_type):
         # VarChar and Char types (other than String subclasses) and Decimal
         # types are replaced by a class built from randomly drawn size
         # parameters.
         if issubclass(exact_type, VarChar) and not issubclass(exact_type, String):
             return get_varchar_class(randint(1, VarChar.MAX))
         if issubclass(exact_type, Char) and not issubclass(exact_type, String):
             return get_char_class(randint(1, Char.MAX))
         if issubclass(exact_type, Decimal):
             digit_count = randint(1, Decimal.MAX_DIGITS)
             return get_decimal_class(digit_count, randint(1, digit_count))
         return exact_type

     number_of_cols = randint(min_col_count, max_col_count)
     chosen_format = choice(allowed_storage_formats)
     table = Table(table_name)
     table.storage_format = chosen_format

     general_types = list(TYPES)
     if table.storage_format == 'AVRO':
         # Avro doesn't support timestamps yet.
         general_types.remove(Timestamp)

     # TODO: 'table.cols' returns a copy of all scalar cols, so
     #       'table.cols.append()' doesn't actually modify the table's columns.
     #       'table.cols' should be changed to allow access to the real columns.
     new_cols = table.cols
     for ordinal in xrange(1, number_of_cols + 1):
         general_type = choice(general_types)
         # Narrow the general type down to one of its exact types.
         candidates = [
             exact for exact in EXACT_TYPES if issubclass(exact, general_type)]
         col_type = with_random_size(choice(candidates))
         col_name = '%s_col_%s' % (col_type.__name__.lower(), ordinal)
         new_cols.append(Column(table, col_name, col_type))
     # Assign the extended list back so the table picks up the new cols.
     table.cols = new_cols
     return table
Esempio n. 3
0
 def _create_random_table(self,
     table_name,
     min_col_count,
     max_col_count,
     allowed_storage_formats):
   '''Create and return a Table holding a random number of randomly typed cols.

   The number of cols is drawn with randint() from min_col_count through
   max_col_count and the storage format is drawn with choice() from
   allowed_storage_formats. Each col is named '<type name in lowercase>_col_<N>'
   where N starts at 1.
   '''
   def with_random_size(exact_type):
     # VarChar and Char types (other than String subclasses) and Decimal types are
     # replaced by a class built from randomly drawn size parameters.
     if issubclass(exact_type, VarChar) and not issubclass(exact_type, String):
       return get_varchar_class(randint(1, VarChar.MAX))
     if issubclass(exact_type, Char) and not issubclass(exact_type, String):
       return get_char_class(randint(1, Char.MAX))
     if issubclass(exact_type, Decimal):
       digit_count = randint(1, Decimal.MAX_DIGITS)
       return get_decimal_class(digit_count, randint(1, digit_count))
     return exact_type

   number_of_cols = randint(min_col_count, max_col_count)
   chosen_format = choice(allowed_storage_formats)
   table = Table(table_name)
   table.storage_format = chosen_format

   general_types = list(TYPES)
   if table.storage_format == 'AVRO':
     # Avro doesn't support timestamps yet.
     general_types.remove(Timestamp)

   # TODO: 'table.cols' returns a copy of all scalar cols, so 'table.cols.append()'
   #       doesn't actually modify the table's columns. 'table.cols' should be changed
   #       to allow access to the real columns.
   new_cols = table.cols
   for ordinal in xrange(1, number_of_cols + 1):
     general_type = choice(general_types)
     # Narrow the general type down to one of its exact types.
     candidates = [exact for exact in EXACT_TYPES if issubclass(exact, general_type)]
     col_type = with_random_size(choice(candidates))
     col_name = '%s_col_%s' % (col_type.__name__.lower(), ordinal)
     new_cols.append(Column(table, col_name, col_type))
   # Assign the extended list back so the table picks up the new cols.
   table.cols = new_cols
   return table
Esempio n. 4
0
 def create_random_table(self, table_name, min_number_of_cols,
                         max_number_of_cols, allowed_storage_formats):
     '''Create and return a Table holding a random number of randomly typed cols.

     The number of cols is drawn with randint() from min_number_of_cols through
     max_number_of_cols and the storage format is drawn with choice() from
     allowed_storage_formats. Each col is named '<type name in lowercase>_col_<N>'
     where N starts at 1.
     '''
     def with_random_size(exact_type):
         # VarChar and Char types (other than String subclasses) and Decimal
         # types are replaced by a class built from randomly drawn size
         # parameters.
         if issubclass(exact_type, VarChar) and not issubclass(exact_type, String):
             return get_varchar_class(randint(1, VarChar.MAX))
         if issubclass(exact_type, Char) and not issubclass(exact_type, String):
             return get_char_class(randint(1, Char.MAX))
         if issubclass(exact_type, Decimal):
             digit_count = randint(1, Decimal.MAX_DIGITS)
             return get_decimal_class(digit_count, randint(1, digit_count))
         return exact_type

     number_of_cols = randint(min_number_of_cols, max_number_of_cols)
     chosen_format = choice(allowed_storage_formats)
     table = Table(table_name)
     table.storage_format = chosen_format
     for ordinal in xrange(1, number_of_cols + 1):
         general_type = choice(TYPES)
         # Narrow the general type down to one of its exact types.
         candidates = [
             exact for exact in EXACT_TYPES if issubclass(exact, general_type)]
         col_type = with_random_size(choice(candidates))
         col_name = '%s_col_%s' % (col_type.__name__.lower(), ordinal)
         col = Column(table, col_name, col_type)
         # NOTE(review): assumes 'table.cols' is the table's live col list and
         # not a copy -- confirm against the Table implementation.
         table.cols.append(col)
     return table
Esempio n. 5
0
 def create_table_from_describe(self, table_name, describe_rows):
     '''Build a Table (named in lowercase) from rows of describe output.

     The first two items of each row are used as the col name and the data type
     description. A col is flagged as a primary key when its name is among the
     names returned by _fetch_primary_key_names() for table_name.
     '''
     pk_names = self._fetch_primary_key_names(table_name)
     described_table = Table(table_name.lower())
     for describe_row in describe_rows:
         name, type_desc = describe_row[:2]
         new_col = self.create_column(name, self.parse_col_desc(type_desc))
         new_col.is_primary_key = name in pk_names
         described_table.add_col(new_col)
     return described_table
Esempio n. 6
0
 def create_table_from_describe(self, table_name, describe_rows):
   '''Build a Table (named in lowercase) from rows of describe output.

   The first two items of each row are used as the col name and the data type
   description. A col is flagged as a primary key when its name is among the
   names returned by _fetch_primary_key_names() for table_name.
   '''
   pk_names = self._fetch_primary_key_names(table_name)
   described_table = Table(table_name.lower())
   for describe_row in describe_rows:
     name, type_desc = describe_row[:2]
     new_col = self.create_column(name, self.parse_col_desc(type_desc))
     new_col.is_primary_key = name in pk_names
     described_table.add_col(new_col)
   return described_table
Esempio n. 7
0
def FakeTable(name, fake_columns):
    """
    Return a Table consisting of one or more FakeColumns. Because Columns are added via
    method, we support nesting here instead.

    name: name given to the returned Table.
    fake_columns: non-empty iterable of columns; each one is added with Table.add_col().

    Raises Exception if fake_columns is empty (or None).
    """
    # Validate the arguments before building anything so bad input fails fast.
    if not fake_columns:
        raise Exception('You must supply at least one FakeColumn argument')
    table = Table(name)
    for fake_column in fake_columns:
        table.add_col(fake_column)
    return table
Esempio n. 8
0
def FakeTable(name, fake_columns, storage_format='TEXTFILE'):
  """
  Return a Table consisting of one or more FakeColumns. Because Columns are added via
  method, we support nesting here instead.

  name: name given to the returned Table.
  fake_columns: non-empty iterable of columns; each one is added with Table.add_col().
  storage_format: value assigned to the returned Table's storage_format attribute.

  Raises Exception if fake_columns is empty (or None).
  """
  # Validate the arguments before building anything so bad input fails fast.
  if not fake_columns:
    raise Exception('You must supply at least one FakeColumn argument')
  table = Table(name)
  for fake_column in fake_columns:
    table.add_col(fake_column)
  table.storage_format = storage_format
  return table
Esempio n. 9
0
 def test_table_model(self, cursor, hive_cursor):
     '''Create a two-col table through cursor and check that hive_cursor describes
     a table with the same name and cols. The table is dropped afterwards.'''
     expected = Table("some_test_table")
     # Drop any pre-existing table of the same name before creating it.
     cursor.drop_table(expected.name, if_exists=True)
     expected.storage_format = 'textfile'
     for col_name, col_type in (("bigint_col", BigInt), ("string_col", String)):
         expected.add_col(Column(expected, col_name, col_type))
     cursor.create_table(expected)
     try:
         described = hive_cursor.describe_table(expected.name)
         assert described.name == expected.name
         assert described.cols == expected.cols
     finally:
         # Always clean up, even when an assertion above fails.
         cursor.drop_table(expected.name)
Esempio n. 10
0
 def test_table_model(self, cursor, hive_cursor):
   '''Create a two-col table through cursor and check that hive_cursor describes
   a table with the same name and cols. The table is dropped afterwards.'''
   expected = Table("some_test_table")
   # Drop any pre-existing table of the same name before creating it.
   cursor.drop_table(expected.name, if_exists=True)
   expected.storage_format = 'textfile'
   for col_name, col_type in (("bigint_col", BigInt), ("string_col", String)):
     expected.add_col(Column(expected, col_name, col_type))
   cursor.create_table(expected)
   try:
     described = hive_cursor.describe_table(expected.name)
     assert described.name == expected.name
     assert described.cols == expected.cols
   finally:
     # Always clean up, even when an assertion above fails.
     cursor.drop_table(expected.name)
Esempio n. 11
0
 def describe_table(self, table_name):
   '''Return a Table with table and col names always in lowercase.

   The describe SQL comes from make_describe_table_sql() and is run through
   self.conn; the first two items of each result row are used as the col name and
   the data type.
   '''
   def simplified(type_desc):
     if type_desc == 'tinyint(1)':
       # Just assume this is a boolean...
       return 'boolean'
     if 'decimal' not in type_desc and '(' in type_desc:
       # Strip the size of the data type
       return type_desc[:type_desc.index('(')]
     return type_desc

   rows = self.conn.execute_and_fetchall(
       self.make_describe_table_sql(table_name))
   table = Table(table_name.lower())
   described_cols = table.cols   # This is a copy
   for row in rows:
     col_name, data_type = row[:2]
     col_type_name = simplified(data_type)
     described_cols.append(
         Column(table, col_name.lower(), self.parse_data_type(col_type_name)))
   # Assign the extended copy back so the table picks up the new cols.
   table.cols = described_cols
   return table
Esempio n. 12
0
 def describe_table(self, table_name):
   '''Return a Table with table and col names always in lowercase.

   The describe SQL comes from make_describe_table_sql() and is run through
   self.conn; the first two items of each result row are used as the col name and
   the data type.
   '''
   def simplified(type_desc):
     if type_desc == 'tinyint(1)':
       # Just assume this is a boolean...
       return 'boolean'
     if 'decimal' not in type_desc and '(' in type_desc:
       # Strip the size of the data type
       return type_desc[:type_desc.index('(')]
     return type_desc

   rows = self.conn.execute_and_fetchall(
       self.make_describe_table_sql(table_name))
   table = Table(table_name.lower())
   described_cols = table.cols   # This is a copy
   for row in rows:
     col_name, data_type = row[:2]
     col_type_name = simplified(data_type)
     described_cols.append(
         Column(table, col_name.lower(), self.parse_data_type(col_type_name)))
   # Assign the extended copy back so the table picks up the new cols.
   table.cols = described_cols
   return table
def test_hive_create_equality_only_joins():
  """
  Tests that QueryGenerator produces a join condition with only equality functions if the
  HiveProfile is used.
  """

  class FakeHiveQueryProfile(HiveProfile):
    """
    A fake QueryProfile that extends the HiveProfile, various weights are modified in
    order to ensure that this test is deterministic.
    """

    def choose_join_condition_count(self):
      """
      There should be only one operator in the JOIN condition
      """
      return 1

    def choose_conjunct_disjunct_fill_ratio(self):
      """
      There should be no AND or OR operators
      """
      return 0

    def choose_relational_func_fill_ratio(self):
      """
      Force all operators to be relational
      """
      return 1

  query_generator = QueryGenerator(FakeHiveQueryProfile())

  # Create two tables that have one joinable Column. Each Column is given its owning
  # Table object (not the table's name) as the first argument.
  right_table_expr_list = TableExprList()
  right_table = Table("right_table")
  right_table.add_col(Column(right_table, "right_col", Int))
  right_table_expr_list.append(right_table)

  left_table_expr_list = TableExprList()
  left_table = Table("left_table")
  left_table.add_col(Column(left_table, "left_col", Int))
  left_table_expr_list.append(left_table)

  # Validate the root predicate is an Equals func
  join_condition = query_generator._create_relational_join_condition(
      right_table_expr_list, left_table_expr_list)
  assert isinstance(join_condition, Equals)
Esempio n. 14
0
def test_use_nested_width_subquery():
  """
  Tests that setting DefaultProfile.use_nested_with to False works properly. Setting this
  method to return False should prevent a WITH clause from being used inside a sub-query.
  """

  class MockQueryProfile(DefaultProfile):
    """
    A mock QueryProfile that sets use_nested_with to False and forces the
    QueryGenerator to create nested queries.
    """

    def __init__(self):
      super(MockQueryProfile, self).__init__()

      # Force the QueryGenerator to create nested queries
      self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)

      # Force the QueryGenerator to use WITH clauses whenever possible
      self._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1

      # Force the QueryGenerator to create inline views whenever possible
      self._probabilities['MISC']['INLINE_VIEW'] = 1

    def use_nested_with(self):
      return False

  mock_query_gen = QueryGenerator(MockQueryProfile())

  # Create two tables. Each Column is given its owning Table object (not the table's
  # name) as the first argument.
  table_expr_list = TableExprList()

  right_table = Table("right_table")
  right_table.add_col(Column(right_table, "right_col", Int))
  table_expr_list.append(right_table)

  left_table = Table("left_table")
  left_table.add_col(Column(left_table, "left_col", Int))
  table_expr_list.append(left_table)

  # Check that each nested_query doesn't have a with clause
  for nested_query in mock_query_gen.generate_statement(table_expr_list).nested_queries:
    assert nested_query.with_clause is None
Esempio n. 15
0
 def describe_table(self, table_name):
   '''Return a Table with table and col names always in lowercase.

   Each described data type must match SQL_TYPE_PATTERN and its name (group 1,
   uppercased) must have an entry in TYPE_NAME_ALIASES; otherwise an Exception is
   raised. When group 2 is present it is parsed, minus its first and last character,
   as a comma separated list of integer sizes.
   '''
   rows = self.execute_and_fetchall(self.make_describe_table_sql(table_name))
   table = Table(table_name.lower())
   for row in rows:
     col_name, data_type = row[:2]
     match = self.SQL_TYPE_PATTERN.match(data_type)
     if not match:
       raise Exception('Unexpected data type format: %s' % data_type)
     described_name = match.group(1)
     type_name = self.TYPE_NAME_ALIASES.get(described_name.upper())
     if not type_name:
       raise Exception('Unknown data type: ' + described_name)
     size_text = match.group(2) if len(match.groups()) > 1 else None
     type_size = (None if size_text is None
                  else [int(part) for part in size_text[1:-1].split(',')])
     # NOTE(review): assumes 'table.cols' is the table's live col list and not a
     # copy -- confirm against the Table implementation.
     table.cols.append(
         Column(table, col_name.lower(), self.parse_data_type(type_name, type_size)))
   self.load_unique_col_metadata(table)
   return table
def test_use_nested_width_subquery():
  """
  Tests that setting DefaultProfile.use_nested_with to False works properly. Setting this
  method to return False should prevent a WITH clause from being used inside a sub-query.
  """

  class MockQueryProfile(DefaultProfile):
    """
    A mock QueryProfile that sets use_nested_with to False and forces the
    QueryGenerator to create nested queries.
    """

    def __init__(self):
      super(MockQueryProfile, self).__init__()

      # Force the QueryGenerator to create nested queries
      self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)

      # Force the QueryGenerator to use WITH clauses whenever possible
      self._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1

      # Force the QueryGenerator to create inline views whenever possible
      self._probabilities['MISC']['INLINE_VIEW'] = 1

    def use_nested_with(self):
      return False

  mock_query_gen = QueryGenerator(MockQueryProfile())

  # Create two tables. Each Column is given its owning Table object (not the table's
  # name) as the first argument.
  table_expr_list = TableExprList()

  right_table = Table("right_table")
  right_table.add_col(Column(right_table, "right_col", Int))
  table_expr_list.append(right_table)

  left_table = Table("left_table")
  left_table.add_col(Column(left_table, "left_col", Int))
  table_expr_list.append(left_table)

  # Check that each nested_query doesn't have a with clause
  for nested_query in mock_query_gen.create_query(table_expr_list).nested_queries:
    assert nested_query.with_clause is None