def _create_random_table(self,
     table_name,
     min_col_count,
     max_col_count,
     allowed_storage_formats):
   '''Return a new Table filled with a random number of randomly typed cols.

   The col count comes from randint(min_col_count, max_col_count) and the storage
   format is picked with choice() from 'allowed_storage_formats'. Each col is named
   '<lower-cased type name>_col_<1-based position>'.
   '''
   def random_exact_type(candidates):
     # Pick one of 'candidates', then narrow it down to one of the EXACT_TYPES that
     # subclass it.
     general_type = choice(candidates)
     exact_type = choice(
         [type_ for type_ in EXACT_TYPES if issubclass(type_, general_type)])
     # Types that subclass String skip the VarChar/Char re-sizing below.
     is_string = issubclass(exact_type, String)
     if not is_string and issubclass(exact_type, VarChar):
       return get_varchar_class(randint(1, VarChar.MAX))
     if not is_string and issubclass(exact_type, Char):
       return get_char_class(randint(1, Char.MAX))
     if issubclass(exact_type, Decimal):
       digit_count = randint(1, Decimal.MAX_DIGITS)
       return get_decimal_class(digit_count, randint(1, digit_count))
     return exact_type

   column_total = randint(min_col_count, max_col_count)
   chosen_format = choice(allowed_storage_formats)
   table = Table(table_name)
   table.storage_format = chosen_format

   candidate_types = list(TYPES)
   # Avro doesn't support timestamps yet.
   if table.storage_format == 'AVRO':
     candidate_types.remove(Timestamp)

   # TODO: 'table.cols' returns a copy of all scalar cols, so 'table.cols.append()'
   #       doesn't actually modify the table's columns. 'table.cols' should be changed
   #       to allow access to the real columns.
   new_cols = table.cols
   for col_number in xrange(1, column_total + 1):
     exact_type = random_exact_type(candidate_types)
     col_name = '%s_col_%s' % (exact_type.__name__.lower(), col_number)
     new_cols.append(Column(table, col_name, exact_type))
   # Assign the extended copy back so the table actually receives the new cols.
   table.cols = new_cols
   return table
예제 #2
0
 def _create_random_table(self, table_name, min_col_count, max_col_count,
                          allowed_storage_formats):
     '''Return a new Table filled with a random number of randomly typed cols.

     The col count comes from randint(min_col_count, max_col_count) and the
     storage format is picked with choice() from 'allowed_storage_formats'. Each
     col is named '<lower-cased type name>_col_<1-based position>'.
     '''
     def random_exact_type(candidates):
         # Pick one of 'candidates', then narrow it down to one of the
         # EXACT_TYPES that subclass it.
         general_type = choice(candidates)
         exact_type = choice([
             type_ for type_ in EXACT_TYPES
             if issubclass(type_, general_type)
         ])
         # Types that subclass String skip the VarChar/Char re-sizing below.
         is_string = issubclass(exact_type, String)
         if not is_string and issubclass(exact_type, VarChar):
             return get_varchar_class(randint(1, VarChar.MAX))
         if not is_string and issubclass(exact_type, Char):
             return get_char_class(randint(1, Char.MAX))
         if issubclass(exact_type, Decimal):
             digit_count = randint(1, Decimal.MAX_DIGITS)
             return get_decimal_class(digit_count, randint(1, digit_count))
         return exact_type

     column_total = randint(min_col_count, max_col_count)
     chosen_format = choice(allowed_storage_formats)
     table = Table(table_name)
     table.storage_format = chosen_format

     candidate_types = list(TYPES)
     # Avro doesn't support timestamps yet.
     if table.storage_format == 'AVRO':
         candidate_types.remove(Timestamp)

     # TODO: 'table.cols' returns a copy of all scalar cols, so 'table.cols.append()'
     #       doesn't actually modify the table's columns. 'table.cols' should be changed
     #       to allow access to the real columns.
     new_cols = table.cols
     for col_number in xrange(1, column_total + 1):
         exact_type = random_exact_type(candidate_types)
         col_name = '%s_col_%s' % (exact_type.__name__.lower(), col_number)
         new_cols.append(Column(table, col_name, exact_type))
     # Assign the extended copy back so the table actually receives the new cols.
     table.cols = new_cols
     return table
예제 #3
0
    def create_column(self, col_name, col_type):
        '''Takes the output from parse_col_desc and creates the right column type. This
        method returns one of Column, ArrayColumn, MapColumn, StructColumn.

        col_name: name of the column; it is lower-cased before being stored.
        col_type: either a plain type-name string, or an indexable whose first item
            is the type name and whose remaining items depend on that type:
              ARRAY   -> [1] is the item type
              MAP     -> [1] is the key type, [2] is the value type
              STRUCT  -> [1:] are (field_name, field_type) pairs
              DECIMAL -> [1] and [2] are passed to get_decimal_class as ints
              CHAR    -> [1] is passed to get_char_class as an int
              VARCHAR -> [1] is the size; above VarChar.MAX the col becomes STRING
            Nested types are built by calling this method recursively.

        Raises Exception('unable to parse: ...') if a non-string col_type names a
        type that is not handled here, including names that are missing from
        TYPE_NAME_ALIASES.
        '''
        if isinstance(col_type, str):
            # A VARCHAR given as a plain string is mapped to STRING.
            if col_type.upper() == 'VARCHAR':
                col_type = 'STRING'
            type_name = self.TYPE_NAME_ALIASES.get(col_type.upper())
            return Column(owner=None,
                          name=col_name.lower(),
                          exact_type=self.TYPES_BY_NAME[type_name])

        general_class = col_type[0].upper()

        if general_class == 'ARRAY':
            return ArrayColumn(owner=None,
                               name=col_name.lower(),
                               item=self.create_column(col_name='item',
                                                       col_type=col_type[1]))

        if general_class == 'MAP':
            return MapColumn(owner=None,
                             name=col_name.lower(),
                             key=self.create_column(col_name='key',
                                                    col_type=col_type[1]),
                             value=self.create_column(col_name='value',
                                                      col_type=col_type[2]))

        if general_class == 'STRUCT':
            struct_col = StructColumn(owner=None, name=col_name.lower())
            for field_name, field_type in col_type[1:]:
                struct_col.add_col(self.create_column(field_name, field_type))
            return struct_col

        # Resolve aliases for the remaining sized types. An unknown name has no
        # alias: keep None so that none of the checks below match and the
        # descriptive error at the bottom is raised, rather than failing with an
        # AttributeError from calling upper() on None.
        alias = self.TYPE_NAME_ALIASES.get(general_class)
        general_class = alias.upper() if alias is not None else None

        if general_class == 'DECIMAL':
            return Column(owner=None,
                          name=col_name.lower(),
                          exact_type=get_decimal_class(int(col_type[1]),
                                                       int(col_type[2])))

        if general_class == 'CHAR':
            return Column(owner=None,
                          name=col_name.lower(),
                          exact_type=get_char_class(int(col_type[1])))

        if general_class == 'VARCHAR':
            type_size = int(col_type[1])
            # Sizes above VarChar.MAX fall back to the STRING type.
            if type_size <= VarChar.MAX:
                cur_type = get_varchar_class(type_size)
            else:
                cur_type = self.TYPES_BY_NAME['STRING']
            return Column(owner=None,
                          name=col_name.lower(),
                          exact_type=cur_type)

        raise Exception('unable to parse: {0}, type: {1}'.format(
            col_name, col_type))
예제 #4
0
 def parse_data_type(self, type_name, type_size):
   '''Resolve a type name plus its size arguments to a type class.

   DECIMAL/NUMERIC, CHAR and VARCHAR are built by passing the unpacked 'type_size'
   to the matching get_*_class function. A VARCHAR whose 'type_size' is empty/None,
   or whose first size value is above VarChar.MAX, is looked up as STRING instead.
   Every other name is looked up directly in self.TYPES_BY_NAME.
   '''
   lookup_name = type_name
   if type_name == 'VARCHAR':
     # The size is only inspected when one was supplied.
     if type_size and type_size[0] <= VarChar.MAX:
       return get_varchar_class(*type_size)
     lookup_name = 'STRING'
   elif type_name == 'CHAR':
     return get_char_class(*type_size)
   elif type_name in ('DECIMAL', 'NUMERIC'):
     return get_decimal_class(*type_size)
   return self.TYPES_BY_NAME[lookup_name]
예제 #5
0
  def create_column(self, col_name, col_type):
    '''Takes the output from parse_col_desc and creates the right column type. This
    method returns one of Column, ArrayColumn, MapColumn, StructColumn.

    col_name: name of the column; it is lower-cased before being stored.
    col_type: either a plain type-name string, or an indexable whose first item is
        the type name and whose remaining items depend on that type:
          ARRAY   -> [1] is the item type
          MAP     -> [1] is the key type, [2] is the value type
          STRUCT  -> [1:] are (field_name, field_type) pairs
          DECIMAL -> [1] and [2] are passed to get_decimal_class as ints
          CHAR    -> [1] is passed to get_char_class as an int
          VARCHAR -> [1] is the size; above VarChar.MAX the col becomes STRING
        Nested types are built by calling this method recursively.

    Raises Exception('unable to parse: ...') if a non-string col_type names a type
    that is not handled here, including names that are missing from
    TYPE_NAME_ALIASES.
    '''
    if isinstance(col_type, str):
      # A VARCHAR given as a plain string is mapped to STRING.
      if col_type.upper() == 'VARCHAR':
        col_type = 'STRING'
      type_name = self.TYPE_NAME_ALIASES.get(col_type.upper())
      return Column(owner=None,
          name=col_name.lower(),
          exact_type=self.TYPES_BY_NAME[type_name])

    general_class = col_type[0].upper()

    if general_class == 'ARRAY':
      return ArrayColumn(
          owner=None,
          name=col_name.lower(),
          item=self.create_column(col_name='item', col_type=col_type[1]))

    if general_class == 'MAP':
      return MapColumn(
          owner=None,
          name=col_name.lower(),
          key=self.create_column(col_name='key', col_type=col_type[1]),
          value=self.create_column(col_name='value', col_type=col_type[2]))

    if general_class == 'STRUCT':
      struct_col = StructColumn(owner=None, name=col_name.lower())
      for field_name, field_type in col_type[1:]:
        struct_col.add_col(self.create_column(field_name, field_type))
      return struct_col

    # Resolve aliases for the remaining sized types. An unknown name has no alias:
    # keep None so that none of the checks below match and the descriptive error at
    # the bottom is raised, rather than failing with an AttributeError from calling
    # upper() on None.
    alias = self.TYPE_NAME_ALIASES.get(general_class)
    general_class = alias.upper() if alias is not None else None

    if general_class == 'DECIMAL':
      return Column(owner=None,
          name=col_name.lower(),
          exact_type=get_decimal_class(int(col_type[1]), int(col_type[2])))

    if general_class == 'CHAR':
      return Column(owner=None,
          name=col_name.lower(),
          exact_type=get_char_class(int(col_type[1])))

    if general_class == 'VARCHAR':
      type_size = int(col_type[1])
      # Sizes above VarChar.MAX fall back to the STRING type.
      if type_size <= VarChar.MAX:
        cur_type = get_varchar_class(type_size)
      else:
        cur_type = self.TYPES_BY_NAME['STRING']
      return Column(owner=None,
          name=col_name.lower(),
          exact_type=cur_type)

    raise Exception('unable to parse: {0}, type: {1}'.format(col_name, col_type))