def _create_random_table(self, table_name, min_col_count, max_col_count,
                         allowed_storage_formats):
    '''Build a Table named 'table_name' populated with randomly typed columns.

    The number of columns is picked with randint() between 'min_col_count' and
    'max_col_count' (both inclusive) and the storage format is picked with
    choice() from 'allowed_storage_formats'. Every column is named
    '<lowercased type name>_col_<1-based position>'. The populated Table is
    returned.
    '''
    num_cols = randint(min_col_count, max_col_count)
    chosen_format = choice(allowed_storage_formats)

    table = Table(table_name)
    table.storage_format = chosen_format

    candidate_types = list(TYPES)
    if table.storage_format == 'AVRO':
        # Avro doesn't support timestamps yet.
        candidate_types.remove(Timestamp)

    # TODO: 'table.cols' returns a copy of all scalar cols, so 'table.cols.append()'
    #       doesn't actually modify the table's columns. 'table.cols' should be
    #       changed to allow access to the real columns.
    columns = table.cols
    for position in xrange(1, num_cols + 1):
        # Pick a general type first, then narrow it down to one of its exact types.
        general_type = choice(candidate_types)
        exact_type = choice(
            [type_ for type_ in EXACT_TYPES if issubclass(type_, general_type)])

        # String also passes the VarChar/Char subclass checks below but takes no
        # length, so it is deliberately left untouched.
        takes_length = not issubclass(exact_type, String)
        if takes_length and issubclass(exact_type, VarChar):
            exact_type = get_varchar_class(randint(1, VarChar.MAX))
        elif takes_length and issubclass(exact_type, Char):
            exact_type = get_char_class(randint(1, Char.MAX))
        elif issubclass(exact_type, Decimal):
            total_digits = randint(1, Decimal.MAX_DIGITS)
            exact_type = get_decimal_class(total_digits, randint(1, total_digits))

        column_name = '%s_col_%s' % (exact_type.__name__.lower(), position)
        columns.append(Column(table, column_name, exact_type))

    # Assign the list back because 'table.cols' handed out a copy (see TODO above).
    table.cols = columns
    return table
def _create_random_table(self, table_name, min_col_count, max_col_count,
                         allowed_storage_formats):
    '''Build a Table named 'table_name' populated with randomly typed columns.

    The number of columns is picked with randint() between 'min_col_count' and
    'max_col_count' (both inclusive) and the storage format is picked with
    choice() from 'allowed_storage_formats'. Every column is named
    '<lowercased type name>_col_<1-based position>'. The populated Table is
    returned.
    '''
    num_cols = randint(min_col_count, max_col_count)
    chosen_format = choice(allowed_storage_formats)

    table = Table(table_name)
    table.storage_format = chosen_format

    candidate_types = list(TYPES)
    if table.storage_format == 'AVRO':
        # Avro doesn't support timestamps yet.
        candidate_types.remove(Timestamp)

    # TODO: 'table.cols' returns a copy of all scalar cols, so 'table.cols.append()'
    #       doesn't actually modify the table's columns. 'table.cols' should be
    #       changed to allow access to the real columns.
    columns = table.cols
    for position in xrange(1, num_cols + 1):
        # Pick a general type first, then narrow it down to one of its exact types.
        general_type = choice(candidate_types)
        exact_type = choice(
            [type_ for type_ in EXACT_TYPES if issubclass(type_, general_type)])

        # String also passes the VarChar/Char subclass checks below but takes no
        # length, so it is deliberately left untouched.
        takes_length = not issubclass(exact_type, String)
        if takes_length and issubclass(exact_type, VarChar):
            exact_type = get_varchar_class(randint(1, VarChar.MAX))
        elif takes_length and issubclass(exact_type, Char):
            exact_type = get_char_class(randint(1, Char.MAX))
        elif issubclass(exact_type, Decimal):
            total_digits = randint(1, Decimal.MAX_DIGITS)
            exact_type = get_decimal_class(total_digits, randint(1, total_digits))

        column_name = '%s_col_%s' % (exact_type.__name__.lower(), position)
        columns.append(Column(table, column_name, exact_type))

    # Assign the list back because 'table.cols' handed out a copy (see TODO above).
    table.cols = columns
    return table
def create_column(self, col_name, col_type):
    '''Create the column object described by a parsed column type.

    Takes the output from parse_col_desc and creates the right column type.
    This method returns one of Column, ArrayColumn, MapColumn, StructColumn.

    'col_type' is either a plain string (a type name without parameters) or a
    sequence whose first item is the type name and whose remaining items are
    its parameters:
      - ARRAY:   col_type[1] is the item type
      - MAP:     col_type[1] is the key type, col_type[2] is the value type
      - STRUCT:  col_type[1:] are (field_name, field_type) pairs
      - DECIMAL: col_type[1] and col_type[2] are passed, as ints, to
                 get_decimal_class()
      - CHAR / VARCHAR: col_type[1] is the length

    The column name is always lowercased. A VARCHAR without a length, or with a
    length above VarChar.MAX, is mapped to STRING.

    Raises Exception('unable to parse: ...') when a parameterized type name is
    missing from TYPE_NAME_ALIASES or resolves to a type that is not handled
    here.
    '''
    if isinstance(col_type, str):
        # Plain type name without parameters.
        if col_type.upper() == 'VARCHAR':
            col_type = 'STRING'
        type_name = self.TYPE_NAME_ALIASES.get(col_type.upper())
        return Column(owner=None, name=col_name.lower(),
                      exact_type=self.TYPES_BY_NAME[type_name])

    general_class = col_type[0].upper()

    # Nested types recurse into their element types.
    if general_class == 'ARRAY':
        return ArrayColumn(
            owner=None, name=col_name.lower(),
            item=self.create_column(col_name='item', col_type=col_type[1]))
    if general_class == 'MAP':
        return MapColumn(
            owner=None, name=col_name.lower(),
            key=self.create_column(col_name='key', col_type=col_type[1]),
            value=self.create_column(col_name='value', col_type=col_type[2]))
    if general_class == 'STRUCT':
        struct_col = StructColumn(owner=None, name=col_name.lower())
        for field_name, field_type in col_type[1:]:
            struct_col.add_col(self.create_column(field_name, field_type))
        return struct_col

    # Parameterized scalar types. The alias lookup yields None for an unknown
    # name; that case must fall through to the descriptive error below instead
    # of failing with AttributeError on None.upper().
    scalar_class = self.TYPE_NAME_ALIASES.get(general_class)
    if scalar_class is not None:
        scalar_class = scalar_class.upper()
        if scalar_class == 'DECIMAL':
            return Column(
                owner=None, name=col_name.lower(),
                exact_type=get_decimal_class(int(col_type[1]), int(col_type[2])))
        if scalar_class == 'CHAR':
            return Column(owner=None, name=col_name.lower(),
                          exact_type=get_char_class(int(col_type[1])))
        if scalar_class == 'VARCHAR':
            type_size = int(col_type[1])
            if type_size <= VarChar.MAX:
                cur_type = get_varchar_class(type_size)
            else:
                # Too long for a VARCHAR, fall back to STRING.
                cur_type = self.TYPES_BY_NAME['STRING']
            return Column(owner=None, name=col_name.lower(), exact_type=cur_type)

    raise Exception('unable to parse: {0}, type: {1}'.format(col_name, col_type))
def parse_data_type(self, type_name, type_size):
    '''Return the exact type that matches 'type_name' and 'type_size'.

    'type_size' holds the type's size arguments and is unpacked into the
    matching get_*_class() helper for DECIMAL/NUMERIC, CHAR and VARCHAR. A
    VARCHAR with no size, or whose first size argument exceeds VarChar.MAX, is
    treated as STRING. Every other name is looked up in self.TYPES_BY_NAME and
    'type_size' is ignored.
    '''
    if type_name == 'VARCHAR':
        if type_size and type_size[0] <= VarChar.MAX:
            return get_varchar_class(*type_size)
        # Unsized or oversized VARCHAR: resolve it as STRING below.
        type_name = 'STRING'
    elif type_name == 'CHAR':
        return get_char_class(*type_size)
    elif type_name == 'DECIMAL' or type_name == 'NUMERIC':
        return get_decimal_class(*type_size)
    return self.TYPES_BY_NAME[type_name]
def create_column(self, col_name, col_type):
    '''Create the column object described by a parsed column type.

    Takes the output from parse_col_desc and creates the right column type.
    This method returns one of Column, ArrayColumn, MapColumn, StructColumn.

    'col_type' is either a plain string (a type name without parameters) or a
    sequence whose first item is the type name and whose remaining items are
    its parameters:
      - ARRAY:   col_type[1] is the item type
      - MAP:     col_type[1] is the key type, col_type[2] is the value type
      - STRUCT:  col_type[1:] are (field_name, field_type) pairs
      - DECIMAL: col_type[1] and col_type[2] are passed, as ints, to
                 get_decimal_class()
      - CHAR / VARCHAR: col_type[1] is the length

    The column name is always lowercased. A VARCHAR without a length, or with a
    length above VarChar.MAX, is mapped to STRING.

    Raises Exception('unable to parse: ...') when a parameterized type name is
    missing from TYPE_NAME_ALIASES or resolves to a type that is not handled
    here.
    '''
    if isinstance(col_type, str):
        # Plain type name without parameters.
        if col_type.upper() == 'VARCHAR':
            col_type = 'STRING'
        type_name = self.TYPE_NAME_ALIASES.get(col_type.upper())
        return Column(owner=None, name=col_name.lower(),
                      exact_type=self.TYPES_BY_NAME[type_name])

    general_class = col_type[0].upper()

    # Nested types recurse into their element types.
    if general_class == 'ARRAY':
        return ArrayColumn(
            owner=None, name=col_name.lower(),
            item=self.create_column(col_name='item', col_type=col_type[1]))
    if general_class == 'MAP':
        return MapColumn(
            owner=None, name=col_name.lower(),
            key=self.create_column(col_name='key', col_type=col_type[1]),
            value=self.create_column(col_name='value', col_type=col_type[2]))
    if general_class == 'STRUCT':
        struct_col = StructColumn(owner=None, name=col_name.lower())
        for field_name, field_type in col_type[1:]:
            struct_col.add_col(self.create_column(field_name, field_type))
        return struct_col

    # Parameterized scalar types. The alias lookup yields None for an unknown
    # name; that case must fall through to the descriptive error below instead
    # of failing with AttributeError on None.upper().
    scalar_class = self.TYPE_NAME_ALIASES.get(general_class)
    if scalar_class is not None:
        scalar_class = scalar_class.upper()
        if scalar_class == 'DECIMAL':
            return Column(
                owner=None, name=col_name.lower(),
                exact_type=get_decimal_class(int(col_type[1]), int(col_type[2])))
        if scalar_class == 'CHAR':
            return Column(owner=None, name=col_name.lower(),
                          exact_type=get_char_class(int(col_type[1])))
        if scalar_class == 'VARCHAR':
            type_size = int(col_type[1])
            if type_size <= VarChar.MAX:
                cur_type = get_varchar_class(type_size)
            else:
                # Too long for a VARCHAR, fall back to STRING.
                cur_type = self.TYPES_BY_NAME['STRING']
            return Column(owner=None, name=col_name.lower(), exact_type=cur_type)

    raise Exception('unable to parse: {0}, type: {1}'.format(col_name, col_type))