def test_letter_name(self):
    """Check ``letter_name`` against known index/name pairs (upper-case names)."""
    # The expected values for indexes above 25 repeat a single letter
    # (30 -> 'EE', 77 -> 'ZZZ').
    expected_names = (
        (0, 'A'),
        (4, 'E'),
        (25, 'Z'),
        (30, 'EE'),
        (77, 'ZZZ'),
    )

    for index, expected in expected_names:
        self.assertEqual(letter_name(index), expected)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Build a table from a sequence of rows.

    :param rows:
        A sequence of row sequences. Passing a string raises
        :exc:`ValueError`. Unless ``_is_fork`` is set, every value is cast
        with its column's ``cast`` function and rows shorter than the
        number of columns are padded with ``None``.
    :param column_names:
        Optional sequence of column names. Each entry must be a string or
        ``None``. ``None`` entries are replaced by
        ``utils.letter_name(i)`` and repeated names receive a numeric
        suffix (``_2``, ``_3``, ...); both cases emit a warning. When no
        names are given they are generated from the length of the first
        row, or left empty if there are no rows.
    :param column_types:
        Optional sequence of :class:`DataType` instances, or a
        :class:`TypeTester`. Defaults to a new :class:`TypeTester`, whose
        ``run(rows, column_names)`` result supplies the column types.
    :param row_names:
        Optional column name (string), callable taking a row, or sequence
        of names, used to name the rows.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        If ``rows`` is a string, a column name is neither a string nor
        ``None``, a column type is not a :class:`DataType`, the numbers of
        names and types differ, a row has more values than there are
        columns, or ``row_names`` is of an unsupported kind.
    """
    if isinstance(rows, six.string_types):
        raise ValueError(
            "When created directly, the first argument to Table must be a sequence of rows. "
            "Did you want agate.Table.from_csv?"
        )

    # Validate column names
    if column_names:
        final_column_names = []

        for i, column_name in enumerate(column_names):
            if column_name is None:
                new_column_name = utils.letter_name(i)
                warnings.warn(
                    'Column name not specified. "%s" will be used as name.' % new_column_name,
                    RuntimeWarning
                )
            elif isinstance(column_name, six.string_types):
                new_column_name = column_name
            else:
                raise ValueError("Column names must be strings or None.")

            # Try "_2", "_3", ... until the name no longer collides with an
            # earlier column.
            final_column_name = new_column_name
            duplicates = 0

            while final_column_name in final_column_names:
                final_column_name = new_column_name + "_" + str(duplicates + 2)
                duplicates += 1

            if duplicates > 0:
                warn_duplicate_column(new_column_name, final_column_name)

            final_column_names.append(final_column_name)

        self._column_names = tuple(final_column_names)
    elif rows:
        self._column_names = tuple(utils.letter_name(i) for i in range(len(rows[0])))
        warnings.warn(
            'Column names not specified. "%s" will be used as names.' % str(self._column_names),
            RuntimeWarning,
            stacklevel=2
        )
    else:
        # Store a tuple here too, so the names are immutable on every path.
        self._column_names = tuple()

    len_column_names = len(self._column_names)

    # Validate column_types
    if column_types is None:
        column_types = TypeTester()
    elif not isinstance(column_types, TypeTester):
        for column_type in column_types:
            if not isinstance(column_type, DataType):
                raise ValueError("Column types must be instances of DataType.")

    if isinstance(column_types, TypeTester):
        self._column_types = column_types.run(rows, self._column_names)
    else:
        self._column_types = tuple(column_types)

    if len_column_names != len(self._column_types):
        raise ValueError("column_names and column_types must be the same length.")

    if not _is_fork:
        new_rows = []
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    "Row %i has %i values, but Table only has %i columns." % (i, len_row, len_column_names)
                )
            elif len_row < len_column_names:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (len_column_names - len_row))

            new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
    else:
        new_rows = rows

    if row_names:
        computed_row_names = []

        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            for row in new_rows:
                computed_row_names.append(row[row_names])
        elif hasattr(row_names, "__call__"):
            for row in new_rows:
                computed_row_names.append(row_names(row))
        elif utils.issequence(row_names):
            computed_row_names = row_names
        else:
            raise ValueError("row_names must be a column name, function or sequence")

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns
    new_columns = []

    for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
        column = Column(i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)
def test_letter_name(self):
    """Check ``letter_name`` against known index/name pairs (lower-case names)."""
    # The expected values for indexes above 25 repeat a single letter
    # (30 -> 'ee', 77 -> 'zzz').
    expected_names = (
        (0, 'a'),
        (4, 'e'),
        (25, 'z'),
        (30, 'ee'),
        (77, 'zzz'),
    )

    for index, expected in expected_names:
        self.assertEqual(letter_name(index), expected)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Build a table from a sequence of rows.

    :param rows:
        A sequence of row sequences. Unless ``_is_fork`` is set, every
        value is cast with its column's ``cast`` function and rows shorter
        than the number of columns are padded with ``None``.
    :param column_names:
        Optional sequence of column names. Each entry must be a string or
        ``None``; ``None`` entries are replaced by
        ``utils.letter_name(i)``. When no names are given they are
        generated from the length of the first row, or left empty if there
        are no rows.
    :param column_types:
        Optional sequence of :class:`DataType` instances, or a
        :class:`TypeTester`. Defaults to a new :class:`TypeTester`, whose
        ``run(rows, column_names)`` result supplies the column types.
    :param row_names:
        Optional column name (string), callable taking a row, or
        ``Sequence`` of names, used to name the rows.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        If a column name is neither a string nor ``None``, names are
        duplicated, a column type is not a :class:`DataType`, the numbers
        of names and types differ, a row has more values than there are
        columns, or ``row_names`` is of an unsupported kind.
    """
    # Validate column names
    if column_names:
        final_column_names = []

        for i, column_name in enumerate(column_names):
            if column_name is None:
                final_column_names.append(utils.letter_name(i))
            elif isinstance(column_name, six.string_types):
                final_column_names.append(column_name)
            else:
                raise ValueError('Column names must be strings or None.')

        if len(set(final_column_names)) != len(final_column_names):
            raise ValueError('Duplicate column names are not allowed.')

        self._column_names = tuple(final_column_names)
    elif rows:
        self._column_names = tuple(utils.letter_name(i) for i in range(len(rows[0])))
    else:
        # No names and no rows: there is no first row to size the columns
        # from, so the table simply has no columns.
        self._column_names = tuple()

    len_column_names = len(self._column_names)

    # Validate column_types
    if column_types is None:
        column_types = TypeTester()
    elif not isinstance(column_types, TypeTester):
        for column_type in column_types:
            if not isinstance(column_type, DataType):
                raise ValueError('Column types must be instances of DataType.')

    if isinstance(column_types, TypeTester):
        self._column_types = column_types.run(rows, self._column_names)
    else:
        self._column_types = tuple(column_types)

    if len_column_names != len(self._column_types):
        raise ValueError('column_names and column_types must be the same length.')

    if not _is_fork:
        new_rows = []
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
            elif len_row < len_column_names:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (len_column_names - len_row))

            new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
    else:
        new_rows = rows

    if row_names:
        computed_row_names = []

        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            for row in new_rows:
                computed_row_names.append(row[row_names])
        elif hasattr(row_names, '__call__'):
            for row in new_rows:
                computed_row_names.append(row_names(row))
        elif isinstance(row_names, Sequence):
            computed_row_names = row_names
        else:
            raise ValueError('row_names must be a column name, function or sequence')

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns
    new_columns = []

    for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
        column = Column(i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Build a table from a sequence of rows.

    :param rows:
        A sequence of row sequences. Passing a string raises
        :exc:`ValueError`. Unless ``_is_fork`` is set, every value is cast
        with its column's ``cast`` function and rows shorter than the
        number of columns are padded with ``None``.
    :param column_names:
        Optional sequence of column names. Each entry must be a string or
        ``None``. ``None`` entries are replaced by
        ``utils.letter_name(i)`` and repeated names receive a numeric
        suffix (``_2``, ``_3``, ...); both cases emit a warning. When no
        names are given they are generated from the length of the first
        row, or left empty if there are no rows.
    :param column_types:
        Optional sequence of :class:`DataType` instances, a dict whose
        values are :class:`DataType` instances (passed to
        ``TypeTester(force=...)``), or a :class:`TypeTester`. Defaults to
        a new :class:`TypeTester`, whose ``run(rows, column_names)``
        result supplies the column types.
    :param row_names:
        Optional column name (string), callable taking a row, or sequence
        of names, used to name the rows.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        If ``rows`` is a string, a column name is neither a string nor
        ``None``, a column type is not a :class:`DataType`, the numbers of
        names and types differ, a row has more values than there are
        columns, or ``row_names`` is of an unsupported kind.
    """
    if isinstance(rows, six.string_types):
        raise ValueError(
            'When created directly, the first argument to Table must be a sequence of rows. '
            'Did you want agate.Table.from_csv?'
        )

    # Validate column names
    if column_names:
        final_column_names = []

        for i, column_name in enumerate(column_names):
            if column_name is None:
                new_column_name = utils.letter_name(i)
                warnings.warn(
                    'Column name not specified. "%s" will be used as name.' % new_column_name,
                    RuntimeWarning)
            elif isinstance(column_name, six.string_types):
                new_column_name = column_name
            else:
                raise ValueError('Column names must be strings or None.')

            # Try "_2", "_3", ... until the name no longer collides with an
            # earlier column.
            final_column_name = new_column_name
            duplicates = 0

            while final_column_name in final_column_names:
                final_column_name = new_column_name + '_' + str(duplicates + 2)
                duplicates += 1

            if duplicates > 0:
                warn_duplicate_column(new_column_name, final_column_name)

            final_column_names.append(final_column_name)

        self._column_names = tuple(final_column_names)
    elif rows:
        self._column_names = tuple(utils.letter_name(i) for i in range(len(rows[0])))
        warnings.warn(
            'Column names not specified. "%s" will be used as names.' % str(self._column_names),
            RuntimeWarning, stacklevel=2)
    else:
        # Store a tuple here too, so the names are immutable on every path.
        self._column_names = tuple()

    len_column_names = len(self._column_names)

    # Validate column_types
    if column_types is None:
        column_types = TypeTester()
    elif isinstance(column_types, dict):
        for v in six.itervalues(column_types):
            if not isinstance(v, DataType):
                raise ValueError('Column types must be instances of DataType.')

        column_types = TypeTester(force=column_types)
    elif not isinstance(column_types, TypeTester):
        for column_type in column_types:
            if not isinstance(column_type, DataType):
                raise ValueError('Column types must be instances of DataType.')

    if isinstance(column_types, TypeTester):
        self._column_types = column_types.run(rows, self._column_names)
    else:
        self._column_types = tuple(column_types)

    if len_column_names != len(self._column_types):
        raise ValueError('column_names and column_types must be the same length.')

    if not _is_fork:
        new_rows = []
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
            elif len_row < len_column_names:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (len_column_names - len_row))

            new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
    else:
        new_rows = rows

    if row_names:
        computed_row_names = []

        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            for row in new_rows:
                computed_row_names.append(row[row_names])
        elif hasattr(row_names, '__call__'):
            for row in new_rows:
                computed_row_names.append(row_names(row))
        elif utils.issequence(row_names):
            computed_row_names = row_names
        else:
            raise ValueError('row_names must be a column name, function or sequence')

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns
    new_columns = []

    for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
        column = Column(i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)
def __init__(self, rows, column_info, row_names=None, _is_fork=False):
    """
    Build a table from rows and a description of its columns.

    :param rows:
        A sequence of row sequences. Unless ``_is_fork`` is set, every
        value is cast with its column's ``cast`` function and rows shorter
        than the number of columns are padded with ``None``.
    :param column_info:
        An iterable of :class:`Column` instances (their ``name`` and
        ``data_type`` are reused), or of ``(name, data_type)`` pairs. In
        the pair form a falsy name is replaced by ``letter_name(i)``.
        An empty iterable yields a table with no columns.
    :param row_names:
        Optional column name (string), callable taking a row, or
        ``Sequence`` of names, used to name the rows.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        In the pair form, if a name is not a string, names are duplicated
        or a type is not a :class:`DataType`; also if a row has more
        values than there are columns, or ``row_names`` is of an
        unsupported kind.
    """
    column_info = list(column_info)

    if not column_info:
        # Nothing to inspect: column_info[0] and zip(*column_info) would
        # both fail on an empty list.
        self._column_names = tuple()
        self._column_types = tuple()
    elif isinstance(column_info[0], Column):
        self._column_names = tuple(c.name for c in column_info)
        self._column_types = tuple(c.data_type for c in column_info)
    else:
        column_names, self._column_types = zip(*column_info)
        validated_names = []

        # Validation
        for i, column_name in enumerate(column_names):
            if not column_name:
                validated_names.append(letter_name(i))
            elif not isinstance(column_name, six.string_types):
                raise ValueError('Column names must be strings.')
            else:
                validated_names.append(column_name)

        if len(set(validated_names)) != len(validated_names):
            raise ValueError('Duplicate column names are not allowed.')

        self._column_names = tuple(validated_names)

        for column_type in self._column_types:
            if not isinstance(column_type, DataType):
                raise ValueError('Column types must be instances of DataType.')

    # Computed after every branch so the row-length checks below can rely
    # on it regardless of how the columns were described.
    len_column_names = len(self._column_names)

    if not _is_fork:
        new_rows = []
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
            elif len_row < len_column_names:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (len_column_names - len_row))

            new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
    else:
        new_rows = rows

    if row_names:
        computed_row_names = []

        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            for row in new_rows:
                computed_row_names.append(row[row_names])
        elif hasattr(row_names, '__call__'):
            for row in new_rows:
                computed_row_names.append(row_names(row))
        elif isinstance(row_names, Sequence):
            computed_row_names = row_names
        else:
            raise ValueError('row_names must be a column name, function or sequence')

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns
    new_columns = []

    for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
        column = Column(i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Build a table from a sequence of rows.

    :param rows:
        A sequence of row sequences. Passing a string raises
        :exc:`ValueError`. Unless ``_is_fork`` is set, every value is cast
        with its column's ``cast`` function and rows shorter than the
        number of columns are padded with ``None``.
    :param column_names:
        Optional sequence of column names. Each entry must be a string or
        ``None``. ``None`` entries are replaced by
        ``utils.letter_name(i)`` and repeated names receive a numeric
        suffix (``_2``, ``_3``, ...); both cases emit a warning. When no
        names are given they are generated from the length of the first
        row, or left empty if there are no rows.
    :param column_types:
        Optional sequence of :class:`DataType` instances, a dict whose
        values are :class:`DataType` instances (passed to
        ``TypeTester(force=...)``), or a :class:`TypeTester`. Defaults to
        a new :class:`TypeTester`, whose ``run(rows, column_names)``
        result supplies the column types.
    :param row_names:
        Optional column name (string), callable taking a row, or sequence
        of names, used to name the rows. Names whose type is exactly
        ``int`` are rejected.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        If ``rows`` is a string, a column name is neither a string nor
        ``None``, a column type is not a :class:`DataType`, the numbers of
        names and types differ, a row has more values than there are
        columns, ``row_names`` is of an unsupported kind, or a row name is
        an ``int``.
    :raises CastError:
        If a value cannot be cast; the message is extended with the row
        index and column name.
    """
    if isinstance(rows, six.string_types):
        raise ValueError(
            'When created directly, the first argument to Table must be a sequence of rows. '
            'Did you want agate.Table.from_csv?')

    # Validate column names
    if column_names:
        final_column_names = []

        for i, column_name in enumerate(column_names):
            if column_name is None:
                new_column_name = utils.letter_name(i)
                warnings.warn(
                    'Column name not specified. "%s" will be used as name.' % new_column_name,
                    RuntimeWarning)
            elif isinstance(column_name, six.string_types):
                new_column_name = column_name
            else:
                raise ValueError('Column names must be strings or None.')

            # Try "_2", "_3", ... until the name no longer collides with an
            # earlier column.
            final_column_name = new_column_name
            duplicates = 0

            while final_column_name in final_column_names:
                final_column_name = new_column_name + '_' + str(duplicates + 2)
                duplicates += 1

            if duplicates > 0:
                warn_duplicate_column(new_column_name, final_column_name)

            final_column_names.append(final_column_name)

        self._column_names = tuple(final_column_names)
    elif rows:
        self._column_names = tuple(utils.letter_name(i) for i in range(len(rows[0])))
        warnings.warn(
            'Column names not specified. "%s" will be used as names.' % str(self._column_names),
            RuntimeWarning, stacklevel=2)
    else:
        # Store a tuple here too, so the names are immutable on every path.
        self._column_names = tuple()

    len_column_names = len(self._column_names)

    # Validate column_types
    if column_types is None:
        column_types = TypeTester()
    elif isinstance(column_types, dict):
        for v in column_types.values():
            if not isinstance(v, DataType):
                raise ValueError('Column types must be instances of DataType.')

        column_types = TypeTester(force=column_types)
    elif not isinstance(column_types, TypeTester):
        for column_type in column_types:
            if not isinstance(column_type, DataType):
                raise ValueError('Column types must be instances of DataType.')

    if isinstance(column_types, TypeTester):
        self._column_types = column_types.run(rows, self._column_names)
    else:
        self._column_types = tuple(column_types)

    if len_column_names != len(self._column_types):
        raise ValueError('column_names and column_types must be the same length.')

    if not _is_fork:
        new_rows = []
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
            elif len_row < len_column_names:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (len_column_names - len_row))

            row_values = []

            for j, d in enumerate(row):
                try:
                    row_values.append(cast_funcs[j](d))
                except CastError as e:
                    # Re-raise with the location so the failing cell can be found.
                    raise CastError(str(e) + ' Error at row %s column %s.' % (i, self._column_names[j]))

            new_rows.append(Row(row_values, self._column_names))
    else:
        new_rows = rows

    if row_names:
        computed_row_names = []

        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            for row in new_rows:
                computed_row_names.append(row[row_names])
        elif hasattr(row_names, '__call__'):
            for row in new_rows:
                computed_row_names.append(row_names(row))
        elif utils.issequence(row_names):
            computed_row_names = row_names
        else:
            raise ValueError('row_names must be a column name, function or sequence')

        for row_name in computed_row_names:
            # Exact type check: only plain ints are rejected here.
            if type(row_name) is int:
                raise ValueError('Row names cannot be of type int. Use Decimal for numbered row names.')

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns; names and types were checked above to have equal length.
    new_columns = []

    for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
        column = Column(i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Build a table from a sequence of rows.

    :param rows:
        A sequence of row sequences. Passing a string raises
        :exc:`ValueError`. Unless ``_is_fork`` is set, every value is cast
        with its column's ``cast`` function and rows shorter than the
        number of columns are padded with ``None``.
    :param column_names:
        Optional sequence of column names. Each entry must be a string or
        ``None``; ``None`` entries are replaced by
        ``utils.letter_name(i)``. When no names are given they are
        generated from the length of the first row, or left empty if there
        are no rows.
    :param column_types:
        Optional sequence of :class:`DataType` instances, or a
        :class:`TypeTester`. Defaults to a new :class:`TypeTester`, whose
        ``run(rows, column_names)`` result supplies the column types.
    :param row_names:
        Optional column name (string), callable taking a row, or
        ``Sequence`` of names, used to name the rows.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        If ``rows`` is a string, a column name is neither a string nor
        ``None``, names are duplicated, a column type is not a
        :class:`DataType`, the numbers of names and types differ, a row
        has more values than there are columns, or ``row_names`` is of an
        unsupported kind.
    """
    if isinstance(rows, six.string_types):
        raise ValueError(
            'When created directly, the first argument to Table must be a sequence of rows. '
            'Did you want agate.Table.from_csv?')

    # Validate column names
    if column_names:
        final_column_names = []

        for i, column_name in enumerate(column_names):
            if column_name is None:
                final_column_names.append(utils.letter_name(i))
            elif isinstance(column_name, six.string_types):
                final_column_names.append(column_name)
            else:
                raise ValueError('Column names must be strings or None.')

        if len(set(final_column_names)) != len(final_column_names):
            raise ValueError('Duplicate column names are not allowed.')

        self._column_names = tuple(final_column_names)
    elif rows:
        self._column_names = tuple(utils.letter_name(i) for i in range(len(rows[0])))
    else:
        # Store a tuple here too, so the names are immutable on every path.
        self._column_names = tuple()

    len_column_names = len(self._column_names)

    # Validate column_types
    if column_types is None:
        column_types = TypeTester()
    elif not isinstance(column_types, TypeTester):
        for column_type in column_types:
            if not isinstance(column_type, DataType):
                raise ValueError('Column types must be instances of DataType.')

    if isinstance(column_types, TypeTester):
        self._column_types = column_types.run(rows, self._column_names)
    else:
        self._column_types = tuple(column_types)

    if len_column_names != len(self._column_types):
        raise ValueError('column_names and column_types must be the same length.')

    if not _is_fork:
        new_rows = []
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
            elif len_row < len_column_names:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (len_column_names - len_row))

            new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
    else:
        new_rows = rows

    if row_names:
        computed_row_names = []

        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            for row in new_rows:
                computed_row_names.append(row[row_names])
        elif hasattr(row_names, '__call__'):
            for row in new_rows:
                computed_row_names.append(row_names(row))
        elif isinstance(row_names, Sequence):
            computed_row_names = row_names
        else:
            raise ValueError('row_names must be a column name, function or sequence')

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns
    new_columns = []

    for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
        column = Column(i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Build a table from a sequence of rows.

    :param rows:
        A sequence of row sequences. Passing a string raises
        :exc:`ValueError`. Unless ``_is_fork`` is set, every value is cast
        with its column's ``cast`` function and rows shorter than the
        number of columns are padded with ``None``.
    :param column_names:
        Optional sequence of column names, passed through
        ``utils.deduplicate(column_names, column_names=True)``. When no
        names are given they are generated with ``utils.letter_name`` from
        the length of the first row (with a warning), or left empty if
        there are no rows.
    :param column_types:
        Optional sequence of :class:`DataType` instances, a dict whose
        values are :class:`DataType` instances (passed to
        ``TypeTester(force=...)``), or a :class:`TypeTester`. Defaults to
        a new :class:`TypeTester`, whose ``run(rows, column_names)``
        result supplies the column types.
    :param row_names:
        Optional column name (string), callable taking a row, or sequence
        of names, used to name the rows. Names whose type is exactly
        ``int`` are rejected.
    :param _is_fork:
        When true, ``rows`` is used as given, without casting or padding.
    :raises ValueError:
        If ``rows`` is a string, a column type is not a
        :class:`DataType`, the numbers of names and types differ, a row
        has more values than there are columns, ``row_names`` is of an
        unsupported kind, or a row name is an ``int``.
    :raises CastError:
        If a value cannot be cast; the message is extended with the row
        index and column name.
    """
    if isinstance(rows, six.string_types):
        raise ValueError(
            'When created directly, the first argument to Table must be a sequence of rows. '
            'Did you want agate.Table.from_csv?')

    # Resolve the column names.
    if column_names:
        self._column_names = utils.deduplicate(column_names, column_names=True)
    elif rows:
        generated_names = tuple(utils.letter_name(n) for n in range(len(rows[0])))
        self._column_names = generated_names
        warnings.warn(
            'Column names not specified. "%s" will be used as names.' % str(generated_names),
            RuntimeWarning,
            stacklevel=2)
    else:
        self._column_names = tuple()

    column_count = len(self._column_names)

    # Resolve the column types.
    if column_types is None:
        column_types = TypeTester()
    elif isinstance(column_types, dict):
        if not all(isinstance(forced, DataType) for forced in column_types.values()):
            raise ValueError('Column types must be instances of DataType.')

        column_types = TypeTester(force=column_types)
    elif not isinstance(column_types, TypeTester):
        if not all(isinstance(candidate, DataType) for candidate in column_types):
            raise ValueError('Column types must be instances of DataType.')

    if isinstance(column_types, TypeTester):
        self._column_types = column_types.run(rows, self._column_names)
    else:
        self._column_types = tuple(column_types)

    if column_count != len(self._column_types):
        raise ValueError('column_names and column_types must be the same length.')

    # Cast and pad the rows, unless they come from an existing table.
    if _is_fork:
        new_rows = rows
    else:
        casts = [data_type.cast for data_type in self._column_types]
        new_rows = []

        for row_index, row in enumerate(rows):
            value_count = len(row)

            if value_count > column_count:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.'
                    % (row_index, value_count, column_count))

            if value_count < column_count:
                # Pad short rows with None so each row has one value per column.
                row = chain(row, [None] * (column_count - value_count))

            cast_values = []

            for column_index, raw_value in enumerate(row):
                try:
                    cast_values.append(casts[column_index](raw_value))
                except CastError as e:
                    # Re-raise with the location so the failing cell can be found.
                    raise CastError(
                        str(e) + ' Error at row %s column %s.'
                        % (row_index, self._column_names[column_index]))

            new_rows.append(Row(cast_values, self._column_names))

    # Resolve the row names.
    if not row_names:
        self._row_names = None
    else:
        if isinstance(row_names, six.string_types):
            # A string names the column whose values become the row names.
            resolved_names = [row[row_names] for row in new_rows]
        elif hasattr(row_names, '__call__'):
            resolved_names = [row_names(row) for row in new_rows]
        elif utils.issequence(row_names):
            resolved_names = row_names
        else:
            raise ValueError('row_names must be a column name, function or sequence')

        for candidate_name in resolved_names:
            # Exact type check: only plain ints are rejected here.
            if type(candidate_name) is int:
                raise ValueError('Row names cannot be of type int. Use Decimal for numbered row names.')

        self._row_names = tuple(resolved_names)

    self._rows = MappedSequence(new_rows, self._row_names)

    # Build columns; names and types were checked above to have equal length.
    new_columns = [
        Column(position, name, data_type, self._rows, row_names=self._row_names)
        for position, (name, data_type) in enumerate(zip(self._column_names, self._column_types))
    ]

    self._columns = MappedSequence(new_columns, self._column_names)