def validate(self, table):
    """
    Check that the target column and the optional explicit total are usable.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is not ``Number``, or if an explicit total
        was supplied and it is zero or negative.
    """
    target = table.columns[self._column_name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('Percent column must contain Number data.')

    explicit_total = self._total
    if explicit_total is not None and explicit_total <= 0:
        raise DataTypeError('The total must be a positive number')

    # Nulls do not stop validation; they only trigger a warning.
    contains_nulls = HasNulls(self._column_name).run(table)
    if contains_nulls:
        warn_null_calculation(self, target)
def prepare(self, table):
    """
    Verify that both the "before" and "after" columns hold Number data.

    :param table:
        Table whose ``columns`` mapping contains both configured columns.
    :raises DataTypeError:
        If either column's data type is not ``Number``. The "before" column
        is checked first.
    """
    # Both lookups happen up front, before any type check is made.
    before = table.columns[self._before_column_name]
    after = table.columns[self._after_column_name]

    checks = (
        (before, 'PercentChange before column must contain Number data.'),
        (after, 'PercentChange after column must contain Number data.'),
    )

    for candidate, message in checks:
        if not isinstance(candidate.data_type, Number):
            raise DataTypeError(message)
def validate(self, table):
    """
    Verify both columns hold Number data and warn about nulls in either.

    :param table:
        Table whose ``columns`` mapping contains both configured columns.
    :raises DataTypeError:
        If either column's data type is not ``Number``. The "before" column
        is checked first.
    """
    before_name = self._before_column_name
    after_name = self._after_column_name

    before = table.columns[before_name]
    after = table.columns[after_name]

    if not isinstance(before.data_type, Number):
        raise DataTypeError('PercentChange before column must contain Number data.')

    if not isinstance(after.data_type, Number):
        raise DataTypeError('PercentChange after column must contain Number data.')

    # Null checks run only after both type checks have passed.
    for name, candidate in ((before_name, before), (after_name, after)):
        if HasNulls(name).run(table):
            warn_null_calculation(self, candidate)
def run(self, table):
    """
    Express each value of the configured column as a percentage of a total.

    The denominator is the explicit total when one was supplied; otherwise it
    is the ``Sum`` aggregate of the column.

    :param table:
        Table providing ``rows`` and, when no explicit total is set,
        ``aggregate``.
    :returns:
        A list with one entry per row: ``value / total * 100``, or ``None``
        where the row's value is ``None``.
    :raises DataTypeError:
        If the computed column sum is zero or negative.
    """
    name = self._column_name
    denominator = self._total

    if denominator is None:
        # No explicit total: fall back to the sum of the column.
        denominator = table.aggregate(Sum(name))

        if denominator <= 0:
            raise DataTypeError('The sum of column values must be a positive number')

    # Null values pass through untouched so the output lines up with the rows.
    return [
        None if row[name] is None else row[name] / denominator * 100
        for row in table.rows
    ]
def validate(self, table):
    """
    Verify that the configured column holds a supported data type.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is not ``Number``, ``Date`` or ``DateTime``.
    """
    column = table.columns[self._column_name]

    if not isinstance(column.data_type, (Number, Date, DateTime)):
        # The message previously read "orNumber" (missing space).
        raise DataTypeError('Min can only be applied to columns containing DateTime or Number data.')
def validate(self, table):
    """
    Verify that the configured column holds Number or TimeDelta data.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is neither ``Number`` nor ``TimeDelta``.
    """
    supported = (Number, TimeDelta)
    data_type = table.columns[self._column_name].data_type

    if isinstance(data_type, supported):
        return

    raise DataTypeError('Sum can only be applied to columns containing Number or TimeDelta data.')
def validate(self, table):
    """
    Verify that the configured column holds Number data.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    data_type = table.columns[self._column_name].data_type

    if isinstance(data_type, Number):
        return

    raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')
def validate(self, table):
    """
    Verify that the configured column holds Text data.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is not ``Text``.
    """
    data_type = table.columns[self._column_name].data_type

    if isinstance(data_type, Text):
        return

    raise DataTypeError('MaxLength can only be applied to columns containing Text data.')
def prepare(self, table):
    """
    Verify the column type and store the column's percentiles.

    After the type check passes, the result of running ``Percentiles`` on the
    configured column is stored on ``self._percentiles``.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    data_type = table.columns[name].data_type

    if not isinstance(data_type, Number):
        raise DataTypeError('PercentileRank column must contain Number data.')

    percentile_aggregation = Percentiles(name)
    self._percentiles = percentile_aggregation.run(table)
def merge(cls, tables, row_names=None, column_names=None):
    """
    Create a new table from a sequence of similar tables.

    This method will not carry over row names from the merged tables, but new
    row names can be specified with the :code:`row_names` argument.

    It is possible to limit the columns included in the new :class:`.Table`
    with the :code:`column_names` argument. For example, to only include
    columns from a specific table, set :code:`column_names` equal to
    :code:`table.column_names`.

    :param tables:
        A sequence of :class:`.Table` instances. It is iterated twice, so it
        must be re-iterable.
    :param row_names:
        See :class:`.Table` for the usage of this parameter.
    :param column_names:
        A sequence of column names to include in the new :class:`.Table`. If
        not specified, all distinct column names from `tables` are included.
    :returns:
        A new :class:`.Table`.
    :raises DataTypeError:
        If two tables share a column name but the later column's type is not
        an instance of the first-seen column's type class.
    """
    from agate.table import Table

    new_columns = OrderedDict()

    # First pass: collect the distinct columns, in first-seen order.
    for table in tables:
        for column_name, column_type in zip(table.column_names, table.column_types):
            if column_names is not None and column_name not in column_names:
                continue

            if column_name in new_columns:
                if not isinstance(column_type, type(new_columns[column_name])):
                    raise DataTypeError('Tables contain columns with the same names, but different types.')
            else:
                new_columns[column_name] = column_type

    # Materialise as tuples: dict views never compare equal to a table's
    # names/types sequences, which left the fast path below unreachable.
    column_keys = tuple(new_columns.keys())
    column_types = tuple(new_columns.values())

    rows = []

    # Second pass: gather rows, re-keying them only when the structure differs.
    for table in tables:
        # Performance optimization for identical table structures
        if tuple(table.column_names) == column_keys and tuple(table.column_types) == column_types:
            rows.extend(table.rows)
        else:
            for row in table.rows:
                data = [row.get(column_key, None) for column_key in column_keys]
                rows.append(Row(data, column_keys))

    return Table(rows, column_keys, column_types, row_names=row_names, _is_fork=True)
def run(self, column):
    """
    Compute the square root of the column's ``Variance`` aggregate.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if isinstance(column.data_type, Number):
        variance = column.aggregate(Variance())
        return variance.sqrt()

    raise DataTypeError('StDev can only be applied to columns containing Number data.')
def validate(self, table):
    """
    Verify that both columns share one supported type and warn about nulls.

    The "before" column's data type is matched against ``Number``, ``Date``,
    ``DateTime`` and ``TimeDelta`` in that order. The "after" column must be
    an instance of the first type that matches.

    :param table:
        Table whose ``columns`` mapping contains both configured columns.
    :raises DataTypeError:
        If the "before" column matches none of the supported types, or if the
        "after" column is not of the same type as the "before" column.
    """
    before_name = self._before_column_name
    after_name = self._after_column_name

    before = table.columns[before_name]
    after = table.columns[after_name]

    supported = (Number, Date, DateTime, TimeDelta)
    matched = next(
        (kind for kind in supported if isinstance(before.data_type, kind)),
        None
    )

    if matched is None:
        raise DataTypeError('Change before and after columns must both contain data that is one of: Number, Date, DateTime or TimeDelta.')

    if not isinstance(after.data_type, matched):
        raise DataTypeError('Specified columns must be of the same type')

    # Nulls are only warned about once the types are known to be compatible.
    for name, candidate in ((before_name, before), (after_name, after)):
        if HasNulls(name).run(table):
            warn_null_calculation(self, candidate)
def run(self, column):
    """
    Add up the non-null values of a Number column.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        The result of ``sum`` over ``column.values_without_nulls()``.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if isinstance(column.data_type, Number):
        values = column.values_without_nulls()
        return sum(values)

    raise DataTypeError('Sum can only be applied to columns containing Number data.')
def run(self, column):
    """
    Find the length of the longest non-null value in a Text column.

    :param column:
        Column to aggregate; its data type must be ``Text``.
    :returns:
        :class:`int`. ``0`` when the column has no non-null values.
    :raises DataTypeError:
        If the column's data type is not ``Text``.
    """
    if not isinstance(column.data_type, Text):
        raise DataTypeError('MaxLength can only be applied to columns containing Text data.')

    lengths = [len(value) for value in column.values_without_nulls()]

    # max() raises ValueError on an empty sequence, so handle an empty or
    # all-null column explicitly.
    if not lengths:
        return 0

    return max(lengths)
def validate(self, table):
    """
    Verify the column holds Number data and warn when it contains nulls.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('Deciles can only be applied to columns containing Number data.')

    if HasNulls(name).run(table):
        warn_null_calculation(self, target)
def run(self, table):
    """
    Compute the square root of the column's ``PopulationVariance``.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    data_type = table.columns[name].data_type

    if not isinstance(data_type, Number):
        raise DataTypeError('PopulationStDev can only be applied to columns containing Number data.')

    variance = PopulationVariance(name).run(table)
    return variance.sqrt()
def run(self, table):
    """
    Apply ``max_precision`` to the non-null values of the configured column.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        Whatever ``max_precision`` returns for the column's non-null values.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    target = table.columns[self._column_name]

    if isinstance(target.data_type, Number):
        values = target.values_without_nulls()
        return max_precision(values)

    raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')
def run(self, column):
    """
    Compute the mean of the non-null values of a Number column.

    A null warning is issued when the column's ``HasNulls`` aggregate is
    truthy. Nulls are excluded from the divisor.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if not isinstance(column.data_type, Number):
        raise DataTypeError('Mean can only be applied to columns containing Number data.')

    contains_nulls = column.aggregate(HasNulls())
    if contains_nulls:
        warn_null_calculation(self, column)

    total = column.aggregate(Sum())
    count = len(column.values_without_nulls())

    return total / count
def run(self, column):
    """
    Find the largest non-null value in the column.

    :param column:
        Column to aggregate; its data type must be ``Number``, ``Date`` or
        ``DateTime``.
    :returns:
        The result of ``max`` over ``column.values_without_nulls()``.
    :raises DataTypeError:
        If the column's data type is not one of the supported types.
    """
    supported = (Number, Date, DateTime)

    if isinstance(column.data_type, supported):
        values = column.values_without_nulls()
        return max(values)

    raise DataTypeError('Max can only be applied to columns containing DateTime or Number data.')
def run(self, table):
    """
    Find the largest non-null value in the configured column.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        A single value whose type is dependent on the type of the column.
    :raises DataTypeError:
        If the column's data type is not ``Number``, ``Date`` or ``DateTime``.
    """
    target = table.columns[self._column_name]
    supported = (Number, Date, DateTime)

    if isinstance(target.data_type, supported):
        values = target.values_without_nulls()
        return max(values)

    raise DataTypeError('Max can only be applied to columns containing DateTime or Number data.')
def run(self, column):
    """
    Compute the difference between the 75th and 25th percentiles.

    A null warning is issued when the column's ``HasNulls`` aggregate is
    truthy.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if not isinstance(column.data_type, Number):
        raise DataTypeError('IQR can only be applied to columns containing Number data.')

    contains_nulls = column.aggregate(HasNulls())
    if contains_nulls:
        warn_null_calculation(self, column)

    ranks = column.aggregate(Percentiles())
    upper = ranks[75]
    lower = ranks[25]

    return upper - lower
def validate(self, table):
    """
    Verify that every configured column holds non-null Text data.

    ``self._column_name`` may be a single name or a sequence of names (as
    decided by ``issequence``). Each named column is checked in turn.

    :param table:
        Table whose ``columns`` mapping contains the configured column(s).
    :raises DataTypeError:
        If a column's data type is not ``Text``.
    :raises ValueError:
        If ``HasNulls`` reports a truthy result for a column.
    """
    configured = self._column_name
    names = configured if issequence(configured) else [configured]

    for name in names:
        data_type = table.columns[name].data_type

        if not isinstance(data_type, Text):
            raise DataTypeError('Slug column must contain Text data.')

        if HasNulls(name).run(table):
            raise ValueError('Slug column cannot contain `None`.')
def run(self, column):
    """
    Compute the median of absolute deviations from the 50th percentile.

    A null warning is issued when the column's ``HasNulls`` aggregate is
    truthy.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if not isinstance(column.data_type, Number):
        raise DataTypeError('MAD can only be applied to columns containing Number data.')

    contains_nulls = column.aggregate(HasNulls())
    if contains_nulls:
        warn_null_calculation(self, column)

    values = column.values_without_nulls_sorted()
    center = column.aggregate(Percentiles())[50]

    deviations = tuple(abs(value - center) for value in values)
    return median(deviations)
def run(self, column):
    """
    Compute the mean squared deviation of the non-null values from their mean.

    A null warning is issued when the column's ``HasNulls`` aggregate is
    truthy. The divisor is the number of non-null values.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if not isinstance(column.data_type, Number):
        raise DataTypeError('PopulationVariance can only be applied to columns containing Number data.')

    contains_nulls = column.aggregate(HasNulls())
    if contains_nulls:
        warn_null_calculation(self, column)

    values = column.values_without_nulls()
    average = column.aggregate(Mean())

    squared_deviations = sum((value - average) ** 2 for value in values)
    return squared_deviations / len(values)
def run(self, table):
    """
    Compute the difference between the 75th and 25th percentiles.

    A null warning is issued when ``HasNulls`` reports a truthy result for
    the configured column.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('IQR can only be applied to columns containing Number data.')

    if HasNulls(name).run(table):
        warn_null_calculation(self, target)

    ranks = Percentiles(name).run(table)
    upper = ranks[75]
    lower = ranks[25]

    return upper - lower
def run(self, column):
    """
    Find the most frequent non-null value in a Number column.

    A null warning is issued when the column's ``HasNulls`` aggregate is
    truthy. When several values share the highest count, the one ``max``
    meets first while iterating the tally is returned.

    :param column:
        Column to aggregate; its data type must be ``Number``.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    if not isinstance(column.data_type, Number):
        raise DataTypeError('Mode can only be applied to columns containing Number data.')

    contains_nulls = column.aggregate(HasNulls())
    if contains_nulls:
        warn_null_calculation(self, column)

    # Tally how many times each distinct value occurs.
    counts = defaultdict(int)
    for value in column.values_without_nulls():
        counts[value] += 1

    return max(counts, key=counts.get)
def run(self, table):
    """
    Compute the mean of the non-null values of the configured column.

    A null warning is issued when ``HasNulls`` reports a truthy result for
    the configured column. Nulls are excluded from the divisor.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('Mean can only be applied to columns containing Number data.')

    if HasNulls(name).run(table):
        warn_null_calculation(self, target)

    total = Sum(name).run(table)
    count = len(target.values_without_nulls())

    return total / count
def run(self, table):
    """
    Compute the mean squared deviation of the non-null values from their mean.

    A null warning is issued when ``HasNulls`` reports a truthy result for
    the configured column. The divisor is the number of non-null values.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('PopulationVariance can only be applied to columns containing Number data.')

    if HasNulls(name).run(table):
        warn_null_calculation(self, target)

    values = target.values_without_nulls()
    average = Mean(name).run(table)

    squared_deviations = sum((value - average) ** 2 for value in values)
    return squared_deviations / len(values)
def run(self, table):
    """
    Compute the median of absolute deviations from the column's median.

    A null warning is issued when ``HasNulls`` reports a truthy result for
    the configured column.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('MAD can only be applied to columns containing Number data.')

    if HasNulls(name).run(table):
        warn_null_calculation(self, target)

    values = target.values_without_nulls_sorted()
    center = Median(name).run(table)

    deviations = tuple(abs(value - center) for value in values)
    return median(deviations)
def run(self, table):
    """
    Find the most frequent non-null value in the configured column.

    A null warning is issued when ``HasNulls`` reports a truthy result for
    the configured column. When several values share the highest count, the
    one ``max`` meets first while iterating the tally is returned.

    :param table:
        Table whose ``columns`` mapping contains the configured column.
    :returns:
        :class:`decimal.Decimal`.
    :raises DataTypeError:
        If the column's data type is not ``Number``.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('Mode can only be applied to columns containing Number data.')

    if HasNulls(name).run(table):
        warn_null_calculation(self, target)

    # Tally how many times each distinct value occurs.
    counts = defaultdict(int)
    for value in target.values_without_nulls():
        counts[value] += 1

    return max(counts, key=counts.get)