def __init__(self, dataset):
    """Compute summary statistics for ``dataset`` and store them on ``self``.

    Attributes set, in this order:

    * ``mean`` / ``std`` -- results of the module-level ``mean`` and ``std``.
    * ``percentiles`` -- result of ``percentiles(dataset, PERCENTILES)``.
    * ``calculated_percentiles`` -- seven values: the mean shifted by
      -2, -1, -0.67, 0, +0.67, +1 and +2 standard deviations.
    * ``max`` / ``min`` -- extremes of
      ``Dataset.get_num_column_or_list(dataset)``.
    * ``is_normal`` -- result of ``self._is_normal`` applied to the two
      percentile lists.
    * ``length`` -- ``len(dataset)``.
    """
    mu = mean(dataset)
    self.mean = mu
    sigma = std(dataset, mu)
    self.std = sigma
    self.percentiles = percentiles(dataset, PERCENTILES)

    # NOTE(review): these look like the positions the PERCENTILES entries
    # would take under a normal distribution -- confirm against PERCENTILES.
    wide = 2 * sigma
    narrow = 0.67 * sigma
    self.calculated_percentiles = [
        mu - wide,
        mu - sigma,
        mu - narrow,
        mu,
        mu + narrow,
        mu + sigma,
        mu + wide,
    ]

    # The helper is called once per extreme, exactly as many times as before,
    # in case it hands back a one-shot iterator.
    self.max = max(Dataset.get_num_column_or_list(dataset))
    self.min = min(Dataset.get_num_column_or_list(dataset))

    observed = self.percentiles
    expected = self.calculated_percentiles
    self.is_normal = self._is_normal(observed, expected)
    self.length = len(dataset)
def hist(dataset, **kwargs):
    """Plot a histogram of ``dataset`` with pyplot and save it to a file.

    Keyword arguments named ``grid``, ``xlabel``, ``ylabel``, ``title`` or
    ``output`` configure the figure and the destination file. Every other
    keyword argument is forwarded to ``plt.hist`` on top of the built-in
    settings (``bins=20``, ``normed=1``, ``facecolor='green'``,
    ``alpha=0.75``).

    Returns the file name the figure was saved to: ``output`` when it is
    truthy, otherwise the value of ``get_tmp_file_name('.png')``.
    """
    layout = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    hist_kwargs = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }

    # Route each caller option to the dict that owns it.
    for key, value in kwargs.items():
        target = layout if key in layout else hist_kwargs
        target[key] = value

    numbers = list(Dataset.get_num_column_or_list(dataset))
    _counts, _edges, _patches = plt.hist(numbers, **hist_kwargs)

    plt.xlabel(layout['xlabel'])
    plt.ylabel(layout['ylabel'])
    plt.title(layout['title'])
    plt.grid(layout['grid'])

    path = layout['output'] or get_tmp_file_name('.png')
    plt.savefig(path)
    return path
def hist(dataset, **kwargs):
    """Plot a histogram of ``dataset`` with pyplot and save it to a file.

    Keyword arguments:
        grid, xlabel, ylabel, title: passed to the matching ``plt`` call.
        output: file name to save to; when falsy, the name comes from
            ``get_tmp_file_name('.png')``.
        anything else: forwarded to ``plt.hist``, overriding the built-in
            settings (``bins=20``, ``normed=1``, ``facecolor='green'``,
            ``alpha=0.75``).

    Returns:
        The file name the figure was saved to.
    """
    defaults = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    # NOTE(review): newer Matplotlib releases dropped ``normed`` in favour of
    # ``density`` -- confirm against the Matplotlib version this project pins.
    graph_params = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }

    # Route on the *known* layout keys only. Merging kwargs into ``defaults``
    # first and then popping every key of ``defaults`` from ``graph_params``
    # removed every caller-supplied histogram option (e.g. ``bins=50``)
    # before it could reach ``plt.hist``.
    for key, value in kwargs.items():
        if key in defaults:
            defaults[key] = value
        else:
            graph_params[key] = value

    values = list(Dataset.get_num_column_or_list(dataset))
    plt.hist(values, **graph_params)

    plt.xlabel(defaults['xlabel'])
    plt.ylabel(defaults['ylabel'])
    plt.title(defaults['title'])
    plt.grid(defaults['grid'])

    filename = defaults['output'] or get_tmp_file_name('.png')
    plt.savefig(filename)
    return filename
def __init__(self, dataset):
    """Populate this object with descriptive statistics of ``dataset``.

    Stores ``mean``, ``std``, ``percentiles`` (from
    ``percentiles(dataset, PERCENTILES)``), ``calculated_percentiles``
    (the mean offset by -2, -1, -0.67, 0, +0.67, +1 and +2 standard
    deviations), ``max``, ``min``, ``is_normal`` (from ``self._is_normal``)
    and ``length`` (``len(dataset)``).
    """
    self.mean = center = mean(dataset)
    self.std = spread = std(dataset, center)
    self.percentiles = percentiles(dataset, PERCENTILES)

    # Offsets from the mean, widest first; mirrored around the mean below.
    offsets = (2 * spread, spread, 0.67 * spread)
    below = [center - delta for delta in offsets]
    above = [center + delta for delta in reversed(offsets)]
    self.calculated_percentiles = below + [center] + above

    self.max = max(Dataset.get_num_column_or_list(dataset))
    self.min = min(Dataset.get_num_column_or_list(dataset))
    self.is_normal = self._is_normal(
        self.percentiles,
        self.calculated_percentiles,
    )
    self.length = len(dataset)
def mean(dataset):
    """Return the arithmetic mean of the numeric values in ``dataset``.

    The values come from ``Dataset.get_num_column_or_list(dataset)``; their
    sum is divided by ``len(dataset)`` using float division.
    """
    total = sum(Dataset.get_num_column_or_list(dataset))
    count = len(dataset)
    return total / float(count)
def percentiles(dataset, vals):
    """Return one percentile of ``dataset`` for each entry of ``vals``.

    The numeric values from ``Dataset.get_num_column_or_list(dataset)`` are
    sorted in ascending order, then each entry ``p`` of ``vals`` is resolved
    by ``_percentile(sorted_values, len(dataset), p)``. The result is a list
    in the same order as ``vals``.
    """
    count = len(dataset)
    ordered = sorted(Dataset.get_num_column_or_list(dataset))
    return [_percentile(ordered, count, wanted) for wanted in vals]
def std(dataset, m=None):
    """Return the standard deviation of the numeric values in ``dataset``.

    The sum of squared deviations from ``m`` is divided by ``len(dataset)``
    (not ``len(dataset) - 1``) before taking the square root.

    ``m`` is the mean to measure deviations from; when it is ``None`` it is
    computed with ``mean(dataset)``.
    """
    count = len(dataset)
    numbers = Dataset.get_num_column_or_list(dataset)
    center = mean(dataset) if m is None else m
    squared_error = sum((x - center) ** 2 for x in numbers)
    return (squared_error / float(count)) ** 0.5
def mean(dataset):
    """Average the numeric values of ``dataset``.

    Sums the values from ``Dataset.get_num_column_or_list(dataset)`` and
    divides by ``len(dataset)`` as a float.
    """
    numbers = Dataset.get_num_column_or_list(dataset)
    running_total = sum(numbers)
    size = float(len(dataset))
    return running_total / size
def percentiles(dataset, vals):
    """Compute the percentile of ``dataset`` for every entry in ``vals``.

    Sorts the values from ``Dataset.get_num_column_or_list(dataset)`` in
    ascending order and delegates each lookup to
    ``_percentile(sorted_values, len(dataset), p)``. Returns a list aligned
    with ``vals``.
    """
    size = len(dataset)
    ascending = sorted(Dataset.get_num_column_or_list(dataset))
    return [_percentile(ascending, size, target) for target in vals]
def std(dataset, m=None):
    """Compute the standard deviation of ``dataset`` around ``m``.

    Uses ``len(dataset)`` as the divisor. When ``m`` is ``None`` the mean is
    obtained from ``mean(dataset)``.
    """
    size = len(dataset)
    observations = Dataset.get_num_column_or_list(dataset)
    if m is None:
        m = mean(dataset)
    variance = sum((obs - m) ** 2 for obs in observations) / float(size)
    return variance ** 0.5