Example #1
0
 def test_dataset_iterator(self):
     """Iterating a Dataset yields every added row, on each pass."""
     row_count = 10
     dts = Dataset()
     for value in range(row_count):
         dts.add_row([value])
     # Iterate twice to check that the dataset can be walked repeatedly.
     for _ in range(2):
         seen = [row for row in dts]
         self.assertEqual(row_count, len(seen))
Example #2
0
 def test_dataset_iterator(self):
     """A filled Dataset can be iterated more than once."""
     dts = Dataset()
     expected_rows = 10
     for number in range(expected_rows):
         dts.add_row([number])
     passes = 2
     # Each pass must see all rows again, not an exhausted iterator.
     for _ in range(passes):
         collected = [item for item in dts]
         self.assertEqual(expected_rows, len(collected))
Example #3
0
 def test_load_invalid_date_type(self):
     """Loading a malformed date raises ParseError with a verbose text."""
     expected = ('Invalid value "aa-bb-cc" in line 1 '
                 'for "d" column type (index: 0)')
     dts = Dataset([Dataset.DATE])
     try:
         dts.load([['aa-bb-cc']])
     except dts.ParseError as exc:
         self.assertEqual(expected, str(exc))
     else:
         # Reaching this branch means load() accepted the bad value.
         self.fail('dataset has loaded invalid data')
Example #4
0
 def test_load_invalid_date_type(self):
     """Invalid date input must be rejected with a descriptive error."""
     bad_rows = [['aa-bb-cc']]
     message = ('Invalid value "aa-bb-cc" in line 1 '
                'for "d" column type (index: 0)')
     dts = Dataset([Dataset.DATE])
     try:
         dts.load(bad_rows)
         # load() returning normally means the bad value was accepted.
         self.fail('dataset has loaded invalid data')
     except dts.ParseError as err:
         self.assertEqual(message, str(err))
Example #5
0
    def test_get_dataset_columns(self):
        """Test column splitting correctness.

        | 0 | 1 | 2 |
        | 3 | 4 | 5 |
        """
        dts = Dataset()
        # add_row is called purely for its side effect, so use a plain loop
        # instead of building a throwaway list of its return values.
        for row in chunk(range(6), 3):
            dts.add_row(row)
        # A single column index yields that column's values.
        for i in range(3):
            self.assertEqual([i, i + 3], list(dts.column(i)))
        # Several column indexes yield one tuple per row.
        self.assertEqual([(0, 1), (3, 4)], list(dts.column(0, 1)))
        self.assertEqual([(1, 2), (4, 5)], list(dts.column(1, 2)))
Example #6
0
    def test_get_dataset_columns(self):
        """Test column splitting correctness.

        | 0 | 1 | 2 |
        | 3 | 4 | 5 |
        """
        dts = Dataset()
        # Fill the dataset with an explicit loop: a list comprehension here
        # would only allocate a list of discarded add_row() results.
        for row in chunk(range(6), 3):
            dts.add_row(row)
        # Single-column selection.
        for i in range(3):
            self.assertEqual([i, i + 3], list(dts.column(i)))
        # Multi-column selection returns row-wise tuples.
        self.assertEqual([(0, 1), (3, 4)], list(dts.column(0, 1)))
        self.assertEqual([(1, 2), (4, 5)], list(dts.column(1, 2)))
Example #7
0
 def __init__(self, dataset):
     """Compute and store summary statistics for ``dataset``.

     Stored attributes: ``mean``, ``std``, ``percentiles`` (measured at
     ``PERCENTILES``), ``calculated_percentiles`` (the mean shifted by
     0, 0.67, 1 and 2 standard deviations in both directions), ``max``,
     ``min``, ``is_normal`` and ``length``.
     """
     self.mean = mean(dataset)
     self.std = std(dataset, self.mean)
     mu, sigma = self.mean, self.std

     measured = percentiles(dataset, PERCENTILES)
     expected = [
         mu - 2 * sigma,
         mu - sigma,
         mu - 0.67 * sigma,
         mu,
         mu + 0.67 * sigma,
         mu + sigma,
         mu + 2 * sigma,
     ]
     self.percentiles = measured
     self.calculated_percentiles = expected

     # The numeric values are requested anew for each aggregate.
     self.max = max(Dataset.get_num_column_or_list(dataset))
     self.min = min(Dataset.get_num_column_or_list(dataset))

     self.is_normal = self._is_normal(measured, expected)
     self.length = len(dataset)
Example #8
0
File: graph.py Project: amadev/doan
def hist(dataset, **kwargs):
    """Draw a histogram of ``dataset``, save it and return the file name.

    Keyword arguments ``grid``, ``xlabel``, ``ylabel``, ``title`` and
    ``output`` configure the figure and the output file. Every other keyword
    argument is forwarded to ``plt.hist`` and overrides the built-in plot
    defaults (``bins``, ``normed``, ``facecolor``, ``alpha``).

    Returns the name of the saved file: ``output`` when it is given and
    truthy, otherwise the result of ``get_tmp_file_name('.png')``.
    """
    defaults = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    # NOTE(review): assuming plt is matplotlib.pyplot, 'normed' is rejected
    # by newer releases in favour of 'density' -- confirm the target version.
    graph_params = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }
    # Route each keyword to exactly one dict. Merging kwargs into both
    # dicts and then stripping every key of ``defaults`` from the plot
    # parameters would also strip caller-supplied plot parameters, so
    # e.g. ``bins=50`` would be silently ignored.
    for key, value in kwargs.items():
        if key in defaults:
            defaults[key] = value
        else:
            graph_params[key] = value

    values = list(Dataset.get_num_column_or_list(dataset))

    plt.hist(values, **graph_params)
    plt.xlabel(defaults['xlabel'])
    plt.ylabel(defaults['ylabel'])
    plt.title(defaults['title'])
    plt.grid(defaults['grid'])

    filename = defaults['output'] or get_tmp_file_name('.png')
    plt.savefig(filename)

    return filename
Example #9
0
def hist(dataset, **kwargs):
    """Draw a histogram of ``dataset``, save it and return the file name.

    Keyword arguments ``grid``, ``xlabel``, ``ylabel``, ``title`` and
    ``output`` configure the figure and the output file. Every other keyword
    argument is forwarded to ``plt.hist`` and overrides the built-in plot
    defaults (``bins``, ``normed``, ``facecolor``, ``alpha``).

    Returns the name of the saved file: ``output`` when it is given and
    truthy, otherwise the result of ``get_tmp_file_name('.png')``.
    """
    defaults = {
        'grid': False,
        'xlabel': '',
        'ylabel': '',
        'title': '',
        'output': None,
    }
    plot_params = {
        'bins': 20,
        'normed': 1,
        'facecolor': 'green',
        'alpha': 0.75,
    }
    plot_params.update(kwargs)
    # Figure-level options must not reach plt.hist. This has to happen
    # before kwargs are merged into ``defaults``, otherwise caller-supplied
    # plot parameters would be stripped as well.
    for key in defaults:
        plot_params.pop(key, None)
    defaults.update(kwargs)

    values = list(Dataset.get_num_column_or_list(dataset))

    # The return value of plt.hist is not needed here.
    plt.hist(values, **plot_params)
    plt.xlabel(defaults['xlabel'])
    plt.ylabel(defaults['ylabel'])
    plt.title(defaults['title'])
    plt.grid(defaults['grid'])

    filename = defaults['output'] or get_tmp_file_name('.png')
    plt.savefig(filename)

    return filename
Example #10
0
File: stat.py Project: amadev/doan
 def __init__(self, dataset):
     """Gather descriptive statistics of ``dataset`` on the instance.

     Sets ``mean``, ``std``, ``percentiles`` (taken at ``PERCENTILES``),
     ``calculated_percentiles`` (mean offset by -2, -1, -0.67, 0, +0.67,
     +1 and +2 standard deviations), ``max``, ``min``, ``is_normal`` and
     ``length``.
     """
     self.mean = mean(dataset)
     self.std = std(dataset, self.mean)
     self.percentiles = percentiles(dataset, PERCENTILES)

     center = self.mean
     spread = self.std
     self.calculated_percentiles = [
         center - 2 * spread, center - spread, center - 0.67 * spread,
         center,
         center + 0.67 * spread, center + spread, center + 2 * spread,
     ]

     # Extremes are taken over freshly requested numeric values.
     self.max = max(Dataset.get_num_column_or_list(dataset))
     self.min = min(Dataset.get_num_column_or_list(dataset))
     self.is_normal = self._is_normal(self.percentiles,
                                      self.calculated_percentiles)
     self.length = len(dataset)
Example #11
0
File: app.py Project: amadev/metr
def show_metric(metric):
    """Plot the stored points of ``metric`` and return the PNG as a response.

    The points are read from the sqlite database ``DB`` via ``get_points``,
    loaded into a date/float ``Dataset`` and rendered with ``plot_date``
    into ``IMAGE_DIR/<metric>.png`` (the directory is created when missing).

    Returns a ``Response`` whose body holds the image bytes, with code
    ``'200 OK'`` and ``content-type`` / ``content-length`` headers set.
    """
    conn = sqlite3.connect(DB)
    try:
        points = get_points(conn, metric)
    finally:
        # Release the connection even when the query raises.
        conn.close()
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
    # NOTE(review): ``metric`` is interpolated into the file path as is;
    # if it can come from a request, confirm it is validated upstream.
    fname = IMAGE_DIR + '/%s.png' % metric
    d = Dataset([Dataset.DATE, Dataset.FLOAT])
    d.load(points)
    plot_date(d, output=fname, figsize=(14, 7), linestyle='-')

    # Read the rendered image back; the context manager closes the handle
    # instead of leaving that to garbage collection.
    with open(fname, 'rb') as image:
        data = image.read()
    r = Response()
    r.body = data
    r.code = '200 OK'
    r.headers = [('content-type', 'image/png'),
                 ('content-length', str(len(r.body)))]
    return r
Example #12
0
def mean(dataset):
    """Return the arithmetic mean of the numeric values of ``dataset``.

    The values come from ``Dataset.get_num_column_or_list``; their sum is
    divided by ``len(dataset)`` converted to float.
    """
    total = sum(Dataset.get_num_column_or_list(dataset))
    return total / float(len(dataset))
Example #13
0
def std(dataset, m=None):
    """Return the standard deviation of the numeric values of ``dataset``.

    ``m`` is the mean to measure deviations from; when it is ``None`` it is
    computed with ``mean(dataset)``. The sum of squared deviations is
    divided by ``len(dataset)`` (not by ``n - 1``) before the square root.
    """
    count = len(dataset)
    numbers = Dataset.get_num_column_or_list(dataset)
    center = mean(dataset) if m is None else m
    squared_total = sum((x - center) ** 2 for x in numbers)
    return (squared_total / float(count)) ** 0.5
Example #14
0
File: stat.py Project: amadev/doan
def mean(dataset):
    """Compute the average of the numeric values held by ``dataset``.

    Sums the values returned by ``Dataset.get_num_column_or_list`` and
    divides by the float length of ``dataset``.
    """
    numbers = Dataset.get_num_column_or_list(dataset)
    total = sum(numbers)
    count = float(len(dataset))
    return total / count
Example #15
0
File: stat.py Project: amadev/doan
def percentiles(dataset, vals):
    """Return one ``_percentile`` result per entry of ``vals``.

    The numeric values of ``dataset`` are sorted in ascending order once and
    passed, together with ``len(dataset)``, to ``_percentile`` for each
    requested point.
    """
    count = len(dataset)
    ordered = sorted(Dataset.get_num_column_or_list(dataset))
    return [_percentile(ordered, count, point) for point in vals]
Example #16
0
File: stat.py Project: amadev/doan
def std(dataset, m=None):
    """Compute the standard deviation of the numeric values of ``dataset``.

    Deviations are measured from ``m``, which defaults to ``mean(dataset)``
    when ``None`` is passed. The squared deviations are averaged over
    ``len(dataset)`` (not ``n - 1``) and the square root is returned.
    """
    size = len(dataset)
    numbers = Dataset.get_num_column_or_list(dataset)
    if m is None:
        m = mean(dataset)
    squared_deviations = ((value - m) ** 2 for value in numbers)
    variance = sum(squared_deviations) / float(size)
    return variance ** 0.5
Example #17
0
def percentiles(dataset, vals):
    """Evaluate ``_percentile`` on the sorted values for each of ``vals``.

    Returns a list in the same order as ``vals``; the values of ``dataset``
    are sorted ascending once and reused for every requested point.
    """
    size = len(dataset)
    ordered = sorted(Dataset.get_num_column_or_list(dataset))
    results = []
    for point in vals:
        results.append(_percentile(ordered, size, point))
    return results