Beispiel #1
0
    def test_bin_vals(self):
        """Check get_bin_vals() with and without a restricting variable range."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)
        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        # --- no range given ---
        result = hist.get_bin_vals()
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 2)
        entries, edges = result

        self.assertIsInstance(entries, np.ndarray)
        self.assertListEqual(entries.tolist(), list(range(10)))
        self.assertIsInstance(edges, np.ndarray)
        self.assertListEqual(edges.tolist(),
                             [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 19])

        # --- restricted to the variable range [5.5, 12.5] ---
        result = hist.get_bin_vals(variable_range=[5.5, 12.5])
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 2)
        entries, edges = result

        self.assertIsInstance(entries, np.ndarray)
        self.assertListEqual(entries.tolist(), [0, 3, 0, 4, 0, 5, 0, 6])
        self.assertIsInstance(edges, np.ndarray)
        self.assertListEqual(
            edges.tolist(),
            [5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0])
Beispiel #2
0
    def test_bin_edges(self):
        """Check uniform, truncated and array-valued bin edges of a histogram."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)
        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        # uniform edges: every integer from 0 up to and including 19
        expected_uniform = list(range(20))
        self.assertListEqual(hist.get_uniform_bin_edges(), expected_uniform)

        # uniform edges truncated to the range [5.5, 12.5]
        expected_truncated = [
            5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0
        ]
        self.assertListEqual(hist.truncated_bin_edges([5.5, 12.5]),
                             expected_truncated)

        # bin_edges() must give the same uniform edges, as a numpy array
        edges_array = hist.bin_edges()
        self.assertIsInstance(edges_array, np.ndarray)
        self.assertListEqual(edges_array.tolist(), expected_uniform)
Beispiel #3
0
    def process_and_store(self):
        """Make, clean, and store ValueCounts and Histogram objects.

        Does nothing when neither ``store_key_counts`` nor ``store_key_hists``
        is set.  Otherwise:

        1. A ValueCounts object is built for every entry of ``self.columns``
           from the matching counter in ``self._counts``; keys of an
           inconsistent type are dropped first if
           ``drop_inconsistent_key_types`` is set.  If ``store_key_counts``
           is set, the dict of value counts is put in the datastore under
           that key.
        2. If ``store_key_hists`` is set, a Histogram is built for every
           single-variable column and the dict of histograms is put in the
           datastore under that key.
        """
        # nothing to do?
        if self.store_key_hists is None and self.store_key_counts is None:
            return

        proc_mgr = ProcessManager()
        ds = proc_mgr.service(DataStore)

        # 1. construct value counts
        for col in self.columns:
            name = ':'.join(col)
            vc = ValueCounts(col, col, self._counts[name])
            # remove all items from Counters where the key is not of correct datatype.
            # e.g. in Counter dict of ints, remove any non-ints that may arise
            # from dq issues.
            if self.drop_inconsistent_key_types:
                vc = self.drop_inconsistent_keys(col, vc)
            self._valcnts[name] = vc

        if self.store_key_counts is not None:
            ds[self.store_key_counts] = self._valcnts

        # 2. construct hists from value counts
        if self.store_key_hists is None:
            return

        for col in self.columns:
            # histograms are only made for columns consisting of one variable
            if len(col) != 1:
                continue
            name = ':'.join(col)
            # instantiate the numpy scalar type so it can be classified below
            dt = np.dtype(self.var_dtype[name]).type()
            is_number = isinstance(dt, np.number)
            is_timestamp = isinstance(dt, np.datetime64)

            # bin_specs is used for converting index back to original var in
            # histogram class. It stays empty for anything that is neither a
            # number nor a timestamp.
            bin_specs = {}
            if is_number:
                bin_specs = self.bin_specs.get(name, self._unit_bin_specs)
            elif is_timestamp:
                bin_specs = self.bin_specs.get(name,
                                               self._unit_timestamp_specs)
            h = Histogram(self._valcnts[name],
                          variable=name,
                          datatype=self.var_dtype[name],
                          bin_specs=bin_specs)
            self._hists[name] = h
        # and store
        ds[self.store_key_hists] = self._hists

        # cleanup: the value counts are dropped when they were not stored.
        # store_key_hists is guaranteed to be set at this point (see the
        # early return above), so self._hists is always kept.
        if self.store_key_counts is None:
            del self._valcnts
Beispiel #4
0
    def test_constructor2(self):
        """Check that a Histogram can be constructed from a ValueCounts object."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)

        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        self.assertIsInstance(hist, Histogram)
Beispiel #5
0
    def test_bin_range(self):
        """Check the (low, high) bin range reported by the histogram."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)
        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        self.assertTupleEqual(hist.get_bin_range(), (0, 19))
Beispiel #6
0
    def test_contents(self):
        """Check dimension, number of bins and total count of the histogram."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)
        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        self.assertEqual(hist.n_dim, 1)
        self.assertEqual(hist.n_bins, 10)
        # 0 + 1 + ... + 9 == 45
        self.assertEqual(hist._val_counts.sum_counts, 45)
Beispiel #7
0
    def test_bin_labels(self):
        """Check that bin_labels() returns the filled bin labels as an array."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)
        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        labels = hist.bin_labels()
        self.assertIsInstance(labels, np.ndarray)
        self.assertListEqual(labels.tolist(),
                             [0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
Beispiel #8
0
    def test_bin_centers(self):
        """Check that bin_centers() returns the bin centers as an array."""
        # fixture: keys are the even numbers 0..18, key 2*k carries count k
        counts = Counter({2 * k: k for k in range(10)})
        value_counts = ValueCounts(key='x', counts=counts)
        hist = Histogram(value_counts, variable='x',
                         bin_specs=dict(bin_width=1, bin_offset=0))

        centers = hist.bin_centers()
        self.assertIsInstance(centers, np.ndarray)
        self.assertListEqual(
            centers.tolist(),
            [0.5, 2.5, 4.5, 6.5, 8.5, 10.5, 12.5, 14.5, 16.5, 18.5])