def test_bin_vals(self):
    # get_bin_vals() must return an (entries, edges) tuple of numpy arrays,
    # both when called without arguments and with an explicit variable range.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    def check(result, expected_entries, expected_edges):
        # shape of the return value first, then the contents of each array
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 2)
        entries, edges = result
        self.assertIsInstance(entries, np.ndarray)
        self.assertListEqual(entries.tolist(), expected_entries)
        self.assertIsInstance(edges, np.ndarray)
        self.assertListEqual(edges.tolist(), expected_edges)

    # no range given
    check(
        hist.get_bin_vals(),
        list(range(10)),
        [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 19],
    )

    # explicit variable range
    check(
        hist.get_bin_vals(variable_range=[5.5, 12.5]),
        [0, 3, 0, 4, 0, 5, 0, 6],
        [5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0],
    )
def test_bin_edges(self):
    # Checks the three bin-edge accessors of Histogram against fixed values.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    # uniform edges: the integers 0 through 19
    expected_edges = list(range(20))
    self.assertListEqual(hist.get_uniform_bin_edges(), expected_edges)

    # truncated uniform bin edges for the range [5.5, 12.5]
    expected_truncated = [5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
    self.assertListEqual(hist.truncated_bin_edges([5.5, 12.5]),
                         expected_truncated)

    # bin_edges() must hand back a numpy array with the uniform edges
    edges = hist.bin_edges()
    self.assertIsInstance(edges, np.ndarray)
    self.assertListEqual(edges.tolist(), expected_edges)
def process_and_store(self):
    """Make, clean, and store ValueCounts objects and their histograms.

    Does nothing when neither ``store_key_counts`` nor ``store_key_hists``
    is set.  Otherwise:

    1. A ValueCounts object is built for every entry of ``self.columns``
       from the matching counter in ``self._counts`` (looked up by the
       column names joined with ':').  When
       ``drop_inconsistent_key_types`` is set, each object is passed
       through ``drop_inconsistent_keys`` first.  The collection is put in
       the DataStore under ``store_key_counts`` if that key is set.
    2. When ``store_key_hists`` is set, a Histogram is built for every
       single-column entry of ``self.columns`` and the collection is put
       in the DataStore under ``store_key_hists``.

    When histograms were stored but value counts were not requested,
    ``self._valcnts`` is deleted at the end.
    """
    store_counts = self.store_key_counts is not None
    store_hists = self.store_key_hists is not None

    # nothing to do?
    if not (store_counts or store_hists):
        return

    ds = ProcessManager().service(DataStore)

    # 1. construct value counts
    for col in self.columns:
        name = ':'.join(col)
        vc = ValueCounts(col, col, self._counts[name])
        # remove all items from Counters where the key is not of correct
        # datatype. e.g. in Counter dict of ints, remove any non-ints that
        # may arise from dq issues.
        if self.drop_inconsistent_key_types:
            vc = self.drop_inconsistent_keys(col, vc)
        self._valcnts[name] = vc
    if store_counts:
        ds[self.store_key_counts] = self._valcnts

    # 2. construct hists from value counts
    if not store_hists:
        return

    for col in self.columns:
        # histograms are only made for single-column entries
        if len(col) != 1:
            continue
        name = ':'.join(col)
        datatype = self.var_dtype[name]
        # classify via the numpy scalar type itself; no need to instantiate
        # a scalar just to test what kind of value it is
        scalar_type = np.dtype(datatype).type
        # bin_specs is used for converting index back to original var in
        # histogram class.
        if issubclass(scalar_type, np.number):
            bin_specs = self.bin_specs.get(name, self._unit_bin_specs)
        elif issubclass(scalar_type, np.datetime64):
            bin_specs = self.bin_specs.get(name, self._unit_timestamp_specs)
        else:
            bin_specs = {}
        self._hists[name] = Histogram(self._valcnts[name], variable=name,
                                      datatype=datatype, bin_specs=bin_specs)

    # and store
    ds[self.store_key_hists] = self._hists

    # cleanup: value counts were only an intermediate product here.
    # (store_key_hists is always set at this point, so there is no
    # matching cleanup of self._hists.)
    # NOTE(review): after this del, a second call would presumably fail on
    # self._valcnts unless it is re-created elsewhere -- confirm.
    if not store_counts:
        del self._valcnts
def test_constructor2(self):
    # A Histogram can be constructed from a ValueCounts object plus bin specs.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    self.assertIsInstance(hist, Histogram)
def test_bin_range(self):
    # get_bin_range() is expected to return the tuple (0, 19) for the fixture.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    self.assertTupleEqual(hist.get_bin_range(), (0, 19))
def test_contents(self):
    # Checks dimension, number of bins and summed counts of the histogram.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    self.assertEqual(hist.n_dim, 1)
    self.assertEqual(hist.n_bins, 10)
    # 0 + 1 + ... + 9
    self.assertEqual(hist._val_counts.sum_counts, 45)
def test_bin_labels(self):
    # bin_labels() must return a numpy array holding the even keys 0..18.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    expected_labels = list(range(0, 20, 2))
    labels = hist.bin_labels()
    self.assertIsInstance(labels, np.ndarray)
    self.assertListEqual(labels.tolist(), expected_labels)
def test_bin_centers(self):
    # bin_centers() must return a numpy array with 0.5, 2.5, ..., 18.5.

    # fixture: even keys 0, 2, ..., 18, where key 2*k carries count k
    counts = Counter({2 * k: k for k in range(10)})
    val_counts = ValueCounts(key='x', counts=counts)
    hist = Histogram(val_counts, variable='x',
                     bin_specs={'bin_width': 1, 'bin_offset': 0})

    # each expected center sits half a unit above an even key
    expected_centers = [low + 0.5 for low in range(0, 20, 2)]
    centers = hist.bin_centers()
    self.assertIsInstance(centers, np.ndarray)
    self.assertListEqual(centers.tolist(), expected_centers)