def test_get_idxs_to_balance_class_count_other_highest(self):
    """Balance indices when the 'other' class holds the highest count.

    Rewrites the fixture labels so rows 0:10 are class 0, rows 10:30 are
    class 1, and the remaining 70 rows fall into the 'other' bucket.
    """
    self.l[10:60, 1] = 0
    self.l[10:30, 1] = 1
    bal = Balancer(np.copy(self.l))
    counts = bal.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 20)
    assert_equals(counts[CLNAME_OTHER], 70)
    # BUG FIX: the failure message contained a bare "%s" that was never
    # interpolated; supply the class name argument.
    assert_equals(counts[CLNAME_OTHER], np.max(counts.values()),
                  "this test requires class count for %s to be highest!"
                  % (CLNAME_OTHER,))
    tolerance_order = 1
    idxs = bal.get_idxs_to_balance_class_count(counts.values())
    # Minority classes are oversampled up to the majority count (70).
    assert_almost_equal(
        np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
        10 + (70 - 10), tolerance_order)
    assert_almost_equal(
        np.count_nonzero(np.logical_and(idxs >= 10, idxs < 30)),
        20 + (70 - 20), tolerance_order)
    assert_equals(np.count_nonzero(idxs >= 30), 70, tolerance_order)
def balance_class_count_hdf5(fpath, keys, key_label='label',
                             other_clname=CLNAME_OTHER):
    """ Resample keys in an HDF5 to generate a near balanced dataset.
    Returns a dictionary with resampled features and ground truth
    and indicies from the original label that were sampled.
    Not suitable for very large datasets.

    fpath -- path to HDF5 file
    keys -- keys to resample (e.g. features)
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # BUG FIX: the original opened the file and never closed it; the
    # context manager releases the handle even if an exception is raised.
    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]  # materialize labels in memory
        bal = Balancer(np.squeeze(labls))
        class_count = bal.get_class_count(other_clname=other_clname)
        # list() keeps this working on Python 3, where dict.values()
        # returns a non-indexable view.
        idxs = bal.get_idxs_to_balance_class_count(list(class_count.values()))
        # shuffle the array along the first index of a
        # multi-dimensional array, in-place
        np.random.shuffle(idxs)
        dict_balanced = {key_label: labls[idxs]}
        for k in keys:
            # Read the full dataset, then fancy-index the resampled rows.
            dict_balanced[k] = h_src[k][:][idxs]
    return dict_balanced, idxs
def balance_class_count_hdf5(fpath, keys, key_label='label',
                             other_clname=CLNAME_OTHER):
    """ Resample keys in an HDF5 to generate a near balanced dataset.
    Returns a dictionary with resampled features and ground truth
    and indicies from the original label that were sampled.
    Not suitable for very large datasets.

    fpath -- path to HDF5 file
    keys -- keys to resample (e.g. features)
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # BUG FIX: the original opened the file and never closed it; the
    # context manager releases the handle even if an exception is raised.
    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]  # materialize labels in memory
        bal = Balancer(np.squeeze(labls))
        class_count = bal.get_class_count(other_clname=other_clname)
        # list() keeps this working on Python 3, where dict.values()
        # returns a non-indexable view.
        idxs = bal.get_idxs_to_balance_class_count(list(class_count.values()))
        # shuffle the array along the first index of a
        # multi-dimensional array, in-place
        np.random.shuffle(idxs)
        dict_balanced = {key_label: labls[idxs]}
        for k in keys:
            # Read the full dataset, then fancy-index the resampled rows.
            dict_balanced[k] = h_src[k][:][idxs]
    return dict_balanced, idxs
def test_get_class_count_no_other(self):
    """get_class_count with other_clname=None yields one count per column."""
    counts = Balancer(np.copy(self.l)).get_class_count(other_clname=None)
    assert_is_instance(counts, dict, "Unexpected return instance type.")
    # FIX: compare explicit sorted lists. The original compared
    # counts.keys() to range() directly, which depends on dict ordering
    # and fails on Python 3, where both are views rather than lists.
    assert_list_equal(sorted(counts.keys()), list(range(self.l.shape[-1])),
                      "Expecting a key for each class.")
    for key in counts.keys():
        # Each per-class count equals the column sum of the one-hot labels.
        assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                      "Unexpected count for class '%s'" % (key,))
    # FIX: wrap values() in list() so np.sum works on Python 3 dict views.
    assert_greater(np.sum(list(counts.values())), 0)
def test_get_class_count_no_other(self):
    """get_class_count with other_clname=None yields one count per column."""
    counts = Balancer(np.copy(self.l)).get_class_count(other_clname=None)
    assert_is_instance(counts, dict, "Unexpected return instance type.")
    # FIX: compare explicit sorted lists. The original compared
    # counts.keys() to range() directly, which depends on dict ordering
    # and fails on Python 3, where both are views rather than lists.
    assert_list_equal(sorted(counts.keys()), list(range(self.l.shape[-1])),
                      "Expecting a key for each class.")
    for key in counts.keys():
        # Each per-class count equals the column sum of the one-hot labels.
        assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                      "Unexpected count for class '%s'" % (key, ))
    # FIX: wrap values() in list() so np.sum works on Python 3 dict views.
    assert_greater(np.sum(list(counts.values())), 0)
def get_class_count_hdf5(fpath, key_label='label', other_clname=CLNAME_OTHER):
    """ Count per-class instances in HDF5 and return a dictionary
    of class ids and per-class count

    fpath -- path to HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # BUG FIX: the original leaked the open file handle. np.squeeze
    # copies the dataset into memory, so the file can be closed before
    # the counts are computed.
    with h5py.File(fpath, 'r') as h:
        b = Balancer(np.squeeze(h[key_label]))
    return b.get_class_count(other_clname=other_clname)
def save_balanced_sampled_class_count_hdf5(fpath, keys, fpath_dst,
                                           key_label='label',
                                           other_clname=CLNAME_OTHER,
                                           chunks=None, target_count=None):
    """ Resample keys in an HDF5 to generate a near balanced dataset
    and save into a new HDF5.
    Returns indicies from the original label that were sampled.
    Not suitable for very large datasets.
    Classes with count < target_count will sub-sampled without replacement.
    Classes with count > target_count will get over-sampled.
    Classes with count equal to target_count will be copied.

    fpath -- path to source HDF5 file
    keys -- keys to resample (e.g. features)
    fpath_dst -- path to destination HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    chunks -- forward chunks parameter to use during hdf5 writing
    target_count -- per-class count to target when sampling
    """
    # Refuse to clobber the source file in place.
    if os.path.abspath(fpath) == os.path.abspath(fpath_dst):
        raise IOError("Cannot read and write to the same file (%s) (%s)" %
                      (fpath, fpath_dst))
    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]  # materialize labels in memory
        bal = Balancer(np.squeeze(labls))
        class_count = bal.get_class_count(other_clname=other_clname)
        idxs = bal.get_idxs_to_balance_class_count(class_count.values(),
                                                   target_count)
        # shuffle the array along the first index of a
        # multi-dimensional array, in-place
        np.random.shuffle(idxs)
        with h5py.File(fpath_dst, 'w') as h_dst:
            h_dst[key_label] = labls[idxs]
            for k in keys:
                dataset_src = h_src[k]
                # Destination keeps the source shape except for the
                # resampled row count along axis 0.
                shape_new = list(dataset_src.shape)
                shape_new[0] = len(idxs)
                dataset_dst = h_dst.create_dataset(k, tuple(shape_new),
                                                   dataset_src.dtype,
                                                   chunks=chunks)
                # Row-by-row copy bounds memory use per key (each row is
                # read from and written to disk individually).
                for idx_dst, idx_src in enumerate(idxs):
                    dataset_dst[idx_dst] = dataset_src[idx_src]
    return idxs
def save_balanced_sampled_class_count_hdf5(fpath, keys, fpath_dst,
                                           key_label='label',
                                           other_clname=CLNAME_OTHER,
                                           chunks=None, target_count=None):
    """ Resample keys in an HDF5 to generate a near balanced dataset
    and save into a new HDF5.
    Returns indicies from the original label that were sampled.
    Not suitable for very large datasets.
    Classes with count < target_count will sub-sampled without replacement.
    Classes with count > target_count will get over-sampled.
    Classes with count equal to target_count will be copied.

    fpath -- path to source HDF5 file
    keys -- keys to resample (e.g. features)
    fpath_dst -- path to destination HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    chunks -- forward chunks parameter to use during hdf5 writing
    target_count -- per-class count to target when sampling
    """
    # Refuse to clobber the source file in place.
    if os.path.abspath(fpath) == os.path.abspath(fpath_dst):
        raise IOError("Cannot read and write to the same file (%s) (%s)" %
                      (fpath, fpath_dst))
    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]  # materialize labels in memory
        bal = Balancer(np.squeeze(labls))
        class_count = bal.get_class_count(other_clname=other_clname)
        idxs = bal.get_idxs_to_balance_class_count(class_count.values(),
                                                   target_count)
        # shuffle the array along the first index of a
        # multi-dimensional array, in-place
        np.random.shuffle(idxs)
        with h5py.File(fpath_dst, 'w') as h_dst:
            h_dst[key_label] = labls[idxs]
            for k in keys:
                dataset_src = h_src[k]
                # Destination keeps the source shape except for the
                # resampled row count along axis 0.
                shape_new = list(dataset_src.shape)
                shape_new[0] = len(idxs)
                dataset_dst = h_dst.create_dataset(k, tuple(shape_new),
                                                   dataset_src.dtype,
                                                   chunks=chunks)
                # Row-by-row copy bounds memory use per key (each row is
                # read from and written to disk individually).
                for idx_dst, idx_src in enumerate(idxs):
                    dataset_dst[idx_dst] = dataset_src[idx_src]
    return idxs
def test_get_class_count_other_empty(self):
    """The 'other' key is present with count zero when no row maps to it."""
    other_clname = 'other_class_bin'
    balancer = Balancer(np.copy(self.l))
    counts = balancer.get_class_count(other_clname=other_clname)
    assert_is_instance(counts, dict, "Unexpected return instance type.")
    num_classes = self.l.shape[-1]
    assert_equals(len(counts.keys()), num_classes + 1,
                  "Expecting a key for each class + 1 for 'other'.")
    assert_in(other_clname, counts.keys())
    for key in counts.keys():
        if key != other_clname:
            # Regular classes: count equals the label column sum.
            assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                          "Unexpected count for class '%s'" % (key,))
        else:
            assert_equals(counts[key], 0,
                          "Unexpected count for 'other' class")
def test_get_idxs_to_balance_class_count_other_not_highest(self):
    """Oversampling when a regular class (not 'other') has the top count."""
    bal = Balancer(np.copy(self.l))
    counts = bal.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 50)
    assert_equals(counts[CLNAME_OTHER], 40)
    idxs = bal.get_idxs_to_balance_class_count(counts.values())
    tol = 1
    # Partition the sampled indices by which class' rows they point at.
    in_cls0 = np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10))
    in_cls1 = np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60))
    in_other = np.count_nonzero(idxs >= 60)
    # Each class is topped up to roughly the majority count (50).
    assert_almost_equal(in_cls0, 10 + (50 - 10), tol)
    assert_equals(in_cls1, 50, 1)
    assert_almost_equal(in_other, 40 + (50 - 40), tol)
def test_get_class_count_other_empty(self):
    """When every row belongs to a labeled class, the 'other' class is
    still reported, with a count of zero."""
    other_clname = 'other_class_bin'
    counts = Balancer(np.copy(
        self.l)).get_class_count(other_clname=other_clname)
    assert_is_instance(counts, dict, "Unexpected return instance type.")
    # One key per label column, plus one for the 'other' bucket.
    assert_equals(len(counts.keys()), self.l.shape[-1] + 1,
                  "Expecting a key for each class + 1 for 'other'.")
    assert_in(other_clname, counts.keys())
    for key in counts.keys():
        if key == other_clname:
            assert_equals(counts[key], 0,
                          "Unexpected count for 'other' class")
        else:
            # Per-class count equals the column sum of the one-hot labels.
            assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                          "Unexpected count for class '%s'" % (key, ))
def test_get_idxs_to_balance_class_count_other_not_highest(self):
    """Sampling each class (including 'other') to an explicit target count."""
    bal = Balancer(np.copy(self.l))
    counts = bal.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 50)
    assert_equals(counts[CLNAME_OTHER], 40)
    for target_count in [500]:  # [10, 20, 500]
        idxs = bal.sample_idxs_to_target_count(counts.values(), target_count)
        # Every class plus 'other' contributes exactly target_count rows.
        assert_equals(idxs.size, (self.num_classes + 1) * target_count)
        assert_equals(np.count_nonzero(idxs < 10), target_count)
        in_cls1 = np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60))
        assert_equals(in_cls1, target_count)
        assert_equals(np.count_nonzero(idxs >= 60), target_count)
def test_get_idxs_to_balance_class_count_other_not_highest(self):
    """sample_idxs_to_target_count draws exactly target_count indices per
    class, including the 'other' bucket."""
    bal = Balancer(np.copy(self.l))
    counts = bal.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
    # Fixture layout: rows 0:10 class 0, 10:60 class 1, 60:100 'other'.
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 50)
    assert_equals(counts[CLNAME_OTHER], 40)
    for target_count in [500]:  #[10, 20, 500]:
        idxs = bal.sample_idxs_to_target_count(counts.values(), target_count)
        # Total size: target_count per class plus one 'other' bucket.
        assert_equals(idxs.size, (self.num_classes + 1) * target_count)
        assert_equals(np.count_nonzero(idxs < 10), target_count)
        assert_equals(
            np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)),
            target_count)
        assert_equals(np.count_nonzero(idxs >= 60), target_count)
def test_get_idxs_to_balance_class_count_other_not_highest(self):
    """Balancing oversamples minority classes up to the majority class
    count when 'other' is not the largest class."""
    bal = Balancer(np.copy(self.l))
    counts = bal.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
    # Fixture layout: rows 0:10 class 0, 10:60 class 1, 60:100 'other'.
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 50)
    assert_equals(counts[CLNAME_OTHER], 40)
    tolerance_order = 1
    idxs = bal.get_idxs_to_balance_class_count(counts.values())
    # Each class is topped up to roughly the majority count (50).
    assert_almost_equal(np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)
                                         ), 10 + (50 - 10), tolerance_order)
    assert_equals(np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)
                                   ), 50, 1)
    assert_almost_equal(np.count_nonzero(idxs >= 60), 40 + (50 - 40),
                        tolerance_order)
def test_get_class_count_other_non_empty(self):
    """Rows whose label vector is all zeros are counted under 'other'."""
    other_clname = 'foo'
    n, num_classes = self.l.shape
    # Stack 2*n all-zero label rows below the originals; none of them
    # belongs to a labeled class, so all land in the 'other' bucket.
    padding = np.zeros((n * 2, num_classes), dtype=self.l.dtype)
    labls = np.vstack((self.l, padding))
    counts = Balancer(labls).get_class_count(other_clname=other_clname)
    assert_is_instance(counts, dict, "Unexpected return instance type.")
    assert_equals(len(counts.keys()), self.l.shape[-1] + 1,
                  "Expecting a key for each class + 1 for 'other'.")
    assert_in(other_clname, counts.keys())
    for key in counts.keys():
        if key != other_clname:
            assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                          "Unexpected count for class '%s'" % (key,))
        else:
            assert_equals(counts[key], n * 2,
                          "Unexpected count for '%s' class"
                          % (other_clname,))
def test_get_idxs_to_balance_class_count_no_other(self):
    """Balancing with explicit class columns only (no 'other' bucket)."""
    # Append a third one-hot column that is hot for rows 60 and beyond,
    # so every row belongs to an explicit class.
    labls = np.hstack((self.l, np.zeros((len(self.l), 1))))
    labls[60:, -1] = 1
    bal = Balancer(labls)
    counts = bal.get_class_count(other_clname=None)
    assert_not_in(CLNAME_OTHER, counts.keys())
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 50)
    assert_equals(counts[2], 40)
    idxs = bal.get_idxs_to_balance_class_count(counts.values())
    tol = 1
    # Each class is topped up to roughly the majority count (50).
    assert_almost_equal(
        np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
        10 + (50 - 10), tol)
    assert_equals(
        np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)), 50, 1)
    assert_almost_equal(np.count_nonzero(idxs >= 60), 40 + (50 - 40), tol)
def test_get_idxs_to_balance_class_count_other_highest(self):
    """Balance indices when the 'other' class holds the highest count.

    Rewrites the fixture labels so rows 0:10 are class 0, rows 10:30 are
    class 1, and the remaining 70 rows fall into the 'other' bucket.
    """
    self.l[10:60, 1] = 0
    self.l[10:30, 1] = 1
    bal = Balancer(np.copy(self.l))
    counts = bal.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 20)
    assert_equals(counts[CLNAME_OTHER], 70)
    # BUG FIX: the failure message contained a bare "%s" that was never
    # interpolated; supply the class name argument.
    assert_equals(counts[CLNAME_OTHER], np.max(counts.values()),
                  "this test requires class count for %s to be highest!"
                  % (CLNAME_OTHER,))
    tolerance_order = 1
    idxs = bal.get_idxs_to_balance_class_count(counts.values())
    # Minority classes are oversampled up to the majority count (70).
    assert_almost_equal(
        np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
        10 + (70 - 10), tolerance_order)
    assert_almost_equal(
        np.count_nonzero(np.logical_and(idxs >= 10, idxs < 30)),
        20 + (70 - 20), tolerance_order)
    assert_equals(np.count_nonzero(idxs >= 30), 70, tolerance_order)
def test_get_class_count_other_non_empty(self):
    """Rows whose label vector is all zeros are counted under 'other'."""
    other_clname = 'foo'
    n, num_classes = self.l.shape
    # append label vector for 'other' class
    labls = np.vstack(
        (self.l, np.zeros((n * 2, num_classes), dtype=self.l.dtype)))
    counts = Balancer(labls).get_class_count(other_clname=other_clname)
    assert_is_instance(counts, dict, "Unexpected return instance type.")
    # One key per label column, plus one for the 'other' bucket.
    assert_equals(len(counts.keys()), self.l.shape[-1] + 1,
                  "Expecting a key for each class + 1 for 'other'.")
    assert_in(other_clname, counts.keys())
    for key in counts.keys():
        if key == other_clname:
            # All 2*n appended zero rows must land in 'other'.
            assert_equals(
                counts[key], n * 2,
                "Unexpected count for '%s' class" % (other_clname, ))
        else:
            # Regular classes: count equals the label column sum.
            assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                          "Unexpected count for class '%s'" % (key, ))
def test_get_idxs_to_balance_class_count_no_other(self):
    """Balancing with explicit class columns only (no 'other' bucket)."""
    # Append a third one-hot column, hot for rows 60 and beyond, so
    # every row belongs to an explicit class.
    new_col = np.zeros((len(self.l), 1))
    labls = np.hstack((self.l, new_col))
    labls[60:, -1] = 1
    bal = Balancer(labls)
    counts = bal.get_class_count(other_clname=None)
    assert_not_in(CLNAME_OTHER, counts.keys())
    assert_equals(counts[0], 10)
    assert_equals(counts[1], 50)
    assert_equals(counts[2], 40)
    tolerance_order = 1
    idxs = bal.get_idxs_to_balance_class_count(counts.values())
    # Each class is topped up to roughly the majority count (50).
    assert_almost_equal(np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)
                                         ), 10 + (50 - 10), tolerance_order)
    assert_equals(np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)
                                   ), 50, 1)
    assert_almost_equal(np.count_nonzero(idxs >= 60), 40 + (50 - 40),
                        tolerance_order)
# Copyright Pololu Corporation. For more information, see https://www.pololu.com/ from flask import Flask from flask import render_template from flask import redirect from subprocess import call app = Flask(__name__, static_folder='server_balboa_resources/static', template_folder='server_balboa_resources/templates') app.debug = True from a_star import AStar a_star = AStar() from balance import Balancer balancer = Balancer() import json led0_state = False led1_state = False led2_state = False @app.route("/") def hello(): return render_template("index.html") @app.route("/status.json") def status():
def test_get_class_count_other_default(self):
    """The default 'other' class name appears among the returned keys."""
    balancer = Balancer(np.copy(self.l))
    counts = balancer.get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())
def test_get_class_count_other_default(self):
    """Passing the module default CLNAME_OTHER yields an 'other' key."""
    counts = Balancer(np.copy(
        self.l)).get_class_count(other_clname=CLNAME_OTHER)
    assert_in(CLNAME_OTHER, counts.keys())