def clean_keys(labels, loaders):
    """Rename every label key that carries loader information.

    Each key for which ``loader_from_key`` reports a loader is stored again
    under its loader-free name with a trailing underscore appended, and the
    original key is popped. Top-level keys of ``labels`` that also appear in
    ``loaders`` get the same trailing underscore.

    Parameters
    ----------
    labels : dict(str, numpy.memmap)
        Labels contain all load-easy dataset relevant data.
    loaders : dict
        Only its keys are used: every key that is also a top-level key of
        ``labels`` is renamed in ``labels``.

    Returns
    -------
    labels : dict(str, numpy.memmap)
        The ``labels`` object that was passed in, with the renamed keys.
    """
    renamed = []  # (new key, value) pairs, written once the walk is done
    stale = []  # original keys, popped once the walk is done

    def collect(key, val):
        base, loader = loader_from_key(key)
        if loader is None:
            return
        renamed.append((base + "_", retrieve(labels, key)))
        stale.append(key)

    # Only record the changes during the walk; they are applied afterwards.
    walk(labels, collect, pass_key=True)

    for new_key, value in renamed:
        set_value(labels, new_key, value)
    for old_key in stale:
        pop_keypath(labels, old_key)

    # Snapshot the loader names before touching ``labels``.
    for name in list(loaders.keys()):
        if name not in labels:
            continue
        labels[name + "_"] = labels[name]
        del labels[name]

    return labels
def after_step(self, step, last_results):
    """Save examples and store label values.

    The examples in ``last_results`` are handed to ``save_output``; the paths
    it returns for every index are appended to the label values under their
    respective keys. On the first call (``self.label_arrs`` is ``None``) one
    ``numpy.memmap`` per label key is created below ``<save_root>/labels``,
    with a file name that encodes the key, the full shape and the dtype.
    Afterwards the values of this step are written into the memmaps at the
    positions given by ``self.idxs``.

    Parameters
    ----------
    step :
        Not used by this method.
    last_results :
        Results of the step. If ``self.lk`` is set, the entry at that keypath
        is popped from it and used as the initial label values.
    """
    label_vals = {}
    if self.lk is not None:
        label_vals = pop_keypath(last_results, self.lk, default={})

    idxs = self.idxs  # indices collected before_step

    path_dicts = save_output(
        root=self.save_root,
        example=last_results,
        index=idxs,
        sub_dir_keys=self.sdks,
        keypath=self.keypath,
    )

    # Treat every written path as one more label value of its example.
    for idx in idxs:
        for path_key, path in path_dicts[idx].items():
            if path_key not in label_vals:
                label_vals[path_key] = []
            label_vals[path_key] += [path]

    # The keys of the first example decide which entries become arrays.
    first_paths = path_dicts[idxs[0]]
    for path_key in list(first_paths.keys()):
        label_vals[path_key] = np.array(label_vals[path_key])

    if self.label_arrs is None:
        # First call: allocate one memmap per label key.
        self.label_arrs = {}
        for name in label_vals.keys():
            example = label_vals[name][0]
            full_shape = [len(self.data_in)] + list(np.shape(example))
            shape_tag = "x".join(map(str, full_shape))
            dtype = example.dtype

            file_name = "{}-*-{}-*-{}.npy".format(
                name.replace("/", "--"), shape_tag, dtype
            )
            savepath = os.path.join(self.save_root, "labels", file_name)
            self.label_arrs[name] = np.memmap(
                savepath, shape=tuple(full_shape), mode="w+", dtype=dtype
            )

    for name in label_vals.keys():
        # Can the inner loop be made a fancy indexing assign?
        for position, idx in enumerate(idxs):
            self.label_arrs[name][idx] = label_vals[name][position]
def test_raise_keyNotFoundError_pass_success(self, collection, key, expected_value):
    """``pop_keypath`` without a default must raise even with ``pass_success=True``.

    ``expected_value`` is accepted but not used by this test.
    """
    # The exception info object is never inspected, so it is not bound.
    with pytest.raises(KeyNotFoundError):
        util.pop_keypath(collection, key, pass_success=True)
def test_pass_success(self, collection, key, expected_value):
    """Compare ``pop_keypath(..., default="abc", pass_success=True)`` with the expectation."""
    result = util.pop_keypath(
        collection,
        key,
        default="abc",
        pass_success=True,
    )

    assert expected_value == result
def test_default(self, collection, key, expected_value):
    """Compare ``pop_keypath(..., default="abc")`` with the expectation."""
    result = util.pop_keypath(
        collection,
        key,
        default="abc",
    )

    assert expected_value == result
def test_pop_keypath(self, collection, key, expected_value):
    """Compare a plain ``pop_keypath(collection, key)`` call with the expectation."""
    result = util.pop_keypath(collection, key)

    assert expected_value == result
def after_step(self, step, last_results):
    """Store the step's label values and save its examples.

    On the first call (``self.label_arrs`` is ``None``) one ``numpy.memmap``
    per label key is created directly below ``self.save_root``, with a file
    name that encodes the key, the full shape and the dtype. The label values
    of this step are then written into the memmaps at the positions given by
    ``self.idxs``. Finally the examples are handed to ``save_output`` and the
    returned paths are stored as rows of ``self.data_frame``, which is created
    on demand once there is at least one column to store.

    Parameters
    ----------
    step :
        Not used by this method.
    last_results :
        Results of the step. If ``self.lk`` is set, the entry at that keypath
        is popped from it and used as the label values.
    """
    label_vals = {}
    if self.lk is not None:
        label_vals = pop_keypath(last_results, self.lk, default={})

    if self.label_arrs is None:
        # First call: allocate one memmap per label key.
        self.label_arrs = {}
        for name in label_vals.keys():
            example = label_vals[name][0]
            full_shape = [len(self.data_in)] + list(np.shape(example))
            shape_tag = "x".join(map(str, full_shape))
            dtype = example.dtype

            file_name = "{}-*-{}-*-{}.npy".format(
                name.replace("/", "--"), shape_tag, dtype
            )
            savepath = os.path.join(self.save_root, file_name)
            self.label_arrs[name] = np.memmap(
                savepath, shape=tuple(full_shape), mode="w+", dtype=dtype
            )

    idxs = self.idxs  # indices collected before_step

    for name in label_vals.keys():
        # Can the inner loop be made a fancy indexing assign?
        for position, idx in enumerate(idxs):
            self.label_arrs[name][idx] = label_vals[name][position]

    path_dicts = save_output(
        root=self.save_root,
        example=last_results,
        index=idxs,
        sub_dir_keys=self.sdks,
        keypath=self.keypath,
    )

    if self.data_frame is None:
        # The keys of the first entry define the columns of the frame.
        first_index = list(path_dicts.keys())[0]
        columns = sorted(path_dicts[first_index])
        if columns:
            self.data_frame = pd.DataFrame(columns=columns)
        # Otherwise no load heavy logs were written out; keep the frame unset.

    if self.data_frame is not None:
        for idx, path_dict in path_dicts.items():
            self.data_frame.loc[idx] = path_dict