def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Build the tanks that turn category values into one-hot vectors.

    The steps, in order, are:

    1. ``td.cat_to_index`` maps ``array`` through ``self.cat_val_to_index``.
    2. ``td.clone`` duplicates the resulting indices; copy 'a' is named
       'indices' and copy 'b' is passed on.
    3. ``td.one_hot`` encodes copy 'b' with ``len(self.cat_val_to_index)``
       as its second argument.
    4. When ``self.norm_mode == 'mean_std'``, ``self.mean`` is subtracted
       and the result is divided by ``self.std``.

    The final 'target' tube is named 'one_hots'.

    Parameters
    ----------
    array : np.ndarray or empty
        The array to be transformed.
    return_tubes : list or None
        Keys that are each passed to ``maybe_get_tube`` on the waterwork
        owning the 'one_hots' tube. When None, nothing is looked up.
    prefix : str
        Forwarded to ``self._pre`` to build the key under which the
        'missing_vals' tube plug looks up the 'indices' value.

    Returns
    -------
    list or None
        One ``maybe_get_tube`` result per entry of ``return_tubes``, in the
        same order, or None when ``return_tubes`` is None.

    """
    # Convert the category values to indices.
    index_tubes, index_slots = td.cat_to_index(
        array,
        self.cat_val_to_index,
        tube_plugs={'input_dtype': lambda z: self.input_dtype}
    )
    index_slots['cats'].set_name('array')
    index_tubes['missing_vals'].set_name('missing_vals')

    # Duplicate the indices so that one copy can be exposed under the name
    # 'indices' while the other feeds the one-hot encoding.
    index_copies, _ = td.clone(index_tubes['target'])
    index_copies['a'].set_name('indices')

    # One-hot encode the second copy. The 'missing_vals' plug yields an
    # array filled with -2 that has the shape of the 'indices' value.
    encoded, _ = td.one_hot(
        index_copies['b'],
        len(self.cat_val_to_index),
        tube_plugs={
            'missing_vals': lambda z: np.ones(z[self._pre('indices', prefix)].shape)*-2
        }
    )

    # Optional standardization: (x - mean) / std.
    if self.norm_mode == 'mean_std':
        encoded, _ = td.sub(
            encoded['target'],
            self.mean,
            tube_plugs={
                'a_is_smaller': False,
                'smaller_size_array': self.mean
            }
        )
        encoded, _ = td.div(
            encoded['target'],
            self.std,
            tube_plugs={
                'a_is_smaller': False,
                'smaller_size_array': self.std,
                'missing_vals': np.array([]),
                'remainder': np.array([])
            }
        )

    encoded['target'].set_name('one_hots')

    if return_tubes is None:
        return None

    waterwork = encoded['target'].waterwork
    return [waterwork.maybe_get_tube(tube_key) for tube_key in return_tubes]
def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Build the tanks that clean and optionally rescale a numeric array.

    NaN entries are located with ``td.isnan`` and handed to ``td.replace``,
    whose 'replace_with' slot is plugged with ``self.fill_nan_func`` applied
    to the value stored under the 'array' key. The result is then rescaled
    according to ``self.norm_mode``:

    * ``'mean_std'``: subtract ``self.mean``, then divide by ``self.std``.
    * ``'min_max'``: subtract ``self.min``, then divide by
      ``self.max - self.min``.
    * any other value: no rescaling.

    The final 'target' tube is named 'nums'.

    Parameters
    ----------
    array : np.ndarray or empty
        The array to be transformed.
    return_tubes : list or None
        Keys that are each passed to ``maybe_get_tube`` on the waterwork
        owning the 'nums' tube. When None, nothing is looked up.
    prefix : str
        Forwarded to ``self._pre`` to build the key under which the
        'replace_with' slot plug looks up the 'array' value.

    Returns
    -------
    list or None
        One ``maybe_get_tube`` result per entry of ``return_tubes``, in the
        same order, or None when ``return_tubes`` is None.

    """
    # Find the NaN's and swap them out.
    nan_tubes, nan_slots = td.isnan(array)
    nan_slots['a'].set_name('array')

    cleaned, _ = td.replace(
        nan_tubes['a'],
        nan_tubes['target'],
        slot_plugs={
            'replace_with': lambda z: self.fill_nan_func(z[self._pre('array', prefix)])
        },
        tube_plugs={
            'replace_with': np.array([]),
            'replaced_vals': np.array(np.nan)
        }
    )
    cleaned['replaced_vals'].set_name('replaced_vals')
    cleaned['mask'].set_name('nans')

    # Pick the (offset, scale) pair for the requested normalization; both
    # modes share the same subtract-then-divide shape.
    mode = self.norm_mode
    norm_params = None
    if mode == 'mean_std':
        norm_params = (self.mean, self.std)
    elif mode == 'min_max':
        norm_params = (self.min, self.max - self.min)

    if norm_params is not None:
        offset, scale = norm_params
        cleaned, _ = td.sub(
            cleaned['target'],
            offset,
            tube_plugs={
                'a_is_smaller': False,
                'smaller_size_array': offset
            }
        )
        cleaned, _ = td.div(
            cleaned['target'],
            scale,
            tube_plugs={
                'a_is_smaller': False,
                'smaller_size_array': scale,
                'missing_vals': np.array([]),
                'remainder': np.array([])
            }
        )

    cleaned['target'].set_name('nums')

    if return_tubes is None:
        return None

    waterwork = cleaned['target'].waterwork
    return [waterwork.maybe_get_tube(tube_key) for tube_key in return_tubes]
def __sub__(self, other):
    """Define a sub tank (operation) between this tube and ``other``.

    Called for the expression ``self - other``. ``self`` is passed as ``a``
    and ``other`` as ``b`` to ``td.sub``, and whatever ``td.sub`` returns is
    returned unchanged.
    """
    # NOTE(review): imported inside the method rather than at module level,
    # presumably to avoid a circular import -- confirm before moving it.
    import wtrwrks.tanks.tank_defs as td

    difference = td.sub(a=self, b=other)
    return difference
def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Build the tanks that turn a datetime array into (rescaled) numbers.

    NaT entries are located with ``td.isnat`` and handed to ``td.replace``,
    whose 'replace_with' slot is plugged with ``self.fill_nat_func`` applied
    to the value stored under the 'array' key. ``td.datetime_to_num`` then
    converts the result using ``self.zero_datetime``, ``self.num_units`` and
    ``self.time_unit``. Finally the numbers are rescaled according to
    ``self.norm_mode``:

    * ``'mean_std'``: subtract ``self.mean``, then divide by ``self.std``.
    * ``'min_max'``: subtract ``self.min``, then divide by
      ``self.max - self.min``.
    * any other value: no rescaling.

    The final 'target' tube is named 'nums'.

    Parameters
    ----------
    array : np.ndarray or empty
        The array to be transformed.
    return_tubes : list or None
        Keys that are each passed to ``maybe_get_tube`` on the waterwork
        owning the 'nums' tube. When None, nothing is looked up.
    prefix : str
        Forwarded to ``self._pre`` to build the key under which the
        'replace_with' slot plug looks up the 'array' value.

    Returns
    -------
    list or None
        One ``maybe_get_tube`` result per entry of ``return_tubes``, in the
        same order, or None when ``return_tubes`` is None.

    """
    # Find the NaT's and swap them out.
    nat_tubes, nat_slots = td.isnat(array)
    nat_slots['a'].set_name('array')

    filled, _ = td.replace(
        nat_tubes['a'],
        nat_tubes['target'],
        slot_plugs={
            'replace_with': lambda z: self.fill_nat_func(z[self._pre('array', prefix)])
        },
        tube_plugs={
            'replace_with': np.array([]),
            'replaced_vals': np.array([None], dtype=np.datetime64)
        }
    )
    filled['replaced_vals'].set_name('replaced_vals')
    filled['mask'].set_name('nats')

    # Convert the datetimes to numbers.
    numeric, _ = td.datetime_to_num(
        filled['target'],
        self.zero_datetime,
        self.num_units,
        self.time_unit,
        name='dtn'
    )
    numeric['diff'].set_name('diff')

    # Pick the (offset, scale) pair for the requested normalization; both
    # modes share the same subtract-then-divide shape.
    mode = self.norm_mode
    norm_params = None
    if mode == 'mean_std':
        norm_params = (self.mean, self.std)
    elif mode == 'min_max':
        norm_params = (self.min, self.max - self.min)

    if norm_params is not None:
        offset, scale = norm_params
        numeric, _ = td.sub(
            numeric['target'],
            offset,
            tube_plugs={
                'a_is_smaller': False,
                'smaller_size_array': offset
            }
        )
        numeric, _ = td.div(
            numeric['target'],
            scale,
            tube_plugs={
                'a_is_smaller': False,
                'smaller_size_array': scale,
                'missing_vals': np.array([]),
                'remainder': np.array([])
            }
        )

    numeric['target'].set_name('nums')

    if return_tubes is None:
        return None

    waterwork = numeric['target'].waterwork
    return [waterwork.maybe_get_tube(tube_key) for tube_key in return_tubes]