Example #1
0
  def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Add the tanks that turn category values into one-hot vectors.

    The tanks are chained in this order: td.cat_to_index, td.clone, td.one_hot
    and, only when self.norm_mode is 'mean_std', td.sub followed by td.div.
    Along the way the slot 'array' and the tubes 'missing_vals', 'indices' and
    'one_hots' are named.

    Parameters
    ----------
    array : np.ndarray or empty
      The array to be transformed.
    return_tubes : iterable or None
      Keys that are each passed to the waterwork's maybe_get_tube. Nothing is
      looked up when this is None.
    prefix : str
      Forwarded to self._pre to build the key under which 'indices' is looked
      up by the 'missing_vals' plug of the one_hot tank.

    Returns
    -------
    list or None
      The maybe_get_tube results, in the order of return_tubes, or None when
      return_tubes is None.

    """
    # Map every category value to its index.
    idx_tubes, idx_slots = td.cat_to_index(
      array,
      self.cat_val_to_index,
      tube_plugs={'input_dtype': lambda z: self.input_dtype}
    )
    idx_slots['cats'].set_name('array')
    idx_tubes['missing_vals'].set_name('missing_vals')

    # Branch 'a' of the clone is exposed as 'indices'; branch 'b' feeds the
    # one-hot tank below.
    branches, _ = td.clone(idx_tubes['target'])
    branches['a'].set_name('indices')

    # Plug for 'missing_vals': an array of -2's with the shape of the entry of
    # z stored under the prefixed 'indices' key.
    one_hot_plugs = {
      'missing_vals': lambda z: np.ones(z[self._pre('indices', prefix)].shape)*-2
    }
    tubes, _ = td.one_hot(
      branches['b'],
      len(self.cat_val_to_index),
      tube_plugs=one_hot_plugs
    )

    if self.norm_mode == 'mean_std':
      # Subtract self.mean, then divide by self.std.
      sub_plugs = {'a_is_smaller': False, 'smaller_size_array': self.mean}
      tubes, _ = td.sub(tubes['target'], self.mean, tube_plugs=sub_plugs)

      div_plugs = {
        'a_is_smaller': False,
        'smaller_size_array': self.std,
        'missing_vals': np.array([]),
        'remainder': np.array([])
      }
      tubes, _ = td.div(tubes['target'], self.std, tube_plugs=div_plugs)

    tubes['target'].set_name('one_hots')

    if return_tubes is None:
      return None

    # Resolve every requested key against the waterwork that owns the final tube.
    ww = tubes['target'].waterwork
    return [ww.maybe_get_tube(tube_key) for tube_key in return_tubes]
Example #2
0
    def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
        """Add the tanks that fill in NaN's and optionally normalize the numbers.

        The tanks are chained in this order: td.isnan, td.replace and then,
        depending on self.norm_mode, td.sub followed by td.div ('mean_std'
        uses self.mean and self.std, 'min_max' uses self.min and
        self.max - self.min). Any other norm_mode adds no normalization tanks.
        The slot 'array' and the tubes 'replaced_vals', 'nans' and 'nums' are
        named along the way.

        Parameters
        ----------
        array : np.ndarray or empty
            The array to be transformed.
        return_tubes : iterable or None
            Keys that are each passed to the waterwork's maybe_get_tube.
            Nothing is looked up when this is None.
        prefix : str
            Forwarded to self._pre to build the key under which 'array' is
            looked up by the 'replace_with' slot plug.

        Returns
        -------
        list or None
            The maybe_get_tube results, in the order of return_tubes, or None
            when return_tubes is None.

        """
        # Locate the NaN's in the input.
        nan_tubes, nan_slots = td.isnan(array)
        nan_slots['a'].set_name('array')

        # The replacement values come from self.fill_nan_func, applied to the
        # entry of z stored under the prefixed 'array' key.
        replace_slot_plugs = {
            'replace_with':
            lambda z: self.fill_nan_func(z[self._pre('array', prefix)])
        }
        replace_tube_plugs = {
            'replace_with': np.array([]),
            'replaced_vals': np.array(np.nan)
        }
        tubes, _ = td.replace(
            nan_tubes['a'],
            nan_tubes['target'],
            slot_plugs=replace_slot_plugs,
            tube_plugs=replace_tube_plugs)

        tubes['replaced_vals'].set_name('replaced_vals')
        tubes['mask'].set_name('nans')

        mode = self.norm_mode
        if mode == 'mean_std':
            # Subtract self.mean, then divide by self.std.
            sub_plugs = {
                'a_is_smaller': False,
                'smaller_size_array': self.mean
            }
            tubes, _ = td.sub(tubes['target'], self.mean, tube_plugs=sub_plugs)

            div_plugs = {
                'a_is_smaller': False,
                'smaller_size_array': self.std,
                'missing_vals': np.array([]),
                'remainder': np.array([])
            }
            tubes, _ = td.div(tubes['target'], self.std, tube_plugs=div_plugs)
        elif mode == 'min_max':
            # Subtract self.min, then divide by (self.max - self.min).
            sub_plugs = {
                'a_is_smaller': False,
                'smaller_size_array': self.min
            }
            tubes, _ = td.sub(tubes['target'], self.min, tube_plugs=sub_plugs)

            span = self.max - self.min
            div_plugs = {
                'a_is_smaller': False,
                'smaller_size_array': span,
                'missing_vals': np.array([]),
                'remainder': np.array([])
            }
            tubes, _ = td.div(tubes['target'], span, tube_plugs=div_plugs)

        tubes['target'].set_name('nums')

        if return_tubes is None:
            return None

        # Resolve every requested key against the waterwork that owns the
        # final tube.
        ww = tubes['target'].waterwork
        return [ww.maybe_get_tube(tube_key) for tube_key in return_tubes]
Example #3
0
 def __sub__(self, other):
   """Define a sub tank (operation) between two tubes.

   Implements ``self - other`` by calling td.sub with this object as ``a`` and
   ``other`` as ``b``; the result of td.sub is returned unchanged.
   """
   # Imported inside the method rather than at module level — presumably to
   # avoid a circular import with tank_defs; confirm before moving it.
   import wtrwrks.tanks.tank_defs as td
   return td.sub(a=self, b=other)
  def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Add the tanks that fill in NaT's, convert datetimes to numbers and optionally normalize.

    The tanks are chained in this order: td.isnat, td.replace,
    td.datetime_to_num and then, depending on self.norm_mode, td.sub followed
    by td.div ('mean_std' uses self.mean and self.std, 'min_max' uses self.min
    and self.max - self.min). Any other norm_mode adds no normalization tanks.
    The slot 'array' and the tubes 'replaced_vals', 'nats', 'diff' and 'nums'
    are named along the way.

    Parameters
    ----------
    array : np.ndarray or empty
      The array to be transformed.
    return_tubes : iterable or None
      Keys that are each passed to the waterwork's maybe_get_tube. Nothing is
      looked up when this is None.
    prefix : str
      Forwarded to self._pre to build the key under which 'array' is looked up
      by the 'replace_with' slot plug.

    Returns
    -------
    list or None
      The maybe_get_tube results, in the order of return_tubes, or None when
      return_tubes is None.

    """
    # Locate the NaT's in the input.
    nat_tubes, nat_slots = td.isnat(array)
    nat_slots['a'].set_name('array')

    # The replacement values come from self.fill_nat_func, applied to the entry
    # of z stored under the prefixed 'array' key.
    replace_slot_plugs = {
      'replace_with': lambda z: self.fill_nat_func(z[self._pre('array', prefix)])
    }
    replace_tube_plugs = {
      'replace_with': np.array([]),
      'replaced_vals': np.array([None], dtype=np.datetime64)
    }
    filled, _ = td.replace(
      nat_tubes['a'], nat_tubes['target'],
      slot_plugs=replace_slot_plugs,
      tube_plugs=replace_tube_plugs
    )

    filled['replaced_vals'].set_name('replaced_vals')
    filled['mask'].set_name('nats')

    # Turn the datetimes into numbers; the tank itself is named 'dtn'.
    tubes, _ = td.datetime_to_num(
      filled['target'],
      self.zero_datetime,
      self.num_units,
      self.time_unit,
      name='dtn'
    )
    tubes['diff'].set_name('diff')

    mode = self.norm_mode
    if mode == 'mean_std':
      # Subtract self.mean, then divide by self.std.
      sub_plugs = {'a_is_smaller': False, 'smaller_size_array': self.mean}
      tubes, _ = td.sub(tubes['target'], self.mean, tube_plugs=sub_plugs)

      div_plugs = {
        'a_is_smaller': False,
        'smaller_size_array': self.std,
        'missing_vals': np.array([]),
        'remainder': np.array([])
      }
      tubes, _ = td.div(tubes['target'], self.std, tube_plugs=div_plugs)
    elif mode == 'min_max':
      # Subtract self.min, then divide by (self.max - self.min).
      sub_plugs = {'a_is_smaller': False, 'smaller_size_array': self.min}
      tubes, _ = td.sub(tubes['target'], self.min, tube_plugs=sub_plugs)

      span = self.max - self.min
      div_plugs = {
        'a_is_smaller': False,
        'smaller_size_array': span,
        'missing_vals': np.array([]),
        'remainder': np.array([])
      }
      tubes, _ = td.div(tubes['target'], span, tube_plugs=div_plugs)

    tubes['target'].set_name('nums')

    if return_tubes is None:
      return None

    # Resolve every requested key against the waterwork that owns the final tube.
    ww = tubes['target'].waterwork
    return [ww.maybe_get_tube(tube_key) for tube_key in return_tubes]