Exemple #1
0
  def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Add the tanks that turn category values into one-hot vectors.

    The tanks are created in this order: cat_to_index, clone, one_hot and,
    only when self.norm_mode is 'mean_std', a sub of self.mean followed by a
    div by self.std. Along the way the names 'array', 'missing_vals',
    'indices' and 'one_hots' are assigned.

    Parameters
    ----------
    array : np.ndarray or empty
      The array to be transformed.
    return_tubes : iterable of tube keys or None
      Keys that are looked up with waterwork.maybe_get_tube once all tanks
      are defined. When None, nothing is looked up.
    prefix : str
      Passed to self._pre to build the key under which the 'indices' value is
      looked up inside the one_hot 'missing_vals' plug.

    Returns
    -------
    list or None
      One maybe_get_tube result per key in return_tubes (same order), or None
      when return_tubes is None.

    """
    # Category values -> indices.
    index_tubes, index_slots = td.cat_to_index(
      array,
      self.cat_val_to_index,
      tube_plugs={'input_dtype': lambda r: self.input_dtype},
    )
    index_slots['cats'].set_name('array')
    index_tubes['missing_vals'].set_name('missing_vals')

    # Duplicate the indices: output 'a' is exposed under the name 'indices',
    # output 'b' feeds the one-hot conversion.
    copies, _ = td.clone(index_tubes['target'])
    copies['a'].set_name('indices')

    # Indices -> one-hot vectors. The plug is an array of -2's shaped like the
    # value stored under the (prefixed) 'indices' key.
    result, _ = td.one_hot(
      copies['b'],
      len(self.cat_val_to_index),
      tube_plugs={
        'missing_vals': lambda r: np.ones(r[self._pre('indices', prefix)].shape) * -2,
      },
    )

    # Optional (x - mean) / std normalization.
    if self.norm_mode == 'mean_std':
      mean = self.mean
      result, _ = td.sub(
        result['target'],
        mean,
        tube_plugs={'a_is_smaller': False, 'smaller_size_array': mean},
      )
      std = self.std
      result, _ = td.div(
        result['target'],
        std,
        tube_plugs={
          'a_is_smaller': False,
          'smaller_size_array': std,
          'missing_vals': np.array([]),
          'remainder': np.array([]),
        },
      )

    result['target'].set_name('one_hots')

    if return_tubes is None:
      return None

    waterwork = result['target'].waterwork
    return [waterwork.maybe_get_tube(key) for key in return_tubes]
Exemple #2
0
    def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
        """Add the tanks that fill NaN's and optionally normalize numbers.

        The tanks are created in this order: isnan, replace and, depending on
        self.norm_mode, a sub followed by a div ('mean_std' uses self.mean and
        self.std, 'min_max' uses self.min and self.max - self.min). Along the
        way the names 'array', 'replaced_vals', 'nans' and 'nums' are
        assigned.

        Parameters
        ----------
        array : np.ndarray or empty
          The array to be transformed.
        return_tubes : iterable of tube keys or None
          Keys that are looked up with waterwork.maybe_get_tube once all
          tanks are defined. When None, nothing is looked up.
        prefix : str
          Passed to self._pre to build the key under which the 'array' value
          is looked up inside the 'replace_with' slot plug.

        Returns
        -------
        list or None
          One maybe_get_tube result per key in return_tubes (same order), or
          None when return_tubes is None.

        """
        # Flag the NaN entries of the input.
        nan_tubes, nan_slots = td.isnan(array)
        nan_slots['a'].set_name('array')

        # Swap the flagged entries for whatever fill_nan_func returns for the
        # value stored under the (prefixed) 'array' key.
        fill_slot_plugs = {
            'replace_with':
            lambda r: self.fill_nan_func(r[self._pre('array', prefix)])
        }
        fill_tube_plugs = {
            'replace_with': np.array([]),
            'replaced_vals': np.array(np.nan)
        }
        result, _ = td.replace(nan_tubes['a'],
                               nan_tubes['target'],
                               slot_plugs=fill_slot_plugs,
                               tube_plugs=fill_tube_plugs)

        result['replaced_vals'].set_name('replaced_vals')
        result['mask'].set_name('nans')

        # Both supported normalizations are "subtract an offset, then divide
        # by a scale"; only the two operands differ.
        mode = self.norm_mode
        if mode in ('mean_std', 'min_max'):
            if mode == 'mean_std':
                offset, scale = self.mean, self.std
            else:
                offset, scale = self.min, self.max - self.min

            result, _ = td.sub(result['target'],
                               offset,
                               tube_plugs={
                                   'a_is_smaller': False,
                                   'smaller_size_array': offset
                               })
            result, _ = td.div(result['target'],
                               scale,
                               tube_plugs={
                                   'a_is_smaller': False,
                                   'smaller_size_array': scale,
                                   'missing_vals': np.array([]),
                                   'remainder': np.array([])
                               })

        result['target'].set_name('nums')

        if return_tubes is None:
            return None

        waterwork = result['target'].waterwork
        return [waterwork.maybe_get_tube(key) for key in return_tubes]
Exemple #3
0
 def __rdiv__(self, other):
   """Define a div tank (operation) for the reflected division `other / self`.

   `other` is passed to td.div as `a` and this object as `b`.

   NOTE(review): `__rdiv__` is only consulted by Python 2's `/` operator;
   Python 3 looks for `__rtruediv__` instead - confirm which interpreters
   this class has to support.
   """
   # Function-scope import - presumably to avoid a circular import between
   # this module and tank_defs; confirm before moving it to module level.
   import wtrwrks.tanks.tank_defs as td
   return td.div(a=other, b=self)
  def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Add the tanks that turn datetimes into (optionally normalized) numbers.

    The tanks are created in this order: isnat, replace, datetime_to_num and,
    depending on self.norm_mode, a sub followed by a div ('mean_std' uses
    self.mean and self.std, 'min_max' uses self.min and self.max - self.min).
    Along the way the names 'array', 'replaced_vals', 'nats', 'diff' and
    'nums' are assigned.

    Parameters
    ----------
    array : np.ndarray or empty
      The array to be transformed.
    return_tubes : iterable of tube keys or None
      Keys that are looked up with waterwork.maybe_get_tube once all tanks
      are defined. When None, nothing is looked up.
    prefix : str
      Passed to self._pre to build the key under which the 'array' value is
      looked up inside the 'replace_with' slot plug.

    Returns
    -------
    list or None
      One maybe_get_tube result per key in return_tubes (same order), or None
      when return_tubes is None.

    """
    # Flag the NaT entries of the input.
    nat_tubes, nat_slots = td.isnat(array)
    nat_slots['a'].set_name('array')

    # Swap the flagged entries for whatever fill_nat_func returns for the
    # value stored under the (prefixed) 'array' key.
    filled, _ = td.replace(
      nat_tubes['a'],
      nat_tubes['target'],
      slot_plugs={
        'replace_with': lambda r: self.fill_nat_func(r[self._pre('array', prefix)]),
      },
      tube_plugs={
        'replace_with': np.array([]),
        'replaced_vals': np.array([None], dtype=np.datetime64),
      },
    )
    filled['replaced_vals'].set_name('replaced_vals')
    filled['mask'].set_name('nats')

    # Datetimes -> numbers, relative to zero_datetime and measured in
    # num_units of time_unit.
    result, _ = td.datetime_to_num(
      filled['target'],
      self.zero_datetime,
      self.num_units,
      self.time_unit,
      name='dtn',
    )
    result['diff'].set_name('diff')

    if self.norm_mode == 'mean_std':
      # (x - mean) / std
      mean = self.mean
      result, _ = td.sub(
        result['target'],
        mean,
        tube_plugs={'a_is_smaller': False, 'smaller_size_array': mean},
      )
      std = self.std
      result, _ = td.div(
        result['target'],
        std,
        tube_plugs={
          'a_is_smaller': False,
          'smaller_size_array': std,
          'missing_vals': np.array([]),
          'remainder': np.array([]),
        },
      )
    elif self.norm_mode == 'min_max':
      # (x - min) / (max - min)
      minimum = self.min
      result, _ = td.sub(
        result['target'],
        minimum,
        tube_plugs={'a_is_smaller': False, 'smaller_size_array': minimum},
      )
      value_range = self.max - self.min
      result, _ = td.div(
        result['target'],
        value_range,
        tube_plugs={
          'a_is_smaller': False,
          'smaller_size_array': value_range,
          'missing_vals': np.array([]),
          'remainder': np.array([]),
        },
      )

    result['target'].set_name('nums')

    if return_tubes is None:
      return None

    waterwork = result['target'].waterwork
    return [waterwork.maybe_get_tube(key) for key in return_tubes]
Exemple #5
0
  def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Add the tanks that phase-decompose the time column of a two-part array.

    The tanks are created in this order: split (at index 1 along axis 1),
    iter_list (2 elements), reshape, cast to np.datetime64, isnat, replace,
    datetime_to_num, div (by the scaled end datetime) and phase_decomp (with
    the first self.top_frequencies entries of self.w_k). Along the way the
    names 'array', 'amps', 'replaced_vals', 'nats', 'diff', 'div' and 'nums'
    are assigned.

    Parameters
    ----------
    array : np.ndarray or empty
      The array to be transformed.
    return_tubes : iterable of tube keys or None
      Keys that are looked up with waterwork.maybe_get_tube once all tanks
      are defined. When None, nothing is looked up.
    prefix : str
      Passed to self._pre to build the keys ('array' and 'nums') that the
      reshape and replace plugs use to look up values.

    Returns
    -------
    list or None
      One maybe_get_tube result per key in return_tubes (same order), or None
      when return_tubes is None.

    """
    # Split the input at index 1 along axis 1 and expose the split's input
    # slot as 'array'.
    pieces, piece_slots = td.split(array, [1], axis=1)
    piece_slots['a'].unplug()
    piece_slots['a'].set_name('array')

    # Unpack the two pieces; the second one is named 'amps'.
    pieces, _ = td.iter_list(pieces['target'], 2)
    pieces[1].set_name('amps')

    # Reshape the first piece to the leading dimension of 'array'. The
    # old_shape plug is the leading dimension of 'nums' plus a trailing 1.
    stamps, _ = td.reshape(
      pieces[0],
      slot_plugs={'shape': lambda r: r[self._pre('array', prefix)].shape[:1]},
      tube_plugs={
        'old_shape': lambda r: list(r[self._pre('nums', prefix)].shape[:1]) + [1],
      },
    )
    stamps, _ = td.cast(
      stamps['target'],
      np.datetime64,
      tube_plugs={
        'input_dtype': self.input_dtype,
        'diff': np.array([], dtype=self.input_dtype),
      },
    )

    # Flag the NaT entries and swap them for whatever fill_nat_func returns
    # for the value stored under the (prefixed) 'array' key.
    nat_tubes, _ = td.isnat(stamps['target'])
    filled, _ = td.replace(
      nat_tubes['a'],
      nat_tubes['target'],
      slot_plugs={
        'replace_with': lambda r: self.fill_nat_func(r[self._pre('array', prefix)]),
      },
      tube_plugs={
        'replace_with': np.array([]),
        'replaced_vals': np.array([None], dtype=np.datetime64),
      },
    )
    filled['replaced_vals'].set_name('replaced_vals')
    filled['mask'].set_name('nats')

    # The end datetime expressed in the same units as the converted numbers,
    # cast to self.dtype.
    unit = np.timedelta64(self.num_units, self.time_unit)
    scaled_end = ((self.end_datetime - self.zero_datetime) / unit).astype(self.dtype)

    # Datetimes -> numbers.
    result, _ = td.datetime_to_num(
      filled['target'],
      self.zero_datetime,
      self.num_units,
      self.time_unit,
      name='dtn',
    )
    result['diff'].set_name('diff')

    # Only a division by the scaled end is applied here; there is no
    # subtraction step before it.
    result, _ = td.div(
      result['target'],
      scaled_end,
      tube_plugs={
        'a_is_smaller': False,
        'smaller_size_array': scaled_end,
        'missing_vals': np.array([]),
        'remainder': np.array([]),
      },
    )

    phases, _ = td.phase_decomp(
      result['target'],
      self.w_k[:self.top_frequencies],
    )
    phases['div'].set_name('div')
    phases['target'].set_name('nums')

    if return_tubes is None:
      return None

    waterwork = phases['target'].waterwork
    return [waterwork.maybe_get_tube(key) for key in return_tubes]