Esempio n. 1
0
    def _from_save_dict(self, save_dict):
        """Rebuild this waterwork's tanks, slots, tubes, funnels and taps.

        Parameters
        ----------
        save_dict : dict
          Description of a saved waterwork. The keys read here are:
          'name'; 'tanks' (tank name -> dict with '__module__' and
          '__class__'); 'slots' (slot name -> dict with 'tank', 'key', 'plug'
          and 'tube'); 'tubes' (tube name -> dict with 'tank', 'key' and
          'plug'); 'funnels' and 'taps' (iterables of slot / tube names).

        """
        with ns.NameSpace(''):
            self.name = save_dict['name']

            # Recreate every tank from its recorded module and class name.
            # NOTE(review): the module/class names come straight from
            # save_dict, so only trusted save files should be loaded here.
            for tank_name, tank_info in save_dict['tanks'].items():
                tank_module = importlib.import_module(tank_info['__module__'])
                tank_cls = getattr(tank_module, tank_info['__class__'])
                self.tanks[tank_name] = tank_cls(name=tank_name,
                                                 waterwork=self)

            # Restore each slot's plug and re-register it under its saved name.
            for slot_name, slot_info in save_dict['slots'].items():
                owner = self.tanks[slot_info['tank']]
                slot = owner.get_slot(slot_info['key'])
                slot.plug = slot_info['plug']

                # Drop the entry under the slot's current name before renaming.
                del self.slots[slot.name]
                slot.name = slot_name
                self.slots[slot_name] = slot

            # Same treatment for the tubes.
            for tube_name, tube_info in save_dict['tubes'].items():
                owner = self.tanks[tube_info['tank']]
                tube = owner.get_tube(tube_info['key'])
                tube.plug = tube_info['plug']

                # Drop the entry under the tube's current name before renaming.
                del self.tubes[tube.name]
                tube.name = tube_name
                self.tubes[tube_name] = tube

            # Reconnect slots to the tubes that feed them; a slot with no
            # recorded tube gets the `empty` sentinel instead.
            for slot_name, slot in self.slots.items():
                feeding_tube_name = save_dict['slots'][slot_name]['tube']
                if feeding_tube_name is None:
                    slot.tube = empty
                    continue

                feeding_tube = self.tubes[feeding_tube_name]
                feeding_tube.slot = slot
                slot.tube = feeding_tube

            # Funnels are looked up among the slots, taps among the tubes.
            self.funnels = {
                funnel_name: self.slots[funnel_name]
                for funnel_name in save_dict['funnels']
            }
            self.taps = {
                tap_name: self.tubes[tap_name]
                for tap_name in save_dict['taps']
            }
Esempio n. 2
0
    def define_waterwork(self, array=empty, return_tubes=None):
        """Define the waterworks of all sub-transforms, chained in order.

        The sub-transforms are visited in ``self.transform_order``. Every
        sub-transform except the last is asked to return the tube named by the
        next entry of ``self.tap_keys``; that tube is cloned and one clone
        becomes the input array of the following sub-transform.

        Parameters
        ----------
        array : np.ndarray or empty
          The array handed to the first sub-transform.
        return_tubes : list of str or None
          NOTE(review): the value passed in is never read; it is recomputed
          for every sub-transform inside the loop.

        Notes
        -----
        The visible code contains no return statement.

        """
        assert self.input_dtype is not None, (
            "Run calc_global_values before running the transform")

        final_position = len(self.transform_order) - 1

        with ns.NameSpace(self.name):
            for position, key in enumerate(self.transform_order):
                sub_transform = self.transforms[key]

                with ns.NameSpace(sub_transform.name):
                    # Only non-final transforms need to hand a tube onwards.
                    if position < final_position:
                        next_tap_key = self.tap_keys[position + 1]
                        requested = [self._pre(next_tap_key)]
                    else:
                        requested = None

                    produced = sub_transform.define_waterwork(array, requested)
                    if produced is None:
                        continue

                    # Free up the tube's name, clone it, then give the original
                    # name to clone 'b' while clone 'a' feeds the next
                    # sub-transform.
                    head = produced[0]
                    original_name = head.name
                    head.set_name("to_be_cloned")

                    clones, _ = td.clone(head)
                    array = clones['a']

                    clones['b'].set_name(original_name)
Esempio n. 3
0
    def __enter__(self):
        """Install this waterwork as the global default and open its namespace.

        Returns
        -------
        self
          So that ``with ... as ww`` binds this waterwork.

        Raises
        ------
        ValueError
          If ``gl._default_waterwork`` is already set.

        """
        already_active = gl._default_waterwork is not None
        if already_active:
            raise ValueError(
                "_default_waterwork is already set. Cannot be reset until context is exitted. Are you within the with statement of another waterwork?"
            )

        # Open a namespace named after this waterwork and keep a reference to
        # it on the instance.
        self.name_space = ns.NameSpace(self.name)
        self.name_space.__enter__()

        gl._default_waterwork = self
        return self
Esempio n. 4
0
    def define_waterwork(self, array=empty, return_tubes=None):
        """Define the tanks that tokenize ``array`` and transform the result.

        Parameters
        ----------
        array : np.ndarray or empty
          The array of strings to be tokenized with ``self.sent_tokenizer``.
        return_tubes : list of str or None
          Keys of tubes to look up on the waterwork and return. Nothing is
          returned when this is None.

        Returns
        -------
        list or None
          One entry per key in ``return_tubes``, each the result of
          ``maybe_get_tube`` on the waterwork; None if ``return_tubes`` is
          None.

        """
        if self.keep_dims:
            # tokenize keeps the structure of array and adds a dimension.
            sentences, sentence_slots = td.tokenize(
                strings=array,
                tokenizer=self.sent_tokenizer,
                detokenizer=self.sent_detokenizer,
                max_len=self.max_doc_len)
        else:
            # flat_tokenize puts all sentences on the same axis.
            sentences, sentence_slots = td.flat_tokenize(
                strings=array,
                tokenizer=self.sent_tokenizer,
                detokenizer=self.sent_detokenizer)
            sentence_slots['ids'].set_name('doc_ids')
            sentences['ids'].set_name('ids')

        sentence_slots['strings'].set_name('input')

        # The tokenized output is handed to the string transform, inside a
        # namespace carrying that transform's name.
        with ns.NameSpace(self.string_transform.name):
            self.string_transform.define_waterwork(array=sentences['target'])

        if return_tubes is None:
            return None

        waterwork = sentences['target'].waterwork
        return [waterwork.maybe_get_tube(key) for key in return_tubes]
Esempio n. 5
0
    def _from_save_dict(self, save_dict):
        """Rebuild this waterwork from a save dictionary.

        Parameters
        ----------
        save_dict : dict
          Description of a saved waterwork. The keys read here are:
          'name'; 'tanks' (tank name -> dict with 'func_name' and optionally
          'args' / 'kwargs'); 'slots' (slot name -> dict with 'tank', 'key',
          'plug' and 'tube'); 'tubes' (tube name -> dict with 'tank', 'key',
          'plug' and 'downstream_tube'); 'funnels' and 'taps' (iterables of
          slot / tube names); 'merged' (tube name -> iterable of tube names).

        """
        import wtrwrks.tanks.tank_defs as td
        with ns.NameSpace(''):
            self.name = save_dict['name']

            # Recreate every tank by calling the tank_defs function that
            # originally built it.
            for tank_name in save_dict['tanks']:
                tank_dict = save_dict['tanks'][tank_name]
                func = getattr(td, tank_dict['func_name'])

                args = tank_dict.get('args', [])
                kwargs = tank_dict.get('kwargs', {})

                tubes, _ = func(*args,
                                name=tank_name,
                                waterwork=self,
                                **kwargs)

                # Any tube leads back to the tank that owns it. Dict views are
                # not subscriptable on Python 3, so take the first key through
                # an iterator instead of `tubes.keys()[0]`.
                first_key = next(iter(tubes.keys()))
                self.tanks[tank_name] = tubes[first_key].tank

            # Restore each slot's plug and re-register it under its saved name.
            for slot_name in save_dict['slots']:
                slot_dict = save_dict['slots'][slot_name]
                tank = self.tanks[slot_dict['tank']]

                slot = tank.get_slot(slot_dict['key'])
                slot.plug = slot_dict['plug']

                # Drop the entry under the slot's current name before renaming.
                del self.slots[slot.name]
                slot.name = slot_name
                self.slots[slot_name] = slot

            # Same treatment for the tubes.
            for tube_name in save_dict['tubes']:
                tube_dict = save_dict['tubes'][tube_name]
                tank = self.tanks[tube_dict['tank']]

                tube = tank.get_tube(tube_dict['key'])
                tube.plug = tube_dict['plug']

                # Drop the entry under the tube's current name before renaming.
                del self.tubes[tube.name]
                tube.name = tube_name
                self.tubes[tube_name] = tube

            # Downstream links can only be resolved once every tube has been
            # registered under its final name.
            for tube_name in save_dict['tubes']:
                downstream_tube_name = save_dict['tubes'][tube_name][
                    'downstream_tube']

                if downstream_tube_name is not None:
                    self.tubes[tube_name].downstream_tube = (
                        self.tubes[downstream_tube_name])

            # Reconnect slots to the tubes that feed them; a slot with no
            # recorded tube gets the `empty` sentinel instead.
            for slot_name in self.slots:
                slot_dict = save_dict['slots'][slot_name]
                slot = self.slots[slot_name]

                if slot_dict['tube'] is not None:
                    tube = self.tubes[slot_dict['tube']]
                    tube.slot = slot
                else:
                    tube = empty

                slot.tube = tube

            # Funnels are looked up among the slots, taps among the tubes.
            self.funnels = {}
            for funnel_name in save_dict['funnels']:
                self.funnels[funnel_name] = self.slots[funnel_name]

            self.taps = {}
            for tap_name in save_dict['taps']:
                self.taps[tap_name] = self.tubes[tap_name]

        # Rebuild the tube -> set-of-tubes mapping from the saved names.
        for key in save_dict['merged']:
            tube = self.tubes[key]
            self.merged[tube] = {
                self.tubes[k] for k in save_dict['merged'][key]
            }
0
    def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
        """Define the tanks that split ``array`` by column and transform each part.

        Parameters
        ----------
        array : np.ndarray or empty
          The array to be transformed.
        return_tubes : list of str or None
          Keys of tubes to look up on the waterwork and return. Only needed if
          Waterworks are being stitched together.
        prefix : str
          Any additional prefix string/dictionary keys start with. Defaults to
          no additional prefix.

        Returns
        -------
        list or None
          One entry per key in ``return_tubes``, each the result of
          ``maybe_get_tube`` on the waterwork; None if ``return_tubes`` is
          None.

        """
        assert self.is_calc_run, (
            "Run calc_global_values before running the transform")

        with ns.NameSpace(self.name):
            # Flatten the columns of every transform into one list, then
            # express each transform's columns as positions in that list.
            all_cols = [
                col
                for trans_name in self.transform_names
                for col in self.transform_cols[trans_name]
            ]
            indices = [
                [all_cols.index(col) for col in self.transform_cols[trans_name]]
                for trans_name in self.transform_names
            ]

            # Can only partition along the 0th axis, so transpose to make the
            # 'column' dimension the first one.
            perm = [1, 0]
            transposed, transposed_slots = td.transpose(a=array, axes=perm)

            # Partition the full dataset array into subarrays so that the
            # individual transforms can handle them.
            missing_plugs = {
                'missing_cols': np.zeros((0, 1), dtype=self.input_dtype),
                'missing_array': np.zeros((0, 1), dtype=self.input_dtype),
            }
            parts, _ = td.partition_by_index(a=transposed['target'],
                                             indices=indices,
                                             tube_plugs=missing_plugs)
            parts['missing_cols'].set_name('missing_cols')
            parts['missing_array'].set_name('missing_array')
            transposed_slots['a'].set_name('array')

            # Split the single Tube into a list of Tubes, one per transform.
            part_tubes, _ = td.iter_list(parts['target'],
                                         num_entries=len(self.transforms))

            for part, name in zip(part_tubes, self.transform_names):
                trans = self.transforms[name]

                # Transpose the part back to its original orientation.
                restored, _ = td.transpose(a=part,
                                           axes=perm,
                                           name=name + '-Trans')

                # Cast the subarray to the dtype this transform expects.
                cast_plugs = {
                    'input_dtype': self.input_dtype,
                    'diff': np.array([], dtype=self.input_dtype),
                }
                cast, _ = td.cast(restored['target'],
                                  trans.input_dtype,
                                  tube_plugs=cast_plugs,
                                  name=name + '-Cast')

                with ns.NameSpace(name):
                    trans.define_waterwork(array=cast['target'],
                                           prefix=os.path.join(
                                               prefix, self.name))

        if return_tubes is None:
            return None

        waterwork = parts['missing_array'].waterwork
        return [waterwork.maybe_get_tube(key) for key in return_tubes]
Esempio n. 7
0
    def define_waterwork(self, array=empty, return_tubes=None):
        """Define the tanks that split ``array`` by column and transform each part.

        Parameters
        ----------
        array : np.ndarray or empty
          The array to be transformed.
        return_tubes : list of str or None
          Keys of tubes to look up on the waterwork and return. Nothing is
          returned when this is None.

        Returns
        -------
        list or None
          One entry per key in ``return_tubes``, each the result of
          ``maybe_get_tube`` on the waterwork; None if ``return_tubes`` is
          None.

        """
        assert self.input_dtype is not None, (
            "Run calc_global_values before running the transform")

        with ns.NameSpace(self.name):
            # Transforms are always visited in sorted-name order.
            trans_names = sorted(self.transforms)
            indices = [self.transform_col_ranges[k] for k in trans_names]

            # Can only partition along the 0th axis, so swap the first two
            # axes and leave any remaining axes where they are. The permutation
            # must hold axis *indices*; the previous `self.input_shape[2:]`
            # appended dimension sizes, which is only a valid permutation by
            # coincidence.
            # NOTE(review): assumes input_shape is the shape of `array` and
            # that `axes` is a numpy-style axis permutation - confirm.
            perm = [1, 0] + list(range(2, len(self.input_shape)))
            transp, transp_slots = td.transpose(a=array, axes=perm)

            # Partition the full dataset array into subarrays so that the
            # individual transforms can handle them.
            parts, _ = td.partition(a=transp['target'], indices=indices)
            parts['missing_cols'].set_name('missing_cols')
            parts['missing_array'].set_name('missing_array')
            transp_slots['a'].set_name('input')

            # Split the single Tube into a list of Tubes, one per transform.
            parts_list, _ = td.iter_list(parts['target'],
                                         num_entries=len(self.transforms))
            for part, name in zip(parts_list, trans_names):
                trans = self.transforms[name]

                # Transpose the part back to its original orientation; swapping
                # the first two axes again undoes the swap above.
                trans_back, _ = td.transpose(a=part, axes=perm)
                part = trans_back['target']

                # Pick the dtype this kind of transform is cast to. Transforms
                # of any other type receive the part uncast.
                # NOTE(review): np.unicode was removed from recent NumPy
                # releases; left unchanged because the right replacement
                # depends on the Python/NumPy versions this file targets.
                if isinstance(trans, nt.NumTransform):
                    cast_dtype = np.float64
                elif isinstance(trans, dt.DateTimeTransform):
                    cast_dtype = np.datetime64
                elif (isinstance(trans, st.StringTransform)
                      or isinstance(trans, ct.CatTransform)):
                    cast_dtype = np.unicode
                else:
                    cast_dtype = None

                if cast_dtype is not None:
                    cast, _ = td.cast(part,
                                      cast_dtype,
                                      name='-'.join([name, 'cast']))
                    part = cast['target']

                with ns.NameSpace(name):
                    trans.define_waterwork(array=part)

        if return_tubes is None:
            return None

        ww = parts['missing_array'].waterwork
        return [ww.maybe_get_tube(r_tube_key) for r_tube_key in return_tubes]