コード例 #1
0
class SpatialDescriptorModel:
    """The spatial descriptor model is an interface to compute spatial
    descriptors for a descriptor model processer mainly.
    It contains the utility classes of:
        * Retrievers: getting spatial neighbour elements.
        * Descriptor model: to transform it to spatial descriptors.
    Its main function in the process of computing descriptors from points is to
    manage the dealing with perturbation of the system for the sake of testing
    predictors and models.

    TODO
    ----
    - Return main parameters summary of the class
    - Run the process here

    """

    def _initialization(self):
        """Reset every attribute to a neutral placeholder value.

        It is called at the start of `__init__`, before any formatter runs,
        so that the formatters overwrite these defaults with the real
        managers, selectors and loop information.

        """
        ## Loop information: nothing to iterate and identity index mapping
        self._map_indices = lambda s, i: i
        self._pos_inputs = slice(0, 0, 1)
        self.n_inputs = 0
        ## Selection information (constant selectors: retriever, features)
        self._default_selectors = None, None
        self.selectors = None
        ## Managers, filled in later by the formatters
        self.featurers = None
        self.retrievers = None

    def __init__(
        self,
        retrievers,
        featurers,
        mapselector_spdescriptor=None,
        pos_inputs=None,
        map_indices=None,
        perturbations=None,
        aggregations=None,
        name_desc="",
        model=None,
    ):
        """Build the spatial descriptor model from its components.

        Parameters
        ----------
        retrievers: list, pst.BaseRetriver or pst.RetrieverManager
            the retriever information. Anything which is not a
            RetrieverManager is wrapped into one.
        featurers: list, pst.FeaturesManager or pst.BaseFeatures
            the features used to compute the spatial descriptors. Anything
            which is not a FeaturesManager is wrapped into one.
        mapselector_spdescriptor: np.ndarray, tuple, function or instance
            the selector information used to decide which retriever,
            features or descriptormodel to use. (default=None)
        pos_inputs: int, tuple, slice (default=None)
            the possible input indices for which spatial descriptors can be
            obtained. If None, the number of inputs of the retrievers is used.
        map_indices: function or None (default=None)
            the map from the input index to the usable index.
        perturbations: list or pst.BasePerturbation (default=None)
            the perturbation information.
        aggregations: tuple (default=None)
            the aggregation information.
        name_desc: str (default="")
            the name of the descriptor we are going to use.
        model: None, dict, tuple or object (default=None)
            the model used by `fit` and `predict`. None or a dict of
            parameters build a DummySkmodel, a tuple is read as
            (class, parameters dict) and any other object is stored as is.

        """
        self._initialization()
        ## Managers go first: the following formatters act on them
        self._format_retrievers(retrievers)
        self._format_featurers(featurers)
        ## Perturbations and selection of retrievers and features
        self._format_perturbations(perturbations)
        self._format_mapper_selectors(mapselector_spdescriptor)
        ## Loop over elements and aggregations
        self._format_loop(pos_inputs, map_indices)
        self._format_aggregations(aggregations)
        ## Naming and model used in fit/predict
        self._format_identifiers(name_desc)
        self._format_model(model)

    def compute(self, i=None):
        """Main entry point of the computation.

        Parameters
        ----------
        i: int, list or np.ndarray (default=None)
            the index or indices of the elements whose spatial descriptors we
            want. If None, the whole measure is computed.

        Returns
        -------
        measure: np.ndarray or list
            the complete measure returned by `_compute_nets` when `i` is None,
            otherwise the partial result returned by `_compute_descriptors`
            for the element or elements `i`.

        """
        ## Partial computation only for the requested elements
        if i is not None:
            return self._compute_descriptors(i)
        ## Complete computation over all the possible inputs
        return self._compute_nets()

    ################################ Formatters ###############################
    ###########################################################################
    def _format_retrievers(self, retrievers):
        """Store the retrievers as a RetrieverManager.

        Parameters
        ----------
        retrievers: list, pst.BaseRetriver or pst.RetrieverManager
            the retriever information. A RetrieverManager is stored as it is,
            any other input is passed to the RetrieverManager constructor.

        """
        manager = retrievers
        if not isinstance(manager, RetrieverManager):
            manager = RetrieverManager(manager)
        self.retrievers = manager
        self.retrievers.set_neighs_info(True)

    def _format_perturbations(self, perturbations):
        """Split the perturbations and add them to retrievers and features.

        Parameters
        ----------
        perturbations: list or pst.BasePerturbation
            the perturbation information. If None, nothing is done.

        """
        if perturbations is not None:
            ## 0. Split into retriever and features perturbations
            filtered = sp_general_filter_perturbations(perturbations)
            ret_perturbs, feat_perturbs = filtered
            ## 1. Apply them to each manager
            self.retrievers.add_perturbations(ret_perturbs)
            self.featurers.add_perturbations(feat_perturbs)
            ## 2. Both managers have to agree in the number of perturbations
            assert self.retrievers.k_perturb == self.featurers.k_perturb

    def _format_aggregations(self, aggregations, i_r=(None, None)):
        """Prepare and add aggregations to retrievers and features.

        Parameters
        ----------
        aggregations: tuple, list of tuples or None
            the aggregation information. A list is processed element by
            element with the same `i_r`; None (or any other type) is ignored.
            The first element of the tuple is the discretization information
            and the rest is passed untouched to the aggregation creators.
        i_r: tuple
            the indices of retriever and features to use in the aggregation.
            For each position, None means all of them, a list is used as it
            is, and any other value is a single index. The retriever
            `i_ret[n]` is paired with the features `i_feat[n]`, which can be
            an int or an iterable of ints.

        """
        if aggregations is None:
            return
        if type(aggregations) == list:
            for aggregation in aggregations:
                self._format_aggregations(aggregation, i_r)
            return
        if type(aggregations) != tuple:
            return
        ## Prepare instructions.
        # `range` is wrapped in `list` because in Python 3 it is not a list:
        # otherwise the whole range object would be treated as one index.
        i_ret, i_feat = i_r[0], i_r[1]
        if i_ret is None:
            i_ret = list(range(len(self.retrievers)))
        elif type(i_ret) != list:
            i_ret = [i_ret]
        if i_feat is None:
            i_feat = list(range(len(self.featurers)))
        elif type(i_feat) != list:
            i_feat = [i_feat] * len(i_ret)
        ## Assert correctness
        assert len(i_ret) == len(i_feat)
        ## Main loop
        for ret_idx, feat_idx in zip(i_ret, i_feat):
            ## Preparing information to the retriever number ret_idx
            ret = self.retrievers.retrievers[ret_idx]
            agg_0 = _discretization_information_creation(aggregations[0], ret)
            aggregations_i = tuple([agg_0] + list(aggregations[1:]))
            # Add aggregation to retrievers
            new_ret = create_aggretriever(aggregations_i)
            self.retrievers.add_aggregations(new_ret)
            # Add aggregations to features
            feat_idxs = [feat_idx] if type(feat_idx) == int else feat_idx
            for j in feat_idxs:
                new_features = create_aggfeatures(aggregations_i, self.featurers.features[j])
                self.featurers.add_aggregations(new_features)

    def _format_featurers(self, featurers):
        """Store the features as a FeaturesManager.

        Parameters
        ----------
        featurers: list, pst.FeaturesManager or pst.BaseFeatures
            the features to be used in order to compute spatial descriptors.
            A FeaturesManager is stored as it is, any other input is passed
            to the FeaturesManager constructor.

        """
        is_manager = isinstance(featurers, FeaturesManager)
        self.featurers = featurers if is_manager else FeaturesManager(featurers)

    def _format_mapper_selectors(self, _mapselector_spdescriptor):
        """Format selectors.

        It sets `self.selectors` and binds `self._mapselector_spdescriptor`
        to the method used later to get the selectors of each element.

        Parameters
        ----------
        _mapselector_spdescriptor: np.ndarray, tuple, function or instance
            the selector information in order to decide retriever or
            features or descriptormodel use. The accepted formats are:
                * None: the constant default selectors are used.
                * np.ndarray of shape (n, 8): columns 0:2 are set as the
                  retrievers selector and columns 2:4, 4:6 and 6:8 as the
                  three features selectors.
                * tuple of 8 ints: split in the same way as the array columns.
                * tuple of 2 tuples: a pair for the retrievers and either 6
                  values or 3 pairs for the features.
                * tuple of 2 elements starting with a np.ndarray: an (n, 2)
                  array for the retrievers and either a tuple of arrays with 2
                  columns or an (n, 6) array for the features.
                * tuple of 2 functions: one for the retrievers and one for the
                  features.
                * function: wrapped into a Sp_DescriptorSelector.
                * Sp_DescriptorSelector instance: used as it is.
            Any other input matches no branch, and this call then leaves
            `self._mapselector_spdescriptor` unassigned.

        """
        # NOTE(review): `np` is assumed to be numpy imported at module level;
        # the import is not visible in this part of the file.
        self.selectors = self._default_selectors
        if _mapselector_spdescriptor is None:
            self._mapselector_spdescriptor = self._mapselector_spdescriptor_null
        if type(_mapselector_spdescriptor) == np.ndarray:
            ## One array with the 8 selector columns
            assert len(_mapselector_spdescriptor.shape) == 2
            assert _mapselector_spdescriptor.shape[1] == 8
            sels = (
                _mapselector_spdescriptor[:, 0:2].astype(int),
                [
                    _mapselector_spdescriptor[:, 2:4].astype(int),
                    _mapselector_spdescriptor[:, 4:6].astype(int),
                    _mapselector_spdescriptor[:, 6:8].astype(int),
                ],
            )
            self.retrievers.set_selector(sels[0])
            self.featurers.set_selector(*sels[1])
            self._mapselector_spdescriptor = self._mapselector_spdescriptor_null
        elif type(_mapselector_spdescriptor) == tuple:
            ## The type of the first element decides how the tuple is read
            if type(_mapselector_spdescriptor[0]) == int:
                # Flat tuple of 8 ints
                assert len(_mapselector_spdescriptor) == 8
                sels = (
                    _mapselector_spdescriptor[:2],
                    [_mapselector_spdescriptor[2:4], _mapselector_spdescriptor[4:6], _mapselector_spdescriptor[6:8]],
                )
                self.retrievers.set_selector(sels[0])
                self.featurers.set_selector(*sels[1])
                self._mapselector_spdescriptor = self._mapselector_spdescriptor_null
            elif type(_mapselector_spdescriptor[0]) == tuple:
                # (retriever pair, features information)
                assert len(_mapselector_spdescriptor) == 2
                assert len(_mapselector_spdescriptor[0]) == 2
                if len(_mapselector_spdescriptor[1]) == 6:
                    # Features given as 6 flat values: split them in 3 pairs
                    sels = (
                        _mapselector_spdescriptor[0],
                        [
                            _mapselector_spdescriptor[1][:2],
                            _mapselector_spdescriptor[1][2:4],
                            _mapselector_spdescriptor[1][4:],
                        ],
                    )
                else:
                    # Features already given as 3 pairs
                    assert len(_mapselector_spdescriptor[1]) == 3
                    logi = [len(e) == 2 for e in _mapselector_spdescriptor[1]]
                    assert all(logi)
                    sels = _mapselector_spdescriptor
                self.retrievers.set_selector(sels[0])
                self.featurers.set_selector(*sels[1])
                self._mapselector_spdescriptor = self._mapselector_spdescriptor_null
            elif type(_mapselector_spdescriptor[0]) == np.ndarray:
                # (retriever array with 2 columns, features information)
                assert len(_mapselector_spdescriptor) == 2
                assert len(_mapselector_spdescriptor[0].shape) == 2
                assert _mapselector_spdescriptor[0].shape[1] == 2
                if type(_mapselector_spdescriptor[1]) == tuple:
                    # Features given as a tuple of arrays with 2 columns,
                    # used as they are (no cast to int in this branch)
                    logi = [e.shape[1] == 2 for e in _mapselector_spdescriptor[1]]
                    assert all(logi)
                    sels = _mapselector_spdescriptor
                else:
                    # Features given as one array with 6 columns
                    assert _mapselector_spdescriptor[1].shape[1] == 6
                    assert len(_mapselector_spdescriptor[1].shape) == 2
                    sels = (
                        _mapselector_spdescriptor[0].astype(int),
                        [
                            _mapselector_spdescriptor[1][:, :2].astype(int),
                            _mapselector_spdescriptor[1][:, 2:4].astype(int),
                            _mapselector_spdescriptor[1][:, 4:].astype(int),
                        ],
                    )
                self.retrievers.set_selector(sels[0])
                self.featurers.set_selector(*sels[1])
                self._mapselector_spdescriptor = self._mapselector_spdescriptor_null
            elif type(_mapselector_spdescriptor[0]).__name__ == "function":
                # Pair of functions; the type-name test matches functions
                # created with `def` or `lambda`. The features function is
                # passed as a single argument (not unpacked).
                assert len(_mapselector_spdescriptor) == 2
                self.retrievers.set_selector(_mapselector_spdescriptor[0])
                self.featurers.set_selector(_mapselector_spdescriptor[1])
                self._mapselector_spdescriptor = self._mapselector_spdescriptor_null
        elif type(_mapselector_spdescriptor).__name__ == "function":
            ## A single function is wrapped into a selector object
            self.selectors = Sp_DescriptorSelector(_mapselector_spdescriptor)
            #            mapperselector.set_from_function(_mapselector_spdescriptor)
            self._mapselector_spdescriptor = self._mapselector_spdescriptor_selector
        elif isinstance(_mapselector_spdescriptor, Sp_DescriptorSelector):
            ## A selector object is used directly
            self.selectors = _mapselector_spdescriptor
            self._mapselector_spdescriptor = self._mapselector_spdescriptor_selector

    #            try:
    #                _mapselector_spdescriptor[0]
    #            except:
    #                msg = "Incorrect input for spatial descriptor mapperselector."
    #                raise TypeError(msg)

    def _format_loop(self, pos_inputs, map_indices):
        """Format the possible loop to go through.

        It sets `self._pos_inputs` (a slice with explicit start and step),
        `self.n_inputs` (the number of indices that slice iterates over) and
        `self._map_indices`, and notifies the features manager.

        Parameters
        ----------
        pos_inputs: int, tuple, slice or None
            the possible indices input in order to obtain their spatial
            descriptors. An int `n` means `slice(0, n, 1)`, a tuple is read as
            (start, stop) or (start, stop, step), and a slice is used with its
            missing start and step replaced by 0 and 1. If None, the number of
            inputs of the retrievers is used.
        map_indices: function or None
            the map from the input index to the usable index. It is called as
            `map_indices(self, i)`. If None, the i-th index of the loop is
            returned.

        Raises
        ------
        TypeError
            if `pos_inputs` is not an int, tuple or slice.

        """
        ## TODO: check coherence with retriever
        if pos_inputs is None:
            pos_inputs = self.retrievers.n_inputs
        if isinstance(pos_inputs, int):
            loop = slice(0, pos_inputs, 1)
        elif isinstance(pos_inputs, tuple):
            step = 1 if len(pos_inputs) == 2 else pos_inputs[2]
            loop = slice(pos_inputs[0], pos_inputs[1], step)
        elif isinstance(pos_inputs, slice):
            # The default map_indices and the iteration need explicit values
            start = 0 if pos_inputs.start is None else pos_inputs.start
            step = 1 if pos_inputs.step is None else pos_inputs.step
            loop = slice(start, pos_inputs.stop, step)
        else:
            raise TypeError("Incorrect possible indices input.")
        # Count the indices actually visited, so the step is also considered
        # in the tuple case (it was previously computed as stop - start).
        self.n_inputs = len(range(loop.start, loop.stop, loop.step))
        self._pos_inputs = loop
        ## Create map_indices
        if map_indices is None:

            def map_indices(s, i):
                return s._pos_inputs.start + s._pos_inputs.step * i

        self._map_indices = map_indices
        ## Notice to featurer
        self.featurers.set_map_vals_i(pos_inputs)

    def _format_identifiers(self, name_desc):
        """Set the name of the descriptor method applied.

        Parameters
        ----------
        name_desc: str
            the name of the descriptor we are going to use. If it is not a
            str (None included), the name of the first descriptormodel of the
            features manager is used instead.

        """
        if type(name_desc) == str:
            chosen = name_desc
        else:
            chosen = self.featurers.descriptormodels[0].name_desc
        self.name_desc = chosen

    def _format_model(self, model):
        """Set the model used by `fit` and `predict`.

        Parameters
        ----------
        model: None, dict, tuple or object
            the model information. None builds a DummySkmodel without
            parameters, a dict is used as the keyword parameters of a
            DummySkmodel, a tuple is read as (class, keyword parameters) and
            any other object is stored as it is.

        """
        kind = type(model)
        if model is None:
            built = DummySkmodel()
        elif kind == dict:
            built = DummySkmodel(**model)
        elif kind == tuple:
            factory, parameters = model[0], model[1]
            built = factory(**parameters)
        else:
            built = model
        self.model = built

    ################################# Getters #################################
    ###########################################################################
    def _get_methods(self, i):
        """Get the selectors to be used for the element or elements `i`.

        Parameters
        ----------
        i: int, list, np.ndarray
            the indices of the elements we want to compute its spatial
            features.

        Returns
        -------
        staticneighs: boolean
            the `staticneighs` flag of the retrievers manager.
        typeret: tuple or list
            the indices of the selections in the retriever part. It is a list
            with one entry per element when the selector returns a list.
        typefeats: tuple or list
            the indices of the selections in the features part. It is a list
            with one entry per element when the selector returns a list.

        """
        staticneighs = self.retrievers.staticneighs
        selected = self._mapselector_spdescriptor(i)
        if type(selected) != list:
            ## A single pair of selectors
            typeret, typefeats = selected
            return staticneighs, typeret, typefeats
        ## A list of pairs: split it into two parallel lists
        typeret, typefeats = [], []
        for ret_sel, feat_sel in selected:
            typeret.append(ret_sel)
            typefeats.append(feat_sel)
        return staticneighs, typeret, typefeats

    def _mapselector_spdescriptor_null(self, i):
        """Return the constant default selectors, whatever the element `i`.

        Parameters
        ----------
        i: int, list, np.ndarray
            the element or elements requested (not used).

        Returns
        -------
        selectors_i: tuple
            the default selectors stored in `_default_selectors`.

        """
        selectors_i = self._default_selectors
        return selectors_i

    #    def _mapselector_spdescriptor_constant(self, i):
    #        i_len = 1 if type(i) == int else len(i)
    #        logi = type(i) == int
    #        if logi:
    #            return self.selectors
    #        else:
    #            return [self.selectors[0]]*i_len, [self.selectors[1]]*i_len

    def _mapselector_spdescriptor_selector(self, i):
        """Return the selectors of the element `i` by indexing `selectors`.

        Parameters
        ----------
        i: int, list, np.ndarray
            the element or elements requested, used as the index of the
            selector object.

        Returns
        -------
        selectors_i: tuple
            the selectors for the element `i`.

        """
        selectors_i = self.selectors[i]
        return selectors_i

    def iter_indices(self):
        """Iterate over the possible input indices.

        The indices go from the start to the stop of `_pos_inputs` with its
        step.

        Returns
        -------
        idx: int
            the indices output sequentially.

        """
        start, stop = self._pos_inputs.start, self._pos_inputs.stop
        step = self._pos_inputs.step
        # `range` instead of `xrange`: the latter does not exist in Python 3
        for idx in range(start, stop, step):
            yield idx

    ################################# Setters #################################
    ###########################################################################
    def add_perturbations(self, perturbations):
        """Public entry to add perturbations to the model.

        It only delegates to `_format_perturbations`.

        Parameters
        ----------
        perturbations: list or pst.BasePerturbation
            the perturbation information.

        """
        formatter = self._format_perturbations
        formatter(perturbations)

    def add_aggregations(self, aggregations, i_r=(None, None)):
        """Public entry to add aggregations to the model.

        It only delegates to `_format_aggregations`.

        Parameters
        ----------
        aggregations: tuple
            the aggregation information.
        i_r: tuple
            the indices of retriever and features to use in the aggregation.

        """
        formatter = self._format_aggregations
        formatter(aggregations, i_r)

    def set_loop(self, pos_inputs, map_indices=None):
        """Public entry to restrict the loop to some possible inputs.

        It only delegates to `_format_loop`.

        Parameters
        ----------
        pos_inputs: int, tuple, slice
            the possible indices input in order to obtain their spatial
            descriptors.
        map_indices: function or None (default=None)
            the map from the input index to the usable index.

        """
        formatter = self._format_loop
        formatter(pos_inputs, map_indices)

    def apply_aggregations(self, regs, agg_info, selectors):
        """Apply aggregations.

        Parameters
        ----------
        regs: np.ndarray
            the regions in which we want to aggregate information.
        agg_info: tuple
            the information to aggregate the information.
        selectors: int, tuple, np.ndarray
            how to select which retriever.

        """
        ## 0. Prepare aggregations
        locs = self.retrievers.retrievers[0]._data
        if len(regs.shape) == 1:
            regs = regs.reshape((len(regs), 1))
        assert len(locs) == len(regs)
        ## 1. Create aggregations
        for i in range(regs.shape[1]):
            retriever_in, retriever_out, aggregating = copy.copy(agg_info)
            retriever_out["input_map"] = regs[:, i]
            disc_info = (locs, regs[:, i])
            agg_info_i = disc_info, retriever_in, retriever_out, aggregating
            self.add_aggregations(agg_info_i, i_r=(0, 0))
        ## 2. Set selectors
        pass

    ############################ Computer functions ###########################
    ###########################################################################
    def _compute_nets(self):
        """Compute the total measure looping over all the possible indices.

        Returns
        -------
        measure: np.ndarray or list
            the measure computed by the whole spatial descriptor model.

        """
        featurers = self.featurers
        ## 0. Empty result to be filled
        result = featurers.initialization_output()
        ## 1. Accumulate the descriptors of each element
        for idx in self.iter_indices():
            descriptors, store_idx = self._compute_descriptors(idx)
            result = featurers.add2result(result, descriptors, store_idx)
        ## 2. Closing the measure
        return featurers.to_complete_measure(result)

    def _compute_retdriven(self):
        """Compute the whole measure iterating directly over the retrievers.

        The retrievers manager is iterated to obtain the elements and their
        neighbourhood information, instead of looping over the indices.

        Returns
        -------
        measure: np.ndarray or list
            the measure computed by the whole spatial descriptor model.

        """
        ## 0. Empty result and perturbation indices
        result = self.featurers.initialization_output()
        ks = list(range(self.featurers.k_perturb + 1))
        ## 1. Accumulate the descriptors of each retrieved neighbourhood
        for iss, neighs_info in self.retrievers:
            computed = self.featurers.compute_descriptors(iss, neighs_info, ks)
            characs_iss, vals_iss = computed
            result = self.featurers.add2result(result, characs_iss, vals_iss)
        ## 2. Closing the measure
        return self.featurers.to_complete_measure(result)

    def _compute_descriptors(self, i):
        """Compute the descriptors of the element or elements `i`.

        The neighbourhood of `i` is retrieved with the selected retriever,
        all the perturbation indices are set on it and then it is passed to
        the features manager.

        Parameters
        ----------
        i: int, list, np.ndarray
            the indices of the elements we want to compute its spatial
            features.

        Returns
        -------
        desc_i: list or np.ndarray
            the descriptors of each element `i` for each possible `k`
            perturbation.
        vals_i: list or np.ndarray
            the store information index of each element `i` for each possible
            `k` perturbation.

        """
        ## 0. Selectors for this element (staticneighs is not needed here)
        _, typeret, typefeats = self._get_methods(i)
        ## 1. Perturbation indices: the unperturbed one plus k_perturb
        ks = list(range(self.featurers.k_perturb + 1))
        ## 2. Neighbourhood retrieval
        neighs_info = self.retrievers.retrieve_neighs(i, typeret_i=typeret)
        neighs_info.set_ks(ks)
        ## 3. Descriptors computation
        computed = self.featurers.compute_descriptors(i, neighs_info, ks, typefeats)
        desc_i, vals_i = computed
        return desc_i, vals_i

    #    def _compute_descriptors(self, i):
    #        "Compute the descriptors assigned to element i."
    #        staticneighs, typeret, typefeats = self._get_methods(i)
    #        if staticneighs:
    #            characs, vals_i = self._compute_descriptors_seq0(i, typeret,
    #                                                             typefeats)
    #        else:
    #            characs, vals_i = self._compute_descriptors_seq1(i, typeret,
    #                                                             typefeats)
    #
    #        return characs, vals_i
    #
    #    def _compute_descriptors_seq0(self, i, typeret, typefeats):
    #        "Computation descriptors for non-aggregated data."
    #        ## Model1
    #        staticneighs, _, _ = self._get_methods(i)
    #        k_pert = self.featurers.k_perturb+1
    #        ks = list(range(k_pert))
    #        neighs_info =\
    #            self.retrievers.retrieve_neighs(i, typeret_i=typeret)  #, k=ks)
    #        assert(staticneighs == neighs_info.staticneighs)
    #        characs, vals_i =\
    #            self.featurers.compute_descriptors(i, neighs_info, ks, typefeats)
    #        return characs, vals_i
    #
    #    def _compute_descriptors_seq1(self, i, typeret, typefeats):
    #        "Computation descriptors for aggregated data."
    #        k_pert = self.featurers.k_perturb+1
    #        characs, vals_i = [], []
    #        for k in range(k_pert):
    #            neighs_info =\
    #                self.retrievers.retrieve_neighs(i, typeret_i=typeret, k=k)
    #            assert(len(neighs_info.ks) == 1)
    #            assert(neighs_info.ks[0] == k)
    #            characs_k, vals_i_k =\
    #                self.featurers.compute_descriptors(i, neighs_info,
    #                                                   k, typefeats)
    #            characs.append(characs_k)
    #            vals_i.append(vals_i_k)
    #        ## Joining descriptors from different perturbations
    #        characs = self.featurers._join_descriptors(characs)
    #        vals_i = np.concatenate(vals_i)
    #        return characs, vals_i

    ################################ ITERATORS ################################
    ###########################################################################
    def compute_nets_i(self):
        """Iterate over the elements yielding their spatial descriptors.

        Returns
        -------
        desc_i: list or np.ndarray
            the descriptors of each element `i` for each possible `k`
            perturbation.
        vals_i: list or np.ndarray
            the store information index of each element `i` for each possible
            `k` perturbation.

        """
        for idx in self.iter_indices():
            ## Descriptors of the element idx, yielded as a pair
            descriptors, store_idx = self._compute_descriptors(idx)
            yield descriptors, store_idx

    def compute_net_ik(self):
        """Function iterator used to get the result of each val_i and corr_i
        result for each combination of element i and permutation k.

        The descriptors of each element are computed once and then their
        entries are yielded one by one.

        Returns
        -------
        vals_ik: list or np.ndarray
            the k-th entry of the store information index of the element `i`.
        desc_ik: list or np.ndarray
            the k-th entry of the descriptors of the element `i`.

        """
        for i in self.iter_indices():
            # 1. Retrieve local characterizers. A single call is enough: an
            # outer loop over the perturbations only repeated the same call
            # and yielded every pair several times.
            desc_i, vals_i = self._compute_descriptors(i)
            # 2. Output each entry separately
            for k in range(len(desc_i)):
                yield vals_i[k], desc_i[k]

    ############################ Process function #############################
    ###########################################################################
    def compute_process(self, logfile, lim_rows=0, n_procs=0):
        """Compute the measure through a SpatialDescriptorModelProcess.

        A process object is built from this model and the given parameters,
        and its `compute_measure` result is returned.

        Parameters
        ----------
        logfile: str
            the file we want to log all the process.
        lim_rows: int (default=0)
            the limit number of rows uninformed. If is 0, there are not
            partial information of the process.
        n_procs: int (default=0)
            the number of cpu used.

        Returns
        -------
        measure: np.ndarray or list
            the measure computed by the whole spatial descriptor model.

        """
        process_args = (self, logfile, lim_rows, n_procs)
        return SpatialDescriptorModelProcess(*process_args).compute_measure()

    ############################# Model functions #############################
    ###########################################################################
    def fit(self, indices, y, sample_weight=None):
        """Fit the inner model with the spatial descriptors of `indices`.

        Parameters
        ----------
        indices: np.ndarray
            the indices of the samples used to compute the model.
        y: np.ndarray
            the target we want to predict.
        sample_weight : np.ndarray [n_samples]
            Individual weights for each sample. It is accepted but it is not
            passed to the inner model.

        Returns
        -------
        self : returns an instance of self.

        """
        n_indices, n_targets = len(indices), len(y)
        assert n_indices == n_targets
        ## Descriptors of the samples as the features of the model
        X = self.compute(list(indices))
        ## The model is replaced by whatever its fit returns
        fitted = self.model.fit(X, y)
        self.model = fitted
        return self

    def predict(self, indices):
        """Use the SpatialDescriptorModel as a model to predict targets from
        spatial descriptors.

        Parameters
        ----------
        indices: np.ndarray
            the indices of the samples used to compute the target predicted.
            Any iterable is accepted; it is converted to a list before
            computing the descriptors, as it is done in `fit`.

        Returns
        -------
        y_pred : np.ndarray
            the predicted target.

        """
        # Convert first: `compute` would otherwise consume a one-shot iterable
        # and the length check below would be done against an empty list.
        indices = list(indices)
        X = self.compute(indices)
        y_pred = self.model.predict(X)
        assert len(indices) == len(y_pred)
        return y_pred