Beispiel #1
0
    def get_backmappers(self, data):
        """
        Pair the data's class variables with the model's and collect mappers.

        Parameters
        ----------
        data
            Object whose ``domain.class_vars`` are matched, position by
            position, against ``self.domain.class_vars``.

        Returns
        -------
        A ``(backmappers, n_values)`` tuple. ``backmappers`` holds, for each
        class variable, the result of ``get_mapper_from`` when the data
        variable is discrete and is a different object than the model
        variable, and ``None`` otherwise; the whole list collapses to
        ``None`` when no mapper is needed. ``n_values`` holds, per class
        variable, ``is_discrete and len(values)``. When either side has no
        class variables the result is ``(None, [])``.

        Raises
        ------
        DomainTransformationError
            If the number of class variables differs, or a pair of class
            variables does not compare equal.
        """
        data_targets = data.domain.class_vars
        model_targets = self.domain.class_vars
        if not model_targets or not data_targets:
            # classless model or data; don't touch
            return None, []
        if len(data_targets) != len(model_targets):
            raise DomainTransformationError(
                "Mismatching number of model's classes and data classes")

        mappers = []
        cardinalities = []
        for data_var, model_var in zip(data_targets, model_targets):
            if data_var != model_var:
                # Same name but unequal variables is reported differently
                # from a plain name mismatch.
                if data_var.name == model_var.name:
                    message = (
                        f"Variables '{model_var.name}' in the model is "
                        "incompatible with the variable of the same name "
                        "in the data.")
                else:
                    message = (
                        f"Model for '{model_var.name}' "
                        f"cannot predict '{data_var.name}'")
                raise DomainTransformationError(message)

            cardinalities.append(
                data_var.is_discrete and len(data_var.values))
            # Equal-but-distinct discrete variables still need a mapper.
            needs_mapper = data_var is not model_var and data_var.is_discrete
            mappers.append(
                data_var.get_mapper_from(model_var) if needs_mapper else None)

        if not any(mapper is not None for mapper in mappers):
            mappers = None
        return mappers, cardinalities
Beispiel #2
0
    def data_to_model_domain(self, data: Table) -> Table:
        """
        Bring a data table into the model's domain when that is possible.

        Parameters
        ----------
        data
            Table to be converted.

        Returns
        -------
        ``data`` itself when its domain already equals the model's domain,
        otherwise the transformed table.

        Raises
        ------
        DomainTransformationError
            Transforming into the original domain left no defined values
            in ``X``, i.e. the domains are not compatible.
        """
        if data.domain == self.domain:
            return data

        # Go through the original domain only when the attributes differ
        # and the data actually carries some defined values.
        through_original = (
            self.original_domain.attributes != data.domain.attributes
            and data.X.size
            and not all_nan(data.X)
        )
        if not through_original:
            return data.transform(self.domain)

        intermediate = data.transform(self.original_domain)
        if all_nan(intermediate.X):
            raise DomainTransformationError(
                "domain transformation produced no defined values")
        return intermediate.transform(self.domain)
    def data_to_model_domain(self, data: Table) -> Table:
        """
        Transform ``data`` to the model domain.

        Data already in the model domain is returned unchanged. When the
        data's attributes differ from those of the original domain and ``X``
        is non-empty and not entirely NaN, the data is first transformed to
        the original domain and only then to the model domain.

        Raises
        ------
        DomainTransformationError
            The intermediate transformation produced an all-NaN ``X``.
        """
        if data.domain == self.domain:
            return data

        source = data
        if self.original_domain.attributes != data.domain.attributes \
                and data.X.size and not all_nan(data.X):
            source = data.transform(self.original_domain)
            if all_nan(source.X):
                raise DomainTransformationError(
                    "domain transformation produced no defined values")
        # Both paths end with the same final transformation.
        return source.transform(self.domain)
Beispiel #4
0
    def data_to_model_domain(
            self,
            data: Table,
            progress_callback: Callable = dummy_callback) -> Table:
        """
        Bring a data table into the model's domain, reporting progress.

        Parameters
        ----------
        data
            Table to be converted.
        progress_callback
            Callable invoked with the progress values 0, 0.5, (0.75) and 1
            as the conversion advances. It is not called at all when the
            data is already in the model's domain.

        Returns
        -------
        ``data`` itself when its domain already equals the model's domain,
        otherwise the transformed table.

        Raises
        ------
        DomainTransformationError
            Transforming into the original domain left no defined values
            in ``X``, i.e. the domains are not compatible.
        """
        if data.domain == self.domain:
            return data

        progress_callback(0)
        through_original = (
            self.original_domain.attributes != data.domain.attributes
            and data.X.size
            and not all_nan(data.X)
        )
        # The half-way mark is reported on either path, right after the
        # decision which path to take.
        progress_callback(0.5)
        if through_original:
            data = data.transform(self.original_domain)
            if all_nan(data.X):
                raise DomainTransformationError(
                    "domain transformation produced no defined values")
            progress_callback(0.75)

        data = data.transform(self.domain)
        progress_callback(1)
        return data
Beispiel #5
0
    def data_to_model_domain(self, data: Table, progress_callback: Callable) \
            -> Table:
        """
        Transform ``data`` to the model domain while reporting progress.

        Parameters
        ----------
        data
            Table to be transformed.
        progress_callback
            Callable receiving progress values: 0 and 0.5 before the
            transformation, 0.75 between the two transformations when the
            original domain is used as an intermediate step, and 1 at the
            end. Not called when the data is already in the model domain.

        Returns
        -------
        The unchanged ``data`` if its domain equals the model domain,
        otherwise the transformed table.

        Raises
        ------
        DomainTransformationError
            The intermediate transformation produced an all-NaN ``X``.
        """
        if data.domain == self.domain:
            return data

        progress_callback(0)
        attributes_differ = \
            self.original_domain.attributes != data.domain.attributes
        if attributes_differ and data.X.size and not all_nan(data.X):
            progress_callback(0.5)
            in_original = data.transform(self.original_domain)
            if all_nan(in_original.X):
                raise DomainTransformationError(
                    "domain transformation produced no defined values")
            progress_callback(0.75)
            result = in_original.transform(self.domain)
        else:
            progress_callback(0.5)
            result = data.transform(self.domain)

        progress_callback(1)
        return result
Beispiel #6
0
    def __call__(self, data):
        """
        Run ``self.predict`` on ``data`` given in any supported form.

        Parameters
        ----------
        data
            A numpy array (1-D arrays are treated as a single row), a scipy
            CSR/CSC matrix, a ``Table``, an ``Instance``, or a list/tuple
            holding either one row of values or a sequence of rows.

        Returns
        -------
        The output of ``self.predict``; its first element only when the
        input was a single row (1-D array, ``Instance`` or a flat
        list/tuple).

        Raises
        ------
        DomainTransformationError
            Transforming a table into the original domain left no defined
            values in ``X``.
        TypeError
            ``data`` is of none of the supported types.
        """
        single_row = False
        if isinstance(data, np.ndarray):
            single_row = data.ndim == 1
            prediction = self.predict(np.atleast_2d(data))
        elif isinstance(
                data, (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)):
            prediction = self.predict(data)
        elif isinstance(data, (Table, Instance)):
            if isinstance(data, Instance):
                # Wrap the lone instance into a one-row table.
                data = Table.from_list(data.domain, [data])
                single_row = True
            if data.domain != self.domain:
                attributes_differ = \
                    self.original_domain.attributes != data.domain.attributes
                if attributes_differ \
                        and data.X.size \
                        and not np.isnan(data.X).all():
                    data = data.transform(self.original_domain)
                    if np.isnan(data.X).all():
                        raise DomainTransformationError(
                            "domain transformation produced no defined values")
                data = data.transform(self.domain)
            prediction = self.predict(data.X)
        elif isinstance(data, (list, tuple)):
            if not isinstance(data[0], (list, tuple)):
                # A flat sequence is one row of values.
                data, single_row = [data], True
            table = Table.from_list(self.original_domain, data)
            prediction = self.predict(table.transform(self.domain).X)
        else:
            raise TypeError("Unrecognized argument (instance of '{}')".format(
                type(data).__name__))

        return prediction[0] if single_row else prediction
Beispiel #7
0
    def __call__(self, data, ret=Value):
        """
        Predict values and/or probabilities for ``data``.

        Parameters
        ----------
        data
            A numpy array, a scipy CSR matrix, a ``Table``, an ``Instance``,
            or a list/tuple holding either one row of values or a sequence
            of rows.
        ret
            Selects what is returned; it is compared against
            ``Model.Value``, ``Model.Probs`` and ``Model.ValueProbs`` and
            must lie between 0 and 2.

        Returns
        -------
        Predicted values, probabilities, or a ``(value, probs)`` tuple,
        depending on ``ret``.

        Raises
        ------
        ValueError
            ``ret`` is out of range, or probabilities are requested while
            some class variable of the model domain is continuous.
        TypeError
            ``data`` is of an unsupported type, or the predictor returned
            an array with an unexpected number of dimensions.
        DomainTransformationError
            Transforming a table into the original domain left no defined
            values in ``X``.
        """
        if not 0 <= ret <= 2:
            raise ValueError("invalid value of argument 'ret'")
        if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
            raise ValueError("cannot predict continuous distributions")

        # Call the predictor
        if isinstance(data, np.ndarray):
            prediction = self.predict(np.atleast_2d(data))
        # Use the public name; the ``scipy.sparse.csr`` submodule is a
        # private namespace that SciPy has deprecated.
        elif isinstance(data, scipy.sparse.csr_matrix):
            prediction = self.predict(data)
        elif isinstance(data, (Table, Instance)):
            if isinstance(data, Instance):
                data = Table(data.domain, [data])
            if data.domain != self.domain:
                # Go through the original domain only when the attributes
                # differ and the data carries some defined values.
                if self.original_domain.attributes != data.domain.attributes \
                        and data.X.size \
                        and not np.isnan(data.X).all():
                    data = data.transform(self.original_domain)
                    if np.isnan(data.X).all():
                        raise DomainTransformationError(
                            "domain transformation produced no defined values")
                data = data.transform(self.domain)
            prediction = self.predict_storage(data)
        elif isinstance(data, (list, tuple)):
            if not isinstance(data[0], (list, tuple)):
                data = [data]
            data = Table(self.original_domain, data)
            data = data.transform(self.domain)
            prediction = self.predict_storage(data)
        else:
            raise TypeError("Unrecognized argument (instance of '{}')".format(
                type(data).__name__))

        # Parse the result into value and probs
        multitarget = len(self.domain.class_vars) > 1
        if isinstance(prediction, tuple):
            value, probs = prediction
        elif prediction.ndim == 1 + multitarget:
            value, probs = prediction, None
        elif prediction.ndim == 2 + multitarget:
            value, probs = None, prediction
        else:
            # Interpolate the dimension count into the message instead of
            # passing it as a second exception argument.
            raise TypeError("model returned a %i-dimensional array"
                            % prediction.ndim)

        # Ensure that we have what we need to return
        if ret != Model.Probs and value is None:
            value = np.argmax(probs, axis=-1)
        if ret != Model.Value and probs is None:
            if multitarget:
                max_card = max(len(c.values) for c in self.domain.class_vars)
                probs = np.zeros(value.shape + (max_card, ), float)
                for i in range(len(self.domain.class_vars)):
                    probs[:, i, :] = one_hot(value[:, i])
            else:
                probs = one_hot(value)
            if ret == Model.ValueProbs:
                return value, probs
            else:
                return probs

        # Return what we need to
        if ret == Model.Probs:
            return probs
        # NOTE(review): an Instance passed in was rebound to a Table above,
        # so this check looks like it can never be true -- confirm whether
        # single-instance calls are meant to return a Value.
        if isinstance(data, Instance) and not multitarget:
            value = Value(self.domain.class_var, value[0])
        if ret == Model.Value:
            return value
        else:  # ret == Model.ValueProbs
            return value, probs