Beispiel #1
0
 def _check_roles(self):
     """
     Checks that every role defined on this object is supported by its
     entrypoint.

     A role is considered defined when the attribute returned by
     ``DataRoles.to_attribute(role)`` exists and is not None. It is
     supported when that attribute name is a parameter of
     ``self._entrypoint``.

     :raises SystemExit: if the object has no attribute ``_entrypoint``
     :raises RuntimeError: if a defined role other than ``Role.Label``
         is not supported

     An unsupported ``Role.Label`` only produces a warning, and nothing
     at all when ``self.type`` is ``"clusterer"`` or ``"anomaly"``.
     """
     if not hasattr(self, '_entrypoint'):
         raise SystemExit(
             'One internal learner does not follow the new syntax.')

     accepted = signature(self._entrypoint).parameters
     message = ("Model '{0}' (type='{1}') does not support role '{2}' "
                "(for developers, check _allowed_roles is defined).")

     for role in DataRoles._allowed:
         attr_name = DataRoles.to_attribute(role)
         # Nothing to check if the role is not set or if the entrypoint
         # accepts it.
         if getattr(self, attr_name, None) is None or \
                 attr_name in accepted:
             continue

         if role != Role.Label:
             raise RuntimeError(
                 message.format(type(self), self.type, role))

         # Warning instead of an exception for the label, but the logic
         # in experiment.py should really be simplified: the model
         # should know which roles it supports and the current code
         # makes it difficult to guess. A minor modification in
         # entrypoints.py should do the trick.
         if self.type not in {"clusterer", "anomaly"}:
             warnings.warn(message.format(type(self), self.type, role))
Beispiel #2
0
    def _handle_extra_syntax_parameters(self, params):
        """
        Handles extra parameters given to the constructor such as
        *columns* or a role.

        The method first checks that every name in *params* (after being
        mapped through ``DataRoles._allowed_attr``) is a parameter of the
        class constructor or belongs to
        ``BasePipelineItem._hidden_constructor_arguments``. It then
        consumes *columns* and the role attributes: they are removed from
        *params* and forwarded to ``set_inputs`` or stored as attributes.

        :param params: dictionary of extra constructor arguments,
            modified in place
        :raises NameError: if one or several parameters are not allowed
        :raises AttributeError: if a role is given both through a
            *columns* dictionary and through its own argument with a
            different value
        """

        # Maps a name through DataRoles._allowed_attr, unknown names are
        # left unchanged.
        def clean_name(name):
            return DataRoles._allowed_attr.get(name, name)

        set_params = set(map(clean_name, params))

        # Checks that extra parameters are allowed.
        sign = signature(self.__class__.__init__)
        allowed = set(sign.parameters)

        notin = set_params - allowed - \
            BasePipelineItem._hidden_constructor_arguments
        if notin:
            # Kept in a separate variable so that *allowed* remains the
            # set of constructor parameters.
            allowed_text = "\n".join(
                wrap(", ".join(sorted(a for a in allowed if a != 'self'))))
            if len(notin) == 1:
                raise NameError("Parameter '{0}' is not allowed for class '{"
                                "1}'.\nAllowed: {2}".format(
                                    sorted(notin)[0],
                                    self.__class__.__name__, allowed_text))
            raise NameError("Parameters {0} are not allowed for class '{"
                            "1}'.\nAllowed: {2}".format(
                                sorted(notin), self.__class__.__name__,
                                allowed_text))

        # Handles parameters columns.
        inputs = OrderedDict()
        cols = params.pop('columns', None)
        if cols:
            if isinstance(cols, dict):
                inputs.update(cols)
            else:
                self.set_inputs(cols, early=True)

        # Only a dictionary maps roles to columns. With a list or a
        # string, ``role in cols`` would test column names (or
        # substrings) and ``cols[role]`` would raise a TypeError.
        role_columns = cols if isinstance(cols, dict) else {}

        for role in DataRoles._allowed:
            attr = DataRoles.to_attribute(role)
            if attr not in params:
                continue
            value = params[attr]
            if role in role_columns and value != role_columns[role]:
                raise AttributeError(
                    "Attribute '{0}' is already set to '{1}', "
                    "cannot be replaced by '{2}'".format(
                        attr, role_columns[role], value))
            if attr in allowed:
                # The constructor declares this role explicitly.
                setattr(self, attr, value)
            else:
                inputs[role] = value
            del params[attr]

        if inputs:
            self.set_inputs(inputs, early=True)
Beispiel #3
0
    def get_params(self, deep=True):
        """
        Scikit-learn API, returns all parameters.

        The parameter names come from the signature of the class
        constructor, *self* and *params* excluded. Each value is read
        from the attribute of the same name, except *columns* which is
        read from ``_columns``. Names without a matching attribute are
        skipped.

        :param deep: unused, part of the scikit-learn signature
        :return: dictionary ``{parameter name: value}``
        """
        res = {}
        for name in signature(self.__class__.__init__).parameters:
            if name in ('self', 'params'):
                continue
            attribute = '_columns' if name == 'columns' else name
            if hasattr(self, attribute):
                res[name] = getattr(self, attribute)

        # A dictionary of columns is always returned, even if the
        # constructor has no parameter *columns*.
        columns = getattr(self, "_columns", None)
        if isinstance(columns, dict):
            res['columns'] = columns

        return res
Beispiel #4
0
 def _use_input_schema(self):
     """
     Tells which API this object uses to define its inputs and outputs.

     :return: a two letter code:

         * ``'so'`` if ``_use_only_one_output()`` is true,
         * ``'si'`` if ``_use_single_input_as_string()`` is true,
         * ``'ns'`` if the entrypoint has a parameter called *source*
           (couple source, name),
         * ``'io'`` otherwise (couple input, output).
     """
     if self._use_only_one_output():
         return 'so'
     if self._use_single_input_as_string():
         return 'si'
     # The signature is only inspected when the first two checks fail.
     entrypoint_params = signature(self._entrypoint).parameters
     return 'ns' if "source" in entrypoint_params else "io"
Beispiel #5
0
 def get_params(self, deep=True):
     """
     Scikit-learn API with same params, returns all parameters.

     The parameter names come from the signature of the class
     constructor, *self* and *params* excluded. Each value is read from
     the attribute of the same name, except *columns* which is read
     from ``_columns``.

     When ``self.type`` is not ``"transform"``, the entry *columns* is
     replaced: a dictionary is expanded into one entry per key, named
     ``Role.to_attribute(key, "")``, any other value is stored under
     *feature*.

     :param deep: unused, part of the scikit-learn signature
     :return: dictionary ``{parameter name: value}``
     """
     res = {}
     for name in signature(self.__class__.__init__).parameters:
         if name in ('self', 'params'):
             continue
         attribute = '_columns' if name == 'columns' else name
         if hasattr(self, attribute):
             res[name] = getattr(self, attribute)

     columns = getattr(self, "_columns", None)
     if isinstance(columns, dict):
         res['columns'] = columns

     # Transforms keep *columns* as it is.
     if self.type == "transform" or 'columns' not in res:
         return res

     cols = res.pop('columns')
     if isinstance(cols, dict):
         res.update(
             (Role.to_attribute(key, ""), value)
             for key, value in cols.items())
     else:
         res['feature'] = cols
     return res
Beispiel #6
0
 def __init__(self, type=None, random_state=None, **params):
     """
     Initializes the attributes shared by every pipeline item.

     :param type: kind of item, stored in ``self.type``; only the value
         ``'transform'`` may receive the argument *columns*
     :param random_state: stored as is in ``self.random_state``
     :param params: extra constructor arguments, checked here and then
         given to ``_handle_extra_syntax_parameters``
     :raises ValueError: if *type* is None
     :raises RuntimeError: if *schema*, *input* or *output* is given,
         or if a non None *columns* is given to an item which is not a
         transform
     :raises TypeError: if a parameter whose name contains ``_num_``
         is neither an int nor a float
     """
     # The constructor is usually called twice.
     # First time from BaseSomething like BaseTransform.
     # Second from internal classes.
     if hasattr(self, '_BasePipelineItem_already_called'):
         return
     self._BasePipelineItem_already_called = True
     if type is None:
         raise ValueError("Type must be defined.")
     self.type = type
     if 'schema' in params:
         raise RuntimeError("Schema not allowed.")
     if 'input' in params:
         raise RuntimeError("Input not allowed.")
     if 'output' in params:
         raise RuntimeError("Output not allowed.")
     if 'columns' in params and type != 'transform' and params[
             "columns"] is not None:
         raise RuntimeError(
             "Predictor use arguements feature, label to defined "
             "roles, argument columns is not allowed.")
     self.random_state = random_state
     # It assumes all columns are used as input.
     self.input = None
     # Default options for output columns. Depends on the model.
     self.output = None
     # The roles supported by the item are the ones whose attribute
     # name is a parameter of the entrypoint.
     sig_params = signature(self._entrypoint).parameters
     self._allowed_roles = set(
         r for r in DataRoles._allowed if
         Role.to_attribute(r) in sig_params)
     # Basic checking on parameters.
     for k, v in params.items():
         if '_num_' in k and not isinstance(v, (int, float)):
             # The argument *type* shadows the builtin of the same
             # name: calling type(v) here would call a string and hide
             # this message behind "'str' object is not callable".
             raise TypeError(
                 "Parameter '{0}' is not numeric but {1}.".format(
                     k, v.__class__))
     self._handle_extra_syntax_parameters(params)
Beispiel #7
0
    def set_inputs(self, inp, early=False):
        """
        Change the input columns.

        Depending on the object, the inputs are stored as roles, in the
        attribute ``source`` or in the attribute ``input``. The name of
        the attribute which was used is kept in ``_attr_input``.

        :param inp: inputs (dictionary, list, str, tuple,
            see `Columns </nimbusml/concepts/columns>`_)
        :param early: set inputs from the constructor, object type is
            unknown; it disables the check that a learner uses the role
            *Feature*
        :return: self, or the result of ``_set_role`` when the object is
            not a transform and *inp* is a dictionary, a string or a
            tuple
        :raises ValueError: if *inp* is empty
        :raises RuntimeError: if a learner does not use the role
            *Feature*, or if more inputs are given than allowed
        :raises TypeError: if the inputs cannot be converted into the
            expected type
        :raises NotImplementedError: if the type of *inp* is not
            supported by an object using the couple (source, name)
        """
        if isinstance(inp, (list, tuple, dict)):
            if len(inp) == 0:
                raise ValueError("inp is empty")
        elif inp in (None, ''):
            raise ValueError("inp is empty")

        if self.type not in ('transform', None):
            # Learners define their inputs through roles. A list falls
            # through to the generic code below.
            if isinstance(inp, dict):
                return self._set_role(inp)
            elif isinstance(inp, (str, tuple)):
                return self._set_role(inp, 'Feature')
        elif isinstance(inp, dict) and self._use_role_except_feature():
            # The roles used by the object are extracted from a copy,
            # what remains is processed as regular inputs.
            inp = inp.copy()
            for k in DataRoles._allowed:
                if k in inp and self._use_role(k):
                    self._set_role(inp[k], role=k)
                    del inp[k]
            if len(inp) == 0:
                return self

        if not early and self.type != 'transform' and not self._use_role(
                'Feature'):
            raise RuntimeError(
                "This learner (type: '{0}') does not use role "
                "'Feature'.\nentrypoint={1}\nparams={2}".format(
                    self.type, self._entrypoint,
                    ", ".join(sorted(signature(self._entrypoint).parameters))))

        if self._use_input_schema() == "ns":
            # Couple source, name
            attr = 'source'
            if isinstance(inp, (str, tuple)):
                self._add_attribute(attr, inp)
                self._set_outputs(inp)
            elif isinstance(inp, list):
                if len(inp) != 1:
                    raise RuntimeError(
                        "Only one column is allowed for '{0}'.".format(
                            type(self)))
                self._add_attribute(attr, inp[0])
                self._set_outputs(inp[0])
            elif isinstance(inp, dict):
                if len(inp) != 1:
                    raise RuntimeError(
                        "Only one input is allowed for '{0}'.".format(
                            type(self)))
                # The key is the output name, the value is the source.
                key = list(inp.keys())[0]
                value = inp[key]
                if isinstance(value, list):
                    if len(value) != 1:
                        raise RuntimeError(
                            "Only one input is allowed for '{0}'.".format(
                                type(self)))
                    value = value[0]
                if not isinstance(value, (str, tuple)):
                    raise RuntimeError(
                        "'{0}' only accepts one input given as string or "
                        "tuple.".format(type(self)))
                setattr(self, attr, value)
                self._set_outputs(key)
            else:
                # The attribute is set before the exception is raised,
                # this order is kept unchanged.
                self._add_attribute(attr, inp)
                raise NotImplementedError(
                    "Type '{0}' is not supported.".format(type(inp)))
            if not isinstance(getattr(self, attr), (str, tuple)):
                raise TypeError(
                    "Unable to convert input into a string or a tuple: {"
                    "0}".format(type(getattr(self, attr))))

        elif self._use_multi_output():
            # Couple input, output
            attr = 'input'
            if isinstance(inp, dict):
                couples = [(k, v) for k, v in inp.items()]
                self._add_attribute(attr, [v for k, v in couples])
                self._set_outputs([k for k, v in couples])
            elif isinstance(inp, list):
                res = []
                is_string_or_tuple = False
                for i, v in enumerate(inp):
                    if isinstance(v, list) and not is_string_or_tuple:
                        res.append(v)
                    elif isinstance(v, (
                            DataStream, ViewDataStream)) and \
                            not is_string_or_tuple:
                        # The schema belongs to the stream *v*, *inp* is
                        # the list which contains it.
                        res.append([c.Name for c in v.schema])
                    elif isinstance(v, (str, tuple)):
                        is_string_or_tuple = True
                        res.append(v)
                    else:
                        raise TypeError(
                            "Unexpected type for input {0}".format(i))
                if is_string_or_tuple:
                    # A flat list of names is a single input.
                    self._add_attribute(attr, [res])
                else:
                    self._add_attribute(attr, res)
            else:
                self._add_attribute(attr, inp)
            if not isinstance(getattr(self, attr), list):
                raise TypeError(
                    "Unable to convert input into a list: {0}".format(
                        type(getattr(self, attr))))
            for i, one_input in enumerate(getattr(self, attr)):
                if not isinstance(one_input, list):
                    raise TypeError("Input {0} is not a list but: {1}".format(
                        i, type(one_input)))
        else:
            attr = 'input'
            if isinstance(inp, (str, tuple)):
                # tuple for MultiIndexColumn
                self._add_attribute(attr, [inp], input=True)
                self._set_outputs([inp])
            elif isinstance(inp, dict):
                couples = [(k, v) for k, v in inp.items()]
                self._add_attribute(attr, [v for k, v in couples], input=True)
                self._set_outputs([k for k, v in couples])
            elif isinstance(inp, list):
                self._add_attribute(attr, inp, input=True)
                if self._use_unique_default_output_is_feature(
                ) and len(inp) != 1:
                    raise RuntimeError(
                        "The transform only allows only output, "
                        "use a dictionary to specify its name.")
                else:
                    self._set_outputs(inp)
            else:
                raise TypeError("Unexpected type for inp: {0}".format(
                    type(inp)))

        # Needed for learner. % is also used to define feature roles.
        # NOTE(review): 'clustering' may not match the type string used
        # by clusterers (_check_roles tests for "clusterer") - confirm.
        if self.type in {
                'classifier', 'regressor', 'ranker', 'clustering', 'anomaly'
        }:
            self.feature_column_name = getattr(self, attr)
            if not isinstance(self.feature_column_name, (str, tuple)):
                if isinstance(self.feature_column_name, list):
                    if len(self.feature_column_name) == 1:
                        self.feature_column_name = self.feature_column_name[0]
                    else:
                        # Several feature columns are kept as a list,
                        # Experiment will merge them.
                        pass
                else:
                    raise TypeError(
                        "Feature column type is unexpected: {0}".format(
                            type(self.feature_column_name)))

        self._attr_input = attr
        self._check_inputs()
        return self