def _check_roles(self): """ Checks the consistency between defined roles and supported roles. """ if not hasattr(self, '_entrypoint'): raise SystemExit( 'One internal learner does not follow the new syntax.') params = signature(self._entrypoint).parameters for role in DataRoles._allowed: attr = DataRoles.to_attribute(role) if hasattr(self, attr) and getattr(self, attr) is not None and \ attr not in params: if role == Role.Label: # warnings instead of an exception but we should # really simplify the logic # in experiment.py. The model should know which # roles it supports. # current code makes it difficult to guess. # A minor modification in entrypoints.py should do the # trick. if self.type not in {"clusterer", "anomaly"}: warnings.warn( "Model '{0}' (type='{1}') does not support " "role '{2}' (for developers, check " "_allowed_roles is defined).".format( type(self), self.type, role)) else: raise RuntimeError( "Model '{0}' (type='{1}') does not support role " "'{2}' (for developers, check _allowed_roles is " "defined).".format(type(self), self.type, role))
def _handle_extra_syntax_parameters(self, params):
    """
    Handles extra parameters given to the constructor such as
    *columns* or a role.

    The dictionary *params* is modified in place: the ``'columns'`` entry
    and every role entry that gets processed are removed from it.

    :param params: dictionary of extra keyword arguments received by the
        constructor
    :raises NameError: if a parameter is neither an argument of the class
        constructor nor listed in
        ``BasePipelineItem._hidden_constructor_arguments``
    :raises AttributeError: if a role is given both through a *columns*
        dictionary and through its own parameter with different values
    """
    # Names found in DataRoles._allowed_attr are translated through that
    # mapping (original note: "remove column_ for roles"); any other name
    # is kept unchanged.
    def clean_name(name):
        return DataRoles._allowed_attr.get(name, name)

    given = set(map(clean_name, params))

    # Checks that extra parameters are allowed.
    allowed = set(signature(self.__class__.__init__).parameters)
    unknown = (given - allowed -
               BasePipelineItem._hidden_constructor_arguments)
    if unknown:
        listing = "\n".join(wrap(", ".join(
            sorted(name for name in allowed if name != 'self'))))
        if len(unknown) == 1:
            raise NameError(
                "Parameter '{0}' is not allowed for class '{1}'.\n"
                "Allowed: {2}".format(
                    sorted(unknown)[0], self.__class__.__name__, listing))
        raise NameError(
            "Parameters {0} are not allowed for class '{1}'.\n"
            "Allowed: {2}".format(
                sorted(unknown), self.__class__.__name__, listing))

    # Handles parameters columns.
    inputs = OrderedDict()
    cols = params.pop('columns', None)
    if cols:
        if isinstance(cols, dict):
            inputs.update(cols)
        else:
            self.set_inputs(cols, early=True)

    for role in DataRoles._allowed:
        name = DataRoles.to_attribute(role)
        if name not in params:
            continue
        # A conflict is only possible when *columns* maps roles to
        # columns. With a list or a string, ``role in cols`` would be an
        # element/substring test and ``cols[role]`` would raise TypeError.
        if isinstance(cols, dict) and role in cols and \
                params[name] != cols[role]:
            raise AttributeError(
                "Attribute '{0}' is already set to '{1}', "
                "cannot be replaced by '{2}'".format(
                    name, cols[role], params[name]))
        if name in allowed:
            # The constructor declares this role: store it directly.
            setattr(self, name, params[name])
        else:
            inputs[role] = params[name]
        del params[name]

    if inputs:
        self.set_inputs(inputs, early=True)
def get_params(self, deep=True):
    """
    Scikit-learn API, returns all parameters.

    The parameters are the arguments of the class constructor except
    ``self`` and ``params``. Each value is read from the attribute of the
    same name, except *columns* which is read from ``_columns``.
    Arguments without a matching attribute are skipped.

    :param deep: accepted for compatibility with the scikit-learn
        signature, not used here
    :return: dictionary ``{argument name: value}``
    """
    result = {}
    for arg in signature(self.__class__.__init__).parameters:
        if arg in ('self', 'params'):
            continue
        holder = '_columns' if arg == 'columns' else arg
        if hasattr(self, holder):
            result[arg] = getattr(self, holder)

    # A dictionary stored in _columns is always reported, even when the
    # constructor has no 'columns' argument.
    if isinstance(getattr(self, "_columns", None), dict):
        result['columns'] = self._columns
    return result
def _use_input_schema(self): """ Some transforms are using a different API to define inputs and outputs. (source, name) or (input, output). This methods returns True if the first one is used for this object. """ if self._use_only_one_output(): return 'so' if self._use_single_input_as_string(): return 'si' sign = signature(self._entrypoint) for p in sign.parameters: if p == "source": return 'ns' return "io"
def get_params(self, deep=True):
    """
    Scikit-learn API with same params, returns all parameters.

    The parameters are the arguments of the class constructor except
    ``self`` and ``params``. Each value is read from the attribute of the
    same name, except *columns* which is read from ``_columns``.

    When ``self.type`` is not ``"transform"``, the *columns* entry is not
    returned as such: a dictionary is expanded into one entry per key,
    named by ``Role.to_attribute(key, "")``, and any other value is
    returned under the key ``'feature'``.

    :param deep: accepted for compatibility with the scikit-learn
        signature, not used here
    :return: dictionary ``{parameter name: value}``
    """
    result = {}
    for arg in signature(self.__class__.__init__).parameters:
        if arg in ('self', 'params'):
            continue
        holder = '_columns' if arg == 'columns' else arg
        if hasattr(self, holder):
            result[arg] = getattr(self, holder)

    if isinstance(getattr(self, "_columns", None), dict):
        result['columns'] = self._columns

    if self.type == "transform" or 'columns' not in result:
        return result

    # Items which are not transforms report roles instead of columns.
    columns = result.pop('columns')
    if isinstance(columns, dict):
        for role, value in columns.items():
            result[Role.to_attribute(role, "")] = value
    else:
        result['feature'] = columns
    return result
def __init__(self, type=None, random_state=None, **params): # The consctuctor is usually called twice. # First time from BaseSomething like BaseTransform. # Second from internal classes. if hasattr(self, '_BasePipelineItem_already_called'): return self._BasePipelineItem_already_called = True if type is None: raise ValueError("Type must be defined.") self.type = type if 'schema' in params: raise RuntimeError("Schema not allowed.") if 'input' in params: raise RuntimeError("Input not allowed.") if 'output' in params: raise RuntimeError("Output not allowed.") if 'columns' in params and type != 'transform' and params[ "columns"] is not None: raise RuntimeError( "Predictor use arguements feature, label to defined " "roles, argument columns is not allowed.") self.random_state = random_state # It assumes all columns are used as input. self.input = None # Default options for output columns. Depends on the model. self.output = None sig_params = signature(self._entrypoint).parameters self._allowed_roles = set( r for r in DataRoles._allowed if Role.to_attribute(r) in sig_params) # Basic checking on parameters. for k, v in params.items(): if '_num_' in k and not isinstance(v, (int, float)): raise TypeError( "Parameter '{0}' is not numeric but {1}.".format( k, type(v))) self._handle_extra_syntax_parameters(params)
def set_inputs(self, inp, early=False):
    """
    Change the input columns.

    :param inp: inputs (dictionary, list, str, tuple, see
        `Columns </nimbusml/concepts/columns>`_)
    :param early: set inputs from the constructor, object type is
        unknown
    :return: *self*, except for an item which is not a transform and
        receives a dictionary, a string or a tuple: the result of
        ``_set_role`` is returned in that case
    :raises ValueError: if *inp* is empty or ``None``
    :raises RuntimeError: if the item does not use role ``'Feature'`` or
        receives more inputs than it supports
    :raises TypeError: if *inp* or one of its elements has an unexpected
        type
    :raises NotImplementedError: if an item using the couple
        (source, name) receives an unsupported type
    """
    if isinstance(inp, (list, tuple, dict)):
        if len(inp) == 0:
            raise ValueError("inp is empty")
    elif inp in (None, ''):
        raise ValueError("inp is empty")

    if self.type not in ('transform', None):
        # Items which are not transforms define their inputs with roles.
        if isinstance(inp, dict):
            return self._set_role(inp)
        elif isinstance(inp, (str, tuple)):
            return self._set_role(inp, 'Feature')
    elif isinstance(inp, dict) and self._use_role_except_feature():
        # Roles are consumed first, what remains is handled as columns.
        # The copy protects the caller's dictionary.
        inp = inp.copy()
        for k in DataRoles._allowed:
            if k in inp and self._use_role(k):
                self._set_role(inp[k], role=k)
                del inp[k]
        if len(inp) == 0:
            return self

    if not early and self.type != 'transform' and not self._use_role(
            'Feature'):
        raise RuntimeError(
            "This learner (type: '{0}') does not use role "
            "'Feature'.\nentrypoint={1}\nparams={2}".format(
                self.type, self._entrypoint,
                ", ".join(sorted(signature(self._entrypoint).parameters))))

    if self._use_input_schema() == "ns":
        # Couple source, name
        attr = 'source'
        if isinstance(inp, (str, tuple)):
            self._add_attribute(attr, inp)
            self._set_outputs(inp)
        elif isinstance(inp, list):
            if len(inp) != 1:
                raise RuntimeError(
                    "Only one column is allowed for '{0}'.".format(
                        type(self)))
            self._add_attribute(attr, inp[0])
            self._set_outputs(inp[0])
        elif isinstance(inp, dict):
            if len(inp) != 1:
                raise RuntimeError(
                    "Only one input is allowed for '{0}'.".format(
                        type(self)))
            key, value = next(iter(inp.items()))
            if isinstance(value, list):
                if len(value) != 1:
                    raise RuntimeError(
                        "Only one input is allowed for '{0}'.".format(
                            type(self)))
                value = value[0]
            if not isinstance(value, (str, tuple)):
                raise RuntimeError(
                    "'{0}' only accepts one input given as string or "
                    "tuple.".format(type(self)))
            setattr(self, attr, value)
            self._set_outputs(key)
        else:
            # The attribute is stored before the failure is reported.
            self._add_attribute(attr, inp)
            raise NotImplementedError(
                "Type '{0}' is not supported.".format(type(inp)))
        if not isinstance(getattr(self, attr), (str, tuple)):
            raise TypeError(
                "Unable to convert input into a string or a tuple: "
                "{0}".format(type(getattr(self, attr))))
    elif self._use_multi_output():
        # Couple input, output
        attr = 'input'
        if isinstance(inp, dict):
            self._add_attribute(attr, list(inp.values()))
            self._set_outputs(list(inp.keys()))
        elif isinstance(inp, list):
            groups = []
            # Once a string or a tuple is met, the whole list is one
            # single group of columns and nested lists or data streams
            # are rejected.
            is_string_or_tuple = False
            for i, v in enumerate(inp):
                if isinstance(v, list) and not is_string_or_tuple:
                    groups.append(v)
                elif isinstance(v, (DataStream, ViewDataStream)) and \
                        not is_string_or_tuple:
                    # The column names come from the stream itself, the
                    # enclosing list has no schema.
                    groups.append([c.Name for c in v.schema])
                elif isinstance(v, (str, tuple)):
                    is_string_or_tuple = True
                    groups.append(v)
                else:
                    raise TypeError(
                        "Unexpected type for input {0}".format(i))
            if is_string_or_tuple:
                self._add_attribute(attr, [groups])
            else:
                self._add_attribute(attr, groups)
        else:
            self._add_attribute(attr, inp)
        if not isinstance(getattr(self, attr), list):
            raise TypeError(
                "Unable to convert input into a list: {0}".format(
                    type(getattr(self, attr))))
        for i, group in enumerate(getattr(self, attr)):
            if not isinstance(group, list):
                raise TypeError("Input {0} is not a list but: {1}".format(
                    i, type(group)))
    else:
        attr = 'input'
        if isinstance(inp, (str, tuple)):
            # tuple for MultiIndexColumn
            self._add_attribute(attr, [inp], input=True)
            self._set_outputs([inp])
        elif isinstance(inp, dict):
            self._add_attribute(attr, list(inp.values()), input=True)
            self._set_outputs(list(inp.keys()))
        elif isinstance(inp, list):
            self._add_attribute(attr, inp, input=True)
            if self._use_unique_default_output_is_feature(
            ) and len(inp) != 1:
                raise RuntimeError(
                    "The transform only allows only output, "
                    "use a dictionary to specify its name.")
            self._set_outputs(inp)
        else:
            raise TypeError("Unexpected type for inp: {0}".format(
                type(inp)))

    # Needed for learner. % is also used to define feature roles.
    # NOTE(review): this set contains 'clustering' whereas _check_roles
    # tests the type against 'clusterer' - confirm which spelling the
    # predictors really use.
    if self.type in {
            'classifier', 'regressor', 'ranker', 'clustering', 'anomaly'}:
        feature = getattr(self, attr)
        self.feature_column_name = feature
        if not isinstance(feature, (str, tuple)):
            if not isinstance(feature, list):
                raise TypeError(
                    "Feature column type is unexpected: {0}".format(
                        type(feature)))
            if len(feature) == 1:
                self.feature_column_name = feature[0]
            # Several feature columns are kept as a list: Experiment
            # will merge them.

    self._attr_input = attr
    self._check_inputs()
    return self