def __call__(self, data, variable):
    """Build the "as a distinct value" replacement for one column.

    *variable* is first resolved through ``data.domain``.

    Discrete variable:
        Returns a single new discrete variable with the same name whose
        values are the original values followed by ``"N/A"``.  Its
        ``compute_value`` is a ``Lookup`` over the table ``0..n-1`` with
        ``unknown`` set to ``n``, the index of the added value
        (presumably routing missing entries to "N/A" -- confirm against
        ``Lookup``).

    Continuous variable:
        Returns a pair ``(imputed, indicator)``.  ``imputed`` is a copy of
        the variable whose ``compute_value`` is ``ReplaceUnknowns`` with the
        mean reported by ``BasicStats``; ``indicator`` is a discrete
        variable named ``"<name>_def"`` with values ``("undef", "def")``
        computed by ``IsDefined``.

    Raises:
        TypeError: the variable is neither discrete nor continuous; the
            exception argument is the variable's type.
    """
    source = data.domain[variable]

    if source.is_discrete:
        new_name = "{var.name}".format(var=source)
        extended_values = source.values + ("N/A",)
        # The lookup table covers the original value indices; the index
        # just past them is the one assigned to the appended "N/A".
        n_known = len(source.values)
        to_extended = Lookup(
            source,
            np.arange(n_known, dtype=int),
            unknown=n_known,
        )
        return Orange.data.DiscreteVariable(
            new_name,
            values=extended_values,
            compute_value=to_extended,
            sparse=source.sparse,
        )

    if not source.is_continuous:
        raise TypeError(type(source))

    indicator = Orange.data.DiscreteVariable(
        "{var.name}_def".format(var=source),
        values=("undef", "def"),
        compute_value=IsDefined(source),
        sparse=source.sparse,
    )
    mean = basic_stats.BasicStats(data, source).mean
    imputed = source.copy(compute_value=ReplaceUnknowns(source, mean))
    return imputed, indicator
def column_imputer_as_value(variable, table):
    """Create a ``ColumnImputerAsValue`` for *variable* of *table*.

    Discrete variable:
        The codomain is one new discrete variable with the same name, the
        original values plus ``"N/A"`` and the original ``base_value``.
        Its ``get_value_from`` is a ``Lookup`` over the table ``0..n-1``
        with ``unknown`` set to ``n``, the index of the added value.

    Continuous variable:
        The codomain is the original variable followed by a discrete
        indicator named ``"<name>_def"`` with values ``("undef", "def")``.
        The transformers are ``ReplaceUnknowns`` with the mean reported by
        ``BasicStats`` and the indicator's ``IsDefined``.

    Raises:
        TypeError: *variable* is neither a ``DiscreteVariable`` nor a
            ``ContinuousVariable``; the exception argument is its type.
    """
    discrete = isinstance(variable, Orange.data.DiscreteVariable)
    if not (discrete
            or isinstance(variable, Orange.data.ContinuousVariable)):
        raise TypeError(type(variable))

    if discrete:
        replacement = Orange.data.DiscreteVariable(
            "{var.name}".format(var=variable),
            values=variable.values + ["N/A"],
            base_value=variable.base_value,
        )
        n_known = len(variable.values)
        replacement.get_value_from = Lookup(
            variable,
            numpy.arange(n_known, dtype=int),
            unknown=n_known,
        )
        codomain = [replacement]
        # Read the transformer back from the variable, as the attribute is
        # the authoritative holder of it.
        transformers = [replacement.get_value_from]
    else:
        indicator = Orange.data.DiscreteVariable(
            "{var.name}_def".format(var=variable),
            values=("undef", "def"),
        )
        indicator.get_value_from = IsDefined(variable)
        codomain = [variable, indicator]
        mean = basic_stats.BasicStats(table, variable).mean
        transformers = [
            ReplaceUnknowns(variable, mean),
            indicator.get_value_from,
        ]

    return ColumnImputerAsValue(
        table.domain,
        Orange.data.Domain(codomain),
        transformers,
    )
def __call__(self, data, variable, value=None):
    """Return a copy of the variable that replaces unknowns with *value*.

    *variable* is first resolved through ``data.domain``.  When *value* is
    ``None`` a default is derived from *data*: the mean reported by
    ``BasicStats`` for a continuous variable, or ``modus()`` of the
    variable's distribution for a discrete one.

    Raises:
        TypeError: *value* is ``None`` and the variable is neither
            continuous nor discrete.
    """
    column = data.domain[variable]

    # An explicitly supplied value is used as-is, with no type check.
    if value is not None:
        return column.copy(compute_value=ReplaceUnknowns(column, value))

    if column.is_continuous:
        value = basic_stats.BasicStats(data, column).mean
    elif column.is_discrete:
        value = distribution.get_distribution(data, column).modus()
    else:
        raise TypeError("Variable must be continuous or discrete")

    return column.copy(compute_value=ReplaceUnknowns(column, value))
def __call__(self, data, variable):
    """Return a shallow copy of *variable* that replaces unknown values.

    The replacement value is derived from *data*: the mean reported by
    ``BasicStats`` for a continuous variable, or ``modus()`` of the
    variable's distribution for a discrete one.  The copy's
    ``compute_value`` is set to ``ReplaceUnknowns(variable, value)``; the
    original *variable* object is left untouched.

    Raises:
        TypeError: *variable* is neither continuous nor discrete.  The
            message names the offending type so the failure can be
            diagnosed from the traceback alone.
    """
    if is_continuous(variable):
        value = basic_stats.BasicStats(data, variable).mean
    elif is_discrete(variable):
        value = distribution.get_distribution(data, variable).modus()
    else:
        raise TypeError(
            "Variable must be continuous or discrete, got {}".format(
                type(variable).__name__
            )
        )

    # Shallow copy: the new variable shares everything with the original
    # except for the compute_value assigned below.
    imputed = copy.copy(variable)
    imputed.compute_value = ReplaceUnknowns(variable, value)
    return imputed
def column_imputer_average(variable, table):
    """Create a column imputer whose default is the column mean.

    Delegates to ``column_imputer_defaults``, passing the ``mean`` reported
    by ``BasicStats`` for *variable* in *table* as the default value.
    """
    mean = basic_stats.BasicStats(table, variable).mean
    return column_imputer_defaults(variable, table, mean)
def column_imputer_minimal(variable, table):
    """Create a column imputer whose default is the column minimum.

    Delegates to ``column_imputer_defaults``, passing the ``min`` reported
    by ``BasicStats`` for *variable* in *table* as the default value.
    """
    minimum = basic_stats.BasicStats(table, variable).min
    return column_imputer_defaults(variable, table, minimum)