def __call__(self, data):
    """
    Discretize the continuous variables of `data` and return a new table.

    Parameters
    ----------
    data : Orange.data.Table
        A data table to be discretized.
    """
    method = self.method or discretize.EqualFreq()

    def discretize_var(var):
        # Non-continuous variables pass through untouched.
        if not var.is_continuous:
            return var
        new_var = method(data, var)
        # Drop variables the method could not discretize, and —
        # when `remove_const` is set — those with fewer than two values.
        if new_var is None:
            return None
        if len(new_var.values) < 2 and self.remove_const:
            return None
        return new_var

    def maybe_discretize(variables, do_discretize):
        if not do_discretize:
            return variables
        return [v for v in map(discretize_var, variables) if v is not None]

    domain = Orange.data.Domain(
        maybe_discretize(data.domain.attributes, True),
        maybe_discretize(data.domain.class_vars, self.discretize_classes),
        maybe_discretize(data.domain.metas, self.discretize_metas))
    return data.transform(domain)
def __call__(self, data):
    """Return `data` with continuous attributes centered and/or scaled."""
    # Nothing to do if neither centering nor scaling was requested.
    if self.center is None and self.scale is None:
        return data

    def normalized(var):
        dist = distribution.get_distribution(data, var)
        if self.center != self.NoCentering:
            center = self.center(dist)
            # Shift the distribution so scaling is computed on centered values.
            dist[0, :] -= center
        else:
            center = 0
        if self.scale != self.NoScaling:
            scale = self.scale(dist)
            if scale < 1e-15:
                scale = 1  # guard against division by (near) zero
        else:
            scale = 1
        new_var = var.copy(
            compute_value=transformation.Normalizer(var, center, 1 / scale))
        if scale != 1:
            new_var.number_of_decimals = 3
        return new_var

    new_attrs = [normalized(var) if var.is_continuous else var
                 for var in data.domain.attributes]
    domain = Orange.data.Domain(new_attrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def transformed(self, data):
    """
    Return the normalized X matrix of `data` according to `self.method`.

    Fix: removed the unused local `nd = data.domain[self.attr]` left over
    in the attribute-normalization branch.
    """
    if data.X.shape[0] == 0:
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
        replace_infs(data.X)
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
            replace_infs(data.X)
        else:
            # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def __call__(self, data, axis=0):
    """Apply the fitted CUR decomposition to `data` along `axis`."""
    if data.domain is not self.domain:
        data = data.transform(self.domain)
    Xt = self.proj.transform(data.X, axis)
    if axis == 0:
        # Column selection: build a new domain from the chosen features.
        def selected_var(idx):
            var = data.domain[idx]
            return var.copy(compute_value=Projector(self, idx))

        domain = Orange.data.Domain(
            [selected_var(i) for i in self.features_],
            class_vars=data.domain.class_vars,
        )
        return Orange.data.Table(domain, Xt, data.Y)
    if axis == 1:
        # Row selection: keep the domain, subset Y and metas.
        rows = self.proj.samples_
        return Orange.data.Table(data.domain, Xt, data.Y[rows],
                                 metas=data.metas[rows])
    raise TypeError(
        "CUR can select either columns "
        "(axis = 0) or rows (axis = 1)."
    )
def __call__(self, data):
    """
    Transform spectra to EXAFS k-scores.

    Fix: rows that are entirely NaN cannot be interpolated by
    `nan_extend_edges_and_interpolate`; without a guard they propagate
    into `self.transformed` (the sibling implementation in this project
    already handles this). Fill them with a placeholder value and
    restore NaN afterwards.
    """
    data = self.transform_domain(data)

    if "edge_jump" in data.domain:
        edges = data.transform(Orange.data.Domain([data.domain["edge_jump"]]))
        I_jumps = edges.X[:, 0]
    else:
        raise NoEdgejumpProvidedException(
            'Invalid meta data: Intensity jump at edge is missing')

    # order X by wavenumbers:
    # xs - non ordered energies
    # xsind - indices corresponding to the ordered energies
    # mon - True if monotonic
    # X - spectra as corresponding to the ordered energies
    xs, xsind, mon, X = transform_to_sorted_features(data)

    # for the missing data
    X, nans = nan_extend_edges_and_interpolate(xs[xsind], X)
    # TODO notify the user if some unknown values were interpolated

    # Replace remaining NaNs (where whole rows were NaN) with some
    # values so that the function does not crash. Results for these
    # rows are discarded below.
    nan_rows = np.isnan(X).all(axis=1)
    X[nan_rows] = 1.

    # do the transformation
    X = self.transformed(X, xs[xsind], I_jumps)

    # discard nan rows
    X[nan_rows] = np.nan

    # k scores are always ordered, so do not restore order
    return X
def __call__(self, data):
    """Transform spectra to EXAFS k-scores."""
    data = self.transform_domain(data)

    if "edge_jump" not in data.domain:
        raise NoEdgejumpProvidedException(
            'Invalid meta data: Intensity jump at edge is missing')
    jump_col = data.transform(
        Orange.data.Domain([data.domain["edge_jump"]]))
    I_jumps = jump_col.X[:, 0]

    # Order the spectra by energy:
    #   xs    - unordered energies
    #   xsind - indices that sort the energies
    #   mon   - presumably a monotonicity flag (unused here)
    #   X     - spectra matching the ordered energies
    xs, xsind, mon, X = transform_to_sorted_features(data)

    # Interpolate missing values.
    X, nans = nan_extend_edges_and_interpolate(xs[xsind], X)
    # TODO notify the user if some unknown values were interpolated

    # All-NaN rows cannot be interpolated; give them a dummy value so
    # the transformation does not crash, then discard those results.
    nan_rows = np.isnan(X).all(axis=1)
    X[nan_rows] = 1.

    X = self.transformed(X, xs[xsind], I_jumps)

    # Restore NaN for the placeholder rows.
    X[nan_rows] = np.nan

    # k scores are always ordered, so do not restore order
    return X
def __call__(self, data):
    """Return `data` with every attribute normalized against the reference."""
    common = _NormalizeReferenceCommon(self.reference, data.domain)
    new_attrs = [var.copy(compute_value=NormalizeFeature(idx, common))
                 for idx, var in enumerate(data.domain.attributes)]
    new_domain = Orange.data.Domain(new_attrs, data.domain.class_vars,
                                    data.domain.metas)
    return data.transform(new_domain)
def __call__(self, data):
    """Return `data` with attributes phase-normalized against the reference."""
    shared = _NormalizePhaseReferenceCommon(self.reference, data.domain)
    attributes = [
        attr.copy(compute_value=NormalizeFeature(pos, shared))
        for pos, attr in enumerate(data.domain.attributes)
    ]
    return data.transform(
        Orange.data.Domain(attributes, data.domain.class_vars,
                           data.domain.metas))
def __call__(self, data):
    """
    Keep attributes whose sparseness (number of zeros or NaNs per
    column, depending on `self.filter0`) does not exceed the threshold.

    Fix: the NaN-counting sparse branch built `data_csc` but then sliced
    the original `data.X` — column slicing is not supported for all
    sparse formats (e.g. COO) and is slow for CSR; slice the CSC copy.
    """
    threshold = self.threshold
    if self.threshold < 1:
        # A fractional threshold is relative to the number of instances.
        threshold *= data.X.shape[0]
    if self.filter0:
        # Count zero entries per column.
        if sp.issparse(data.X):
            data_csc = sp.csc_matrix(data.X)
            h, w = data_csc.shape
            sparseness = [
                h - data_csc[:, i].count_nonzero() for i in range(w)
            ]
        else:
            sparseness = data.X.shape[0] - np.count_nonzero(data.X, axis=0)
    else:  # filter by nans
        if sp.issparse(data.X):
            data_csc = sp.csc_matrix(data.X)
            sparseness = [
                np.sum(np.isnan(data_csc[:, i].data))
                for i in range(data_csc.shape[1])
            ]
        else:
            sparseness = np.sum(np.isnan(data.X), axis=0)
    att = [
        a for a, s in zip(data.domain.attributes, sparseness)
        if s <= threshold
    ]
    domain = Orange.data.Domain(att, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def transformed(self, data):
    """
    Return the normalized X matrix of `data` according to `self.method`.

    Fix: removed the unused local `nd = data.domain[self.attr]` left over
    in the attribute-normalization branch.
    """
    if data.X.shape[0] == 0:
        return data.X
    data = data.copy()
    if self.method == Normalize.Vector:
        nans = np.isnan(data.X)
        nan_num = nans.sum(axis=1, keepdims=True)
        ys = data.X
        if np.any(nan_num > 0):
            # interpolate nan elements for normalization
            x = getx(data)
            ys = interp1d_with_unknowns_numpy(x, ys, x)
            ys = np.nan_to_num(ys)  # edge elements can still be zero
        data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
        if np.any(nan_num > 0):
            # keep nans where they were
            data.X[nans] = float("nan")
    elif self.method == Normalize.Area:
        norm_data = Integrate(methods=self.int_method,
                              limits=[[self.lower, self.upper]])(data)
        data.X /= norm_data.X
    elif self.method == Normalize.Attribute:
        if self.attr in data.domain and isinstance(
                data.domain[self.attr], Orange.data.ContinuousVariable):
            ndom = Orange.data.Domain([data.domain[self.attr]])
            factors = data.transform(ndom)
            data.X /= factors.X
        else:
            # invalid attribute for normalization
            data.X *= float("nan")
    return data.X
def __call__(self, data):
    """Build a domain of EXAFS features and transform `data` onto it."""
    if data.X.shape[1] == 0:
        new_attrs = []
    else:
        # --- compute K
        energies = np.sort(getx(data))  # input data can be in any order
        start_idx, end_idx = extra_exafs.get_idx_bounds(
            energies, self.edge, self.extra_from, self.extra_to)
        k_interp, k_points = extra_exafs.get_K_points(
            energies, self.edge, start_idx, end_idx)
        # ----------
        common = _ExtractEXAFSCommon(self.edge, self.extra_from,
                                     self.extra_to, self.poly_deg,
                                     self.kweight, self.m,
                                     k_interp, data.domain)
        new_attrs = [
            ContinuousVariable(
                name=str(var),
                compute_value=ExtractEXAFSFeature(i, common))
            for i, var in enumerate(k_interp)
        ]
    domain = Orange.data.Domain(new_attrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def __call__(self, data):
    """Continuize `data` and return the transformed table."""
    from . import continuize
    # Delegate the domain construction to DomainContinuizer.
    continuizer = continuize.DomainContinuizer(
        zero_based=self.zero_based,
        multinomial_treatment=self.multinomial_treatment)
    return data.transform(continuizer(data))
def __call__(self, data):
    """Interpolate `data` onto the target's attributes and return a new table."""
    interpolator = _InterpolateCommon(self.points, self.kind, None,
                                      handle_nans=self.handle_nans,
                                      interpfn=self.interpfn)
    X = interpolator(data)
    domain = Orange.data.Domain(self.target.domain.attributes,
                                data.domain.class_vars,
                                data.domain.metas)
    out = data.transform(domain)
    out.X = X
    return out
def add_meta_to_table(data, var, values):
    """Return a copy of `data` extended with meta variable `var` set to `values`."""
    new_domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var,))
    new_table = data.transform(new_domain)
    # Fill the freshly added meta column.
    new_table[:, var] = np.atleast_1d(values).reshape(-1, 1)
    return new_table
def __call__(self, data, variable):
    """Return `variable` with a model-based compute_value for unknowns."""
    variable = data.domain[variable]
    domain = domain_with_class_var(data.domain, variable)
    if not self.learner.check_learner_adequacy(domain):
        raise ValueError("`{}` doesn't support domain type"
                         .format(self.learner.name))
    model = self.learner(data.transform(domain))
    assert model.domain.class_var == variable
    return variable.copy(
        compute_value=ReplaceUnknownsModel(variable, model))
def image_values(self):
    """Return a callable computing the image values for the current settings."""
    if self.value_type == 0:  # integrals
        method = self.integration_methods[self.integration_method]
        if method == Integrate.PeakAt:
            limits = [[self.choose, self.choose]]
        else:
            limits = [[self.lowlim, self.highlim]]
        return Integrate(methods=method, limits=limits)
    # Otherwise show a single attribute's values.
    return lambda data, attr=self.attr_value: \
        data.transform(Domain([data.domain[attr]]))
def __call__(self, data, variable):
    """Return `variable` with a model-based compute_value for unknowns."""
    variable = data.domain[variable]
    domain = domain_with_class_var(data.domain, variable)
    if not self.learner.check_learner_adequacy(domain):
        raise ValueError("`{}` doesn't support domain type".format(
            self.learner.name))
    fitted = self.learner(data.transform(domain))
    assert fitted.domain.class_var == variable
    return variable.copy(
        compute_value=ReplaceUnknownsModel(variable, fitted))
def __call__(self, data, variable):
    """Return `variable` with a model-based compute_value for unknowns."""
    variable = data.domain[variable]
    domain = domain_with_class_var(data.domain, variable)
    # A non-None reason means the learner cannot handle this domain.
    reason = self.learner.incompatibility_reason(domain)
    if reason is not None:
        raise ValueError("`{}` doesn't support domain type".format(
            self.learner.name))
    model = self.learner(data.transform(domain))
    assert model.domain.class_var == variable
    return variable.copy(
        compute_value=ReplaceUnknownsModel(variable, model))
def __call__(self, data):
    """Keep attributes whose fraction of non-zero entries meets the threshold."""
    if sp.issparse(data.X):
        csc = sp.csc_matrix(data.X)
        n_rows, n_cols = csc.shape
        density = [csc[:, i].count_nonzero() / n_rows
                   for i in range(n_cols)]
    else:
        density = np.count_nonzero(data.X, axis=0) / data.X.shape[0]
    kept = [a for a, d in zip(data.domain.attributes, density)
            if d >= self.threshold]
    domain = Orange.data.Domain(kept, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def commit(self):
    """
    Apply the changes to the input data and send the changed data to
    output.

    Fixes: removed a leftover commented-out debug `print`, and renamed
    the helper so the local function `state` is no longer shadowed by
    the list it produces.
    """
    self._set_modified(False)
    self.Error.duplicate_var_name.clear()

    data = self.data
    if data is None:
        self.Outputs.data.send(None)
        return

    model = self.variables_model

    def state_at(i):
        # type: (int) -> Tuple[Variable, List[Transform]]
        midx = model.index(i, 0)
        return (model.data(midx, Qt.EditRole),
                model.data(midx, TransformRole))

    states = [state_at(i) for i in range(model.rowCount())]
    if all(tr is None or not tr for _, tr in states):
        # No transform anywhere: pass the input through unchanged.
        self.Outputs.data.send(data)
        return

    output_vars = []
    input_vars = data.domain.variables + data.domain.metas
    assert all(v_.name == v.name
               for v, (v_, _) in zip(input_vars, states))
    for (_, tr), v in zip(states, input_vars):
        if tr:
            var = apply_transform(v, tr)
        else:
            var = v
        output_vars.append(var)

    # Refuse to output a domain with duplicated variable names.
    if len(output_vars) != len({v.name for v in output_vars}):
        self.Error.duplicate_var_name()
        self.Outputs.data.send(None)
        return

    domain = data.domain
    nx = len(domain.attributes)
    ny = len(domain.class_vars)
    domain = Orange.data.Domain(
        output_vars[:nx], output_vars[nx: nx + ny], output_vars[nx + ny:]
    )
    new_data = data.transform(domain)
    self.Outputs.data.send(new_data)
def __call__(self, data):
    """
    Remove columns with constant values from the dataset and return
    the resulting data table.

    Parameters
    ----------
    data : an input dataset
    """
    # A column is kept when its min and max differ.
    # NOTE(review): for an all-NaN column nanmin/nanmax are both NaN and
    # NaN != NaN holds, so such columns are kept — confirm this is intended.
    keep = bn.nanmin(data.X, axis=0) != bn.nanmax(data.X, axis=0)
    kept_attrs = [att for att, ok in zip(data.domain.attributes, keep)
                  if ok]
    domain = Orange.data.Domain(kept_attrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def __call__(self, data, threshold=None):
    """Remove attributes whose NaN count reaches the threshold."""
    # Missing entries in sparse data are treated as zeros, so there is
    # nothing to remove.
    if sp.issparse(data.X):
        return data
    if threshold is None:
        threshold = (data.X.shape[0] if self.threshold is None
                     else self.threshold)
    if isinstance(threshold, float):
        # A float threshold is a fraction of the number of instances.
        threshold = threshold * data.X.shape[0]
    nan_counts = np.sum(np.isnan(data.X), axis=0)
    kept = [a for a, n in zip(data.domain.attributes, nan_counts)
            if n < threshold]
    domain = Orange.data.Domain(kept, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def __call__(self, data):
    """
    Apply an imputation method to the given dataset. Returns a new
    data table with missing values replaced by their imputations.

    Parameters
    ----------
    data : Orange.data.Table
        An input data table.
    """
    method = self.method or impute.Average()
    new_attrs = [method(data, attr) for attr in data.domain.attributes]
    domain = Orange.data.Domain(new_attrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def __call__(self, data):
    """
    Remove columns with constant values from the dataset and return
    the resulting data table.

    Parameters
    ----------
    data : an input dataset
    """
    # Keep a column only when it is not all-NaN and its min and max differ.
    not_all_nan = ~bn.allnan(data.X, axis=0)
    varies = bn.nanmin(data.X, axis=0) != bn.nanmax(data.X, axis=0)
    keep = np.logical_and(not_all_nan, varies)
    kept_attrs = [att for att, ok in zip(data.domain.attributes, keep)
                  if ok]
    domain = Orange.data.Domain(kept_attrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def __call__(self, data):
    """
    Apply an imputation method to the given dataset. Returns a new
    data table with missing values replaced by their imputations.

    Parameters
    ----------
    data : Orange.data.Table
        An input data table.
    """
    imputation = self.method or impute.Average()
    imputed = [imputation(data, var) for var in data.domain.attributes]
    return data.transform(
        Orange.data.Domain(imputed, data.domain.class_vars,
                           data.domain.metas))
def __call__(self, data):
    """Return the variable's column with NaNs filled by model predictions."""
    if isinstance(data, Orange.data.Instance):
        data = Orange.data.Table.from_list(data.domain, [data])
    domain = data.domain
    column = np.array(data.get_column_view(self.variable)[0], copy=True)
    mask = np.isnan(column)
    # Fast path: nothing is missing.
    if not np.any(mask):
        return column
    if domain.class_vars:
        # cannot have class var in domain (due to backmappers in model)
        data = data.transform(
            Orange.data.Domain(domain.attributes, None, domain.metas))
    column[mask] = self.model(data[mask])
    return column
def __call__(self, data):
    # SQL tables cannot be imputed eagerly; delegate to the Impute
    # preprocessor instead.
    from Orange.data.sql.table import SqlTable
    if isinstance(data, SqlTable):
        return Impute()(data)
    # NOTE(review): `sklearn.preprocessing.Imputer` was deprecated in
    # scikit-learn 0.20 and removed in 0.22; this should migrate to
    # `sklearn.impute.SimpleImputer` — confirm the pinned sklearn version.
    imputer = skl_preprocessing.Imputer(strategy=self.strategy)
    X = imputer.fit_transform(data.X)
    # Create new variables with appropriate `compute_value`, but
    # drop the ones which do not have valid `imputer.statistics_`
    # (i.e. all NaN columns). `sklearn.preprocessing.Imputer` already
    # drops them from the transformed X.
    features = [impute.Average()(data, var, value)
                for var, value in zip(data.domain.attributes,
                                      imputer.statistics_)
                if not np.isnan(value)]
    assert X.shape[1] == len(features)
    domain = Orange.data.Domain(features, data.domain.class_vars,
                                data.domain.metas)
    new_data = data.transform(domain)
    # Overwrite with the already-imputed matrix to avoid recomputing.
    new_data.X = X
    return new_data
def __call__(self, data):
    """Impute missing values with scikit-learn's SimpleImputer."""
    from Orange.data.sql.table import SqlTable
    # SQL tables are handled by the Impute preprocessor instead.
    if isinstance(data, SqlTable):
        return Impute()(data)
    imputer = SimpleImputer(strategy=self.strategy)
    X = imputer.fit_transform(data.X)
    # Build variables with a proper `compute_value`, skipping columns
    # whose `imputer.statistics_` is NaN (all-NaN columns) — the imputer
    # already drops those columns from the transformed X.
    features = [
        impute.Average()(data, var, value)
        for var, value in zip(data.domain.attributes, imputer.statistics_)
        if not np.isnan(value)
    ]
    assert X.shape[1] == len(features)
    domain = Orange.data.Domain(features, data.domain.class_vars,
                                data.domain.metas)
    new_data = data.transform(domain)
    new_data.X = X
    return new_data
def compute_image(data: Orange.data.Table, attr_x, attr_y,
                  integrate_fn, state: TaskState):
    """Compute image coordinates and integrated values, honouring interruption."""

    def progress_interrupt(i: float):
        if state.is_interruption_requested():
            raise InterruptException

    class Result():
        pass

    res = Result()

    ndom = Domain([data.domain[attr_x], data.domain[attr_y]])
    datam = data.transform(ndom)
    progress_interrupt(0)

    res.coorx = datam.X[:, 0]
    res.coory = datam.X[:, 1]
    res.data_points = datam.X
    res.lsx = lsx = values_to_linspace(res.coorx)
    res.lsy = lsy = values_to_linspace(res.coory)
    progress_interrupt(0)

    if lsx[-1] * lsy[-1] > IMAGE_TOO_BIG:
        raise ImageTooBigException((lsx[-1], lsy[-1]))

    # The code below does this, but part-wise:
    # d = integrate_fn(data).X[:, 0]
    parts = []
    for chunk in split_to_size(len(data), 10000):
        parts.append(integrate_fn(data[chunk]).X[:, 0])
        progress_interrupt(0)
    res.d = np.concatenate(parts)
    progress_interrupt(0)
    return res
def __call__(self, data):
    """Build a domain of EXAFS features and transform `data` onto it."""
    if data.X.shape[1] > 0:
        # --- compute K
        energies = np.sort(getx(data))  # input data can be in any order
        start_idx, end_idx = extra_exafs.get_idx_bounds(
            energies, self.edge, self.extra_from, self.extra_to)
        k_interp, k_points = extra_exafs.get_K_points(
            energies, self.edge, start_idx, end_idx)
        # ----------
        shared = _ExtractEXAFSCommon(self.edge, self.extra_from,
                                     self.extra_to, self.poly_deg,
                                     self.kweight, self.m,
                                     k_interp, data.domain)
        newattrs = []
        for i, var in enumerate(k_interp):
            newattrs.append(ContinuousVariable(
                name=str(var),
                compute_value=ExtractEXAFSFeature(i, shared)))
    else:
        newattrs = []
    domain = Orange.data.Domain(newattrs, data.domain.class_vars,
                                data.domain.metas)
    return data.transform(domain)
def __call__(self, data, axis=0):
    """Apply the fitted CUR decomposition to `data` along `axis`."""
    if data.domain is not self.domain:
        data = data.transform(self.domain)
    Xt = self.proj.transform(data.X, axis)
    if axis == 0:
        # Column selection: new domain over the chosen features.
        chosen = []
        for org_idx in self.features_:
            var = data.domain[org_idx]
            chosen.append(var.copy(compute_value=Projector(self, org_idx)))
        domain = Orange.data.Domain(chosen,
                                    class_vars=data.domain.class_vars)
        transformed_data = Orange.data.Table(domain, Xt, data.Y)
    elif axis == 1:
        # Row selection: same domain, subset Y and metas.
        rows = self.proj.samples_
        transformed_data = Orange.data.Table(
            data.domain, Xt, data.Y[rows], metas=data.metas[rows])
    else:
        raise TypeError('CUR can select either columns '
                        '(axis = 0) or rows (axis = 1).')
    return transformed_data
def __call__(self, data):
    """Project `data` onto this object's stored domain."""
    target_domain = self.domain
    return data.transform(target_domain)
def __call__(self, data):
    """Apply the fitted projection to `data`, converting domains if needed."""
    pre_domain = self.projection.pre_domain
    if data.domain != pre_domain:
        data = data.transform(pre_domain)
    return self.projection.transform(data.X)
def __call__(self, data):
    """Apply the fitted LDA transform to `data`, converting domains if needed."""
    expected = self.lda.pre_domain
    if data.domain != expected:
        data = data.transform(expected)
    return self.lda.transform(data.X)
def extract_col(data, var):
    """Return the values of `var` from `data` as a 1-D array."""
    single = data.transform(Domain([var]))
    return single.X[:, 0]