def from_data(cls, variable, data):
    """
    Construct the distribution of `variable` over `data`.

    The storage is first asked to compute the distribution itself through
    `data._compute_distributions`. If it raises `NotImplementedError`, the
    distribution is assembled here from the column of values and the
    instance weights (ones if the data is unweighted).

    :param variable: the variable, in any form accepted by `_get_variable`
    :param data: the data storage from which the distribution is computed
    :return: a new instance of `cls` filled with the distribution, with
        attributes `unknowns` and `variable` set
    """
    variable = _get_variable(data, variable)
    try:
        dist, unknowns = data._compute_distributions([variable])[0]
    except NotImplementedError:
        col = data[:, variable]
        dtype = col.dtype
        if data.has_weights():
            weights = np.asarray(data.W)
            # Values and weights share one array, so a non-float column has
            # to be promoted when the weights are floats; otherwise the
            # weights would be truncated when written into the second row.
            if "float" not in dtype.name and "float" in weights.dtype.name:
                dtype = weights.dtype
            dist = np.empty((2, len(col)), dtype=dtype)
            dist[0, :] = col
            dist[1, :] = weights
        else:
            # Unweighted data: every value counts as one.
            dist = np.ones((2, len(col)), dtype=dtype)
            dist[0, :] = col
        # NOTE(review): sorting along axis 0 orders the two entries within
        # each column rather than ordering columns by value -- confirm this
        # is what `_orange.valuecount` expects.
        dist.sort(axis=0)
        dist = np.array(_orange.valuecount(dist))
        # NOTE(review): presumably `valuecount` drops the columns of unknown
        # values, so the difference is the number of unknowns -- verify.
        unknowns = len(col) - dist.shape[1]
    self = super().__new__(cls, dist.shape)
    self[:] = dist
    self.unknowns = unknowns
    self.variable = variable
    return self
def from_data(cls, data, variable):
    """
    Build the distribution of `variable` (a variable with a list of
    `values`) over the instances in `data`.

    The storage's own `_compute_distributions` is preferred. When it raises
    `NotImplementedError`, the instances are counted one by one: each known
    value adds the instance weight (1 for unweighted data) to the element at
    the value's index, and each unknown value adds it to `unknowns`.

    :param data: the data storage from which the distribution is computed
    :param variable: the variable, in any form accepted by `_get_variable`
    :return: a new instance of `cls` with attributes `unknowns` and
        `variable` set
    """
    variable = _get_variable(data, variable)
    try:
        counts, n_unknown = data._compute_distributions([variable])[0]
        self = super().__new__(cls, len(counts))
        self[:] = counts
        self.unknowns = n_unknown
    except NotImplementedError:
        n_values = len(variable.values)
        self = super().__new__(cls, n_values)
        self[:] = np.zeros(n_values)
        self.unknowns = 0
        if not data.has_weights():
            for row in data:
                value = row[variable]
                # NaN is the only value that differs from itself.
                if value == value:
                    self[int(value)] += 1
                else:
                    self.unknowns += 1
        else:
            for row, weight in zip(data, data.W):
                value = row[variable]
                if np.isnan(value):
                    self.unknowns += weight
                else:
                    self[int(value)] += weight
    self.variable = variable
    return self
def from_data(cls, data, variable):
    """
    Build the distribution of `variable` (a variable with a list of
    `values`) over the instances in `data`.

    The storage's own `_compute_distributions` is preferred. When it raises
    `NotImplementedError`, the instances are counted one by one: each known
    value adds the instance weight (1 for unweighted data) to the element at
    the value's index, and each unknown (NaN) value adds it to `unknowns`.

    :param data: the data storage from which the distribution is computed
    :param variable: the variable, in any form accepted by `_get_variable`
    :return: a new instance of `cls` with attributes `unknowns` and
        `variable` set
    """
    variable = _get_variable(data, variable)
    try:
        dist, unknowns = data._compute_distributions([variable])[0]
        self = super().__new__(cls, len(dist))
        self[:] = dist
        self.unknowns = unknowns
    except NotImplementedError:
        # Allocate through the class, as in the branch above: a plain
        # `np.zeros` array is not an instance of `cls` and does not accept
        # the `unknowns` and `variable` attributes.
        self = super().__new__(cls, len(variable.values))
        self[:] = np.zeros(len(variable.values))
        self.unknowns = 0
        if data.has_weights():
            for inst, w in zip(data, data.W):
                val = inst[variable]
                if not math.isnan(val):
                    # Values may be floats; indices must be integers.
                    self[int(val)] += w
                else:
                    self.unknowns += w
        else:
            for inst in data:
                val = inst[variable]
                # NaN is the only value that differs from itself.
                if val == val:
                    self[int(val)] += 1
                else:
                    self.unknowns += 1
    self.variable = variable
    return self
def __init__(self, data, learners, store_data=False, store_models=False,
             preprocessor=None, callback=None):
    """
    Evaluate `learners` on `data`, testing on one instance at a time and
    training on all the others.

    :param data: the data on which the learners are evaluated
    :param learners: a sequence of learners
    :param store_data: passed on to the base class constructor
    :param store_models: if true, the models from each fold are stored in
        `self.models` (one list per test instance, one element per learner;
        `None` where no model was obtained)
    :param preprocessor: if not `None`, it is called on the training data
        of each fold
    :param callback: passed on to the base class constructor
    """
    super().__init__(
        data,
        len(learners),
        store_data=store_data,
        store_models=store_models,
        preprocessor=preprocessor,
        callback=callback,
    )
    domain = data.domain
    X = data.X.copy()
    Y = data._Y.copy()
    metas = data.metas.copy()

    # The first row is the test instance, the remaining rows are the
    # training set. The slices are taken once; the loop below changes their
    # content by swapping rows of the underlying arrays.
    # NOTE(review): this relies on slices being views, which holds for
    # dense numpy arrays -- confirm for other storage types.
    teX, trX = X[:1], X[1:]
    teY, trY = Y[:1], Y[1:]
    te_metas, tr_metas = metas[:1], metas[1:]
    if data.has_weights():
        W = data.W.copy()
        teW, trW = W[:1], W[1:]
    else:
        W = teW = trW = None

    self.row_indices = np.arange(len(data))
    if self.store_models:
        self.models = []
    self.actual = Y.flatten()
    nmethods = len(learners)
    n_callbacks = nmethods * len(data)
    for test_idx in self.row_indices:
        # Successive swaps with row 0 put the instance that was originally
        # at `test_idx` into the first row.
        X[[0, test_idx]] = X[[test_idx, 0]]
        Y[[0, test_idx]] = Y[[test_idx, 0]]
        metas[[0, test_idx]] = metas[[test_idx, 0]]
        # `W` is an array; testing its truth value would raise ValueError
        # for more than one element, so compare with None explicitly.
        if W is not None:
            W[[0, test_idx]] = W[[test_idx, 0]]
        test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
        train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
        if self.preprocessor is not None:
            train_data = self.preprocessor(train_data)
        if self.store_models:
            fold_models = [None] * nmethods
            self.models.append(fold_models)
        for i, learner in enumerate(learners):
            model = self.train_if_succ(i, learner, train_data)
            self.call_callback((test_idx * nmethods + i) / n_callbacks)
            if not model:
                # No model for this learner on this fold; skip prediction.
                continue
            if self.store_models:
                fold_models[i] = model
            if data.domain.has_discrete_class:
                values, probs = model(test_data, model.ValueProbs)
                self.predicted[i][test_idx] = values
                self.probabilities[i][test_idx, :] = probs
            elif data.domain.has_continuous_class:
                values = model(test_data, model.Value)
                self.predicted[i][test_idx] = values
    self.call_callback(1)
def __call__(self, data):
    """
    Fit a model on `data` and return it.

    If the class does not override `Fitter.fit`, the model is fitted with
    `fit_storage(data)`; otherwise `fit` is called with `data.X`, `data.Y`
    and the weights (`None` if the data has no weights). The model's
    `domain` and `supports_multiclass` are set before it is returned.

    :param data: the training data
    :return: the fitted model
    :raises TypeError: if the data has more than one class variable and
        the fitter does not support that
    """
    has_multiple_classes = len(data.domain.class_vars) > 1
    if has_multiple_classes and not self.supports_multiclass:
        raise TypeError("fitter doesn't support multiple class variables")

    self.domain = data.domain

    uses_default_fit = type(self).fit is Fitter.fit
    if uses_default_fit:
        model = self.fit_storage(data)
    else:
        features, targets = data.X, data.Y
        weights = data.W if data.has_weights() else None
        model = self.fit(features, targets, weights)

    model.domain = data.domain
    model.supports_multiclass = self.supports_multiclass
    return model
def __init__(self, data, learners, store_data=False, store_models=False):
    """
    Evaluate `learners` on `data`, testing on one instance at a time and
    training on all the others.

    :param data: the data on which the learners are evaluated
    :param learners: a sequence of learners
    :param store_data: passed on to the base class constructor
    :param store_models: if true, the models from each fold are stored in
        `self.models` (one list per test instance, one model per learner)
    """
    super().__init__(data, len(learners), store_data=store_data,
                     store_models=store_models)
    domain = data.domain
    X = data.X.copy()
    Y = data._Y.copy()
    metas = data.metas.copy()

    # The first row is the test instance, the remaining rows are the
    # training set. The slices are taken once; the loop below changes their
    # content by swapping rows of the underlying arrays.
    # NOTE(review): this relies on slices being views, which holds for
    # dense numpy arrays -- confirm for other storage types.
    teX, trX = X[:1], X[1:]
    teY, trY = Y[:1], Y[1:]
    te_metas, tr_metas = metas[:1], metas[1:]
    if data.has_weights():
        W = data.W.copy()
        teW, trW = W[:1], W[1:]
    else:
        W = teW = trW = None

    self.row_indices = np.arange(len(data))
    if self.store_models:
        self.models = []
    self.actual = Y.flatten()
    class_var = data.domain.class_var
    for test_idx in self.row_indices:
        # Successive swaps with row 0 put the instance that was originally
        # at `test_idx` into the first row.
        X[[0, test_idx]] = X[[test_idx, 0]]
        Y[[0, test_idx]] = Y[[test_idx, 0]]
        metas[[0, test_idx]] = metas[[test_idx, 0]]
        # `W` is an array; testing its truth value would raise ValueError
        # for more than one element, so compare with None explicitly.
        if W is not None:
            W[[0, test_idx]] = W[[test_idx, 0]]
        test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
        train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
        if self.store_models:
            fold_models = []
            self.models.append(fold_models)
        for i, learner in enumerate(learners):
            model = learner(train_data)
            if self.store_models:
                fold_models.append(model)
            if is_discrete(class_var):
                values, probs = model(test_data, model.ValueProbs)
                self.predicted[i][test_idx] = values
                self.probabilities[i][test_idx, :] = probs
            elif is_continuous(class_var):
                values = model(test_data, model.Value)
                self.predicted[i][test_idx] = values
def __call__(self, data, fitters):
    """
    Evaluate `fitters` on `data`, testing on one instance at a time and
    training on all the others.

    :param data: the data on which the fitters are evaluated
    :param fitters: a sequence of fitters
    :return: a `Results` instance with `row_indices`, `actual`,
        `predicted` and, for a discrete class, `probabilities` filled in;
        if `self.store_models` is true, it also has `models` (one list per
        test instance, one model per fitter)
    """
    results = Results(data, len(fitters), store_data=self.store_data)
    domain = data.domain
    X = data.X.copy()
    Y = data.Y.copy()
    metas = data.metas.copy()

    # The first row is the test instance, the remaining rows are the
    # training set. The slices are taken once; the loop below changes their
    # content by swapping rows of the underlying arrays.
    # NOTE(review): this relies on slices being views, which holds for
    # dense numpy arrays -- confirm for other storage types.
    teX, trX = X[:1], X[1:]
    teY, trY = Y[:1], Y[1:]
    te_metas, tr_metas = metas[:1], metas[1:]
    if data.has_weights():
        W = data.W.copy()
        teW, trW = W[:1], W[1:]
    else:
        W = teW = trW = None

    results.row_indices = np.arange(len(data))
    if self.store_models:
        results.models = []
    results.actual = Y.flatten()
    class_var = data.domain.class_var
    for test_idx in results.row_indices:
        # Successive swaps with row 0 put the instance that was originally
        # at `test_idx` into the first row.
        X[[0, test_idx]] = X[[test_idx, 0]]
        Y[[0, test_idx]] = Y[[test_idx, 0]]
        metas[[0, test_idx]] = metas[[test_idx, 0]]
        # `W` is an array; testing its truth value would raise ValueError
        # for more than one element, so compare with None explicitly.
        if W is not None:
            W[[0, test_idx]] = W[[test_idx, 0]]
        test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
        train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
        if self.store_models:
            fold_models = []
            results.models.append(fold_models)
        for i, fitter in enumerate(fitters):
            model = fitter(train_data)
            if self.store_models:
                fold_models.append(model)
            if is_discrete(class_var):
                values, probs = model(test_data, model.ValueProbs)
                results.predicted[i][test_idx] = values
                results.probabilities[i][test_idx, :] = probs
            elif is_continuous(class_var):
                values = model(test_data, model.Value)
                results.predicted[i][test_idx] = values
    return results
def __call__(self, data):
    """
    Preprocess `data`, fit a model on it and return the model.

    If the class does not override `Learner.fit`, the model is fitted with
    `fit_storage(data)`; otherwise `fit` is called with `data.X`, `data.Y`
    and the weights (`None` if the data has no weights). The model's
    `domain` and `supports_multiclass` are set before it is returned.

    :param data: the training data
    :return: the fitted model
    :raises TypeError: if the preprocessed data has more than one class
        variable and the learner does not support that
    """
    data = self.preprocess(data)

    has_multiple_classes = len(data.domain.class_vars) > 1
    if has_multiple_classes and not self.supports_multiclass:
        raise TypeError("%s doesn't support multiple class variables" %
                        self.__class__.__name__)

    self.domain = data.domain

    uses_default_fit = type(self).fit is Learner.fit
    if uses_default_fit:
        model = self.fit_storage(data)
    else:
        features, targets = data.X, data.Y
        weights = data.W if data.has_weights() else None
        model = self.fit(features, targets, weights)

    model.domain = data.domain
    model.supports_multiclass = self.supports_multiclass
    return model
def __init__(self, data, learners, store_data=False, store_models=False):
    """
    Evaluate `learners` on `data`, testing on one instance at a time and
    training on all the others.

    :param data: the data on which the learners are evaluated
    :param learners: a sequence of learners
    :param store_data: passed on to the base class constructor
    :param store_models: if true, the models from each fold are stored in
        `self.models` (one list per test instance, one model per learner)
    """
    super().__init__(data, len(learners), store_data=store_data,
                     store_models=store_models)
    domain = data.domain
    X = data.X.copy()
    Y = data._Y.copy()
    metas = data.metas.copy()

    # The first row is the test instance, the remaining rows are the
    # training set. The slices are taken once; the loop below changes their
    # content by swapping rows of the underlying arrays.
    # NOTE(review): this relies on slices being views, which holds for
    # dense numpy arrays -- confirm for other storage types.
    teX, trX = X[:1], X[1:]
    teY, trY = Y[:1], Y[1:]
    te_metas, tr_metas = metas[:1], metas[1:]
    if data.has_weights():
        W = data.W.copy()
        teW, trW = W[:1], W[1:]
    else:
        W = teW = trW = None

    self.row_indices = np.arange(len(data))
    if self.store_models:
        self.models = []
    self.actual = Y.flatten()
    for test_idx in self.row_indices:
        # Successive swaps with row 0 put the instance that was originally
        # at `test_idx` into the first row.
        X[[0, test_idx]] = X[[test_idx, 0]]
        Y[[0, test_idx]] = Y[[test_idx, 0]]
        metas[[0, test_idx]] = metas[[test_idx, 0]]
        # `W` is an array; testing its truth value would raise ValueError
        # for more than one element, so compare with None explicitly.
        if W is not None:
            W[[0, test_idx]] = W[[test_idx, 0]]
        test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
        train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
        if self.store_models:
            fold_models = []
            self.models.append(fold_models)
        for i, learner in enumerate(learners):
            model = learner(train_data)
            if self.store_models:
                fold_models.append(model)
            if data.domain.has_discrete_class:
                values, probs = model(test_data, model.ValueProbs)
                self.predicted[i][test_idx] = values
                self.probabilities[i][test_idx, :] = probs
            elif data.domain.has_continuous_class:
                values = model(test_data, model.Value)
                self.predicted[i][test_idx] = values