예제 #1
0
    def from_data(cls, variable, data):
        """
        Build the distribution of `variable` from `data`.

        The storage is first asked to compute the distribution itself through
        `data._compute_distributions`. If it raises `NotImplementedError`,
        the distribution is computed here from the column of values and,
        when `data.has_weights()` is true, from `data.W`.

        :param variable: variable (in any form accepted by `_get_variable`)
            whose distribution is computed
        :param data: data storage to read values and weights from
        :return: new instance of `cls` filled with the distribution, with
            the attributes `unknowns` and `variable` set
        """
        variable = _get_variable(data, variable)
        try:
            dist, unknowns = data._compute_distributions([variable])[0]
        except NotImplementedError:
            col = data[:, variable]
            dtype = col.dtype
            if data.has_weights():
                weights = np.asarray(data.W)
                # Values and weights share one buffer; if the values are not
                # floating point but the weights are, promote the buffer so
                # that the weights are not truncated on assignment.
                if "float" not in dtype.name and "float" in weights.dtype.name:
                    dtype = weights.dtype
                dist = np.empty((2, len(col)), dtype=dtype)
                dist[0, :] = col
                dist[1, :] = weights
            else:
                dist = np.ones((2, len(col)), dtype=dtype)
                dist[0, :] = col
            # Order whole columns by value (row 0) so that each value stays
            # paired with its weight; sorting along axis 0 would instead sort
            # the (value, weight) pair inside every column.
            # NOTE(review): assumes _orange.valuecount expects columns ordered
            # by the first row - confirm against its implementation.
            dist = dist[:, np.argsort(dist[0])]
            dist = np.array(_orange.valuecount(dist))
            # NOTE(review): this is the row count minus the number of columns
            # returned by valuecount - confirm it is the intended definition.
            unknowns = len(col) - dist.shape[1]

        self = super().__new__(cls, dist.shape)
        self[:] = dist
        self.unknowns = unknowns
        self.variable = variable
        return self
예제 #2
0
    def from_data(cls, variable, data):
        """
        Build the distribution of `variable` from `data`.

        The storage is first asked to compute the distribution itself through
        `data._compute_distributions`. If it raises `NotImplementedError`,
        the distribution is computed here from the column of values and,
        when `data.has_weights()` is true, from `data.W`.

        :param variable: variable (in any form accepted by `_get_variable`)
            whose distribution is computed
        :param data: data storage to read values and weights from
        :return: new instance of `cls` filled with the distribution, with
            the attributes `unknowns` and `variable` set
        """
        variable = _get_variable(data, variable)
        try:
            dist, unknowns = data._compute_distributions([variable])[0]
        except NotImplementedError:
            col = data[:, variable]
            dtype = col.dtype
            if data.has_weights():
                weights = np.asarray(data.W)
                # Values and weights share one buffer; if the values are not
                # floating point but the weights are, promote the buffer so
                # that the weights are not truncated on assignment.
                if "float" not in dtype.name and "float" in weights.dtype.name:
                    dtype = weights.dtype
                dist = np.empty((2, len(col)), dtype=dtype)
                dist[0, :] = col
                dist[1, :] = weights
            else:
                dist = np.ones((2, len(col)), dtype=dtype)
                dist[0, :] = col
            # Order whole columns by value (row 0) so that each value stays
            # paired with its weight; sorting along axis 0 would instead sort
            # the (value, weight) pair inside every column.
            # NOTE(review): assumes _orange.valuecount expects columns ordered
            # by the first row - confirm against its implementation.
            dist = dist[:, np.argsort(dist[0])]
            dist = np.array(_orange.valuecount(dist))
            # NOTE(review): this is the row count minus the number of columns
            # returned by valuecount - confirm it is the intended definition.
            unknowns = len(col) - dist.shape[1]

        self = super().__new__(cls, dist.shape)
        self[:] = dist
        self.unknowns = unknowns
        self.variable = variable
        return self
예제 #3
0
 def from_data(cls, data, variable):
     """
     Create the distribution of `variable` over `data`.

     The storage is asked for the distribution first; when it raises
     `NotImplementedError`, the rows of `data` are counted one by one into
     a zero-initialised array with one slot per entry of `variable.values`.

     :param data: data storage to iterate over
     :param variable: variable (in any form accepted by `_get_variable`)
     :return: new instance of `cls` with `unknowns` and `variable` set
     """
     variable = _get_variable(data, variable)
     try:
         computed, n_unknown = data._compute_distributions([variable])[0]
         self = super().__new__(cls, len(computed))
         self[:] = computed
         self.unknowns = n_unknown
     except NotImplementedError:
         n_values = len(variable.values)
         self = super().__new__(cls, n_values)
         self[:] = np.zeros(n_values)
         self.unknowns = 0
         if not data.has_weights():
             # Unweighted data: every row contributes a count of one.
             for row in data:
                 value = row[variable]
                 # A value that equals itself is treated as known.
                 if value == value:
                     self[int(value)] += 1
                 else:
                     self.unknowns += 1
         else:
             # Weighted data: every row contributes its weight from data.W.
             for row, weight in zip(data, data.W):
                 value = row[variable]
                 if np.isnan(value):
                     self.unknowns += weight
                 else:
                     self[int(value)] += weight
     self.variable = variable
     return self
예제 #4
0
 def from_data(cls, data, variable):
     """
     Create the distribution of `variable` over `data`.

     The storage is asked for the distribution first; when it raises
     `NotImplementedError`, the values are counted here into an array with
     one slot per entry of `variable.values`.

     :param data: data storage to read values (and `data.W` weights) from
     :param variable: variable (in any form accepted by `_get_variable`)
     :return: new instance of `cls` with `unknowns` and `variable` set
     """
     variable = _get_variable(data, variable)
     try:
         dist, unknowns = data._compute_distributions([variable])[0]
         self = super().__new__(cls, len(dist))
         self[:] = dist
         self.unknowns = unknowns
     except NotImplementedError:
         # Allocate through the class, exactly as the branch above does: a
         # plain np.zeros() array is not an instance of cls and does not
         # accept the `unknowns` / `variable` attributes set below.
         self = super().__new__(cls, len(variable.values))
         self[:] = 0
         self.unknowns = 0
         if data.has_weights():
             for val, w in zip(data[:, variable], data.W):
                 if not math.isnan(val):
                     # Values are float-like; numpy requires integer indices.
                     self[int(val)] += w
                 else:
                     self.unknowns += w
         else:
             for inst in data:
                 val = inst[variable]
                 # A value that equals itself is treated as known.
                 if val == val:
                     self[int(val)] += 1
                 else:
                     self.unknowns += 1
     self.variable = variable
     return self
예제 #5
0
파일: testing.py 프로젝트: thocevar/orange3
    def __init__(self, data, learners, store_data=False, store_models=False, preprocessor=None, callback=None):
        """
        Evaluate `learners` on `data`, holding out one row at a time.

        For every row index the row is moved to position 0 of private copies
        of the data arrays; the first row is used as test data and the
        remaining rows as training data. Predictions are written into
        `self.predicted` (and `self.probabilities` for a discrete class) at
        the index of the held-out row.

        :param data: data table to evaluate on
        :param learners: sequence of learners; each is trained once per row
        :param store_data: forwarded to the base class initializer
        :param store_models: if true, the models of every fold are kept in
            `self.models`
        :param preprocessor: optional callable applied to each training table
        :param callback: forwarded to the base class initializer
        """
        super().__init__(
            data,
            len(learners),
            store_data=store_data,
            store_models=store_models,
            preprocessor=preprocessor,
            callback=callback,
        )
        domain = data.domain
        X = data.X.copy()
        Y = data._Y.copy()
        metas = data.metas.copy()

        # The slices below are taken once; the loop relies on them reflecting
        # later in-place row swaps (true for numpy basic slicing).
        teX, trX = X[:1], X[1:]
        teY, trY = Y[:1], Y[1:]
        te_metas, tr_metas = metas[:1], metas[1:]
        if data.has_weights():
            W = data.W.copy()
            teW, trW = W[:1], W[1:]
        else:
            W = teW = trW = None

        self.row_indices = np.arange(len(data))
        if self.store_models:
            self.models = []
        # Taken before any swapping, so it keeps the original row order.
        self.actual = Y.flatten()
        nmethods = len(learners)
        n_callbacks = nmethods * len(data)
        for test_idx in self.row_indices:
            # Bring the held-out row to position 0.
            X[[0, test_idx]] = X[[test_idx, 0]]
            Y[[0, test_idx]] = Y[[test_idx, 0]]
            metas[[0, test_idx]] = metas[[test_idx, 0]]
            # Compare against None: the truth value of an array with more
            # than one element is ambiguous and raises ValueError.
            if W is not None:
                W[[0, test_idx]] = W[[test_idx, 0]]
            test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
            train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
            if self.preprocessor is not None:
                train_data = self.preprocessor(train_data)
            if self.store_models:
                fold_models = [None] * nmethods
                self.models.append(fold_models)
            for i, learner in enumerate(learners):
                model = self.train_if_succ(i, learner, train_data)
                self.call_callback((test_idx * nmethods + i) / n_callbacks)
                # A falsy model is skipped; its stored slot stays None.
                if not model:
                    continue
                if self.store_models:
                    fold_models[i] = model
                if data.domain.has_discrete_class:
                    values, probs = model(test_data, model.ValueProbs)
                    self.predicted[i][test_idx] = values
                    self.probabilities[i][test_idx, :] = probs
                elif data.domain.has_continuous_class:
                    values = model(test_data, model.Value)
                    self.predicted[i][test_idx] = values
        self.call_callback(1)
예제 #6
0
 def __call__(self, data):
     """
     Fit a model on `data` and return it.

     When the class does not override `Fitter.fit`, the whole storage is
     handed to `fit_storage`; otherwise `fit` receives `data.X`, `data.Y`
     and the weights (`None` if the data has no weights). The returned model
     gets the `domain` and `supports_multiclass` attributes.

     :param data: data to fit the model on
     :raises TypeError: if `data` has more than one class variable and the
         fitter does not support that
     """
     several_targets = len(data.domain.class_vars) > 1
     if several_targets and not self.supports_multiclass:
         raise TypeError("fitter doesn't support multiple class variables")
     self.domain = data.domain

     if type(self).fit is not Fitter.fit:
         # fit() is overridden: pass the plain arrays.
         features, targets = data.X, data.Y
         weights = data.W if data.has_weights() else None
         model = self.fit(features, targets, weights)
     else:
         model = self.fit_storage(data)

     model.domain = data.domain
     model.supports_multiclass = self.supports_multiclass
     return model
예제 #7
0
파일: __init__.py 프로젝트: r0k3/orange3
 def __call__(self, data):
     """
     Fit a model on `data` and return it.

     When the class does not override `Fitter.fit`, the whole storage is
     handed to `fit_storage`; otherwise `fit` receives `data.X`, `data.Y`
     and the weights (`None` if the data has no weights). The returned model
     gets the `domain` and `supports_multiclass` attributes.

     :param data: data to fit the model on
     :raises TypeError: if `data` has more than one class variable and the
         fitter does not support that
     """
     several_targets = len(data.domain.class_vars) > 1
     if several_targets and not self.supports_multiclass:
         raise TypeError("fitter doesn't support multiple class variables")
     self.domain = data.domain

     if type(self).fit is not Fitter.fit:
         # fit() is overridden: pass the plain arrays.
         features, targets = data.X, data.Y
         weights = data.W if data.has_weights() else None
         model = self.fit(features, targets, weights)
     else:
         model = self.fit_storage(data)

     model.domain = data.domain
     model.supports_multiclass = self.supports_multiclass
     return model
예제 #8
0
파일: testing.py 프로젝트: chkothe/orange3
    def __init__(self, data, learners, store_data=False, store_models=False):
        """
        Evaluate `learners` on `data`, holding out one row at a time.

        For every row index the row is moved to position 0 of private copies
        of the data arrays; the first row is used as test data and the
        remaining rows as training data. Predictions are written into
        `self.predicted` (and `self.probabilities` for a discrete class) at
        the index of the held-out row.

        :param data: data table to evaluate on
        :param learners: sequence of learners; each is trained once per row
        :param store_data: forwarded to the base class initializer
        :param store_models: if true, the models of every fold are kept in
            `self.models`
        """
        super().__init__(data,
                         len(learners),
                         store_data=store_data,
                         store_models=store_models)

        domain = data.domain
        X = data.X.copy()
        Y = data._Y.copy()
        metas = data.metas.copy()

        # The slices below are taken once; the loop relies on them reflecting
        # later in-place row swaps (true for numpy basic slicing).
        teX, trX = X[:1], X[1:]
        teY, trY = Y[:1], Y[1:]
        te_metas, tr_metas = metas[:1], metas[1:]
        if data.has_weights():
            W = data.W.copy()
            teW, trW = W[:1], W[1:]
        else:
            W = teW = trW = None

        self.row_indices = np.arange(len(data))
        if self.store_models:
            self.models = []
        # Taken before any swapping, so it keeps the original row order.
        self.actual = Y.flatten()
        class_var = data.domain.class_var
        for test_idx in self.row_indices:
            # Bring the held-out row to position 0.
            X[[0, test_idx]] = X[[test_idx, 0]]
            Y[[0, test_idx]] = Y[[test_idx, 0]]
            metas[[0, test_idx]] = metas[[test_idx, 0]]
            # Compare against None: the truth value of an array with more
            # than one element is ambiguous and raises ValueError.
            if W is not None:
                W[[0, test_idx]] = W[[test_idx, 0]]
            test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
            train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
            if self.store_models:
                fold_models = []
                self.models.append(fold_models)
            for i, learner in enumerate(learners):
                model = learner(train_data)
                if self.store_models:
                    fold_models.append(model)

                if is_discrete(class_var):
                    values, probs = model(test_data, model.ValueProbs)
                    self.predicted[i][test_idx] = values
                    self.probabilities[i][test_idx, :] = probs
                elif is_continuous(class_var):
                    values = model(test_data, model.Value)
                    self.predicted[i][test_idx] = values
예제 #9
0
파일: testing.py 프로젝트: CHANAYA/orange3
    def __call__(self, data, fitters):
        """
        Evaluate `fitters` on `data`, holding out one row at a time.

        For every row index the row is moved to position 0 of private copies
        of the data arrays; the first row is used as test data and the
        remaining rows as training data. Predictions are written into the
        returned `Results` object at the index of the held-out row.

        :param data: data table to evaluate on
        :param fitters: sequence of fitters; each is trained once per row
        :return: a `Results` instance with `row_indices`, `actual`,
            `predicted` (and `probabilities` for a discrete class) filled
            in, plus `models` when `self.store_models` is true
        """
        results = Results(data, len(fitters), store_data=self.store_data)

        domain = data.domain
        X = data.X.copy()
        Y = data.Y.copy()
        metas = data.metas.copy()

        # The slices below are taken once; the loop relies on them reflecting
        # later in-place row swaps (true for numpy basic slicing).
        teX, trX = X[:1], X[1:]
        teY, trY = Y[:1], Y[1:]
        te_metas, tr_metas = metas[:1], metas[1:]
        if data.has_weights():
            W = data.W.copy()
            teW, trW = W[:1], W[1:]
        else:
            W = teW = trW = None

        results.row_indices = np.arange(len(data))
        if self.store_models:
            results.models = []
        # Taken before any swapping, so it keeps the original row order.
        results.actual = Y.flatten()
        class_var = data.domain.class_var
        for test_idx in results.row_indices:
            # Bring the held-out row to position 0.
            X[[0, test_idx]] = X[[test_idx, 0]]
            Y[[0, test_idx]] = Y[[test_idx, 0]]
            metas[[0, test_idx]] = metas[[test_idx, 0]]
            # Compare against None: the truth value of an array with more
            # than one element is ambiguous and raises ValueError.
            if W is not None:
                W[[0, test_idx]] = W[[test_idx, 0]]
            test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
            train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
            if self.store_models:
                fold_models = []
                results.models.append(fold_models)
            for i, fitter in enumerate(fitters):
                model = fitter(train_data)
                if self.store_models:
                    fold_models.append(model)

                if is_discrete(class_var):
                    values, probs = model(test_data, model.ValueProbs)
                    results.predicted[i][test_idx] = values
                    results.probabilities[i][test_idx, :] = probs
                elif is_continuous(class_var):
                    values = model(test_data, model.Value)
                    results.predicted[i][test_idx] = values

        return results
예제 #10
0
파일: base.py 프로젝트: jzbontar/orange3
    def __call__(self, data):
        """
        Preprocess `data`, fit a model on it and return the model.

        When the class does not override `Learner.fit`, the whole storage is
        handed to `fit_storage`; otherwise `fit` receives `data.X`, `data.Y`
        and the weights (`None` if the data has no weights). The returned
        model gets the `domain` and `supports_multiclass` attributes.

        :param data: data to fit the model on
        :raises TypeError: if the preprocessed data has more than one class
            variable and the learner does not support that
        """
        data = self.preprocess(data)

        several_targets = len(data.domain.class_vars) > 1
        if several_targets and not self.supports_multiclass:
            learner_name = self.__class__.__name__
            raise TypeError(
                "%s doesn't support multiple class variables" % learner_name)

        self.domain = data.domain

        if type(self).fit is not Learner.fit:
            # fit() is overridden: pass the plain arrays.
            features, targets = data.X, data.Y
            weights = data.W if data.has_weights() else None
            model = self.fit(features, targets, weights)
        else:
            model = self.fit_storage(data)

        model.domain = data.domain
        model.supports_multiclass = self.supports_multiclass
        return model
예제 #11
0
    def __call__(self, data):
        """
        Preprocess `data`, fit a model on it and return the model.

        When the class does not override `Learner.fit`, the whole storage is
        handed to `fit_storage`; otherwise `fit` receives `data.X`, `data.Y`
        and the weights (`None` if the data has no weights). The returned
        model gets the `domain` and `supports_multiclass` attributes.

        :param data: data to fit the model on
        :raises TypeError: if the preprocessed data has more than one class
            variable and the learner does not support that
        """
        data = self.preprocess(data)

        several_targets = len(data.domain.class_vars) > 1
        if several_targets and not self.supports_multiclass:
            learner_name = self.__class__.__name__
            raise TypeError(
                "%s doesn't support multiple class variables" % learner_name)

        self.domain = data.domain

        if type(self).fit is not Learner.fit:
            # fit() is overridden: pass the plain arrays.
            features, targets = data.X, data.Y
            weights = data.W if data.has_weights() else None
            model = self.fit(features, targets, weights)
        else:
            model = self.fit_storage(data)

        model.domain = data.domain
        model.supports_multiclass = self.supports_multiclass
        return model
예제 #12
0
파일: testing.py 프로젝트: VesnaT/orange3
    def __init__(self, data, learners, store_data=False, store_models=False):
        """
        Evaluate `learners` on `data`, holding out one row at a time.

        For every row index the row is moved to position 0 of private copies
        of the data arrays; the first row is used as test data and the
        remaining rows as training data. Predictions are written into
        `self.predicted` (and `self.probabilities` for a discrete class) at
        the index of the held-out row.

        :param data: data table to evaluate on
        :param learners: sequence of learners; each is trained once per row
        :param store_data: forwarded to the base class initializer
        :param store_models: if true, the models of every fold are kept in
            `self.models`
        """
        super().__init__(data, len(learners), store_data=store_data,
                         store_models=store_models)

        domain = data.domain
        X = data.X.copy()
        Y = data._Y.copy()
        metas = data.metas.copy()

        # The slices below are taken once; the loop relies on them reflecting
        # later in-place row swaps (true for numpy basic slicing).
        teX, trX = X[:1], X[1:]
        teY, trY = Y[:1], Y[1:]
        te_metas, tr_metas = metas[:1], metas[1:]
        if data.has_weights():
            W = data.W.copy()
            teW, trW = W[:1], W[1:]
        else:
            W = teW = trW = None

        self.row_indices = np.arange(len(data))
        if self.store_models:
            self.models = []
        # Taken before any swapping, so it keeps the original row order.
        self.actual = Y.flatten()
        for test_idx in self.row_indices:
            # Bring the held-out row to position 0.
            X[[0, test_idx]] = X[[test_idx, 0]]
            Y[[0, test_idx]] = Y[[test_idx, 0]]
            metas[[0, test_idx]] = metas[[test_idx, 0]]
            # Compare against None: the truth value of an array with more
            # than one element is ambiguous and raises ValueError.
            if W is not None:
                W[[0, test_idx]] = W[[test_idx, 0]]
            test_data = Table.from_numpy(domain, teX, teY, te_metas, teW)
            train_data = Table.from_numpy(domain, trX, trY, tr_metas, trW)
            if self.store_models:
                fold_models = []
                self.models.append(fold_models)
            for i, learner in enumerate(learners):
                model = learner(train_data)
                if self.store_models:
                    fold_models.append(model)

                if data.domain.has_discrete_class:
                    values, probs = model(test_data, model.ValueProbs)
                    self.predicted[i][test_idx] = values
                    self.probabilities[i][test_idx, :] = probs
                elif data.domain.has_continuous_class:
                    values = model(test_data, model.Value)
                    self.predicted[i][test_idx] = values