Ejemplo n.º 1
0
 def calculate(self, Y_hat, Y, average='micro', weights=None):
     """
     Compute the recall of predictions against targets.

     Both inputs are passed through `check_XY` and then reduced to
     class indices by taking the argmax along axis 1, so each is
     expected to hold one column per class.

     Parameters
     ----------
     Y_hat : array-like, shape=(n_samples, n_classes)
         Predicted per-class scores.

     Y : array-like, shape=(n_samples, n_classes)
         Targets with one column per class.

     average : str, default='micro'
         Forwarded unchanged as `average` to `metrics.recall_score`.

     weights : array-like, default=None
         Forwarded unchanged as `sample_weight`.

     Returns
     -------
     Whatever `metrics.recall_score` returns for the class indices.
     """
     scores, targets = check_XY(X=Y_hat, Y=Y)
     predicted = np.argmax(scores, axis=1)
     actual = np.argmax(targets, axis=1)
     return metrics.recall_score(
         actual, predicted, average=average, sample_weight=weights)
Ejemplo n.º 2
0
    def predict_proba(self, X, *args, **kwargs):
        """
        Predict class probabilities for each sample in `X`.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Data to predict.

        *args, **kwargs
            Accepted for call compatibility; not used by this method.

        Returns
        -------
        proba : array-like, shape=(n_samples, n_classes)
            Class probabilities of input data, as returned by
            `self._forward`.
            The order of classes is in sorted ascending order.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If `X` does not have `self.n_features_` columns.
        """
        if not self._is_fitted():
            raise RuntimeError("Model is not fitted")
        X = check_XY(X=X)
        if X.shape[1] != self.n_features_:
            # Build ONE message string: passing two strings to ValueError
            # makes the error render as a tuple instead of a sentence.
            raise ValueError(
                "Model takes %d features as input but data has %d features"
                % (self.n_features_, X.shape[1]))
        if self.verbose > 0:
            print("Predicting %d samples." % X.shape[0])
        return self._forward(X)
Ejemplo n.º 3
0
 def calculate(self, Y_hat, Y, average='macro', weights=None):
     """
     Compute the ROC AUC of predicted scores against targets.

     The targets are reduced to class indices with argmax along
     axis 1. The predictions are kept as continuous scores: ROC AUC
     ranks samples by score, so collapsing them to hard labels
     discards the ranking, and the multiclass mode of
     `roc_auc_score` needs the full (n_samples, n_classes) matrix.

     Parameters
     ----------
     Y_hat : array-like, shape=(n_samples, n_classes)
         Predicted per-class scores.
         NOTE(review): the multiclass mode presumably expects each
         row to sum to 1 - confirm against the metrics library.

     Y : array-like, shape=(n_samples, n_classes)
         Targets with one column per class.

     average : str, default='macro'
         Forwarded unchanged as `average`.

     weights : array-like, default=None
         Forwarded unchanged as `sample_weight`.

     Returns
     -------
     Whatever `metrics.roc_auc_score` returns.
     """
     Y_hat, Y = check_XY(X=Y_hat, Y=Y)
     Y_hat, Y = np.asarray(Y_hat), np.argmax(Y, axis=1)
     if Y_hat.ndim == 2 and Y_hat.shape[1] == 2:
         # Binary problem: a 1-D score for the positive class
         # (column 1, matching label 1 produced by the argmax above).
         scores = Y_hat[:, 1]
     else:
         scores = Y_hat
     return metrics.roc_auc_score(Y,
                                  scores,
                                  average=average,
                                  multi_class=self.multi_class,
                                  sample_weight=weights)
Ejemplo n.º 4
0
    def fit(self, X, Y, weights=None):
        """
        Train every estimator of the ensemble on a batch drawn from
        the given data and labels.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training data.

        Y : array-like, shape=(n_samples,)
            Target labels as integers.

        weights : array-like, shape=(n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : Base
            Fitted estimator.
        """
        X, Y = check_XY(X=X, Y=Y)
        # Class / feature counts are only inferred when not already set.
        if self.n_classes_ is None:
            self.n_classes_ = len(set(decode(Y)))
        if self.n_features_ is None:
            self.n_features_ = X.shape[1]
        Y = one_hot(Y, cols=self.n_classes_)
        bootstrap = calculate_bootstrap(self.bootstrap_size, len(X))
        # batch_size is only reported in the log line below; the dataset
        # itself is batched by the bootstrap size.
        batch_size = calculate_batch(self.batch_size, len(Y))
        ds = BatchDataset(
            X, Y, weights,
            seed=self.random_state).shuffle().repeat().batch(bootstrap)
        # Re-initialize unless warm-starting an already fitted model.
        if not self.warm_start or not self._is_fitted():
            if self.verbose > 0:
                print("Initializing model")
            self._initialize()
        if self.verbose > 0:
            print("Training model for %d epochs" % self.max_iter,
                  "on %d samples in batches of %d." %
                  (X.shape[0], batch_size))
            print("Training model with %d estimators." % self.n)
        n_estimators = len(self.estimators_)
        # verbose == 1 shows a progress bar; otherwise a plain range.
        if self.verbose == 1:
            estimators = trange(n_estimators)
        else:
            estimators = range(n_estimators)
        for e in estimators:
            if self.verbose == 1:
                estimators.set_description("Estimator %d" % (e + 1))
            elif self.verbose > 1:
                # Parenthesize (e + 1): `"..." % e + 1` formats first and
                # then fails adding an int to a str.
                print("Fitting estimator %d" % (e + 1))
            X_, Y_, weights_ = ds.next()
            # NOTE(review): presumably rewinds the dataset cursor so the
            # next estimator draws from the start - confirm in BatchDataset.
            ds.i = 0
            self.estimators_[e].fit(X_, Y_, weights=weights_)
        self.fitted_ = True
        if self.verbose > 0:
            print("Training complete.")
        return self
Ejemplo n.º 5
0
	def fit(self, X, Y, weights=None):
		"""
		Fit the neighbour index and score every estimator of the
		already-trained ensemble on the given data and labels.

		Parameters
		----------
		X : array-like, shape=(n_samples, n_features)
			Training data.

		Y : array-like, shape=(n_samples,)
			Target labels as integers.

		weights : array-like, shape=(n_samples,), default=None
			Sample weights for ensemble.
			Accepted for interface compatibility; this method does
			not read it.

		Returns
		-------
		self : Base
			Fitted estimator.

		Raises
		------
		ValueError
			If the ensemble is not fitted, or if `X` does not have
			the number of features the ensemble was trained on.
		"""
		X, Y = check_XY(X=X, Y=Y)
		if not self.ensemble._is_fitted():
			raise ValueError("Ensemble must already be trained")
		if X.shape[1] != self.ensemble.n_features_:
			# Build ONE message string: passing two strings to ValueError
			# makes the error render as a tuple instead of a sentence.
			raise ValueError(
				"Ensemble accepts data with %d features "
				"but encountered data with %d features."
				% (self.ensemble.n_features_, X.shape[1]))
		if self.verbose > 0:
			print("Initializing and training model")
		# Class / feature counts are inherited from the ensemble when unset.
		if self.n_classes_ is None:
			self.n_classes_ = self.ensemble.n_classes_
		if self.n_features_ is None:
			self.n_features_ = self.ensemble.n_features_
		targets = Y.reshape(-1)
		self.data_, self.targets_ = X, targets
		if self.verbose > 2:
			print("Fitting Nearest Neighbors")
		self.knn.fit(X)
		if self.verbose > 2:
			print("Scoring ensemble")
		# One 0/1 vector per estimator: 1 where it predicts the sample right.
		hits = [np.where(e.predict(X) == targets, 1, 0)
				for e in self.ensemble.estimators_]
		# Appended onto the existing self.scores_ in a single concatenate.
		# NOTE(review): presumably self.scores_ starts empty; a second call
		# to fit would append onto stale scores - confirm a reset exists.
		self.scores_ = np.concatenate([self.scores_] + hits)
		# Rows become samples, columns become estimators.
		self.scores_ = self.scores_.reshape(-1, len(X)).T
		self.fitted_ = True
		if self.verbose > 0:
			print("Training complete.")
		return self
Ejemplo n.º 6
0
    def feature_importance(self, X, Y):
        """
        Calculate the feature importances by permuting
        each feature separately and measuring the
        increase in loss.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training data.

        Y : array-like, shape=(n_samples,)
            Target labels as integers.

        Returns
        -------
        importances : list, shape=(n_features,)
            One entry per feature, in the order the features appear
            in the data: exp(loss with that feature shuffled) divided
            by exp(loss on the unmodified data).
            The larger the error increase, the more
            important the feature.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If `X` does not have `self.n_features_` columns.
        """
        if not self._is_fitted():
            raise RuntimeError("Model is not fitted")
        X, Y = check_XY(X=X, Y=Y)
        Y = one_hot(Y, cols=self.n_classes_)
        if X.shape[1] != self.n_features_:
            # Build ONE message string: passing two strings to ValueError
            # makes the error render as a tuple instead of a sentence.
            raise ValueError(
                "Model takes %d features as input but data has %d features"
                % (self.n_features_, X.shape[1]))
        if self.verbose > 0:
            print("Calculating feature importances")
        # Reference value computed once on the unshuffled data.
        baseline = np.exp(self.loss.loss(self.predict_proba(X), Y))
        importances = []
        for f in range(X.shape[1]):
            # Work on a copy so the caller's data is never shuffled.
            X_ = copy(X)
            # Shuffles column f of the copy in place.
            self.random_state.shuffle(X_[:, f])
            loss_ = self.loss.loss(self.predict_proba(X_), Y)
            importances.append(np.exp(loss_) / baseline)
        return importances
Ejemplo n.º 7
0
	def predict_proba(self, X):
		"""
		Predict class probabilities for each sample in `X` by
		averaging the estimators selected for that sample.

		Parameters
		----------
		X : array-like, shape=(n_samples, n_features)
			Data to predict.

		Returns
		-------
		proba : array-like, shape=(n_samples, n_classes)
			Sum of the selected estimators' predictions for each
			sample, divided by the number of selected estimators.
			When `self.rank` is truthy, each prediction is first
			multiplied by that estimator's competence for the sample.
			The order of classes is in sorted ascending order.

		Raises
		------
		RuntimeError
			If the model has not been fitted.
		ValueError
			If `X` does not have `self.n_features_` columns.
		"""
		if not self._is_fitted():
			raise RuntimeError("Model is not fitted")
		X = check_XY(X=X)
		if X.shape[1] != self.n_features_:
			# Build ONE message string: passing two strings to ValueError
			# makes the error render as a tuple instead of a sentence.
			raise ValueError(
				"Model takes %d features as input but data has %d features"
				% (self.n_features_, X.shape[1]))
		if self.verbose > 0:
			print("Predicting %d samples." % X.shape[0])
		# Only the neighbour indices are needed; distances are discarded.
		_, i = self.knn.kneighbors(X)
		# Indexed below as competence[sample, estimator index].
		competence = self._calculate_competence(i)
		# Indexed below as i_estimators[sample, slot] -> estimator index.
		i_estimators = self._select(competence)
		estimators = self.ensemble.estimators_
		n_estimators = i_estimators.shape[1]
		pred = np.zeros((len(X), self.n_classes_))
		for n in range(len(X)):
			# Each estimator is queried with a single-row 2-D input.
			sample = X[n].reshape(1, -1)
			for e in range(n_estimators):
				chosen = i_estimators[n, e]
				p = estimators[chosen].predict_proba(sample)
				if self.rank:
					p *= competence[n, chosen]
				pred[n] += p.reshape(-1)
		return pred / n_estimators
Ejemplo n.º 8
0
 def calculate(self, Y_hat, Y, weights=None):
     """
     Compute the accuracy of predictions against targets.

     Both inputs are passed through `check_XY` and then reduced to
     class indices by taking the argmax along axis 1, so each is
     expected to hold one column per class.

     Parameters
     ----------
     Y_hat : array-like, shape=(n_samples, n_classes)
         Predicted per-class scores.

     Y : array-like, shape=(n_samples, n_classes)
         Targets with one column per class.

     weights : array-like, default=None
         Forwarded unchanged as `sample_weight`.

     Returns
     -------
     Whatever `metrics.accuracy_score` returns for the class indices.
     """
     scores, targets = check_XY(X=Y_hat, Y=Y)
     predicted = np.argmax(scores, axis=1)
     actual = np.argmax(targets, axis=1)
     return metrics.accuracy_score(actual, predicted, sample_weight=weights)
Ejemplo n.º 9
0
    def fit(self, X, Y, *args, weights=None, **kwargs):
        """
        Train the model on the given data and labels.

        Runs up to `self.max_iter` epochs of `ds.n_batches` batches
        each. Training stops early when the dataset yields an empty
        batch, or when `self.tol` is set and the loss changes by less
        than `self.tol` between consecutive batches. In the latter
        case the batch that triggered the stop is not back-propagated.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training data.

        Y : array-like, shape=(n_samples,)
            Target labels as integers.

        weights : array-like, shape=(n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.

        *args, **kwargs
            Accepted for call compatibility; not used by this method.

        Returns
        -------
        self : Base
            Fitted estimator.
        """
        X, Y = check_XY(X=X, Y=Y)
        # Class / feature counts are only inferred when not already set.
        if self.n_classes_ is None:
            self.n_classes_ = len(set(decode(Y)))
        if self.n_features_ is None:
            self.n_features_ = X.shape[1]
        Y = one_hot(Y, cols=self.n_classes_)
        batch_size = calculate_batch(self.batch_size, len(Y))
        ds = BatchDataset(
            X, Y, weights,
            seed=self.random_state).shuffle().repeat().batch(batch_size)
        # Re-initialize unless warm-starting an already fitted model.
        if not self.warm_start or not self._is_fitted():
            if self.verbose > 0:
                print("Initializing model")
            self._initialize()
        if self.verbose > 0:
            print("Training model for %d epochs" % self.max_iter,
                  "on %d samples in batches of %d." %
                  (X.shape[0], batch_size))
        loss_prev, early_stop = np.inf, False
        # verbose == 1: progress bar over epochs; verbose == 2: over batches.
        if self.verbose == 1:
            epochs = trange(self.max_iter)
        else:
            epochs = range(self.max_iter)
        for e in epochs:
            if self.verbose == 2:
                batches = trange(ds.n_batches)
            else:
                batches = range(ds.n_batches)
                if self.verbose > 2:
                    # 1-based, matching the per-batch message below.
                    print("Epoch %d" % (e + 1))
            for b in batches:
                # A separate name keeps the `weights` argument intact.
                X_batch, Y_batch, batch_weights = ds.next()
                if len(X_batch) == 0:
                    if self.verbose > 0:
                        print("No more data to train. Ending training.")
                    early_stop = True
                    break
                Y_hat = self._forward(X_batch)
                # Sum the loss over axis 1, then average over the batch.
                loss = np.mean(np.sum(self.loss.loss(Y_hat, Y_batch), axis=1))
                metric = self.score(
                    Y_batch, Y_hat=Y_hat, weights=batch_weights)
                msg = "loss: %.4f, %s: %.4f" % (
                    loss, self.metric.name, metric)
                if self.verbose == 1:
                    epochs.set_description(msg)
                elif self.verbose == 2:
                    batches.set_description(msg)
                elif self.verbose > 2:
                    print("Epoch %d, Batch %d completed." % (e + 1, b + 1),
                          msg)
                # Convergence check runs before the backward pass.
                if self.tol is not None and np.abs(loss -
                                                   loss_prev) < self.tol:
                    early_stop = True
                    break
                self._backward(Y_hat, Y_batch, weights=batch_weights)
                loss_prev = loss
            if early_stop:
                break
        self.fitted_ = True
        if self.verbose > 0:
            print("Training complete.")
        return self