Example 1
    def __init__(self, engine, learn_rate, l1_penalty, l2_penalty):
        BaseEngineModel.__init__(self, engine)
        self.learn_rate = learn_rate
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self._activator = Activators(self.engine)
        self._accuracy = None
        self._cost_history = Series()  # records the training cost per epoch
Example 2
def clf_multilabel(seq, groupby=None):
    if is_seq(groupby):
        groupby = dict(enumerate(map(str, groupby)))
    if not groupby:
        groupby = dict()
    assert isinstance(groupby, dict), '`groupby` must be a list of str or a dict object.'
    max_ind = seq.argmax(axis=1).T.tolist()[0]
    return Series(groupby.get(int(_), _) for _ in max_ind)
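
A minimal usage sketch of the same argmax-to-label mapping, re-implemented with numpy only so it runs standalone (DaPy's `Series` wrapper and `is_seq` helper are replaced by plain Python here; the label list is a made-up example):

import numpy as np

def clf_multilabel_sketch(scores, groupby=None):
    # Map each row's argmax column index to a human-readable label.
    if isinstance(groupby, (list, tuple)):
        groupby = dict(enumerate(map(str, groupby)))
    groupby = groupby or {}
    max_ind = np.asarray(scores).argmax(axis=1)
    return [groupby.get(int(i), int(i)) for i in max_ind]

scores = np.array([[0.1, 0.7, 0.2],
                   [0.8, 0.1, 0.1]])
print(clf_multilabel_sketch(scores, ['cat', 'dog', 'bird']))  # ['dog', 'cat']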
Example 3
    def transform(self, stochastic_matrix, init_weight=None, min_error=0.001, max_iter=1000):
        if init_weight is None:
            init_weight = Series([1.0 / len(stochastic_matrix)] * len(stochastic_matrix))
        init_weight = self._mat(init_weight).T
        if stochastic_matrix is False:
            weight = self._weight
        else:
            self._weight = weight = self._mat(stochastic_matrix)
        assert isinstance(max_iter, int) and max_iter >= 1
        assert init_weight.shape[1] == 1, '`init_weight` should be a 1-D sequence'
        assert init_weight.shape[0] == weight.shape[1], 'length of `init_weight` does not match the weight matrix'
        
        for round_ in range(max_iter):
            X_next = self._alpha * self._dot(weight, init_weight) + (1.0 - self._alpha) / init_weight.shape[0]
            error = self._sum(self._abs(X_next - init_weight))
            init_weight = X_next
            if error < min_error:
                LogInfo('   Early stopped iteration')
                break
        return Series(init_weight.T.tolist()[0])
Example 4
def Performance(predictor, data, target, mode='reg'):
    assert mode in ('clf', 'reg'), "`mode` must be `clf` or `reg` only."
    assert len(data) == len(target), "`data` and `target` must have the same number of records"

    if mode == 'clf':
        result = predictor.predict(data)
        if not hasattr(result, 'shape'):
            result = SeriesSet(result)
        if not hasattr(target, 'shape'):
            target = SeriesSet(target)
            assert target.shape[1] == 1, 'the test target must be a 1-D sequence'
            target = target[target.columns[0]]
        if hasattr(predictor, 'labels'):
            labels = predictor.labels
        else:
            labels = sorted(set(result) | set(target))

        confuMat = ConfuMat(target, result, labels)
        LogInfo('Classification Accuracy: %.4f' % Accuracy(confuMat))
        LogInfo('Classification Kappa: %.4f' % Kappa(confuMat))
        if confuMat.shape[1] == 3:
            proba = predictor.predict_proba(data)
            if proba.shape[1] == 2:
                proba = proba[:, 0]
            target = Series(1 if _ == labels[0] else 0 for _ in target)
            LogInfo('Classification AUC: %.4f' % Auc(target, proba))
        return confuMat

    elif mode == 'reg':
        target = Series(target)
        predict = Series(predictor.predict(data).T.tolist()[0])
        mean_abs_err = Score.MAE(target, predict)
        mean_sq_err = Score.MSE(target, predict)
        R2 = Score.R2_score(target, predict)
        mean_abs_percent_err = Score.MAPE(target, predict)
        LogInfo('Regression MAE: %.4f' % mean_abs_err)
        LogInfo('Regression MSE: %.4f' % mean_sq_err)
        LogInfo('Regression MAPE: %.4f' % mean_abs_percent_err)
        LogInfo(u'Regression R²: %.4f' % R2)
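
For reference, the regression branch reports MAE, MSE, MAPE and R². A self-contained numpy sketch of those four scores on toy numbers (re-implemented here; DaPy's `Score` module provides its own versions):

import numpy as np

target  = np.array([3.0, 5.0, 2.5, 7.0])
predict = np.array([2.5, 5.0, 3.0, 8.0])

mae  = np.mean(np.abs(target - predict))             # mean absolute error
mse  = np.mean((target - predict) ** 2)              # mean squared error
mape = np.mean(np.abs((target - predict) / target))  # mean absolute percentage error
r2   = 1 - ((target - predict) ** 2).sum() / ((target - target.mean()) ** 2).sum()

print('MAE=%.4f  MSE=%.4f  MAPE=%.4f  R2=%.4f' % (mae, mse, mape, r2))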
Example 5
    def transform(self,
                  X_mat,
                  stochastic_matrix=None,
                  min_error=0.0001,
                  max_iter=1000):
        X_mat = self._mat(X_mat).T
        if stochastic_matrix is None:
            weight = self._weight
        else:
            self._weight = weight = self._mat(stochastic_matrix)
        assert isinstance(max_iter, int) and max_iter >= 1
        assert X_mat.shape[1] == 1, '`X_mat` should be a 1-D sequence'
        assert X_mat.shape[0] == weight.shape[1], 'length of `X_mat` does not match the weight matrix'

        for round_ in range(max_iter):
            X_next = self._alpha * self._dot(
                weight, X_mat) + (1.0 - self._alpha) / X_mat.shape[0]
            error = self._sum(self._abs(X_next - X_mat))
            X_mat = X_next
            if error < min_error:
                LogInfo('   Early stopped iteration')
                break
        return Series(X_mat.T.tolist()[0])
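
The loop above is a damped power iteration, X_next = alpha * W @ X + (1 - alpha) / N, repeated until the L1 change falls below `min_error`. A standalone numpy sketch of the same update on a toy column-stochastic matrix (alpha and the matrix values are made up):

import numpy as np

alpha = 0.85
W = np.array([[0.0, 0.5, 0.5],   # toy stochastic matrix: each column sums to 1
              [0.5, 0.0, 0.5],
              [0.5, 0.5, 0.0]])
x = np.full((3, 1), 1.0 / 3)     # uniform starting weights

for _ in range(1000):
    x_next = alpha * W.dot(x) + (1.0 - alpha) / len(x)
    if np.abs(x_next - x).sum() < 0.0001:
        break                    # early stop once the update is small enough
    x = x_next

print(x_next.ravel())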
Example 6
class BaseBPModel(BaseEngineModel):
    def __init__(self, engine, learn_rate, l1_penalty, l2_penalty):
        BaseEngineModel.__init__(self, engine)
        self.learn_rate = learn_rate
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self._activator = Activators(self.engine)
        self._accuracy = None
        self._cost_history = Series()  # records the training cost per epoch

    @property
    def accuracy(self):
        return self._accuracy

    @property
    def cost_history(self):
        return self._cost_history

    @property
    def learn_rate(self):
        return self._learn_rate

    @learn_rate.setter
    def learn_rate(self, new_rate):
        assert isinstance(new_rate, (int, float))
        assert 0 < new_rate < 1, '`learn_rate` must be between 0 and 1'
        self._learn_rate = new_rate

    @property
    def l1_penalty(self):
        return self._l1

    @l1_penalty.setter
    def l1_penalty(self, new_l1):
        assert isinstance(new_l1, (int, float))
        assert new_l1 >= 0, '`l1_penalty` must not be negative'
        self._l1 = new_l1

    @property
    def l2_penalty(self):
        return self._l2

    @l2_penalty.setter
    def l2_penalty(self, new_l2):
        assert isinstance(new_l2, (int, float))
        assert new_l2 >= 0, '`l2_penalty` must not be negative'
        self._l2 = new_l2

    def __getstate__(self):
        pkl = BaseEngineModel.__getstate__(self)
        del pkl['_activator']
        return pkl

    def __setstate__(self, pkl):
        BaseEngineModel.__setstate__(self, pkl)
        self._activator = Activators(self.engine)
        self.learn_rate = pkl['_learn_rate']
        self.l1_penalty = pkl['_l1']
        self.l2_penalty = pkl['_l2']
        self._cost_history = pkl['_cost_history']
        self._accuracy = pkl['_accuracy']

    def _train(self, X, Y, epoch=500, verbose=True, early_stop=False):
        assert early_stop in (True, False)
        show_log, log_level = 1, 1

        start = clock()
        for term in range(1, epoch + 1):
            # forward propagation
            predict = self._forecast(X)
            # record the errors
            self._accuracy = self._calculate_accuracy(predict, Y)
            diff = self._calculate_backward_error(predict, Y)
            self._cost_history.append(self._sum(self._abs(diff)) / len(X))
            # back propagation
            self._backward(X, diff)

            # check whether to stop the iteration early
            if early_stop and len(self._cost_history) > 10:
                upper_term = 0
                for i in range(1, 11):
                    if self._cost_history[-i] >= self._cost_history[-i - 1]:
                        upper_term += 1
                if upper_term >= 10:
                    LogInfo('Early stopped')
                    break

            # print training information
            if verbose and term % show_log == 0:
                spent = clock() - start
                finish_rate = (term / (epoch + 1.0)) * 100
                remain = spent / (finish_rate / 100) - spent
                LogInfo('Finished: %.1f' % finish_rate + '%\t' +
                        'Epoch: %d\t' % term +
                        'Remaining: %.2fs\t' % remain +
                        'Accuracy: %.2f' % self._accuracy + '%')
                if term > 10**(1 + log_level):
                    log_level += 1
                    show_log = 10**log_level

        time = clock() - start
        LogInfo('Finished Training | Time: %.1fs\tEpoch: %d\tAccuracy: %.2f' %
                (time, term, self._accuracy * 100) + '%')

    def plot_error(self):
        '''Use matplotlib to plot the error curve recorded during training.'''
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            raise ImportError(
                'DaPy uses `matplotlib` to draw pictures, try: pip install matplotlib.'
            )
        plt.title('Model Training Result')
        plt.plot(self._cost_history[1:])
        plt.ylabel('Error %')
        plt.xlabel('Epoch')
        plt.show()
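
The early-stopping rule in `_train` halts once the recorded cost has not decreased for ten consecutive epochs. A standalone sketch of that check, using a plain list in place of `self._cost_history` (the helper name and toy costs are made up):

def should_early_stop(cost_history, patience=10):
    # Stop when the cost failed to decrease for `patience` consecutive epochs.
    if len(cost_history) <= patience:
        return False
    return all(cost_history[-i] >= cost_history[-i - 1] for i in range(1, patience + 1))

costs = [0.9, 0.7, 0.5] + [0.41] * 11   # toy cost curve that plateaus
print(should_early_stop(costs))          # True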
Example 7
def clf_binlabel(seq, labels, cutpoint=0.5):
    return Series(labels[0] if _ >= cutpoint else labels[1] for _ in seq)
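
A minimal usage sketch, with `Series` replaced by a plain list so it runs standalone (the scores and labels are made-up values):

def clf_binlabel_sketch(seq, labels, cutpoint=0.5):
    # Assign labels[0] to scores at or above the cut point, labels[1] otherwise.
    return [labels[0] if score >= cutpoint else labels[1] for score in seq]

print(clf_binlabel_sketch([0.91, 0.12, 0.50], ['pos', 'neg']))  # ['pos', 'neg', 'pos']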