def __init__(self, engine, learn_rate, l1_penalty, l2_penalty):
    '''Set up the engine, the training hyper-parameters and empty
    training records.

    Parameters
    ----------
    engine : forwarded unchanged to `BaseEngineModel.__init__`.
    learn_rate, l1_penalty, l2_penalty : stored on the attributes of
        the same names.
    '''
    BaseEngineModel.__init__(self, engine)

    # Targets are assigned left to right: learn_rate first, l2_penalty last.
    self.learn_rate, self.l1_penalty, self.l2_penalty = (
        learn_rate, l1_penalty, l2_penalty)

    # Built from `self.engine`, which the base initialiser is expected to set.
    self._activator = Activators(self.engine)

    self._accuracy = None          # nothing has been measured yet
    self._cost_history = Series()  # Mistake Recorder
def clf_multilabel(seq, groupby=None):
    '''Turn a score matrix into one label per row.

    Parameters
    ----------
    seq : object exposing `argmax(axis=1)`
        Assumed to be a matrix-like object whose `argmax(axis=1)` result
        can be transposed and converted with `tolist()` into a nested
        list -- TODO confirm against the callers.
    groupby : sequence, dict or None
        Label names. A sequence (as judged by `is_seq`) is turned into
        `{position: str(name)}`; a dict is used as it is; any falsy value
        means "no names".

    Returns
    -------
    Series : for every winning index, the matching name from `groupby`,
        or the index itself when no name is registered for it.
    '''
    names = groupby
    if is_seq(names):
        names = {position: str(name) for position, name in enumerate(names)}
    if not names:
        names = {}
    assert isinstance(names, dict), '`labels` must be a list of str or dict object.'

    # First row of the transposed argmax result holds one index per sample.
    winners = seq.argmax(axis=1).T.tolist()[0]
    return Series(names.get(int(index), index) for index in winners)
def transform(self, stochastic_matrix, init_weight=None, min_error=0.001, max_iter=1000):
    '''Iterate `x <- alpha * W.x + (1 - alpha) / n` until it settles.

    Parameters
    ----------
    stochastic_matrix : matrix-like, False or None
        Converted with `self._mat` and remembered in `self._weight`.
        Pass False (or None) to reuse the matrix remembered by an
        earlier call instead of supplying a new one.
    init_weight : sequence, optional
        Starting vector `x`. Defaults to equal weights that sum to 1.
    min_error : float
        Iteration stops as soon as
        `self._sum(self._abs(x_next - x))` drops below this value.
    max_iter : int
        Upper bound on the number of iterations, at least 1.

    Returns
    -------
    Series : the vector reached when the iteration stopped.

    Raises
    ------
    AssertionError : if `max_iter` is not an int >= 1, or the starting
        vector does not fit the weight matrix.
    '''
    # The "reuse" branch used to be overwritten unconditionally by the
    # conversion below it; the two cases are now mutually exclusive.
    reuse_stored = stochastic_matrix is False or stochastic_matrix is None
    if reuse_stored:
        weight = self._weight
    else:
        self._weight = weight = self._mat(stochastic_matrix)

    if init_weight is None:
        # When reusing the stored matrix there is no input to measure, so
        # take the size the shape check below requires.
        size = weight.shape[1] if reuse_stored else len(stochastic_matrix)
        init_weight = Series([1.0 / size] * size)
    init_weight = self._mat(init_weight).T

    assert isinstance(max_iter, int) and max_iter >= 1
    assert init_weight.shape[1] == 1, '`init_weight` should be 1-D sequence'
    assert init_weight.shape[0] == weight.shape[1], 'items in init_weight not fit the shape of weight matrix'

    for _ in range(max_iter):
        X_next = self._alpha * self._dot(weight, init_weight) + (1.0 - self._alpha) / init_weight.shape[0]
        error = self._sum(self._abs(X_next - init_weight))
        init_weight = X_next
        if error < min_error:
            # converged early; this variant stops without logging
            break
    return Series(init_weight.T.tolist()[0])
def Performance(predictor, data, target, mode='reg'):
    '''Log evaluation metrics of `predictor` on a labelled data set.

    Parameters
    ----------
    predictor : object with `predict(data)`; in `clf` mode it may also
        offer a `labels` attribute and `predict_proba(data)`.
    data : records handed to the predictor.
    target : expected values, one per record in `data`.
    mode : 'clf' for classification, 'reg' for regression.

    Returns
    -------
    The confusion matrix built by `ConfuMat` in `clf` mode; nothing
    (None) in `reg` mode.

    Raises
    ------
    AssertionError : on an unknown `mode`, on a length mismatch between
        `data` and `target`, or when a `clf` target has more than one
        column.
    '''
    assert mode in ('clf', 'reg'), "`mode` must be `clf` or `reg` only."
    assert len(data) == len(
        target), "the number of target data is not equal to variable data"

    if mode == 'clf':
        predicted = predictor.predict(data)

        # Anything without a `shape` attribute is wrapped in a SeriesSet.
        if not hasattr(predicted, 'shape'):
            predicted = SeriesSet(predicted)
        if not hasattr(target, 'shape'):
            target = SeriesSet(target)
        assert target.shape[1] == 1, 'testify target must be a sequence'
        target = target[target.columns[0]]

        # Prefer the label order published by the predictor itself.
        labels = (predictor.labels if hasattr(predictor, 'labels')
                  else sorted(set(predicted) | set(target)))

        confusion = ConfuMat(target, predicted, labels)
        LogInfo('Classification Accuracy: %.4f' % Accuracy(confusion))
        LogInfo('Classification Kappa: %.4f' % Kappa(confusion))

        # NOTE(review): a 3-column confusion matrix presumably marks the
        # two-class case -- confirm against ConfuMat.
        if confusion.shape[1] == 3:
            proba = predictor.predict_proba(data)
            if proba.shape[1] == 2:
                proba = proba[:, 0]
            # 1 marks records whose target equals the first label.
            binary_target = Series(
                1 if value == labels[0] else 0 for value in target)
            LogInfo('Classification AUC: %.4f' % Auc(binary_target, proba))
        return confusion

    elif mode == 'reg':
        actual = Series(target)
        fitted = Series(predictor.predict(data).T.tolist()[0])

        # Every score is computed before anything is logged.
        mae = Score.MAE(actual, fitted)
        mse = Score.MSE(actual, fitted)
        r2 = Score.R2_score(actual, fitted)
        mape = Score.MAPE(actual, fitted)

        report = (
            ('Regression MAE: %.4f', mae),
            ('Regression MSE: %.4f', mse),
            ('Regression MAPE: %.4f', mape),
            (u'Regression R²: %.4f', r2),
        )
        for template, score in report:
            LogInfo(template % score)
def transform(self, X_mat, stochastic_matrix=None, min_error=0.0001, max_iter=1000):
    '''Iterate `x <- alpha * W.x + (1 - alpha) / n` starting from `X_mat`.

    Parameters
    ----------
    X_mat : sequence
        Starting vector `x`; must be 1-D and fit the weight matrix.
    stochastic_matrix : matrix-like, None or False
        Converted with `self._mat` and remembered in `self._weight`.
        Leave it as None (or pass False) to reuse the matrix remembered
        by an earlier call.
    min_error : float
        Iteration stops as soon as
        `self._sum(self._abs(x_next - x))` drops below this value.
    max_iter : int
        Upper bound on the number of iterations, at least 1.

    Returns
    -------
    Series : the vector reached when the iteration stopped.

    Raises
    ------
    AssertionError : if `max_iter` is not an int >= 1, or `X_mat` does
        not fit the weight matrix.
    '''
    X_mat = self._mat(X_mat).T

    # The "reuse" branch used to be overwritten unconditionally by the
    # conversion below it, and the default None was converted as if it
    # were a matrix. Both now select the stored weights.
    if stochastic_matrix is None or stochastic_matrix is False:
        weight = self._weight
    else:
        self._weight = weight = self._mat(stochastic_matrix)

    assert isinstance(max_iter, int) and max_iter >= 1
    assert X_mat.shape[1] == 1, 'X should be 1-D sequence'
    assert X_mat.shape[0] == weight.shape[
        1], 'items in the X not fit the shape of weight matrix'

    for _ in range(max_iter):
        X_next = self._alpha * self._dot(
            weight, X_mat) + (1.0 - self._alpha) / X_mat.shape[0]
        error = self._sum(self._abs(X_next - X_mat))
        X_mat = X_next
        if error < min_error:
            LogInfo(' Early stopped iteration')
            break
    return Series(X_mat.T.tolist()[0])
class BaseBPModel(BaseEngineModel):
    '''Common state and training loop for models fitted by back propagation.

    The class keeps the validated hyper-parameters (`learn_rate`,
    `l1_penalty`, `l2_penalty`), the latest accuracy and the history of
    costs. `_train` relies on `_forecast`, `_backward`,
    `_calculate_accuracy` and `_calculate_backward_error`, none of which
    is defined here; they are expected to come from subclasses.
    '''

    def __init__(self, engine, learn_rate, l1_penalty, l2_penalty):
        BaseEngineModel.__init__(self, engine)
        # These go through the validating property setters below.
        self.learn_rate = learn_rate
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self._activator = Activators(self.engine)
        self._accuracy = None
        self._cost_history = Series()  # Mistake Recorder

    @property
    def accuracy(self):
        '''Accuracy recorded by the most recent training epoch (None
        before any training).'''
        return self._accuracy

    @property
    def cost_history(self):
        '''One recorded cost per trained epoch, oldest first.'''
        return self._cost_history

    @property
    def learn_rate(self):
        '''Learning rate, a number strictly between 0 and 1.'''
        return self._learn_rate

    @learn_rate.setter
    def learn_rate(self, new_rate):
        assert isinstance(new_rate, (int, float))
        assert 0 < new_rate < 1, '`learnning rate` must be between 0 to 1'
        self._learn_rate = new_rate

    @property
    def l1_penalty(self):
        '''L1 penalty, a non-negative number.'''
        return self._l1

    @l1_penalty.setter
    def l1_penalty(self, new_l1):
        assert isinstance(new_l1, (int, float))
        assert new_l1 >= 0, '`l1 penalty` must be greater than 0'
        self._l1 = new_l1

    @property
    def l2_penalty(self):
        '''L2 penalty, a non-negative number.'''
        return self._l2

    @l2_penalty.setter
    def l2_penalty(self, new_l2):
        assert isinstance(new_l2, (int, float))
        assert new_l2 >= 0, '`l2_penalty` must be greater than 0'
        self._l2 = new_l2

    def __getstate__(self):
        '''Return the picklable state, without the activator object.'''
        pkl = BaseEngineModel.__getstate__(self)
        # NOTE(review): this deletes from whatever the base class returned;
        # confirm it is a copy rather than the live `__dict__`.
        del pkl['_activator']
        return pkl

    def __setstate__(self, pkl):
        '''Restore the state and rebuild the activator from the engine.'''
        BaseEngineModel.__setstate__(self, pkl)
        self._activator = Activators(self.engine)
        # Going through the setters re-validates the stored values.
        self.learn_rate = pkl['_learn_rate']
        self.l1_penalty = pkl['_l1']
        self.l2_penalty = pkl['_l2']
        self._cost_history = pkl['_cost_history']
        self._accuracy = pkl['_accuracy']

    def _train(self, X, Y, epoch=500, verbose=True, early_stop=False):
        '''Run up to `epoch` rounds of forward and backward propagation.

        Parameters
        ----------
        X, Y : training inputs and targets, passed on to the
            `_forecast`, `_calculate_*` and `_backward` hooks.
        epoch : maximum number of rounds, at least 1.
        verbose : when true, progress is reported through `LogInfo`.
        early_stop : when True, training stops once each of the last ten
            recorded costs is not lower than the one before it.

        Raises
        ------
        AssertionError : if `early_stop` is not a boolean or `epoch` is
            smaller than 1.
        '''
        assert early_stop in (True, False)
        # With no epoch to run, the summary after the loop would fail on
        # an unbound `term`; reject the input up front instead.
        assert epoch >= 1, '`epoch` must be at least 1'

        show_log, log_level = 1, 1
        start = clock()
        for term in range(1, epoch + 1):
            # forward propagation
            predict = self._forecast(X)

            # record the errors
            self._accuracy = self._calculate_accuracy(predict, Y)
            diff = self._calculate_backward_error(predict, Y)
            self._cost_history.append(self._sum(self._abs(diff)) / len(X))

            # back propagation
            self._backward(X, diff)

            # check whether to early stop the iteration
            if early_stop and len(self._cost_history) > 10:
                history = self._cost_history
                if all(history[-i] >= history[-i - 1] for i in range(1, 11)):
                    LogInfo('Early stoped')
                    break

            # print training information
            if verbose and term % show_log == 0:
                spent = clock() - start
                finish_rate = (term / (epoch + 1.0)) * 100
                # remaining time, extrapolated from the pace so far
                last = spent / (finish_rate / 100) - spent
                # NOTE(review): accuracy is logged unscaled here but is
                # multiplied by 100 in the final summary -- confirm which
                # scale `_calculate_accuracy` returns.
                LogInfo('Finished: %.1f' % finish_rate + '%\t' +
                        'Epoch: %d\t' % term +
                        'Rest Time: %.2fs\t' % last +
                        'Accuracy: %.2f' % self._accuracy + '%')

            # report less often as the number of finished epochs grows
            if term > 10**(1 + log_level):
                log_level += 1
                show_log = 10**log_level

        elapsed = clock() - start
        LogInfo('Finish Train | Time:%.1fs\tEpoch:%d\tAccuracy:%.2f' % (elapsed, term, self._accuracy * 100) + '%')

    def plot_error(self):
        '''use matplotlib library to draw the error curve during the training.

        Raises
        ------
        ImportError : if `matplotlib` is not installed.
        '''
        # Only the import is guarded, so an unrelated ImportError raised
        # while drawing is not mislabelled as a missing installation.
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            raise ImportError(
                'DaPy uses `matplotlib` to draw picture, try: pip install matplotlib.'
            )

        plt.title('Model Training Result')
        # the first recorded cost is left out of the curve
        plt.plot(self._cost_history[1:])
        plt.ylabel('Error %')
        plt.xlabel('Epoch')
        plt.show()
def clf_binlabel(seq, labels, cutpoint=0.5):
    '''Map each score in `seq` to one of two labels.

    Parameters
    ----------
    seq : iterable of scores comparable with `cutpoint`.
    labels : indexable; `labels[0]` is used for scores at or above the
        cutpoint, `labels[1]` for scores below it.
    cutpoint : threshold, 0.5 by default.

    Returns
    -------
    Series : one label per score, in the order of `seq`.
    '''
    def pick(score):
        # `labels` is indexed per item, so it is never touched for an
        # empty `seq`.
        if score >= cutpoint:
            return labels[0]
        return labels[1]

    return Series(pick(score) for score in seq)