def handle_five(clf):
    """Combine class support with the no-significant-difference test.

    If ``handle(clf, "zero")`` yields a truthy result it is returned
    unchanged.  Otherwise every candidate produced by
    ``handle(clf, "third")`` is tried in turn: the Bayes parameters are
    temporarily replaced by the result of
    ``clf.bayes.update(..., copy=True)``, the module-level ``test_datas``
    are re-scored, and the candidate is kept only when ``pair_test``
    accepts the comparison of the scores before and after the update.

    :param clf: classifier wrapper; must expose ``bayes`` (with
        ``class_log_prior_``, ``feature_log_prob_`` and ``update``),
        ``predict``, ``predict_max_proba`` and
        ``metrics_another_zero_one_loss``.
    :return: the truthy result of ``handle(clf, "zero")``, or else a list
        of ``(loss, text, predicted_class, index)`` tuples, one per
        accepted candidate, in candidate order.
    """
    predict_true = handle(clf, "zero")
    if predict_true:
        return predict_true

    fit_for_class_support = handle(clf, "third")
    # Parenthesised single-argument form prints identically on Python 2
    # (print statement) and Python 3 (print function).
    print("The result of class-support: %d samples" % len(fit_for_class_support))
    # NOTE: an earlier experiment additionally kept only the candidates
    # whose element [4] exceeded the class log prior of their predicted
    # class; that extra filter is disabled.

    # Baseline parameters and baseline scores on the test set.
    origin_class_log_prior = clf.bayes.class_log_prior_
    origin_feature_log_prob = clf.bayes.feature_log_prob_
    origin_proba = clf.predict_max_proba(test_datas)
    origin_label = clf.predict(test_datas)

    accepted = []
    for index, candidate in enumerate(fit_for_class_support):
        text = candidate[1]
        c_pred = candidate[2]
        try:
            # NOTE(review): restoring by reference below assumes that
            # update(copy=True) returns new arrays and leaves the current
            # ones untouched -- confirm against the update() implementation.
            clf.bayes.class_log_prior_, clf.bayes.feature_log_prob_ = (
                clf.bayes.update(c_pred, text, copy=True))
            test_proba = clf.predict_max_proba(test_datas)
            label = clf.predict(test_datas)

            # Take the predicted class into account: a sample can score
            # highly both before and after the update and still be
            # assigned a different class.  Negate the score of every
            # sample whose label changed.
            smooth = np.where(
                np.asarray(origin_label) == np.asarray(label), 1, -1)
            # Third argument is the output array: the scores are
            # modified in place.
            np.multiply(test_proba, smooth, test_proba)

            if pair_test(origin_proba, test_proba):
                loss = clf.metrics_another_zero_one_loss(
                    origin_proba, test_proba)
                accepted.append((loss, text, c_pred, index))
        finally:
            # Always put the baseline parameters back, even when scoring
            # or testing raises, so the caller never sees a trial update.
            clf.bayes.class_log_prior_ = origin_class_log_prior
            clf.bayes.feature_log_prob_ = origin_feature_log_prob
    return accepted
def handle_five(clf):
    """Combine class support with the no-significant-difference test.

    If ``handle(clf, "zero")`` yields a truthy result it is returned
    unchanged.  Otherwise every candidate produced by
    ``handle(clf, "third")`` is tried in turn: the Bayes parameters are
    temporarily replaced by the result of
    ``clf.bayes.update(..., copy=True)``, the module-level ``test_datas``
    are re-scored, and the candidate is kept only when ``pair_test``
    accepts the comparison of the scores before and after the update.

    :param clf: classifier wrapper; must expose ``bayes`` (with
        ``class_log_prior_``, ``feature_log_prob_`` and ``update``),
        ``predict``, ``predict_max_proba`` and
        ``metrics_another_zero_one_loss``.
    :return: the truthy result of ``handle(clf, "zero")``, or else a list
        of ``(loss, text, predicted_class, index)`` tuples, one per
        accepted candidate, in candidate order.
    """
    predict_true = handle(clf, "zero")
    if predict_true:
        return predict_true

    fit_for_class_support = handle(clf, "third")
    # Parenthesised single-argument form prints identically on Python 2
    # (print statement) and Python 3 (print function).
    print("The result of class-support: %d samples" % len(fit_for_class_support))
    # NOTE: an earlier experiment additionally kept only the candidates
    # whose element [4] exceeded the class log prior of their predicted
    # class; that extra filter is disabled.

    # Baseline parameters and baseline scores on the test set.
    origin_class_log_prior = clf.bayes.class_log_prior_
    origin_feature_log_prob = clf.bayes.feature_log_prob_
    origin_proba = clf.predict_max_proba(test_datas)
    origin_label = clf.predict(test_datas)

    accepted = []
    for index, candidate in enumerate(fit_for_class_support):
        text = candidate[1]
        c_pred = candidate[2]
        try:
            # NOTE(review): restoring by reference below assumes that
            # update(copy=True) returns new arrays and leaves the current
            # ones untouched -- confirm against the update() implementation.
            clf.bayes.class_log_prior_, clf.bayes.feature_log_prob_ = (
                clf.bayes.update(c_pred, text, copy=True))
            test_proba = clf.predict_max_proba(test_datas)
            label = clf.predict(test_datas)

            # Take the predicted class into account: a sample can score
            # highly both before and after the update and still be
            # assigned a different class.  Negate the score of every
            # sample whose label changed.
            smooth = np.where(
                np.asarray(origin_label) == np.asarray(label), 1, -1)
            # Third argument is the output array: the scores are
            # modified in place.
            np.multiply(test_proba, smooth, test_proba)

            if pair_test(origin_proba, test_proba):
                loss = clf.metrics_another_zero_one_loss(
                    origin_proba, test_proba)
                accepted.append((loss, text, c_pred, index))
        finally:
            # Always put the baseline parameters back, even when scoring
            # or testing raises, so the caller never sees a trial update.
            clf.bayes.class_log_prior_ = origin_class_log_prior
            clf.bayes.feature_log_prob_ = origin_feature_log_prob
    return accepted
def handle_four(clf):
    """Select incremental samples with the no-significant-difference test.

    If ``handle(clf, "zero")`` yields a truthy result it is returned
    unchanged.  Otherwise every row of the module-level ``fit_incr_datas``
    is tried in turn: the row is labelled with ``clf.predict``, the Bayes
    parameters are temporarily replaced by the result of
    ``clf.bayes.update(..., copy=True)``, the module-level ``test_datas``
    are re-scored, and the row is kept only when ``pair_test`` accepts the
    comparison of the scores before and after the update.

    :param clf: classifier wrapper; must expose ``bayes`` (with
        ``class_log_prior_``, ``feature_log_prob_`` and ``update``),
        ``predict``, ``predict_max_proba`` and
        ``metrics_another_zero_one_loss``.
    :return: the truthy result of ``handle(clf, "zero")``, or else a list
        of ``(loss, row, predicted_class, row_index)`` tuples, one per
        accepted row, in row order.
    """
    predict_true = handle(clf, "zero")
    if predict_true:
        return predict_true

    # Baseline parameters and baseline scores on the test set.
    origin_class_log_prior = clf.bayes.class_log_prior_
    origin_feature_log_prob = clf.bayes.feature_log_prob_
    origin_proba = clf.predict_max_proba(test_datas)
    origin_label = clf.predict(test_datas)

    accepted = []
    # fit_incr_datas only needs .shape and .getrow() here
    # (presumably a scipy sparse matrix -- TODO confirm).
    for index in range(fit_incr_datas.shape[0]):
        text = fit_incr_datas.getrow(index)
        # The row is labelled by the baseline model, before any update.
        c_pred = clf.predict(text)[0]
        try:
            # NOTE(review): restoring by reference below assumes that
            # update(copy=True) returns new arrays and leaves the current
            # ones untouched -- confirm against the update() implementation.
            clf.bayes.class_log_prior_, clf.bayes.feature_log_prob_ = (
                clf.bayes.update(c_pred, text, copy=True))
            test_proba = clf.predict_max_proba(test_datas)
            label = clf.predict(test_datas)

            # Take the predicted class into account: a sample can score
            # highly both before and after the update and still be
            # assigned a different class.  Negate the score of every
            # sample whose label changed.
            smooth = np.where(
                np.asarray(origin_label) == np.asarray(label), 1, -1)
            # Third argument is the output array: the scores are
            # modified in place.
            np.multiply(test_proba, smooth, test_proba)

            if pair_test(origin_proba, test_proba):
                loss = clf.metrics_another_zero_one_loss(
                    origin_proba, test_proba)
                accepted.append((loss, text, c_pred, index))
        finally:
            # Always put the baseline parameters back, even when scoring
            # or testing raises, so the caller never sees a trial update.
            clf.bayes.class_log_prior_ = origin_class_log_prior
            clf.bayes.feature_log_prob_ = origin_feature_log_prob
    return accepted
def handle_four(clf):
    """Select incremental samples with the no-significant-difference test.

    If ``handle(clf, "zero")`` yields a truthy result it is returned
    unchanged.  Otherwise every row of the module-level ``fit_incr_datas``
    is tried in turn: the row is labelled with ``clf.predict``, the Bayes
    parameters are temporarily replaced by the result of
    ``clf.bayes.update(..., copy=True)``, the module-level ``test_datas``
    are re-scored, and the row is kept only when ``pair_test`` accepts the
    comparison of the scores before and after the update.

    :param clf: classifier wrapper; must expose ``bayes`` (with
        ``class_log_prior_``, ``feature_log_prob_`` and ``update``),
        ``predict``, ``predict_max_proba`` and
        ``metrics_another_zero_one_loss``.
    :return: the truthy result of ``handle(clf, "zero")``, or else a list
        of ``(loss, row, predicted_class, row_index)`` tuples, one per
        accepted row, in row order.
    """
    predict_true = handle(clf, "zero")
    if predict_true:
        return predict_true

    # Baseline parameters and baseline scores on the test set.
    origin_class_log_prior = clf.bayes.class_log_prior_
    origin_feature_log_prob = clf.bayes.feature_log_prob_
    origin_proba = clf.predict_max_proba(test_datas)
    origin_label = clf.predict(test_datas)

    accepted = []
    # fit_incr_datas only needs .shape and .getrow() here
    # (presumably a scipy sparse matrix -- TODO confirm).
    for index in range(fit_incr_datas.shape[0]):
        text = fit_incr_datas.getrow(index)
        # The row is labelled by the baseline model, before any update.
        c_pred = clf.predict(text)[0]
        try:
            # NOTE(review): restoring by reference below assumes that
            # update(copy=True) returns new arrays and leaves the current
            # ones untouched -- confirm against the update() implementation.
            clf.bayes.class_log_prior_, clf.bayes.feature_log_prob_ = (
                clf.bayes.update(c_pred, text, copy=True))
            test_proba = clf.predict_max_proba(test_datas)
            label = clf.predict(test_datas)

            # Take the predicted class into account: a sample can score
            # highly both before and after the update and still be
            # assigned a different class.  Negate the score of every
            # sample whose label changed.
            smooth = np.where(
                np.asarray(origin_label) == np.asarray(label), 1, -1)
            # Third argument is the output array: the scores are
            # modified in place.
            np.multiply(test_proba, smooth, test_proba)

            if pair_test(origin_proba, test_proba):
                loss = clf.metrics_another_zero_one_loss(
                    origin_proba, test_proba)
                accepted.append((loss, text, c_pred, index))
        finally:
            # Always put the baseline parameters back, even when scoring
            # or testing raises, so the caller never sees a trial update.
            clf.bayes.class_log_prior_ = origin_class_log_prior
            clf.bayes.feature_log_prob_ = origin_feature_log_prob
    return accepted