def __classifict(self, feature, sentences, incr=False):
    """Label every sentence with an objective/emotion prediction.

    The sentences go through two stages. First the objective classifier is
    run; then, for each sentence it labels "Y", the emotion classifier
    supplies the final label. Any other objective label is kept as the
    final label.

    :param feature: feature extractor; its ``subjective`` flag is set to
        False for the first stage and to True for the second, and it is left
        True on return.
    :param sentences: a single string or a sequence of strings.
    :param incr: forwarded unchanged to ``get_emotion_classification``.
    :return: list with one entry per sentence. Sentences whose index is
        reported in the third value returned by ``feature.get_key_words``
        (in either stage) get ``"none(insufficient key_words)"``.
    """
    # A bare string is handled as a batch of one sentence.
    if isinstance(sentences, basestring):
        sentences = [sentences]

    # --- objective / subjective stage ---
    feature.subjective = False
    objective_clf = get_objective_classification(feature)
    obj_rows, obj_labels, obj_skipped = feature.get_key_words(sentences)
    if sp.issparse(obj_rows):
        obj_matrix = obj_rows
    else:
        obj_matrix = feature.cal_weight_improve(obj_rows, obj_labels)
    obj_pred = objective_clf.predict(obj_matrix)

    # --- emotion stage ---
    feature.subjective = True
    emotion_clf = get_emotion_classification(feature, incr=incr)
    emo_rows, emo_labels, emo_skipped = feature.get_key_words(sentences)
    if sp.issparse(emo_rows):
        emo_matrix = emo_rows
    else:
        emo_matrix = feature.cal_weight_improve(emo_rows, emo_labels)

    labels = []
    for position in range(len(sentences)):
        if position in obj_skipped or position in emo_skipped:
            labels.append("none(insufficient key_words)")
            continue

        # The indexing below assumes skipped sentences have no row in the
        # matrices/predictions, so the sentence index is shifted by the
        # number of skipped indices that precede it.
        obj_offset = np.sum(np.asarray(obj_skipped) < position)
        emo_offset = np.sum(np.asarray(emo_skipped) < position)

        objective_label = obj_pred[position - obj_offset]
        if objective_label == "Y":
            # Only "Y" sentences are passed to the emotion classifier.
            label = emotion_clf.predict(emo_matrix[position - emo_offset])[0]
        else:
            label = obj_pred[position - obj_offset]
        labels.append(label)

    return labels
def __classifict(self, feature, sentences, incr=False):
    """Label every sentence with an objective/emotion prediction.

    The sentences go through two stages. First the objective classifier is
    run; then, for each sentence it labels "Y", the emotion classifier
    supplies the final label. Any other objective label is kept as the
    final label.

    :param feature: feature extractor; its ``subjective`` flag is set to
        False for the first stage and to True for the second, and it is left
        True on return.
    :param sentences: a single string or a sequence of strings.
    :param incr: forwarded unchanged to ``get_emotion_classification``.
    :return: list with one entry per sentence. Sentences whose index is
        reported in the third value returned by ``feature.get_key_words``
        (in either stage) get ``"none(insufficient key_words)"``.
    """
    # A bare string is handled as a batch of one sentence.
    if isinstance(sentences, basestring):
        sentences = [sentences]

    # --- objective / subjective stage ---
    feature.subjective = False
    objective_clf = get_objective_classification(feature)
    obj_rows, obj_labels, obj_skipped = feature.get_key_words(sentences)
    if sp.issparse(obj_rows):
        obj_matrix = obj_rows
    else:
        obj_matrix = feature.cal_weight_improve(obj_rows, obj_labels)
    obj_pred = objective_clf.predict(obj_matrix)

    # --- emotion stage ---
    feature.subjective = True
    emotion_clf = get_emotion_classification(feature, incr=incr)
    emo_rows, emo_labels, emo_skipped = feature.get_key_words(sentences)
    if sp.issparse(emo_rows):
        emo_matrix = emo_rows
    else:
        emo_matrix = feature.cal_weight_improve(emo_rows, emo_labels)

    labels = []
    for position in range(len(sentences)):
        if position in obj_skipped or position in emo_skipped:
            labels.append("none(insufficient key_words)")
            continue

        # The indexing below assumes skipped sentences have no row in the
        # matrices/predictions, so the sentence index is shifted by the
        # number of skipped indices that precede it.
        obj_offset = np.sum(np.asarray(obj_skipped) < position)
        emo_offset = np.sum(np.asarray(emo_skipped) < position)

        objective_label = obj_pred[position - obj_offset]
        if objective_label == "Y":
            # Only "Y" sentences are passed to the emotion classifier.
            label = emotion_clf.predict(emo_matrix[position - emo_offset])[0]
        else:
            label = obj_pred[position - obj_offset]
        labels.append(label)

    return labels
def classifict(feature, sentences, incr=False, out=False):
    """Classify sentences and either print the labels or dump them as XML.

    The sentences go through two stages. First the objective classifier is
    run; then, for each sentence it labels "Y", the emotion classifier
    supplies the final label. Any other objective label is kept as the
    final label. Sentences whose index is reported in the third value
    returned by ``feature.get_key_words`` (in either stage) are not
    classified.

    :param feature: feature extractor; its ``subjective`` flag is set to
        False for the first stage and to True for the second, and it is left
        True on return.
    :param sentences: a single string or a sequence of strings.
    :param incr: forwarded unchanged to ``get_emotion_classification``.
    :param out: when true, write one ``<weibo>`` element per sentence to a
        timestamp-named ``.xml`` file under ``OUT_BASE_URL/out0``;
        otherwise print the list of labels of the classified sentences.
    :return: None.
    """
    # A bare string is handled as a batch of one sentence.
    if isinstance(sentences, basestring):
        sentences = [sentences]

    # Get the objective/subjective classifier.
    feature.subjective = False
    objective_clf = get_objective_classification(feature)

    # Test set, objective/subjective part.
    test_datas_objective, c_true_objective, danger_index_objective = \
        feature.get_key_words(sentences)
    test_objective = test_datas_objective
    if not sp.issparse(test_datas_objective):
        test_objective = feature.cal_weight_improve(
            test_datas_objective, c_true_objective)
    c_pred_objective = objective_clf.predict(test_objective)

    # Get the emotion classifier.
    feature.subjective = True
    emotion_clf = get_emotion_classification(feature, incr=incr)

    # Test set, emotion part.
    test_datas, c_true, danger_index = feature.get_key_words(sentences)
    test = test_datas
    if not sp.issparse(test_datas):
        test = feature.cal_weight_improve(test_datas, c_true)

    # Loop-invariant conversions, done once instead of once per sentence.
    skipped_objective = np.asarray(danger_index_objective)
    skipped_emotion = np.asarray(danger_index)

    c_pred = []
    # Sentence index -> (label, emotion tag).  c_pred omits every sentence
    # skipped by *either* stage, so it cannot be indexed with an offset
    # derived from the emotion-stage skip list alone; the XML writer looks
    # results up here by sentence index instead.
    classified = {}
    for i in range(len(sentences)):
        if i in danger_index_objective or i in danger_index:
            continue
        # The indexing assumes skipped sentences have no row in the
        # matrices/predictions, so shift by the number of skipped indices
        # that precede this sentence.
        before_i_in_danger_obj = np.sum(skipped_objective < i)
        before_i_in_danger_ = np.sum(skipped_emotion < i)
        objective_label = c_pred_objective[i - before_i_in_danger_obj]
        if objective_label == "Y":
            c = emotion_clf.predict(test[i - before_i_in_danger_])[0]
        else:
            c = objective_label
        c_pred.append(c)
        classified[i] = (c, "N" if objective_label == "N" else "Y")

    if not out:
        # Parenthesised single argument prints the same on Python 2 and 3.
        print(c_pred)
        return

    # NOTE(review): layout whitespace of this template was reconstructed
    # from a whitespace-collapsed source; confirm against the expected
    # output format.  Sentences are written unescaped, as before.
    weibo_template = (
        '<weibo emotion-type="%s">\n'
        '    <sentence emotion-1-type="%s" emotion-2-type="none" '
        'emotion-tag="%s">\n'
        '        %s\n'
        '    </sentence>\n'
        '</weibo>\n'
    )

    dir_ = os.path.join(OUT_BASE_URL, "out0")
    FileUtil.mkdirs(dir_)
    current = time.strftime('%Y-%m-%d %H:%M:%S')
    o = os.path.join(dir_, current + ".xml")
    with open(o, "w") as fp:
        for i, s in enumerate(sentences):
            if i in classified:
                label, tag = classified[i]
                fp.write(weibo_template % (label, label, tag, s))
            else:
                fp.write(weibo_template % (
                    "None", "None", "N",
                    s + "\n Can't recognize because it has insufficient key_words"))