def sentence_sentiment_score(dataset):
    """Score each sentence of each review against the sentiment dictionaries.

    Every item of ``dataset`` is split into sentences with
    ``tp.cut_sentence_2`` and every sentence is segmented into words with
    ``tp.segmentation``.  Walking the words of a sentence:

    * a word found in ``posdict`` (``negdict``) adds 1 to the positive
      (negative) count, after which every word lying between the previous
      sentiment word and this one is folded into that count via ``match``;
    * an exclamation mark scans the sentence backwards and adds 2 to the
      polarity of the first sentiment word it meets;
    * any other word is POS-tagged and written to ``fo`` once for each tag
      entry whose tag is ``a``, ``d``, ``v`` or ``n``.

    Returns a list with one entry per item of ``dataset``; each entry is a
    list holding ``transform_to_positive_num(poscount, negcount)`` for every
    sentence of that item.

    NOTE(review): ``"!".decode(...)`` and ``word.encode(...) + "\\n"`` rely on
    Python 2 string semantics.
    """
    # Split every review into sentences up front, before any scoring starts.
    split_reviews = [tp.cut_sentence_2(cell) for cell in dataset]

    review_scores = []
    for sentences in split_reviews:
        # Positive/negative weight of each sentence of the current review.
        sentence_scores = []
        for sentence in sentences:
            tokens = tp.segmentation(sentence, "list")  # word segmentation
            window_start = 0  # index just past the last sentiment word seen
            pos_score = 0
            neg_score = 0
            for idx, token in enumerate(tokens):
                if token in posdict:
                    pos_score += 1
                    # Words since the previous sentiment word adjust the score.
                    for modifier in tokens[window_start:idx]:
                        pos_score = match(modifier, pos_score)
                    window_start = idx + 1
                elif token in negdict:
                    neg_score += 1
                    for modifier in tokens[window_start:idx]:
                        neg_score = match(modifier, neg_score)
                    window_start = idx + 1
                # NOTE(review): both operands compare against the same ASCII
                # literal; one was presumably meant to be the full-width
                # exclamation mark -- confirm against the upstream source.
                elif token == "!".decode("utf8") or token == "!".decode("utf8"):
                    # Reinforce the sentiment word closest to the sentence end.
                    for candidate in tokens[::-1]:
                        if candidate in posdict:
                            pos_score += 2
                            break
                        if candidate in negdict:
                            neg_score += 2
                            break
                else:
                    # Log non-sentiment words carrying one of the tracked tags.
                    for tagged in tp.postagger(token, "list"):
                        if tagged[1] in ("a", "d", "v", "n"):
                            fo.write(token.encode("utf8") + "\n")
            sentence_scores.append(
                transform_to_positive_num(pos_score, neg_score)
            )
        # [[[s11_score], [s12_score], ...], [[s21_score], [s22_score], ...], ...]
        review_scores.append(sentence_scores)
    return review_scores
# NOTE(review): a second definition named count_adj_adv follows in this file
# and replaces this one when the module is loaded.
def count_adj_adv(dataset):
    """Count tokens tagged ``a``, ``d`` and ``v`` in each review.

    Each item of ``dataset`` is passed to ``tp.postagger(review, "list")``;
    the second element of every returned entry is read as its tag
    (presumably adjective / adverb / verb in the tagger's tag set -- confirm).

    Returns a list with one ``(a_count, d_count, v_count)`` tuple per review,
    in input order.
    """
    counts_per_review = []
    for review in dataset:
        tags = [tagged[1] for tagged in tp.postagger(review, "list")]
        counts_per_review.append(
            (tags.count("a"), tags.count("d"), tags.count("v"))
        )
    return counts_per_review
def count_adj_adv(dataset):
    """Tally the ``a``-, ``d``- and ``v``-tagged tokens of every review.

    Each review is tagged with ``tp.postagger(review, 'list')`` and the
    second element of every returned entry is compared against the three
    tracked tags (presumably adjective / adverb / verb -- confirm against
    the tagger's tag set).

    Returns a list of ``(a_count, d_count, v_count)`` tuples, one per item
    of ``dataset`` and in the same order.
    """
    tracked_tags = ('a', 'd', 'v')
    result = []
    for review in dataset:
        tally = [0, 0, 0]  # fresh counters for every review
        for tagged in tp.postagger(review, 'list'):
            for slot, tag in enumerate(tracked_tags):
                if tagged[1] == tag:
                    tally[slot] += 1
                    break  # first matching tag wins
        result.append(tuple(tally))
    return result