def find_keyword(word_list, lentext=3):
    """Extract keywords from a list of Thai tokens.

    Removes Thai stopwords, counts the remaining tokens with ``rank``,
    and keeps only the tokens whose count is at least ``lentext``.

    :param list word_list: a list of Thai word tokens
    :param int lentext: minimum count for a token to be kept (default 3)
    :return: dict mapping keyword -> occurrence count
    """
    # Build the stopword set once; the original rebuilt
    # set(stopwords.words('thai')) for every token in the comprehension.
    thai_stopwords = set(stopwords.words('thai'))
    filtered_words = [word for word in word_list if word not in thai_stopwords]
    word_count = rank(filtered_words)
    return {k: v for k, v in word_count.items() if v >= lentext}
def find_keyword(word_list, lentext=3):
    """Extract keywords from a list of Thai tokens.

    Removes Thai stopwords, counts the remaining tokens with ``rank``,
    and keeps only the tokens whose count is at least ``lentext``.

    :param list word_list: a list of Thai word tokens
    :param int lentext: minimum count for a token to be kept (default 3)
    :return: dict mapping keyword -> occurrence count
    """
    # Build the stopword set once; the original rebuilt
    # set(stopwords.words('thai')) for every token in the comprehension.
    thai_stopwords = set(stopwords.words('thai'))
    filtered_words = [
        word for word in word_list if word not in thai_stopwords
    ]
    word_count = rank(filtered_words)
    return {k: v for k, v in word_count.items() if v >= lentext}
def find_keyword(word_list, lentext=3):
    """Keyword-finding system.

    Principle: remove stopwords, then count the remaining words and
    return those occurring often enough.

    :param list word_list: a list of Thai word tokens
    :param int lentext: minimum count for a token to count as a keyword
        (default 3)
    :return: dict mapping keyword -> occurrence count
    """
    # Build the stopword set once; the original rebuilt
    # set(stopwords.words('thai')) for every token in the comprehension.
    thai_stopwords = set(stopwords.words('thai'))
    filtered_words = [word for word in word_list if word not in thai_stopwords]
    word_count = rank(filtered_words)
    return {k: v for k, v in word_count.items() if v >= lentext}
def wordcloud():
    """Return word-frequency data for a word cloud as a JSON response.

    In development mode the response is served from the cached
    ``wordcloud.json`` file. Otherwise the Facebook Graph API group feed
    (post messages, comments, and nested comment replies) is fetched,
    tokenized with pythainlp, counted, and returned as a list of
    ``{'word': ..., 'value': ...}`` dicts.
    """
    if DEVELOPMENT_MODE:
        with open('wordcloud.json') as f:
            return jsonify(json.load(f))
    graph = facebook.GraphAPI(access_token=FACEBOOK_USER_ACCESS_TOKEN,
                              version='2.7')
    query_string = f'fields=feed.since({SINCE})' \
        '{comments{comments{message,created_time,like_count},' \
        'message,created_time,like_count,reactions},' \
        'message,created_time,updated_time,reactions}'
    endpoint_url = f'{FACEBOOK_GROUP_ID}?{query_string}'
    feed = graph.request(endpoint_url).get('feed')
    # Collect message fragments in a list and join once at the end:
    # repeated `text += ...` is quadratic in the total text size.
    fragments = []
    for post in feed.get('data'):
        message = post.get('message')
        if message:
            fragments.append(message)
        comments = post.get('comments')
        if comments:
            for comment in comments.get('data'):
                # A comment may carry no 'message' (the original crashed
                # with TypeError on None here) — guard with ''.
                # TODO(review): confirm against actual Graph API payloads.
                fragments.append(comment.get('message') or '')
                nested = comment.get('comments')
                if nested:
                    for reply in nested.get('data'):
                        fragments.append(reply.get('message') or '')
    text = ''.join(fragments)
    from pythainlp.rank import rank
    from pythainlp.tokenize import word_tokenize
    word_list = word_tokenize(text, engine='newmm')
    word_count = rank(word_list)
    from toolz.dicttoolz import dissoc
    # Drop the bare-space "word" produced by tokenization.
    new_word_count = dissoc(word_count, ' ')
    words = [{'word': w, 'value': new_word_count[w]} for w in new_word_count]
    return jsonify(words)
def testRank(self):
    """rank() tallies duplicate tokens into a Counter."""
    tokens = ["แมว", "คน", "แมว"]
    expected = Counter({'แมว': 2, 'คน': 1})
    self.assertEqual(rank(tokens), expected)
# Demo script exercising several PyThaiNLP APIs: word segmentation,
# frequency ranking, romanization, keyboard-layout conversion, and
# number-to-words spelling. Results are simply printed.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from pythainlp.segment import segment

# Tokenize a Thai sentence ("I love the Thai language because I am Thai").
a = 'ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'
b = segment(a)
print(b)

# Dictionary-based segmentation. NOTE: this import shadows the previous
# `segment` name for the rest of the script.
from pythainlp.segment.dict import segment
print(segment(a))
print(type(b))

# Count token frequencies of the segmented sentence.
from pythainlp.rank import rank
aa = rank(b)
print(aa)

# Romanize a Thai word ("cat"). NOTE: reuses (clobbers) `b`.
from pythainlp.romanization import romanization
b=romanization("แมว")
print(b)

# Convert text typed on the wrong keyboard layout (EN<->TH).
from pythainlp.change import *
a="l;ylfu8iy["
a=texttothai(a)
b="นามรสนอำันี"
b=texttoeng(b)
print(a)
print(b)

from pythainlp.segment.dict import segment
print(segment('ปีคริสต์ศักราช'))

# Spell out a number in Thai words.
from pythainlp.number import numtowords
print("5611116.50")
print(numtowords(5611116.50))

from pythainlp.postaggers import tag
# Demo script exercising several PyThaiNLP APIs: word segmentation,
# frequency ranking, romanization, keyboard-layout conversion, and
# number-to-words spelling. Results are simply printed.
from pythainlp.segment import segment

# Tokenize a Thai sentence ("I love the Thai language because I am Thai").
a = 'ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'
b = segment(a)
print(b)

# Dictionary-based segmentation. NOTE: this import shadows the previous
# `segment` name for the rest of the script.
from pythainlp.segment.dict import segment
print(segment(a))
print(type(b))

from pythainlp.rank import rank
# BUG FIX: rank() expects a list of tokens. The original passed the raw
# string `a`, which counted individual characters. Use the segmented
# token list `b`, matching the sibling demo scripts.
aa = rank(b)
print(aa)

# Romanize a Thai word. NOTE: reuses (clobbers) `b`.
from pythainlp.romanization import romanization
b=romanization("ต้นกก")
print(b)

# Convert text typed on the wrong keyboard layout (EN<->TH).
from pythainlp.change import *
a="l;ylfu8iy["
a=texttothai(a)
b="นามรสนอำันี"
b=texttoeng(b)
print(a)
print(b)

from pythainlp.segment.dict import segment
print(segment('ฉันรักคุณ'))

# Spell out a number in Thai words.
from pythainlp.number import numtowords
print("5611116.50")
print(numtowords(5611116.50))
# Demo script exercising several PyThaiNLP APIs: word segmentation,
# frequency ranking, romanization, keyboard-layout conversion, and
# number-to-words spelling. Results are simply printed.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from pythainlp.segment import segment

# Tokenize a Thai sentence ("I love the Thai language because I am Thai").
a = 'ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'
b = segment(a)
print(b)

# Dictionary-based segmentation. NOTE: this import shadows the previous
# `segment` name for the rest of the script.
from pythainlp.segment.dict import segment
print(segment(a))
print(type(b))

# Count token frequencies of the segmented sentence.
from pythainlp.rank import rank
aa = rank(b)
print(aa)

# Romanize a Thai word ("cat"). NOTE: reuses (clobbers) `b`.
from pythainlp.romanization import romanization
b = romanization("แมว")
print(b)

# Convert text typed on the wrong keyboard layout (EN<->TH).
from pythainlp.change import *
a = "l;ylfu8iy["
a = texttothai(a)
b = "นามรสนอำันี"
b = texttoeng(b)
print(a)
print(b)

from pythainlp.segment.dict import segment
print(segment('ปีคริสต์ศักราช'))

# Spell out a number in Thai words.
from pythainlp.number import numtowords
print("5611116.50")
print(numtowords(5611116.50))

from pythainlp.postaggers import tag
word_all_mix.append(word) #print(pos_tag(word_all_thai,engine='old')) def preprocessingByList(beforeList,afterList): for word in beforeList: if (word not in stopwords and (word not in word_preposition) and (word not in double_char) and (len(word) > 1)): afterList.append(word) preprocessingByList(word_all_thai_unfiltered,word_all_thai_filtered) for word in word_all_thai_unfiltered: if(word in negative_adjective or word in positive_adjective): adjective_word.append(word) tag_counter = [] print(rank(word_all_thai_unfiltered)) print(rank(word_all_thai_filtered)) item = rank(word_all_thai_filtered) with open('document/general_data/data.json','w',encoding="utf-8") as fp: json.dump(item,fp,indent=4,ensure_ascii=False,sort_keys=True) json.dump(rank(word_all_thai_filtered),fp,indent=4,ensure_ascii=False,sort_keys=True) ###################################################### ########use adjective list for polarity train######### ###################################################### # pos tagger part # for (word,tag) in pos_tag(word_all_thai_filtered,engine="old"): # tag_counter.append(tag) # if(tag is not None and tag[0] == 'V'):
def test_rank(self):
    """rank() tallies duplicate tokens into a Counter."""
    observed = rank(["แมว", "คน", "แมว"])
    self.assertEqual(observed, Counter({'แมว': 2, 'คน': 1}))