예제 #1
0
파일: __init__.py 프로젝트: zkan/pythainlp
def find_keyword(word_list,lentext=3):
    '''
    Find keywords in a tokenized Thai text by word frequency.

    Stop words are dropped, the remaining words are counted with ``rank``
    and only the words counted at least ``lentext`` times are returned.

    :param list word_list: a list of Thai words
    :param int lentext: minimum count a word needs to be kept as a keyword
    :return: dict mapping each kept word to its count
    '''
    # Build the stop-word set once instead of once per word in the list.
    thai_stopwords = set(stopwords.words('thai'))
    filtered_words = [word for word in word_list if word not in thai_stopwords]
    word_counts = rank(filtered_words)
    return {k: v for k, v in word_counts.items() if v >= lentext}
예제 #2
0
def find_keyword(word_list, lentext=3):
    '''
    Find keywords in a tokenized Thai text by word frequency.

    Stop words are dropped, the remaining words are counted with ``rank``
    and only the words counted at least ``lentext`` times are returned.

    :param list word_list: a list of Thai words
    :param int lentext: minimum count a word needs to be kept as a keyword
    :return: dict mapping each kept word to its count
    '''
    # Build the stop-word set once instead of once per word in the list.
    thai_stopwords = set(stopwords.words('thai'))
    filtered_words = [
        word for word in word_list if word not in thai_stopwords
    ]
    word_counts = rank(filtered_words)
    return {k: v for k, v in word_counts.items() if v >= lentext}
예제 #3
0
def find_keyword(word_list,lentext=3):
    '''
    Keyword finder.

    Principle: remove the stop words, count the remaining words with
    ``rank`` and return the words whose count reaches the threshold.

    find_keyword(word_list,lentext=3)
    word_list is the list of words to search.
    lentext is the minimum number of times a word must occur in the list
    to be reported as a keyword; the default is 3.

    Returns a dict mapping each kept word to its count.
    '''
    # Build the stop-word set once instead of once per word in the list.
    thai_stopwords = set(stopwords.words('thai'))
    filtered_words = [word for word in word_list if word not in thai_stopwords]
    word_counts = rank(filtered_words)
    return {k: v for k, v in word_counts.items() if v >= lentext}
예제 #4
0
def wordcloud():
    """Return the word-frequency payload for the word cloud as JSON.

    In development mode the payload is loaded from the local
    ``wordcloud.json`` file. Otherwise the group feed since ``SINCE`` is
    requested from the Facebook Graph API; the messages of posts, of their
    comments and of the replies to those comments are concatenated,
    tokenized with the ``newmm`` engine and counted with ``rank``.

    Returns:
        The ``jsonify`` response. Outside development mode it wraps a list
        of ``{'word': <token>, 'value': <count>}`` entries, with the
        single-space token removed.
    """
    if DEVELOPMENT_MODE:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open('wordcloud.json', encoding='utf-8') as f:
            return jsonify(json.load(f))

    graph = facebook.GraphAPI(access_token=FACEBOOK_USER_ACCESS_TOKEN,
                              version='2.7')

    query_string = f'fields=feed.since({SINCE})' \
        '{comments{comments{message,created_time,like_count},' \
        'message,created_time,like_count,reactions},' \
        'message,created_time,updated_time,reactions}'
    endpoint_url = f'{FACEBOOK_GROUP_ID}?{query_string}'
    # Tolerate a response without a 'feed' entry instead of failing on None.
    feed = graph.request(endpoint_url).get('feed') or {}

    # Collect the fragments and join once rather than growing a string.
    fragments = []
    for post in feed.get('data') or []:
        message = post.get('message')
        if not message:
            # Posts without a message are skipped, comments included.
            continue
        fragments.append(message)
        for comment in (post.get('comments') or {}).get('data') or []:
            # A comment or reply can lack 'message'; treat it as empty text
            # so the concatenation never sees None.
            fragments.append(comment.get('message') or '')
            for reply in (comment.get('comments') or {}).get('data') or []:
                fragments.append(reply.get('message') or '')
    text = ''.join(fragments)

    from pythainlp.rank import rank
    from pythainlp.tokenize import word_tokenize
    from toolz.dicttoolz import dissoc

    word_list = word_tokenize(text, engine='newmm')
    word_count = rank(word_list)

    # Drop the entry for the single-space token before building the payload.
    new_word_count = dissoc(word_count, ' ')
    words = [
        {'word': word, 'value': new_word_count[word]}
        for word in new_word_count
    ]

    return jsonify(words)
예제 #5
0
	def testRank(self):
		"""rank() on a word list must equal the Counter of its words."""
		words = ["แมว","คน","แมว"]
		actual = rank(words)
		expected = Counter({'แมว': 2, 'คน': 1})
		self.assertEqual(actual, expected)
예제 #6
0
파일: test.py 프로젝트: nlsntcs/pythainlp
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Manual smoke test: calls several pythainlp helpers in turn and prints
# what each one returns. Statement order matters because `a`, `b` and
# `segment` are rebound along the way.
from pythainlp.segment import segment
a = 'ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'
b = segment(a)
print(b)
# Rebind `segment` to the pythainlp.segment.dict implementation and run it
# on the same text.
from pythainlp.segment.dict import segment
print(segment(a))
# `b` still holds the result of the first segment() call.
print(type(b))
from pythainlp.rank import rank
# Rank the output of the first segment() call.
aa = rank(b)
print(aa)
from pythainlp.romanization import romanization
# `b` is reused for the romanization result from here on.
b=romanization("แมว")
print(b)
# NOTE(review): texttothai/texttoeng are not imported by name anywhere in
# this script, so they presumably come from this star import - confirm.
from pythainlp.change import *
# presumably Thai text typed with the wrong keyboard layout - TODO confirm
a="l;ylfu8iy["
a=texttothai(a)
b="นามรสนอำันี"
b=texttoeng(b)
print(a)
print(b)
# Redundant re-import: `segment` is already the pythainlp.segment.dict one.
from pythainlp.segment.dict import segment
print(segment('ปีคริสต์ศักราช'))
from pythainlp.number import numtowords
# Print the input as text first, then the result of numtowords for it.
print("5611116.50")
print(numtowords(5611116.50))

from pythainlp.postaggers import tag
예제 #7
0
파일: test.py 프로젝트: magma2/pythainlp
# Manual smoke test: calls several pythainlp helpers in turn and prints
# what each one returns. Statement order matters because `a`, `b` and
# `segment` are rebound along the way.
from pythainlp.segment import segment
a = 'ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'
b = segment(a)
print(b)
# Rebind `segment` to the pythainlp.segment.dict implementation and run it
# on the same text.
from pythainlp.segment.dict import segment
print(segment(a))
# `b` still holds the result of the first segment() call.
print(type(b))
from pythainlp.rank import rank
# Rank the segmented output `b`, not the raw string `a`: iterating a string
# yields single characters, so ranking `a` would not rank words.
aa = rank(b)
print(aa)
from pythainlp.romanization import romanization
# `b` is reused for the romanization result from here on.
b=romanization("ต้นกก")
print(b)
# NOTE(review): texttothai/texttoeng are not imported by name anywhere in
# this script, so they presumably come from this star import - confirm.
from pythainlp.change import *
# presumably Thai text typed with the wrong keyboard layout - TODO confirm
a="l;ylfu8iy["
a=texttothai(a)
b="นามรสนอำันี"
b=texttoeng(b)
print(a)
print(b)
# Redundant re-import: `segment` is already the pythainlp.segment.dict one.
from pythainlp.segment.dict import segment
print(segment('ฉันรักคุณ'))
from pythainlp.number import numtowords
# Print the input as text first, then the result of numtowords for it.
print("5611116.50")
print(numtowords(5611116.50))
예제 #8
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Manual smoke test: calls several pythainlp helpers in turn and prints
# what each one returns. Statement order matters because `a`, `b` and
# `segment` are rebound along the way.
from pythainlp.segment import segment
a = 'ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'
b = segment(a)
print(b)
# Rebind `segment` to the pythainlp.segment.dict implementation and run it
# on the same text.
from pythainlp.segment.dict import segment
print(segment(a))
# `b` still holds the result of the first segment() call.
print(type(b))
from pythainlp.rank import rank
# Rank the output of the first segment() call.
aa = rank(b)
print(aa)
from pythainlp.romanization import romanization
# `b` is reused for the romanization result from here on.
b = romanization("แมว")
print(b)
# NOTE(review): texttothai/texttoeng are not imported by name anywhere in
# this script, so they presumably come from this star import - confirm.
from pythainlp.change import *
# presumably Thai text typed with the wrong keyboard layout - TODO confirm
a = "l;ylfu8iy["
a = texttothai(a)
b = "นามรสนอำันี"
b = texttoeng(b)
print(a)
print(b)
# Redundant re-import: `segment` is already the pythainlp.segment.dict one.
from pythainlp.segment.dict import segment
print(segment('ปีคริสต์ศักราช'))
from pythainlp.number import numtowords
# Print the input as text first, then the result of numtowords for it.
print("5611116.50")
print(numtowords(5611116.50))

from pythainlp.postaggers import tag
예제 #9
0
             word_all_mix.append(word)

#print(pos_tag(word_all_thai,engine='old'))
def preprocessingByList(beforeList,afterList):
    """Append the words of ``beforeList`` that pass the filters to ``afterList``.

    A word is kept when it is in none of ``stopwords``, ``word_preposition``
    and ``double_char`` and is longer than one character. ``afterList`` is
    extended in place; nothing is returned.
    """
    for candidate in beforeList:
        rejected = (
            candidate in stopwords
            or candidate in word_preposition
            or candidate in double_char
            or len(candidate) <= 1
        )
        if rejected:
            continue
        afterList.append(candidate)

# Fill word_all_thai_filtered in place with the words that pass the filters.
preprocessingByList(word_all_thai_unfiltered,word_all_thai_filtered)

# Collect every unfiltered word found in either adjective list.
adjective_word.extend(
    word for word in word_all_thai_unfiltered
    if word in negative_adjective or word in positive_adjective
)

tag_counter = []
print(rank(word_all_thai_unfiltered))
# Rank the filtered words once and reuse the result for printing and saving.
item = rank(word_all_thai_filtered)
print(item)

# Write exactly one JSON document. Dumping the same data a second time into
# the open file yields two concatenated objects, which is not valid JSON.
with open('document/general_data/data.json','w',encoding="utf-8") as fp:
    json.dump(item,fp,indent=4,ensure_ascii=False,sort_keys=True)

######################################################
########use adjective list for polarity train#########
######################################################

# pos tagger part
# for (word,tag) in pos_tag(word_all_thai_filtered,engine="old"):
#     tag_counter.append(tag)
#     if(tag is not None and tag[0] == 'V'):
예제 #10
0
파일: __init__.py 프로젝트: zkan/pythainlp
	def test_rank(self):
		"""rank() on a word list must equal the Counter of its words."""
		words = ["แมว","คน","แมว"]
		actual = rank(words)
		expected = Counter({'แมว': 2, 'คน': 1})
		self.assertEqual(actual, expected)