Esempi in Python per get_sentences, esempi in Python per snownlp.normal.get_sentences

Esempio n. 1

0

Mostra file

File: KeywordsHandler.py Progetto: Dreamcatcher-GIS/ugc.aggregator

 def extract_keyword_by_thulac(self):
     """Count the nouns found in the hotel comments, most frequent first.

     Every row returned by ``self.dao.get_hotel_comments()`` is split into
     sentences with ``normal.get_sentences`` (the text is taken from index
     2 of the row). Each sentence is tagged with ``self.thu.cut`` and the
     tokens whose tag equals ``"n"`` are tallied.

     Returns:
         A list of ``(word, count)`` pairs sorted by descending count.
     """
     # Read every comment from the corpus and split it into sentences.
     sents = []
     for comm in self.dao.get_hotel_comments():
         sents.extend(normal.get_sentences(comm[2]))
     print("length of sentences:%d" % len(sents))

     # Part-of-speech tag every sentence.  Each token is split on "_";
     # presumably tokens look like "word_tag" -- verify against thu.cut.
     pos_sents = []
     for sent in sents:
         try:
             tagged = [token.split("_")
                       for token in self.thu.cut(sent.encode("utf-8"))]
         except Exception:
             # Best effort: show the sentence that could not be tagged and
             # carry on.  Unlike a bare ``except:``, KeyboardInterrupt and
             # SystemExit are allowed to propagate.
             print(sent)
             continue
         pos_sents.append(tagged)
     print("length of pos_sents:%d" % len(pos_sents))

     # Pick out the nouns and count them.
     print("counting")
     noun_dict = {}
     for pos_sent in pos_sents:
         for word in pos_sent:
             if word[1] == "n":
                 noun_dict[word[0]] = noun_dict.get(word[0], 0) + 1
     return sorted(noun_dict.items(), key=lambda pair: pair[1], reverse=True)

Esempio n. 2

0

Mostra file

 def extract_keyword_by_thulac(self):
     """Return noun frequencies of the hotel comments, highest count first.

     Comments come from ``self.dao.get_hotel_comments()``; the text at
     index 2 of each row is split into sentences by
     ``normal.get_sentences`` and every sentence is tagged with
     ``self.thu.cut``.  Tokens tagged ``"n"`` are counted.

     Returns:
         A list of ``(word, count)`` tuples ordered by descending count.
     """
     sents = []
     comm_list = self.dao.get_hotel_comments()
     # Read each corpus entry and split it into sentences.
     for comm in comm_list:
         sents.extend(normal.get_sentences(comm[2]))
     print("length of sentences:%d" % len(sents))

     # Part-of-speech tag each sentence.
     pos_sents = []
     for sent in sents:
         try:
             # Built eagerly so that a failure while splitting a token is
             # handled here rather than later while counting.
             tagged = [token.split("_")
                       for token in self.thu.cut(sent.encode("utf-8"))]
         except Exception:
             # Skip sentences that cannot be tagged, but do not swallow
             # KeyboardInterrupt/SystemExit as a bare ``except:`` would.
             print(sent)
             continue
         pos_sents.append(tagged)
     print("length of pos_sents:%d" % len(pos_sents))

     # Select the nouns and tally them.
     print("counting")
     noun_dict = {}
     for pos_sent in pos_sents:
         for word in pos_sent:
             if word[1] != "n":
                 continue
             noun_dict[word[0]] = noun_dict.get(word[0], 0) + 1
     return sorted(noun_dict.items(), key=lambda pair: pair[1], reverse=True)

Esempio n. 3

0

Mostra file

def main():
    """Print the five top-ranked sentences, then the five top keywords.

    The input is the name ``text``, which is not defined in this function
    and is expected to exist in the enclosing module scope.
    """
    normalized = normal.zh2hans(text)
    sents = normal.get_sentences(normalized)
    # One stop-word-filtered token list per sentence.
    doc = [normal.filter_stop(seg.seg(sentence)) for sentence in sents]

    sentence_rank = textrank.TextRank(doc)
    sentence_rank.solve()
    for position in sentence_rank.top_index(5):
        print(sents[position])

    keyword_rank = textrank.KeywordTextRank(doc)
    keyword_rank.solve()
    for keyword in keyword_rank.top_index(5):
        print(keyword)

Esempio n. 4

0

Mostra file

File: simple_response.py Progetto: TechBridgeWeekly/WhatsGag

def parse_keyword(text):
    """Return the top five TextRank keywords of ``text``.

    Args:
        text: byte string that is decoded as UTF-8 before processing.

    Returns:
        A list with the items yielded by ``KeywordTextRank.top_index(5)``.
    """
    normalized = normal.zh2hans(text.decode("UTF-8"))
    # One stop-word-filtered token list per sentence.
    doc = [normal.filter_stop(seg.seg(sentence))
           for sentence in normal.get_sentences(normalized)]

    ranker = textrank.KeywordTextRank(doc)
    ranker.solve()
    return list(ranker.top_index(5))

Esempio n. 5

0

Mostra file

File: meituan.py Progetto: GavinDon/OtaBacked

def query_xym():
    """Run a TextRank demo on a fixed sample text and print the results.

    Reads the ``offset`` and ``pageSize`` request values, builds a paged
    ``XymTripModel`` query, then prints the five top-ranked sentences and
    the five top keywords of a hard-coded sample paragraph.

    Returns:
        A fixed placeholder string.
    """
    offset = request.values['offset']
    pagesize = request.values['pageSize']

    # NOTE(review): the query is built but never used below -- confirm
    # whether its result was meant to replace the hard-coded sample text.
    resp = XymTripModel.query.offset(offset).limit(pagesize)
    t = normal.zh2hans(
        "随着智能手机和平板电脑的普及，相机也变得无处不在，而且分享照片也越来越简单。MOOC的明星教授说，把45分钟的讲座变成10分钟一段的视频让他们被迫“升级课程”。不是每个老师都能通过这种方式吸引一批学生，但是他们可以参考这个经验，为课堂制作自己的视频，例如实地考察录像。让整个班都出去跑一趟可能不可行，但利用视频和照片，可以把考察点“带”到课室中来。利用智能手机耳机上配备的话筒，还可以为视频配上讲解，从而高效地用多个视频介绍完一个知识点。"
    )
    sents = normal.get_sentences(t)
    doc = []
    for sent in sents:
        words = seg.seg(sent)
        # Filtering and appending must happen per sentence; previously they
        # ran once after the loop, so ``doc`` held only the last sentence
        # and ``words`` was unbound when there were no sentences.
        words = normal.filter_stop(words)
        doc.append(words)
    rank = textrank.TextRank(doc)
    rank.solve()
    for index in rank.top_index(5):
        print(sents[index])
    keyword_rank = textrank.KeywordTextRank(doc)
    keyword_rank.solve()
    for w in keyword_rank.top_index(5):
        print(w)
    return "\'..."

Esempio n. 6

0

Mostra file

 def extract_keyword(self):
     """Count the nouns found in the hotel comments, most frequent first.

     Every row returned by ``self.dao.get_hotel_comments()`` is split into
     sentences with ``normal.get_sentences`` (the text is taken from index
     2 of the row). Each sentence is tagged with ``pseg.cut`` and the
     words whose tag equals ``"n"`` are tallied.

     Returns:
         A list of ``(word, count)`` pairs sorted by descending count.
     """
     # Read every comment from the corpus and split it into sentences.
     sents = []
     for comm in self.dao.get_hotel_comments():
         sents.extend(normal.get_sentences(comm[2]))
     print("length of sentences:%d" % len(sents))

     # Part-of-speech tag every sentence.
     pos_sents = [pseg.cut(sent) for sent in sents]
     print("length of pos_sents:%d" % len(pos_sents))

     # Pick out the nouns and count them.
     print("counting")
     noun_dict = {}
     for pos_sent in pos_sents:
         # ``tag`` rather than ``type`` so the builtin is not shadowed.
         for key, tag in pos_sent:
             if tag == "n":
                 noun_dict[key] = noun_dict.get(key, 0) + 1
     return sorted(noun_dict.items(), key=lambda pair: pair[1], reverse=True)

Esempio n. 7

0

Mostra file

File: KeywordsHandler.py Progetto: Dreamcatcher-GIS/ugc.aggregator

 def extract_keyword(self):
     """Return noun frequencies of the hotel comments, highest count first.

     Comments come from ``self.dao.get_hotel_comments()``; the text at
     index 2 of each row is split into sentences by
     ``normal.get_sentences`` and every sentence is tagged with
     ``pseg.cut``.  Words tagged ``"n"`` are counted.

     Returns:
         A list of ``(word, count)`` tuples ordered by descending count.
     """
     sents = []
     comm_list = self.dao.get_hotel_comments()
     # Read each corpus entry and split it into sentences.
     for comm in comm_list:
         sents.extend(normal.get_sentences(comm[2]))
     print("length of sentences:%d" % len(sents))

     # Part-of-speech tag each sentence.
     pos_sents = []
     for sent in sents:
         pos_sents.append(pseg.cut(sent))
     print("length of pos_sents:%d" % len(pos_sents))

     # Select the nouns and tally them.
     print("counting")
     noun_dict = {}
     for pos_sent in pos_sents:
         # The tag is bound to ``flag`` so the builtin ``type`` stays usable.
         for key, flag in pos_sent:
             if flag != "n":
                 continue
             noun_dict[key] = noun_dict.get(key, 0) + 1
     return sorted(noun_dict.items(), key=lambda pair: pair[1], reverse=True)

Esempio n. 8

0

Mostra file

File: summary.py Progetto: LorrinWWW/TweetExtraction

def summaryOfSents(docs, limit=5):
    '''
    Pick the top-ranked sentences across several documents.

    docs = [
        'first microblog post',
        'second microblog post',
        ...
    ]

    Every document is split into sentences; each sentence is tokenised
    with jieba and stop-word filtered, and all sentences are ranked
    together with TextRank.  ``limit`` is forwarded to ``top_index``.
    Returns the selected sentences as a list.
    '''
    sents = []
    merged_doc = []
    for doc in docs:
        doc_sents = normal.get_sentences(doc)
        sents.extend(doc_sents)
        # Keep merged_doc aligned index-for-index with sents.
        merged_doc.extend(
            normal.filter_stop(jieba.lcut(sentence)) for sentence in doc_sents
        )

    ranker = textrank.TextRank(merged_doc)
    ranker.solve()
    return [sents[position] for position in ranker.top_index(limit)]

Esempio n. 9

0

Mostra file

File: test.py Progetto: MengWenkui/snownlp

MOOC是学校的一种新形式，欧伯恩建议在起步的时候，先为每门课程的课件加上指针，再利用软件工具，就可以轻松根据学生的学习进度添加课程。他希望，在学生使用在线社区的同时，教师也能发现参与在线社区的方式。

5．从线上到线下
MOOC的一个缺陷就是无法组建高效的学习小组，而教师在这方面可以大有作为。当学生们看到其他同学更新了课程内容，他们就知道谁掌握了所学的知识，从而邀请这些同学合作完成任务，或向他们请教。我经常向教师们介绍这个例子：我在Google+圈子里发了一条信息，例如“明天我们会讨论矛盾冲突在吸引读者注意力方面的作用。今晚，在你回家的路上，拍一张照片或一段录像。用文字介绍你的见闻，以证明这个观点，并邀请其他同学参与讨论。”我收到的作业包括交通堵塞，猫狗对峙，被泡在水里的花园以及足球训练中的射门。第二天，学生们就可以归纳整理前一天晚上在网络上收集到的评论了。

6．用好你的相机
随着智能手机和平板电脑的普及，相机也变得无处不在，而且分享照片也越来越简单。MOOC的明星教授说，把45分钟的讲座变成10分钟一段的视频让他们被迫“升级课程”。不是每个老师都能通过这种方式吸引一批学生，但是他们可以参考这个经验，为课堂制作自己的视频，例如实地考察录像。让整个班都出去跑一趟可能不可行，但利用视频和照片，可以把考察点“带”到课室中来。利用智能手机耳机上配备的话筒，还可以为视频配上讲解，从而高效地用多个视频介绍完一个知识点。

将MOOC应用到传统课堂教学
随着大规模网络公开课的发展，教师可以考虑把在线教育的方法应用到自己的课堂教学中。MOOC的课程制作涉及比较复杂的技术，但使用这些课程几乎不费吹灰之力，而且成本也远远不及课程制作。没有加入edX或Coursera的大部分学校可以进行更多自创内容的尝试，就像自出版一样，这也是许多cMOOC的尝试。教师也可以向自己的目标努力。通过打开课堂，建立网络社区和制作教学视频，可以让更多的教师和学生享受到MOOC的投入带来的收益。
'''


from snownlp import normal
from snownlp import seg
from snownlp.summary import textrank


if __name__ == '__main__':
    # Demo: print the ten top-ranked sentences of the module-level ``text``.
    t = normal.zh2hans(text)
    sents = normal.get_sentences(t)
    # One stop-word-filtered token list per sentence.
    doc = [normal.filter_stop(seg.seg(sentence)) for sentence in sents]
    rank = textrank.TextRank(doc)
    rank.solve()
    for position in rank.top_index(10):
        print(sents[position])

Esempio n. 10

0

Mostra file

File: nlpChinese.py Progetto: putaodoudou/nlp_Chinese

def getJson(fold, filename):
    """Summarise one article file into a JSON-ready dictionary.

    The file at ``fold + '/' + filename`` is read line by line: the first
    line is the title, the second the time, and all remaining lines form
    the article content.  The content is summarised with TextRank (five
    sentences, each followed by a space), its five top keywords are
    extracted together with their relative frequency, and a sentiment
    score is computed with ``SnowNLP``.

    Args:
        fold: directory that contains the file.
        filename: name of the file inside ``fold``.

    Returns:
        On success, a dict with the keys ``"code"`` (0), ``"message"``,
        ``"tilte"``, ``"time"``, ``"abstract"``, ``"sentiment"`` and
        ``"keywords"``.  ``"keywords"`` always holds five dicts; a dict has
        ``"word"`` and ``"frequency"`` when a keyword exists for that rank
        and is empty otherwise.  When an ``IOError`` occurs, a dict with
        only ``"code"`` (1) and ``"message"`` is returned.

    NOTE(review): the key ``"tilte"`` and the message ``"sucess"`` are
    misspelled but kept unchanged because callers may depend on them.
    """
    result = {}
    try:
        title = ''
        timestamp = ''
        body_lines = []
        path = fold + '/' + filename
        # ========================================
        #    Read the title, time and content of the file
        # ========================================
        # The ``with`` block guarantees the handle is closed, also on error.
        with open(path, 'r') as source:
            for position, line in enumerate(source):
                if position == 0:
                    title = line
                elif position == 1:
                    timestamp = line
                else:
                    body_lines.append(line)
        content = u''.join(body_lines)

        # ========================================
        #      Build the abstract
        # ========================================
        normalized = normal.zh2hans(content)
        sents = normal.get_sentences(normalized)
        doc = [normal.filter_stop(seg.seg(sent)) for sent in sents]
        rank = textrank.TextRank(doc)
        rank.solve()
        abstract = ''.join(sents[index] + ' ' for index in rank.top_index(5))

        keyword_rank = textrank.KeywordTextRank(doc)
        keyword_rank.solve()
        # Always five slots; slots without a keyword stay empty dicts.
        keywords = [{} for _ in range(5)]
        content_length = float(len(content))
        for slot, w in zip(keywords, keyword_rank.top_index(5)):
            slot["word"] = w
            slot["frequency"] = float(content.count(w)) / content_length

        s = SnowNLP(content)
        score = (s.sentiments - 0.5) * 2  # rescale to the range -1..1

        result["code"] = 0
        result["message"] = "sucess"
    except IOError:
        result["code"] = 1
        result["message"] = "wrong format"
        return result

    result["tilte"] = title.strip()
    result["time"] = timestamp.strip()
    result['abstract'] = abstract
    result['sentiment'] = score
    result["keywords"] = keywords

    return result

Esempio n. 11

0

Mostra file

6．用好你的相机
随着智能手机和平板电脑的普及，相机也变得无处不在，而且分享照片也越来越简单。MOOC的明星教授说，把45分钟的讲座变成10分钟一段的视频让他们被迫“升级课程”。不是每个老师都能通过这种方式吸引一批学生，但是他们可以参考这个经验，为课堂制作自己的视频，例如实地考察录像。让整个班都出去跑一趟可能不可行，但利用视频和照片，可以把考察点“带”到课室中来。利用智能手机耳机上配备的话筒，还可以为视频配上讲解，从而高效地用多个视频介绍完一个知识点。

将MOOC应用到传统课堂教学
随着大规模网络公开课的发展，教师可以考虑把在线教育的方法应用到自己的课堂教学中。MOOC的课程制作涉及比较复杂的技术，但使用这些课程几乎不费吹灰之力，而且成本也远远不及课程制作。没有加入edX或Coursera的大部分学校可以进行更多自创内容的尝试，就像自出版一样，这也是许多cMOOC的尝试。教师也可以向自己的目标努力。通过打开课堂，建立网络社区和制作教学视频，可以让更多的教师和学生享受到MOOC的投入带来的收益。
'''


from snownlp import normal
from snownlp import seg
from snownlp.summary import textrank


if __name__ == '__main__':
    # Demo: print the five top-ranked sentences of the module-level
    # ``text``, followed by its five top-ranked keywords.
    t = normal.zh2hans(text)
    sents = normal.get_sentences(t)
    # One stop-word-filtered token list per sentence.
    doc = [normal.filter_stop(seg.seg(sentence)) for sentence in sents]

    rank = textrank.TextRank(doc)
    rank.solve()
    for position in rank.top_index(5):
        print(sents[position])

    keyword_rank = textrank.KeywordTextRank(doc)
    keyword_rank.solve()
    for keyword in keyword_rank.top_index(5):
        print(keyword)