Code example #1
def textsegment():
    """Segment every stored Weibo post and persist the tokenized text.

    For each row, runs jieba's search-engine-mode segmentation on the
    post text, joins the resulting tokens with double spaces, and writes
    the segmented string back via writeWeiboData_ByNLP.
    """
    # NOTE(review): range(1, total) skips either row 0 or the last row,
    # depending on the DB's row-numbering scheme — confirm against readWeiboData.
    total = readWeiboData.count()[0]
    for row_id in range(1, total):
        raw_text = readWeiboData.getText(row_id)
        tokens = jieba.cut_for_search(raw_text)
        segmented = "  ".join(tokens)
        writeWeiboData_ByNLP.textWriteToSql(segmented, row_id)
Code example #2
def frequency():
    """Count word occurrences across all stored Weibo posts.

    Returns a Counter (a dict subclass, so callers expecting a plain dict
    still work) mapping each UTF-8 encoded word to its total occurrence
    count over every post's jieba segmentation.
    """
    # Counter replaces the hand-rolled `d.get(word, 0) + 1` accumulation.
    from collections import Counter

    counts = Counter()
    # NOTE(review): range(1, count) skips either row 0 or the last row —
    # confirm the row-numbering scheme used by readWeiboData.
    countnumber = readWeiboData.count()[0]
    for i in range(1, countnumber):
        textdata = readWeiboData.getText(i)
        # Encode to UTF-8 so keys are byte strings, matching the Python 2
        # convention used elsewhere in this file.
        counts.update(word.encode('utf8') for word in jieba.cut(textdata))
    return counts
Code example #3
def keywords():
    """Extract the top-3 keywords of each Weibo post and store them.

    For every row, decodes the post text to unicode, runs SnowNLP keyword
    extraction once, and writes the keywords — each followed by a single
    space — back to SQL via writeWeiboData.
    """
    # NOTE(review): range(1, count) skips either row 0 or the last row —
    # confirm the row-numbering scheme used by readWeiboData.
    countnumber = readWeiboData.count()[0]
    for i in range(1, countnumber):
        textdata = readWeiboData.getText(i)
        # Python-2-style bytes -> unicode conversion; SnowNLP expects unicode.
        text = SnowNLP(str(textdata).decode('utf8'))
        # Fix: call keywords(3) exactly once per post. The original invoked
        # the (expensive) extraction again for the range bound and on every
        # loop iteration.
        top_words = text.keywords(3)
        # Each keyword is followed by one space, reproducing the original
        # trailing-space output exactly (empty string when no keywords).
        texttosql = ''.join(kw + ' ' for kw in top_words)
        writeWeiboData.keywordsWriteToSql(texttosql, i)