Beispiel #1
0
def quickAnalyze():
    """Analyze up to 10000 not-yet-analyzed tweets from the DB.

    For each tweet: clean its text, run reply analysis, tokenize it with
    MeCab, and store the extracted top-N words as model.Hot rows.
    """
    # Load user/config data and open a DB session.
    u = LoadUserData(conf_path)
    dbSession = model.startSession(u)
    table, footer = readReplyTable.read(exec_path + "/common/replyTable.json")
    regexes = makeRegexes(table)
    
    # Fetch tweets that have not been analyzed yet (isAnalyze == 0),
    # capped at 10000 per run.
    q = dbSession.query(model.Tweet)
    tq = q.filter(model.Tweet.isAnalyze == 0)[:10000]
    for t in tq:
        # Per-tweet processing.
        t.text = RemoveCharacter(t.text)
        analyzeReply2(t,dbSession,table,regexes)
        t.isAnalyze = 1
        # Encode for MeCab (g_mecabencode, errors ignored), tokenize,
        # and split the raw output into per-token lines.
        t_enc = t.text.encode(g_mecabencode,'ignore')
        sarray = mecab.sparse_all(t_enc,mecabPath).split("\n")
        sarray2 = connectUnderScore(sarray)  # NOTE(review): result never used -- dead code?
        markovWordList,topNWordList = TakeWordList(sarray)
        
        # Accumulate recently seen words ("hot" nouns) as Hot records.
        for tn in topNWordList:
            hot = model.Hot()
            hot.word = unicode(tn,g_systemencode)
            dbSession.add(hot)
        dbSession.add(t)
        # Commit per tweet so progress is persisted incrementally.
        dbSession.commit()
Beispiel #2
0
 def sparse_sentence(self, s):
     """Tokenize sentence *s* with MeCab and return candidate words as a set.

     Every token is kept except those whose part-of-speech is symbol,
     auxiliary verb, or particle.
     """
     # MeCab emits one "surface<TAB>features,..." line per token; the last
     # two entries after splitting are the "EOS" marker and an empty string.
     token_lines = \
         mecab.sparse_all(s.encode("utf-8"), mecab_path).split("\n")[:-2]
     # POS categories to exclude; TODO: once enough data is collected,
     # restrict this to nouns only.
     excluded_pos = (u"記号", u"助動詞", u"助詞")
     words = set()
     for token_line in token_lines:
         fields = token_line.decode("utf-8").split("\t")
         surface = fields[0]
         pos = fields[1].split(",")[0]
         if pos not in excluded_pos:
             words.add(surface)
     return words
 def sparse_sentence(self, s):
     """Tokenize sentence *s* with MeCab and return candidate words as a set.

     Excludes tokens tagged as symbols, auxiliary verbs, or particles.
     """
     # Split MeCab output into per-token lines; the last two entries are the
     # "EOS" marker and a trailing empty string, so drop them.
     s_sparse =\
     mecab.sparse_all(s.encode("utf-8"),mecab_path).split("\n")[:-2]
     candidate = set()
     for s2 in s_sparse: # each s2 should be a single token, presumably in
                         # "surface<TAB>POS,..." form -- confirm against MeCab output
         s3 = s2.decode("utf-8").split("\t")
         s4 = s3[1].split(",")
         # Keep everything except symbols, auxiliary verbs, and particles.
         if s4[0] != u"記号" and s4[0] != u"助動詞" \
             and s4[0] != u"助詞":# once enough data is collected, restrict to nouns only
             candidate.add(s3[0])
     return candidate
Beispiel #4
0
def sparse_sentence(s, mecab_lib="/usr/lib/libmecab.so.1"):
    """Return the set of noun candidates found in sentence *s*.

    Runs MeCab over the UTF-8 encoded sentence, keeps only tokens whose
    part-of-speech is noun (名詞), and drops anything rejected by
    stopwords().

    Args:
        s: unicode sentence to analyze.
        mecab_lib: path to the MeCab shared library. Defaults to the value
            that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        A set of unicode surface forms.
    """
    # MeCab emits one "surface<TAB>features,..." line per token; the last
    # two entries after splitting are the "EOS" marker and an empty string.
    s_sparse = \
        mecab.sparse_all(s.encode("utf-8"), mecab_lib).split("\n")[:-2]
    candidate = set()
    for s2 in s_sparse:
        s3 = s2.decode("utf-8").split("\t")
        s4 = s3[1].split(",")
        # Keep nouns only, filtered through the stopword list.
        if s4[0] == u"名詞":
            if not stopwords(s3[0]):
                candidate.add(s3[0])
    return candidate