コード例 #1
0
ファイル: SimilarityDegree.py プロジェクト: jryyufeng/-SNS-
    def wordeee(self, text, topic, question):
        """Summarise the most frequent words of ``text`` for one question.

        ``text`` is segmented with ``jieba.cut(..., cut_all=True)``; tokens
        shorter than two characters are ignored.  Of the 20 most common
        remaining tokens, those containing a newline are discarded, and the
        rest are recorded together with their combined share of all counted
        tokens.

        Args:
            text: The text to segment and count.
            topic: Value stored unchanged under the ``'itstopic'`` key.
            question: Path of a file whose full contents become the
                ``'title'`` of this entry.

        Returns:
            A dict with ``'questionlist'`` (the *shared* ``self.questionlist``
            after this call's entry has been appended) and ``'itstopic'``.
            Each entry holds ``'answer_words'``, ``'p'`` (a percentage) and
            ``'title'``.  Nothing is written to the database here; the dict is
            only returned.
        """
        from collections import Counter

        words = [word for word in jieba.cut(text, cut_all=True) if len(word) >= 2]
        total = len(words)

        # A direct membership test avoids coercing each token through str().
        kept = [
            (word, freq)
            for word, freq in Counter(words).most_common(20)
            if '\n' not in word
        ]
        # ``kept`` is empty whenever ``total`` is 0, so the division is safe.
        # The 0.0 start value keeps the result a float even then.
        share = sum((float(freq) / float(total) for _, freq in kept), 0.0)

        # Close the file deterministically instead of leaking the handle.
        with open(question) as question_file:
            title = question_file.read()

        entry = {
            'answer_words': [word for word, _ in kept],
            'p': share * 100,  # percentage
            'title': title,
        }
        self.questionlist.append(entry)

        return {
            'questionlist': self.questionlist,
            'itstopic': topic,
        }
コード例 #2
0
ファイル: pzh.py プロジェクト: jryyufeng/zhihu_spider
 def __init__(self):
     """Set up empty bookkeeping containers, then the topic and the logger.

     ``self.topic`` is whatever ``getopic()`` returns and ``self.lo`` is a
     fresh ``mongDB.Logger`` instance; both are created after the plain
     attributes, in that order.
     """
     # Plain state first: these assignments have no side effects.
     self.list_p = []
     self.ltable1 = []
     self.dic_name = {}
     self.qqcount = 0
     # Collaborators last, in the original order.
     self.topic = getopic()
     self.lo = mongDB.Logger()
コード例 #3
0
ファイル: pzhn.py プロジェクト: jryyufeng/zhihu_spider
def getpaint(authors, topic):
    """Insert one record per author into ``db_table1``.

    Each record carries the author's ``name``, their ``follower_count``
    (stored under ``'num'``) and the given ``topic``.

    Args:
        authors: Iterable of objects exposing ``name`` and ``follower_count``.
        topic: Value stored unchanged under the ``'topic'`` key.
    """
    store = mongDB.Logger()
    for person in authors:
        record = {
            'name': person.name,
            'num': person.follower_count,
            'topic': topic,
        }
        store.db_table1.insert(record)
コード例 #4
0
ファイル: start_crawl.py プロジェクト: jryyufeng/zhihu_spider
 def xiangsidu(self):
     """Run the word analysis over ten answer folders and store the result.

     Folders ``F:/zhihu/answer11/`` through ``F:/zhihu/answer110/`` are
     processed in order.  For each one the folder is passed to
     ``self.xs.analyse_nr``, the result to ``self.xs.change``, and the
     resulting text to ``self.xs.wordeee`` together with ``self.aa.topic``
     and the folder's ``q.txt`` path.  Only the value returned for the last
     folder is inserted into ``db_table7``, once, after the loop.
     """
     store = mongDB.Logger()
     result = {}
     for index in range(1, 11):
         folder = 'F:/zhihu/answer1' + str(index) + '/'
         answer_files = self.xs.analyse_nr(folder)
         merged_text = self.xs.change(answer_files)
         result = self.xs.wordeee(merged_text, self.aa.topic, folder + "q.txt")
     store.db_table7.insert(result)
コード例 #5
0
ファイル: pzh.py プロジェクト: jryyufeng/zhihu_spider
def _follower_counts(people, limit=20):
    """Map names to follower counts for at most ``limit`` entries of ``people``.

    Entries whose ``over`` attribute is truthy are skipped, but still count
    towards ``limit``.  Any ``'.'`` in a name is replaced with ``u"(点)"``
    before the name is used as a key.
    """
    counts = {}
    # zip() with a range stops after ``limit`` items without consuming more.
    for _, person in zip(range(limit), people):
        if person.over:
            continue
        key = str(person.name)
        # NOTE(review): presumably done because these dicts are stored as
        # MongoDB sub-documents, whose field names should not contain dots
        # -- confirm against the storage layer.
        if '.' in key:
            key = key.replace('.', u"(点)")
        counts[key] = person.follower_count
    return counts


def getpaint(authors, topic):
    """Insert one record per usable author into ``db_table1``.

    Authors whose ``over`` attribute is truthy are skipped.  For every other
    author the record contains ``'name'``, ``'num'`` (the author's
    ``follower_count``), ``'topic'``, and two name-to-follower-count dicts,
    ``'follower'`` and ``'following'``, each built from at most the first 20
    entries of ``author.followers`` / ``author.followings``.

    Args:
        authors: Iterable of objects exposing ``over``, ``name``,
            ``follower_count``, ``followers`` and ``followings``.
        topic: Value stored unchanged under the ``'topic'`` key.
    """
    lol = mongDB.Logger()
    for author in authors:
        if author.over:
            continue

        # Same order as before: followings first, then followers.
        following = _follower_counts(author.followings) if author.followings else {}
        follower = _follower_counts(author.followers) if author.followers else {}

        dic_paint = {
            'name': author.name,
            'num': author.follower_count,
            'follower': follower,
            'following': following,
            'topic': topic,
        }
        lol.db_table1.insert(dic_paint)
コード例 #6
0
ファイル: start_crawl.py プロジェクト: jryyufeng/zhihu_spider
 def __init__(self):
     """Create the three collaborators used by this object.

     In order: ``self.aa`` from ``pzhn.get("")``, ``self.xs`` as a
     ``SimilarityDegree.XiangSi`` instance, and ``self.lo1`` as a
     ``mongDB.Logger`` instance.
     """
     # NOTE(review): the empty string looks like a placeholder for the topic
     # to crawl -- confirm what pzhn.get() expects.
     self.aa=pzhn.get("")#input topic
     self.xs=SimilarityDegree.XiangSi()
     self.lo1=mongDB.Logger()