class AnalsisPostBar(object):
    """Turn stored post-bar data into lists of row dicts.

    All data is read through the ``StoredPd`` instance kept in
    ``self.StorePd``.
    """

    def __init__(self):
        self.StorePd = StoredPd()

    def AnalsisPosOrNeg(self, PostbarID):
        """Build a histogram of comment sentiment values for one post bar.

        The first field of every record returned by
        ``GetCommetPosOrNegbyPostBar`` is truncated to its first three
        characters (presumably a decimal string such as "0.73" -> "0.7";
        confirm against the stored procedure). Buckets that compare, as
        strings, below "1" are reported one by one; every other bucket is
        merged into a single "1" row, which is always emitted, even with a
        count of 0.

        Args:
            PostbarID: identifier forwarded to the stored procedure.

        Returns:
            A list of ``{"PosOrNeg": str, "Num": int}`` dicts sorted by
            ``"PosOrNeg"``.
        """
        # Function-scope import so the method does not depend on a new
        # file-level import.
        from collections import Counter

        records = self.StorePd.GetCommetPosOrNegbyPostBar(PostbarID)
        # One pass over the data instead of list.count() per distinct bucket.
        counts = Counter(record[0][0:3] for record in records)

        upper = str(1)
        rows = [
            {"PosOrNeg": bucket, "Num": count}
            for bucket, count in counts.items()
            if bucket < upper  # string comparison, as in the original
        ]
        merged = sum(
            count for bucket, count in counts.items() if not bucket < upper
        )
        rows.append({"PosOrNeg": upper, "Num": merged})
        return sorted(rows, key=operator.itemgetter("PosOrNeg"))

    def AnalsisUserByLevel(self, PostbarID):
        """Return the user-level distribution for one post bar.

        Args:
            PostbarID: identifier forwarded to ``GetPostBarUserByLevel``.

        Returns:
            A list of ``{"UserLevel": record[0], "Num": record[1]}`` dicts,
            in the order the stored procedure returned the records.
        """
        records = self.StorePd.GetPostBarUserByLevel(PostbarID)
        return [
            {"UserLevel": record[0], "Num": record[1]} for record in records
        ]

    def AnalsisUserBytime(self):
        """Analyze user active time (not implemented yet)."""
        pass

    def AnalsisWordCloud(self):
        """Analyze word frequency (not implemented yet)."""
        pass
class ProductModel():
    """Score stored comments with SnowNLP and maintain the training corpora."""

    def __init__(self):
        self.StoredPd = StoredPd()

    def GetPosOrNeg(self):
        """Score every stored comment and append it to a corpus file.

        For each record from ``GetAllComments`` (``record[0]`` is passed back
        to ``UpdatePosOrNeg`` as the key, ``record[1]`` is the text), the
        SnowNLP sentiment value is written to the database. Text scoring
        ``<= 0.4`` is appended to ``neg.txt``; everything else goes to
        ``pos.txt``. Progress is printed every 100 comments.
        """
        print("Begin PosOrNeg")
        # ``with`` guarantees both corpus files are closed even when scoring
        # or the database update raises part-way through the loop.
        with open('../../static/neg.txt', 'a+', encoding='utf-8') as neg_file, \
                open('../../static/pos.txt', 'a+', encoding='utf-8') as pos_file:
            Contents = self.StoredPd.GetAllComments()
            print(len(Contents))
            for num, Content in enumerate(Contents, 1):
                result = SnowNLP(Content[1]).sentiments
                self.StoredPd.UpdatePosOrNeg(Content[0], result)
                # Polarity <= 0.4 counts as negative, anything above as
                # positive.
                target = neg_file if result <= 0.4 else pos_file
                target.write(str(Content[1]) + '\n')
                if num % 100 == 0:
                    print(num)

    def TrainModel(self):
        """Train ``sentiment`` on the two corpus files and save the model."""
        print("Begin Train Model")
        sentiment.train("../../static/neg.txt", "../../static/pos.txt")
        sentiment.save("../../static/sentiment.marshal")
        print("End Train Model")

    def DataStatistics(self):
        """Pass each record from ``GetDataGroupByBar`` to ``UpdatePostBar``."""
        for data in self.StoredPd.GetDataGroupByBar():
            self.StoredPd.UpdatePostBar(data)
class PostSpiderPipeline(object):
    """Item pipeline that persists scraped items through ``StoredPd``."""

    def __init__(self):
        self.StorePd = StoredPd()

    def process_item(self, item, spider):
        """Store *item* and hand it on to the next pipeline stage.

        Items whose ``name`` attribute is ``"PostPageItem"`` are stored as
        post pages; every other item is stored as a comment.

        Args:
            item: the scraped item; must expose ``name`` and key access.
            spider: the spider that produced the item (unused).

        Returns:
            The same *item*. Scrapy requires ``process_item`` to return the
            item so that later pipeline components still receive it.
        """
        if item.name == "PostPageItem":
            self.StorePostPage(item)
        else:
            self.StoreComment(item)
        return item

    def StoreComment(self, item):
        """Insert the comment row, then the user row, taken from *item*."""
        Comment = [
            item['CommentID'],
            item['UserID'],
            item['PostPageID'],
            item['Content'],
            item['UserLeveL'],
            item['Time'],
        ]
        User = [item['UserID'], item['UserName']]
        self.StorePd.insertComment(Comment)
        self.StorePd.insertUser(User)

    def StorePostPage(self, item):
        """Insert a post page and flag it as updated when the insert succeeds.

        ``item["PostBarID"]`` is URL-decoded before it is stored. The update
        flag is only set when ``insertPostPage`` returns a value greater
        than 0.
        """
        PostBarID = unquote(item["PostBarID"])
        PostPage = [
            item["PostPageID"],
            item["PostPageTitle"],
            item["UserID"],
            PostBarID,
            item["ReplyNum"],
        ]
        inserted = self.StorePd.insertPostPage(PostPage)
        if inserted > 0:
            self.StorePd.UpdateTurePostPageisUpdate(PostPage[0])
def __init__(self):
    """Create the ``StoredPd`` instance this object uses as ``self.StorePd``."""
    self.StorePd = StoredPd()
import threading import unittest import sys from shutil import copyfile from source.DataAnalysis.ProductModel import ProductModel from source.Starter.Start import StartSpider from source.Tool import Common from source.Tool.StoredProcedure import StoredPd from source.Tool.Mysqlpool import Mysql_Pool from source.Tool.RedisPro import RedisControl stp = StoredPd() RC = RedisControl() class MyTestCase(unittest.TestCase): def test_something(self): PostBars = stp.GetAllPostBar() data = {"columns": ['PostBarID', 'TotalPage', 'TotalComment'], "rows": []} for PostBar in PostBars: Total = stp.GetAllPostBarTotal(PostBar[0]) row = { "PostBarID": PostBar[0], "TotalPage": Total[0], "TotalComment": Total[1] } data["rows"].append(row) print(data) def testAnalysis(self):
"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36 Edg/88.0.705.68" ] cookies = [ "BIDUPSID=F1A79E0CD42FEB47C44C9E2A07236422; PSTM=1610345810; BAIDUID=F1A79E0CD42FEB47A529D6DD44B930CB:FG=1; bdshare_firstime=1610355090204; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=33425_33441_33272_33570_33584_26350_33266; wise_device=0; Hm_lvt_98b9d8c2fd6608d564bf2ac2ae642948=1613697712,1613722697,1613722884,1613781450; st_key_id=17; BCLID=10082525747833442867; BDSFRCVID=JY0OJeC62ClZPone-3v8M3d6vhHQZzrTH6aoVp6FM5vqegToj7muEG0P8x8g0Kub6S2qogKKQgOTHRtF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; 
H_BDCLCKID_SF=tb4toK--tKt3jt-kKn7jKtI-MUCX5-CsJgKj2hcH0KLKbJ6GK5L-bt_sDtouJ-0qbJrB_JjSbfb1MRjv34j6XqK9yPD8h6vH3IrrKl5TtUJ48DnTDMRh-R-u-NJyKMniLCv9-pn4bpQrh459XP68bTkA5bjZKxtq3mkjbPbDfn02eCKuDjtBD5b0jGRabK6aKC5bL6rJabC3ox-9XU6q2bDeQNbpLRbI5C6q0xjztb--fxDxj4jPjp0vWtvJWbbvLT7johRTWqR48xFGMMonDh83b4RHelTJHCOOVp5O5hvvhb3O3MA-yUKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_Et5tttbCH_KvjbRj_jRTpqRQaq4tehHRqWxc9WDTm_DoTBInHhPQNqfckMxrybajqapRW-Nrq-pPKKR7jqPnv5j3rbtnBKp5i0UvH3mkjbpbGfn02OpDzMM8We-4syPRIKMRnWNrJKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCFaMIIxjT_-ePDyqx5Ka43tHD7yWCkaQMJcOR59K4nnDpKA5nQ-e4vaW2De0IQKLCt-elL63MOZXb0g5n7Tbb8eBgvZ2UQkyD5ssq0x0bOtynjDWl8L2bkO3COMahkM5h7xOKQoQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3YjjTyDG0etjDqJn32XPF85nRtDbogqROoK5PpWH39BtQmJJrmWUcxabrqKDOCeMF5b5KBM-7IXJTHQg-q3R7C-IQP8D5CKtoJ3-4qjUct0x-jLnbOVn0MW-5DMx84KtnJyUPubPnnBpjJ3H8HL4nv2JcJbM5m3x6qLTKkQN3T-PKO5bRh_CcJ-J8XMC8Cej5P; BCLID_BFESS=10082525747833442867; BDSFRCVID_BFESS=JY0OJeC62ClZPone-3v8M3d6vhHQZzrTH6aoVp6FM5vqegToj7muEG0P8x8g0Kub6S2qogKKQgOTHRtF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF_BFESS=tb4toK--tKt3jt-kKn7jKtI-MUCX5-CsJgKj2hcH0KLKbJ6GK5L-bt_sDtouJ-0qbJrB_JjSbfb1MRjv34j6XqK9yPD8h6vH3IrrKl5TtUJ48DnTDMRh-R-u-NJyKMniLCv9-pn4bpQrh459XP68bTkA5bjZKxtq3mkjbPbDfn02eCKuDjtBD5b0jGRabK6aKC5bL6rJabC3ox-9XU6q2bDeQNbpLRbI5C6q0xjztb--fxDxj4jPjp0vWtvJWbbvLT7johRTWqR48xFGMMonDh83b4RHelTJHCOOVp5O5hvvhb3O3MA-yUKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_Et5tttbCH_KvjbRj_jRTpqRQaq4tehHRqWxc9WDTm_DoTBInHhPQNqfckMxrybajqapRW-Nrq-pPKKR7jqPnv5j3rbtnBKp5i0UvH3mkjbpbGfn02OpDzMM8We-4syPRIKMRnWNrJKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCFaMIIxjT_-ePDyqx5Ka43tHD7yWCkaQMJcOR59K4nnDpKA5nQ-e4vaW2De0IQKLCt-elL63MOZXb0g5n7Tbb8eBgvZ2UQkyD5ssq0x0bOtynjDWl8L2bkO3COMahkM5h7xOKQoQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3YjjTyDG0etjDqJn32XPF85nRtDbogqROoK5PpWH39BtQmJJrmWUcxabrqKDOCeMF5b5KBM-7IXJTHQg-q3R7C-IQP8D5CKtoJ3-4qjUct0x-jLnbOVn0MW-5DMx84KtnJyUPubPnnBpjJ3H8HL4nv2JcJbM5m3x6qLTKkQN3T-PKO5bRh_CcJ-J8XMC8Cej5P; delPer=0; PSINO=3; ZD_ENTRY=empty; 
BA_HECTOR=24052k2h80800h01dr1g318sl0r; BAIDUID_BFESS=BF3B3DCC37724BFCA25A9760F2B5812F:FG=1; tb_as_data=fd871a4bd286787472774118298b4ed929c70c8b1ff8db42c8361e72b5f7fd1c0703d949893c8c6161806ddf21d1cda085a22b6c4eeb8630acd8e18e42f0ccecf4a880e771368a6d9825ea584c8df719db265b8254724711df889a774d5526666596fa911499df1c0fd8a083869d863a; Hm_lpvt_98b9d8c2fd6608d564bf2ac2ae642948=1613801053; st_data=c309361a031754d5ddc35101f7bfa41dcea3f17f15b42f07078ebb68748fdf10980f7ce188a4eccf95396e3202bae7c689905b4101a5decbb86f0aba52778c0c18650dfe9c829e2d5c6f3393b229d6906067637766740a1edc78c0a3e4a7eeebcdd68e59adc4b05e7247387f4276c8697b60314a62eae4dc9c2e09a2b961e18f69f92538e3933c07f8da442fad4b5a5e; st_sign=8e0f8296", "BIDUPSID=F1A79E0CD42FEB47C44C9E2A07236422; PSTM=1610345810; BAIDUID=F1A79E0CD42FEB47A529D6DD44B930CB:FG=1; BDUSS=2U0Q3NOampUdXNVQWo1dUk4VElZd0EwNkFubjQwTlNEb3JBRVJ5Y2UzV2g5enBnRVFBQUFBJCQAAAAAAAAAAAEAAAAbepKiu-G3ybXExdbX07rcx78AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKFqE2ChahNgV; BDUSS_BFESS=2U0Q3NOampUdXNVQWo1dUk4VElZd0EwNkFubjQwTlNEb3JBRVJ5Y2UzV2g5enBnRVFBQUFBJCQAAAAAAAAAAAEAAAAbepKiu-G3ybXExdbX07rcx78AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKFqE2ChahNgV; __yjs_duid=1_f8069424467d3d28982dc56bc4c0a8201613179173897; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=33425_33441_33272_33570_33584_26350_33266; delPer=0; PSINO=3; BCLID=10268246537482939884; BDSFRCVID=CdkOJexroG3VpUnea0piM3dmiqcmDscTDYLtOwXPsp3LGJLVN4vIEG0PtD6Z1_F-2ZlgogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; 
H_BDCLCKID_SF=tR3aQ5rtKRTffjrnhPF35hJbXP6-hnjy3b7pKfOF5RD2Jl7EDPOVMJtfXpoy2q3RymJJ2-39LPO2hpRjyxv4y4Ldj4oxJpOJbI8DK40MHl51fbbvbURvD--g3-AqBM5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIEoC0XtI0hMCvnh-nhbKC3bfT2K46JHD7yWCvE3pOcOR59K4nnDpKA5Nb-56OE0mDe0n6VWnb0DJF63MOZXMLn0p6aq60D55cNaxQkyD5ssq0x0bO1-4uv3M4L2bkO3COMahkM5h7xOKQoQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3YjjTLea_8JTLDfR32Wn7a5TrMeJrnbtTMq4tehHRL5639WDTm_D_K0Db1ODON-j30MfrybMc0BpkJ-mnH-pPKKR7SKtIzbq8a0bKfMMJptnvH3mkjbpnDfn02OpjPhTJOD-4syPRIKMRnWNrJKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCFabKKwD5_aejPShMntKI6J5Co20Rr2HJOoDDvChfjcy4LdjGKq3j37WRT-3CJxahC5eb6Yy-6F5lLq3-Aq54Rx2m38bxFb3q3fsJ3Kjb7GQfbQ0-7hqP-jW26aWK3otn7JOpvhbUnxyhD3QRPH-Rv92DQMVU52QqcqEIQHQT3m5-5bbN3ut6IqJb-qoDKbfbo5KRopMtOhq4tehHRmKPn9WDTOQJ7TtMQ2SJrN-j32MfrybMc0b4JL3T72-pbwBPbcfUnMKn05XM-pXbjwtn5A3mkjbPbgJfFa_pLzDl6SbP4syP4jKMRnWnciKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCcjqR8ZD6t5DT5P; BCLID_BFESS=10268246537482939884; BDSFRCVID_BFESS=CdkOJexroG3VpUnea0piM3dmiqcmDscTDYLtOwXPsp3LGJLVN4vIEG0PtD6Z1_F-2ZlgogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tR3aQ5rtKRTffjrnhPF35hJbXP6-hnjy3b7pKfOF5RD2Jl7EDPOVMJtfXpoy2q3RymJJ2-39LPO2hpRjyxv4y4Ldj4oxJpOJbI8DK40MHl51fbbvbURvD--g3-AqBM5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIEoC0XtI0hMCvnh-nhbKC3bfT2K46JHD7yWCvE3pOcOR59K4nnDpKA5Nb-56OE0mDe0n6VWnb0DJF63MOZXMLn0p6aq60D55cNaxQkyD5ssq0x0bO1-4uv3M4L2bkO3COMahkM5h7xOKQoQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3YjjTLea_8JTLDfR32Wn7a5TrMeJrnbtTMq4tehHRL5639WDTm_D_K0Db1ODON-j30MfrybMc0BpkJ-mnH-pPKKR7SKtIzbq8a0bKfMMJptnvH3mkjbpnDfn02OpjPhTJOD-4syPRIKMRnWNrJKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCFabKKwD5_aejPShMntKI6J5Co20Rr2HJOoDDvChfjcy4LdjGKq3j37WRT-3CJxahC5eb6Yy-6F5lLq3-Aq54Rx2m38bxFb3q3fsJ3Kjb7GQfbQ0-7hqP-jW26aWK3otn7JOpvhbUnxyhD3QRPH-Rv92DQMVU52QqcqEIQHQT3m5-5bbN3ut6IqJb-qoDKbfbo5KRopMtOhq4tehHRmKPn9WDTOQJ7TtMQ2SJrN-j32MfrybMc0b4JL3T72-pbwBPbcfUnMKn05XM-pXbjwtn5A3mkjbPbgJfFa_pLzDl6SbP4syP4jKMRnWnciKfA-b4ncjRcTehoM3xI8LNj405OTbIFO0KJzJCcjqR8ZD6t5DT5P; 
ab_sr=1.0.0_ZWYyZGM5MGFhZDQ1NTE0ODNjYjMwYzAyN2NhMGI1NDlkNjUwMDgyMzNkNzVlOWVjYTBkODZhMmU5ZDY4YTkwNzU0MjAyNTE1ZTQ5ZDY2Y2NkMGZlZWJjNzZlYzE3MmU2NjljMjQ3Y2ViMDRhOTdlNGI5OTBmMjBmZjU0NDI4Mzc=" ] Spd = StoredPd() def getHeader(): return { 'User-Agent': random.choice(USER_AGENTS), 'Cookie': random.choice(cookies) } def getRespon(url): req = requests.get(url, getHeader()) return req def getSelector(url):