def getEventTimeSE(clusterDbName):
    """Return the earliest and latest event dates stored in a cluster database.

    Every document of the ``clusterResult`` collection in ``clusterDbName`` is
    read and the first 10 characters of ``time_infer.most_possible_time`` are
    compared as plain strings (presumably a ``YYYY-MM-DD`` prefix -- confirm
    against whatever writes these documents).

    Returns:
        A response dict. ``resCode`` is 1 and ``resObject`` is
        ``{'stime': ..., 'etime': ...}`` on success; ``resCode`` is 0 on any
        failure. For an empty collection the sentinels ``'9999-99-99'`` and
        ``'0000-00-00'`` are returned unchanged.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        # Sentinels chosen so that any real date string replaces them.
        stime = '9999-99-99'
        etime = '0000-00-00'
        for cluster in clusters:
            day = cluster['time_infer']['most_possible_time'][0:10]
            stime = min(stime, day)
            etime = max(etime, day)
        return {
            'resCode': 1,
            'resStr': '获取事件始末时间成功',
            'resObject': {'stime': stime, 'etime': etime},
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '获取事件始末时间失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def getEventLevelDataAction(clusterDbName):
    """Count how many clusters fall into each ``summary.level`` value.

    Reads all documents of the ``clusterResult`` collection in
    ``clusterDbName``.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resObject`` maps
        each level to its number of occurrences; on any failure ``resCode``
        is 0.
    """
    client = None
    try:
        client = getMongoClient()
        levelInfo = {}
        for cluster in client[clusterDbName]['clusterResult'].find():
            level = cluster['summary']['level']
            levelInfo[level] = levelInfo.get(level, 0) + 1
        return {
            'resCode': 1,
            'resStr': '获取事件等级统计数据成功',
            'resObject': levelInfo,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '事件等级统计数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def queryVisibleDataByPname(pname):
    """Look up pre-stored chart data for the program named ``pname``.

    The ``visibleData`` collection of the ``programs`` database is queried
    by ``pname`` directly, instead of scanning every document in Python.

    Returns:
        A response dict whose ``resCode`` is

        * 1  -- pre-stored data exists; ``resObject`` is its ``chartData``;
        * 0  -- no pre-stored data; ``resObject`` is the ``dbName`` of the
          matching document in the ``programs`` collection, so the caller
          can generate the chart data from that database;
        * -1 -- any failure, including the program not being found.
    """
    client = None
    try:
        client = getMongoClient()
        db = client['programs']
        cached = db['visibleData'].find_one({'pname': pname})
        if cached is not None:
            return {
                'resCode': 1,
                'resStr': '方案' + pname + '预存图表数据存在!',
                'resObject': cached['chartData'],
                'resList': []
            }
        # No pre-stored data: return the database name so the chart data can
        # be generated from it. A missing program makes the subscript below
        # raise, which is reported as resCode -1.
        program = db['programs'].find_one({'pname': pname})
        dbName = program['dbName']
        return {
            'resCode': 0,
            'resStr': '方案' + pname + '预存图表数据不存在!',
            'resObject': dbName,
            'resList': []
        }
    except Exception:
        return {
            'resCode': -1,
            'resStr': '查询方案预存数据时出错!',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def getEventAnalysisConfigDataAction():
    """Return the chart-loading switches stored in ``programs.eventAnalysis``.

    Only the first document of the collection is used, and only the six chart
    keys listed below are copied into the result.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resObject`` holds
        the six chart entries; on any failure (including an empty collection
        or a missing key) ``resCode`` is 0.
    """
    chartKeys = ('wordCloud', 'mapHot', 'discussionTrend', 'levelPie',
                 'popularTweeter', 'timeTrack')
    client = None
    try:
        client = getMongoClient()
        configObject = client['programs']['eventAnalysis'].find_one()
        # An empty collection yields None; the subscript then raises and the
        # failure response is returned.
        resultObject = {key: configObject[key] for key in chartKeys}
        return {
            'resCode': 1,
            'resStr': '获取事件分析图表加载项数据成功',
            'resObject': resultObject,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '事件分析图表加载项数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # The client used to be left open on every path; close it here.
        if client is not None:
            client.close()
def getTweetWithTimePots(timePots, clusterDbName, clusterTweetDict):
    """Pick one representative tweet for each requested day.

    For every ``clusterResult`` document whose ``most_possible_time`` day
    (first 10 characters) is contained in ``timePots``, the cluster's tweets
    are taken from ``clusterTweetDict[cluid]`` and those whose ``created_at``
    day equals the cluster day are grouped by day. For each day, in ascending
    string order, ``getBestTweet`` selects the tweet to report.

    Args:
        timePots: container of day strings to keep.
        clusterDbName: name of the database holding ``clusterResult``.
        clusterTweetDict: mapping from ``cluid`` to a list of tweet dicts.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resList`` holds one
        entry per day; on any failure ``resCode`` is 0.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        # First collect all tweets for each requested day.
        timeTweetsDict = {}
        for cluster in clusters:
            mostPossibleTime = cluster['time_infer']['most_possible_time'][0:10]
            cluid = cluster['cluid']
            # Skip event instances whose likely day was not requested.
            if mostPossibleTime not in timePots:
                continue
            # Keep only tweets created on the cluster's likely day.
            for tweet in clusterTweetDict[cluid]:
                day = tweet['created_at'][0:10]
                if day == mostPossibleTime:
                    timeTweetsDict.setdefault(day, []).append(tweet)
        # Then choose a single tweet for each day.
        timeTweetsData = []
        for day in sorted(timeTweetsDict):
            bestTweet, tweetActionNums = getBestTweet(timeTweetsDict[day])
            text = bestTweet['standard_text']
            userName = bestTweet['user']['data_name']
            hm = bestTweet['created_at'][11:16]
            timeTweetsData.append({
                'userName': userName,
                'userHead': 'none',
                'text': text,
                'tweetNums': tweetActionNums,
                'time': {
                    'ymd': day,
                    'hm': hm
                }
            })
        return {
            'resCode': 1,
            'resStr': '获取指定时间的推文成功',
            'resObject': '',
            'resList': timeTweetsData
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '获取指定时间的推文失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def getPopularTweeterDataAction(clusterDbName, clusterTweetDict):
    """Return the representative tweet of each of the 20 hottest clusters.

    Clusters are read from ``clusterResult`` in ``clusterDbName`` and ordered
    by ``summary.hot`` descending; clusters with equal heat keep their
    retrieval order. For each of the first 20, ``getBestTweet`` is applied to
    ``clusterTweetDict[cluid]``.

    Args:
        clusterDbName: name of the database holding ``clusterResult``.
        clusterTweetDict: mapping from ``cluid`` to a list of tweet dicts.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resList`` holds one
        entry per selected cluster; on any failure ``resCode`` is 0.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = list(client[clusterDbName]['clusterResult'].find())
        # Sort by heat and keep the top 20 (sorted() is stable, so ties keep
        # their original order).
        hottestEvents = sorted(clusters,
                               key=lambda cluster: cluster['summary']['hot'],
                               reverse=True)[0:20]
        # Take the most representative tweet and its author from each event.
        resultList = []
        for event in hottestEvents:
            tweetsList = clusterTweetDict[event['cluid']]
            bestTweet, tweetActionNums = getBestTweet(tweetsList)
            text = bestTweet['standard_text']
            userName = bestTweet['user']['data_name']
            createdAt = bestTweet['created_at']
            resultList.append({
                'userName': userName,
                'userHead': 'none',
                'time': createdAt,
                'text': text,
                'tweetNums': tweetActionNums
            })
        return {
            'resCode': 1,
            'resStr': '获取热门tweeter数据成功',
            'resObject': '',
            'resList': resultList
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '热门tweeter数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def addProgramAction(programForm):
    """Insert a new program into ``programs.programs`` unless it already exists.

    Args:
        programForm: mapping with the keys ``pname``, ``desc``, ``dbName``,
            ``maxNum``, ``timeRange`` (indexable; items 0 and 1 are used as
            start and end time), ``keywords``, ``ignorewords`` and ``topics``.
            A missing key raises before any database access.

    Returns:
        A response dict whose ``resCode`` is 1 when the record was inserted,
        0 when the insert raised, and -1 when a record with the same
        ``pname`` already exists.
    """
    # Collect the parameters.
    programData = {
        'pname': programForm['pname'],
        'desc': programForm['desc'],
        'dbName': programForm['dbName'],
        'maxNum': programForm['maxNum'],
        'stime': programForm['timeRange'][0],
        'etime': programForm['timeRange'][1],
        'keywords': programForm['keywords'],
        'ignorewords': programForm['ignorewords'],
        'topics': programForm['topics'],
        'status': '无数据'
    }
    # Database work.
    client = getMongoClient()
    try:
        programsCol = client['programs']['programs']
        # Only insert when no record with this name exists yet.
        if programsCol.find_one({'pname': programData['pname']}) is not None:
            return {
                'resCode': -1,
                'resStr': '记录已存在',
                'resObject': '',
                'resList': []
            }
        try:
            programsCol.insert_one(programData)
        except Exception:
            return {
                'resCode': 0,
                'resStr': '插入记录时发生错误',
                'resObject': '',
                'resList': []
            }
        return {
            'resCode': 1,
            'resStr': '添加新方案成功',
            'resObject': '',
            'resList': []
        }
    finally:
        # One close for every path, including a failing existence check.
        client.close()
def getClusterTweetListDict(clusterDbName):
    """Build a mapping from each cluster id to the list of its tweets.

    For every document in ``clusterResult`` the ``clusterTweet`` collection
    is queried by ``cluid`` and the ``tweet`` field of each match is
    collected.

    Args:
        clusterDbName: name of the database holding both collections.

    Returns:
        dict mapping ``cluid`` to a list of tweet objects (an empty list when
        the cluster has no tweets).

    Raises:
        Any exception raised by the database driver or by a missing field is
        propagated to the caller.
    """
    client = getMongoClient()
    try:
        db = client[clusterDbName]
        clusterTweetCol = db['clusterTweet']
        clusterTweetDict = {}
        for cluster in db['clusterResult'].find():
            cluid = cluster['cluid']
            clusterTweetDict[cluid] = [
                tweetObj['tweet']
                for tweetObj in clusterTweetCol.find({'cluid': cluid})
            ]
        return clusterTweetDict
    finally:
        # The client used to be left open; release it on every path.
        client.close()
def getAllProgramsAction():
    """Return all stored programs, converted by ``getProgramTableList``.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resList`` is the
        value returned by ``getProgramTableList`` for every document of
        ``programs.programs``; on any failure ``resCode`` is 0.
    """
    client = None
    try:
        client = getMongoClient()
        programs = list(client['programs']['programs'].find())
        return {
            'resCode': 1,
            'resStr': '查询数据库成功',
            'resObject': '',
            'resList': getProgramTableList(programs)
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '查询数据库失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def storeVisibleData(pname, chartData):
    """Store pre-computed chart data for a program in ``programs.visibleData``.

    Args:
        pname: program name; it is concatenated into the response message.
        chartData: chart payload saved under the ``chartData`` key.

    Returns:
        A response dict with ``resCode`` 1 when the insert succeeded and 0
        when anything raised.
    """
    client = None
    try:
        client = getMongoClient()
        client['programs']['visibleData'].insert_one({
            'pname': pname,
            'chartData': chartData
        })
        return {
            'resCode': 1,
            'resStr': '预存方案' + pname + '图表数据成功!',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '预存方案' + pname + '图表数据失败!',
            'resObject': '',
            'resList': []
        }
    finally:
        # The client used to be left open on every path; close it here.
        if client is not None:
            client.close()
def changeEventAnalysisAction(data):
    """Apply ``data`` as a ``$set`` update to one ``eventAnalysis`` document.

    The update uses an empty filter, so it targets the first document the
    driver matches in ``programs.eventAnalysis``.

    Args:
        data: mapping of field names to their new values.

    Returns:
        A response dict with ``resCode`` 1 when the update call succeeded and
        0 when anything raised.
    """
    client = None
    try:
        client = getMongoClient()
        client['programs']['eventAnalysis'].update_one({}, {"$set": data})
        return {
            'resCode': 1,
            'resStr': '修改事件分析图表加载项数据成功',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '修改事件分析图表加载项数据失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # The client used to be left open on every path; close it here.
        if client is not None:
            client.close()
def deleteProgramAction(pname):
    """Delete one program named ``pname`` from ``programs.programs``.

    Args:
        pname: program name; it is concatenated into the success message.

    Returns:
        A response dict with ``resCode`` 1 when the delete call (and building
        the success message) succeeded and 0 when it raised. A failure to
        obtain the client itself is propagated to the caller.
    """
    client = getMongoClient()
    try:
        client['programs']['programs'].delete_one({'pname': pname})
        return {
            'resCode': 1,
            'resStr': '删除方案:' + pname + '成功!',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '删除方案出错!',
            'resObject': '',
            'resList': []
        }
    finally:
        # Single close for both the success and the failure path.
        client.close()
def startProgramAction(pname, num):
    """Set the ``status`` field of the program named ``pname`` to ``num``.

    Args:
        pname: name used to select the document in ``programs.programs``.
        num: value written to ``status``.

    Returns:
        A response dict with ``resCode`` 1 when the update call succeeded and
        0 when anything raised.
    """
    client = None
    try:
        client = getMongoClient()
        client['programs']['programs'].update_one(
            {'pname': pname},
            {"$set": {"status": num}}
        )
        return {
            'resCode': 1,
            'resStr': '启动方案成功!',
            'resObject': '',
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '启动方案出错!',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
def getMapHotDataAction(clusterDbName):
    """Aggregate cluster heat per address and per country for the heat map.

    Each ``clusterResult`` document contributes its ``summary.hot`` value to
    every entry of its ``geo_infer`` list that has a truthy ``address``. Per
    address, the accumulated heat is divided by the number of contributions;
    per country, those address averages are summed and the 10 largest totals
    are kept.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resObject`` is
        ``{'addressInfo': {address: {'hot': ..., 'geo': {'lat', 'lng'}}},
        'countryInfo': [(country, total), ...]}``; on any failure ``resCode``
        is 0.
    """
    client = None
    try:
        client = getMongoClient()
        clusters = client[clusterDbName]['clusterResult'].find()
        addressInfo = {}
        for cluster in clusters:
            hot = cluster['summary']['hot']
            for geoItem in cluster['geo_infer']:
                address = geoItem['address']
                lat = geoItem['lat']
                lon = geoItem['lon']
                freq = geoItem['freq']
                country = geoItem['country']
                if not address:
                    continue
                entry = addressInfo.get(address)
                if entry is None:
                    # NOTE(review): the first contribution stores plain `hot`
                    # while later ones add `hot * freq`. That looks
                    # inconsistent, but the intended weighting is unclear, so
                    # the existing numbers are preserved -- confirm.
                    addressInfo[address] = {
                        'hotSum': hot,
                        'hotCount': 1,
                        'geo': {
                            'lat': lat,
                            'lng': lon
                        },
                        'country': country
                    }
                else:
                    entry['hotSum'] += hot * freq
                    entry['hotCount'] += 1
        resultInfo = {'addressInfo': {}, 'countryInfo': []}
        countryInfoDic = {}
        # Average heat of every address, summed up per country.
        for address, entry in addressInfo.items():
            hotAve = entry['hotSum'] / entry['hotCount']
            resultInfo['addressInfo'][address] = {
                'hot': hotAve,
                'geo': entry['geo']
            }
            country = entry['country']
            if country:
                countryInfoDic[country] = countryInfoDic.get(country, 0) + hotAve
        # Keep at most the 10 countries with the highest total heat; slicing
        # already copes with shorter lists.
        resultInfo['countryInfo'] = sorted(countryInfoDic.items(),
                                           key=lambda x: x[1],
                                           reverse=True)[0:10]
        return {
            'resCode': 1,
            'resStr': '获取热点地图数据成功',
            'resObject': resultInfo,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '热点地图数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # Close the client on every path, including failures.
        if client is not None:
            client.close()
clusterTweetCol = db['clusterTweet'] clusters = clusterResultCol.find() clusterTweetDict = dict() for cluster in clusters: cluid = cluster['cluid'] query = {'cluid': cluid} tweetObjList = clusterTweetCol.find(query) clusterTweetDict[cluid] = list() for tweetObj in tweetObjList: clusterTweetDict[cluid].append(tweetObj['tweet']) return clusterTweetDict if __name__ == '__main__': # getTweetWithTimePots(['2016-02-26'], 'cluster_natural_disaster') client = getMongoClient() db = client['cluster_2019HKProtest'] clusterTweetCol = db['clusterTweet'] tweetObjList = clusterTweetCol.find() tweetTimeDict = dict() for tweetObj in tweetObjList: tweet = tweetObj['tweet'] time = tweet['created_at'][0:10] if time in tweetTimeDict.keys(): tweetTimeDict[time] += 1 else: tweetTimeDict[time] = 1 sortedList = sorted(tweetTimeDict.items(), key=lambda x: x[1], reverse=True) for item in sortedList:
def getWordCloudDataAction(clusterDbName):
    """Build word-cloud entries from cluster keywords and geo words.

    For every ``clusterResult`` document, each item of ``summary.keywords``
    counts once, and each item of ``summary.geowords`` (indexed as
    ``(word, freq)``) adds its ``freq``. The 30 most frequent keywords
    followed by the 10 most frequent geo words are returned.

    Returns:
        A response dict. On success ``resCode`` is 1 and ``resList`` is a list
        of ``{'name': word, 'value': count}`` entries; ``resCode`` is 0 when
        reading the database fails.
    """
    keyWordsDic = {}
    geoWordsDic = {}
    client = None
    try:
        client = getMongoClient()
        for cluster in client[clusterDbName]['clusterResult'].find():
            summary = cluster['summary']
            keywords = summary['keywords']
            geowords = summary['geowords']
            for word in keywords:
                keyWordsDic[word] = keyWordsDic.get(word, 0) + 1
            for wordItem in geowords:
                word = wordItem[0]
                freq = wordItem[1]
                geoWordsDic[word] = geoWordsDic.get(word, 0) + freq
    except Exception:
        return {
            'resCode': 0,
            'resStr': '词云数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # All reads are done (or have failed) here, so release the client
        # before post-processing.
        if client is not None:
            client.close()
    # Order by frequency, highest first.
    sortedKeyWords = sorted(keyWordsDic.items(), key=lambda x: x[1],
                            reverse=True)
    sortedGeoWords = sorted(geoWordsDic.items(), key=lambda x: x[1],
                            reverse=True)
    # Show at most 40 entries: 30 keywords followed by 10 geo words.
    selected = sortedKeyWords[0:30] + sortedGeoWords[0:10]
    result = [{'name': item[0], 'value': item[1]} for item in selected]
    return {
        'resCode': 1,
        'resStr': '获取词云数据成功',
        'resObject': '',
        'resList': result
    }
def _collectUniqueWords(words, limit=5):
    """Return the first ``limit`` distinct items of ``words`` in first-seen order."""
    unique = []
    for word in words:
        if word in unique:
            continue
        unique.append(word)
        if len(unique) >= limit:
            # Stop consuming ``words`` as soon as enough items are collected.
            break
    return unique


def getEventSummaryProgramDataAction(pname, clusterTweetDict):
    """Compose the textual summary of a program's detection settings.

    The program named ``pname`` is read from ``programs.programs``. The
    summary sentence mentions up to five distinct keywords, ignore words and
    topics (expanded with ``getProgramWordsListFromExp`` and rendered with
    ``getEventSummaryWordsStr``), then ``maxNum``, the ``stime``/``etime``
    range and the tweet total reported by ``getAllTweetListLength``.

    Args:
        pname: program name to summarise.
        clusterTweetDict: passed unchanged to ``getAllTweetListLength``.

    Returns:
        A response dict whose ``resCode`` is 1 with the summary text in
        ``resObject``, 2 when zero or more than one program matches
        ``pname``, and 0 on any failure.
    """
    client = None
    try:
        client = getMongoClient()
        matches = list(client['programs']['programs'].find({'pname': pname}))
        if len(matches) > 1:
            return {
                'resCode': 2,
                'resStr': '监控方案名' + pname + '数据库冗余',
                'resObject': '',
                'resList': []
            }
        if not matches:
            return {
                'resCode': 2,
                'resStr': '监控方案' + pname + '不存在',
                'resObject': '',
                'resList': []
            }
        resProgram = matches[0]
        result = '根据您设置的\"' + pname + '\"事件的检测条件,'
        # Keywords: the generator is lazy, so groups after the fifth distinct
        # word are never expanded.
        keywordsList = _collectUniqueWords(
            word
            for group in resProgram['keywords']
            for word in getProgramWordsListFromExp(group['exp'])
        )
        result += '我们围绕着' + getEventSummaryWordsStr(keywordsList) + '等关键词,'
        # Ignore words (optional).
        ignorewords = resProgram['ignorewords']
        if ignorewords:
            ignorewordsList = _collectUniqueWords(
                getProgramWordsListFromExp(ignorewords))
            result += getEventSummaryWordsStr(ignorewordsList) + '等过滤词,'
        # Topics (optional).
        topics = resProgram['topics']
        if topics:
            topicsList = _collectUniqueWords(
                getProgramWordsListFromExp(topics))
            result += getEventSummaryWordsStr(topicsList) + '等话题,'
        # Remaining conditions.
        maxNum = resProgram['maxNum']
        result += '以每次查询最多' + str(maxNum) + '条推文的方式,'
        stime = resProgram['stime']
        etime = resProgram['etime']
        result += '采集了从' + stime + '到' + etime + '的推文数据,'
        allTweetNum = getAllTweetListLength(clusterTweetDict)
        result += '对其中与事件相关的' + str(allTweetNum) + '条推文进行了统计分析。'
        return {
            'resCode': 1,
            'resStr': '获取事件分析结果概述-方案描述数据成功',
            'resObject': result,
            'resList': []
        }
    except Exception:
        return {
            'resCode': 0,
            'resStr': '事件分析结果概述-方案描述数据库查询失败',
            'resObject': '',
            'resList': []
        }
    finally:
        # One close for every return path, including failures.
        if client is not None:
            client.close()