def opinion_ratio():
    """Pie-chart data: per-media-source share within a time window.

    Query params: query (topic name), topk, ts (window end timestamp),
    during (window length in seconds), subevent ('global' or a subevent id).
    Returns JSON mapping source -> fraction of the top-k total.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    # BUG FIX: query args arrive as strings; Counter.most_common() needs an int.
    topk = int(request.args.get('topk', 10))
    end_ts = request.args.get('ts', None)
    during = request.args.get('during', None)
    subevent_status = request.args.get('subevent', 'global')
    if end_ts:
        end_ts = int(end_ts)
    if during:
        during = int(during)
    start_ts = end_ts - during
    topicid = em.getEventIDByName(topic_name)
    event = Event(topicid)
    if subevent_status != 'global':
        # Anything other than 'global' is treated as a subevent id.
        results = event.getMediaCount(start_ts, end_ts, subevent=subevent_status)
    else:
        results = event.getMediaCount(start_ts, end_ts)
    from collections import Counter
    counts = dict(Counter(results).most_common(topk))
    total_weight = sum(counts.values())
    if not total_weight:
        # FIX: avoid ZeroDivisionError when the window has no data.
        return json.dumps({})
    # FIX: .items() instead of Py2-only .iteritems() (works on both 2 and 3).
    ratios = dict((k, float(v) / float(total_weight)) for k, v in counts.items())
    return json.dumps(ratios)
def opinion_weibos():
    """Ranked important posts for a topic, or for one of its subevents."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    end_ts = request.args.get('ts', None)
    during = request.args.get('during', None)
    sort = request.args.get('sort', 'weight')
    limit = int(request.args.get('limit', 10))
    skip = int(request.args.get('skip', 10))
    subevent_status = request.args.get('subevent', 'global')
    if end_ts:
        end_ts = int(end_ts)
    if during:
        during = int(during)
    start_ts = end_ts - during
    event = Event(em.getEventIDByName(topic_name))
    # 'global' means the whole topic (no subevent filter); any other value
    # is a subevent id. Both branches of the original collapse to one call.
    subeventid = None if subevent_status == 'global' else subevent_status
    results = event.getSortedInfos(start_ts, end_ts, key=sort,
                                   subeventid=subeventid, limit=limit,
                                   skip=skip)
    return json.dumps(results)
def eventriver():
    """Data for the event-river visualization of a topic."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    sort = request.args.get('sort', 'tfidf')  # weight, addweight, created_at, tfidf
    end_ts = request.args.get('ts', None)
    during = request.args.get('during', None)
    end_ts = int(end_ts) if end_ts else end_ts
    during = int(during) if during else during
    start_ts = end_ts - during
    event = Event(em.getEventIDByName(topic_name))
    subeventlist, dates, total_weight = event.getEventRiverData(
        start_ts, end_ts, sort=sort)
    payload = {
        "dates": dates,
        "name": topic_name,
        "type": "eventRiver",
        "weight": total_weight,
        "eventList": subeventlist,
    }
    return json.dumps(payload)
def subevents():
    """Collect subevent info for every event, grouped by parent event id.

    Each subevent is named by joining its five highest-weighted feature
    words. Returns JSON: {eventid: [{'_id', 'eventid', 'name'}, ...]}.
    """
    subevents = []
    for event in em.getEvents():
        subevents.extend(Event(event['_id']).getSubEvents())
    results_dict = {}
    for s in subevents:
        fwords = Feature(s['_id']).get_newest()
        # FIX: .items() + indexed key replaces Py2-only .iteritems() and the
        # tuple-parameter lambda (a SyntaxError on Python 3); same ordering.
        words = sorted(fwords.items(), key=lambda kv: kv[1], reverse=True)[:5]
        name = ','.join(k for k, v in words)
        subevent = {
            '_id': s['_id'],
            'eventid': str(s['eventid']),
            'name': name,
        }
        # setdefault replaces the try/except-KeyError append idiom.
        results_dict.setdefault(str(s['eventid']), []).append(subevent)
    return json.dumps(results_dict)
def othertext():
    """JSON info for the residual ('other') subevent of a topic."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    event = Event(em.getEventIDByName(topic_name))
    return json.dumps(event.getOtherSubEventInfos())
def trend():
    """Render the topic trend page."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    mode = request.args.get('mode', 'day')
    event = Event(em.getEventIDByName(topic_name))
    start_ts = event.getStartts()
    last_modify = event.getLastmodify()
    status = event.getStatus()
    modify_success = event.getModifysuccess()
    end_ts = event.getEndts()
    end_date = ts2date(end_ts) if end_ts else u'无'
    # Default range: 30 days before the topic start through one day past
    # the last modification.
    default_startts = start_ts - 3600 * 24 * 30
    default_range = ts2date(default_startts) + '-' + ts2date(last_modify + 24 * 3600)
    time_range = request.args.get('time_range', default_range)
    return render_template('index/trend.html', mode=mode, topic=topic_name,
                           time_range=time_range, status=status,
                           start_date=ts2datetime(start_ts),
                           end_date=end_date,
                           last_modify=ts2datetime(last_modify),
                           modify_success=modify_success)
def timeline():
    """Info count for one time window, keyed by 'global' or a subevent id."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    timestamp = int(request.args.get('ts'))
    subevent_status = request.args.get('subevent', 'global')
    during = int(request.args.get('during', 3600 * 24))
    event = Event(em.getEventIDByName(topic_name))
    window_start = timestamp - during
    if subevent_status == 'global':
        key = 'global'
        count = event.getInfoCount(window_start, timestamp)
    else:
        key = subevent_status
        count = event.getInfoCount(window_start, timestamp,
                                   subevent=subevent_status)
    return json.dumps({key: [timestamp, count]})
def eventriver():
    """Event-river data for a topic.

    NOTE(review): this is a verbatim duplicate of an earlier eventriver()
    in this file; at import time this later definition shadows the earlier
    one. Consider deleting one copy.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    sort = request.args.get('sort', 'tfidf')  # weight, addweight, created_at, tfidf
    end_ts = request.args.get('ts', None)
    during = request.args.get('during', None)
    if end_ts:
        end_ts = int(end_ts)
    if during:
        during = int(during)
    start_ts = end_ts - during
    topicid = em.getEventIDByName(topic_name)
    subeventlist, dates, total_weight = Event(topicid).getEventRiverData(
        start_ts, end_ts, sort=sort)
    return json.dumps({"dates": dates,
                       "name": topic_name,
                       "type": "eventRiver",
                       "weight": total_weight,
                       "eventList": subeventlist})
def urlsearch():
    """Resolve a news article URL to its news id.

    Returns JSON {"news_id": id} when the URL is known AND the article has
    comments; {"news_id": null} otherwise.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)
    # BUG FIX: the 'url' query parameter was unconditionally overwritten
    # with a hard-coded sina.com.cn URL (apparent debug leftover), so the
    # endpoint could never look up any other article. Honor the request.
    news_url = request.args.get('url', default_news_url)  # news url
    event = Event(topicid)
    news_id = event.get_news_id_by_url(news_url)
    if not news_id:
        return json.dumps({"news_id": None})
    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"news_id": None})
    return json.dumps({"news_id": news_id})
def trenddata():
    """Per-day (or per-hour) post counts for a topic, as parallel arrays."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    mode = request.args.get('mode', 'day')
    event = Event(em.getEventIDByName(topic_name))
    raw = event.getTrendData() if mode == 'day' else event.getHourData()
    # Materialize once so the source may be a generator, then split columns.
    pairs = list(raw)
    dates = [d for d, _ in pairs]
    counts = [c for _, c in pairs]
    return json.dumps({"dates": dates, "counts": counts})
def urlsearch():
    """Resolve a news article URL to its news id.

    NOTE(review): verbatim duplicate of an earlier urlsearch() in this
    file; this later definition shadows the earlier one.

    Returns JSON {"news_id": id} when the URL is known AND the article has
    comments; {"news_id": null} otherwise.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)
    # BUG FIX: the 'url' query parameter was unconditionally overwritten
    # with a hard-coded sina.com.cn URL (apparent debug leftover), so the
    # endpoint could never look up any other article. Honor the request.
    news_url = request.args.get('url', default_news_url)  # news url
    event = Event(topicid)
    news_id = event.get_news_id_by_url(news_url)
    if not news_id:
        return json.dumps({"news_id": None})
    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"news_id": None})
    return json.dumps({"news_id": news_id})
def subevents():
    """Collect subevent info for every event, grouped by parent event id.

    NOTE(review): near-verbatim duplicate of an earlier subevents() in
    this file; this later definition shadows the earlier one.

    Each subevent is named by its five highest-weighted feature words.
    Returns JSON: {eventid: [{'_id', 'eventid', 'name'}, ...]}.
    """
    subevents = []
    for event in em.getEvents():
        subevents.extend(Event(event['_id']).getSubEvents())
    results_dict = {}
    for s in subevents:
        fwords = Feature(s['_id']).get_newest()
        # FIX: .items() + indexed key replaces Py2-only .iteritems() and the
        # tuple-parameter lambda (a SyntaxError on Python 3); same ordering.
        words = sorted(fwords.items(), key=lambda kv: kv[1], reverse=True)[:5]
        name = ','.join(k for k, v in words)
        subevent = {'_id': s['_id'], 'eventid': str(s['eventid']), 'name': name}
        # setdefault replaces the try/except-KeyError append idiom.
        results_dict.setdefault(str(s['eventid']), []).append(subevent)
    return json.dumps(results_dict)
def opinion_keywords():
    """Keyword-cloud data: top-k feature words.

    For a specific subevent, returns that subevent's top words; for
    'global', merges the newest feature counts of every subevent of the
    topic. Returns JSON mapping word -> count.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    end_ts = request.args.get('ts', None)
    during = request.args.get('during', None)
    subevent_status = request.args.get('subevent', 'global')
    # BUG FIX: query args arrive as strings; Counter.most_common() needs an
    # int, so most_common('50') would raise TypeError.
    topk_keywords = int(request.args.get('topk', 50))  # topk keywords
    if subevent_status != 'global':
        feature = Feature(subevent_status)
        counter = Counter()
        counter.update(feature.get_newest())
        subevent_keywords = dict(counter.most_common(topk_keywords))
        return json.dumps(subevent_keywords)
    topicid = em.getEventIDByName(topic_name)
    event = Event(topicid)
    # NOTE(review): end_ts/during are parsed here but never used below —
    # presumably meant to restrict the window; confirm before removing.
    if end_ts:
        end_ts = int(end_ts)
    if during:
        during = int(during)
    counter = Counter()
    for subevent in event.getSubEvents():
        counter.update(Feature(subevent["_id"]).get_newest())
    return json.dumps(dict(counter.most_common(topk_keywords)))
def opinion_ratio():
    """Pie-chart data: per-media-source share within a time window.

    NOTE(review): verbatim duplicate of an earlier opinion_ratio() in this
    file; this later definition shadows the earlier one.

    Returns JSON mapping source -> fraction of the top-k total.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    # BUG FIX: query args arrive as strings; Counter.most_common() needs an int.
    topk = int(request.args.get('topk', 10))
    end_ts = request.args.get('ts', None)
    during = request.args.get('during', None)
    subevent_status = request.args.get('subevent', 'global')
    if end_ts:
        end_ts = int(end_ts)
    if during:
        during = int(during)
    start_ts = end_ts - during
    topicid = em.getEventIDByName(topic_name)
    event = Event(topicid)
    if subevent_status != 'global':
        results = event.getMediaCount(start_ts, end_ts, subevent=subevent_status)
    else:
        results = event.getMediaCount(start_ts, end_ts)
    from collections import Counter
    counts = dict(Counter(results).most_common(topk))
    total_weight = sum(counts.values())
    if not total_weight:
        # FIX: avoid ZeroDivisionError when the window has no data.
        return json.dumps({})
    # FIX: .items() instead of Py2-only .iteritems() (works on both 2 and 3).
    ratios = dict((k, float(v) / float(total_weight)) for k, v in counts.items())
    return json.dumps(ratios)
def index():
    """Render the topic semantic-analysis landing page."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    event = Event(em.getEventIDByName(topic_name))
    start_ts = event.getStartts()
    last_modify = event.getLastmodify()
    status = event.getStatus()
    modify_success = event.getModifysuccess()
    end_ts = event.getEndts()
    end_date = ts2date(end_ts) if end_ts else u'无'
    # Default range: 30 days before the topic start through one day past
    # the last modification.
    default_startts = start_ts - 3600 * 24 * 30
    default_range = ts2date(default_startts) + '-' + ts2date(last_modify + 24 * 3600)
    time_range = request.args.get('time_range', default_range)
    return render_template('index/semantic.html', topic=topic_name,
                           time_range=time_range, status=status,
                           start_date=ts2datetime(start_ts),
                           end_date=end_date,
                           last_modify=ts2datetime(last_modify),
                           modify_success=modify_success)