def save_rt_results_es(topic, repost_list):
    """Upsert per-repost geo records for *topic* into the event-analysis-results index.

    Each entry of repost_list must carry the keys 'original', 'mid', 'ts',
    'origin_location' and 'repost_location'; one ES document is written per
    repost, keyed by its mid.
    """
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    for location in repost_list:
        # Build a fresh dict per repost instead of mutating one shared dict.
        item = {
            'en_name': topic,
            'original': location['original'],
            'mid': location['mid'],
            'timestamp': location['ts'],
            'origin_location': location['origin_location'],
            'repost_location': location['repost_location'],
        }
        doc_id = location['mid']  # avoid shadowing builtin id()
        try:
            # Document exists -> partial update ...
            weibo_es.get(index=index_name, doc_type=index_type, id=doc_id)
            weibo_es.update(index=index_name,
                            doc_type=index_type,
                            id=doc_id,
                            body={'doc': item})
        except Exception:
            # ... otherwise (typically a not-found error) create it.
            weibo_es.index(index=index_name,
                           doc_type=index_type,
                           id=doc_id,
                           body=item)
Beispiel #2
0
def save_ws_results_es(topic, ts, during, n_limit, province, city, weibos):
    """Upsert one geo weibo-sample record for *topic*, keyed by '<topic>_<ts>'.

    The weibos payload is JSON-serialised before storage; callers pass the
    raw structure.
    """
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    item = {
        'en_name': topic,
        'end_ts': ts,
        'range': during,
        'limit': n_limit,
        'province': province,
        'city': city,
        'weibo': json.dumps(weibos),
    }

    # str(ts): the original `topic + '_' + ts` raised TypeError whenever the
    # timestamp was passed as an int; str(ts) keeps string inputs unchanged.
    doc_id = topic + '_' + str(ts)

    try:
        # Document exists -> partial update; otherwise fall through and create it.
        weibo_es.get(index=index_name, doc_type=index_type, id=doc_id)
        weibo_es.update(index=index_name,
                        doc_type=index_type,
                        id=doc_id,
                        body={'doc': item})
    except Exception:
        weibo_es.index(index=index_name, doc_type=index_type, id=doc_id,
                       body=item)
Beispiel #3
0
def es2gexf(indexname):
    """Fetch the gexf document (doc_type 'text', id 1) from index *indexname*.

    Returns the document's '_source' dict, or [] when the document/index is
    missing or ES is unreachable.  NOTE: the empty-result type (list) differs
    from the success type (dict); kept for caller compatibility.
    """
    try:
        #result = es.search(index = indexname, doc_type = 'text', body = {})
        res = es.get(index=indexname, doc_type='text', id=1)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        return []
    return res['_source']
Beispiel #4
0
def es2gexf(indexname):
    """Fetch the gexf document (doc_type 'text', id 1) from index *indexname*.

    Returns the document's '_source' dict, or [] when the document/index is
    missing or ES is unreachable.  (Duplicate of the earlier definition,
    reindented with spaces per PEP 8.)
    """
    try:
        #result = es.search(index = indexname, doc_type = 'text', body = {})
        res = es.get(index=indexname, doc_type='text', id=1)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        return []
    return res['_source']
Beispiel #5
0
def exist(task_id):
    """Return True if a task document with id *task_id* exists in the topic index.

    Any ES failure (missing document, connection error) is treated as
    "does not exist".
    """
    try:
        task_exist = weibo_es.get(index=topic_index_name,
                                  doc_type=topic_index_type,
                                  id=task_id)['_source']
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        task_exist = {}
    return bool(task_exist)
Beispiel #6
0
def save_rt_results_es(calc,
                       topic,
                       results,
                       during,
                       klimit=TOP_KEYWORDS_LIMIT,
                       wlimit=TOP_WEIBOS_LIMIT):
    """Upsert real-time sentiment statistics for *topic*.

    Only calc == 'count' is handled here; `results` is then shaped as
    {end_ts: {sentiment: count, ...}, ...}.  klimit / wlimit are kept for
    interface compatibility with the other calc modes and are unused in
    this branch.  NOTE: all sentiments of one time slot share the same doc
    id, so the last sentiment written wins (preserved original behaviour).
    """
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    if calc == 'count':
        # .items() instead of .iteritems(): identical iteration on py2,
        # and does not break on py3.
        for time, sen_dict in results.items():
            doc_id = topic + '_' + time  # avoid shadowing builtin id()
            for sentiment, count in sen_dict.items():
                item = {
                    'en_name': topic,
                    'end_ts': time,
                    'range': during,
                    'sentiment': sentiment,
                    'count': count,
                }
                try:
                    # Document exists -> partial update ...
                    weibo_es.get(index=index_name,
                                 doc_type=index_type,
                                 id=doc_id)
                    weibo_es.update(index=index_name,
                                    doc_type=index_type,
                                    id=doc_id,
                                    body={'doc': item})
                except Exception:
                    # ... otherwise create it.
                    weibo_es.index(index=index_name,
                                   doc_type=index_type,
                                   id=doc_id,
                                   body=item)
Beispiel #7
0
def save_results_es(topic, language_results):
    """Upsert the language-analysis results of *topic* under the
    'language_results' field; the document id is the topic itself."""
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    doc_id = topic  # avoid shadowing builtin id()

    try:
        # Document exists -> partial update; otherwise fall through and create it.
        weibo_es.get(index=index_name, doc_type=index_type, id=doc_id)
        weibo_es.update(index=index_name,
                        doc_type=index_type,
                        id=doc_id,
                        body={'doc': {
                            'language_results': language_results
                        }})
    except Exception:
        weibo_es.index(index=index_name,
                       doc_type=index_type,
                       id=doc_id,
                       body={'language_results': language_results})
def save_first_nodes_es(topic,
                        date,
                        windowsize,
                        uid,
                        timestamp,
                        user_info,
                        weibo_info,
                        user_domain='other'):
    """Upsert the 'first node' (earliest participating user) record of a
    topic's propagation network, keyed by the user's uid.

    user_info and weibo_info are JSON-serialised before storage.
    """
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    item = {
        'en_name': topic,
        'date': date,
        'windowsize': windowsize,
        'uid': uid,
        'timestamp': timestamp,
        'user_info': json.dumps(user_info),
        'weibo_info': json.dumps(weibo_info),
        'user_domain': user_domain,
    }

    doc_id = uid  # avoid shadowing builtin id()

    try:
        # Document exists -> partial update ...
        weibo_es.get(index=index_name, doc_type=index_type, id=doc_id)
        weibo_es.update(index=index_name,
                        doc_type=index_type,
                        id=doc_id,
                        body={'doc': item})
    except Exception:
        # ... otherwise create it.  Narrowed from a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        weibo_es.index(index=index_name, doc_type=index_type, id=doc_id,
                       body=item)
Beispiel #9
0
def save_rt_results_es(topic, results, during, first_item):
    """Upsert per-message-type geo counts for *topic*.

    NOTE(review): this snippet looks like a mangled paste of two different
    functions -- see the inline notes on the undefined names `ts`, `klimit`
    and `calc`, and the orphaned `elif` at the end (a SyntaxError as-is).
    Code left byte-identical; comments only.
    """

    #mappings_event_geo_city_topic_count()
    #index_name = index_event_geo_city_topic_count
    #index_type = type_event_geo_city_topic_count

    mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results

    item = {}

    # results maps message_type -> [timestamp, geo-count structure].
    for mtype, time_geo in results.iteritems(
    ):  ##{'message_type':[timestamp,{['province':('provice':cishu),()],'city':[(city:cishu)}]}
        item['en_name'] = topic
        item['end_ts'] = time_geo[0]
        item['range'] = during
        item['mtype'] = mtype
        item['ccount'] = time_geo[1]
        item['first_item'] = first_item

        # NOTE(review): `ts` is undefined in this function -- presumably meant
        # time_geo[0] (or str(time_geo[0])); confirm against the original source.
        id = topic + '_' + ts

        try:
            item_exist = weibo_es.get(index=index_name,
                                      doc_type=index_type,
                                      id=id)['_source']
            weibo_es.update(index=index_name,
                            doc_type=index_type,
                            id=id,
                            body={'doc': item})
        except Exception, e:
            weibo_es.index(index=index_name,
                           doc_type=index_type,
                           id=id,
                           body=item)
        #index_type = type_event_time_kcount

        # NOTE(review): from here on the code appears pasted in from a different
        # function: `klimit` is undefined and `results` is re-read with a
        # different shape ({time: {mtype: keyword_dict}}).
        item = {}
        for time, mtype_dict in results.iteritems():
            id = topic + '_' + time
            for mtype, keyword_dict in mtype_dict.iteritems():
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['mtype'] = mtype
                item['limit'] = klimit
                item['kcount'] = json.dumps(keyword_dict)

                try:
                    item_exist = weibo_es.get(index=index_name,
                                              doc_type=index_type,
                                              id=id)['_source']
                    weibo_es.update(index=index_name,
                                    doc_type=index_type,
                                    id=id,
                                    body={'doc': item})
                except Exception, e:
                    weibo_es.index(index=index_name,
                                   doc_type=index_type,
                                   id=id,
                                   body=item)

    # NOTE(review): `elif` without a matching `if` at this level (SyntaxError),
    # and `calc` is undefined -- the original 'count'/'kcount'/'weibo' dispatch
    # was lost in the paste.
    elif calc == 'weibo':
        #mappings_event_time_weibo()
        #index_name = index_event_time_weibo
        #index_type = type_event_time_weibo
def compute_topic_task():
    """Run the full event-analysis pipeline for one (currently hard-coded)
    topic: mark the task started, aggregate counts, run time/propagation
    analysis and geo analysis, and persist results into the
    event-analysis-results index.

    NOTE(review): the Redis task-queue polling loop is commented out below,
    so topic / time range / keywords are hard-coded for now.
    """

    create_task()

    index_name = index_manage_event_analysis
    index_type = type_manage_event_analysis

    index_name_results = index_event_analysis_results
    index_type_results = type_event_analysis_results
    '''
    while  True:
        #print r.rpop(topic_queue_name)

        task_detail = r_event_analysis.rpop(task_event_analysis)

        #if not task_detail:
        #    break

        if  task_detail:

            break

        else:
    
            task_detail = json.loads(task_detail)
            topic = task_detail[0]
            en_name = task_detail[1]
            start_ts = task_detail[2]
            end_ts = task_detail[3]
            #keywords = task_detail[4]
            #event_value_finish = task['event_value_finish']
            #mappings_event_analysis_results(en_name)
          
            print 'start scan!!'
            while 1:
                es_result = weibo_es.get(index=index_name, doc_type=index_type, id=en_name)["_source"]
                if int(es_result["scan_text_finish"]) == 2:
                    break  #跳出该循环,接着往下执行。
                else:
                    time.sleep(60)   #等待扫描完成(int(es_result["scan_text_finish"]) == 2)
            
            t1=time.time()
    '''
    t1 = time.time()
    '''
    topic = '天津老太摆射击摊被判刑' #'毛泽东诞辰纪念日'
    en_name = 'tian_jin_lao_tai_she_ji_qiang_bei_pan_xing' #"mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482768502 #1482681600
    end_ts = 1483455435 #1483113600
    must_keywords = ["射击","判刑"] #['毛泽东']
    should_keywords = ["天津","老太"] #['诞辰','纪念日']
    #submit_time = time.time()
    submit_user = '******'
    '''

    # Hard-coded task parameters (see the commented-out queue loop above).
    topic = '毛泽东诞辰纪念日'
    en_name = "mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482681600
    end_ts = 1483113600
    must_keywords = ['毛泽东']
    should_keywords = ['诞辰', '纪念日']
    #submit_time = time.time()
    submit_user = '******'

    #start computes

    # Mark the task as "value computation started" in the manage index.
    weibo_es.update(index=index_name,
                    doc_type=index_type,
                    id=en_name,
                    body={'doc': {
                        'event_value_finish': 1
                    }})

    #try:
    #weibo_counts,uid_counts=counts(start_ts,end_ts,topic,en_name,keywords)
    weibo_counts, uid_counts = counts_aggs(en_name, start_ts, end_ts)
    #weibo_es.index(index='topics',doc_type='text',id=en_name,body={'name':topic,'start_ts':start_ts,'end_ts':end_ts,'submit_ts':submit_ts,'comput_status':0,'en_name':en_name})
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-1,'weibo_counts':weibo_counts,'uid_counts':uid_counts}})
    print 'finish change status'

    # Seed the results document with task metadata and the aggregate counts.
    item = {}
    item['topic'] = topic
    item['en_name'] = en_name
    item['start_time'] = start_ts
    item['stop_time'] = end_ts
    item['weibo_counts'] = weibo_counts
    item['uid_counts'] = uid_counts
    item['must_keywords'] = must_keywords
    item['should_keywords'] = should_keywords
    item['submit_user'] = submit_user
    #item['submit_time'] = submit_time

    weibo_es.index(index=index_name_results,
                   doc_type=index_type_results,
                   id=en_name,
                   body=item)

    #time
    time_results = propagateCronTopic(en_name, start_ts, end_ts)
    #{'during': ,'count':{},'kcount':{},'weibo':{}}
    time_results = json.dumps(time_results)
    # NOTE(review): time_results is serialised but never written to ES in this
    # function -- confirm it is persisted elsewhere or this is dead code.

    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-2}})
    print 'finish time analyze'

    #geo
    sort_ts_attr, repost_list = repost_search(en_name, start_ts, end_ts)
    # For each weibo: the repost text, mid, topic, timestamp, origin location and repost location.
    # Each entry of repost_list: {original:xx, mid:xx, topic:xx, ts:xx, origin_location:xx, repost_location:xx}

    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-}})
    print 'finish geo_1 analyze'
    geo_cityTopic_results = cityTopic(en_name, start_ts, end_ts)

    # {'geo_weibos':{},'geo_cityCount':{}}
    geo_results = {
        'sort_ts_attr': sort_ts_attr,
        'repost_list': repost_list,
        'geo_cityTopic_results': geo_cityTopic_results
    }
    geo_results = json.dumps(geo_results)
    id = en_name
    try:
        # NOTE(review): the existence check reads index_name (manage index) but
        # the update below writes index_name_results -- looks inconsistent; confirm.
        item_exist = weibo_es.get(index=index_name, doc_type=index_type,
                                  id=id)['_source']
        weibo_es.update(index=index_name_results,
                        doc_type=index_type_results,
                        id=id,
                        body={'doc': {
                            'geo_results': geo_results
                        }})
    except Exception, e:
        weibo_es.index(index=index_name_results,
                       doc_type=index_type_results,
                       id=id,
                       body={'geo_results': geo_results})
Beispiel #12
0
def compute_network(topic, start_ts, end_ts):
    """Build and persist the propagation-network analysis for *topic* over
    [start_ts, end_ts]: first-node detection, network construction, gexf
    export, PageRank, network graph rendering and maker/pusher interval
    counts.  The combined results are upserted into the
    event-analysis-results index under 'network_results'.

    (Original docstring content: topics = _topic_not_calc()
     # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}])
    """
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                             TopicStatus.start==start_ts ,\
                                                             TopicStatus.end==end_ts ,\
                                                             TopicStatus.module=='identify' ,\
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0] # 每次只计算一个----为了做一个缓冲,每个n时间才计算一个
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(topicname, start_ts, end_ts) # 重新获取id是因为TopicStatus中id是自增加的,进行更新后,id就不是原来的那一个了
        windowsize = (end_ts - start_ts) / Day # 确定时间跨度的大小
        date = ts2datetime(end_ts)
        '''

    # Changed: the topic's pinyin name (which is also the ES table name) is
    # now read from the ES table.
    network_results = {}

    if True:
        print end_ts, type(end_ts)
        #topicname = topic
        date = ts2datetime(end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        topic_pinyin_name = topic
        # print 'start topic_name_transfer'   # convert the Chinese topic name to pinyin, e.g. -> aoyunhui
        # topic_pinyin_name = weibo_TopicNameTransfer(topicname, start_ts, end_ts)
        # print topic_pinyin_name

        print 'start compute first_nodes'
        #start_date = ts2datetime(start_ts) # used to compute the first user
        first_node_results = get_first_node(topic_pinyin_name, start_ts,
                                            end_ts, windowsize, date)
        print 'end compute first_nodes'

        network_results['first_node_results'] = first_node_results

        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict = make_network(topic_pinyin_name, date,
                                                 windowsize, max_size,
                                                 attribute_add)
        #print g,gg,new_attribute_dict

        network_results['new_attribute_dict'] = new_attribute_dict

        print 'write gexf file'
        #real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        real_topic_id = topic_pinyin_name
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        #fh = open(str(GRAPH_PATH) + str(key) + '_g_graph.gexf', 'w+')
        #fh.close()
        #fh = open(str(GRAPH_PATH) + str(key) + '_gg_graph.gexf', 'w+')
        #fh.close()
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        #nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        #nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        # TODO: rework this -- SSDB is no longer used.
        #save_attribute_dict(new_attribute_dict, 'g')
        #save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, data_dict, sorted_uids = pagerank_rank(
            TOPK, date, windowsize, topic_pinyin_name)
        network_results['pagerank'] = {}
        network_results['pagerank']['all_uid_pr'] = all_uid_pr
        network_results['pagerank']['sorted_uids'] = sorted_uids
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        #topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_pinyin_name:  # to be removed
            gexf = ''
        else:
            gexf= make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data_dict,sorted_uids,\
                new_attribute_dict)
            #gexf = json.dumps(gexf)
        print 'save gexf'
        #print '*************************'*10
        #print gexf
        #print '*************************'*10
        # NOTE(review): gexf_type is not defined in this function -- presumably
        # a module-level constant; confirm.
        long_gexf = save_gexf_results(topic_pinyin_name, date, windowsize,
                                      gexf, gexf_type)

        network_results['long_gexf'] = long_gexf

        print 'start fu_tr'
        maker_results, pusher_results = get_interval_count(
            topic_pinyin_name, date, windowsize)
        print 'update_topic_end'
        #db_date = date
        #_update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
        network_results['maker_results'] = maker_results
        network_results['pusher_results'] = pusher_results

        index_name = index_event_analysis_results
        index_type = type_event_analysis_results

        network_results = json.dumps(network_results)

        id = topic

        try:
            # Document exists -> partial update; otherwise create it below.
            tem_exist = weibo_es.get(index=index_name,
                                     doc_type=index_type,
                                     id=id)['_source']
            weibo_es.update(index=index_name,
                            doc_type=index_type,
                            id=id,
                            body={'doc': {
                                'network_results': network_results
                            }})
        except Exception, e:
            weibo_es.index(index=index_name,
                           doc_type=index_type,
                           id=id,
                           body={'network_results': network_results})

        print 'network_results save done!!'

        print 'all done!'