def followers_domain_update():

    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)

    else:
        current_time = int(time.time())

    flow_text_index_name_list = get_flow_text_index_list(current_time)

    query_body = {'query': {'match_all': {}}, 'size': MAX_VALUE}

    search_results = es_xnr.search(index=weibo_xnr_fans_followers_index_name,\
     doc_type=weibo_xnr_fans_followers_index_type,body=query_body)['hits']['hits']
    followers_list_all = []
    for result in search_results:
        result = result['_source']
        followers_list = result['followers_list']
        followers_list_all.extend(followers_list)

    followers_list_all_set_list = list(set(followers_list_all))

    uid_weibo_keywords_dict, keywords_dict_all_users = uid_list_2_uid_keywords_dict(
        followers_list_all_set_list, flow_text_index_name_list)
    uids_avtive_list = uid_weibo_keywords_dict.keys(
    )  # 防止关注列表中有无效uid,或者只有近期活跃的uid才有意义。

    ## 领域分类
    r_domain = dict()
    print 'uids_avtive_list::', uids_avtive_list

    domain, r_domain = domain_classfiy(uids_avtive_list,
                                       uid_weibo_keywords_dict)
    print 'r_domain::', r_domain

    for uid, domain in r_domain.iteritems():
        domain_name = domain_en2ch_dict[domain]
        _id = uid
        try:
            print '_id:::', _id
            get_result = es_xnr.get(index=user_domain_index_name,doc_type=user_domain_index_type,\
             id=_id)['_source']

            get_result['domain_name'] = domain_name
            get_result['update_time'] = int(time.time())
            es_xnr.update(index=user_domain_index_name,doc_type=user_domain_index_type,\
             id=_id,body={'doc':get_result})

        except:
            item_dict = {}
            item_dict['uid'] = uid
            item_dict['domain_name'] = domain_name
            item_dict['update_time'] = int(time.time())

            es_xnr.index(index=user_domain_index_name,doc_type=user_domain_index_type,\
             id=_id,body=item_dict)
예제 #2
0
def get_day_status(xnr_user_no, datetime):
    """Return the stored community status of ``xnr_user_no`` for one day.

    The document id is ``xnr_user_no + '_' + ts2datetime(datetime)``.
    Returns ``-1`` when the document cannot be read or has no ``status``.
    """
    doc_id = xnr_user_no + '_' + ts2datetime(datetime)
    try:
        return es_xnr.get(index=weibo_community_status_index_name,
                          doc_type=weibo_community_status_index_type,
                          id=doc_id)['_source']['status']
    except:
        return -1
예제 #3
0
파일: utils.py 프로젝트: feifanhanmc/xnr2
def xnr_user_no2uid(xnr_user_no):
    """Map an XNR user number to its ``uid`` from the weibo XNR index.

    Returns an empty string when the document cannot be read or carries no
    ``uid`` field.
    """
    try:
        return es_xnr.get(index=weibo_xnr_index_name,
                          doc_type=weibo_xnr_index_type,
                          id=xnr_user_no)['_source']['uid']
    except:
        return ''
예제 #4
0
def get_sensitive_info(timestamp,mid=None,text=None):
    """Return the sensitivity score of a post.

    Lookup order:

    1. when ``mid`` is given, the ``sensitive`` field stored for it in the
       flow-text index of the day ``timestamp`` falls on;
    2. otherwise, or when that lookup fails, ``compute_sensitive(text)`` if
       ``text`` is given;
    3. ``0`` when neither source is available.

    Previously the computed fallback value was assigned but never returned,
    so every path except a successful lookup yielded ``None``.
    """
    index_name = flow_text_index_name_pre + ts2datetime(timestamp)
    if mid:
        try:
            # Use the stored value when a record exists.
            item_result = es_xnr.get(index=index_name,
                                     doc_type=flow_text_index_type,
                                     id=mid)['_source']
            return item_result['sensitive']
        except Exception:
            # No usable record: fall through and compute it from the text.
            pass

    if text:
        return compute_sensitive(text)
    return 0
예제 #5
0
def get_sensitive_info(timestamp, mid):
    """Return the stored ``sensitive`` value of facebook post ``mid``.

    The post is read from the facebook flow-text index of the day that
    ``timestamp`` falls on; ``0`` is returned when it cannot be read.
    """
    index_name = facebook_flow_text_index_name_pre + ts2datetime(timestamp)
    try:
        return es_xnr.get(index=index_name,
                          doc_type=facebook_flow_text_index_type,
                          id=mid)['_source']['sensitive']
    except:
        return 0
예제 #6
0
def influence_trust(uid):
    """Return the ``is_verified`` field of twitter user ``uid``.

    Falls back to ``0`` when the user document cannot be read or has no
    such field.
    """
    try:
        user_doc = es.get(index=twitter_user_index_name,
                          doc_type=twitter_user_index_type,
                          id=uid)['_source']
        return user_doc['is_verified']
    except:
        return 0
예제 #7
0
def get_hot_subopinion(xnr_user_no, task_id):
    """Return the sub-opinion results of a hot-keyword task.

    Returns
    - ``'尚未提交计算'`` when the task document cannot be read,
    - ``'正在计算'`` while its ``compute_status`` is not ``2``,
    - the decoded ``subopinion_tw`` JSON once results exist,
    - ``None`` when the task or result source is empty.
    """
    full_task_id = xnr_user_no + '_' + task_id
    try:
        task_doc = es.get(index=tw_hot_keyword_task_index_name,
                          doc_type=tw_hot_keyword_task_index_type,
                          id=full_task_id)['_source']
    except:
        return '尚未提交计算'

    if not task_doc:
        return None
    if task_doc['compute_status'] != 2:
        return '正在计算'

    # Not guarded: a missing result document propagates the client error.
    result_doc = es.get(index=tw_hot_subopinion_results_index_name,
                        doc_type=tw_hot_subopinion_results_index_type,
                        id=full_task_id)['_source']
    if result_doc:
        return json.loads(result_doc['subopinion_tw'])
    return None
예제 #8
0
def change_process_proportion(task_id, proportion):
    """Store ``proportion`` as the ``compute_status`` of a weibo domain task.

    Returns ``True`` after the update, ``False`` when the stored source is
    an empty dict, and the string ``'task is not exist'`` when the task
    document cannot be read.
    """
    try:
        task_doc = es_xnr.get(index=weibo_domain_index_name,
                              doc_type=weibo_domain_index_type,
                              id=task_id)['_source']
    except:
        return 'task is not exist'

    if task_doc == {}:
        return False

    task_doc['compute_status'] = proportion
    es_xnr.update(index=weibo_domain_index_name,
                  doc_type=weibo_domain_index_type,
                  id=task_id,
                  body={'doc': task_doc})
    return True
예제 #9
0
def judge_sensing_sensor(xnr_user_no,uid):
    """Tell whether ``uid`` is one of the social sensors of ``xnr_user_no``.

    Returns ``False`` when the XNR has no sensing document at all.
    """
    if not es_xnr.exists(index=index_sensing, doc_type=type_sensing, id=xnr_user_no):
        return False

    sensing_doc = es_xnr.get(index=index_sensing,
                             doc_type=type_sensing,
                             id=xnr_user_no)['_source']
    return uid in sensing_doc['social_sensors']
예제 #10
0
def get_recommend_at_user(xnr_user_no):
    """Recommend twitter users for the XNR to @-mention.

    Takes the authors of the 200 most recent documents in yesterday's
    twitter flow-text index (``S_DATE_TW`` stands in for "now" when
    ``S_TYPE`` is ``'test'``) and resolves them against the twitter user
    index.

    Returns a ``{uid: name}`` dict. Users without a stored ``name`` are
    skipped and collection stops once ``DAILY_AT_RECOMMEND_USER_TOP`` users
    have been gathered. An empty dict is returned when yesterday's index
    holds no documents.

    Raises whatever ``es.get`` raises when ``xnr_user_no`` has no document.
    """
    # Kept only as an existence check: the XNR's own fields were never used
    # for the recommendation, and reading them could crash the call (missing
    # keys, or an unbound name when the source was empty).
    es.get(index=tw_xnr_index_name,
           doc_type=tw_xnr_index_type,
           id=xnr_user_no)

    if S_TYPE == 'test':
        now_ts = datetime2ts(S_DATE_TW)
    else:
        now_ts = int(time.time())
    index_name = twitter_flow_text_index_name_pre + ts2datetime(now_ts - 24 * 3600)

    recent_hits = es.search(index=index_name,
                            doc_type=twitter_flow_text_index_type,
                            body={'query': {'match_all': {}},
                                  'size': 200,
                                  'sort': {'timestamp': {'order': 'desc'}}})['hits']['hits']

    # De-duplicate while keeping recency order, so a prolific author is
    # neither fetched repeatedly nor counted several times towards the limit.
    uid_list = []
    seen_uids = set()
    for hit in recent_hits:
        author_uid = hit['_source']['uid']
        if author_uid not in seen_uids:
            seen_uids.add(author_uid)
            uid_list.append(author_uid)

    # uid is stable while the display name may change, hence keyed by uid.
    uid_nick_name_dict = dict()
    if not uid_list:
        # Avoid sending an mget request that names no documents.
        return uid_nick_name_dict

    user_docs = es.mget(index=twitter_user_index_name,
                        doc_type=twitter_user_index_type,
                        body={'ids': uid_list})['docs']
    for user_doc in user_docs:
        if user_doc.get('found'):
            source = user_doc['_source']
            nick_name = source['name']
            if nick_name:
                uid_nick_name_dict[source['uid']] = nick_name
        if len(uid_nick_name_dict) >= DAILY_AT_RECOMMEND_USER_TOP:
            break

    return uid_nick_name_dict
예제 #11
0
def get_tw_xnr_fans_followers():
    """Rebuild the stored ``followers_list`` of every created twitter XNR.

    For each XNR with ``create_status == 2`` the uids found in the twitter
    follow-feedback index under its ``root_uid`` are written to the XNR's
    fans/followers document: updated when that document can be read,
    indexed as a new document otherwise.
    """
    created_query = {
        'query': {'term': {'create_status': 2}},
        'size': MAX_SEARCH_SIZE,
    }
    xnr_hits = es_xnr.search(index=tw_xnr_index_name,
                             doc_type=tw_xnr_index_type,
                             body=created_query)['hits']['hits']

    for xnr_hit in xnr_hits:
        xnr_source = xnr_hit['_source']
        root_uid = xnr_source['uid']
        xnr_user_no = xnr_source['xnr_user_no']

        follow_query = {
            'query': {'term': {'root_uid': root_uid}},
            'size': MAX_SEARCH_SIZE,
        }
        follow_hits = es_xnr.search(index=twitter_feedback_follow_index_name,
                                    doc_type=twitter_feedback_follow_index_type,
                                    body=follow_query)['hits']['hits']
        friends = [follow_hit['_source']['uid'] for follow_hit in follow_hits]

        try:
            # The read only probes for existence; a failure here (or in the
            # update) sends us to the index fallback below.
            es_xnr.get(index=tw_xnr_fans_followers_index_name,
                       doc_type=tw_xnr_fans_followers_index_type,
                       id=xnr_user_no)['_source']
            es_xnr.update(index=tw_xnr_fans_followers_index_name,
                          doc_type=tw_xnr_fans_followers_index_type,
                          id=xnr_user_no,
                          body={'doc': {'followers_list': friends}})
        except:
            es_xnr.index(index=tw_xnr_fans_followers_index_name,
                         doc_type=tw_xnr_fans_followers_index_type,
                         id=xnr_user_no,
                         body={'followers_list': friends})
예제 #12
0
def load_tw_app_api_info(xnr_user_no):
    """Return the twitter app credentials stored for ``xnr_user_no``.

    The result is a dict with ``access_secret``, ``access_token``,
    ``consumer_key`` and ``consumer_secret``. When the fetched document
    lacks its source or one of those fields, the error is printed and
    ``False`` is returned. The fetch itself is not guarded, so a failing
    ``es_xnr.get`` propagates to the caller.
    """
    results = es_xnr.get(index=tw_xnr_index_name,
                         doc_type=tw_xnr_index_type,
                         id=xnr_user_no)
    credential_fields = ('access_secret', 'access_token',
                         'consumer_key', 'consumer_secret')
    try:
        res = results['_source']
        return {field: res[field] for field in credential_fields}
    except Exception as e:
        print(e)
        return False
예제 #13
0
def influence_trust(uid):
    """Return ``1`` when facebook user ``uid`` has a ``category`` field.

    Returns ``0`` when the field is absent or the user document cannot be
    read.
    """
    try:
        user_doc = es.get(index=facebook_user_index_name,
                          doc_type=facebook_user_index_type,
                          id=uid)['_source']
        return 1 if 'category' in user_doc else 0
    except:
        return 0
예제 #14
0
def fb_save_to_fans_follow_ES(xnr_user_no,uid,follow_type,trace_type):
    """Record a trace-follow or an unfollow in the facebook fans/followers doc.

    * ``follow_type == 'follow'`` with ``trace_type == 'trace_follow'``:
      ``uid`` is merged into the uids read from ``trace_follow_pre_list``
      and the merged list is stored under ``trace_follow_list``.
    * ``follow_type == 'unfollow'``: ``uid`` is dropped from
      ``trace_follow_pre_list``.
    * any other combination leaves the document untouched.

    Returns ``False`` only when the unfollow branch fails, ``True``
    otherwise. The initial read is not guarded, so a missing document
    propagates the client error.
    """
    record = es_xnr.get(index=fb_xnr_fans_followers_index_name,
                        doc_type=fb_xnr_fans_followers_index_type,
                        id=xnr_user_no)["_source"]

    if follow_type == 'follow':
        if trace_type == 'trace_follow':
            # Add the trace-follow uid (plain followers are not recorded here).
            try:
                merged = set(record['trace_follow_pre_list'])
                merged.add(uid)
                traced_uids = list(merged)
            except:
                traced_uids = [uid]

            # NOTE(review): the list is read from 'trace_follow_pre_list' but
            # written to 'trace_follow_list', while the unfollow branch edits
            # 'trace_follow_pre_list' - confirm which key is intended.
            record['trace_follow_list'] = traced_uids
            es_xnr.update(index=fb_xnr_fans_followers_index_name,
                          doc_type=fb_xnr_fans_followers_index_type,
                          id=xnr_user_no,
                          body={'doc': record})

    elif follow_type == 'unfollow':
        try:
            remaining = set(record['trace_follow_pre_list']).difference(set([uid]))
            record['trace_follow_pre_list'] = list(remaining)
            es_xnr.update(index=fb_xnr_fans_followers_index_name,
                          doc_type=fb_xnr_fans_followers_index_type,
                          id=xnr_user_no,
                          body={'doc': record})
        except:
            return False

    return True
예제 #15
0
def save_user_warning(xnr_user_no,start_time,end_time):

    #判断数据库是否存在:
    today_date=ts2datetime(end_time)
    today_datetime = datetime2ts(today_date)
    weibo_user_warning_index_name=weibo_user_warning_index_name_pre+today_date
    if not es_xnr.indices.exists(index=weibo_user_warning_index_name):
        weibo_user_warning_mappings(weibo_user_warning_index_name)

    
    new_user_warning = create_personal_warning(xnr_user_no,start_time,end_time)

    today_history_user_warning,old_uid_list = lookup_history_user_warming(xnr_user_no,today_datetime,end_time)

    results = []
    if new_user_warning:
        for item in new_user_warning:
            id_mark = set_intersection(item['uid'],old_uid_list)
            if id_mark == 1:
                #组合,更新数据库
                task_id = xnr_user_no+'_'+item['uid']
                old_user = es_xnr.get(index=weibo_user_warning_index_name,doc_type=weibo_user_warning_index_type,id=task_id)['_source']
                old_user['content'] = json.loads(old_user['content'])
                old_user['content'].extend(item['content'])
                old_user['user_sensitive'] = old_user['user_sensitive'] + item['user_sensitive']
                #old_user['user_influence'] = old_user['user_influence'] + item['user_influence']
                try:
                    es_xnr.index(index=weibo_user_warning_index_name,doc_type=weibo_user_warning_index_type,body=old_user,id=task_id)
                    mark=True
                except:
                    mark=False

            else:
                #直接存储
                task_id=xnr_user_no+'_'+item['uid']
                try:
                    es_xnr.index(index=weibo_user_warning_index_name,doc_type=weibo_user_warning_index_type,body=item,id=task_id)
                    mark=True
                except:
                    mark=False

            results.append(mark)
    else:
        pass
    print 'person_mark::',results
    return results
예제 #16
0
def get_xnr_sensitive(xnr_user_no):
    """Return the sensitive words defined by the submitter of an XNR.

    The submitter is read from the weibo XNR document; the sensitive-words
    index is then searched for entries with ``create_type == 'my_xnrs'``
    and that submitter, ordered by ``create_time`` ascending. An empty list
    is returned when the submitter is unknown; a failing search prints a
    marker and returns whatever was collected so far.
    """
    try:
        submitter = es_xnr.get(index=weibo_xnr_index_name,
                               doc_type=weibo_xnr_index_type,
                               id=xnr_user_no)['_source']['submitter']
    except:
        submitter = ''

    words = []
    if not submitter:
        return words

    term_filters = [
        {'term': {'create_type': 'my_xnrs'}},
        {'term': {'submitter': submitter}},
    ]
    query_body = {
        'query': {'filtered': {'filter': {'bool': {'must': term_filters}}}},
        'sort': {'create_time': {'order': 'asc'}},
    }
    try:
        hits = es_xnr.search(index=weibo_sensitive_words_index_name,
                             doc_type=weibo_sensitive_words_index_type,
                             body=query_body)['hits']['hits']
        for hit in hits:
            words.append(hit['_source']['sensitive_words'])
    except:
        print('except!!!-sensitive_words')
    return words
예제 #17
0
def save_group_description_results(group_results, decect_task_information):
    """Store the group description of a weibo domain task.

    The task document (id ``decect_task_information['domain_pinyin']``)
    receives the JSON-encoded ``role_distribute``, ``top_keywords``,
    ``political_side`` and ``topic_preference`` of ``group_results`` and
    ``compute_status = 2``. When reading or updating the existing document
    fails, a new document is indexed that also carries the JSON-encoded
    task information fields. Returns ``None``.
    """
    task_id = decect_task_information['domain_pinyin']

    def _description_fields():
        # Evaluated lazily, inside whichever branch uses it, so a malformed
        # ``group_results`` fails at the same point as before.
        return {
            'role_distribute': json.dumps(group_results['role_distribute']),
            'top_keywords': json.dumps(group_results['top_keywords']),
            'political_side': json.dumps(group_results['political_side']),
            'topic_preference': json.dumps(group_results['topic_preference']),
            'compute_status': 2,  # group description stored
        }

    try:
        doc = es.get(index=weibo_domain_index_name,
                     doc_type=weibo_domain_index_type,
                     id=task_id)['_source']
        doc.update(_description_fields())
        es.update(index=weibo_domain_index_name,
                  doc_type=weibo_domain_index_type,
                  id=task_id,
                  body={'doc': doc})
    except Exception:
        doc = dict()
        for field in ('domain_pinyin', 'domain_name', 'create_type',
                      'create_time', 'submitter', 'remark'):
            doc[field] = json.dumps(decect_task_information[field])
        doc.update(_description_fields())
        es.index(index=weibo_domain_index_name,
                 doc_type=weibo_domain_index_type,
                 id=task_id,
                 body=doc)
예제 #18
0
def judge_trace_follow(xnr_user_no, uid):
    """Tell whether ``uid`` is in the XNR's ``trace_follow_list``.

    Returns ``False`` when the XNR has no fans/followers document or that
    document has no ``trace_follow_list``.
    """
    if not es_xnr.exists(index=weibo_xnr_fans_followers_index_name,
                         doc_type=weibo_xnr_fans_followers_index_type,
                         id=xnr_user_no):
        return False

    record = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                        doc_type=weibo_xnr_fans_followers_index_type,
                        id=xnr_user_no)['_source']
    try:
        traced = record['trace_follow_list']
    except:
        traced = []
    return uid in traced
예제 #19
0
def delete_xnr_followers(xnr_user_no, follower_uid):
    """Remove ``follower_uid`` from the stored followers of ``xnr_user_no``.

    Every occurrence of ``follower_uid`` is dropped from ``followers_list``
    and the document is updated with the new list together with
    ``user_no`` (the integer value of the last four characters of
    ``xnr_user_no``), ``uid`` and ``fans_list``.

    A follower that is not in the list is treated as already removed; this
    used to raise ``ValueError`` out of ``list.remove``, which also removed
    only the first of several duplicates.

    Returns ``True`` when the update succeeds and ``False`` when it fails.
    Reading the document is not guarded: a missing document or missing
    fields propagate to the caller, as before.
    """
    record = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                        doc_type=weibo_xnr_fans_followers_index_type,
                        id=xnr_user_no)['_source']
    user_no = int(xnr_user_no[-4:])
    uid = record['uid']
    fans_list = record['fans_list']
    followers_list = [follower for follower in record['followers_list']
                      if follower != follower_uid]

    try:
        es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                      doc_type=weibo_xnr_fans_followers_index_type,
                      id=xnr_user_no,
                      body={"doc": {'user_no': user_no,
                                    'uid': uid,
                                    'fans_list': fans_list,
                                    'followers_list': followers_list}})
    except Exception:
        return False
    return True
예제 #20
0
def save_role_feature_analysis(role_results,role_label,domain,role_id,task_id):
    """Store the feature analysis of one facebook role.

    The role document ``role_id`` receives the role identity
    (``role_pinyin``, ``role_name``, ``domains``) and the JSON-encoded
    analysis fields of ``role_results``; the domain task ``task_id`` is
    then marked with ``compute_status = 3``. The existing role document is
    updated when possible; when that path fails, a fresh document is
    indexed and the domain task is marked again. Returns ``None``.
    """
    def _fill(doc):
        # Same assignment order in both branches, so a malformed
        # ``role_results`` fails on the same field either way.
        doc['role_pinyin'] = role_id
        doc['role_name'] = role_label
        doc['domains'] = domain
        doc['personality'] = json.dumps(role_results['personality'])
        doc['political_side'] = json.dumps(role_results['political_side'])
        doc['geo'] = json.dumps(role_results['geo'])
        doc['active_time'] = json.dumps(list(role_results['active_time']))
        doc['day_post_num'] = json.dumps(list(role_results['day_post_num']))
        doc['psy_feature'] = json.dumps(role_results['psy_feature'])
        doc['member_uids'] = json.dumps(role_results['member_uids'])
        return doc

    def _mark_domain_analysed():
        # compute_status 3: role analysis results stored
        es_xnr.update(index=fb_domain_index_name,
                      doc_type=fb_domain_index_type,
                      id=task_id,
                      body={'doc': {'compute_status': 3}})

    try:
        role_doc = _fill(es_xnr.get(index=fb_role_index_name,
                                    doc_type=fb_role_index_type,
                                    id=role_id)['_source'])
        es_xnr.update(index=fb_role_index_name,
                      doc_type=fb_role_index_type,
                      id=role_id,
                      body={'doc': role_doc})
        _mark_domain_analysed()
    except Exception:
        role_doc = _fill(dict())
        es_xnr.index(index=fb_role_index_name,
                     doc_type=fb_role_index_type,
                     id=role_id,
                     body=role_doc)
        _mark_domain_analysed()
예제 #21
0
def save_detect_results(detect_results, decect_task_information):
    """Store the detected member uids of a weibo domain task.

    The task document (id ``decect_task_information['domain_pinyin']``)
    receives ``group_size``, ``member_uids`` and ``compute_status = 1``.
    When reading or updating the existing document fails, a new document is
    indexed that also carries the task information (JSON-encoded, except
    ``create_time`` which is stored unchanged). Returns ``None``.
    """
    task_id = decect_task_information['domain_pinyin']

    try:
        doc = es_xnr.get(index=weibo_domain_index_name,
                         doc_type=weibo_domain_index_type,
                         id=task_id)['_source']
        doc['group_size'] = len(detect_results)
        doc['member_uids'] = detect_results
        doc['compute_status'] = 1  # uids stored
        es_xnr.update(index=weibo_domain_index_name,
                      doc_type=weibo_domain_index_type,
                      id=task_id,
                      body={'doc': doc})
    except Exception:
        doc = dict()
        for field in ('domain_pinyin', 'domain_name', 'create_type'):
            doc[field] = json.dumps(decect_task_information[field])
        doc['create_time'] = decect_task_information['create_time']
        for field in ('submitter', 'remark'):
            doc[field] = json.dumps(decect_task_information[field])
        doc['group_size'] = len(detect_results)
        doc['member_uids'] = detect_results
        doc['compute_status'] = 1  # uids stored
        es_xnr.index(index=weibo_domain_index_name,
                     doc_type=weibo_domain_index_type,
                     id=task_id,
                     body=doc)
def get_bussiness_recomment_tweets(xnr_user_no,sort_item):
    """Return recommended tweets matching the XNR's monitor keywords.

    ``monitor_keywords`` of the XNR (a comma-separated string) is split
    into a keyword list, and ``sort_item`` selects the source and sort
    field:

    ==================  ===============================  ==============
    ``sort_item``       source                           sort field
    ==================  ===============================  ==============
    ``timestamp``       ``get_tweets_from_flow``          ``timestamp``
    ``sensitive_info``  ``get_tweets_from_flow``          ``sensitive``
    ``influence_info``  ``get_tweets_from_flow``          ``retweeted``
    ``sensitive_user``  ``get_tweets_from_user_portrait`` ``sensitive``
    ``influence_user``  ``get_tweets_from_bci``           ``user_index``
    ==================  ===============================  ==============

    Raises ``ValueError`` for any other ``sort_item``; this used to surface
    as an ``UnboundLocalError`` on the return statement.
    """
    get_results = es.get(index=weibo_xnr_index_name,
                         doc_type=weibo_xnr_index_type,
                         id=xnr_user_no)['_source']
    monitor_keywords_list = get_results['monitor_keywords'].split(',')

    flow_sort_fields = {
        'timestamp': 'timestamp',
        'sensitive_info': 'sensitive',
        'influence_info': 'retweeted',
    }
    if sort_item in flow_sort_fields:
        return get_tweets_from_flow(monitor_keywords_list,
                                    flow_sort_fields[sort_item])
    if sort_item == 'sensitive_user':
        return get_tweets_from_user_portrait(monitor_keywords_list, 'sensitive')
    if sort_item == 'influence_user':
        return get_tweets_from_bci(monitor_keywords_list, 'user_index')

    raise ValueError('unsupported sort_item: %r' % (sort_item,))
예제 #23
0
def get_daily_recommend_tweets(theme,sort_item):
    """Return today's daily-interest posts for ``theme``, with author info.

    ``theme`` is translated through ``daily_ch2en`` and used as the
    document id in today's daily-interest index (``S_DATE_FB`` stands in
    for "now" when ``S_TYPE`` is ``'test'``). Each entry of the stored
    JSON ``content`` is extended with the ``nick_name`` and ``photo_url``
    of its ``uid``. ``sort_item`` is accepted but not used.
    """
    now_ts = datetime2ts(S_DATE_FB) if S_TYPE == 'test' else int(time.time())
    index_name = daily_interest_index_name_pre + '_' + ts2datetime(now_ts)

    theme_en = daily_ch2en[theme]
    theme_doc = es_xnr.get(index=index_name,
                           doc_type=daily_interest_index_type,
                           id=theme_en)['_source']

    enriched = []
    for entry in json.loads(theme_doc['content']):
        nick_name, photo_url = fb_uid2nick_name_photo(entry['uid'])
        entry['nick_name'] = nick_name
        entry['photo_url'] = photo_url
        enriched.append(entry)
    return enriched
예제 #24
0
def influence_cover(uid,current_time):
    """Return the coverage number of twitter user ``uid``.

    The number is the sum of ``followers_count``, ``status_count``,
    ``friends_count`` and ``favourites_count`` from the user document, with
    a missing field counting as ``0``. ``1`` is returned when the sum is
    falsy or anything fails, so that taking its log gives 0.
    ``current_time`` is accepted but not used (no follower list is kept).
    """
    count_fields = ('followers_count', 'status_count',
                    'friends_count', 'favourites_count')
    try:
        user_doc = es.get(index=twitter_user_index_name,
                          doc_type=twitter_user_index_type,
                          id=uid)['_source']
        counts = []
        for field in count_fields:
            try:
                counts.append(user_doc[field])
            except:
                counts.append(0)

        cover_num = counts[0] + counts[1] + counts[2] + counts[3]
        if not cover_num:
            cover_num = 1
    except:
        cover_num = 1   # log of 1 is 0

    return cover_num
예제 #25
0
파일: getgroup.py 프로젝트: zhhhzhang/xnr1
def getgroup_v2(qq_xnr):
    group_dict = {}
    #step0: get qqbot_port
    if qq_xnr[:4] != 'QXNR':

        search_result = es.search(index=qq_xnr_index_name,doc_type=qq_xnr_index_type,\
            body={'query':{'term':{'qq_number':qq_xnr}}})['hits']['hits']

        qq_xnr = search_result[0]['_id']

    #try:
    qq_xnr_es_result = es.get(index=qq_xnr_index_name,\
            doc_type=qq_xnr_index_type, id=qq_xnr, _source=True)['_source']

    group_info = json.loads(qq_xnr_es_result['group_info'])

    qqbot_port = qq_xnr_es_result['qqbot_port']
    print 'qqbot_port..', qqbot_port
    p_str = 'qq ' + str(qqbot_port) + ' list group'
    p = subprocess.Popen(p_str, shell=True, \
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    line_count = 0
    for line in p.stdout.readlines():
        line_count += 1
        #print 'line.==========',line
        if line_count >= 5 and line_count % 2 == 1:
            item_line_list = line.split('|')

            try:
                #qq_group_number = str(int(item_line_list[2]))
                qq_uin_number = str(int(item_line_list[7]))
                #print 'qq_uin_number..',qq_uin_number
                qq_group_name = item_line_list[4]
                qq_mark_name = item_line_list[5]
                # group_dict[qq_group_number] = qq_group_name
                group_dict[qq_uin_number] = qq_group_name

                # 如果uin为空,则添加进去uin,如果不为空,则更新群名(因为群名可能修改)
                for key, value_dict in group_info.iteritems():

                    mark_name = value_dict['mark_name']

                    if not qq_mark_name:
                        if qq_mark_name == mark_name:
                            if not qq_group_name in value_dict['group_name']:
                                group_info[key]['group_name'].append(
                                    qq_group_name)

            except:
                next

    group_info = json.dumps(group_info)
    es.update(index=qq_xnr_index_name,
              doc_type=qq_xnr_index_type,
              id=qq_xnr,
              body={'doc': {
                  'group_info': group_info
              }})

    print 'group_dict::len..', len(group_dict)

    return group_dict
예제 #26
0
def executeES(indexName, typeName, listData):
    #current_time = int(time.time())
    #indexName += '_' + ts2datetime(current_time)

    #print 'listData:',listData
    for list_data in listData:

        data = {}
        jsonData = json.loads(list_data)
        for key, val in jsonData.items():
            # print key, '====', val
            data[key] = val
            # data['update_time'] = current_time

        if indexName != 'weibo_feedback_group':

            xnr_user_no = uid2xnr_user_no(data["root_uid"])

            sensor_mark = judge_sensing_sensor(xnr_user_no, data['uid'])
            data['sensor_mark'] = sensor_mark

            trace_follow_mark = judge_trace_follow(xnr_user_no, data['uid'])
            data['trace_follow_mark'] = trace_follow_mark

            data['sensitive_info'] = get_sensitive_info(
                data['timestamp'], data['mid'])
            data['sensitive_user'] = get_sensitive_user(
                data['timestamp'], data['uid'])

            if indexName == 'weibo_feedback_follow':
                # 修改 _id、保存至fans_followers_es表
                _id = data["root_uid"] + '_' + data["mid"]
                xnr_user_no = uid2xnr_user_no(data["root_uid"])

                save_type = 'followers'
                follow_type = 'follow'

                if xnr_user_no:
                    save_to_fans_follow_ES(xnr_user_no, data["uid"], save_type,
                                           follow_type)
                    save_to_redis_fans_follow(xnr_user_no, data["uid"],
                                              save_type)

                    # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
                    # data['sensor_mark'] = sensor_mark

                    # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
                    # data['trace_follow_mark'] = trace_follow_mark
                print 'save to es!!!!', es.index(index=indexName,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_fans':
                _id = data["root_uid"] + '_' + data["mid"]
                xnr_user_no = uid2xnr_user_no(data["root_uid"])
                save_type = 'fans'
                follow_type = 'follow'

                if xnr_user_no:
                    save_to_fans_follow_ES(xnr_user_no, data["uid"], save_type,
                                           follow_type)
                    save_to_redis_fans_follow(xnr_user_no, data["uid"],
                                              save_type)

                    # sensor_mark = judge_sensing_sensor(xnr_user_no,data['uid'])
                    # data['sensor_mark'] = sensor_mark

                    # trace_follow_mark = judge_trace_follow(xnr_user_no,data['uid'])
                    # data['trace_follow_mark'] = trace_follow_mark
                try:
                    es.get(index=indexName, doc_type=typeName, id=_id)
                except:
                    print 'save to es!!!!', es.index(index=indexName,
                                                     doc_type=typeName,
                                                     id=_id,
                                                     body=data)

            elif indexName == 'weibo_feedback_comment':
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])
                date_time = ts2datetime(data['timestamp'])
                # print 'date!!!!!!!',date_time
                # print 'indexName_date:::',indexName_date
                mappings_func = weibo_feedback_comment_mappings
                _id = data["mid"]
                # print 'comment_id........',_id
                mappings_func(date_time)
                # print 'data:::',data
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_retweet':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_retweet_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_at':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_at_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_like':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])

                mappings_func = weibo_feedback_like_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

            elif indexName == 'weibo_feedback_private':
                # indexName += '_' + ts2datetime(data['timestamp'])
                indexName_date = indexName + '_' + ts2datetime(
                    data['timestamp'])

                date_time = ts2datetime(data['timestamp'])
                mappings_func = weibo_feedback_private_mappings
                _id = data["mid"]
                mappings_func(date_time)
                print 'save to es!!!!', es.index(index=indexName_date,
                                                 doc_type=typeName,
                                                 id=_id,
                                                 body=data)

        else:

            _id = data["mid"]
            print 'save to es!!!!', es.index(index=indexName,
                                             doc_type=typeName,
                                             id=_id,
                                             body=data)

        # print 'data.........',data
        # print 'indexName....',indexName
        # print '_id......',_id
        # #print 'typeName.....',typeName
        # print 'es...',es

        # print 'save to es!!!!',es.index(index=indexName, doc_type=typeName, id=_id, body=data)

    print 'update %s ES done' % indexName
예제 #27
0
def save_event_warning(xnr_user_no,start_time,end_time):
    #判断数据库是否存在:
    today_date=ts2datetime(end_time)
    today_datetime = datetime2ts(today_date)
    weibo_event_warning_index_name = weibo_event_warning_index_name_pre+today_date
    if not es_xnr.indices.exists(index=weibo_event_warning_index_name):
        weibo_event_warning_mappings(weibo_event_warning_index_name)

    new_event_warning = create_event_warning(xnr_user_no,start_time,end_time)    

    today_history_event_warning,old_name_list = lookup_history_event_warming(xnr_user_no,today_datetime,end_time)
    print 'warning!!!',len(new_event_warning)
    results = [] 
    if new_event_warning:
        for item in new_event_warning:
            event_mark = set_intersection(item['event_name'],old_name_list)
            if event_mark == 1:
                task_id = xnr_user_no+'_'+ item['event_name']
                old_event = es_xnr.get(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,id=task_id)['_source']

                #用户合并
                old_event_main_info = json.loads(old_event['main_user_info'])
                old_event_uid_list = [user['uid'] for user in old_event_main_info]

                new_event_main_info = json.loads(item['main_user_info'])
                new_event_uid_list = [user['uid'] for user in new_event_main_info]

                add_uid_list = list(set(new_event_uid_list) - set(old_event_uid_list)&set(new_event_uid_list))

                new_main_user_info = []
                item_main_user_info = json.loads(item['main_user_info'])
                for uid in add_uid_list:
                    
                    uid_info = [u for u in item_main_user_info if u['uid'] == uid]
                    if uid_info:
                        new_main_user_info.append(uid_info[0])
                    else:
                        pass
                old_event['main_user_info'] = json.loads(old_event['main_user_info'])
                old_event['main_user_info'].extend(new_main_user_info)


                old_event_weibo_info = json.loads(old_event['main_weibo_info'])
                old_event_mid_list = [content['mid'] for content in old_event_weibo_info]

                new_event_weibo_info = json.loads(item['main_weibo_info'])
                new_event_mid_list = [content['mid'] for content in new_event_weibo_info]

                add_weibo_list = list(set(new_event_mid_list) - set(new_event_mid_list)&set(old_event_mid_list))     

                new_main_weibo_info = []
                for mid in add_weibo_list:
                    mid_info = [t for t in item['main_weibo_info'] if t['mid'] == mid]
                    if mid_info:
                        new_main_weibo_info.append(mid_info[0])
                    else:
                        pass
                old_event['main_weibo_info'] = json.loads(old_event['main_weibo_info'])
                old_event['main_weibo_info'].extend(new_main_weibo_info)

                old_event['event_influence']=old_event['event_influence']+item['event_influence']
               
                try:
                    es_xnr.update(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,id=task_id)
                    mark=True
                except:
                    mark=False

            else:
                #直接存储
                task_id=xnr_user_no+'_'+ item['event_name']
                try:
                    es_xnr.index(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,body=item,id=task_id)
                    mark=True
                except:
                    mark=False
            results.append(mark)
    else:
        pass
    print 'event_waring::',results
    return results
예제 #28
0
def create_event_warning(xnr_user_no,start_time,end_time):
    #获取事件名称
    today_datetime = start_time
    hashtag_list = get_hashtag(today_datetime)
    #print 'hashtag_list::',hashtag_list

    flow_text_index_name = get_day_flow_text_index_list(today_datetime)

    #虚拟人的粉丝列表和关注列表
    try:
        es_xnr_result=es_xnr.get(index=weibo_xnr_fans_followers_index_name,doc_type=weibo_xnr_fans_followers_index_type,id=xnr_user_no)['_source']
        followers_list=es_xnr_result['followers_list']
        fans_list=es_xnr_result['fans_list']
    except:
        followers_list=[]
        fans_list=[]

    event_warming_list=[]
    event_num=0
    for event_item in hashtag_list:
        event_sensitive_count=0
        event_warming_content=dict()     #事件名称、主要参与用户、典型微博、事件影响力、事件平均时间
        event_warming_content['event_name']=event_item['event_name']
        print 'event_name:',event_item
        event_num=event_num+1
        print 'event_num:::',event_num
        print 'first_time:::',int(time.time())
        event_influence_sum=0
        event_time_sum=0       
        query_body={
            'query':{
                # 'bool':{
                #     'must':[{'wildcard':{'text':'*'+event_item[0]+'*'}},
                #     {'range':{'sensitive':{'gte':1}}}]
                # }
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'hashtag':event_item['event_name']}},
                                {'range':{'sensitive':{'gte':1}}},
                                {'range':{'timestamp':{'gte':start_time,'lte':end_time}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }
        #try:         
        event_results=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
        print 'event:::',len(event_results),start_time,end_time
        if event_results:
            weibo_result=[]
            fans_num_dict=dict()
            followers_num_dict=dict()
            alluser_num_dict=dict()
            print 'sencond_time:::',int(time.time())
            for item in event_results:
                #print 'event_content:',item['_source']['text']          
                
                #统计用户信息
                if alluser_num_dict.has_key(str(item['_source']['uid'])):
                    followers_mark=set_intersection(item['_source']['uid'],followers_list)
                    if followers_mark > 0:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1*2
                    else:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1
                else:
                    alluser_num_dict[str(item['_source']['uid'])]=1                

                #计算影响力
                origin_influence_value=(1+item['_source']['comment']+item['_source']['retweeted'])*(1+item['_source']['sensitive'])
                # fans_value=judge_user_type(item['_source']['uid'],fans_list)
                followers_value=judge_user_type(item['_source']['uid'],followers_list)
                item['_source']['weibo_influence_value']=origin_influence_value*(followers_value)
                
                item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])

                weibo_result.append(item['_source'])

                #统计影响力、时间
                event_influence_sum=event_influence_sum+item['_source']['weibo_influence_value']
                event_time_sum=event_time_sum+item['_source']['timestamp']            
        
            print 'third_time:::',int(time.time())
            #典型微博信息
            the_weibo_result=remove_repeat_v2(weibo_result)
            the_weibo_result.sort(key=lambda k:(k.get('weibo_influence_value',0)),reverse=True)
            event_warming_content['main_weibo_info']=json.dumps(the_weibo_result)

            #事件影响力和事件时间
            number=len(event_results)
            event_warming_content['event_influence']=event_influence_sum/number
            event_warming_content['event_time']=event_time_sum/number

        # except:
        #     event_warming_content['main_weibo_info']=[]
        #     event_warming_content['event_influence']=0
        #     event_warming_content['event_time']=0
        
        # try:
            #对用户进行排序
            alluser_num_dict=sorted(alluser_num_dict.items(),key=lambda d:d[1],reverse=True)
            main_userid_list=[]
            for i in xrange(0,len(alluser_num_dict)):
                main_userid_list.append(alluser_num_dict[i][0])

        #主要参与用户信息
            main_user_info=[]
            user_es_result=es_user_profile.mget(index=profile_index_name,doc_type=profile_index_type,body={'ids':main_userid_list})['docs']
            for item in user_es_result:

                user_dict=dict()
                if item['found']:
                    user_dict['photo_url']=item['_source']['photo_url']
                    user_dict['uid']=item['_id']
                    user_dict['nick_name']=item['_source']['nick_name']
                    user_dict['favoritesnum']=item['_source']['favoritesnum']
                    user_dict['fansnum']=item['_source']['fansnum']
                else:
                    user_dict['photo_url']=''
                    user_dict['uid']=item['_id']
                    user_dict['nick_name']=''
                    user_dict['favoritesnum']=0
                    user_dict['fansnum']=0
                main_user_info.append(user_dict)
            event_warming_content['main_user_info']=json.dumps(main_user_info)


        # except:
            # event_warming_content['main_user_info']=[]
            print 'fourth_time:::',int(time.time())
            event_warming_content['xnr_user_no']=xnr_user_no
            event_warming_content['validity']=0
            event_warming_content['timestamp']=today_datetime

            event_warming_list.append(event_warming_content)
        else:
        	pass
        print 'fifth_time:::',int(time.time())
    return event_warming_list
예제 #29
0
파일: utils.py 프로젝트: feifanhanmc/xnr2
def _add_uid(uid_list, uid):
    """Return ``uid_list`` plus ``uid`` as a de-duplicated list.

    Falls back to ``[uid]`` when ``uid_list`` is missing (``None``) or cannot
    be turned into a set.
    """
    try:
        merged = set(uid_list)
        merged.add(uid)
    except TypeError:
        return [uid]
    return list(merged)


def save_to_fans_follow_ES(xnr_user_no,
                           uid,
                           save_type,
                           follow_type,
                           trace_type='ordinary_follow'):
    """Record a follow / unfollow / new fan in the fans-followers document.

    The document id is ``xnr_user_no``. Depending on the arguments:

    * ``save_type == 'followers'`` and ``follow_type == 'follow'``: add
      ``uid`` to ``followers_list``; with ``trace_type == 'trace_follow'``
      also add it to ``trace_follow_list``.
    * ``save_type == 'followers'`` and ``follow_type == 'unfollow'``: remove
      ``uid`` from ``followers_list``.
    * ``save_type == 'fans'``: add ``uid`` to ``fans_list``.
    * any other ``save_type`` / ``follow_type``: nothing is written for an
      existing document.

    When the document cannot be read it is treated as missing and a new one
    is created, except for an unfollow, where there is nothing to remove.

    Args:
        xnr_user_no: id of the document to modify.
        uid: user id to add or remove.
        save_type: ``'followers'`` or ``'fans'``.
        follow_type: ``'follow'`` or ``'unfollow'`` (only read for
            ``save_type == 'followers'``).
        trace_type: ``'trace_follow'`` to also record a trace follow.

    Returns:
        True on success or when there was nothing to do; False when an
        unfollow finds no usable ``followers_list`` or when updating an
        existing document fails.
    """
    if save_type == 'followers':
        list_field = 'followers_list'
    elif save_type == 'fans':
        list_field = 'fans_list'
    else:
        return True

    is_follow = save_type == 'followers' and follow_type == 'follow'
    is_unfollow = save_type == 'followers' and follow_type == 'unfollow'
    is_trace = is_follow and trace_type == 'trace_follow'

    # Only the read decides whether the document exists. A failing update
    # must never end in re-indexing, which would replace every stored list
    # with just ``[uid]``.
    try:
        stored = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                            doc_type=weibo_xnr_fans_followers_index_type,
                            id=xnr_user_no)['_source']
    except Exception:
        stored = None

    if stored is None:
        if is_unfollow:
            # Nothing stored, so nothing to remove; do not create a
            # document that claims ``uid`` is being followed.
            return True
        body_info = {list_field: [uid], 'xnr_user_no': xnr_user_no}
        if is_trace:
            body_info['trace_follow_list'] = [uid]
        es_xnr.index(index=weibo_xnr_fans_followers_index_name,
                     doc_type=weibo_xnr_fans_followers_index_type,
                     id=xnr_user_no,
                     body=body_info)
        return True

    if save_type == 'fans':
        changes = {'fans_list': _add_uid(stored.get('fans_list'), uid)}
    elif is_follow:
        # Ordinary follow; a trace follow is recorded in both lists.
        changes = {'followers_list': _add_uid(stored.get('followers_list'), uid)}
        if is_trace:
            changes['trace_follow_list'] = _add_uid(stored.get('trace_follow_list'), uid)
    elif is_unfollow:
        try:
            remaining = set(stored['followers_list'])
        except (KeyError, TypeError):
            return False
        remaining.discard(uid)
        changes = {'followers_list': list(remaining)}
    else:
        # Unknown follow_type on an existing document: leave it untouched.
        return True

    try:
        # Partial update: only the changed lists are sent, the other fields
        # of the stored document stay as they are.
        es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                      doc_type=weibo_xnr_fans_followers_index_type,
                      id=xnr_user_no,
                      body={'doc': changes})
    except Exception:
        return False

    return True
예제 #30
0
        else:
            fb_type = 'stranger'
    return fb_type

## 判断是否为敏感人物传感器
def judge_fb_sensing_sensor(xnr_user_no,uid):
    """Tell whether ``uid`` is one of the social sensors of ``xnr_user_no``.

    Looks up the document with id ``xnr_user_no`` in the sensing index and
    checks ``uid`` against its ``social_sensors`` field.

    Returns:
        True when the document exists and ``uid`` is in ``social_sensors``;
        False when it is not, when the document does not exist, or when the
        existence check raises (the exception is printed).
    """
    try:
        has_doc = es_xnr.exists(index=fb_index_sensing,
                                doc_type=fb_type_sensing,
                                id=xnr_user_no)
    except Exception as err:
        print(err)
        return False

    if not has_doc:
        return False

    sensing_doc = es_xnr.get(index=fb_index_sensing,
                             doc_type=fb_type_sensing,
                             id=xnr_user_no)['_source']
    return uid in sensing_doc['social_sensors']

## twitter判断关注类型
def judge_tw_follow_type(xnr_user_no,uid):
    exist_item = es_xnr.exists(index=tw_xnr_fans_followers_index_name,doc_type=tw_xnr_fans_followers_index_type,\
                id=xnr_user_no)
    if not exist_item:
        tw_type = 'stranger'
    else:
        es_get = es_xnr.get(index=tw_xnr_fans_followers_index_name,doc_type=tw_xnr_fans_followers_index_type,\
                id=xnr_user_no)['_source']