def get_save_step_three_2(task_detail): task_id = task_detail['task_id'] # nick_name = task_detail['nick_name'] try: item_fans_followers = dict() followers_uids = list(set(task_detail['followers_uids'].split(','))) # item_fans_followers['followers_list'] = followers_uids item_fans_followers['fans_list'] = followers_uids item_fans_followers['xnr_user_no'] = task_id print es.index(index=fb_xnr_fans_followers_index_name, doc_type=fb_xnr_fans_followers_index_type, id=task_id, body=item_fans_followers) #把关注任务加到redis队列中 for followers_uid in followers_uids: queue_dict = { 'channel': 'facebook', 'operate_type': 'add', 'content': { 'xnr_user_no': task_id, 'uid': followers_uid } } if not add_operate2redis(queue_dict): mark = False return mark mark = True except: mark = False return mark
def get_save_step_one(task_detail): es_results = es.search(index=fb_xnr_index_name,doc_type=fb_xnr_index_type,body={'query':{'match_all':{}},\ 'sort':{'user_no':{'order':'desc'}}})['hits']['hits'] if es_results: user_no_max = es_results[0]['_source']['user_no'] user_no_current = user_no_max + 1 else: user_no_current = 1 task_detail['user_no'] = user_no_current task_id = user_no2fb_id(user_no_current) #五位数 WXNR0001 print 'task_id' print task_id try: item_exist = dict() item_exist['user_no'] = task_detail['user_no'] item_exist['domain_name'] = task_detail['domain_name'] item_exist['role_name'] = task_detail['role_name'] item_exist['psy_feature'] = '&'.join( task_detail['psy_feature'].encode('utf-8').split(',')) item_exist['political_side'] = task_detail['political_side'] item_exist['business_goal'] = '&'.join( task_detail['business_goal'].encode('utf-8').split(',')) # item_exist['daily_interests'] = '&'.join(task_detail['daily_interests'].encode('utf-8').split(',')) item_exist['monitor_keywords'] = '&'.join( task_detail['monitor_keywords'].encode('utf-8').split(',')) item_exist['create_status'] = 0 # 第一步完成 print es.index(index=fb_xnr_index_name, doc_type=fb_xnr_index_type, id=task_id, body=item_exist) mark = True except: mark = False return mark
def get_save_step_two(task_detail): #update user_no_max = get_fb_xnr_no() user_no_current = user_no_max + 1 r.set(fb_xnr_max_no, user_no_current) task_detail['user_no'] = user_no_current task_id = user_no2fb_id(user_no_current) #五位数 FXNR0001 item_exist = dict() item_exist['submitter'] = task_detail['submitter'] item_exist['user_no'] = task_detail['user_no'] item_exist['domain_name'] = task_detail['domain_name'] item_exist['role_name'] = task_detail['role_name'] item_exist['psy_feature'] = '&'.join(task_detail['psy_feature'].encode('utf-8').split(',')) item_exist['political_side'] = task_detail['political_side'] item_exist['business_goal'] = '&'.join(task_detail['business_goal'].encode('utf-8').split(',')) # item_exist['daily_interests'] = '&'.join(task_detail['daily_interests'].encode('utf-8').split(',')) item_exist['monitor_keywords'] = ','.join(task_detail['monitor_keywords'].encode('utf-8').split(',')) item_exist['active_time'] = '&'.join(task_detail['active_time'].split('-')) item_exist['day_post_average'] = json.dumps(task_detail['day_post_average'].split('-')) item_exist['create_status'] = 1 # 第二步完成 item_exist['xnr_user_no'] = task_id # 虚拟人编号 item_exist['create_time'] = int(time.time()) print es.index(index=fb_xnr_index_name,doc_type=fb_xnr_index_type,id=task_id,body=item_exist) mark = True return mark,task_id
def addto_twitter_corpus(task_detail):
    """Copy a tweet's text, attention metrics and author nickname into the
    Twitter corpus index.

    Returns True when the final index call succeeds, False otherwise.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr.get(index=flow_text_index_name,
                                   doc_type=twitter_flow_text_index_type,
                                   id=task_detail['tid'])['_source']
        task_detail['text'] = corpus_result['text']
        # Look up the three attention metric fields
        tid_result = lookup_tid_attend_index(task_detail['tid'],
                                             task_detail['timestamp'],
                                             task_detail['timestamp'])
        if tid_result:
            task_detail['comment'] = tid_result['comment']
            task_detail['share'] = tid_result['share']
            task_detail['favorite'] = tid_result['favorite']
        else:
            task_detail['comment'] = 0
            task_detail['share'] = 0
            task_detail['favorite'] = 0
        # Look up the author's nickname.
        # BUG FIX: original called get_user_nickname(item['_source']['uid']);
        # `item` is undefined in this scope, so the bare except silently
        # swallowed a NameError and nick_name was never set. Use the fetched
        # document's uid, matching addto_facebook_corpus.
        task_detail['nick_name'] = get_user_nickname(corpus_result['uid'])
    except:
        # best-effort: indexing below still proceeds with whatever was set
        mark = False
    try:
        es_xnr.index(index=twitter_xnr_corpus_index_name,
                     doc_type=twitter_xnr_corpus_index_type,
                     id=task_detail['tid'], body=task_detail)
        mark = True
    except:
        mark = False
    return mark
def addto_warning_corpus(task_detail):
    """Copy a tweet into the warning-corpus index, enriched with attention
    metrics and a friends/unfriends tag.

    Returns True on success, False on any error.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr_2.get(index=flow_text_index_name,
                                     doc_type=twitter_flow_text_index_type,
                                     id=task_detail['tid'])['_source']
        corpus_result['xnr_user_no'] = task_detail['xnr_user_no']
        corpus_result['warning_source'] = task_detail['warning_source']
        corpus_result['create_time'] = task_detail['create_time']
        corpus_result['validity'] = 1
        corpus_result['nick_name'] = get_user_nickname(task_detail['uid'])
        tid_result = lookup_tid_attend_index(task_detail['tid'], task_detail['timestamp'])
        for metric in ('comment', 'share', 'favorite'):
            corpus_result[metric] = tid_result[metric] if tid_result else 0
        # Look up the friends list and tag the author accordingly
        lookup_type = 'fans_list'
        friends_list = lookup_xnr_fans_followers(task_detail['xnr_user_no'], lookup_type)
        set_mark = set_intersection(task_detail['uid'], friends_list)
        if set_mark > 0:
            corpus_result['content_type'] = 'friends'
        else:
            corpus_result['content_type'] = 'unfriends'
        es_xnr_2.index(index=twitter_warning_corpus_index_name,
                       doc_type=twitter_warning_corpus_index_type,
                       id=task_detail['tid'], body=corpus_result)
        mark = True
    except:
        mark = False
    return mark
def write_envent_warming(today_datetime, event_warming_content, task_id):
    """Index one event-warning document into the day's event-warning index.

    Always returns True; an indexing failure propagates to the caller
    (the original try/except was commented out). The misspelled name
    ('envent'/'warming') is part of the public interface and is kept.
    """
    index_name = twitter_event_warning_index_name_pre + ts2datetime(today_datetime)
    es_xnr_2.index(index=index_name,
                   doc_type=twitter_event_warning_index_type,
                   body=event_warming_content,
                   id=task_id)
    mark = True
    return mark
def domain_update_task(domain_name, create_type, create_time, submitter, description, remark, compute_status=0): task_id = pinyin.get(domain_name, format='strip', delimiter='_') try: domain_task_dict = dict() #domain_task_dict['xnr_user_no'] = xnr_user_no domain_task_dict['domain_pinyin'] = pinyin.get(domain_name, format='strip', delimiter='_') domain_task_dict['domain_name'] = domain_name domain_task_dict['create_type'] = json.dumps(create_type) domain_task_dict['create_time'] = create_time domain_task_dict['submitter'] = submitter domain_task_dict['description'] = description domain_task_dict['remark'] = remark domain_task_dict['compute_status'] = compute_status print 'create_type' print create_type r.lpush(tw_target_domain_detect_queue_name, json.dumps(domain_task_dict)) item_exist = dict() #item_exist['xnr_user_no'] = domain_task_dict['xnr_user_no'] item_exist['domain_pinyin'] = domain_task_dict['domain_pinyin'] item_exist['domain_name'] = domain_task_dict['domain_name'] item_exist['create_type'] = domain_task_dict['create_type'] item_exist['create_time'] = domain_task_dict['create_time'] item_exist['submitter'] = domain_task_dict['submitter'] item_exist['description'] = domain_task_dict['description'] item_exist['remark'] = domain_task_dict['remark'] item_exist['group_size'] = '' item_exist['compute_status'] = 0 # 存入创建信息 es.index(index=tw_domain_index_name, doc_type=tw_domain_index_type, id=item_exist['domain_pinyin'], body=item_exist) mark = True except Exception, e: print e mark = False
def domain_create_task(domain_name, create_type, create_time, submitter, description, remark, compute_status=0): task_id = pinyin.get(domain_name, format='strip', delimiter='_') try: es.get(index=fb_domain_index_name, doc_type=fb_domain_index_type, id=task_id)['_source'] return 'domain name exists!' except: try: domain_task_dict = dict() domain_task_dict['domain_pinyin'] = pinyin.get(domain_name, format='strip', delimiter='_') domain_task_dict['domain_name'] = domain_name domain_task_dict['create_type'] = json.dumps(create_type) domain_task_dict['create_time'] = create_time domain_task_dict['submitter'] = submitter domain_task_dict['description'] = description domain_task_dict['remark'] = remark domain_task_dict['compute_status'] = compute_status # print 'domain_task_dict' # print domain_task_dict # print 'before: r.lrange' # print r.lrange(fb_target_domain_detect_queue_name,0,100) r.lpush(fb_target_domain_detect_queue_name, json.dumps(domain_task_dict)) # print 'after: r.lrange' # print r.lrange(fb_target_domain_detect_queue_name,0,100) item_exist = dict() item_exist['domain_pinyin'] = domain_task_dict['domain_pinyin'] item_exist['domain_name'] = domain_task_dict['domain_name'] item_exist['create_type'] = domain_task_dict['create_type'] item_exist['create_time'] = domain_task_dict['create_time'] item_exist['submitter'] = domain_task_dict['submitter'] item_exist['description'] = domain_task_dict['description'] item_exist['remark'] = domain_task_dict['remark'] item_exist['group_size'] = '' item_exist['compute_status'] = 0 # 存入创建信息 print es.index(index=fb_domain_index_name, doc_type=fb_domain_index_type, id=item_exist['domain_pinyin'], body=item_exist) mark = True except Exception, e: print e mark = False return mark
def get_save_step_three_2(task_detail): task_id = task_detail['task_id'] nick_name = task_detail['nick_name'] try: item_fans_followers = dict() followers_uids = list(set(task_detail['followers_uids'].split(','))) item_fans_followers['followers_list'] = followers_uids item_fans_followers['xnr_user_no'] = task_id print es.index(index=tw_xnr_fans_followers_index_name,doc_type=tw_xnr_fans_followers_index_type,id=task_id,body=item_fans_followers) mark = True except: mark = False return mark
def addto_facebook_corpus(task_detail):
    """Copy a Facebook post's text, attention metrics and author nickname
    into the Facebook corpus index.

    Returns True when the final index call succeeds, False otherwise.
    """
    flow_text_index_name = facebook_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr.get(index=flow_text_index_name,
                                   doc_type=facebook_flow_text_index_type,
                                   id=task_detail['fid'])['_source']
        task_detail['text'] = corpus_result['text']
        # Look up the three attention metric fields
        fid_result = lookup_fid_attend_index(task_detail['fid'],
                                             task_detail['timestamp'],
                                             task_detail['timestamp'])
        for metric in ('comment', 'share', 'favorite'):
            task_detail[metric] = fid_result[metric] if fid_result else 0
        # Look up the author's nickname
        task_detail['nick_name'] = get_user_nickname(corpus_result['uid'])
    except:
        # best-effort: indexing below still proceeds with whatever was set
        mark = False
    try:
        es_xnr.index(index=facebook_xnr_corpus_index_name,
                     doc_type=facebook_xnr_corpus_index_type,
                     id=task_detail['fid'], body=task_detail)
        mark = True
    except:
        mark = False
    return mark
def create_fans_info(xnr_user_no): print es.index(fb_xnr_fans_followers_index_name, fb_xnr_fans_followers_index_type, body={'fans_list': []}, id=xnr_user_no)
def get_generate_example_model(domain_name, role_name, mail):
    """Generate an example-model JSON file for a domain/role pair and record it in ES.

    Reads the stored role document, derives readable political-side and
    psy-feature fields, extracts monitor keywords from the group's recent
    flow text, pulls profile fields for the member uids, dumps the result to
    EXAMPLE_MODEL_PATH/<task_id>.json and indexes a small pointer document.
    Returns True on success, False on failure.
    """
    export_group_info(domain_name, mail)
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = tw_domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    item = es.get(index=tw_role_index_name, doc_type=tw_role_index_type,
                  id=task_id)['_source']
    # Political orientation: first stored [label, score] pair
    political_side = json.loads(item['political_side'])[0][0]
    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'
    # Psychological features: keep the top-N labels
    psy_feature = json.loads(item['psy_feature'])
    psy_feature_list = [psy_feature[i][0] for i in range(TOP_PSY_FEATURE)]
    item['psy_feature'] = '&'.join(psy_feature_list)
    role_group_uids = json.loads(item['member_uids'])
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())
    index_name_list = get_flow_text_index_list(current_time)
    query_body_search = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {
                        'uid': role_group_uids
                    }
                }
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    keyword_hits = es_flow_text.search(index=index_name_list,
                                       doc_type=flow_text_index_type,
                                       body=query_body_search)['hits']['hits']
    # Concatenate all keyword strings and extract monitor keywords
    keywords_string = ''
    for hit in keyword_hits:
        keywords_string += '&'
        keywords_string += hit['_source']['keywords_string']
    k_dict = extract_keywords(keywords_string)
    monitor_keywords_list = [kw.word.encode('utf-8') for kw in k_dict]
    item['monitor_keywords'] = ','.join(monitor_keywords_list)
    # Pull profile fields for the member uids; each found doc overwrites the
    # previous one, so the last found member's values win (preserved behavior)
    profile_docs = es_user_portrait.mget(index=profile_index_name,
                                         doc_type=profile_index_type,
                                         body={'ids': role_group_uids})['docs']
    item['nick_name'] = []
    for doc in profile_docs:
        if doc['found']:
            content = doc['_source']
            item['nick_name'] = ''
            if content.has_key('username'):
                item['nick_name'] = content['username']
            item['location'] = ''
            if content.has_key('location'):
                item['location'] = content['location']
            item['description'] = ''
            if content.has_key('description'):
                item['description'] = content['description']
    item['business_goal'] = u'渗透'
    # item['daily_interests'] = u'旅游'
    item['age'] = 30
    item['career'] = u'自由职业'
    # Keep only the indices of the most-active time slots
    active_time_list_np = np.array(json.loads(item['active_time']))
    active_time_list_np_sort = np.argsort(-active_time_list_np)[:TOP_ACTIVE_TIME]
    item['active_time'] = active_time_list_np_sort.tolist()
    day_post_num_list = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_list).tolist()
    item['role_name'] = role_name
    task_id_new = 'tw_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    try:
        with open(example_model_file_name, "w") as dump_f:
            json.dump(item, dump_f)
        pointer_doc = dict()
        pointer_doc['domain_name'] = domain_name
        pointer_doc['role_name'] = role_name
        es.index(index=tw_example_model_index_name,
                 doc_type=tw_example_model_index_type,
                 body=pointer_doc, id=task_id_new)
        mark = True
    except:
        mark = False
    return mark
def report_warming_content(task_detail): report_dict=dict() report_dict['report_type']=task_detail['report_type'] report_dict['report_time']=task_detail['report_time'] report_dict['xnr_user_no']=task_detail['xnr_user_no'] report_dict['event_name']=task_detail['event_name'] report_dict['uid']=task_detail['uid'] report_dict['nick_name']=get_user_nickname(task_detail['uid']) tw_list=[] user_list=[] # print 'type:',type(task_detail['weibo_info']),task_detail['weibo_info'] tw_info=task_detail['tw_info'] for item in tw_info: lookup_mark=False item['timestamp'] = int(item['timestamp']) if task_detail['report_type']==u'人物': twitter_user_warning_index_name = twitter_user_warning_index_name_pre + ts2datetime(item['timestamp']) twitter_user_warming_id=task_detail['xnr_user_no']+'_'+task_detail['uid'] try: twitter_user_result=es_xnr_2.get(index=twitter_user_warning_index_name,doc_type=twitter_user_warning_index_type,id=twitter_user_warming_id)['_source'] user_warning_content=json.dumps(twitter_user_result['content']) for content in user_warning_content: if content['tid'] == item['tid']: lookup_mark=True tw_list.append(content) else: pass except: print 'user_error!' elif task_detail['report_type']==u'言论': twitter_speech_warning_index_name = twitter_speech_warning_index_name_pre + ts2datetime(item['timestamp']) try: twitter_speech_result=es_xnr_2.get(index=twitter_speech_warning_index_name,doc_type=twitter_speech_warning_index_type,id=task_detail['xnr_user_no']+'_'+item['tid'])['_source'] report_dict['uid']=twitter_speech_result['uid'] lookup_mark=True tw_list.append(twitter_speech_result) except: # weibo_timing_warning_index_name = weibo_timing_warning_index_name_pre + ts2datetime(item['timestamp']) print 'speech_error!' 
elif task_detail['report_type']==u'事件': twitter_event_warning_index_name = twitter_event_warning_index_name_pre + ts2datetime(item['timestamp']) event_warning_id = task_detail['xnr_user_no']+'_'+task_detail['event_name'] try: event_result=es_xnr_2.get(index=twitter_event_warning_index_name,doc_type=twitter_event_warning_index_type,id=event_warning_id)['_source'] event_content=json.dumps(event_result['main_twitter_info']) for event in event_content: if event['tid'] == item['tid']: lookup_mark=True tw_list.append(event) else: pass except: print 'event_error!' elif task_detail['report_type']==u'时间': year = ts2yeartime(item['timestamp']) twitter_timing_warning_index_name = twitter_timing_warning_index_name_pre + year +'_' + task_detail['date_time'] try: time_result=es_xnr_2.search(index=twitter_timing_warning_index_name,doc_type=twitter_timing_warning_index_type,query_body={'query':{'match_all':{}}})['hits']['hits'] time_content=[] for timedata in time_result: for data in timedata['twitter_date_warming_content']: if data['tid'] == item['tid']: lookup_mark=True tw_list.append(data) else: pass except: print 'time_error!' if lookup_mark: pass else: flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(item['timestamp']) try: tw_result=es_xnr_2.get(index=flow_text_index_name,doc_type=twitter_flow_text_index_type,id=item['tid'])['_source'] tw_result['nick_name']=get_user_nickname(fb_result['uid']) tid_result=lookup_tid_attend_index(item['tid'],item['timestamp']) if tid_result: tw_result['comment']=tid_result['comment'] tw_result['share']=tid_result['share'] tw_result['favorite']=tid_result['favorite'] else: tw_result['comment']=0 tw_result['share']=0 tw_result['favorite']=0 tw_list.append(tw_result) except: print 'flow_text error!' 
user_info=task_detail['user_info'] if user_info: for uid in user_info: user=dict() try: user_result=es_xnr_2.get(index=twitter_user_index_name,doc_type=twitter_user_index_type,id=uid)['_source'] user_dict['uid']=item['_id'] user_dict['username']=user_result['username'] if user_result.has_key('talking_about_count'): user_dict['talking_about_count']=user_result['talking_about_count'] else: user_dict['talking_about_count']=0 if user_result.has_key('likes'): user_dict['likes']=user_result['likes'] else: user_dict['likes']=0 if user_result.has_key('category'): user_dict['category']=user_result['category'] else: user_dict['category']='' user_list.append(user) except: user_dict['uid']=item['_id'] user_dict['username']='' user_dict['talking_about_count']=0 user_dict['likes']=0 user_dict['category']='' user_list.append(user) print 'user_list error!' else: pass report_content=dict() report_content['user_list']=user_list report_content['tw_list']=tw_list report_dict['report_content']=json.dumps(report_content) report_id='' if task_detail['report_type'] == u'言论': report_id=weibo_info[0]['tid'] elif task_detail['report_type'] == u'人物': report_id=task_detail['xnr_user_no']+'_'+task_detail['uid'] elif task_detail['report_type'] == u'事件': report_id=task_detail['xnr_user_no']+'_'+task_detail['event_name'] elif task_detail['report_type'] == u'时间': # print weibo_info if tw_info: report_id=tw_info[0]['tid'] else: report_id=str(task_detail['report_time']) if tw_list: report_mark=True else: report_mark=False #预警上报后不再显示问题 now_time=int(time.time()) twitter_report_management_index_name = twitter_report_management_index_name_pre + ts2datetime(now_time) if es_xnr_2.indices.exists(index=twitter_report_management_index_name): pass else: twitter_report_management_mappings() if report_id and report_mark: try: es_xnr_2.index(index=twitter_report_management_index_name,doc_type=twitter_report_management_index_type,id=report_id,body=report_dict) mark=True except: mark=False else: mark=False return 
mark