def deal_seed_user_file(upload_data, seed_info_type):
    """Parse an uploaded seed-user file into uids.

    upload_data    -- raw file text, one entry per line (CRLF tolerated)
    seed_info_type -- 'uid' (10-char ids), 'uname' (nicknames resolved
                      through the user-profile index) or 'url' (profile
                      URLs whose 5th '/'-separated part ends in the uid)
    Returns (uid_list, invalid_user_list).
    """
    uid_list = []
    invalid_user_list = []
    line_list = upload_data.split('\n')
    if seed_info_type == 'uid':
        # (removed a stray debug print of the raw line list)
        for line in line_list:
            uid = line.split('\r')[0]
            if len(uid) == 10:
                uid_list.append(uid)
            elif uid != '':
                # non-empty but not 10 chars: report back to the caller
                invalid_user_list.append(uid)
    elif seed_info_type == 'uname':
        valid_uname_list = []
        uname_list = [line_item.split('\r')[0] for line_item in line_list]
        # resolve unames -> uids through the user-profile index
        profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type,
                body={'query': {'terms': {'nick_name': uname_list}}},
                _source=False, fields=['nick_name'])['hits']['hits']
        if profile_exist_result:
            for profile_exist_item in profile_exist_result:
                uid_list.append(profile_exist_item['_id'])
                valid_uname_list.append(profile_exist_item['fields']['nick_name'][0])
        # unames with no profile hit are invalid
        invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    elif seed_info_type == 'url':
        # BUG FIX: blank lines (e.g. the trailing '' from split('\n')) and
        # malformed URLs used to raise IndexError on [4]; blank lines are
        # now skipped and malformed ones reported as invalid. Also strips
        # the trailing '\r' that CRLF files leave on every line.
        for line_item in line_list:
            url = line_item.split('\r')[0]
            if not url:
                continue
            url_parts = url.split('/')
            if len(url_parts) > 4:
                uid_list.append(url_parts[4][-10:])
            else:
                invalid_user_list.append(url)
    return uid_list, invalid_user_list
def deal_seed_user_string(seed_info_string, seed_info_type):
    """Parse a space-separated seed-user string into uids.

    seed_info_string -- entries separated by single spaces
    seed_info_type   -- 'uid', 'uname' or 'url'
    Returns (uid_list, invalid_user_list).
    """
    uid_list = []
    invalid_user_list = []
    if seed_info_type == 'uid':
        for uid in seed_info_string.split(' '):
            if len(uid) == 10:
                uid_list.append(uid)
            else:
                invalid_user_list.append(uid)
    elif seed_info_type == 'uname':
        valid_uname_list = []
        uname_list = seed_info_string.split(' ')
        # resolve unames -> uids through the user-profile index
        profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type,
                body={'query': {'terms': {'nick_name': uname_list}}},
                _source=False, fields=['nick_name'])['hits']['hits']
        if profile_exist_result:
            for profile_item in profile_exist_result:
                uid_list.append(profile_item['_id'])
                valid_uname_list.append(profile_item['fields']['nick_name'][0])
        # unames with no profile hit are invalid
        invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    elif seed_info_type == 'url':
        # BUG FIX: URLs with fewer than 5 '/'-separated parts used to raise
        # IndexError on [4]; they are now reported as invalid instead.
        for url_item in seed_info_string.split(' '):
            url_item_list = url_item.split('/')
            if len(url_item_list) > 4:
                uid_list.append(url_item_list[4][-10:])
            else:
                invalid_user_list.append(url_item)
    return uid_list, invalid_user_list
def submit_identify_in_uname(input_data):
    """Submit a newline-separated uname list for recommendation.

    Resolves unames to uids via the profile index, drops users that are
    already in the portrait index or queued in the 'compute' hash, then
    records the rest under today's submit hashes in redis.
    Returns a status string on early exit, True on success.
    """
    date = input_data['date']
    submit_user = input_data['user']
    uname_list = input_data['upload_data'].split('\n')

    # step1: resolve unames -> uids through the user-profile index
    hits = es_user_profile.search(
        index=profile_index_name,
        doc_type=profile_index_type,
        body={'query': {'terms': {'nick_name': uname_list}}},
        _source=False)['hits']['hits']
    uid_list = [hit['_id'] for hit in hits]
    if not uid_list:
        return 'uname list valid'

    # step2.1: keep only uids not yet in the portrait index
    docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    new_uid_list = [doc['_id'] for doc in docs if doc['found'] == False]
    if not new_uid_list:
        return 'uname list all in'

    # step2.2: keep only uids not already queued for computation
    in_uid_list = list(set(new_uid_list) - set(r.hkeys('compute')))
    if not in_uid_list:
        return 'uname list all in'

    # step3: persist the submission in redis
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    for candidate in in_uid_list:
        if candidate in auto_recomment_set:
            # already auto-recommended today: merge this submitter into
            # the existing '&'-joined operator list, de-duplicated
            record = json.loads(r.hget(hashname_submit, candidate))
            operators = record['operation'].split('&')
            operators.append(str(submit_user))
            record['operation'] = '&'.join(list(set(operators)))
        else:
            record = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, candidate, json.dumps(record))
        r.hset(submit_user_recomment, candidate, '0')
    return True
def submit_identify_in_uname(input_data):
    """Submit a recommendation request for a newline-separated uname list.

    input_data keys: 'date', 'user' (the submitter), 'operation_type'
    (only 'submit' actually writes to redis) and 'upload_data' (raw text,
    one uname per line, CRLF tolerated).

    NOTE(review): the return arity is inconsistent -- a 3-tuple for
    invalid input, a 2-tuple when every user is already known, and a
    4-tuple on success. Callers must special-case each shape; confirm
    before unifying.
    """
    date = input_data['date']
    submit_user = input_data['user']
    operation_type = input_data['operation_type']
    upload_data = input_data['upload_data']
    # get uname list from upload data (strip the trailing '\r' CRLF leaves)
    uname_list_pre = upload_data.split('\n')
    uname_list = [item.split('\r')[0] for item in uname_list_pre]
    uid_list = []
    have_in_user_list = []
    invalid_user_list = []
    valid_uname_list = []
    #step1: get uid list from uname via the user-profile index
    profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query':{'terms':{'nick_name': uname_list}}}, _source=False, fields=['nick_name'])['hits']['hits']
    for profile_item in profile_exist_result:
        uid = profile_item['_id']
        uid_list.append(uid)
        uname = profile_item['fields']['nick_name'][0]
        valid_uname_list.append(uname)
    # unames with no profile hit abort the whole submission
    invalid_user_list = list(set(uname_list) - set(valid_uname_list))
    if len(invalid_user_list) != 0:
        return False, 'invalid user info', invalid_user_list
    #step2: filter user not in user_portrait and compute
    #step2.1: identify in user_portrait
    new_uid_list = []
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==False]
    have_in_user_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found']==True]
    if not new_uid_list:
        return False, 'all user in'
    #step2.2: identify in compute (redis hash of uids queued for computing)
    new_uid_set = set(new_uid_list)
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(new_uid_set - compute_set)
    if not in_uid_list:
        return False, 'all user in'
    #step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    # users already auto-recommended today (by influence or sensitivity)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    #identify final submit user list
    final_submit_user_list = []
    for in_item in in_uid_list:
        if in_item in auto_recomment_set:
            # merge this submitter into the '&'-joined operator list,
            # de-duplicated
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            new_list = list(set(recommentor_list))
            tmp['operation'] = '&'.join(new_list)
        else:
            tmp = {'system':'0', 'operation': submit_user}
        # only a real 'submit' persists; other operation types change nothing
        if operation_type == 'submit':
            r.hset(hashname_submit, in_item, json.dumps(tmp))
            r.hset(submit_user_recomment, in_item, '0')
            final_submit_user_list.append(in_item)
    return True, invalid_user_list, have_in_user_list, final_submit_user_list
def post(self):
    """Render the search page; on a valid form submission run the ES
    query and render the results into the same template."""
    form = SearchForm()
    if not form.validate_on_submit():
        return render_template(self.template, form=form)
    # TODO es search: build the query from the validated form fields
    try:
        source = es.search(size=100)
    except Exception as e:
        raise e
    # BUG FIX: `data` was referenced without ever being assigned, so every
    # successful search raised NameError; pass an empty dict to keep the
    # template contract until the TODO above fills it in.
    data = {}
    return render_template(self.template, source=source, data=data)
def submit_identify_in_uname(input_data):
    """Submit a newline-separated uname list for recommendation.

    input_data keys: 'date', 'user' (the submitter), 'upload_data'.
    Returns a status string on early exit, True on success.
    """
    date = input_data['date']
    submit_user = input_data['user']
    upload_data = input_data['upload_data']
    # get uname list from upload data
    uname_list = upload_data.split('\n')
    uid_list = []
    #step1: get uid list from uname via the user-profile index
    profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type,
            body={'query': {'terms': {'nick_name': uname_list}}},
            _source=False)['hits']['hits']
    for profile_item in profile_exist_result:
        uid_list.append(profile_item['_id'])
    if not uid_list:
        return 'uname list valid'
    #step2: filter user not in user_portrait and compute
    #step2.1: identify in user_portrait
    exist_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    new_uid_list = [exist_item['_id'] for exist_item in exist_portrait_result if exist_item['found'] == False]
    if not new_uid_list:
        return 'uname list all in'
    #step2.2: identify in compute
    # BUG FIX: r.hkeys() returns a list; it must be wrapped in set() before
    # the set difference below (the original raised TypeError here).
    compute_set = set(r.hkeys('compute'))
    in_uid_list = list(set(new_uid_list) - compute_set)
    if not in_uid_list:
        return 'uname list all in'
    #step3: save submit
    hashname_submit = 'submit_recomment_' + date
    hashname_influence = 'recomment_' + date + '_influence'
    hashname_sensitive = 'recomment_' + date + '_sensitive'
    submit_user_recomment = 'recomment_' + submit_user + '_' + str(date)
    auto_recomment_set = set(r.hkeys(hashname_influence)) | set(r.hkeys(hashname_sensitive))
    for in_item in in_uid_list:
        # BUG FIX: the original read/wrote hash field `uid` (a stale
        # leftover from the step-1 loop) instead of the user currently
        # being processed, corrupting the submit hashes.
        if in_item in auto_recomment_set:
            tmp = json.loads(r.hget(hashname_submit, in_item))
            recommentor_list = tmp['operation'].split('&')
            recommentor_list.append(str(submit_user))
            tmp['operation'] = '&'.join(list(set(recommentor_list)))
        else:
            tmp = {'system': '0', 'operation': submit_user}
        r.hset(hashname_submit, in_item, json.dumps(tmp))
        r.hset(submit_user_recomment, in_item, '0')
    return True
def deal_seed_user_string(seed_info_string, seed_info_type):
    """Parse a '/'-separated seed string into a uid list (legacy variant,
    returns only uid_list, no invalid list).

    NOTE(review): uid/uname entries are '/'-separated here, but URLs
    themselves contain '/', so the 'url' branch can only sensibly receive
    a single URL -- confirm the intended separator with callers (the
    newer variant of this function splits on spaces).
    """
    if seed_info_type == 'uid':
        uid_list = seed_info_string.split('/')
    elif seed_info_type == 'uname':
        uid_list = []
        uname_list = seed_info_string.split('/')
        # resolve unames -> uids through the user-profile index
        profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type,
                body={'query': {'terms': {'nick_name': uname_list}}},
                _source=False)['hits']['hits']
        if profile_exist_result:
            for profile_item in profile_exist_result:
                uid_list.append(profile_item['_id'])
    elif seed_info_type == 'url':
        # BUG FIX: the original split the string by '/' and then split each
        # fragment by '/' again -- the inner split could never yield five
        # parts, so [4] raised IndexError on every call. Treat the whole
        # string as one URL and take the uid from its 5th path segment.
        uid_list = []
        url_item_list = seed_info_string.split('/')
        if len(url_item_list) > 4:
            uid_list.append(url_item_list[4][-10:])
    return uid_list
def get(self):
    """Export the ES user documents for the comma-separated ``id`` query
    parameter to static/download/test.csv and return the last hit as JSON.

    NOTE(review): the JSON response only reflects the *last* uid searched;
    presumably callers rely on the CSV side effect -- confirm.
    """
    user_id = request.args.get('id')
    select_id = user_id.split(',')
    file_location = dirname(dirname(abspath(__file__))) + '/static/download/test.csv'
    isflag = 1
    # initialized so an empty id list returns '{}' instead of NameError
    source = {}
    # BUG FIX: 'with' guarantees the csv handle is closed even when an ES
    # lookup raises (the original file()/close() pair leaked it on error).
    with open(file_location, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for uid in select_id:
            item_content = []
            if uid:
                source = es.search(index='weibo_user', doc_type='user',
                                   body={'query': {'match': {'id': uid}},
                                         'size': 1})
                source_content = source['hits']['hits'][0]['_source']
                if isflag:
                    # emit the header once, from the first document's keys
                    item_head = [key for key in source_content]
                    writer.writerow(item_head)
                    isflag = 0
                # BUG FIX: write every field so each row aligns with the
                # header -- the original appended only unicode values,
                # silently dropping columns.
                for key in source_content:
                    item_content.append(self.decode_item(source_content[key]))
                writer.writerow(item_content)
    return json.dumps(source)
def deal_seed_user_file(upload_data, seed_info_type):
    """Parse an uploaded seed-user file into a uid list (legacy variant,
    returns only uid_list, no invalid list).

    upload_data    -- raw file text, one entry per line
    seed_info_type -- 'uid', 'uname' or 'url'
    """
    uid_list = []
    line_list = upload_data.split('\n')
    if seed_info_type == 'uid':
        for line in line_list:
            # keep only lines whose first 10 chars form a full-length uid
            uid = line[:10]
            if len(uid) == 10:
                uid_list.append(uid)
    elif seed_info_type == 'uname':
        uname_list = [line_item for line_item in line_list]
        #get uid by es_user_portrait
        profile_exist_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type,
                body={'query': {'terms': {'nick_name': uname_list}}},
                _source=False)['hits']['hits']
        if profile_exist_result:
            for profile_exist_item in profile_exist_result:
                uid_list.append(profile_exist_item['_id'])
    elif seed_info_type == 'url':
        # BUG FIX: element 2 of 'http://weibo.com/u/<uid>'.split('/') is the
        # host ('weibo.com'), not the uid; the uid lives in element 4 (the
        # sibling parser uses index 4). Also skip blank/short lines instead
        # of raising IndexError on the trailing '' from split('\n').
        for line_item in line_list:
            parts = line_item.split('/')
            if len(parts) > 4:
                uid_list.append(parts[4][-10:])
    return uid_list
def get(self):
    """Dump the ES user documents for the comma-separated ``id`` query
    parameter into static/download/test.csv; return the last hit as JSON.

    NOTE(review): only the last uid's ES response is returned; callers
    presumably consume the CSV side effect -- confirm.
    """
    user_id = request.args.get('id')
    select_id = user_id.split(',')
    file_location = dirname(dirname(abspath(__file__))) + '/static/download/test.csv'
    isflag = 1
    source = {}  # avoids NameError when select_id holds no truthy uid
    # BUG FIX: context manager closes the csv handle even on ES errors
    # (the original file()/close() pair leaked it when a lookup raised).
    with open(file_location, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for uid in select_id:
            item_content = []
            if uid:
                source = es.search(
                    index='weibo_user',
                    doc_type='user',
                    body={
                        'query': {'match': {'id': uid}},
                        'size': 1
                    }
                )
                source_content = source['hits']['hits'][0]['_source']
                if isflag:
                    # header row comes from the first document's keys
                    item_head = [key for key in source_content]
                    writer.writerow(item_head)
                    isflag = 0
                # BUG FIX: append every field (the original kept only
                # unicode values, misaligning rows against the header).
                for key in source_content:
                    item_content.append(self.decode_item(source_content[key]))
                writer.writerow(item_content)
    return json.dumps(source)
def search_mention(now_ts, uid, top_count):
    """Collect who *uid* @-mentioned over the past 7 days and decorate
    each mentioned user with profile and BCI-history fields.

    now_ts    -- reference unix timestamp
    uid       -- the user whose daily 'at_<ts>' mention hashes are read
    top_count -- unused in this implementation
    Returns a list of dicts with keys: uid, uname, count, fansnum,
    weibo_count, friendsnum.
    """
    date = ts2datetime(now_ts)
    #evaluate_max_dict = get_evaluate_max()
    ts = datetime2ts(date)
    stat_results = dict()   # uname -> accumulated mention count
    results = dict()        # NOTE(review): never used
    uid_dict = {}           # NOTE(review): never used
    # walk back one day at a time over the last 7 days of mention hashes
    for i in range(1,8):
        ts = ts - DAY
        try:
            result_string = r_cluster.hget('at_' + str(ts), str(uid))
        except:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        # accumulate per-uname counts (EAFP: first sighting initializes)
        for at_uname in result_dict:
            try:
                stat_results[at_uname] += result_dict[at_uname]
            except:
                stat_results[at_uname] = result_dict[at_uname]
    sort_stat_results = sorted(stat_results.items(), key=lambda x:x[1], reverse=True)  # NOTE(review): unused
    # print sort_stat_results
    out_portrait_list = []
    out_list = stat_results.keys()
    #use to get user information from user profile
    out_query_list = [{'match':{'uname':item}} for item in out_list]
    if len(out_query_list) != 0:
        query = [{'bool':{'should': out_query_list}}]
        try:
            out_profile_result = es_user_profile.search(index=profile_index_name, doc_type=profile_index_type, body={'query':{'bool':{'must':query}}, 'size':100})['hits']['hits']
        except:
            out_profile_result = []
    else:
        out_profile_result = []
    out_in_profile_list = []   # unames found in the profile index
    bci_search_id_list = []    # their uids, for the BCI-history mget below
    for out_item in out_profile_result:
        source = out_item['_source']
        uname = source['nick_name']
        # NOTE(review): this shadows the 'uid' parameter from here on
        uid = source['uid']
        location = source['location']
        friendsnum = source['friendsnum']
        out_portrait_list.append([uid, uname, stat_results[uname], '', location, friendsnum, ''])
        out_in_profile_list.append(uname)
        #use to search bci history
        bci_search_id_list.append(uid)
    # mentioned users with no profile hit get placeholder rows
    out_out_profile_list = list(set(out_list) - set(out_in_profile_list))
    for out_out_item in out_out_profile_list:
        out_portrait_list.append(['', out_out_item, stat_results[out_out_item],'', '', '', ''])
    #add index from bci_history
    new_out_portrait_list = []
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={'ids': bci_search_id_list}, fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
    except:
        bci_history_result = []
    # bci_history_result is aligned with the profile-hit rows only; the
    # placeholder rows index past its end and fall into the except below,
    # yielding empty-string metrics
    iter_count = 0
    for out_portrait_item in out_portrait_list:
        append_dict = {}
        try:
            bci_history_item = bci_history_result[iter_count]
        except:
            bci_history_item = {}
        new_out_portrait_item = out_portrait_item
        append_dict['uid'] = out_portrait_item[0]
        append_dict['uname'] = out_portrait_item[1]
        append_dict['count'] = out_portrait_item[2]
        if bci_history_item:
            if bci_history_item['found'] == True:
                fansnum = bci_history_item['fields']['user_fansnum'][0]
                user_weibo_count = bci_history_item['fields']['weibo_month_sum'][0]
                user_friendsnum = bci_history_item['fields']['user_friendsnum'][0]
            else:
                fansnum = ''
                user_weibo_count = ''
                user_friendsnum = ''
        else:
            fansnum = ''
            user_weibo_count = ''
            user_friendsnum = ''
        append_dict['fansnum'] = fansnum
        append_dict['weibo_count'] = user_weibo_count
        append_dict['friendsnum'] = user_friendsnum
        # new_out_portrait_item[3] = fansnum
        # new_out_portrait_item[6] = user_weibo_count
        # new_out_portrait_item[-2] = user_friendsnum
        #new_out_portrait_list.append(new_out_portrait_item)
        new_out_portrait_list.append(append_dict)
        iter_count += 1
        #print append_dict
    return new_out_portrait_list
    # result columns: uid, name, mention count, fans count, registered location, friends count, weibo count
def get(self):
    """Advanced weibo_user search.

    Reads fuzzy (wildcard), exact (match) and numeric range filters from
    the query string, runs the ES search with the requested sort order,
    mirrors the hits to static/download/test.csv, and returns the raw ES
    response as JSON.
    """
    item_head = []
    # query-string field groups
    fuzz_item = ['uid', 'nick_name', 'real_name', 'user_location',
                 'user_email', 'user_birth']             # wildcard filters
    range_item = ['statusnum', 'fansnum', 'friendsnum']  # numeric ranges
    select_item = ['sex', 'tn', 'sp_type']               # exact matches
    data = {}
    query = []
    num = 0
    order = []
    data['uid'] = request.args.get('q0')
    data['nick_name'] = request.args.get('q2')
    data['real_name'] = request.args.get('q3')
    data['sp_type'] = request.args.get('q4')
    data['tn'] = request.args.get('tn')
    data['sex'] = request.args.get('sex')
    data['user_email'] = request.args.get('q7')
    data['user_location'] = request.args.get('q12')
    data['user_birth'] = request.args.get('q13')
    rank_order = request.args.get('order')
    # range bounds straight from the request (the old '0'/'100000000'
    # pre-seeding was dead code: every slot was unconditionally
    # overwritten immediately afterwards)
    for key in range_item:
        data[key] = {}
    data['statusnum']['from'] = request.args.get('q5')
    data['statusnum']['to'] = request.args.get('q6')
    data['fansnum']['from'] = request.args.get('q8')
    data['fansnum']['to'] = request.args.get('q9')
    data['friendsnum']['from'] = request.args.get('q10')
    data['friendsnum']['to'] = request.args.get('q11')
    size = request.args.get('size')
    # BUG FIX: a missing 'size' parameter arrives as None, not ''; both
    # now fall back to 100 (the original forwarded None to ES).
    if not size:
        size = 100
    # complete half-open ranges
    for key in range_item:
        if data[key]['from'] == '' and data[key]['to'] != '':
            data[key]['from'] = '0'
        if data[key]['from'] != '' and data[key]['to'] == '':
            data[key]['to'] = '100000000'
    if rank_order == "0":
        order = [{'statusnum': {'order': 'desc'}}]
    if rank_order == "1":
        order = [{'fansnum': {'order': 'desc'}}]
    if rank_order == "2":
        order = [{'friendsnum': {'order': 'desc'}}]
    # sentinel values meaning "no filter"
    if data['tn'] == '2':
        data['tn'] = ''
    if data['sex'] == '3':
        data['sex'] = ''
    if data['sp_type'] == '0':
        data['sp_type'] = ''
    for key in data:
        if data[key] and key not in range_item:
            if key in fuzz_item:
                query.append({'wildcard': {key: "*" + data[key] + '*'}})
                num += 1
            if key in select_item:
                if data[key]:
                    query.append({'match': {key: data[key]}})
                    num += 1
        elif data[key]:
            # range_item entries (dicts, always truthy) land here
            if data[key]['from'] and data[key]['to']:
                query.append({'range': {key: {"from": data[key]['from'],
                                              "to": data[key]['to']}}})
                num += 1
    if num > 0:
        # TODO handle ES exceptions (the old `except ... raise e` was a
        # no-op that only truncated the traceback)
        source = es.search(index='weibo_user', doc_type='user',
                           body={'query': {'bool': {'must': query}},
                                 "sort": order,
                                 "size": size})
    else:
        # no filters at all: top 100 users by status count
        source = es.search(index='weibo_user', doc_type='user',
                           body={'query': {'match_all': {}},
                                 "sort": [{'statusnum': {'order': 'desc'}}],
                                 "size": 100})
    file_location = dirname(dirname(abspath(__file__))) + '/static/download/test.csv'
    isflag = 1
    # BUG FIX: 'with' closes the csv handle even if decode_item raises
    # mid-export (the original leaked it on error).
    with open(file_location, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for hit in source['hits']['hits']:
            item_content = []
            source_content = hit['_source']
            if isflag:
                item_head = [k for k in source_content]
                writer.writerow(item_head)
                isflag = 0
            for k in source_content:
                item_content.append(self.decode_item(source_content[k]))
            writer.writerow(item_content)
    return json.dumps(source)
def get(self):
    """Wildcard search on uid / nick_name; dump the hits to the download
    csv and render the index template."""
    #q = request.args.get('q')
    item_content = []
    item_head = []
    fuzz_item = ['uid', 'nick_name']
    data = {}
    query = []
    num = 0
    # one search box feeds both fields
    data['uid'] = request.args.get('uidnickname')
    data['nick_name'] = request.args.get('uidnickname')
    for key in data:
        if data[key]:
            if key in fuzz_item:
                query.append({'wildcard': {key: "*" + data[key] + '*'}})
                num += 1
    if num > 0:
        try:
            source = es.search(index='weibo_user',
                               doc_type='user',
                               body={'query': {'bool': {'should': query}},
                                     "sort": [{'statusnum': {'order': 'desc'}}],
                                     "size": 100})
        except Exception as e:
            # TODO handle exception
            raise e
    else:
        # empty search box: top 100 users by status count
        source = es.search(index='weibo_user',
                           doc_type='user',
                           body={'query': {'match_all': {}},
                                 "sort": [{'statusnum': {'order': 'desc'}}],
                                 "size": 100})
    file_location = dirname(dirname(abspath(__file__))) + '/static/download/test.csv'
    isflag = 1
    # BUG FIX: the handle was never closed; 'with' closes it even when
    # decode_item raises mid-export.
    with open(file_location, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for hit in source['hits']['hits']:
            item_content = []
            source_content = hit['_source']
            if isflag:
                item_head = [key for key in source_content]
                writer.writerow(item_head)
                isflag = 0
            for key in source_content:
                item_content.append(self.decode_item(source_content[key]))
            writer.writerow(item_content)
    return render_template(self.index_template, source=source, data=data)
def get(self):
    """Fuzzy uid/nick_name search: query ES, mirror the hits into the
    download csv, then render the index template."""
    #q = request.args.get('q')
    item_content = []
    item_head = []
    fuzz_item = ['uid', 'nick_name']
    data = {}
    query = []
    num = 0
    # the single 'uidnickname' box is matched against both fields
    keyword = request.args.get('uidnickname')
    data['uid'] = keyword
    data['nick_name'] = keyword
    for key in data:
        if data[key] and key in fuzz_item:
            query.append({'wildcard': {key: "*" + data[key] + '*'}})
            num += 1
    if num > 0:
        try:
            source = es.search(
                index='weibo_user',
                doc_type='user',
                body={
                    'query': {'bool': {'should': query}},
                    "sort": [{'statusnum': {'order': 'desc'}}],
                    "size": 100
                }
            )
        except Exception as e:
            # TODO handle exception
            raise e
    else:
        # no keyword given: fall back to the top 100 by status count
        source = es.search(
            index='weibo_user',
            doc_type='user',
            body={
                'query': {'match_all': {}},
                "sort": [{'statusnum': {'order': 'desc'}}],
                "size": 100
            }
        )
    file_location = dirname(dirname(abspath(__file__))) + '/static/download/test.csv'
    isflag = 1
    # BUG FIX: the csv handle was never closed; the context manager
    # guarantees it, including when decode_item raises mid-export.
    with open(file_location, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for hit in source['hits']['hits']:
            item_content = []
            source_content = hit['_source']
            if isflag:
                item_head = [key for key in source_content]
                writer.writerow(item_head)
                isflag = 0
            for key in source_content:
                item_content.append(self.decode_item(source_content[key]))
            writer.writerow(item_content)
    return render_template(self.index_template, source=source, data=data)
def get(self):
    """Advanced weibo_user search endpoint.

    Builds wildcard / exact-match / range filters from the query string,
    queries ES with the requested sort order, writes the hits to
    static/download/test.csv and returns the raw ES response as JSON.
    """
    item_head = []
    fuzz_item = ['uid', 'nick_name', 'real_name', 'user_location', 'user_email', 'user_birth']
    range_item = ['statusnum', 'fansnum', 'friendsnum']
    select_item = ['sex', 'tn', 'sp_type']
    data = {}
    query = []
    num = 0
    order = []
    # scalar filters from the query string
    data['uid'] = request.args.get('q0')
    data['nick_name'] = request.args.get('q2')
    data['real_name'] = request.args.get('q3')
    data['sp_type'] = request.args.get('q4')
    data['tn'] = request.args.get('tn')
    data['sex'] = request.args.get('sex')
    data['user_email'] = request.args.get('q7')
    data['user_location'] = request.args.get('q12')
    data['user_birth'] = request.args.get('q13')
    rank_order = request.args.get('order')
    # numeric range filters (the original pre-seeded '0'/'100000000'
    # defaults were dead stores -- every slot was overwritten right after)
    for key in range_item:
        data[key] = {}
    data['statusnum']['from'] = request.args.get('q5')
    data['statusnum']['to'] = request.args.get('q6')
    data['fansnum']['from'] = request.args.get('q8')
    data['fansnum']['to'] = request.args.get('q9')
    data['friendsnum']['from'] = request.args.get('q10')
    data['friendsnum']['to'] = request.args.get('q11')
    size = request.args.get('size')
    if not size:
        # BUG FIX: an absent parameter arrives as None, not ''; both
        # default to 100 now instead of forwarding None to ES
        size = 100
    # fill in the missing side of half-open ranges
    for key in range_item:
        if data[key]['from'] == '' and data[key]['to'] != '':
            data[key]['from'] = '0'
        if data[key]['from'] != '' and data[key]['to'] == '':
            data[key]['to'] = '100000000'
    if rank_order == "0":
        order = [{'statusnum': {'order': 'desc'}}]
    if rank_order == "1":
        order = [{'fansnum': {'order': 'desc'}}]
    if rank_order == "2":
        order = [{'friendsnum': {'order': 'desc'}}]
    # sentinel values that mean "no filter"
    if data['tn'] == '2':
        data['tn'] = ''
    if data['sex'] == '3':
        data['sex'] = ''
    if data['sp_type'] == '0':
        data['sp_type'] = ''
    for key in data:
        if data[key] and key not in range_item:
            if key in fuzz_item:
                query.append({'wildcard': {key: "*" + data[key] + '*'}})
                num += 1
            if key in select_item:
                if data[key]:
                    query.append({'match': {key: data[key]}})
                    num += 1
        elif data[key]:
            # range_item entries (always-truthy dicts) end up here
            if data[key]['from'] and data[key]['to']:
                query.append({'range': {key: {"from": data[key]['from'], "to": data[key]['to']}}})
                num += 1
    if num > 0:
        # TODO handle ES exceptions (the old `except ... raise e` only
        # truncated the traceback without handling anything)
        source = es.search(
            index='weibo_user',
            doc_type='user',
            body={
                'query': {'bool': {'must': query}},
                "sort": order,
                "size": size
            }
        )
    else:
        # no filters: default to the top 100 users by status count
        source = es.search(
            index='weibo_user',
            doc_type='user',
            body={
                'query': {'match_all': {}},
                "sort": [{'statusnum': {'order': 'desc'}}],
                "size": 100
            }
        )
    file_location = dirname(dirname(abspath(__file__))) + '/static/download/test.csv'
    isflag = 1
    # BUG FIX: the csv handle is now closed even when decode_item raises
    # mid-export (the original only closed it on the success path).
    with open(file_location, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for hit in source['hits']['hits']:
            item_content = []
            source_content = hit['_source']
            if isflag:
                item_head = [field for field in source_content]
                writer.writerow(item_head)
                isflag = 0
            for field in source_content:
                item_content.append(self.decode_item(source_content[field]))
            writer.writerow(item_content)
    return json.dumps(source)