def get_users(xnr_user_no, nodes=None):
    """Build a weighted graph from the retweet records of a set of uids.

    The uids come from ``nodes`` when it is non-empty, otherwise from
    ``create_xnr_targetuser(xnr_user_no)``. They are fetched from
    ``retweet_index`` through ``user_es.mget`` in batches of 100. For every
    found document, its ``uid_retweet`` field (a JSON object mapping another
    uid to a count) contributes one edge per entry, weighted by
    ``count / max(count)`` within that document.

    NOTE(review): a second ``get_users`` with the same signature appears
    later in this file; if both live at module level, the later definition
    replaces this one at import time -- confirm which is intended.

    :param xnr_user_no: identifier handed to ``create_xnr_targetuser`` when
        no ``nodes`` are supplied.
    :param nodes: optional sliceable sequence of uids to use directly.
    :return: an ``nx.Graph`` (presumably networkx); empty when there are no
        uids or nothing could be fetched.
    """
    print('xnr_user_no::: %s' % (xnr_user_no,))
    if not nodes:
        uids = create_xnr_targetuser(xnr_user_no)
    else:
        uids = nodes
    print('uids::: %s' % (len(uids),))

    G = nx.Graph()
    if not uids:
        return G

    print('#####')
    batch_size = 100
    retweet_result = []
    for start in range(0, len(uids), batch_size):
        uid_item = uids[start:start + batch_size]
        try:
            docs = user_es.mget(
                index=retweet_index,
                doc_type=retweet_type,
                body={'ids': uid_item},
                _source=True,
            )['docs']
        except Exception as err:
            # Best effort: a failed batch is reported and skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('uid error!! %r' % (err,))
            continue
        retweet_result.extend(docs)

    for doc in retweet_result:
        # Entries without a truthy 'found' flag carry no usable '_source'.
        if not doc.get('found'):
            continue
        source = doc['_source']
        uid = source['uid']
        uid_retweet = json.loads(source['uid_retweet'])
        # A falsy uid or an empty map yields no edges; skipping here also
        # avoids calling max() on an empty sequence.
        if not uid or not uid_retweet:
            continue
        max_count = max(int(count) for count in uid_retweet.values())
        if max_count == 0:
            # All counts are zero: normalising would divide by zero.
            continue
        G.add_weighted_edges_from([
            (uid, other, float(count) / max_count)
            for other, count in uid_retweet.items()
            if other and other != uid  # no self-loops, no empty ids
        ])

    print('G has compelete!')
    return G
def get_users(xnr_user_no, nodes=None):
    """Build a weighted graph from retweet and comment records of a set of uids.

    The uids come from ``nodes`` when it is non-empty, otherwise from
    ``create_xnr_targetuser(xnr_user_no)``. One ``user_es.mget`` is issued
    against ``retweet_index`` and one against ``comment_index``. For every
    found document, its ``uid_retweet`` / ``uid_comment`` field (a JSON
    object mapping another uid to a count) contributes one edge per entry,
    weighted by ``count / max(count)`` within that document. Retweet edges
    are added first, so a comment edge between the same pair overwrites the
    retweet weight.

    NOTE(review): an earlier ``get_users`` with the same signature appears
    in this file; if both live at module level, this later definition is
    the one that stays bound -- confirm which is intended.

    :param xnr_user_no: identifier handed to ``create_xnr_targetuser`` when
        no ``nodes`` are supplied.
    :param nodes: optional collection of uids to use directly.
    :return: an ``nx.Graph`` (presumably networkx); empty when there are no
        uids.
    """
    if not nodes:
        print('get xnr es...')
        uids = create_xnr_targetuser(xnr_user_no)
    else:
        print('have input nodes...')
        uids = nodes

    G = nx.Graph()
    if not uids:
        # Nothing to look up: skip the Elasticsearch round-trips entirely.
        return G

    retweet_result = user_es.mget(
        index=retweet_index,
        doc_type=retweet_type,
        body={'ids': uids},
        _source=True,
    )['docs']
    comment_result = user_es.mget(
        index=comment_index,
        doc_type=comment_type,
        body={'ids': uids},
        _source=True,
    )['docs']

    # Both result sets share one shape; only the JSON field name differs.
    interaction_sources = (
        (retweet_result, 'uid_retweet'),
        (comment_result, 'uid_comment'),
    )
    for docs, field in interaction_sources:
        for doc in docs:
            # Entries without a truthy 'found' flag carry no usable '_source'.
            if not doc.get('found'):
                continue
            source = doc['_source']
            uid = source['uid']
            counts = json.loads(source[field])
            # A falsy uid or an empty map yields no edges; skipping here
            # also avoids calling max() on an empty sequence.
            if not uid or not counts:
                continue
            max_count = max(int(count) for count in counts.values())
            if max_count == 0:
                # All counts are zero: normalising would divide by zero.
                continue
            G.add_weighted_edges_from([
                (uid, other, float(count) / max_count)
                for other, count in counts.items()
                if other and other != uid  # no self-loops, no empty ids
            ])
    return G