Example #1
0
File: utils.py Project: SwoJa/ruman
def get_person_value(uid):
    #认证类型
    #print es_user_profile,profile_index_name,profile_index_type,uid
    try:
        value_static = es_bci_history.get(index = bci_history_index_name,doc_type = bci_history_index_type,id=uid)
        value_inf = es_user_portrait.get(index = portrait_index_name,doc_type = portrait_index_type,id=uid)
        static = es_user_profile.get(index = profile_index_name,doc_type = profile_index_type,id=uid)
    except:
        return 'no'
    fans_max = es_bci_history.search(index = bci_history_index_name,doc_type = bci_history_index_type,body={'query':{'match_all':{}},'sort':{'user_fansnum':{'order':'desc'}},'size':1})['hits']['hits'][0]['_source']['user_fansnum']
    print 'max:',fans_max
    #print static['found']
    if static['found']==False:
        return 'no'
    else:
        static = static['_source']
    #print "static",static
    try:
        ver_calue = verified_value[static['verified_type']]
    except:
        ver_calue = 0
    #账号创建时间
    times = math.ceil((time.time()-int(static['create_at']))/31536000)
    #粉丝数
    #person = es_user_profile.get(index = profile_index_name,doc_type = profile_index_type,id=uid)['_source']
    fans_value = math.log(float(value_static['_source']['user_fansnum'])/float(fans_max)*9+1,10)
    #fans_value = (math.log(static['fansnum']+1000000,100000000)-0.75)*4
    #if fans_value>1:
    #    fans_value=1.0
    influence_max = es_user_portrait.search(index = portrait_index_name,doc_type = portrait_index_type,body={'query':{'match_all':{}},'sort':{'influence':{'order':'desc'}},'size':1})['hits']['hits'][0]['_source']['influence']
    influence_value = float(value_inf['_source']['influence'])/float(influence_max)
    final= (ver_calue*0.1+times*0.05+fans_value+influence_value*1.2)*30
    print ver_calue,times,fans_value,influence_value
    return final
Example #2
0
File: utils.py Project: SwoJa/ruman
def get_subopinion_new(topic,start_ts,end_ts,sort_item='timestamp'):
    # query_body = {
    #     'query':{
    #         'filtered':{
    #             'filter':{
    #                 'bool':{
    #                     'must':[
    #                         {'match_phrase':{'name':topic}}
    #                     ]
    #                 }
    #             }
    #         }
    #     }
    # }
    query_body = {
        "query": {
          "match_phrase": {
            "name": topic
          }
        }
    }
    # print query_body
    results = []
    print topic
    weibos = weibo_es.get(index=subopinion_index_name,id=topic,doc_type="text")['_source']

    print weibos
    for k,weibo in weibos.items():
        if k != 'start_ts' and k !='end_ts':
            result = {}
            weibo_dict ={}
    
            raw_keys=weibo['keys']
            raw_keys = raw_keys.split('_')
            result['keys']=get_mode(raw_keys)
    
            weibos_texts = json.loads(weibo['cluster_dump_dict'])
            for weibo_text in weibos_texts.values():#jln0825
                weibo_text = weibo_text[0]
                weibo_content = {}
                weibo_content['text'] = weibo_text['text'] 
                weibo_content['uid'] = weibo_text['uid']
                weibo_content['timestamp'] = full_datetime2ts(weibo_text['datetime'])
                weibo_content['comment'] = weibo_text['comment']
                weibo_content['retweeted'] = weibo_text['retweeted']
                weibo_content['mid'] = weibo_text['id']
                try:
                    user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
                    weibo_content['uname'] = user['nick_name']
                    weibo_content['photo_url'] = user['photo_url']
                except:
                    weibo_content['uname'] = 'unknown'
                    weibo_content['photo_url'] = 'unknown'
                weibo_dict[weibo_content['mid']] = weibo_content
    
            result['weibos'] = sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=True)
            results.append(result)


    return results
Example #3
0
File: utils.py Project: SwoJa/ruman
def get_weibo_by_time(topic,start_ts,end_ts,sort_item='timestamp'):
    print topic,start_ts,end_ts,weibo_es
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'range':{'timestamp':{'lte':int(end_ts),'gte':int(start_ts)}}}
                ]
            }
        },
        'size':200,
        'sort':{sort_item:{'order':'desc'}}
    }
    items = weibo_es.search(index=topic,body=query_body)['hits']['hits']
    #items = db.session.query(PropagateWeibos).filter(PropagateWeibos.topic==topic).all()
    weibo_dict = {}
    if items:
        for item in items:  
            #print item,type(item)
            #mtype = item.mtype
            #weibos = _json_loads(item.weibos)
            weibo = item['_source']
            #print mtype
            weibo_content = {}
            weibo_content['text'] = weibo['text'] 
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = weibo['timestamp']
            #weibo_content['sentiment'] = weibo['sentiment'] 
            try:
                weibo_content['comment'] = weibo['comment']
            except:
                weibo_content['comment'] = 0
            try:
                weibo_content['retweeted'] = weibo['retweeted']
            except:
                weibo_content['retweeted'] = 0
            try:
                user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            #weibo_content['keywords'] = weibo['keywords_dict']
            weibo_content['mid'] = weibo['mid']
            #print weibo_content
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=1)

    else:
        results = []   
        #results = sorted(weibo_dict.items(),key=lambda x:x[1]['retweeted'],reverse=False)
        #for result in results:
            #print result
    return results
Example #4
0
File: utils.py Project: SwoJa/ruman
def get_weibo_content(topic,start_ts,end_ts,opinion,sort_item='timestamp'): #微博内容
    weibo_dict = {}
    #a = json.dumps(opinion)
    #opinion = '圣保罗_班底_巴西_康熙'
    print 'opinion:::::::::',opinion
    print 'topic:::::::;:::',topic
    print type(start_ts),type(end_ts)
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'wildcard':{'keys':opinion}},
                    {'term':{'name':topic}},
                    {'range':{'start_ts':{'lte':start_ts}}},
                    {'range':{'end_ts':{'gte':end_ts}}}
                ]
            }
        }
    }  #没有查到uid   每次的id不一样   
    print query_body
    weibos = weibo_es.search(index=subopinion_index_name,doc_type=subopinion_index_type,body=query_body)['hits']['hits']
    #print weibo_es,subopinion_index_name,subopinion_index_type,query_body
    print len(weibos)
    if weibos:
        weibos = json.loads(weibos[0]['_source']['cluster_dump_dict'])
        for weibo in weibos.values():#jln0825
            weibo = weibo[0]
            weibo_content = {}
            weibo_content['text'] = weibo['text'] 
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = full_datetime2ts(weibo['datetime'])
            weibo_content['comment'] = weibo['comment']
            weibo_content['retweeted'] = weibo['retweeted']
            weibo_content['mid'] = weibo['id']
            try:
                user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=True)
        #print results
        return results
    else:
        return 'no results'
Example #5
0
def get_weibo_content(topic, start_ts, end_ts, sort_item='timestamp', sen=0):
    """Return the weibos recorded for ``topic`` with sentiment ``sen``.

    Queries ``SentimentWeibos`` for rows with ``start_ts < end <= end_ts``,
    ``query == topic`` and ``sentiment == sen``. Each row's ``weibos`` field
    is decoded with ``_json_loads``; entries without a 'text' are skipped
    and texts are de-duplicated within a row. Entries are keyed by 'mid', so
    a later entry with the same mid replaces an earlier one.

    Returns a list of ``(mid, weibo_content)`` pairs sorted by ``sort_item``
    in descending order. ``db.session`` is closed before returning, also
    when an error is raised.
    """
    weibo_dict = {}
    try:
        items = db.session.query(SentimentWeibos).filter(SentimentWeibos.end>start_ts, \
                                                        SentimentWeibos.end<=end_ts, \
                                                        SentimentWeibos.query==topic,\
                                                        SentimentWeibos.sentiment==sen).all()
        for item in items:
            weibos = _json_loads(item.weibos)
            ori_text = set()
            for weibo in weibos:
                try:
                    text = weibo['text']
                except (KeyError, IndexError, TypeError):
                    # Malformed entry without a text: nothing to show.
                    continue
                if text in ori_text:
                    continue
                ori_text.add(text)

                weibo_content = {
                    'text': text,
                    'uid': weibo['uid'],
                    'timestamp': weibo['timestamp'],
                    'sentiment': weibo['sentiment'],
                    'comment': weibo['comment'],
                    'retweeted': weibo['retweeted'],
                    'keywords': weibo['keywords_dict'],
                    'mid': weibo['mid'],
                }
                try:
                    user = es_user_profile.get(
                        index=profile_index_name,
                        doc_type=profile_index_type,
                        id=weibo_content['uid'])['_source']
                    weibo_content['uname'] = user['nick_name']
                    weibo_content['photo_url'] = user['photo_url']
                except Exception:
                    # Best effort: keep the weibo even without profile data.
                    weibo_content['uname'] = 'unknown'
                    weibo_content['photo_url'] = 'unknown'
                weibo_dict[weibo_content['mid']] = weibo_content

        results = sorted(weibo_dict.items(),
                         key=lambda x: x[1][sort_item],
                         reverse=True)
    finally:
        # Release the session even if the query or the processing failed.
        db.session.close()
    return results
Example #6
0
File: utils.py Project: SwoJa/ruman
def get_weibo_content(topic,start_ts,end_ts,province,sort_item='timestamp',unit=Fifteenminutes):
    """Return the weibos recorded for ``topic`` in ``province``.

    Rows of ``ProvinceWeibos`` are selected by their ``end`` value, using
    bounds aligned to multiples of ``unit``:
      * when the requested span is shorter than ``unit``, only rows whose
        ``end`` equals ``end_ts`` rounded up;
      * otherwise rows with ``end`` in (``start_ts`` rounded down,
        ``end_ts`` rounded up].

    Each row's ``weibos`` field is decoded with ``_json_loads`` and read as
    a single document with a '_source' dict.

    Returns a list of ``(mid, weibo_content)`` pairs sorted by ``sort_item``
    in descending order.
    """
    upbound = long(math.ceil(end_ts / (unit * 1.0)) * unit)
    if end_ts - start_ts < unit:
        items = db.session.query(ProvinceWeibos).filter(ProvinceWeibos.end==upbound, \
                                                        ProvinceWeibos.province == province,\
                                                        ProvinceWeibos.topic==topic).all()
    else:
        # Explicit floor division so the lower bound is aligned to ``unit``
        # for float timestamps too, not only for ints.
        lowbound = long(start_ts // unit * unit)
        items = db.session.query(ProvinceWeibos).filter(ProvinceWeibos.end>lowbound, \
                                                        ProvinceWeibos.end<=upbound, \
                                                        ProvinceWeibos.province == province,\
                                                        ProvinceWeibos.topic==topic).all()
    weibo_dict = {}
    for item in items:
        source = _json_loads(item.weibos)['_source']
        weibo_content = {
            'text': source['text'],
            'uid': source['uid'],
            'timestamp': source['timestamp'],
            'sentiment': source['sentiment'],
            'comment': source['comment'],
            'retweeted': source['retweeted'],
            'keywords': source['keywords_dict'],
            'mid': source['mid'],
        }
        try:
            user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # Best effort: keep the weibo even without profile data.
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content
    return sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=True)
Example #7
0
File: utils.py Project: SwoJa/ruman
def get_weibo_content(topic,start_ts,end_ts,sort_item='timestamp',sen=0):
    """Return the weibos recorded for ``topic`` with sentiment ``sen``.

    Queries ``SentimentWeibos`` for rows with ``start_ts < end <= end_ts``,
    ``query == topic`` and ``sentiment == sen``. Each row's ``weibos`` field
    is decoded with ``_json_loads``; entries without a 'text' are skipped
    and texts are de-duplicated within a row. Entries are keyed by 'mid', so
    a later entry with the same mid replaces an earlier one.

    Returns a list of ``(mid, weibo_content)`` pairs sorted by ``sort_item``
    in descending order.
    """
    items = db.session.query(SentimentWeibos).filter(SentimentWeibos.end>start_ts, \
                                                    SentimentWeibos.end<=end_ts, \
                                                    SentimentWeibos.query==topic,\
                                                    SentimentWeibos.sentiment==sen).all()
    weibo_dict = {}
    for item in items:
        ori_text = set()
        for weibo in _json_loads(item.weibos):
            try:
                text = weibo['text']
            except (KeyError, IndexError, TypeError):
                # Malformed entry without a text: nothing to show.
                continue
            if text in ori_text:
                continue
            ori_text.add(text)

            weibo_content = {
                'text': text,
                'uid': weibo['uid'],
                'timestamp': weibo['timestamp'],
                'sentiment': weibo['sentiment'],
                'comment': weibo['comment'],
                'retweeted': weibo['retweeted'],
                'keywords': weibo['keywords_dict'],
                'mid': weibo['mid'],
            }
            try:
                user = es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except Exception:
                # Best effort: keep the weibo even without profile data.
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),key=lambda x:x[1][sort_item],reverse=True)
Example #8
0
def get_person_value(uid):
    #认证类型
    #print es_user_profile,profile_index_name,profile_index_type,uid
    try:
        value_static = es_bci_history.get(index=bci_history_index_name,
                                          doc_type=bci_history_index_type,
                                          id=uid)
        value_inf = es_user_portrait.get(index=portrait_index_name,
                                         doc_type=portrait_index_type,
                                         id=uid)
        static = es_user_profile.get(index=profile_index_name,
                                     doc_type=profile_index_type,
                                     id=uid)
    except:
        return 'no'
    fans_max = es_bci_history.search(
        index=bci_history_index_name,
        doc_type=bci_history_index_type,
        body={
            'query': {
                'match_all': {}
            },
            'sort': {
                'user_fansnum': {
                    'order': 'desc'
                }
            },
            'size': 1
        })['hits']['hits'][0]['_source']['user_fansnum']
    print 'max:', fans_max
    #print static['found']
    if static['found'] == False:
        return 'no'
    else:
        static = static['_source']
    #print "static",static
    try:
        ver_calue = verified_value[static['verified_type']]
    except:
        ver_calue = 0
    #账号创建时间
    times = math.ceil((time.time() - int(static['create_at'])) / 31536000)
    #粉丝数
    #person = es_user_profile.get(index = profile_index_name,doc_type = profile_index_type,id=uid)['_source']
    fans_value = math.log(
        float(value_static['_source']['user_fansnum']) / float(fans_max) * 9 +
        1, 10)
    #fans_value = (math.log(static['fansnum']+1000000,100000000)-0.75)*4
    #if fans_value>1:
    #    fans_value=1.0
    influence_max = es_user_portrait.search(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={
            'query': {
                'match_all': {}
            },
            'sort': {
                'influence': {
                    'order': 'desc'
                }
            },
            'size': 1
        })['hits']['hits'][0]['_source']['influence']
    influence_value = float(
        value_inf['_source']['influence']) / float(influence_max)
    final = (ver_calue * 0.1 + times * 0.05 + fans_value +
             influence_value * 1.2) * 30
    print ver_calue, times, fans_value, influence_value
    return final
Example #9
0
def get_weibo_content(topic,
                      start_ts,
                      end_ts,
                      opinion,
                      sort_item='timestamp'):  #微博内容
    weibo_dict = {}
    #a = json.dumps(opinion)
    #opinion = '圣保罗_班底_巴西_康熙'
    print 'opinion:::::::::', opinion
    print 'topic:::::::;:::', topic
    print type(start_ts), type(end_ts)
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'wildcard': {
                        'keys': opinion
                    }
                }, {
                    'term': {
                        'name': topic
                    }
                }, {
                    'range': {
                        'start_ts': {
                            'lte': start_ts
                        }
                    }
                }, {
                    'range': {
                        'end_ts': {
                            'gte': end_ts
                        }
                    }
                }]
            }
        }
    }  #没有查到uid   每次的id不一样
    print query_body
    weibos = weibo_es.search(index=subopinion_index_name,
                             doc_type=subopinion_index_type,
                             body=query_body)['hits']['hits']
    #print weibo_es,subopinion_index_name,subopinion_index_type,query_body
    print len(weibos)
    if weibos:
        weibos = json.loads(weibos[0]['_source']['cluster_dump_dict'])
        for weibo in weibos.values():  #jln0825
            weibo = weibo[0]
            weibo_content = {}
            weibo_content['text'] = weibo['text']
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = full_datetime2ts(weibo['datetime'])
            weibo_content['comment'] = weibo['comment']
            weibo_content['retweeted'] = weibo['retweeted']
            weibo_content['mid'] = weibo['id']
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(),
                         key=lambda x: x[1][sort_item],
                         reverse=True)
        #print results
        return results
    else:
        return 'no results'
Example #10
0
def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    # query_body = {
    #     'query':{
    #         'filtered':{
    #             'filter':{
    #                 'bool':{
    #                     'must':[
    #                         {'match_phrase':{'name':topic}}
    #                     ]
    #                 }
    #             }
    #         }
    #     }
    # }
    query_body = {"query": {"match_phrase": {"name": topic}}}
    # print query_body
    results = []
    print topic
    weibos = weibo_es.get(index=subopinion_index_name,
                          id=topic,
                          doc_type="text")['_source']

    print weibos
    for k, weibo in weibos.items():
        if k != 'start_ts' and k != 'end_ts':
            result = {}
            weibo_dict = {}

            raw_keys = weibo['keys']
            raw_keys = raw_keys.split('_')
            result['keys'] = get_mode(raw_keys)

            weibos_texts = json.loads(weibo['cluster_dump_dict'])
            for weibo_text in weibos_texts.values():  #jln0825
                weibo_text = weibo_text[0]
                weibo_content = {}
                weibo_content['text'] = weibo_text['text']
                weibo_content['uid'] = weibo_text['uid']
                weibo_content['timestamp'] = full_datetime2ts(
                    weibo_text['datetime'])
                weibo_content['comment'] = weibo_text['comment']
                weibo_content['retweeted'] = weibo_text['retweeted']
                weibo_content['mid'] = weibo_text['id']
                try:
                    user = es_user_profile.get(
                        index=profile_index_name,
                        doc_type=profile_index_type,
                        id=weibo_content['uid'])['_source']
                    weibo_content['uname'] = user['nick_name']
                    weibo_content['photo_url'] = user['photo_url']
                except:
                    weibo_content['uname'] = 'unknown'
                    weibo_content['photo_url'] = 'unknown'
                weibo_dict[weibo_content['mid']] = weibo_content

            result['weibos'] = sorted(weibo_dict.items(),
                                      key=lambda x: x[1][sort_item],
                                      reverse=True)
            results.append(result)

    return results
Example #11
0
def get_weibo_by_time(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return up to 200 weibos from index ``topic`` within a time window.

    Searches ``weibo_es`` for documents whose 'timestamp' lies in
    ``[int(start_ts), int(end_ts)]``, ordered by ``sort_item`` descending.
    Each hit is reduced to a dict with 'text', 'uid', 'timestamp',
    'comment', 'retweeted' (both default to 0 when absent), 'uname',
    'photo_url' ('unknown' when the user profile lookup fails) and 'mid'.

    Returns a list of ``(mid, weibo_content)`` pairs sorted by ``sort_item``
    in descending order; an empty list when nothing matches.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        'timestamp': {
                            'lte': int(end_ts),
                            'gte': int(start_ts)
                        }
                    }
                }]
            }
        },
        'size': 200,
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        }
    }
    items = weibo_es.search(index=topic, body=query_body)['hits']['hits']
    weibo_dict = {}
    for item in items:
        weibo = item['_source']
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': weibo['timestamp'],
            # Counters are optional in the stored documents.
            'comment': weibo.get('comment', 0),
            'retweeted': weibo.get('retweeted', 0),
        }
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # Best effort: keep the weibo even without profile data.
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_content['mid'] = weibo['mid']
        # Keyed by mid, so a repeated mid keeps only the last hit.
        weibo_dict[weibo_content['mid']] = weibo_content
    # Sorting an empty dict yields [], so no separate "no hits" branch.
    return sorted(weibo_dict.items(),
                  key=lambda x: x[1][sort_item],
                  reverse=True)