Ejemplo n.º 1
0
def add_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Add a custom attribute to a user's portrait document.

    The attribute is written only when the user document exists, the
    attribute definition exists, ``attribute_value`` is one of the values
    listed by that definition and the user does not carry the attribute yet.

    Args:
        uid: id of the user document in ``user_index_name``.
        attribute_name: id of the attribute definition in
            ``attribute_index_name``; also the field written on the user.
        attribute_value: value to store; must appear in the definition's
            '&'-separated ``attribute_value`` string.
        submit_user: accepted for interface compatibility; no permission
            check is performed on it here.

    Returns:
        True on success, otherwise one of the strings 'no user',
        'no attribute', 'no attribute value' or 'attribute exist'.
    """
    # Any lookup failure is reported as "missing" (best effort, as before),
    # but ``except Exception`` no longer traps KeyboardInterrupt/SystemExit
    # the way a bare ``except:`` does.
    try:
        user_result = es.get(index=user_index_name,
                             doc_type=user_index_type,
                             id=uid)['_source']
    except Exception:
        return 'no user'
    try:
        attribute_result = es.get(index=attribute_index_name,
                                  doc_type=attribute_index_type,
                                  id=attribute_name)['_source']
    except Exception:
        return 'no attribute'

    # Allowed values are stored as a single '&'-joined string.
    if attribute_value not in attribute_result['attribute_value'].split('&'):
        return 'no attribute value'
    if attribute_name in user_result:
        return 'attribute exist'

    es.update(index=user_index_name,
              doc_type=user_index_type,
              id=uid,
              body={'doc': {attribute_name: attribute_value}})
    return True
Ejemplo n.º 2
0
def change_attribute_portrait(uid, attribute_name, attribute_value,
                              submit_user):
    """Set (or overwrite) a custom attribute on a user's portrait document.

    Args:
        uid: id of the user document in ``user_index_name``.
        attribute_name: id of the attribute definition in
            ``attribute_index_name``; also the field written on the user.
        attribute_value: new value; must appear in the definition's
            '&'-separated ``attribute_value`` string.
        submit_user: accepted for interface compatibility; no permission
            check is performed on it here.

    Returns:
        True on success, otherwise 'no user', 'no attribute' or
        'no attribute value'.
    """
    # The user document is fetched only to confirm that it exists.
    # ``except Exception`` keeps the best-effort behaviour without trapping
    # KeyboardInterrupt/SystemExit like the former bare ``except:``.
    try:
        es.get(index=user_index_name,
               doc_type=user_index_type,
               id=uid)['_source']
    except Exception:
        return 'no user'
    try:
        attribute_result = es.get(index=attribute_index_name,
                                  doc_type=attribute_index_type,
                                  id=attribute_name)['_source']
    except Exception:
        return 'no attribute'

    # Allowed values are stored as a single '&'-joined string.
    if attribute_value not in attribute_result['attribute_value'].split('&'):
        return 'no attribute value'

    es.update(index=user_index_name,
              doc_type=user_index_type,
              id=uid,
              body={'doc': {attribute_name: attribute_value}})
    return True
Ejemplo n.º 3
0
def add_tag2group(uid_list, attribute_name, attribute_value):
    """Tag every user in ``uid_list`` that does not carry the attribute yet.

    Users whose document cannot be fetched, whose document is empty, or who
    already have ``attribute_name`` set are skipped silently.

    Args:
        uid_list: iterable of user document ids in ``user_index_name``.
        attribute_name: id of the attribute definition in
            ``attribute_index_name``; also the field written on each user.
        attribute_value: value to store; must appear in the definition's
            '&'-separated ``attribute_value`` string.

    Returns:
        True once the list has been processed, otherwise 'no attribute' or
        'no attribute value'.
    """
    try:
        attribute_exist = es.get(index=attribute_index_name,
                                 doc_type=attribute_index_type,
                                 id=attribute_name)['_source']
    except Exception:
        return 'no attribute'
    if attribute_value not in attribute_exist['attribute_value'].split('&'):
        return 'no attribute value'

    # The partial document is identical for every user; build it once.
    add_attribute_dict = {attribute_name: attribute_value}
    for uid in uid_list:
        try:
            user_exist = es.get(index=user_index_name,
                                doc_type=user_index_type,
                                id=uid)['_source']
        except Exception:
            # Unknown user (or failed lookup): nothing to tag.
            continue
        if user_exist and attribute_name not in user_exist:
            es.update(index=user_index_name,
                      doc_type=user_index_type,
                      id=uid,
                      body={'doc': add_attribute_dict})
    return True
Ejemplo n.º 4
0
def add_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Add a custom attribute to a user's portrait document.

    The write happens only if the user exists, the attribute definition
    exists, ``attribute_value`` is listed in that definition and the user
    does not already have the attribute.

    Args:
        uid: id of the user document in ``user_index_name``.
        attribute_name: id of the attribute definition in
            ``attribute_index_name``; also the field written on the user.
        attribute_value: value to store; must be one of the '&'-separated
            entries of the definition's ``attribute_value`` string.
        submit_user: unused here; no permission check is implemented.

    Returns:
        True on success, otherwise 'no user', 'no attribute',
        'no attribute value' or 'attribute exist'.
    """
    # ``except Exception`` (instead of a bare ``except:``) still maps every
    # lookup failure to a "missing" answer but lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        user_result = es.get(index=user_index_name, doc_type=user_index_type, id=uid)['_source']
    except Exception:
        return 'no user'
    try:
        attribute_result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except Exception:
        return 'no attribute'

    attribute_value_list = attribute_result['attribute_value'].split('&')
    if attribute_value not in attribute_value_list:
        return 'no attribute value'
    if attribute_name in user_result:
        return 'attribute exist'

    es.update(index=user_index_name, doc_type=user_index_type, id=uid,
              body={'doc': {attribute_name: attribute_value}})
    return True
def user_type(uid):
    """Return the ``type`` field of a user in 'sensitive_user_portrait'.

    Args:
        uid: id of the user document.

    Returns:
        The stored ``type`` value, or '' when the document or the field
        cannot be read.
    """
    try:
        return es.get(index='sensitive_user_portrait', doc_type="user", id=uid)['_source']['type']
    except Exception:
        # Best effort: any lookup failure means "type unknown". Unlike a bare
        # ``except:`` this does not swallow KeyboardInterrupt/SystemExit.
        return ''
Ejemplo n.º 6
0
def submit_task(input_data):
    """Submit a group task unless a task with the same name already exists.

    ``input_data`` is modified in place ('status', 'count' and, for the file
    variant, 'uid_list' are set), serialised onto the 'group_task' list of
    ``r`` and indexed into 'group_result'/'group'.

    Args:
        input_data: dict with at least 'task_name' and either 'uid_list'
            or 'uid_file'.

    Returns:
        1 when the task was submitted, 0 when a document with this task name
        was found in ``index_name`` and nothing was done.
    """
    task_name = input_data['task_name']
    try:
        es.get(index=index_name, doc_type=index_type, id=task_name)
    except Exception:
        # A failed lookup is taken to mean the name is free. ``Exception``
        # replaces the bare ``except:`` so Ctrl-C is not swallowed here.
        pass
    else:
        return 0

    if 'uid_file' not in input_data:
        # NOTE(review): here the queue payload is serialised before 'status'
        # and 'count' are added, unlike the uid_file branch below; the
        # original ordering is kept -- confirm which one the consumer expects.
        r.lpush('group_task', json.dumps(input_data))
        input_data['status'] = 0  # mark the task not compute
        input_data['count'] = len(input_data['uid_list'])
        es.index(index='group_result', doc_type='group', id=task_name, body=input_data)
    else:
        input_data['status'] = 0  # mark the task not compute
        uid_file = input_data['uid_file']
        uid_list = read_uid_file(uid_file)
        input_data['count'] = len(uid_list)
        input_data['uid_list'] = json.dumps(uid_list)
        r.lpush('group_task', json.dumps(input_data))
        es.index(index='group_result', doc_type='group', id=task_name, body=input_data)
        delete_status = delete_uid_file(uid_file)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        if delete_status == 0:
            print('fail delete uid file')
        elif delete_status == 1:
            print('success delete uid file')
    return 1
Ejemplo n.º 7
0
def get_group_list(task_name):
    """List the members of a group task with normalised portrait scores.

    Args:
        task_name: id of the task document in ``index_name``.

    Returns:
        A list with one entry per document returned by the multi-get on
        'user_portrait': ``[uid, uname, gender, location, normal_importance,
        normal_influence]`` when all fields could be read, otherwise just
        ``[uid]``. An empty list is returned when the task cannot be fetched.
    """
    results = []
    try:
        es_results = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        return results
    uid_list = es_results['uid_list']
    user_portrait_attribute = es.mget(index='user_portrait', doc_type='user', body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            # float() guards against Python 2 integer division, which would
            # truncate the ratio to 0 whenever both operands are ints.
            importance_ratio = float(source['importance']) / evaluate_max['importance']
            influence_ratio = float(source['influence']) / evaluate_max['influence']
            # Maps a ratio in [0, 1] onto [0, 100] on a log10 scale.
            normal_importance = math.log(importance_ratio * 9 + 1, 10) * 100
            normal_influence = math.log(influence_ratio * 9 + 1, 10) * 100
            row = [uid, source['uname'], source['gender'], source['location'],
                   normal_importance, normal_influence]
        except Exception:
            # Missing document/field or an invalid score: report the uid only.
            row = [uid]
        results.append(row)
    return results
def identify_task(task_name):
    """Fetch a task document from ``task_index_name``.

    Args:
        task_name: id of the task document.

    Returns:
        The document's ``_source`` dict, or None when it cannot be fetched.
    """
    try:
        return es.get(index=task_index_name, doc_type=task_index_type, id=task_name)['_source']
    except Exception:
        # Best effort: any lookup failure means "no such task". Unlike a bare
        # ``except:`` this does not swallow KeyboardInterrupt/SystemExit.
        return None
Ejemplo n.º 9
0
def user_type(uid):
    """Look up the ``type`` field of a 'sensitive_user_portrait' user.

    Args:
        uid: id of the user document.

    Returns:
        The stored ``type`` value, or "" if the document or field is
        unavailable.
    """
    try:
        source = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)["_source"]
        return source["type"]
    except Exception:
        # Lookup failures are treated as "type unknown"; ``Exception`` rather
        # than a bare ``except:`` lets KeyboardInterrupt/SystemExit through.
        return ""
Ejemplo n.º 10
0
def get_group_list(task_name):
    """List the members of a group task with normalised portrait scores.

    Args:
        task_name: id of the task document in ``index_name``.

    Returns:
        A list with one entry per document returned by the multi-get on
        'user_portrait': ``[uid, uname, gender, location, normal_importance,
        normal_influence]`` when every field could be read, otherwise just
        ``[uid]``. An empty list is returned when the task cannot be fetched.
    """
    results = []
    try:
        es_results = es.get(index=index_name,
                            doc_type=index_type,
                            id=task_name)['_source']
    except Exception:
        return results

    user_portrait_attribute = es.mget(index='user_portrait',
                                      doc_type='user',
                                      body={'ids': es_results['uid_list']})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['uname']
            gender = source['gender']
            location = source['location']
            # float() avoids Python 2 integer division truncating the ratio
            # to 0 when both the score and the maximum are ints.
            normal_importance = math.log(
                float(source['importance']) / evaluate_max['importance'] * 9
                + 1, 10) * 100
            normal_influence = math.log(
                float(source['influence']) / evaluate_max['influence'] * 9
                + 1, 10) * 100
        except Exception:
            # Missing document/field or an invalid score: report the uid only.
            results.append([uid])
            continue
        results.append([
            uid, uname, gender, location, normal_importance,
            normal_influence
        ])
    return results
Ejemplo n.º 11
0
def end_track_task(task_name):
    """Mark a tracking task as ended and remove it from the redis queue.

    Args:
        task_name: id of the task document in ``index_name``.

    Returns:
        A status string: 'task name not exist', 'task have end',
        'change user task count fail', 'delete task from redis fail' or
        'success change status to end'.
    """
    try:
        task_exist = es.get(index=index_name,
                            doc_type=index_type,
                            id=task_name)['_source']
    except Exception:
        return 'task name not exist'

    # Bug fix: the original compared the local ``status`` flag (always the
    # int 0) with '0', so an already-ended task was never detected. The
    # stored status is checked instead; 0 is what this function writes when
    # it ends a task, and '0' is accepted in case it was stored as a string.
    if task_exist['status'] in (0, '0'):
        return 'task have end'

    task_exist['status'] = 0
    # End time: the current time rounded up to the next 900-second boundary,
    # counted from the timestamp of the current date.
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    time_segment = int((now_ts - now_date_ts) / 900) + 1
    task_exist['end_date'] = ts2date(now_date_ts + time_segment * 900)

    if change_user_count(task_exist['uid_list']) == 0:
        return 'change user task count fail'
    es.index(index=index_name,
             doc_type=index_type,
             id=task_name,
             body=task_exist)
    if delete_task_redis(task_name) == 0:
        return 'delete task from redis fail'
    return 'success change status to end'
Ejemplo n.º 12
0
def submit_task(input_data):
    """Submit a group task unless a task with the same name already exists.

    ``input_data`` is modified in place ('status', 'count' and, for the file
    variant, 'uid_list' are set), serialised onto the 'group_task' list of
    ``r`` and indexed into 'group_result'/'group'.

    Args:
        input_data: dict with at least 'task_name' and either 'uid_list'
            or 'uid_file'.

    Returns:
        1 when the task was submitted, 0 when a document with this task name
        was found in ``index_name`` and nothing was done.
    """
    task_name = input_data['task_name']
    try:
        es.get(index=index_name, doc_type=index_type, id=task_name)
    except Exception:
        # A failed lookup is taken to mean the name is free. ``Exception``
        # replaces the bare ``except:`` so Ctrl-C is not swallowed here.
        status = 1
    else:
        status = 0  # mark it can not submit
    if status == 0:
        return status

    if 'uid_file' in input_data:
        input_data['status'] = 0  # mark the task not compute
        uid_file = input_data['uid_file']
        uid_list = read_uid_file(uid_file)
        input_data['count'] = len(uid_list)
        input_data['uid_list'] = json.dumps(uid_list)
        r.lpush('group_task', json.dumps(input_data))
        es.index(index='group_result',
                 doc_type='group',
                 id=task_name,
                 body=input_data)
        delete_status = delete_uid_file(uid_file)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        if delete_status == 0:
            print('fail delete uid file')
        elif delete_status == 1:
            print('success delete uid file')
    else:
        # NOTE(review): in this branch the queue payload is serialised before
        # 'status' and 'count' are added, unlike the uid_file branch; the
        # original ordering is kept -- confirm which one the consumer expects.
        r.lpush('group_task', json.dumps(input_data))
        input_data['status'] = 0  # mark the task not compute
        input_data['count'] = len(input_data['uid_list'])
        es.index(index='group_result',
                 doc_type='group',
                 id=task_name,
                 body=input_data)
    return status
def submit_track_task(input_data):
    '''
    Submit a new tracking task.

    step1: identify the task_name is not exist
    step2: index new task_name
    step3: add user list to redis---should identify
    step4: add task to redis queue
    step5: add start_ts to redis hash----monitor_task_time_record

    input_data must contain 'task_name', 'submit_date' and 'uid_list'; the
    whole dict is indexed as the task document.

    Returns a status string: 'task_name exist', 'add user to redis set fail',
    'add task to redis fail', 'add task record time fail' or 'success submit'.
    '''
    task_name = input_data['task_name']
    submit_date = input_data['submit_date']
    try:
        es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        # Lookup failed: the name is treated as free. The creation steps now
        # run after the handler instead of inside a bare ``except:`` block.
        pass
    else:
        return 'task_name exist'

    es.index(index=index_name, doc_type=index_type, id=task_name, body=input_data)
    # Each helper signals failure by returning 0; stop at the first failure.
    if add_user_set(input_data['uid_list']) == 0:
        return 'add user to redis set fail'
    if add_task_redis(task_name) == 0:
        return 'add task to redis fail'
    if add_task_record_time(task_name, submit_date) == 0:
        return 'add task record time fail'
    return 'success submit'
def end_track_task(task_name):
    """Mark a tracking task as ended and remove it from the redis queue.

    Args:
        task_name: id of the task document in ``index_name``.

    Returns:
        A status string: 'task name not exist', 'task have end',
        'change user task count fail', 'delete task from redis fail' or
        'success change status to end'.
    """
    try:
        task_exist = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        return 'task name not exist'

    # Bug fix: the original tested the local ``status`` variable (always the
    # int 0) against '0', which can never be true. The stored task status is
    # tested instead; 0 is the value written below when a task is ended, and
    # '0' is accepted in case it was stored as a string.
    task_status = task_exist['status']
    if task_status == 0 or task_status == '0':
        return 'task have end'

    task_exist['status'] = 0
    # made end time: current time rounded up to the next 900-second boundary,
    # counted from the timestamp of the current date.
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    time_segment = int((now_ts - now_date_ts) / 900) + 1
    end_ts = now_date_ts + time_segment * 900
    task_exist['end_date'] = ts2date(end_ts)

    if change_user_count(task_exist['uid_list']) == 0:
        return 'change user task count fail'
    es.index(index=index_name, doc_type=index_type, id=task_name, body=task_exist)
    if delete_task_redis(task_name) == 0:
        return 'delete task from redis fail'
    return 'success change status to end'
Ejemplo n.º 15
0
def identify_uid_in(uid):
    """Tell whether ``uid`` has a document in 'sensitive_user_portrait'.

    Args:
        uid: id of the user document.

    Returns:
        The ``found`` flag of the get response, or False when the lookup
        raises (the Elasticsearch client raises for a missing document
        instead of returning ``found: False``).
    """
    try:
        return es.get(index='sensitive_user_portrait',
                      doc_type="user",
                      id=uid)['found']
    except Exception:
        # Consistent with the other lookups in this file: a failed get is
        # reported as "not present" rather than propagated.
        return False
Ejemplo n.º 16
0
def submit_attribute(attribute_name, attribute_value, submit_user,
                     submit_date):
    """Create a new custom attribute definition if it does not exist yet.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.
        attribute_value: comma-separated list of allowed values; stored as
            an '&'-separated string.
        submit_user: stored in the definition's 'user' field.
        submit_date: stored in the definition's 'date' field.

    Returns:
        True when the definition was created, False when a definition with
        this name already exists.
    """
    # Bug fix: the original read ['docs'] from the get response. That key
    # belongs to multi-get responses, so the lookup always failed and an
    # existing definition was silently overwritten.
    try:
        es.get(index=attribute_index_name,
               doc_type=attribute_index_type,
               id=attribute_name)['_source']
    except Exception:
        # Lookup failed: treat the attribute as not yet defined.
        pass
    else:
        return False

    input_data = {
        'attribute_name': attribute_name,
        'attribute_value': '&'.join(attribute_value.split(',')),
        'user': submit_user,
        'date': submit_date,
    }
    es.index(index=attribute_index_name,
             doc_type=attribute_index_type,
             id=attribute_name,
             body=input_data)
    return True
Ejemplo n.º 17
0
def submit_track_task(input_data):
    '''
    Submit a new tracking task.

    step1: identify the task_name is not exist
    step2: index new task_name
    step3: add user list to redis---should identify
    step4: add task to redis queue
    step5: add start_ts to redis hash----monitor_task_time_record

    input_data must contain 'task_name', 'submit_date' and 'uid_list'; the
    whole dict is indexed as the task document.

    Returns a status string: 'task_name exist', 'add user to redis set fail',
    'add task to redis fail', 'add task record time fail' or 'success submit'.
    '''
    task_name = input_data['task_name']
    submit_date = input_data['submit_date']

    task_found = True
    try:
        es.get(index=index_name, doc_type=index_type,
               id=task_name)['_source']
    except Exception:
        # Lookup failed: the name is treated as free. ``Exception`` replaces
        # the bare ``except:`` and the creation steps no longer run inside
        # the exception handler.
        task_found = False
    if task_found:
        return 'task_name exist'

    es.index(index=index_name,
             doc_type=index_type,
             id=task_name,
             body=input_data)
    # Each helper signals failure by returning 0; stop at the first failure.
    if add_user_set(input_data['uid_list']) == 0:
        return 'add user to redis set fail'
    if add_task_redis(task_name) == 0:
        return 'add task to redis fail'
    if add_task_record_time(task_name, submit_date) == 0:
        return 'add task record time fail'
    return 'success submit'
Ejemplo n.º 18
0
def delete_attribute(attribute_name):
    """Look up a custom attribute definition; fail if it does not exist.

    NOTE(review): despite its name, the visible code never deletes anything
    and returns None on success -- the rest of the function appears to be
    missing from this snippet. Only the lookup is kept here.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.

    Raises:
        Whatever the Elasticsearch lookup raises (e.g. when the definition
        does not exist). The former ``except Exception, e: raise e`` wrapper
        re-raised the same exception, was Python-2-only syntax, and was
        followed by an unreachable ``return``; letting the exception
        propagate is equivalent.
    """
    # Accessing '_source' is kept so a malformed response still fails loudly.
    es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
Ejemplo n.º 19
0
def sort_sensitive_text(uid):
    """Collect display details for every sensitive post of a user.

    Args:
        uid: user id passed to ``search_sensitive_text``.

    Returns:
        A list with one entry per hit whose ``sensitive`` field is truthy:
        ``[timestamp, geo, text, sensitive_words, retweeted_link, sentiment,
        message_type, retweeted_number, comment_number]``. ``sentiment`` is
        1, -1 or 0 and the two counters fall back to 0 when no statistics
        are available.
    """
    # (retweeted-detail field, comment-detail field) per message type.
    detail_fields = {
        1: ("s_origin_weibo_retweeted_detail", "s_origin_weibo_comment_detail"),
        2: ("s_retweeted_weibo_retweeted_detail", "s_retweeted_weibo_comment_detail"),
    }
    text_all = []
    for hit in search_sensitive_text(uid) or []:
        item = hit["_source"]
        if not item["sensitive"]:
            continue
        text = item["text"].encode("utf-8", "ignore")

        # Sentiment: entries under key "126" count as positive, entries
        # under "127"/"128"/"129" as negative; the larger side wins.
        sentiment = 0
        sentiment_dict = json.loads(item["sentiment"])
        if sentiment_dict:
            positive = len(sentiment_dict.get("126", {}))
            negative = sum(len(sentiment_dict.get(key, {})) for key in ("127", "128", "129"))
            if positive > negative:
                sentiment = 1
            elif positive < negative:
                sentiment = -1

        ts = item["timestamp"]
        item_uid = item["uid"]  # separate name: do not shadow the parameter
        mid = item["mid"]
        message_type = item.get("message_type", 0)
        # The per-day statistics index is named after the date without dashes.
        date = ts2datetime(float(ts)).replace("-", "")
        retweeted_number = 0
        comment_number = 0
        try:
            bci_result = es.get(index=date, doc_type="bci", id=item_uid)["_source"]
            fields = detail_fields.get(int(message_type))
            if fields:
                # Default to 0 (not None) when this post has no counter yet.
                retweeted_number = bci_result[fields[0]].get(mid, 0)
                comment_number = bci_result[fields[1]].get(mid, 0)
        except Exception:
            # No statistics available for that day/user: report zeros.
            retweeted_number = 0
            comment_number = 0

        single_sw = item.get("sensitive_words", {})
        sw = list(json.loads(single_sw)) if single_sw else []
        text_all.append(
            [ts, item["geo"], text, sw, extract_uname(text), sentiment,
             message_type, retweeted_number, comment_number]
        )
    return text_all
Ejemplo n.º 20
0
def user_type(uid):
    """Return the ``type`` field of a user in 'sensitive_user_portrait'.

    Args:
        uid: id of the user document.

    Returns:
        The stored ``type`` value, or '' when the document or the field
        cannot be read.
    """
    try:
        response = es.get(index='sensitive_user_portrait',
                          doc_type="user",
                          id=uid)
        return response['_source']['type']
    except Exception:
        # Any lookup failure means "type unknown"; ``Exception`` instead of
        # a bare ``except:`` keeps KeyboardInterrupt/SystemExit propagating.
        return ''
Ejemplo n.º 21
0
def get_attribute_value(attribute_name):
    """Return the allowed values of a custom attribute definition.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.

    Returns:
        The list obtained by splitting the stored ``attribute_value`` string
        on '&', or the string 'no attribute' when the definition cannot be
        fetched.
    """
    try:
        attribute_result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except Exception:
        return 'no attribute'
    # The Python-2-only debug print of the whole document was dropped.
    return attribute_result['attribute_value'].split('&')
Ejemplo n.º 22
0
def delete_attribute(attribute_name):
    """Look up a custom attribute definition; fail if it does not exist.

    NOTE(review): despite its name, the visible code never deletes anything
    and returns None on success -- the rest of the function appears to be
    missing from this snippet. Only the lookup is kept here.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.

    Raises:
        Whatever the Elasticsearch lookup raises (e.g. when the definition
        does not exist). The former ``except Exception, e: raise e`` wrapper
        re-raised the same exception, was Python-2-only syntax, and was
        followed by an unreachable ``return``; letting the exception
        propagate is equivalent.
    """
    # Accessing '_source' is kept so a malformed response still fails loudly.
    es.get(index=attribute_index_name,
           doc_type=attribute_index_type,
           id=attribute_name)['_source']
Ejemplo n.º 23
0
def get_attribute_value(attribute_name):
    """Return the allowed values of a custom attribute definition.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.

    Returns:
        The list obtained by splitting the stored ``attribute_value`` string
        on '&', or the string 'no attribute' when the definition cannot be
        fetched.
    """
    try:
        attribute_result = es.get(index=attribute_index_name,
                                  doc_type=attribute_index_type,
                                  id=attribute_name)['_source']
    except Exception:
        # Best effort: every lookup failure is reported as a missing
        # attribute, without swallowing KeyboardInterrupt/SystemExit.
        return 'no attribute'
    # The Python-2-only debug print of the whole document was dropped.
    attribute_value_string = attribute_result['attribute_value']
    return attribute_value_string.split('&')
Ejemplo n.º 24
0
def change_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Set (or overwrite) a custom attribute on a user's portrait document.

    Args:
        uid: id of the user document in ``user_index_name``.
        attribute_name: id of the attribute definition in
            ``attribute_index_name``; also the field written on the user.
        attribute_value: new value; must be one of the '&'-separated entries
            of the definition's ``attribute_value`` string.
        submit_user: unused here; no permission check is implemented.

    Returns:
        True on success, otherwise 'no user', 'no attribute' or
        'no attribute value'.
    """
    # The user document is only fetched to confirm it exists. Lookup
    # failures map to "missing"; ``Exception`` replaces the bare ``except:``
    # so KeyboardInterrupt/SystemExit are not swallowed.
    try:
        es.get(index=user_index_name, doc_type=user_index_type, id=uid)['_source']
    except Exception:
        return 'no user'
    try:
        attribute_result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except Exception:
        return 'no attribute'

    value_list = attribute_result['attribute_value'].split('&')
    if attribute_value not in value_list:
        return 'no attribute value'

    es.update(index=user_index_name, doc_type=user_index_type, id=uid,
              body={'doc': {attribute_name: attribute_value}})
    return True
def sort_sensitive_text(uid):
    """Collect display details for every sensitive post of a user.

    Args:
        uid: user id passed to ``search_sensitive_text``.

    Returns:
        A list with one entry per hit whose ``sensitive`` field is truthy:
        ``[timestamp, geo, text, sensitive_words, retweeted_link, sentiment,
        message_type, retweeted_number, comment_number]``. ``sentiment`` is
        1, -1 or 0 and the two counters fall back to 0 when no statistics
        are available.
    """
    text_all = []
    sensitive_text = search_sensitive_text(uid)
    if not sensitive_text:
        return text_all

    for hit in sensitive_text:
        item = hit['_source']
        if not item['sensitive']:
            continue
        text = item['text'].encode('utf-8', 'ignore')

        # Sentiment: entries under key '126' count as positive, entries
        # under '127'/'128'/'129' as negative; the larger side wins.
        sentiment_dict = json.loads(item['sentiment'])
        sentiment = 0
        if sentiment_dict:
            positive = len(sentiment_dict.get('126', {}))
            negative = (len(sentiment_dict.get('127', {}))
                        + len(sentiment_dict.get('128', {}))
                        + len(sentiment_dict.get('129', {})))
            if positive > negative:
                sentiment = 1
            elif positive < negative:
                sentiment = -1

        ts = item['timestamp']
        post_uid = item['uid']  # separate name: do not shadow the parameter
        mid = item['mid']
        message_type = item.get('message_type', 0)
        # The per-day statistics index is named after the date without dashes.
        date = ts2datetime(float(ts)).replace('-', '')
        retweeted_number = 0
        comment_number = 0
        try:
            bci_result = es.get(index=date, doc_type='bci', id=post_uid)['_source']
            # ``.get(mid, 0)``: a post without a counter yields 0, not None.
            if int(message_type) == 1:
                retweeted_number = bci_result['s_origin_weibo_retweeted_detail'].get(mid, 0)
                comment_number = bci_result['s_origin_weibo_comment_detail'].get(mid, 0)
            elif int(message_type) == 2:
                retweeted_number = bci_result['s_retweeted_weibo_retweeted_detail'].get(mid, 0)
                comment_number = bci_result['s_retweeted_weibo_comment_detail'].get(mid, 0)
        except Exception:
            # No statistics available for that day/user: report zeros.
            retweeted_number = 0
            comment_number = 0

        single_sw = item.get('sensitive_words', {})
        if single_sw:
            sw = list(json.loads(single_sw))
        else:
            sw = []
        geo = item['geo']
        retweeted_link = extract_uname(text)
        text_all.append([ts, geo, text, sw, retweeted_link, sentiment, message_type, retweeted_number, comment_number])
    return text_all
Ejemplo n.º 26
0
def get_user_attribute_name(uid):
    """List the custom attribute names stored on a user's portrait document.

    Args:
        uid: id of the user document in ``user_index_name``.

    Returns:
        Every field name of the document that is not listed in
        ``identify_attribute_list``; an empty list when the document cannot
        be fetched.
    """
    # The lookup itself now sits inside the try block: previously it was
    # outside, so a failing get raised before the ``source = {}`` fallback
    # could apply. The Python-2-only debug print was dropped.
    try:
        source = es.get(index=user_index_name, doc_type=user_index_type,
                        id=uid)['_source']
    except Exception:
        return []
    return [key for key in source if key not in identify_attribute_list]
Ejemplo n.º 27
0
def get_user_attribute_name(uid):
    """List the custom attribute names stored on a user's portrait document.

    Args:
        uid: id of the user document in ``user_index_name``.

    Returns:
        Every field name of the document that is not listed in
        ``identify_attribute_list``; an empty list when the document cannot
        be fetched.
    """
    result = []
    # The get call is now covered by the try block; it used to sit outside,
    # so a failing lookup raised instead of falling back to an empty source.
    # The Python-2-only debug print was dropped.
    try:
        user_result = es.get(index=user_index_name, doc_type=user_index_type,
                             id=uid)
        source = user_result['_source']
    except Exception:
        source = {}
    for key in source:
        if key not in identify_attribute_list:
            result.append(key)
    return result
def get_sensitive_word(task_name, timestamp):
    """Return a task's sensitive words at ``timestamp``, most frequent first.

    Args:
        task_name: task id; also used as the doc type in
            ``monitor_index_name``.
        timestamp: id (stringified) of the intermediate-result document.

    Returns:
        A list of ``(word, count)`` pairs sorted by count in descending
        order, an empty list when no intermediate result can be fetched, or
        the string 'the task is not exist' when the task is unknown.
    """
    #step1: get task user
    #step2: get sensitive word from mid-result by condition: task_name, timestamp
    if not identify_task(task_name):
        return 'the task is not exist'
    try:
        task_mid_result = es.get(index=monitor_index_name, doc_type=task_name, id=str(timestamp))['_source']
    except Exception:
        # Bug fix: the original only set an unused ``result = None`` here and
        # then hit a NameError on ``task_mid_result`` below.
        return []
    sensitive_word_dict = json.loads(task_mid_result['sensitive_word'])
    return sorted(sensitive_word_dict.items(), key=lambda x: x[1], reverse=True)
Ejemplo n.º 29
0
def add_tag2group(uid_list, attribute_name, attribute_value):
    """Tag every user in ``uid_list`` that does not carry the attribute yet.

    Users whose document cannot be fetched, whose document is empty, or who
    already have ``attribute_name`` set are skipped silently.

    Args:
        uid_list: iterable of user document ids in ``user_index_name``.
        attribute_name: id of the attribute definition in
            ``attribute_index_name``; also the field written on each user.
        attribute_value: value to store; must be one of the '&'-separated
            entries of the definition's ``attribute_value`` string.

    Returns:
        True once the list has been processed, otherwise 'no attribute' or
        'no attribute value'.
    """
    try:
        attribute_exist = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except Exception:
        return 'no attribute'
    attribute_exist_value_list = attribute_exist['attribute_value'].split('&')
    if attribute_value not in attribute_exist_value_list:
        return 'no attribute value'

    # Loop-invariant update body, built once.
    update_body = {'doc': {attribute_name: attribute_value}}
    for uid in uid_list:
        try:
            user_exist = es.get(index=user_index_name, doc_type=user_index_type, id=uid)['_source']
        except Exception:
            # Unknown user (or failed lookup): treated like an empty document.
            user_exist = {}
        if user_exist and attribute_name not in user_exist:
            es.update(index=user_index_name, doc_type=user_index_type, id=uid, body=update_body)
    return True
def get_network(task_exist):
    """Collect the daily top-5 inner-network users of a task, newest first.

    Walks backwards one day at a time from the reference date, for at most
    7 days and never before the task's submit date, reading the
    'inner_<date>' documents of the task from ``monitor_index_name``.

    Args:
        task_exist: task document; 'task_name' and 'submit_date' are read.

    Returns:
        ``[date_list, top_list_dict, abnormal_index]`` where ``date_list``
        holds the dates for which a result document was found,
        ``top_list_dict`` maps 'top1'..'top5' to one ``[uid, uname, count]``
        entry per such date, and ``abnormal_index`` is the result of
        ``compute_inner_polarization(top_list_dict)``.
    """
    task_name = task_exist['task_name']
    submit_ts = date2ts(task_exist['submit_date'])

    time_segment = 24 * 3600
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    # NOTE(review): the reference date is still overridden with a hard-coded
    # test value, exactly as in the original; remove the next line once the
    # function should follow the real current date.
    now_date_ts = datetime2ts('2013-09-07')

    iter_field = ['top1', 'top2', 'top3', 'top4', 'top5']
    iter_date_ts = now_date_ts
    date_list = []
    top_list_dict = {}
    # Bug fix: the original ``iter_count`` was never incremented, so its
    # ``iter_count >= 8`` limit (7 iterations) never took effect.
    for _ in range(7):
        if iter_date_ts < submit_ts:
            break
        iter_date = ts2datetime(iter_date_ts)
        iter_date_ts -= time_segment
        key = 'inner_' + str(iter_date)
        try:
            task_date_result = es.get(index=monitor_index_name, doc_type=task_name, id=key)['_source']
        except Exception:
            # Bug fix: the original fell back to {} and then crashed with a
            # KeyError on the first field. A day without a result document is
            # skipped entirely so date_list and the top lists stay aligned.
            continue
        date_list.append(iter_date)
        for field in iter_field:
            # Each field holds a JSON-encoded [uid, count] pair.
            user_count_item = json.loads(task_date_result[field])
            uid = user_count_item[0]
            count = user_count_item[1]
            top_list_dict.setdefault(field, []).append([uid, uid2uname(uid), count])

    abnormal_index = compute_inner_polarization(top_list_dict)

    return [date_list, top_list_dict, abnormal_index]
Ejemplo n.º 31
0
def delete_attribute_portrait(uid, attribute_name, submit_user):
    """Remove a custom attribute from a user's portrait document.

    The document is fetched, the field is dropped and the whole document is
    re-indexed under the same id.

    Args:
        uid: id of the user document in ``user_index_name``.
        attribute_name: field to remove from the document.
        submit_user: unused here; no permission check is implemented.

    Returns:
        True on success, otherwise 'no user' or 'user have no attribtue'.

    Raises:
        Whatever ``es.index`` raises when the re-index fails.
    """
    try:
        user_exist = es.get(index=user_index_name, doc_type=user_index_type, id=uid)['_source']
    except Exception:
        return 'no user'
    if attribute_name not in user_exist:
        return 'user have no attribtue'

    user_exist.pop(attribute_name)
    # Errors propagate unchanged; the former Python-2-only
    # ``except Exception, e: raise e`` wrapper only re-raised them.
    es.index(index=user_index_name, doc_type=user_index_type, id=uid, body=user_exist)
    # Bug fix: the success status was computed but never returned, so callers
    # received None.
    return True
Ejemplo n.º 32
0
def change_attribute(attribute_name, value, user, state):
    """Replace the allowed values of an existing custom attribute definition.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.
        value: comma-separated list of allowed values; stored as an
            '&'-separated string.
        user: stored in the definition's 'user' field.
        state: accepted for interface compatibility; not used.

    Returns:
        True when the definition was re-indexed, False when it could not be
        fetched.
    """
    # identify the attribute_name is in ES - custom attribute
    try:
        result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except Exception:
        # Best effort, without swallowing KeyboardInterrupt/SystemExit as the
        # former bare ``except:`` did.
        return False

    result['attribute_name'] = attribute_name
    result['attribute_value'] = '&'.join(value.split(','))
    result['user'] = user
    result['date'] = ts2datetime(time.time())
    es.index(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name, body=result)
    return True
Ejemplo n.º 33
0
def get_group_tag(group_name):
    """Fetch the portrait documents of all users of a group task.

    NOTE(review): the comments of the original announce a tag-statistics
    step, but the visible code stops after the multi-get and returns None on
    the success path -- the rest of the function appears to be missing from
    this snippet.

    Args:
        group_name: id of the group task document in ``group_index_name``.

    Returns:
        'no group task' when the task cannot be fetched, 'no user' when it
        has no readable 'uid_list', otherwise None.

    Raises:
        Whatever ``es.mget`` raises; the former Python-2-only
        ``except Exception, e: raise e`` wrapper only re-raised it.
    """
    #get group task uid list
    try:
        group_task_result = es.get(index=group_index_name, doc_type=group_index_type, id=group_name)
    except Exception:
        return 'no group task'
    try:
        uid_list = group_task_result['_source']['uid_list']
    except Exception:
        return 'no user'
    #get user tag
    user_result = es.mget(index=user_index_name, doc_type=user_index_type, body={'ids': uid_list})['docs']
    # TODO(review): statistic tag -- ``user_result`` is not used yet.
def delete_track_task(task_name):
    """Delete a tracking task from Elasticsearch and from redis.

    Args:
        task_name: id of the task document in ``index_name``.

    Returns:
        A status string: 'task not exist', 'delete task from redis fail' or
        'success delete task'.
    """
    try:
        es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        return 'task not exist'

    # NOTE(review): the original had the ``change_user_count(task_user)``
    # step commented out and forced ``status = 1``, which made its
    # 'change user count fail' branch unreachable. The dead branch and the
    # unused ``task_user`` local were removed; restore the call here if the
    # per-user task count has to be updated on deletion.

    #delete task from es
    es.delete(index=index_name, doc_type=index_type, id=task_name)
    if delete_task_redis(task_name) == 0:
        return 'delete task from redis fail'
    return 'success delete task'
Ejemplo n.º 35
0
def submit_attribute(attribute_name, attribute_value, submit_user, submit_date):
    """Create a new custom attribute definition if it does not exist yet.

    Args:
        attribute_name: id of the definition in ``attribute_index_name``.
        attribute_value: comma-separated list of allowed values; stored as
            an '&'-separated string.
        submit_user: stored in the definition's 'user' field.
        submit_date: stored in the definition's 'date' field.

    Returns:
        True when the definition was created, False when a definition with
        this name already exists.
    """
    #maybe there have to identify the user admitted to submit attribute
    # Bug fix: the original read ['docs'] from the get response. That key
    # belongs to multi-get responses, so the lookup always failed and an
    # existing definition was silently overwritten.
    already_defined = True
    try:
        es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except Exception:
        # Lookup failed: treat the attribute as not yet defined.
        already_defined = False
    if already_defined:
        return False

    input_data = dict()
    input_data['attribute_name'] = attribute_name
    input_data['attribute_value'] = '&'.join(attribute_value.split(','))
    input_data['user'] = submit_user
    input_data['date'] = submit_date
    es.index(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name, body=input_data)
    return True
Ejemplo n.º 36
0
def delete_attribute_portrait(uid, attribute_name, submit_user):
    """Remove one custom attribute from a user's portrait document.

    uid            -- id of the user document in ``user_index_name``
    attribute_name -- key to drop from that document
    submit_user    -- currently unused (no permission check is performed)

    Returns 'no user' when the user lookup fails, 'user have no attribtue'
    when the key is absent, and True once the document has been re-indexed
    without the key.  Errors raised while re-indexing propagate to the caller.
    """
    #identify the user exist
    try:
        user_exist = es.get(index=user_index_name,
                            doc_type=user_index_type,
                            id=uid)['_source']
    except Exception:
        return 'no user'
    #identify the attribute value exist in es_user_portrait
    if attribute_name not in user_exist:
        return 'user have no attribtue'
    #identify the submit_user have been admitted---without
    user_exist.pop(attribute_name)
    # Re-index the whole document: a partial update cannot remove a field.
    es.index(index=user_index_name,
             doc_type=user_index_type,
             id=uid,
             body=user_exist)
    # The success status used to be computed but never returned.
    return True
Ejemplo n.º 37
0
def get_group_tag(group_name):
    """Fetch a group task, its uid list and the members' portrait documents.

    Returns 'no group task' when the group lookup fails and 'no user' when
    the task document has no ``_source.uid_list``.  Any error from the
    multi-get of the members propagates to the caller.

    NOTE(review): the visible body stops after the multi-get, so the "user
    tag" / "statistic tag" steps named in the original comments are not
    implemented here and the function falls through returning None.
    """
    # get group task uid list
    try:
        task_doc = es.get(index=group_index_name,
                          doc_type=group_index_type,
                          id=group_name)
    except:
        return 'no group task'
    try:
        member_ids = task_doc['_source']['uid_list']
    except:
        return 'no user'
    # get user tag -- failures are deliberately not handled here
    member_docs = es.mget(index=user_index_name,
                          doc_type=user_index_type,
                          body={'ids': member_ids})['docs']
Ejemplo n.º 38
0
def delete_track_task(task_name):
    """Remove a tracking task: first its ES document, then its redis entry.

    Returns a status string:
        'task not exist'              -- no such task could be read from ES
        'delete task from redis fail' -- ``delete_task_redis`` returned 0
        'success delete task'         -- the task was removed from both stores
    """
    try:
        es.get(index=index_name,
               doc_type=index_type,
               id=task_name)['_source']
    except Exception:
        return 'task not exist'
    # NOTE: the redis task_count update (change_user_count) is switched off
    # in this version; the status check that guarded it could never fail and
    # has been dropped together with the unused uid_list lookup.
    # delete task from es
    es.delete(index=index_name, doc_type=index_type, id=task_name)
    redis_status = delete_task_redis(task_name)
    if redis_status == 0:
        return 'delete task from redis fail'
    return 'success delete task'
Ejemplo n.º 39
0
def change_attribute(attribute_name, value, user, state):
    """Replace the value list, owner and date of an existing custom attribute.

    attribute_name -- id of the attribute document to rewrite
    value          -- comma-separated values; stored joined with '&'
    user           -- stored under 'user'
    state          -- not used by this function

    Returns False when the attribute cannot be read from ES, True after the
    updated document has been indexed.
    """
    # identify the attribute_name is in ES - custom attribute
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return False

    attribute_doc['attribute_name'] = attribute_name
    attribute_doc['attribute_value'] = '&'.join(value.split(','))
    attribute_doc['user'] = user
    # the stored date is always "now", formatted by ts2datetime
    attribute_doc['date'] = ts2datetime(time.time())
    es.index(index=attribute_index_name,
             doc_type=attribute_index_type,
             id=attribute_name,
             body=attribute_doc)
    return True
Ejemplo n.º 40
0
def ajax_sort_sensitive_words():
    """Return a user's sensitive words as JSON, filtered by level/category.

    Request arguments:
        uid      -- id of the user document in 'sensitive_user_portrait'
        level    -- 0 (or missing/empty): all levels; 1 or 2: that level;
                    any other integer: level 3
        category -- '' (or missing): every category; otherwise only entries
                    whose category equals this value

    The per-day word counts stored in ``sensitive_words_dict`` are summed per
    word, ordered by total count (descending) and passed through
    ``sort_sensitive_words``.  Each returned entry is read as having its
    level at index 2 and its category at index 3.
    """
    level_order = request.args.get("level", "")  # 0:all, 1:level 1, 2:level2, 3:level3
    category_order = request.args.get("category", "")  # '': all
    uid = request.args.get("uid", "")
    # A missing "level" used to crash on int(""); treat it as 0 (all levels).
    level = int(level_order) if level_order else 0

    words_dict = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)["_source"]["sensitive_words_dict"]
    words_dict = json.loads(words_dict)
    all_words_dict = dict()
    for day_words in words_dict.values():
        for word in day_words:
            if word in all_words_dict:
                all_words_dict[word] += day_words[word]
            else:
                all_words_dict[word] = day_words[word]
    sorted_words = sorted(all_words_dict.items(), key=lambda x: x[1], reverse=True)
    new_words_list = sort_sensitive_words(sorted_words)

    if level == 0 and not category_order:
        # no filtering requested at all
        return json.dumps(new_words_list)

    # Bucket entries by level, keeping only the requested category (if any).
    # Entries with a level outside 1..3 are dropped.
    by_level = {1: [], 2: [], 3: []}
    for item in new_words_list:
        bucket = by_level.get(int(item[2]))
        if bucket is None:
            continue
        if not category_order or item[3] == category_order:
            bucket.append(item)

    if level == 0:
        new_list = by_level[1] + by_level[2] + by_level[3]
    elif level in (1, 2):
        new_list = by_level[level]
    else:
        new_list = by_level[3]
    return json.dumps(new_list)
def identify_uid_in(uid):
    """Tell whether ``uid`` has a document in 'sensitive_user_portrait'.

    Returns the ``found`` flag of the get response.  A lookup that fails with
    HTTP status 404 (document missing) yields False instead of raising; any
    other error propagates to the caller.
    """
    try:
        return es.get(index='sensitive_user_portrait', doc_type="user", id=uid)['found']
    except Exception as err:
        # The client reports a missing document as an exception carrying
        # status_code 404 rather than as found == False.
        if getattr(err, 'status_code', None) == 404:
            return False
        raise
Ejemplo n.º 42
0
def sensitive_attribute(uid, date):
    """Collect the sensitive-behaviour profile of one user.

    Returns ``{'utype': 0}`` immediately when ``user_type(uid)`` is falsy.
    Otherwise returns a dict with ``utype`` = 1 and:
      * uid, uname, photo_url from the 'sensitive_user_portrait' document
      * sensitive_* / *_total_number weibo counters from the 'bci' document
      * sensitive_text, sensitive_time_distribute
      * negetive_index, negetive_influence, sentiment_trend
      * politics_trend, domain, sensitive
      * sensitive_geo_distribute, sensitive_hashtag(_dict/_description),
        sensitive_words(_dict), today_sensitive_words
      * sensitive_retweet, sensitive_follow, sensitive_at

    NOTE(review): the ``date`` argument is never read.  The bci index name is
    pinned to '20130907' and the 7-day window to the week before 2013-09-08
    (the original code marks these as test values) -- confirm before relying
    on this for live data.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    portrait_result = es.get(index='sensitive_user_portrait',
                             doc_type='user',
                             id=uid)['_source']
    results['uname'] = portrait_result['uname']
    if portrait_result['uname'] == 0:
        results['uname'] = 'unknown'
    if portrait_result['photo_url'] == 0:
        portrait_result['photo_url'] = 'unknown'
    if portrait_result['location'] == 0:
        portrait_result['location'] = 'unknown'
    results['photo_url'] = portrait_result['photo_url']

    # sensitive weibo number statistics
    bci_date = '20130907'  # test value; intended to be yesterday's date
    # One fetch serves both the sensitive (s_*) and the plain counters; the
    # same document used to be requested twice.
    try:
        item = es.get(index=bci_date, doc_type='bci', id=uid)['_source']
    except Exception:
        item = {}
    counter_names = (
        'origin_weibo_number',
        'retweeted_weibo_number',
        'comment_weibo_number',
        'origin_weibo_retweeted_total_number',
        'origin_weibo_comment_total_number',
        'retweeted_weibo_retweeted_total_number',
        'retweeted_weibo_comment_total_number',
    )
    try:
        sensitive_counts = dict(
            (name, item.get('s_' + name, 0)) for name in counter_names)
        sensitive_counts['comment_weibo_number'] = int(
            sensitive_counts['comment_weibo_number'])
    except (TypeError, ValueError):
        # unusable comment counter: fall back to zero for every counter
        sensitive_counts = dict.fromkeys(counter_names, 0)
    for name in counter_names:
        results['sensitive_' + name] = sensitive_counts[name]

    results['origin_weibo_total_number'] = item.get(
        'origin_weibo_number', 0) + results['sensitive_origin_weibo_number']
    results['retweeted_weibo_total_number'] = item.get(
        'retweeted_weibo_number',
        0) + results['sensitive_retweeted_weibo_number']
    results['comment_weibo_total_number'] = int(
        item.get('comment_weibo_number', 0)) + int(
            results['sensitive_comment_weibo_number'])
    for name in counter_names[3:]:
        results[name] = item.get(name, 0) + results['sensitive_' + name]

    results['sensitive_text'] = sort_sensitive_text(uid)

    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''

    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = float(emotion_number[1]) / emotion_total
    else:
        # no emotion counts at all: avoid dividing by zero
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0
    sentiment_dict = sentiment_trend[1]

    # The seven day keys (YYYYMMDD) of the window that ends the day before
    # the pinned reference date.
    day_seconds = 24 * 3600
    window_start = datetime2ts('2013-09-08') - 8 * day_seconds
    week_dates = [
        ts2datetime(window_start + offset * day_seconds).replace('-', '')
        for offset in range(1, 8)
    ]

    return_sentiment = dict()
    return_sentiment['positive'] = []
    return_sentiment['neutral'] = []
    return_sentiment['negetive'] = []
    for day in week_dates:
        day_sentiment = sentiment_dict.get(day, {})
        return_sentiment['positive'].append(
            [day_sentiment.get('positive', 0), day])
        return_sentiment['negetive'].append(
            [day_sentiment.get('negetive', 0), day])
        return_sentiment['neutral'].append(
            [day_sentiment.get('neutral', 0), day])
    results['sentiment_trend'] = return_sentiment

    # The portrait document fetched above is reused here instead of being
    # requested a second time.
    results['politics_trend'] = portrait_result['politics_trend']
    results['domain'] = portrait_result['domain']
    results['sensitive'] = portrait_result['sensitive']
    temp_hashtag = portrait_result['sensitive_hashtag_dict']
    temp_sensitive_words = portrait_result['sensitive_words_dict']
    temp_sensitive_geo = portrait_result['sensitive_geo_activity']

    if temp_sensitive_geo:
        sensitive_geo_dict = json.loads(temp_sensitive_geo)
        if len(sensitive_geo_dict) < 7:
            # pad missing days of the window with empty entries
            for day in week_dates:
                if day not in sensitive_geo_dict:
                    sensitive_geo_dict[day] = {}
        sensitive_geo_list = []
        for day, places in sorted(sensitive_geo_dict.items(),
                                  key=lambda x: x[0]):
            # keep the two most frequent places of each day
            top_places = sorted(places.items(),
                                key=lambda x: x[1],
                                reverse=True)[0:2]
            sensitive_geo_list.append([day, top_places])
        results['sensitive_geo_distribute'] = sensitive_geo_list

    if temp_hashtag:
        hashtag_dict = json.loads(temp_hashtag)
        if len(hashtag_dict) < 7:
            for day in week_dates:
                if day in hashtag_dict:
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(),
                                               key=lambda x: x[1],
                                               reverse=True)
                else:
                    hashtag_dict[day] = {}
        results['sensitive_hashtag_description'] = hashtag_description(
            hashtag_dict)
    else:
        hashtag_dict = {}

    if temp_sensitive_words:
        sensitive_words_dict = json.loads(temp_sensitive_words)
        if len(sensitive_words_dict) < 7:
            for day in week_dates:
                if day not in sensitive_words_dict:
                    sensitive_words_dict[day] = {}
    else:
        sensitive_words_dict = {}

    results['today_sensitive_words'] = sensitive_words_dict.get(bci_date, {})

    # Sum hashtag counts over all days.  A day's value is either a
    # {tag: count} dict or an already sorted list of (tag, count) pairs;
    # iterating a dict directly would index characters of the tag string.
    all_hashtag_dict = {}
    for day_hashtags in hashtag_dict.values():
        if isinstance(day_hashtags, dict):
            pairs = day_hashtags.items()
        else:
            pairs = day_hashtags
        for tag, count in pairs:
            if tag in all_hashtag_dict:
                all_hashtag_dict[tag] += count
            else:
                all_hashtag_dict[tag] = count

    all_sensitive_words_dict = {}
    for day_words in sensitive_words_dict.values():
        for word in day_words:
            if word in all_sensitive_words_dict:
                all_sensitive_words_dict[word] += day_words[word]
            else:
                all_sensitive_words_dict[word] = day_words[word]

    sorted_hashtag = sorted(all_hashtag_dict.items(),
                            key=lambda x: x[1],
                            reverse=True)
    sorted_words = sorted(all_sensitive_words_dict.items(),
                          key=lambda x: x[1],
                          reverse=True)
    sorted_hashtag_dict = sorted(hashtag_dict.items(),
                                 key=lambda x: x[0],
                                 reverse=False)
    sorted_words_dict = sorted(sensitive_words_dict.items(),
                               key=lambda x: x[0],
                               reverse=False)
    results['sensitive_hashtag'] = sorted_hashtag
    results['sensitive_words'] = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag_dict'] = sorted_hashtag_dict
    results['sensitive_words_dict'] = sorted_words_dict

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results
Ejemplo n.º 43
0
def imagine(uid, query_fields_dict,index_name="sensitive_user_portrait", doctype='user'):

    """
    uid: search users relate to uid
    query_fields_dict: defined search field weight
    fields: domain, topic, keywords, psycho_status, psycho_feature, activity_geo, hashtag
    for example: "domain": 2
    domain, psycho_feature

    Besides the weighted fields, query_fields_dict must carry:
      'field' -- which score field ranks the hits (activeness, importance,
                 sensitive or influence; anything else disables the factor)
      'size'  -- how many related users to return at most
    The caller's dict is not modified.

    Returns [] when none of the weighted fields has a value for this user.
    Otherwise returns a list whose first element holds the queried user's
    [uid, uname, activeness, importance, influence, sensitive], followed by
    one entry per related user (those six fields, the hit score and a dict of
    the values shared with the queried user), and finally the count returned
    by es.count.

    NOTE(review): es.count runs before any field clause is added to the
    query, so the trailing count is not restricted to matching users --
    confirm whether that is intended.
    """
    personal_info = es.get(index="sensitive_user_portrait", doc_type="user", id=uid, _source=True)['_source']

    # Work on a private copy: keys are popped below and the caller's dict
    # must stay intact.
    field_weights = dict(query_fields_dict)

    keys_list = list(field_weights.keys())
    keys_list.remove('field')
    keys_list.remove('size')

    search_dict = {}
    iter_list = []

    # Keep only the weighted fields this user actually has a value for.
    for iter_key in keys_list:
        if iter_key not in personal_info or personal_info[iter_key] == '':
            field_weights.pop(iter_key)
        else:
            iter_list.append(iter_key)
            search_dict[iter_key] = personal_info[iter_key].split('&')

    if not iter_list:
        return []

    must_clauses = []
    function_score = {
        'query': {'bool': {'must': must_clauses}},
        'field_value_factor': {},
    }
    query_body = {'query': {'function_score': function_score}}

    # factor applied to each supported ranking field
    factor_by_field = {
        "activeness": 100,
        "importance": 0.01,
        "sensitive": 100,
        "influence": 0.1,
    }
    score_standard = {"modifier": "log1p"}
    score_field = field_weights.pop('field')
    if score_field in factor_by_field:
        score_standard['field'] = score_field
        score_standard['factor'] = factor_by_field[score_field]
    else:
        # unknown ranking field: zero factor, scores are summed instead
        score_standard['field'] = "influence"
        score_standard['factor'] = 0
        function_score['boost_mode'] = "sum"
    function_score['field_value_factor'] = score_standard

    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100 # default number
    query_number = field_weights.pop('size') #  required number

    # One "should" group per field; every group must match at least once.
    for (k, v) in field_weights.items():
        should_clauses = [
            {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
            for term in search_dict[k]
        ]
        must_clauses.append({'bool': {'should': should_clauses}})

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid','uname', 'activeness','importance', 'influence', 'sensitive']
    return_list = []
    count = 0
    for item in result:
        if uid == item['_id']:
            # the queried user is not related to himself
            continue
        source = item['_source']
        if not source['uname']:
            source['uname'] = 'unknown'
        info = [source[field] for field in field_list]
        info.append(item['_score'])
        common_dict = dict()
        for iter_key in iter_list:
            iter_common_list = source[iter_key].split('&')
            search_common_list = list(set(iter_common_list) & set(search_dict[iter_key]))
            common_dict[shift_dict[iter_key]] = search_common_list
        info.append(common_dict)
        return_list.append(info)
        count += 1

        if count == query_number:
            break

    return_list.append(number)

    results = [[personal_info[field] for field in field_list]]
    results.extend(return_list)

    return results
def search_attribute_portrait(uid):
    """Build the attribute overview of one user from 'sensitive_user_portrait'.

    Returns None when the portrait document cannot be fetched.  Otherwise
    returns a dict with the basic profile fields, keywords, relations
    (retweet/follow/at), geo activity, hashtag, topic, domain, online pattern,
    description, the importance/activeness/influence/sensitive values with
    their ranks, the total user count, link ratio, posting time trend and a
    7-day influence/attention trend.

    NOTE(review): the 7-day windows are anchored on the hard-coded date
    '2013-09-08' rather than on the current time -- confirm before using
    this with live data.
    """
    return_results = {}
    index_name = "sensitive_user_portrait"
    index_type = "user"

    try:
        search_result = es.get(index=index_name, doc_type=index_type, id=uid)
    except:
        return None
    results = search_result['_source']
    #return_results = results
    # user_type and sensitive flags are both derived from user_type(uid);
    # 'sensitive' is overwritten further down with the stored value.
    user_sensitive = user_type(uid)
    if user_sensitive:
        #return_results.update(sensitive_attribute(uid))
        return_results['user_type'] = 1
        return_results['sensitive'] = 1
    else:
        return_results['user_type'] = 0
        return_results['sensitive'] = 0

    # a stored value of 0 is reported as 'unknown'
    if results['photo_url'] == 0:
        results['photo_url'] = 'unknown'
    if results['location'] == 0:
        results['location'] = 'unknown'
    return_results['photo_url'] = results['photo_url']
    return_results['uid'] = results['uid']
    return_results['uname'] = results['uname']
    if return_results['uname'] == 0:
        return_results['uname'] = 'unknown'
    return_results['location'] = results['location']
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    # keywords: JSON dict, returned as (word, value) pairs sorted descending
    keyword_list = []
    if results['keywords']:
        keywords_dict = json.loads(results['keywords'])
        sort_word_list = sorted(keywords_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['keywords'] = sort_word_list
    else:
        return_results['keywords'] = []


    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    # textual geo description needs both the ip and the geo_activity field
    if results['ip'] and results['geo_activity']:
        ip_dict = json.loads(results['ip'])
        geo_dict = json.loads(results['geo_activity'])
        geo_description = active_geo_description(ip_dict, geo_dict)
        return_results['geo_description'] = geo_description
    else:
        return_results['geo_description'] = ''

    geo_top = []
    temp_geo = {}

    if results['geo_activity']:
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            # pad the seven day keys before 2013-09-08 with empty entries
            ts = time.time()
            ts = datetime2ts('2013-09-08') - 8*24*3600
            for i in range(7):
                ts = ts + 24*3600
                date = ts2datetime(ts).replace('-', '')
                if geo_dict.has_key(date):
                    pass
                else:
                    geo_dict[date] = {}
        activity_geo_list = sorted(geo_dict.items(), key=lambda x:x[0], reverse=False)
        geo_list = geo_dict.values()
        for k,v in activity_geo_list:
            # per day: the two places with the highest value
            sort_v = sorted(v.items(), key=lambda x:x[1], reverse=True)
            top_geo = [item[0] for item in sort_v]
            geo_top.append([k, top_geo[0:2]])
            # across all days: sum the values per place
            for iter_key in v.keys():
                if temp_geo.has_key(iter_key):
                    temp_geo[iter_key] += v[iter_key]
                else:
                    temp_geo[iter_key] = v[iter_key]
        sort_geo_dict = sorted(temp_geo.items(), key=lambda x:x[1], reverse=True)
        return_results['top_activity_geo'] = sort_geo_dict
        return_results['activity_geo_distribute'] = geo_top
    else:
        return_results['top_activity_geo'] = []
        return_results['activity_geo_distribute'] = geo_top

    hashtag_dict = get_user_hashtag(uid)[0]
    return_results['hashtag'] = hashtag_dict

    '''
    emotion_result = {}
    emotion_conclusion_dict = {}
    if results['emotion_words']:
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                if word_type=='126' or word_type=='127':
                    emotion_conclusion_dict[word_type] = word_dict
                sort_word_dict = sorted(word_dict.items(), key=lambda x:x[1], reverse=True)
                word_list = sort_word_dict[:5]
            except:
                results['emotion_words'] = emotion_result
            emotion_result[emotion_mark_dict[word_type]] = word_list
    return_results['emotion_words'] = emotion_result
    '''

    # topic: top five entries by value
    if results['topic']:
        topic_dict = json.loads(results['topic'])
        sort_topic_dict = sorted(topic_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['topic'] = sort_topic_dict[:5]
    else:
        return_results['topic'] = []

    # domain: '_'-separated string split into a list
    if results['domain']:
        domain_string = results['domain']
        domain_list = domain_string.split('_')
        return_results['domain'] = domain_list
    else:
        return_results['domain'] = []
    '''
    # emoticon
    if results['emotion']:
        emotion_dict = json.loads(results['emotion'])
        sort_emotion_dict = sorted(emotion_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['emotion'] = sort_emotion_dict[:5]
    else:
        return_results['emotion'] = []
    '''

    # on_line pattern: top five entries by value
    if results['online_pattern']:
        online_pattern_dict = json.loads(results['online_pattern'])
        sort_online_pattern_dict = sorted(online_pattern_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['online_pattern'] = sort_online_pattern_dict[:5]
    else:
        return_results['online_pattern'] = []



    '''
    #psycho_feature
    if results['psycho_feature']:
        psycho_feature_list = results['psycho_feature'].split('_')
        return_results['psycho_feature'] = psycho_feature_list
    else:
        return_results['psycho_feature'] = []
    '''

    # self_state: the 'description' of the user's profile document, '' when
    # the profile cannot be fetched
    try:
        profile_result = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)
        self_state = profile_result['_source'].get('description', '')
        return_results['description'] = self_state
    except:
        return_results['description'] = ''
    # Rank of a score = number of portrait documents whose value lies between
    # this user's value and a fixed upper bound.  A falsy value, or a count
    # with no successful shard, yields rank 0.
    if results['importance']:
        query_body = {
            'query':{
                'range':{
                    'importance':{
                        'from':results['importance'],
                        'to': 100000
                    }
                }
            }
        }
        importance_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if importance_rank['_shards']['successful'] != 0:
            return_results['importance_rank'] = importance_rank['count']
        else:
            return_results['importance_rank'] = 0
    else:
        return_results['importance_rank'] = 0
    return_results['importance'] = results['importance']

    # NOTE(review): the upper bound here is 10000, the other three ranks use
    # 100000 -- confirm this difference is intended.
    if results['activeness']:
        query_body = {
            'query':{
                'range':{
                    'activeness':{
                        'from':results['activeness'],
                        'to': 10000
                    }
                }
            }
        }
        activeness_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if activeness_rank['_shards']['successful'] != 0:
            return_results['activeness_rank'] = activeness_rank['count']
        else:
            return_results['activeness_rank'] = 0
    else:
        return_results['activeness_rank'] = 0
    return_results['activeness'] = results['activeness']

    if results['influence']:
        query_body = {
            'query':{
                'range':{
                    'influence':{
                        'from':results['influence'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['influence_rank'] = influence_rank['count']
        else:
            return_results['influence_rank'] = 0
    else:
        return_results['influence_rank'] = 0
    return_results['influence'] = results['influence']


    # the local name influence_rank is reused below for the sensitive rank
    if results['sensitive']:
        query_body = {
            'query':{
                'range':{
                    'sensitive':{
                        'from':results['sensitive'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['sensitive_rank'] = influence_rank['count']
        else:
            return_results['sensitive_rank'] = 0
    else:
        return_results['sensitive_rank'] = 0
    # replaces the 0/1 flag stored at the top with the document's value
    return_results['sensitive'] = results['sensitive']

    # total number of portrait documents
    query_body = {
        'query':{
            "match_all":{}
        }
    }
    all_count = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print 'es_sensitive_user_portrait error'
        return_results['all_count'] = 0

    # link
    link_ratio = results['link']
    return_results['link'] = link_ratio

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend: one entry per day for the seven days before
    # 2013-09-08, read from the per-day index (doc_type 'bci')
    influence_detail = []
    influence_value = []
    attention_value = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8*24*3600
    for i in range(1,8):
        date = ts2datetime(ts + i*24*3600).replace('-', '')
        # detail slots: 0 origin count, 1 retweeted count, 2 total retweets,
        # 3 total comments, 4 top retweeted id, 5 its text, 6 its retweet
        # number, 7 top commented id, 8 its text, 9 its comment number
        detail = [0]*10
        try:
            item = es.get(index=date, doc_type='bci', id=uid)['_source']
            '''
            if return_results['utype']:
                detail[0] = item.get('s_origin_weibo_number', 0)
                detail[1] = item.get('s_retweeted_weibo_number', 0)
                detail[2] = item.get('s_origin_weibo_retweeted_total_number', 0) + item.get('s_retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get('s_origin_weibo_comment_total_number', 0) + item.get('s_retweeted_weibo_comment_total_number', 0)
            else:
            '''
            if 1:
                detail[0] = item.get('origin_weibo_number', 0)
                detail[1] = item.get('retweeted_weibo_number', 0)
                detail[2] = item.get('origin_weibo_retweeted_total_number', 0) + item.get('retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get('origin_weibo_comment_total_number', 0) + item.get('retweeted_weibo_comment_total_number', 0)
                retweeted_id = item.get('origin_weibo_top_retweeted_id', '0')
                detail[4] = retweeted_id
                if retweeted_id:
                    try:
                        detail[5] = es.get(index='sensitive_user_text', doc_type='user', id=retweeted_id)['_source']['text']
                    except:
                        detail[5] = ''
                else:
                    detail[5] = ''
                detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
                detail[7] = item.get('origin_weibo_top_comment_id', '0')
                if detail[7]:
                    try:
                        detail[8] = es.get(index='sensitive_user_text', doc_type='user', id=detail[7])['_source']['text']
                    except:
                        detail[8] = ''
                else:
                    detail[8] = ''
                detail[9] = item.get('origin_weibo_comment_top_number', 0)
                # attention: retweets + comments squashed by a logistic
                # curve so that 0 maps to 0 and large totals approach 1
                attention_number = detail[2] + detail[3]
                attention = 2/(1+math.exp(-0.005*attention_number)) - 1
            influence_value.append([date, item['user_index']])
            influence_detail.append([date, detail])
            attention_value.append(attention)
        except:
            # any failure for this day (missing document, missing
            # 'user_index', ...) records zero influence and attention and
            # keeps whatever part of detail was already filled in
            influence_value.append([date, 0])
            influence_detail.append([date, detail])
            attention_value.append(0)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value

    return return_results
Ejemplo n.º 45
0
def _portrait_uname(doc):
    """Return the ``uname`` held by one mget hit, or '' when it has none."""
    try:
        return doc['_source']['uname']
    except (KeyError, TypeError):
        # The hit carries no usable ``_source`` / ``uname``.
        return ''


def _fetch_portrait_docs(uid_list):
    """Return the user_portrait mget hits for ``uid_list`` ([] if it is empty).

    Callers index the returned list positionally, i.e. they rely on the hits
    coming back in the same order as the requested ids.
    """
    if not uid_list:
        # Nothing to resolve, so do not send an id-less mget request.
        return []
    return es.mget(index='user_portrait',
                   doc_type='user',
                   body={'ids': uid_list})['docs']


def _top_counts(counter, limit):
    """Return the ``limit`` largest (key, count) pairs, biggest count first."""
    ranked = sorted(counter.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:limit]


def get_group_results(task_name, module):
    """Return one module of a stored group-analysis task as a list.

    The task document is read from ``index_name``/``index_type`` by id
    ``task_name``.  ``module`` selects what is extracted:

    * ``'overview'``  -> [task_name, submit_date, state, tightness,
      activeness, importance, influence]
    * ``'basic'``     -> [gender (parsed JSON), count, verified (as stored)]
    * ``'activity'``  -> [top 50 activity_geo, activity_trend,
      top 50 online_pattern, geo_track]
    * ``'social'``    -> [retweet relations with unames, density,
      retweet_weibo_count, retweet_user_count,
      out_beretweet relations with unames]
    * ``'think'``     -> [domain, topic, psycho_status, psycho_feature]
    * ``'text'``      -> [top 50 hashtags, top 50 keywords, top 5 emoticons]
    * ``'influence'`` -> [importance_his, activeness_his, influence_his,
      per-user influence rows with weibo links]

    Returns ``None`` when the task document cannot be fetched and ``[]`` for
    an unrecognised ``module``.  A field missing from the document raises
    ``KeyError``.
    """
    try:
        es_result = es.get(index=index_name, doc_type=index_type,
                           id=task_name)['_source']
    except Exception:
        return None

    result = []
    if module == 'overview':
        result = [
            es_result['task_name'], es_result['submit_date'],
            es_result['state'], es_result['tightness'],
            es_result['activeness'], es_result['importance'],
            es_result['influence']
        ]
    elif module == 'basic':
        # ``verified`` is handed back exactly as stored; only gender is parsed.
        result = [
            json.loads(es_result['gender']), es_result['count'],
            es_result['verified']
        ]
    elif module == 'activity':
        activity_geo = _top_counts(json.loads(es_result['activity_geo']), 50)
        activity_trend = json.loads(es_result['activity_trend'])
        online_pattern = _top_counts(
            json.loads(es_result['online_pattern']), 50)
        geo_track = json.loads(es_result['geo_track'])
        result = [activity_geo, activity_trend, online_pattern, geo_track]
    elif module == 'social':
        density = es_result['density']
        retweet_weibo_count = es_result['retweet_weibo_count']
        retweet_user_count = es_result['retweet_user_count']

        # Each relation is read as [source_uid, target_uid, count].
        retweet_relation = json.loads(es_result['retweet_relation'])
        uid_list = []
        for relation in retweet_relation:
            uid_list.append(relation[0])
            uid_list.append(relation[1])
        portrait_docs = _fetch_portrait_docs(uid_list)
        new_retweet_relation = []
        for position, relation in enumerate(retweet_relation):
            # Two ids were requested per relation, so relation ``position``
            # owns hits 2*position (source) and 2*position + 1 (target).
            source_uname = _portrait_uname(portrait_docs[2 * position])
            target_uname = _portrait_uname(portrait_docs[2 * position + 1])
            new_retweet_relation.append([
                relation[0], source_uname, relation[1], target_uname,
                relation[2]
            ])

        # Each out-relation starts with a uid; the rest is copied through.
        out_beretweet_relation = json.loads(
            es_result['out_beretweet_relation'])
        portrait_docs = _fetch_portrait_docs(
            [item[0] for item in out_beretweet_relation])
        new_out_beretweet_relation = []
        for doc, relation in zip(portrait_docs, out_beretweet_relation):
            row = [doc['_id'], _portrait_uname(doc)]
            row.extend(relation[1:])
            new_out_beretweet_relation.append(row)

        result = [
            new_retweet_relation, density, retweet_weibo_count,
            retweet_user_count, new_out_beretweet_relation
        ]
    elif module == 'think':
        result = [
            json.loads(es_result['domain']),
            json.loads(es_result['topic']),
            json.loads(es_result['psycho_status']),
            json.loads(es_result['psycho_feature'])
        ]
    elif module == 'text':
        hashtag = _top_counts(json.loads(es_result['hashtag']), 50)
        emoticon = _top_counts(json.loads(es_result['emoticon']), 5)
        keyword = _top_counts(json.loads(es_result['keywords']), 50)
        result = [hashtag, keyword, emoticon]
    elif module == 'influence':
        importance_his = json.loads(es_result['importance_his'])
        activeness_his = json.loads(es_result['activeness_his'])
        influence_his = json.loads(es_result['influence_his'])
        user_influence_list = json.loads(es_result['user_influence_list'])
        user_influence_result = []
        for user_item in user_influence_list:
            uid = user_item[0]
            result_item = user_item[:5]
            # Slots 5..8 each hold a [number, mid] pair.
            for slot in range(5, 9):
                number = user_item[slot][0]
                mid = user_item[slot][1]
                if mid != 0 and uid:
                    weibolink = weiboinfo2url(uid, mid)
                else:
                    weibolink = None
                result_item.append((number, mid, weibolink))
            user_influence_result.append(result_item)
        result = [
            importance_his, activeness_his, influence_his,
            user_influence_result
        ]
    return result
Ejemplo n.º 46
0
def ajax_portrait_related():
    """Return a JSON summary of the portrait document of the requested uid.

    The uid is read from the ``uid`` query argument of the current request.
    Without a uid the result is an empty JSON object.  Otherwise the portrait
    document is fetched from ``portrait_index_name``/``portrait_index_type``
    and the response carries: domain, topic, politics, tag_detail/tag_string,
    sensitive_words_string/_detail, keywords_string/_detail,
    geo_activity/_detail and hashtag/_detail.

    Errors raised by ``es.get`` (e.g. unknown uid) and ``KeyError`` for a
    missing portrait field are not caught here.
    """
    uid = request.args.get('uid', '')  # uid
    results = dict()
    if not uid:
        return json.dumps(results)

    portrait_result = es.get(index=portrait_index_name,
                             doc_type=portrait_index_type,
                             id=uid)['_source']
    results["domain"] = portrait_result['domain']
    results["topic"] = portrait_result["topic_string"].replace("&", " ")
    results["politics"] = portrait_result["politics"]

    # Custom tags are stored under keys containing "tag-"; everything after
    # the first "-" separated piece is the tag name.
    custom_tag = []
    custom_string = ""
    for key in portrait_result:
        if "tag-" not in key:
            continue
        tag_value = portrait_result[key]
        tag_name = "-".join(key.split("-")[1:])
        # NOTE(review): only the tag visited last ends up in tag_string --
        # confirm whether all tags were meant to be joined.
        custom_string = tag_name + "-" + tag_value
        custom_tag.append([tag_name, tag_value])
    results["tag_detail"] = custom_tag
    results["tag_string"] = custom_string if custom_tag else ""

    sensitive_words_dict = json.loads(portrait_result["sensitive_words_dict"])
    if sensitive_words_dict:
        sorted_sensitive_words = sorted(sensitive_words_dict.items(),
                                        key=lambda pair: pair[1],
                                        reverse=True)
        top_words = [pair[0] for pair in sorted_sensitive_words[:3]]
        results["sensitive_words_string"] = " ".join(top_words)
        results["sensitive_words_detail"] = sorted_sensitive_words
    else:
        results["sensitive_words_string"] = ""
        results["sensitive_words_detail"] = []

    # Despite its name this value is sliced, so it is expected to be a list
    # whose entries start with the keyword.
    keywords_dict = json.loads(portrait_result["keywords_dict"])
    results["keywords_detail"] = keywords_dict
    if keywords_dict:
        results["keywords_string"] = " ".join(
            [entry[0] for entry in keywords_dict[:3]])
    else:
        results["keywords_string"] = ""

    # activity_geo_dict is iterated as a sequence of {place: count} dicts;
    # merge them into one total per place.
    geo_dict = {}
    for geo_counts in json.loads(portrait_result["activity_geo_dict"]):
        for place, count in geo_counts.items():
            if place in geo_dict:
                geo_dict[place] += count
            else:
                geo_dict[place] = count
    if geo_dict:
        sorted_geo_dict = sorted(geo_dict.items(),
                                 key=lambda pair: pair[1],
                                 reverse=True)
        results["geo_activity"] = sorted_geo_dict[0][0]
        results["geo_activity_detail"] = sorted_geo_dict
    else:
        results["geo_activity"] = ""
        results["geo_activity_detail"] = []

    hashtag_dict = json.loads(portrait_result["hashtag_dict"])
    sorted_hashtag_dict = sorted(hashtag_dict.items(),
                                 key=lambda pair: pair[1],
                                 reverse=True)
    if sorted_hashtag_dict:
        results["hashtag"] = sorted_hashtag_dict[0][0]
        results["hashtag_detail"] = sorted_hashtag_dict
    else:
        results["hashtag"] = ""
        results["hashtag_detail"] = []

    return json.dumps(results)
Ejemplo n.º 47
0
def get_group_results(task_name, module):
    """Extract the ``module`` section of the group task ``task_name``.

    The task document is fetched from ``index_name``/``index_type``.  The
    returned list depends on ``module``:

    * ``'overview'``: task_name, submit_date, state, tightness, activeness,
      importance, influence
    * ``'basic'``: parsed gender, count, verified (as stored)
    * ``'activity'``: top 50 activity_geo, activity_trend, top 50
      online_pattern, geo_track
    * ``'social'``: retweet relations with unames, density,
      retweet_weibo_count, retweet_user_count, out_beretweet relations with
      unames
    * ``'think'``: domain, topic, psycho_status, psycho_feature
    * ``'text'``: top 50 hashtags, top 50 keywords, top 5 emoticons
    * ``'influence'``: importance_his, activeness_his, influence_his and the
      per-user influence rows extended with weibo links

    Returns ``None`` if the task document cannot be fetched, ``[]`` if
    ``module`` is not one of the names above.  Raises ``KeyError`` when the
    document lacks a field the module needs.
    """
    try:
        es_result = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        return None

    def parsed(field):
        # Stored fields used here are JSON-encoded strings.
        return json.loads(es_result[field])

    def ranked(field, limit):
        # (key, count) pairs of a stored counter, largest count first.
        pairs = sorted(parsed(field).items(), key=lambda x: x[1], reverse=True)
        return pairs[:limit]

    def portrait_docs(uids):
        # Skip the round trip when there is nothing to resolve.
        if not uids:
            return []
        return es.mget(index='user_portrait', doc_type='user', body={'ids': uids})['docs']

    def uname_in(doc):
        try:
            return doc['_source']['uname']
        except (KeyError, TypeError):
            return ''

    if module == 'overview':
        fields = ['task_name', 'submit_date', 'state', 'tightness',
                  'activeness', 'importance', 'influence']
        return [es_result[name] for name in fields]

    if module == 'basic':
        # Only gender is decoded; verified is returned as stored.
        return [parsed('gender'), es_result['count'], es_result['verified']]

    if module == 'activity':
        activity_geo = ranked('activity_geo', 50)
        activity_trend = parsed('activity_trend')
        online_pattern = ranked('online_pattern', 50)
        geo_track = parsed('geo_track')
        return [activity_geo, activity_trend, online_pattern, geo_track]

    if module == 'social':
        density = es_result['density']
        retweet_weibo_count = es_result['retweet_weibo_count']
        retweet_user_count = es_result['retweet_user_count']

        retweet_relation = parsed('retweet_relation')
        pair_uids = []
        for relation in retweet_relation:
            pair_uids.extend([relation[0], relation[1]])
        docs = portrait_docs(pair_uids)
        new_retweet_relation = []
        cursor = 0
        for relation in retweet_relation:
            # Hits are consumed two at a time: source first, then target.
            source_uname = uname_in(docs[cursor])
            target_uname = uname_in(docs[cursor + 1])
            cursor += 2
            new_retweet_relation.append(
                [relation[0], source_uname, relation[1], target_uname, relation[2]])

        out_beretweet_relation = parsed('out_beretweet_relation')
        docs = portrait_docs([item[0] for item in out_beretweet_relation])
        new_out_beretweet_relation = []
        for doc, relation in zip(docs, out_beretweet_relation):
            enriched = [doc['_id'], uname_in(doc)]
            enriched.extend(relation[1:])
            new_out_beretweet_relation.append(enriched)
        return [new_retweet_relation, density, retweet_weibo_count,
                retweet_user_count, new_out_beretweet_relation]

    if module == 'think':
        return [parsed('domain'), parsed('topic'),
                parsed('psycho_status'), parsed('psycho_feature')]

    if module == 'text':
        hashtag = ranked('hashtag', 50)
        emoticon = ranked('emoticon', 5)
        keyword = ranked('keywords', 50)
        return [hashtag, keyword, emoticon]

    if module == 'influence':
        importance_his = parsed('importance_his')
        activeness_his = parsed('activeness_his')
        influence_his = parsed('influence_his')
        user_influence_result = []
        for user_item in parsed('user_influence_list'):
            uid = user_item[0]
            result_item = user_item[:5]
            # Entries 5..8 are [number, mid] pairs; add a link to each weibo.
            for i in range(5, 9):
                number, mid = user_item[i][0], user_item[i][1]
                weibolink = weiboinfo2url(uid, mid) if (mid != 0 and uid) else None
                result_item.append((number, mid, weibolink))
            user_influence_result.append(result_item)
        return [importance_his, activeness_his, influence_his, user_influence_result]

    return []
Ejemplo n.º 48
0
def _zero_as_unknown(value):
    """Return 'unknown' for a stored 0 placeholder, else ``value`` itself."""
    return 'unknown' if value == 0 else value


def _sorted_counts(encoded, limit=None):
    """Decode a JSON counter and return its items, largest count first.

    A falsy ``encoded`` yields []; ``limit`` (if given) truncates the list.
    """
    if not encoded:
        return []
    ranked = sorted(json.loads(encoded).items(),
                    key=lambda pair: pair[1],
                    reverse=True)
    return ranked if limit is None else ranked[:limit]


def _portrait_week_dates():
    """Return the seven dash-less date strings used as the analysis window.

    The window is anchored on the hard-coded date '2013-09-08': starting
    8 days before it, the dates 1..7 days later are produced in order.
    """
    day = 24 * 3600
    start_ts = datetime2ts('2013-09-08') - 8 * day
    return [ts2datetime(start_ts + offset * day).replace('-', '')
            for offset in range(1, 8)]


def _sensitive_rank(field, value, upper_bound):
    """Count sensitive_user_portrait users matched by a range query on
    ``field`` from ``value`` to ``upper_bound``.

    Returns 0 when ``value`` is falsy or no shard answered successfully.
    """
    if not value:
        return 0
    query_body = {
        'query': {
            'range': {
                field: {
                    'from': value,
                    'to': upper_bound
                }
            }
        }
    }
    counted = es.count(index='sensitive_user_portrait',
                       doc_type='user',
                       body=query_body)
    if counted['_shards']['successful'] != 0:
        return counted['count']
    return 0


def _sensitive_weibo_text(mid):
    """Return the text of weibo ``mid`` from sensitive_user_text, or ''."""
    if not mid:
        return ''
    try:
        return es.get(index='sensitive_user_text',
                      doc_type='user',
                      id=mid)['_source']['text']
    except Exception:
        return ''


def _daily_influence(uid, date):
    """Read one day of 'bci' data for ``uid`` from the index named ``date``.

    Returns ``(user_index, detail, attention)`` where ``detail`` is a
    10-slot list: origin count, retweeted count, total retweets, total
    comments, top retweeted id, its text, its retweet number, top commented
    id, its text, its comment number.  On any failure the user index and
    attention are 0 and ``detail`` keeps whatever was filled in so far.
    """
    detail = [0] * 10
    try:
        item = es.get(index=date, doc_type='bci', id=uid)['_source']
        detail[0] = item.get('origin_weibo_number', 0)
        detail[1] = item.get('retweeted_weibo_number', 0)
        detail[2] = (item.get('origin_weibo_retweeted_total_number', 0) +
                     item.get('retweeted_weibo_retweeted_total_number', 0))
        detail[3] = (item.get('origin_weibo_comment_total_number', 0) +
                     item.get('retweeted_weibo_comment_total_number', 0))
        detail[4] = item.get('origin_weibo_top_retweeted_id', '0')
        detail[5] = _sensitive_weibo_text(detail[4])
        detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
        detail[7] = item.get('origin_weibo_top_comment_id', '0')
        detail[8] = _sensitive_weibo_text(detail[7])
        detail[9] = item.get('origin_weibo_comment_top_number', 0)
        # Squash retweets + comments into a 0..1 score with a logistic curve.
        attention_number = detail[2] + detail[3]
        attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
        return item['user_index'], detail, attention
    except Exception:
        return 0, detail, 0


def search_attribute_portrait(uid):
    """Assemble the portrait attributes of one sensitive user.

    Reads the user's document from the ``sensitive_user_portrait`` index and
    combines it with the project's search helpers into one dict holding:
    profile fields, keywords, retweet/follow/at relations, geo description
    and distribution, hashtags, topic, domain, online pattern, description,
    importance/activeness/influence/sensitive values with their ranks,
    all_count, link, time trend and the 7-day influence trend.

    Returns ``None`` when the portrait document cannot be fetched.  A field
    missing from the document raises ``KeyError``.
    """
    index_name = "sensitive_user_portrait"
    index_type = "user"

    try:
        search_result = es.get(index=index_name, doc_type=index_type, id=uid)
    except Exception:
        return None
    results = search_result['_source']

    return_results = {}
    # The 'sensitive' key is filled further down from the stored value.
    return_results['user_type'] = 1 if user_type(uid) else 0

    return_results['photo_url'] = _zero_as_unknown(results['photo_url'])
    return_results['uid'] = results['uid']
    return_results['uname'] = _zero_as_unknown(results['uname'])
    return_results['location'] = _zero_as_unknown(results['location'])
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])

    return_results['keywords'] = _sorted_counts(results['keywords'])

    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)

    if results['ip'] and results['geo_activity']:
        ip_dict = json.loads(results['ip'])
        geo_dict = json.loads(results['geo_activity'])
        return_results['geo_description'] = active_geo_description(
            ip_dict, geo_dict)
    else:
        return_results['geo_description'] = ''

    # Per-day top places plus the overall place totals.
    geo_top = []
    place_totals = {}
    if results['geo_activity']:
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            # Pad the days of the window that have no record at all.
            for date in _portrait_week_dates():
                if date not in geo_dict:
                    geo_dict[date] = {}
        for date, day_counts in sorted(geo_dict.items(),
                                       key=lambda pair: pair[0]):
            ranked_places = sorted(day_counts.items(),
                                   key=lambda pair: pair[1],
                                   reverse=True)
            geo_top.append([date, [place for place, _ in ranked_places][0:2]])
            for place in day_counts:
                if place in place_totals:
                    place_totals[place] += day_counts[place]
                else:
                    place_totals[place] = day_counts[place]
        return_results['top_activity_geo'] = sorted(place_totals.items(),
                                                    key=lambda pair: pair[1],
                                                    reverse=True)
    else:
        return_results['top_activity_geo'] = []
    return_results['activity_geo_distribute'] = geo_top

    return_results['hashtag'] = get_user_hashtag(uid)[0]

    # topic
    return_results['topic'] = _sorted_counts(results['topic'], 5)

    # domain
    if results['domain']:
        return_results['domain'] = results['domain'].split('_')
    else:
        return_results['domain'] = []

    # on_line pattern
    return_results['online_pattern'] = _sorted_counts(
        results['online_pattern'], 5)

    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user',
                                             doc_type='user',
                                             id=uid)
        return_results['description'] = profile_result['_source'].get(
            'description', '')
    except Exception:
        return_results['description'] = ''

    # Each rank is the number of users whose value falls in the range from
    # this user's value up to a fixed ceiling.
    return_results['importance_rank'] = _sensitive_rank(
        'importance', results['importance'], 100000)
    return_results['importance'] = results['importance']

    return_results['activeness_rank'] = _sensitive_rank(
        'activeness', results['activeness'], 10000)
    return_results['activeness'] = results['activeness']

    return_results['influence_rank'] = _sensitive_rank(
        'influence', results['influence'], 100000)
    return_results['influence'] = results['influence']

    return_results['sensitive_rank'] = _sensitive_rank(
        'sensitive', results['sensitive'], 100000)
    return_results['sensitive'] = results['sensitive']

    all_count = es.count(index='sensitive_user_portrait',
                         doc_type='user',
                         body={'query': {"match_all": {}}})
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print('es_sensitive_user_portrait error')
        return_results['all_count'] = 0

    # link
    return_results['link'] = results['link']

    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend

    # user influence trend
    influence_value = []
    influence_detail = []
    attention_value = []
    for date in _portrait_week_dates():
        user_index, detail, attention = _daily_influence(uid, date)
        influence_value.append([date, user_index])
        influence_detail.append([date, detail])
        attention_value.append(attention)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value

    return return_results
Ejemplo n.º 49
0
def search_full_text(uid, date):
    """Return one day's posts of a user with their retweet/comment counts.

    Searches the ``sensitive_user_text`` index (doc type ``user``) for at
    most 200 documents whose ``uid`` equals ``uid`` and whose ``timestamp``
    lies in ``[datetime2ts(date), datetime2ts(date) + 24h)``.  The counters
    are read from the ``bci`` document with id ``uid`` in the index named
    ``date`` with the dashes removed.

    Args:
        uid: user id used both in the search filter and as ``bci`` doc id.
        date: day string; it is passed to ``datetime2ts`` and, with ``-``
            stripped, used as the name of the ``bci`` index.

    Returns:
        A list with one entry per hit:
        ``[sensitive, message_type, 'HH:MM:SS', geo, text,
        retweeted_number, comment_number]``.  Both counters are 0 when the
        ``bci`` document cannot be fetched, when ``message_type`` is neither
        1 nor 2, or when the post id is not listed in the detail field.

    Raises:
        Whatever ``es.search`` raises; ``KeyError`` when a hit lacks one of
        the fields read below.
    """
    result = []
    ts = datetime2ts(date)
    next_ts = ts + 24 * 3600
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "term": {
                                "uid": uid
                            }
                        }, {
                            "range": {
                                "timestamp": {
                                    "gte": ts,
                                    "lt": next_ts
                                }
                            }
                        }]
                    }
                }
            }
        },
        "size": 200
    }

    search_results = es.search(index='sensitive_user_text',
                               doc_type="user",
                               body=query_body)['hits']['hits']

    # The bci document only depends on (date, uid), so fetch it once instead
    # of once per hit.  It is still skipped entirely when there are no hits.
    weibo_bci = {}
    if search_results:
        try:
            weibo_bci = es.get(index=date.replace('-', ''),
                               doc_type='bci',
                               id=uid)['_source']
        except Exception:
            # Best effort: without the bci document all counters stay at 0.
            weibo_bci = {}

    def _count(field, mid):
        # The detail fields hold a JSON object mapping post id -> count.
        raw = weibo_bci.get(field)
        if not raw:
            return 0
        return json.loads(raw).get(mid, 0)

    for item in search_results:
        source = item['_source']
        sensitive = source['sensitive']
        message_type = source['message_type']
        re_time = time.strftime('%H:%M:%S',
                                time.localtime(float(source['timestamp'])))

        # NOTE(review): the separator is the two characters '/t', not a tab;
        # kept as in the original -- confirm against the stored geo format.
        geo_string = source['geo']
        geo_list = geo_string.split('/t')
        if len(geo_list) >= 3:
            geo = '/t'.join(geo_list[-2:])
        else:
            geo = geo_string

        text = source['text']
        mid = source['mid']

        kind = int(message_type)
        if kind == 1:
            base = 'origin_weibo'
        elif kind == 2:
            base = 'retweeted_weibo'
        else:
            base = None

        retweeted_number = 0
        comment_number = 0
        if base is not None and weibo_bci:
            # Sensitive posts are counted in the 's_'-prefixed fields.
            prefix = 's_' + base if sensitive else base
            retweeted_number = _count(prefix + '_retweeted_detail', mid)
            # The original guarded this lookup with a misspelled key
            # ('..retweetd..'), so retweeted posts always reported 0 comments.
            comment_number = _count(prefix + '_comment_detail', mid)

        result.append([sensitive, message_type, re_time, geo, text,
                       retweeted_number, comment_number])

    return result
Ejemplo n.º 50
0
def sort_sensitive_text(uid):
    """Build display rows for the sensitive posts of a user.

    Takes the hits returned by ``search_sensitive_text(uid)``, skips those
    whose ``sensitive`` field is falsy and, for each remaining post, derives
    a coarse sentiment and looks up its retweet/comment counts in the ``bci``
    document of the post's day.

    Args:
        uid: user id passed to ``search_sensitive_text``.

    Returns:
        A list of ``[timestamp, geo, text, sensitive_words, retweeted_link,
        sentiment, message_type, retweeted_number, comment_number]`` where

        * ``text`` is the post text encoded as UTF-8,
        * ``sensitive_words`` is the list of keys of the post's
          ``sensitive_words`` JSON object (empty list when absent),
        * ``retweeted_link`` is ``extract_uname(text)``,
        * ``sentiment`` is 1, -1 or 0 depending on whether the entries under
          key ``'126'`` outnumber, are outnumbered by, or equal those under
          ``'127'``, ``'128'`` and ``'129'`` combined,
        * both counters are 0 when they cannot be determined.
    """

    def _mid_count(bci_doc, field, mid):
        # search_full_text decodes these fields with json.loads, so they are
        # presumably JSON strings; a plain dict is accepted as well.  The
        # original called .get() on the raw value, which fails on a string
        # and was silently turned into 0 by the surrounding except.
        raw = bci_doc.get(field)
        if not raw:
            return 0
        if not isinstance(raw, dict):
            raw = json.loads(raw)
        return raw.get(mid, 0)

    text_all = []
    sensitive_text = search_sensitive_text(uid)
    if not sensitive_text:
        return text_all

    for hit in sensitive_text:
        item = hit['_source']
        if not item['sensitive']:
            continue

        text = item['text'].encode('utf-8', 'ignore')

        sentiment_dict = json.loads(item['sentiment'])
        sentiment = 0
        if sentiment_dict:
            positive = len(sentiment_dict.get('126', {}))
            negative = (len(sentiment_dict.get('127', {})) +
                        len(sentiment_dict.get('128', {})) +
                        len(sentiment_dict.get('129', {})))
            if positive > negative:
                sentiment = 1
            elif positive < negative:
                sentiment = -1

        ts = item['timestamp']
        # The bci document is keyed by the uid stored on the post itself.
        post_uid = item['uid']
        mid = item['mid']
        message_type = item.get('message_type', 0)
        date = ts2datetime(float(ts)).replace('-', '')

        retweeted_number = 0
        comment_number = 0
        try:
            bci_result = es.get(index=date, doc_type='bci',
                                id=post_uid)['_source']
            kind = int(message_type)
            if kind == 1:
                prefix = 's_origin_weibo'
            elif kind == 2:
                prefix = 's_retweeted_weibo'
            else:
                prefix = None
            if prefix is not None:
                retweeted_number = _mid_count(
                    bci_result, prefix + '_retweeted_detail', mid)
                comment_number = _mid_count(
                    bci_result, prefix + '_comment_detail', mid)
        except Exception:
            # Best effort: a missing or malformed bci document yields 0/0.
            retweeted_number = 0
            comment_number = 0

        single_sw = item.get('sensitive_words', {})
        if single_sw:
            sw = list(json.loads(single_sw))
        else:
            sw = []

        geo = item['geo']
        retweeted_link = extract_uname(text)
        text_all.append([
            ts, geo, text, sw, retweeted_link, sentiment, message_type,
            retweeted_number, comment_number
        ])
    return text_all
def sensitive_attribute(uid, date):
    """Assemble the sensitive-activity profile of one user.

    Combines the user's ``sensitive_user_portrait`` document, one ``bci``
    document, and the results of ``sort_sensitive_text``, ``get_user_trend``,
    ``user_sentiment_trend``, ``search_retweet``, ``search_follower`` and
    ``search_mention`` into a single dict.

    Args:
        uid: user id.
        date: NOTE(review): accepted but not used.  As in the original code,
            every date-dependent lookup uses the hard-coded test dates
            defined at the top of the body -- confirm before production use.

    Returns:
        ``{'utype': 0}`` when ``user_type(uid)`` is falsy.  Otherwise a dict
        with ``utype`` = 1 and, among others, these groups of keys:

        * ``uid``, ``uname``, ``photo_url`` (a stored value equal to 0 is
          reported as ``'unknown'``), ``politics_trend``, ``domain``,
          ``sensitive``;
        * ``sensitive_*`` counters read from the ``s_*`` fields of the bci
          document and ``*_total_number`` counters that add the plain and
          the sensitive value;
        * ``sensitive_text``, ``sensitive_time_distribute``;
        * ``negetive_index``, ``negetive_influence``, ``sentiment_trend``;
        * ``sensitive_geo_distribute``, ``sensitive_hashtag``,
          ``sensitive_hashtag_dict``, ``sensitive_hashtag_description``,
          ``sensitive_words``, ``sensitive_words_dict``,
          ``today_sensitive_words``;
        * ``sensitive_retweet``, ``sensitive_follow``, ``sensitive_at``.

    Raises:
        Whatever ``es.get`` raises for the portrait document; ``KeyError``
        when the portrait lacks one of the fields read below.
    """
    # Hard-coded test fixtures carried over unchanged from the original.
    bci_date = '20130907'
    window_end = '2013-09-08'
    day_seconds = 24 * 3600

    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    # The portrait document is fetched once and reused further down (the
    # original fetched the same document a second time).
    portrait = es.get(index='sensitive_user_portrait', doc_type='user',
                      id=uid)['_source']
    results['uname'] = portrait['uname']
    if portrait['uname'] == 0:
        results['uname'] = 'unknown'
    results['photo_url'] = portrait['photo_url']
    if portrait['photo_url'] == 0:
        results['photo_url'] = 'unknown'

    # --- weibo number statistics --------------------------------------
    # One fetch serves both the sensitive and the total counters (the
    # original issued two identical requests).
    try:
        bci = es.get(index=bci_date, doc_type='bci', id=uid)['_source']
    except Exception:
        bci = {}

    counter_names = (
        'origin_weibo_number',
        'retweeted_weibo_number',
        'comment_weibo_number',
        'retweeted_weibo_retweeted_total_number',
        'origin_weibo_retweeted_total_number',
        'origin_weibo_comment_total_number',
        'retweeted_weibo_comment_total_number',
    )
    sensitive_counts = {}
    for name in counter_names:
        sensitive_counts['sensitive_' + name] = bci.get('s_' + name, 0)
    try:
        sensitive_counts['sensitive_comment_weibo_number'] = int(
            sensitive_counts['sensitive_comment_weibo_number'])
    except (TypeError, ValueError):
        # As before, an unusable comment number resets all seven counters.
        for name in counter_names:
            sensitive_counts['sensitive_' + name] = 0
    results.update(sensitive_counts)

    results['origin_weibo_total_number'] = (
        bci.get('origin_weibo_number', 0) +
        results['sensitive_origin_weibo_number'])
    results['retweeted_weibo_total_number'] = (
        bci.get('retweeted_weibo_number', 0) +
        results['sensitive_retweeted_weibo_number'])
    results['comment_weibo_total_number'] = (
        int(bci.get('comment_weibo_number', 0)) +
        int(results['sensitive_comment_weibo_number']))
    for name in ('origin_weibo_retweeted_total_number',
                 'origin_weibo_comment_total_number',
                 'retweeted_weibo_retweeted_total_number',
                 'retweeted_weibo_comment_total_number'):
        results[name] = bci.get(name, 0) + results['sensitive_' + name]

    results['sensitive_text'] = sort_sensitive_text(uid)

    # Defaults; the geo list and the hashtag description keep these values
    # when the portrait has no corresponding data.
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag_description'] = ''

    # --- sentiment ----------------------------------------------------
    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[2] + emotion_number[1] + emotion_number[0]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        results['negetive_influence'] = (
            float(emotion_number[1]) / emotion_total)
    else:
        # No counted posts at all: report 0 instead of dividing by zero.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0
    sentiment_dict = sentiment_trend[1]

    # The seven day keys (dashes removed) ending the day before window_end.
    # The original recomputed this same list in four separate loops.
    week_dates = []
    day_ts = datetime2ts(window_end) - 8 * day_seconds
    for _ in range(7):
        day_ts = day_ts + day_seconds
        week_dates.append(ts2datetime(day_ts).replace('-', ''))

    return_sentiment = {'positive': [], 'neutral': [], 'negetive': []}
    for day in week_dates:
        daily_sentiment = sentiment_dict.get(day, {})
        for label in ('positive', 'negetive', 'neutral'):
            return_sentiment[label].append(
                [daily_sentiment.get(label, 0), day])
    results['sentiment_trend'] = return_sentiment

    # --- portrait-derived fields ----------------------------------------
    results['politics_trend'] = portrait['politics_trend']
    results['domain'] = portrait['domain']
    results['sensitive'] = portrait['sensitive']
    temp_hashtag = portrait['sensitive_hashtag_dict']
    temp_sensitive_words = portrait['sensitive_words_dict']
    temp_sensitive_geo = portrait['sensitive_geo_activity']

    if temp_sensitive_geo:
        sensitive_geo_dict = json.loads(temp_sensitive_geo)
        if len(sensitive_geo_dict) < 7:
            for day in week_dates:
                if day not in sensitive_geo_dict:
                    sensitive_geo_dict[day] = {}
        sensitive_geo_list = []
        for day, places in sorted(sensitive_geo_dict.items(),
                                  key=lambda x: x[0]):
            # Keep the two places with the highest count for each day.
            top_places = sorted(places.items(), key=lambda x: x[1],
                                reverse=True)[0:2]
            sensitive_geo_list.append([day, top_places])
        results['sensitive_geo_distribute'] = sensitive_geo_list

    if temp_hashtag:
        hashtag_dict = json.loads(temp_hashtag)
        if len(hashtag_dict) < 7:
            for day in week_dates:
                if day in hashtag_dict:
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(),
                                               key=lambda x: x[1],
                                               reverse=True)
                else:
                    hashtag_dict[day] = {}
        results['sensitive_hashtag_description'] = hashtag_description(
            hashtag_dict)
    else:
        hashtag_dict = {}

    if temp_sensitive_words:
        sensitive_words_dict = json.loads(temp_sensitive_words)
        if len(sensitive_words_dict) < 7:
            for day in week_dates:
                if day not in sensitive_words_dict:
                    sensitive_words_dict[day] = {}
    else:
        sensitive_words_dict = {}

    results['today_sensitive_words'] = sensitive_words_dict.get(bci_date, {})

    # Per-day hashtag values are a mix of dicts and sorted (tag, count)
    # lists, depending on the branch taken above.  The original indexed
    # key[0]/key[1] on both, which for dict values picked characters out of
    # the tag string instead of the tag and its count.
    all_hashtag_dict = {}
    for daily_tags in hashtag_dict.values():
        if isinstance(daily_tags, dict):
            pairs = daily_tags.items()
        else:
            pairs = daily_tags
        for tag, count in pairs:
            all_hashtag_dict[tag] = all_hashtag_dict.get(tag, 0) + count

    all_sensitive_words_dict = {}
    for daily_words in sensitive_words_dict.values():
        for word in daily_words:
            all_sensitive_words_dict[word] = (
                all_sensitive_words_dict.get(word, 0) + daily_words[word])

    sorted_hashtag = sorted(all_hashtag_dict.items(), key=lambda x: x[1],
                            reverse=True)
    sorted_words = sorted(all_sensitive_words_dict.items(),
                          key=lambda x: x[1], reverse=True)
    results['sensitive_hashtag'] = sorted_hashtag
    results['sensitive_words'] = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag_dict'] = sorted(hashtag_dict.items(),
                                               key=lambda x: x[0])
    results['sensitive_words_dict'] = sorted(sensitive_words_dict.items(),
                                             key=lambda x: x[0])

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)

    return results
def search_full_text(uid, date):
    """Return a user's posts from one flow-text index with their counts.

    Searches the index ``flow_text_index_name_pre + date`` (doc type
    ``flow_text_index_type``) for at most 200 documents whose ``uid`` equals
    ``uid``, newest ``timestamp`` first.  The counters are read from the
    ``bci`` document with id ``uid`` in the index named ``date`` with the
    dashes removed.

    Args:
        uid: user id used both in the search filter and as ``bci`` doc id.
        date: day string; appended to the flow-text index prefix and, with
            ``-`` stripped, used as the name of the ``bci`` index.

    Returns:
        A list with one entry per hit:
        ``[sensitive, message_type, 'HH:MM:SS', geo, text,
        retweeted_number, comment_number]``.  ``sensitive`` is 0 when the
        hit has no such field.  Both counters are 0 when the ``bci``
        document cannot be fetched, when ``message_type`` is neither 1 nor
        2, or when the post id is not listed in the detail field.

    Raises:
        Whatever ``es.search`` raises; ``KeyError`` when a hit lacks one of
        the mandatory fields read below.
    """
    result = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"uid": uid}}
                        ]
                    }
                }
            }
        },
        "size": 200,
        "sort": {"timestamp": {"order": "desc"}}
    }

    search_results = es.search(index=flow_text_index_name_pre + date,
                               doc_type=flow_text_index_type,
                               body=query_body)['hits']['hits']

    # The bci document only depends on (date, uid), so fetch it once instead
    # of once per hit.  It is still skipped entirely when there are no hits.
    weibo_bci = {}
    if search_results:
        try:
            weibo_bci = es.get(index=date.replace('-', ''),
                               doc_type='bci',
                               id=uid)['_source']
        except Exception:
            # Best effort: without the bci document all counters stay at 0.
            weibo_bci = {}

    def _count(field, mid):
        # The detail fields hold a JSON object mapping post id -> count.
        raw = weibo_bci.get(field)
        if not raw:
            return 0
        return json.loads(raw).get(mid, 0)

    for item in search_results:
        source = item['_source']
        sensitive = source.get('sensitive', 0)
        message_type = source['message_type']
        re_time = time.strftime('%H:%M:%S',
                                time.localtime(float(source['timestamp'])))

        # NOTE(review): the separator is the two characters '/t', not a tab;
        # kept as in the original -- confirm against the stored geo format.
        geo_string = source['geo']
        geo_list = geo_string.split('/t')
        if len(geo_list) >= 3:
            geo = '/t'.join(geo_list[-2:])
        else:
            geo = geo_string

        text = source['text']
        mid = source['mid']

        kind = int(message_type)
        if kind == 1:
            base = 'origin_weibo'
        elif kind == 2:
            base = 'retweeted_weibo'
        else:
            base = None

        retweeted_number = 0
        comment_number = 0
        if base is not None and weibo_bci:
            # Sensitive posts are counted in the 's_'-prefixed fields.
            prefix = 's_' + base if sensitive else base
            retweeted_number = _count(prefix + '_retweeted_detail', mid)
            # The original guarded this lookup with a misspelled key
            # ('..retweetd..'), so retweeted posts always reported 0 comments.
            comment_number = _count(prefix + '_comment_detail', mid)

        result.append([sensitive, message_type, re_time, geo, text,
                       retweeted_number, comment_number])

    return result
Ejemplo n.º 53
0
def ajax_sort_sensitive_words():
    """Return a user's sensitive words as JSON, filtered by level/category.

    Reads three query parameters from ``request.args``:

    * ``uid``      -- id of the ``sensitive_user_portrait`` document.
    * ``level``    -- 0: all levels, 1/2/3: only that level.  A missing or
      empty value is treated as 0; any other integer selects level 3, as in
      the original code.
    * ``category`` -- when non-empty, only entries whose fourth element
      equals it are kept.

    The per-day word counts stored in ``sensitive_words_dict`` are summed per
    word, ordered by descending total and passed through
    ``sort_sensitive_words``.  Each entry of that result is read as
    ``item[2]`` = level and ``item[3]`` = category.

    Returns:
        A JSON string.  For level 0 without category it is the complete
        ``sort_sensitive_words`` output; otherwise the matching entries,
        grouped level 1, then 2, then 3 when level is 0.

    Raises:
        ValueError: when ``level`` is non-empty but not an integer.
        Whatever ``es.get`` raises when the portrait cannot be fetched.
    """
    level_order = request.args.get('level',
                                   '')  # 0:all, 1:level 1, 2:level2, 3:level3
    category_order = request.args.get('category', '')  # '': all
    uid = request.args.get('uid', '')

    words_dict = json.loads(
        es.get(index='sensitive_user_portrait',
               doc_type='user',
               id=uid)['_source']['sensitive_words_dict'])

    # Sum the counts of every word over all days.
    all_words_dict = {}
    for daily_words in words_dict.values():
        for word in daily_words:
            all_words_dict[word] = (all_words_dict.get(word, 0) +
                                    daily_words[word])
    sorted_words = sorted(all_words_dict.items(),
                          key=lambda x: x[1],
                          reverse=True)
    new_words_list = sort_sensitive_words(sorted_words)

    # Bucket the entries by level, applying the category filter if any.
    by_level = {1: [], 2: [], 3: []}
    for item in new_words_list:
        bucket = by_level.get(int(item[2]))
        if bucket is None:
            continue
        if not category_order or item[3] == category_order:
            bucket.append(item)

    # The default for a missing 'level' is '', which int() rejects; treat it
    # as 0 ("all") instead of failing the request.
    level = int(level_order) if level_order else 0

    if level == 0:
        if not category_order:
            return json.dumps(new_words_list)
        new_list = by_level[1] + by_level[2] + by_level[3]
    elif level == 1:
        new_list = by_level[1]
    elif level == 2:
        new_list = by_level[2]
    else:
        new_list = by_level[3]

    return json.dumps(new_list)