def statistics_influence_people(uid, date, style):
    """Describe the users who retweeted or commented on ``uid``'s weibos.

    The per-user influence record is read from the daily index
    (``pre_index`` + ``date`` with the dashes removed).  Depending on
    ``style`` either the retweet or the comment detail fields of that record
    are used, and the mids they yield are handed to
    ``influenced_user_detail``.  According to the original author's note the
    result carries domain, topic and registration-geo distributions; those
    are produced by ``influenced_user_detail``, which is not visible here.

    Args:
        uid: id of the influence document to load.
        date: date string; used verbatim as the suffix of the flow-text
            index and, with "-" stripped, as the suffix of the influence
            index.
        style: ``int(style) == 0`` selects retweets, any other value selects
            comments.

    Returns:
        ``{}`` when the influence record cannot be fetched; otherwise the
        value returned by ``influenced_user_detail`` with an extra
        ``"total_number"`` key holding the summed counts of the two selected
        detail fields.
    """
    index_name = pre_index + str(date).replace("-", "")
    index_flow_text = pre_text_index + date

    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        # Best effort: a missing record or a failed lookup means nothing to report.
        return {}

    if int(style) == 0:  # retweeted
        origin_key = "origin_weibo_retweeted_detail"
        retweeted_key = "retweeted_weibo_retweeted_detail"
        # NOTE(review): 3 / 2 look like message-type codes understood by
        # influenced_user_detail -- confirm against that helper.
        message_flag = 3
    else:  # comment
        origin_key = "origin_weibo_comment_detail"
        retweeted_key = "retweeted_weibo_comment_detail"
        message_flag = 2

    # Only the two fields needed for the requested style are parsed.
    # Their values are summed below, so they are presumably mid -> count maps.
    origin_detail = json.loads(bci_result[origin_key])
    retweeted_detail = json.loads(bci_result[retweeted_key])
    total_number = sum(origin_detail.values()) + sum(retweeted_detail.values())

    origin_mid = filter_mid(origin_detail) if origin_detail else []
    retweeted_mid = filter_mid(retweeted_detail) if retweeted_detail else []

    # Resolve each retweeted weibo to the root weibo it was retweeted from.
    root_mids = []
    if retweeted_mid:
        docs = es.mget(index=index_flow_text, doc_type=flow_text_index_type, body={"ids": retweeted_mid})["docs"]
        # The document body of an mget hit lives under "_source"; the old
        # lookup used "source", so every entry fell back to "0".
        root_mids = [doc.get("_source", {}).get("root_mid", "0") for doc in docs]

    results = influenced_user_detail(uid, date, origin_mid, root_mids, message_flag)
    results["total_number"] = total_number
    return results
def get_text(top_list, date, style):
    """Attach weibo text and author details to each entry of ``top_list``.

    Args:
        top_list: sequence of entries whose first item is a weibo mid, e.g.
            ``[[mid1, n1], [mid2, n2]]``.
        date: suffix appended to ``pre_text_index`` to name the flow-text
            index that is queried.
        style: not used by this function; kept so existing callers still work.

    Returns:
        One list per entry of ``top_list``: the entry's own items followed by
        text, geo, formatted timestamp, sentiment, weibo url, user url, uid
        and nick name.  When the weibo is not found the eight trailing fields
        are empty strings.  An empty ``top_list`` yields ``[]``.
    """
    if not top_list:
        return []

    index_flow_text = pre_text_index + date
    mid_list = [item[0] for item in top_list]
    search_result = es.mget(index=index_flow_text, doc_type=flow_text_index_type, body={"ids": mid_list})["docs"]

    results = []
    # mget answers with one entry per requested id, in request order.
    for entry, doc in zip(top_list, search_result):
        row = list(entry)
        # An entry without a "found" flag is treated as a miss.
        if doc.get("found"):
            source = doc["_source"]
            author_uid = source["uid"]
            row.extend([
                source["text"],
                source["geo"],
                ts2date(source["timestamp"]),
                source["sentiment"],
                weiboinfo2url(author_uid, source["mid"]),
                uid_url + author_uid,
                author_uid,
            ])
            try:
                uname = es_profile.get(
                    index=profile_index_name, doc_type=profile_index_type, id=author_uid
                )["_source"]["nick_name"]
            except Exception:
                # Profile lookup is best effort; fall back to a placeholder name.
                uname = "unknown"
            row.append(uname)
        else:
            row.extend([""] * 8)
        results.append(row)
    return results
def get_text(top_list, date, user_info, style):
    """Build display rows for ranked weibos: counts, text and author details.

    Each entry of ``top_list`` is ``[mid, count]``.  ``style`` decides whether
    that count is the retweet count or the comment count; the other count is
    looked up by mid in one JSON-encoded detail field of ``user_info``
    (default ``0``):

    * ``0``: count is retweets, comments from ``origin_weibo_comment_detail``
    * ``1``: count is comments, retweets from ``origin_weibo_retweeted_detail``
    * ``2``: count is retweets, comments from ``retweeted_weibo_comment_detail``
    * other: count is comments, retweets from
      ``retweeted_weibo_retweeted_detail``

    Args:
        top_list: sequence of ``[mid, count]`` entries.
        date: suffix appended to ``pre_text_index`` to name the flow-text index.
        user_info: mapping holding the JSON-encoded detail fields named above.
        style: value accepted by ``int()`` selecting one of the cases above.

    Returns:
        One row per entry: mid, retweeted, comment, text, geo, formatted
        timestamp, sentiment, weibo url, user url, uid, nick name.  When the
        weibo is not found the eight trailing fields are empty strings.  An
        empty ``top_list`` yields ``[]``.
    """
    if not top_list:
        return []

    # style -> (detail field supplying the other count, does the ranked count go first?)
    style_table = {
        0: ("origin_weibo_comment_detail", True),
        1: ("origin_weibo_retweeted_detail", False),
        2: ("retweeted_weibo_comment_detail", True),
    }
    detail_field, ranked_first = style_table.get(int(style), ("retweeted_weibo_retweeted_detail", False))
    # Parsed once here instead of once per row.
    other_counts = json.loads(user_info[detail_field])

    index_flow_text = pre_text_index + date
    mid_list = [entry[0] for entry in top_list]
    search_result = es.mget(index=index_flow_text, doc_type=flow_text_index_type, body={"ids": mid_list})["docs"]

    results = []
    # mget answers with one entry per requested id, in request order.
    for entry, doc in zip(top_list, search_result):
        mid, ranked_count = entry[0], entry[1]
        looked_up = other_counts.get(mid, 0)
        if ranked_first:
            row = [mid, ranked_count, looked_up]
        else:
            row = [mid, looked_up, ranked_count]

        # An entry without a "found" flag is treated as a miss.
        if doc.get("found"):
            source = doc["_source"]
            author_uid = source["uid"]
            row.extend([
                source["text"],
                source["geo"],
                ts2date(source["timestamp"]),
                source["sentiment"],
                weiboinfo2url(author_uid, source["mid"]),
                uid_url + author_uid,
                author_uid,
            ])
            try:
                uname = es_profile.get(
                    index=profile_index_name, doc_type=profile_index_type, id=author_uid
                )["_source"]["nick_name"]
            except Exception:
                # Profile lookup is best effort; fall back to a placeholder name.
                uname = "unknown"
            row.append(uname)
        else:
            row.extend([""] * 8)
        results.append(row)
    return results
Exemple #4
0
def get_text(top_list, date, user_info, style):
    """Build display rows for ranked weibos: counts, text and author details.

    Each entry of ``top_list`` is ``[mid, count]``.  ``style`` decides whether
    that count is the retweet count or the comment count; the other count is
    looked up by mid in one JSON-encoded detail field of ``user_info``
    (default ``0``):

    * ``0``: count is retweets, comments from ``origin_weibo_comment_detail``
    * ``1``: count is comments, retweets from ``origin_weibo_retweeted_detail``
    * ``2``: count is retweets, comments from ``retweeted_weibo_comment_detail``
    * other: count is comments, retweets from
      ``retweeted_weibo_retweeted_detail``

    Args:
        top_list: sequence of ``[mid, count]`` entries.
        date: suffix appended to ``pre_text_index`` to name the flow-text index.
        user_info: mapping holding the JSON-encoded detail fields named above.
        style: value accepted by ``int()`` selecting one of the cases above.

    Returns:
        One row per entry: mid, retweeted, comment, text, geo, formatted
        timestamp, sentiment, weibo url, user url, uid, nick name.  When the
        weibo is not found the eight trailing fields are empty strings.  An
        empty ``top_list`` yields ``[]``.
    """
    if not top_list:
        return []

    style = int(style)
    # Pick the detail field that supplies the count NOT carried by top_list,
    # and remember on which side of it the ranked count is placed.
    if style == 0:
        detail_field, ranked_first = "origin_weibo_comment_detail", True
    elif style == 1:
        detail_field, ranked_first = "origin_weibo_retweeted_detail", False
    elif style == 2:
        detail_field, ranked_first = "retweeted_weibo_comment_detail", True
    else:
        detail_field, ranked_first = "retweeted_weibo_retweeted_detail", False
    # Parsed once here instead of once per row.
    other_counts = json.loads(user_info[detail_field])

    index_flow_text = pre_text_index + date
    mid_list = [entry[0] for entry in top_list]
    search_result = es.mget(index=index_flow_text, doc_type=flow_text_index_type, body={"ids": mid_list})["docs"]

    results = []
    # mget answers with one entry per requested id, in request order.
    for entry, doc in zip(top_list, search_result):
        mid, ranked_count = entry[0], entry[1]
        looked_up = other_counts.get(mid, 0)
        row = [mid, ranked_count, looked_up] if ranked_first else [mid, looked_up, ranked_count]

        # An entry without a "found" flag is treated as a miss.
        if doc.get("found"):
            source = doc["_source"]
            author_uid = source["uid"]
            row.append(source["text"])
            row.append(source["geo"])
            row.append(ts2date(source["timestamp"]))
            row.append(source["sentiment"])
            row.append(weiboinfo2url(author_uid, source["mid"]))
            row.append(uid_url + author_uid)
            row.append(author_uid)
            try:
                uname = es_profile.get(
                    index=profile_index_name, doc_type=profile_index_type, id=author_uid
                )["_source"]["nick_name"]
            except Exception:
                # Profile lookup is best effort; fall back to a placeholder name.
                uname = "unknown"
            row.append(uname)
        else:
            row.extend([""] * 8)
        results.append(row)
    return results
Exemple #5
0
def statistics_influence_people(uid, date, style):
    """Describe the users who retweeted or commented on ``uid``'s weibos.

    The per-user influence record is read from the daily index
    (``pre_index`` + ``date`` with the dashes removed).  Depending on
    ``style`` either the retweet or the comment detail fields of that record
    are used, and the mids they yield are handed to
    ``influenced_user_detail``.  According to the original author's note the
    result carries domain, topic and registration-geo distributions; those
    are produced by ``influenced_user_detail``, which is not visible here.

    Args:
        uid: id of the influence document to load.
        date: date string; used verbatim as the suffix of the flow-text
            index and, with '-' stripped, as the suffix of the influence
            index.
        style: ``int(style) == 0`` selects retweets, any other value selects
            comments.

    Returns:
        ``{}`` when the influence record cannot be fetched; otherwise the
        value returned by ``influenced_user_detail`` with an extra
        ``"total_number"`` key holding the summed counts of the two selected
        detail fields.
    """
    index_name = pre_index + str(date).replace('-', '')
    index_flow_text = pre_text_index + date

    try:
        bci_result = es_cluster.get(index=index_name,
                                    doc_type=influence_doctype,
                                    id=uid)["_source"]
    except Exception:
        # Best effort: a missing record or a failed lookup means nothing to
        # report.
        return {}

    retweet_mode = int(style) == 0
    kind = "retweeted" if retweet_mode else "comment"
    # NOTE(review): 3 / 2 look like message-type codes understood by
    # influenced_user_detail -- confirm against that helper.
    message_flag = 3 if retweet_mode else 2

    # Only the two fields needed for the requested style are parsed.
    # Their values are summed below, so they are presumably mid -> count maps.
    origin_detail = json.loads(bci_result["origin_weibo_%s_detail" % kind])
    retweeted_detail = json.loads(
        bci_result["retweeted_weibo_%s_detail" % kind])
    total_number = sum(origin_detail.values()) + sum(
        retweeted_detail.values())

    origin_mid = filter_mid(origin_detail) if origin_detail else []
    retweeted_mid = filter_mid(retweeted_detail) if retweeted_detail else []

    # Resolve each retweeted weibo to the root weibo it was retweeted from.
    root_mids = []
    if retweeted_mid:
        docs = es.mget(index=index_flow_text,
                       doc_type=flow_text_index_type,
                       body={"ids": retweeted_mid})["docs"]
        # The document body of an mget hit lives under "_source"; the old
        # lookup used "source", so every entry fell back to '0'.
        root_mids = [
            doc.get("_source", {}).get("root_mid", '0') for doc in docs
        ]

    results = influenced_user_detail(uid, date, origin_mid, root_mids,
                                     message_flag)
    results["total_number"] = total_number
    return results