def get_group_list(task_name, submit_user):
    """Return one attribute row per member of a submitted group task.

    The task document is looked up in ``group_index_name`` by exact
    ``task_name`` and ``submit_user`` terms. Its ``uid_list`` is then fetched
    from the portrait index with ``mget`` and every returned document becomes
    a row::

        [uid, uname, gender, location, normal_importance, normal_influence]

    The two normalised values are ``log10(value / max * 9 + 1) * 100`` where
    ``max`` is read from ``get_evaluate_max()``.

    A document whose attributes cannot be read or normalised (for example one
    without ``_source``) yields ``[uid, "", "", "", "", ""]`` instead of
    aborting the whole list.

    Raises:
        IndexError: if the search returns no hit for the task.
    """
    query_body = {
        "query": {"bool": {"must": [{"term": {"task_name": task_name}}, {"term": {"submit_user": submit_user}}]}}
    }
    hits = es_group_result.search(index=group_index_name, doc_type=group_index_type, body=query_body)["hits"]["hits"]
    # Left unguarded on purpose: an empty hit list keeps raising IndexError,
    # exactly as callers of the original code would have seen.
    uid_list = hits[0]["_source"]["uid_list"]

    user_portrait_attribute = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type, body={"ids": uid_list}
    )["docs"]
    evaluate_max = get_evaluate_max()

    results = []
    for item in user_portrait_attribute:
        uid = item["_id"]
        try:
            source = item["_source"]
            # float() forces true division. Under Python 2, int / int floors,
            # which would turn every ratio below 1 into 0 and the score into 0.
            importance_ratio = float(source["importance"]) / evaluate_max["importance"]
            influence_ratio = float(source["influence"]) / evaluate_max["influence"]
            row = [
                uid,
                source["uname"],
                source["gender"],
                source["location"],
                math.log(importance_ratio * 9 + 1, 10) * 100,
                math.log(influence_ratio * 9 + 1, 10) * 100,
            ]
        except Exception:
            # Best-effort row: keep the uid and blank the rest. `Exception`
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            row = [uid, "", "", "", "", ""]
        results.append(row)
    return results
def show_detect_task(submit_user):
    """List the detect tasks submitted by ``submit_user``.

    Searches ``group_index_name`` for documents whose ``task_type`` matches
    ``'detect'`` and whose ``submit_user`` term equals the argument, sorted by
    ``submit_date`` descending and capped at ``MAX_VALUE`` hits.

    Returns a list of rows
    ``[task_name, submit_user, submit_date, detect_type, state, detect_process]``
    where ``submit_date`` has been passed through ``ts2datetime(int(...))``.
    Any failure while searching is swallowed and produces an empty list.
    """
    must_clauses = [
        {'match': {'task_type': 'detect'}},
        {'term': {'submit_user': submit_user}},
    ]
    try:
        # The body is built inside the try so that every failure the original
        # swallowed is still swallowed here.
        response = es_group_result.search(
            index=group_index_name,
            doc_type=group_index_type,
            body={
                'query': {'bool': {'must': must_clauses}},
                'sort': [{'submit_date': 'desc'}],
                'size': MAX_VALUE,
            },
        )
        hits = response['hits']['hits']
    except:
        hits = []

    def as_row(doc):
        # Fields are read in the same order as the original loop body.
        name = doc['task_name']
        submitted_at = ts2datetime(int(doc['submit_date']))
        owner = doc['submit_user']
        kind = doc['detect_type']
        task_state = doc['state']
        progress = doc['detect_process']
        return [name, owner, submitted_at, kind, task_state, progress]

    return [as_row(hit['_source']) for hit in hits]
def search_detect_task(task_name, submit_date, state, process, detect_type, submit_user):
    """Search detect tasks with optional filters and return table rows.

    Every filter is skipped when its argument is falsy. All filters that are
    given are combined in a ``bool``/``must`` query:

    * ``task_name``   - split on single spaces; each fragment becomes a
      ``*fragment*`` wildcard on ``task_name``.
    * ``submit_date`` - converted with ``datetime2ts``; matches
      ``submit_date`` in ``[ts, ts + DAY)``.
    * ``state``       - split on single spaces; each fragment becomes a
      ``*fragment*`` wildcard on ``state``.
    * ``process``     - ``detect_process`` range from ``int(process)`` to
      ``MAX_PROCESS``.
    * ``detect_type`` - split on commas; a document matches if any fragment
      matches as a wildcard (nested ``should``).
    * ``submit_user`` - wildcard on ``submit_user``.

    Results are sorted by ``submit_date`` descending, capped at ``MAX_VALUE``.

    Returns:
        A list of
        ``[task_name, submit_user, submit_date, detect_type, state, detect_process]``
        rows. If the search itself fails, an empty list is returned.

    Raises:
        ValueError: if ``process`` is truthy but not convertible by ``int``.
    """
    query = [{'match': {'task_type': 'detect'}}]
    if task_name:
        for item in task_name.split(' '):
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
    if submit_date:
        submit_date_from = datetime2ts(submit_date)
        submit_date_to = submit_date_from + DAY
        query.append({'range': {'submit_date': {'gte': submit_date_from, 'lt': submit_date_to}}})
    if state:
        for item in state.split(' '):
            query.append({'wildcard': {'state': '*' + item + '*'}})
    if process:
        query.append({'range': {'detect_process': {'from': int(process), 'to': MAX_PROCESS}}})
    if detect_type:
        nest_body_list = [
            {'wildcard': {'detect_type': '*' + type_item + '*'}}
            for type_item in detect_type.split(',')
        ]
        query.append({'bool': {'should': nest_body_list}})
    if submit_user:
        # The run of leading asterisks is kept exactly as the original sent it.
        query.append({'wildcard': {'submit_user': '******' + submit_user + '*'}})

    try:
        search_result = es_group_result.search(
            index=group_index_name,
            doc_type=group_index_type,
            body={
                'query': {'bool': {'must': query}},
                'sort': [{'submit_date': {'order': 'desc'}}],
                'size': MAX_VALUE,
            },
        )['hits']['hits']
    except Exception:
        # Best-effort search: a failed query yields an empty table.
        # `Exception` keeps KeyboardInterrupt/SystemExit propagating.
        search_result = []

    # Build the group information table. Distinct local names are used so the
    # function's parameters are not overwritten while iterating.
    results = []
    for group_item in search_result:
        source = group_item['_source']
        row_task_name = source['task_name']
        row_submit_date = ts2datetime(int(source['submit_date']))
        row_submit_user = source['submit_user']
        row_detect_type = source['detect_type']
        row_state = source['state']
        row_process = source['detect_process']
        results.append([row_task_name, row_submit_user, row_submit_date,
                        row_detect_type, row_state, row_process])
    return results
# Example #4
# 0
def get_group_list(task_name, submit_user):
    """Build the member table of one group task.

    Looks the task up by exact ``task_name`` / ``submit_user`` terms, takes
    the first hit's ``uid_list`` and fetches those ids from the portrait index
    with ``mget``. Each returned document produces
    ``[uid, uname, gender, location, normal_importance, normal_influence]``;
    if anything goes wrong while reading or scaling a document the row is
    ``[uid, '', '', '', '', '']`` instead.

    An empty search result raises ``IndexError`` (the first hit is indexed
    without a guard).
    """
    term_filters = [
        {"term": {"task_name": task_name}},
        {"term": {"submit_user": submit_user}},
    ]
    found = es_group_result.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={"query": {"bool": {"must": term_filters}}},
    )
    task_source = found["hits"]["hits"][0]["_source"]

    portrait_docs = es_user_portrait.mget(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={'ids': task_source['uid_list']},
    )['docs']
    evaluate_max = get_evaluate_max()

    def scaled(value, ceiling):
        # Same expression as the original: log base 10 of (ratio * 9 + 1),
        # multiplied by 100.
        return math.log(value / ceiling * 9 + 1, 10) * 100

    def member_row(doc):
        uid = doc['_id']
        try:
            attrs = doc['_source']
            return [
                uid,
                attrs['uname'],
                attrs['gender'],
                attrs['location'],
                scaled(attrs['importance'], evaluate_max['importance']),
                scaled(attrs['influence'], evaluate_max['influence']),
            ]
        except:
            return [uid, '', '', '', '', '']

    return [member_row(doc) for doc in portrait_docs]
def show_detect_task(submit_user):
    """Return the detect tasks submitted by ``submit_user`` as table rows.

    The search matches ``task_type`` against ``'detect'`` and requires the
    ``submit_user`` term to equal the argument. Hits are sorted by
    ``submit_date`` descending and capped at ``MAX_VALUE``.

    Returns:
        A list of
        ``[task_name, submit_user, submit_date, detect_type, state, detect_process]``
        rows, with ``submit_date`` converted by ``ts2datetime(int(...))``.
        If the search fails, an empty list is returned.
    """
    query = [{'match': {'task_type': 'detect'}}, {'term': {'submit_user': submit_user}}]
    try:
        search_results = es_group_result.search(
            index=group_index_name,
            doc_type=group_index_type,
            body={
                'query': {'bool': {'must': query}},
                'sort': [{'submit_date': 'desc'}],
                'size': MAX_VALUE,
            },
        )['hits']['hits']
    except Exception:
        # Best-effort: a failed search shows an empty list. `Exception`
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
        search_results = []

    results = []
    for group_item in search_results:
        source = group_item['_source']
        task_name = source['task_name']
        submit_date = ts2datetime(int(source['submit_date']))
        # Separate name so the `submit_user` parameter is not overwritten.
        row_submit_user = source['submit_user']
        detect_type = source['detect_type']
        state = source['state']
        process = source['detect_process']
        results.append([task_name, row_submit_user, submit_date, detect_type, state, process])

    return results
def search_group_results(task_name, submit_user, module):
    """Return one module's slice of a completed group-analysis task.

    The task is looked up by exact ``task_name`` / ``submit_user`` terms and
    the first hit is used; no hit raises ``IndexError`` because the first
    element is indexed without a guard.

    If the task's ``status`` is not ``1`` the string
    ``"group task is not completed"`` is returned. Otherwise a dict is
    returned whose keys depend on ``module`` (``overview``, ``basic``,
    ``activity``, ``geo``, ``preference``, ``influence``, ``social`` or
    ``think``). An unrecognised ``module`` gives an empty dict.
    """
    lookup = {
        "query": {"bool": {"must": [{"term": {"task_name": task_name}}, {"term": {"submit_user": submit_user}}]}}
    }
    response = es_group_result.search(index=group_index_name, doc_type=group_index_type, body=lookup)
    source = response["hits"]["hits"][0]["_source"]

    # Only tasks with status 1 expose results.
    if source["status"] != 1:
        return "group task is not completed"

    def plain(field):
        return source[field]

    def from_json(field):
        return json.loads(source[field])

    def as_datetime(field):
        return ts2datetime(source[field])

    def geo(field):
        return deal_geo_distribution(json.loads(source[field]))

    # (module name, ((output key, reader, source field), ...)). Entries are
    # listed, compared and filled in the same order as the original if/elif
    # chain. Output-key spellings are part of the returned data and are
    # reproduced exactly.
    layout = (
        ("overview", (
            ("task_name", plain, "task_name"),
            ("submit_date", as_datetime, "submit_date"),
            ("state", plain, "state"),
            ("submit_user", plain, "submit_user"),
            ("density_star", plain, "density_star"),
            ("activeness_star", plain, "activeness_star"),
            ("influence_star", plain, "influence_star"),
            ("importance_star", plain, "importance_star"),
            ("tag_vector", from_json, "tag_vector"),
            ("count", plain, "count"),
        )),
        ("basic", (
            ("politics", from_json, "politics"),
            ("domain", from_json, "domain"),
            ("topic", from_json, "topic"),
            ("keywords", from_json, "keywords"),
            ("sensitive_hashtag", from_json, "sensitive_hashtag"),
            ("hashtag", from_json, "hashtag"),
            ("sensitive_words", from_json, "sensitive_words"),
            ("influence_his", from_json, "influence_his"),
            ("sensitive_his", from_json, "sensitive_his"),
        )),
        ("activity", (
            ("activity_trend", from_json, "activity_trend"),
            ("activity_time", from_json, "activity_time"),
            ("activeness_trend", from_json, "activeness"),
            ("activeness_his", from_json, "activeness_his"),
            ("activeness_description", plain, "activeness_description"),
            ("online_pattern", from_json, "online_pattern"),
        )),
        ("geo", (
            ("activity_geo_disribution", geo, "activity_geo_distribution"),
            ("activiy_geo_vary", from_json, "activity_geo_vary"),
        )),
        ("preference", (
            ("sentiment_word", from_json, "sentiment_word"),
        )),
        ("influence", (
            ("influence_trend", from_json, "influence"),
            ("influence_in_user", from_json, "influence_in_user"),
            ("influence_out_user", from_json, "influence_out_user"),
        )),
        ("social", (
            ("in_density", plain, "in_density"),
            ("in_inter_user_ratio", plain, "in_inter_user_ratio"),
            ("in_inter_weibo_ratio", plain, "in_inter_weibo_ratio"),
            ("social_in_record", from_json, "social_in_record"),
            ("out_inter_user_ratio", plain, "out_inter_user_ratio"),
            ("out_inter_weibo_ratio", plain, "out_inter_weibo_ratio"),
            ("social_out_record", from_json, "social_out_record"),
            ("density_description", plain, "density_description"),
            ("mention", plain, "mention"),
        )),
        ("think", (
            ("sentiment_trend", from_json, "sentiment_trend"),
            ("sentiment_pie", from_json, "sentiment_pie"),
            ("character", from_json, "character"),
        )),
    )

    result = {}
    for name, fields in layout:
        if module == name:
            for out_key, reader, field in fields:
                result[out_key] = reader(field)
            break
    return result
def search_task(task_name, submit_user, submit_date, state, status):
    """Search analysis tasks and return them as table rows.

    Optional filters (each skipped when its argument is falsy), all combined
    in a ``bool``/``must`` query together with ``task_type == "analysis"``:

    * ``task_name``   - split on single spaces; each fragment becomes a
      ``*fragment*`` wildcard on ``task_name``.
    * ``submit_date`` - converted with ``datetime2ts``; matches
      ``submit_date`` in ``[ts, ts + DAY)``.
    * ``state``       - split on single spaces; each fragment becomes a
      ``*fragment*`` wildcard on ``state``.
    * ``status``      - ``match`` on ``status``.

    ``submit_user`` is accepted for interface compatibility but is not used:
    the filter on it is commented out in the original code.

    Results are sorted by ``count`` descending and capped at ``MAX_VALUE``.

    Returns:
        A list of ``[task_name, submit_date, count, state, status]`` rows,
        where a missing ``state`` defaults to ``""`` and a missing ``status``
        to ``0``. ``None`` is returned if the response has no ``hits.hits``.
    """
    query = []
    if task_name:
        for item in task_name.split(" "):
            query.append({"wildcard": {"task_name": "*" + item + "*"}})
    if submit_date:
        submit_date_start = datetime2ts(submit_date)
        submit_date_end = submit_date_start + DAY
        query.append({"range": {"submit_date": {"gte": submit_date_start, "lt": submit_date_end}}})
    if state:
        for item in state.split(" "):
            query.append({"wildcard": {"state": "*" + item + "*"}})
    if status:
        query.append({"match": {"status": status}})

    # Every filter above appends at least one clause, so a non-empty list is
    # equivalent to the original `condition_num > 0`.
    has_filters = bool(query)
    query.append({"term": {"task_type": "analysis"}})

    # NOTE(review): the unfiltered path queries through `es` while the
    # filtered path uses `es_group_result`, as in the original. Confirm
    # whether two different clients are really intended.
    client = es_group_result if has_filters else es
    # The original wrapped this call in `except Exception as e: raise e`,
    # which changed nothing except the traceback; errors now propagate as-is.
    source = client.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={"query": {"bool": {"must": query}}, "sort": [{"count": {"order": "desc"}}], "size": MAX_VALUE},
    )

    try:
        task_dict_list = source["hits"]["hits"]
    except (KeyError, TypeError):
        return None
    # Parenthesised form prints the same text under Python 2 and also parses
    # under Python 3.
    print("step yes")

    result = []
    for task_dict in task_dict_list:
        task = task_dict["_source"]
        result.append(
            [
                task["task_name"],
                task["submit_date"],
                task["count"],
                task.get("state", ""),
                task.get("status", 0),
            ]
        )

    return result
def search_detect_task(task_name, submit_date, state, process, detect_type,
                       submit_user):
    """Look up detect tasks using whichever filters were supplied.

    All clauses go into one ``bool``/``must`` list that starts with a match
    on ``task_type == 'detect'``. Falsy arguments add nothing. ``task_name``
    and ``state`` are split on single spaces into ``*fragment*`` wildcards,
    ``submit_date`` becomes a one-``DAY`` range starting at
    ``datetime2ts(submit_date)``, ``process`` becomes a ``detect_process``
    range from ``int(process)`` to ``MAX_PROCESS``, ``detect_type`` is split
    on commas into a nested ``should`` of wildcards, and ``submit_user`` is a
    wildcard.

    Hits are sorted by ``submit_date`` descending (at most ``MAX_VALUE``) and
    returned as
    ``[task_name, submit_user, submit_date, detect_type, state, detect_process]``
    rows. Any failure while searching results in an empty list.
    """
    def contains(field, fragment):
        return {'wildcard': {field: '*' + fragment + '*'}}

    must = [{'match': {'task_type': 'detect'}}]
    if task_name:
        must.extend(contains('task_name', piece) for piece in task_name.split(' '))
    if submit_date:
        day_start = datetime2ts(submit_date)
        day_end = day_start + DAY
        must.append({'range': {'submit_date': {'gte': day_start, 'lt': day_end}}})
    if state:
        must.extend(contains('state', piece) for piece in state.split(' '))
    if process:
        lower_bound = int(process)
        must.append({'range': {'detect_process': {'from': lower_bound, 'to': MAX_PROCESS}}})
    if detect_type:
        alternatives = [contains('detect_type', piece) for piece in detect_type.split(',')]
        must.append({'bool': {'should': alternatives}})
    if submit_user:
        must.append({'wildcard': {'submit_user': '******' + submit_user + '*'}})

    try:
        # Body construction stays inside the try, as in the original, so the
        # same failures are swallowed.
        response = es_group_result.search(
            index=group_index_name,
            doc_type=group_index_type,
            body={
                'query': {'bool': {'must': must}},
                'sort': [{'submit_date': {'order': 'desc'}}],
                'size': MAX_VALUE,
            })
        hits = response['hits']['hits']
    except:
        hits = []

    # Assemble the group information table, reading the fields in the same
    # order as the original loop.
    table = []
    for hit in hits:
        doc = hit['_source']
        name = doc['task_name']
        submitted_at = ts2datetime(int(doc['submit_date']))
        owner = doc['submit_user']
        kind = doc['detect_type']
        task_state = doc['state']
        progress = doc['detect_process']
        table.append([name, owner, submitted_at, kind, task_state, progress])
    return table
# Example #9
# 0
def search_group_results(task_name, submit_user, module):
    """Fetch the result fields of one analysis module for a group task.

    Step 1 finds the task by exact ``task_name`` and ``submit_user`` terms and
    takes the first hit (no hit raises ``IndexError``; the guard is disabled
    in the original). Step 2 returns the string
    ``'group task is not completed'`` unless the task's ``status`` is ``1``.
    Step 3 copies or JSON-decodes the fields belonging to ``module`` into a
    dict, which is returned. An unknown ``module`` returns an empty dict.
    """
    result = {}

    # step1: find the task document
    term_filters = [
        {"term": {"task_name": task_name}},
        {"term": {"submit_user": submit_user}},
    ]
    hits = es_group_result.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={"query": {"bool": {"must": term_filters}}},
    )["hits"]["hits"]
    source = hits[0]["_source"]

    # step2: only status == 1 exposes results
    if source['status'] != 1:
        return 'group task is not completed'

    def copy_fields(*names):
        # Stored value is returned untouched.
        for name in names:
            result[name] = source[name]

    def decode_fields(*names):
        # Stored value is a JSON string.
        for name in names:
            result[name] = json.loads(source[name])

    def decode_as(result_key, source_key):
        # JSON field exposed under a different output key.
        result[result_key] = json.loads(source[source_key])

    # step3: fill the module's fields, in the original order
    if module == 'overview':
        copy_fields('task_name')
        result['submit_date'] = ts2datetime(source['submit_date'])
        copy_fields('state', 'submit_user', 'density_star', 'activeness_star',
                    'influence_star', 'importance_star')
        decode_fields('tag_vector')
        copy_fields('count')
    elif module == 'basic':
        decode_fields('politics', 'domain', 'topic', 'keywords',
                      'sensitive_hashtag', 'hashtag', 'sensitive_words',
                      'influence_his', 'sensitive_his')
    elif module == 'activity':
        decode_fields('activity_trend', 'activity_time')
        decode_as('activeness_trend', 'activeness')
        decode_fields('activeness_his')
        copy_fields('activeness_description')
        decode_fields('online_pattern')
    elif module == 'geo':
        # Output-key spellings are part of the returned data; kept exactly.
        raw_distribution = json.loads(source['activity_geo_distribution'])
        result['activity_geo_disribution'] = deal_geo_distribution(raw_distribution)
        decode_as('activiy_geo_vary', 'activity_geo_vary')
    elif module == 'preference':
        decode_fields('sentiment_word')
    elif module == 'influence':
        decode_as('influence_trend', 'influence')
        decode_fields('influence_in_user', 'influence_out_user')
    elif module == 'social':
        copy_fields('in_density', 'in_inter_user_ratio', 'in_inter_weibo_ratio')
        decode_fields('social_in_record')
        copy_fields('out_inter_user_ratio', 'out_inter_weibo_ratio')
        decode_fields('social_out_record')
        copy_fields('density_description', 'mention')
    elif module == 'think':
        decode_fields('sentiment_trend', 'sentiment_pie', 'character')
    return result
# Example #10
# 0
def search_task(task_name, submit_user, submit_date, state, status):
    """Return analysis tasks matching the given filters as table rows.

    Each truthy argument adds clauses to a ``bool``/``must`` query; a
    ``task_type == 'analysis'`` term is always added last:

    * ``task_name``   - one ``*fragment*`` wildcard on ``task_name`` per
      space-separated fragment.
    * ``submit_date`` - ``submit_date`` in
      ``[datetime2ts(submit_date), datetime2ts(submit_date) + DAY)``.
    * ``state``       - one ``*fragment*`` wildcard on ``state`` per
      space-separated fragment.
    * ``status``      - ``match`` on ``status``.

    ``submit_user`` is part of the signature but unused: the filter on it is
    commented out in the original code.

    Hits are sorted by ``count`` descending, capped at ``MAX_VALUE``.

    Returns:
        A list of ``[task_name, submit_date, count, state, status]`` rows
        (``state`` defaults to ``''`` and ``status`` to ``0`` when absent),
        or ``None`` when the response carries no ``hits.hits``.
    """
    query = []
    if task_name:
        for item in task_name.split(' '):
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
    if submit_date:
        submit_date_start = datetime2ts(submit_date)
        submit_date_end = submit_date_start + DAY
        query.append({
            'range': {
                'submit_date': {
                    'gte': submit_date_start,
                    'lt': submit_date_end
                }
            }
        })
    if state:
        for item in state.split(' '):
            query.append({'wildcard': {'state': '*' + item + '*'}})
    if status:
        query.append({'match': {'status': status}})

    # A non-empty list here means at least one filter was supplied, which is
    # what the original `condition_num > 0` counted.
    filtered = bool(query)
    query.append({'term': {'task_type': 'analysis'}})
    search_kwargs = {
        'index': group_index_name,
        'doc_type': group_index_type,
        'body': {
            'query': {'bool': {'must': query}},
            'sort': [{'count': {'order': 'desc'}}],
            'size': MAX_VALUE,
        },
    }
    # NOTE(review): the original sends the unfiltered query through `es` and
    # the filtered one through `es_group_result`. That split is preserved;
    # confirm it is intentional.
    if filtered:
        # The former `except Exception as e: raise e` wrapper only re-raised,
        # so the call is made directly and errors propagate unchanged.
        source = es_group_result.search(**search_kwargs)
    else:
        source = es.search(**search_kwargs)

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        return None
    # Parenthesised so the line parses under Python 3 while printing the same
    # text under Python 2.
    print('step yes')

    result = []
    for task_dict in task_dict_list:
        task = task_dict['_source']
        result.append([
            task['task_name'],
            task['submit_date'],
            task['count'],
            task.get('state', ''),
            task.get('status', 0),
        ])

    return result