コード例 #1
0
def submit_sensing(input_dict):
    """Register a sensing task in the compute ES and the compute redis queue.

    ``input_dict['task_information']`` is completed in place (submit_date,
    count, status, detect_type) before ``input_dict`` is handed to
    ``save_compute2es`` and the task record to ``save_compute2redis``.

    Returns the string 'task name invalid' when looking up the task id in the
    group index yields a non-empty result. Otherwise returns True when both
    save helpers returned True, and False in every other case.
    """
    task_info = input_dict['task_information']
    # task_name is not used below; the lookup stays so that a missing key
    # still raises KeyError.
    task_name = task_info['task_name']
    task_id = task_info['task_id']

    # step1: the task id must not be present in the group index yet
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type,
                                       id=task_id)
    except:
        # every failure of the lookup is treated as "no such task"
        existing = {}
    if existing != {}:
        return 'task name invalid'

    # step2: complete the task record and save it to the compute es
    task_info['submit_date'] = int(time.time())
    task_info['count'] = len(task_info['uid_list'])
    # The two self-assignments change nothing, but they raise KeyError when
    # 'state' / 'task_type' are absent.
    task_info['state'] = task_info['state']
    task_info['status'] = 0
    task_info['detect_type'] = 'sensing'
    task_info['task_type'] = task_info['task_type']
    es_status = save_compute2es(input_dict)

    # step3: push the task record to the compute redis queue
    redis_status = save_compute2redis(task_info)

    # success only when both stores reported True
    return bool(es_status == True and redis_status == True)
コード例 #2
0
def save_detect_event_task(input_dict):
    """Store an event-detect task via ``save_detect2es`` and ``save_detect2redis``.

    Returns the string 'task name invalid' when looking up the task id in the
    group index yields a non-empty result. Otherwise returns True when both
    save helpers returned True, and False in every other case.
    """
    details = input_dict['task_information']
    # task_name is never used; the lookup stays so that a missing key still
    # raises KeyError.
    task_name = details['task_name']
    task_id = details['task_id']

    # step1: the id must not be present in the group es yet
    try:
        found = es_group_result.get(index=group_index_name,
                                    doc_type=group_index_type,
                                    id=task_id)
    except:
        # every lookup failure counts as "not present"
        found = {}
    if found != {}:
        return 'task name invalid'

    # step2: save to es, step3: save to redis (both are always attempted)
    es_ok = save_detect2es(input_dict) == True
    redis_ok = save_detect2redis(input_dict) == True

    return bool(es_ok and redis_ok)
コード例 #3
0
def submit_sensing(input_dict):
    """Submit a sensing task to the compute ES and the compute redis queue.

    The 'task_information' dict inside ``input_dict`` is updated in place
    with submit_date, count, status and detect_type, then saved through
    ``save_compute2es`` (whole ``input_dict``) and ``save_compute2redis``
    (task information only).

    Returns 'task name invalid' when the group-index lookup of the task id
    gives a non-empty result; else True if both saves returned True, False
    otherwise.
    """
    record = input_dict['task_information']
    # Unused value; reading it keeps the KeyError for a missing 'task_name'.
    task_name = record['task_name']
    wanted_id = record['task_id']

    # step1: refuse ids that can already be fetched from the group index
    try:
        lookup = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=wanted_id)
    except:
        # a failing lookup is handled like a missing document
        lookup = {}
    if lookup != {}:
        return 'task name invalid'

    # step2: fill in the record, then save to the compute es
    now = int(time.time())
    record['submit_date'] = now
    record['count'] = len(record['uid_list'])
    # no-op write, kept because it raises KeyError when 'state' is missing
    record['state'] = record['state']
    record['status'] = 0
    record['detect_type'] = 'sensing'
    # no-op write, kept because it raises KeyError when 'task_type' is missing
    record['task_type'] = record['task_type']
    es_status = save_compute2es(input_dict)

    # step3: save to the compute redis
    redis_status = save_compute2redis(record)

    # both operations have to report True
    if es_status == True and redis_status == True:
        return True
    return False
コード例 #4
0
def save_detect_attribute_task(input_dict):
    """Store an attribute-detect task via ``save_detect2es`` and ``save_detect2redis``.

    Returns the string 'task name invalid' when the group-index lookup of the
    task id gives a non-empty result. Otherwise returns True when both save
    helpers returned True, and False in every other case.
    """
    info = input_dict['task_information']
    # Unused value; reading it keeps the KeyError for a missing 'task_name'.
    task_name = info['task_name']
    task_id = info['task_id']

    # step1: the detect task id must not be in the group es
    try:
        hit = es_group_result.get(
            index=group_index_name,
            doc_type=group_index_type,
            id=task_id,
        )
    except:
        # a failing lookup is handled like a missing document
        hit = {}
    if hit != {}:
        return 'task name invalid'

    # step2: save to es
    es_status = save_detect2es(input_dict)
    # step3: save to redis
    redis_status = save_detect2redis(input_dict)

    # the operation succeeded only when both helpers returned True
    if es_status == True and redis_status == True:
        return True
    return False
コード例 #5
0
def submit_task(input_data):
    """Submit a group analysis task unless its id is already taken.

    The task id is ``submit_user + task_name``. When the id cannot be read
    from the group index and ``input_data`` has no 'uid_file' key, the record
    is completed in place (status, count, task_type, submit_user,
    detect_type, detect_process), indexed under the task id and pushed as
    JSON onto the group analysis redis queue.

    Returns 0 when the id could be read from the group index (nothing is
    written), otherwise 1. Note that 1 is also returned when 'uid_file' is
    present, although nothing is written in that case either.
    """
    task_name = input_data['task_name']
    submit_user = input_data['submit_user']
    task_id = submit_user + task_name
    try:
        es_group_result.get(index=group_index_name,
                            doc_type=group_index_type,
                            id=task_id)['_source']
    except Exception:
        # The lookup failed, so the id is treated as free. Exception rather
        # than a bare except, so KeyboardInterrupt/SystemExit still propagate.
        status = 1
    else:
        status = 0  # a task with this id already exists: can not submit

    if status == 1 and 'uid_file' not in input_data:
        input_data['status'] = 0  # mark the task as not computed yet
        input_data['count'] = len(input_data['uid_list'])
        input_data['task_type'] = 'analysis'
        # NOTE(review): this overwrites the submitting user after task_id was
        # built from it -- confirm the literal is intended.
        input_data['submit_user'] = '******'
        input_data['detect_type'] = ''
        input_data['detect_process'] = ''
        es_group_result.index(index=group_index_name,
                              doc_type=group_index_type,
                              id=task_id,
                              body=input_data)
        r.lpush(group_analysis_queue_name, json.dumps(input_data))

    return status
コード例 #6
0
def get_activity_weibo(task_name,
                       submit_user,
                       start_ts,
                       time_segment=FOUR_HOUR):
    """Return the weibo posts of a group's members inside one time segment.

    The group is the task with id ``submit_user + task_name``. Posts are
    searched in the flow-text index named after the date of ``start_ts`` and
    restricted to ``start_ts <= timestamp < start_ts + time_segment``, sorted
    by timestamp.

    Returns the string 'task name invalid' when the task cannot be read, the
    string 'task uid list null' when it has no uid list, and otherwise a list
    of dicts with the keys 'timestamp', 'ip', 'text' and 'geo' (empty when
    the search fails).
    """
    results = []
    #step1: get task_name uid
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id,
                                           _source=False,
                                           fields=['uid_list'])
    except Exception:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except Exception:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: get uid2uname
    # NOTE(review): the mapping is built but not used in the returned records.
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={'ids': uid_list},
            _source=False,
            fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    for item in user_portrait_result:
        # Only found docs carry a name. Recording inside the check avoids the
        # UnboundLocalError on a leading not-found doc and stops a previous
        # user's name from being stored under a not-found uid.
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: search time_segment weibo
    end_ts = start_ts + time_segment
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query}},
                  'sort': 'timestamp',
                  'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_es_result = []
    for item in flow_text_es_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            # NOTE(review): joins the elements of geo directly; if geo is a
            # string this tab-separates its characters -- confirm the type.
            weibo['geo'] = '\t'.join(source['geo'])
        else:
            weibo['geo'] = ''
        results.append(weibo)

    return results
コード例 #7
0
def show_detect_result(task_id):
    """List the members of a detect task with their normalised evaluation scores.

    Returns the string 'task name is not exist' when the task cannot be read
    from the group index. Otherwise returns a list of
    ``[uid, uname, activeness, importance, influence]`` rows. Scores are
    ``log10(value / max * 9 + 1) * 100`` with the maxima taken from
    ``get_evaluate_max()``. Users whose portrait is not found carry the
    u'未知' placeholder in every field but the uid. Rows are ordered with the
    placeholder rows first, then by influence in descending order.
    """
    def scale(value, max_value):
        # float() forces true division on Python 2 when both are integers
        return math.log(float(value) / max_value * 9 + 1, 10) * 100

    def influence_key(row):
        # CPython 2 orders numbers below unicode strings, so reverse=True
        # put the placeholder rows first; Python 3 refuses to compare the
        # two types. An explicit (is_placeholder, value) key keeps the old
        # order on both.
        influence = row[4]
        if isinstance(influence, (int, float)):
            return (0, influence)
        return (1, 0)

    user_result = []
    #step1:identify the task name id exist
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    #step2:get uid list
    uid_list = json.loads(task_exist_result['uid_list'])
    #step3:get user evaluation information---uid/uname/activeness/importance/influence
    evaluate_max = None  # fetched once, on the first found user
    for start in range(0, len(uid_list), DETECT_ITER_COUNT):
        iter_user_list = uid_list[start:start + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name,
                doc_type=portrait_index_type,
                body={'ids': iter_user_list},
                _source=True)['docs']
        except Exception:
            # a failed batch contributes no rows
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                uname = source['uname']
                if evaluate_max is None:
                    evaluate_max = get_evaluate_max()
                activeness = scale(source['activeness'],
                                   evaluate_max['activeness'])
                importance = scale(source['importance'],
                                   evaluate_max['importance'])
                influence = scale(source['influence'],
                                  evaluate_max['influence'])
            else:
                uname = u'未知'
                activeness = u'未知'
                importance = u'未知'
                influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
    sort_user_result = sorted(user_result, key=influence_key, reverse=True)

    return sort_user_result
コード例 #8
0
def save_detect_multi_task(input_dict, extend_mark):
    """Save a multi-user detect task, either for extension or for direct computing.

    The uid list in ``input_dict['task_information']`` is first replaced by
    the users ``identify_user_out`` reports as being in the user portrait.

    extend_mark '1' saves the task through ``save_detect2es`` and
    ``save_detect2redis``. extend_mark '0' sets status and count on the task
    information and saves through ``save_compute2es`` and
    ``save_compute2redis``. Any other value saves nothing and is reported as
    a failure.

    Returns the string 'task name invalid' when the task id can already be
    read from the group index. Otherwise returns ``(status, out_user_list)``
    where status is True only when both save helpers returned True and
    out_user_list is the second value returned by ``identify_user_out``.
    """
    task_information_dict = input_dict['task_information']
    input_uid_list = task_information_dict['uid_list']
    #step1: identify user is in user_portrait and not in user_portrait
    in_user_list, out_user_list = identify_user_out(input_uid_list)
    input_dict['task_information']['uid_list'] = in_user_list
    print('step1')
    #step2: identify task name is valid
    # task_name is not used; the lookup stays so a missing key still raises
    # KeyError.
    task_name = input_dict['task_information']['task_name']
    task_id = input_dict['task_information']['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result != {}:
        return 'task name invalid'
    print('step2')
    #step3: identify whether or not to extend----extend mark
    # Defaults cover an extend_mark that is neither '1' nor '0'; without them
    # the status check below raised UnboundLocalError.
    es_status = False
    redis_status = False
    if extend_mark == '1':
        print('step3 save')
        es_status = save_detect2es(input_dict)
        redis_status = save_detect2redis(input_dict)  # detect redis queue
    elif extend_mark == '0':
        uid_list = input_dict['task_information']['uid_list']
        input_dict['task_information']['status'] = 0
        print('uid_list: %s %s %s' % (len(uid_list), uid_list, type(uid_list)))
        input_dict['task_information']['count'] = len(uid_list)
        print('step3 save')
        es_status = save_compute2es(input_dict)
        add_redis_dict = input_dict['task_information']
        redis_status = save_compute2redis(
            add_redis_dict)  # compute redis queue
    #identify the operation status
    status = bool(es_status == True and redis_status == True)

    return status, out_user_list
コード例 #9
0
def detect2analysis(input_data):
    """Turn an existing detect task into an analysis task with a new uid list.

    The task with id ``submit_user + task_name`` is read from the group
    index. Its uid_list, status, count and task_type are refreshed, a
    task-information record is saved through ``save_compute2es`` and the
    refreshed stored record is passed to ``save_compute2redis``.

    Returns the string 'task name is not exsit' when the task cannot be read.
    Otherwise returns True when both save helpers returned True, and False
    in every other case.
    """
    task_name = input_data['task_name']
    submit_user = input_data['submit_user']
    task_id = submit_user + task_name
    uid_list = input_data['uid_list']

    # step1: the task has to exist already
    try:
        stored_task = es_group_result.get(
            index=group_index_name, doc_type=group_index_type,
            id=task_id)['_source']
    except:
        # a failing lookup is handled like a missing task
        stored_task = {}
    if stored_task == {}:
        return 'task name is not exsit'

    # step2: update the uid list, step3: mark the record as an uncomputed
    # analysis task
    stored_task['uid_list'] = uid_list
    stored_task['status'] = 0
    stored_task['count'] = len(uid_list)
    stored_task['task_type'] = 'analysis'

    # task information saved to the compute es
    task_information_dict = {
        'task_id': task_id,
        'task_name': task_name,
        'uid_list': uid_list,
        'status': 0,
        'count': len(uid_list),
        'task_type': 'analysis',
        'submit_user': stored_task['submit_user'],
        'submit_date': stored_task['submit_date'],
        'detect_type': stored_task['detect_type'],
        'detect_process': stored_task['detect_process'],
        'state': stored_task['state'],
    }
    es_status = save_compute2es({
        'task_information': task_information_dict,
        'query_condition': stored_task['query_condition'],
    })

    # step4: add the task to the analysis queue
    # NOTE(review): the stored record, not task_information_dict, is queued
    # here -- confirm that is intended.
    redis_status = save_compute2redis(stored_task)

    # both operations have to report True
    return bool(es_status == True and redis_status == True)
コード例 #10
0
def show_detect_result(task_id):
    """Return the members of a detect task with normalised evaluation scores.

    Returns the string 'task name is not exist' when the task cannot be read
    from the group index. Otherwise returns a list of
    ``[uid, uname, activeness, importance, influence]`` rows. Each score is
    ``log10(value / max * 9 + 1) * 100`` with the maxima taken from
    ``get_evaluate_max()``. A user whose portrait is not found gets the
    u'未知' placeholder in every field but the uid. Placeholder rows come
    first, the remaining rows follow by descending influence.
    """
    def normalise(value, max_value):
        # float() avoids Python 2 integer division when both are integers
        return math.log(float(value) / max_value * 9 + 1, 10) * 100

    def by_influence(row):
        # Python 3 cannot compare the float scores with the unicode
        # placeholder. CPython 2 ordered numbers below strings, which under
        # reverse=True listed placeholder rows first; this key keeps that.
        if isinstance(row[4], (int, float)):
            return (0, row[4])
        return (1, 0)

    #step1:identify the task name id exist
    try:
        task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    #step2:get uid list
    uid_list = json.loads(task_exist_result['uid_list'])
    #step3:get user evaluation information---uid/uname/activeness/importance/influence
    user_result = []
    evaluate_max = None  # looked up once, when the first found user appears
    for offset in range(0, len(uid_list), DETECT_ITER_COUNT):
        iter_user_list = uid_list[offset: offset + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                                    body={'ids': iter_user_list}, _source=True)['docs']
        except Exception:
            # a failed batch contributes no rows
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                uname = source['uname']
                if evaluate_max is None:
                    evaluate_max = get_evaluate_max()
                activeness = normalise(source['activeness'], evaluate_max['activeness'])
                importance = normalise(source['importance'], evaluate_max['importance'])
                influence = normalise(source['influence'], evaluate_max['influence'])
            else:
                uname = u'未知'
                activeness = u'未知'
                importance = u'未知'
                influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
    sort_user_result = sorted(user_result, key=by_influence, reverse=True)

    return sort_user_result
コード例 #11
0
def save_detect_multi_task(input_dict, extend_mark):
    """Save a multi-user detect task for extension ('1') or direct computing ('0').

    The uid list in ``input_dict['task_information']`` is replaced by the
    users ``identify_user_out`` reports as being in the user portrait.
    With extend_mark '1' the task goes through ``save_detect2es`` and
    ``save_detect2redis``; with '0' status and count are set and the task
    goes through ``save_compute2es`` and ``save_compute2redis``. Any other
    extend_mark saves nothing and yields a False status.

    Returns the string 'task name invalid' when the task id can already be
    read from the group index, otherwise ``(status, out_user_list)`` where
    status is True only when both save helpers returned True.
    """
    task_information_dict = input_dict['task_information']
    input_uid_list = task_information_dict['uid_list']
    #step1: identify user is in user_portrait and not in user_portrait
    in_user_list, out_user_list = identify_user_out(input_uid_list)
    input_dict['task_information']['uid_list'] = in_user_list
    print('step1')
    #step2: identify task name is valid
    # task_name is unused; reading it keeps the KeyError for a missing key.
    task_name = input_dict['task_information']['task_name']
    task_id = input_dict['task_information']['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)['_source']
    except Exception:
        task_exist_result = {}
    if task_exist_result != {}:
        return 'task name invalid'
    print('step2')
    #step3: identify whether or not to extend----extend mark
    # Start from "failed" so an unexpected extend_mark no longer ends in an
    # UnboundLocalError at the status check.
    es_status = False
    redis_status = False
    if extend_mark == '1':
        print('step3 save')
        es_status = save_detect2es(input_dict)
        redis_status = save_detect2redis(input_dict) # detect redis queue
    elif extend_mark == '0':
        uid_list = input_dict['task_information']['uid_list']
        input_dict['task_information']['status'] = 0
        print('uid_list: %s %s %s' % (len(uid_list), uid_list, type(uid_list)))
        input_dict['task_information']['count'] = len(uid_list)
        print('step3 save')
        es_status = save_compute2es(input_dict)
        add_redis_dict = input_dict['task_information']
        redis_status = save_compute2redis(add_redis_dict) # compute redis queue
    #identify the operation status
    status = bool(es_status == True and redis_status == True)

    return status, out_user_list
コード例 #12
0
def submit_task(input_data):
    """Submit a group analysis task unless a task with the same id exists.

    The task id is ``submit_user + task_name``. When that id cannot be read
    from the group index and ``input_data`` has no "uid_file" key, the record
    is completed in place, indexed under the task id and pushed as JSON onto
    the group analysis redis queue.

    Returns 0 when the id could be read from the group index (nothing is
    written), otherwise 1. 1 is also returned when "uid_file" is present,
    although nothing is written then.
    """
    task_name = input_data["task_name"]
    submit_user = input_data["submit_user"]
    task_id = submit_user + task_name
    try:
        es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)["_source"]
    except Exception:
        # Failed lookup: the id counts as free. Exception instead of a bare
        # except lets KeyboardInterrupt/SystemExit propagate.
        status = 1
    else:
        status = 0  # id already taken: can not submit

    if status == 1 and "uid_file" not in input_data:
        input_data["status"] = 0  # mark the task as not computed yet
        input_data["count"] = len(input_data["uid_list"])
        input_data["task_type"] = "analysis"
        # NOTE(review): replaces the submitting user after task_id was built
        # from it -- confirm the literal is intended.
        input_data["submit_user"] = "******"
        input_data["detect_type"] = ""
        input_data["detect_process"] = ""
        es_group_result.index(index=group_index_name, doc_type=group_index_type, id=task_id, body=input_data)
        r.lpush(group_analysis_queue_name, json.dumps(input_data))

    return status
コード例 #13
0
def detect2analysis(input_data):
    """Convert a stored detect task into an analysis task for the given uids.

    Reads the task with id ``submit_user + task_name`` from the group index,
    overwrites its uid_list, status, count and task_type, saves a
    task-information record through ``save_compute2es`` and hands the
    updated stored record to ``save_compute2redis``.

    Returns the string 'task name is not exsit' when the task cannot be
    read; otherwise True if both save helpers returned True, else False.
    """
    task_name = input_data['task_name']
    submit_user = input_data['submit_user']
    task_id = submit_user + task_name
    uid_list = input_data['uid_list']

    # step1: make sure the task exists
    try:
        current = es_group_result.get(index=group_index_name,
                                      doc_type=group_index_type,
                                      id=task_id)['_source']
    except:
        # a failing lookup is handled like a missing task
        current = {}
    if current == {}:
        return 'task name is not exsit'

    # step2: new uid list; step3: reset the compute status and task type
    current['uid_list'] = uid_list
    current['status'] = 0
    current['count'] = len(uid_list)
    current['task_type'] = 'analysis'

    # record written to the compute es
    task_information_dict = {
        'task_id': task_id,
        'task_name': task_name,
        'uid_list': uid_list,
        'status': 0,
        'count': len(uid_list),
        'task_type': 'analysis',
        'submit_user': current['submit_user'],
        'submit_date': current['submit_date'],
        'detect_type': current['detect_type'],
        'detect_process': current['detect_process'],
        'state': current['state'],
    }
    add_es_dict = {
        'task_information': task_information_dict,
        'query_condition': current['query_condition'],
    }
    es_status = save_compute2es(add_es_dict)

    # step4: add the task to the analysis queue
    # NOTE(review): the updated stored record is queued, not
    # task_information_dict -- confirm that is intended.
    redis_status = save_compute2redis(current)

    # success requires True from both helpers
    if es_status == True and redis_status == True:
        return True
    return False
コード例 #14
0
def get_group_member_name(task_name, submit_user):
    """Map every uid of a group task to its user name.

    The task id is ``submit_user + task_name``. Returns an empty dict when
    the task or the user portraits cannot be read. A uid whose portrait is
    not found is mapped to the string 'unkown'.
    """
    task_id = submit_user + task_name

    # read the task record; any failure ends with an empty mapping
    try:
        group_source = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_id)['_source']
    except:
        return {}
    uid_list = group_source['uid_list']

    # read the portraits of all members in one request
    try:
        portrait_docs = es_user_portrait.mget(index=portrait_index_name,
                                              doc_type=portrait_index_type,
                                              body={'ids': uid_list})['docs']
    except:
        return {}

    uid2uname = {}
    for doc in portrait_docs:
        uid = doc['_id']
        uid2uname[uid] = doc['_source']['uname'] if doc['found'] == True else 'unkown'

    return uid2uname
コード例 #15
0
def get_group_member_name(task_name, submit_user):
    """Return a ``{uid: uname}`` dict for the members of a group task.

    The task id is ``submit_user + task_name``. An empty dict is returned
    when reading the task or the user portraits fails. Members whose
    portrait is not found are listed with the name "unkown".
    """
    names = {}
    task_id = submit_user + task_name

    # task record -> uid list
    try:
        task_doc = es_group_result.get(
            index=group_index_name,
            doc_type=group_index_type,
            id=task_id,
        )
        group_source = task_doc["_source"]
    except:
        return names
    uid_list = group_source["uid_list"]

    # uid list -> portrait docs
    try:
        response = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": uid_list},
        )
        portrait_docs = response["docs"]
    except:
        return names

    for doc in portrait_docs:
        uid = doc["_id"]
        if doc["found"] == True:
            names[uid] = doc["_source"]["uname"]
        else:
            names[uid] = "unkown"

    return names
コード例 #16
0
        seed_user_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, \
                            body={'query':{'bool':{'must':query}}, 'size':1})['hits']['hits']
    except Exception, e:
        raise e
    try:
        seed_user_source = seed_user_result[0]['_source']
    except:
        return 'seed user invalid'

    #step2: identify the detect task name is valid----is not in group es
    task_information = input_dict['task_information']
    task_name = task_information['task_name']
    task_id = task_information['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)
    except:
        task_exist_result = {}
    if task_exist_result != {}:
        return 'task name invalid'

    #step3: save to es
    es_status = save_detect2es(input_dict)
    #step4: save to redis queue
    redis_status = save_detect2redis(input_dict)
    #identify the operation status
    if es_status == True and redis_status == True:
        status = True
    else:
        status = False
コード例 #17
0
def search_group_sentiment_weibo(task_name, submit_user, start_ts, sentiment):
    """Return one day of a group's weibo posts that carry the given sentiment.

    The group is the task with id ``submit_user + task_name``. Posts are
    searched in the flow-text index named after the date of ``start_ts`` and
    restricted to ``start_ts <= timestamp < start_ts + DAY``, in ascending
    timestamp order. A ``sentiment`` of "2" matches every value in
    ``SENTIMENT_SECOND``; any other value is matched exactly.

    Returns the string "task name invalid" when the task cannot be read, the
    string "task uid list null" when it has no uid list, and otherwise a list
    of dicts with the keys "uid", "uname", "ip", "geo", "text", "timestamp"
    and "sentiment" (empty when the search fails). "uname" is "unknown" for
    an author whose portrait name could not be loaded.
    """
    # step1:get task_name uid
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id, _source=False, fields=["uid_list"]
        )
    except Exception:
        group_result = {}
    if group_result == {}:
        return "task name invalid"
    try:
        uid_list = group_result["fields"]["uid_list"]
    except Exception:
        uid_list = []
    if uid_list == []:
        return "task uid list null"
    # step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": uid_list},
            _source=False,
            fields=["uname"],
        )["docs"]
    except Exception:
        # best effort: without portraits every author is reported as unknown
        user_portrait_result = []
    for item in user_portrait_result:
        if item["found"] == True:
            uid2uname[item["_id"]] = item["fields"]["uname"][0]
    # step3: build the query for the day of start_ts
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    if sentiment != "2":
        sentiment_clause = {"term": {"sentiment": sentiment}}
    else:
        sentiment_clause = {"terms": {"sentiment": SENTIMENT_SECOND}}
    query_body = [
        {"terms": {"uid": uid_list}},
        sentiment_clause,
        {"range": {"timestamp": {"gte": start_ts, "lt": start_ts + DAY}}},
    ]
    # step4: search the weibo
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body={
                "query": {"bool": {"must": query_body}},
                "sort": [{"timestamp": {"order": "asc"}}],
                "size": MAX_VALUE,
            },
        )["hits"]["hits"]
    except Exception:
        flow_text_result = []
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item["_source"]
        weibo = {}
        weibo["uid"] = source["uid"]
        # .get(): the map only holds found portraits (and is empty when the
        # mget failed), so a plain lookup raised KeyError for other authors.
        weibo["uname"] = uid2uname.get(weibo["uid"], "unknown")
        weibo["ip"] = source["ip"]
        try:
            weibo["geo"] = "\t".join(source["geo"].split("&"))
        except Exception:
            # geo missing or not a string
            weibo["geo"] = ""
        weibo["text"] = source["text"]
        weibo["timestamp"] = source["timestamp"]
        weibo["sentiment"] = source["sentiment"]
        weibo_list.append(weibo)

    return weibo_list
コード例 #18
0
def get_activity_weibo(task_name, submit_user, start_ts, time_segment=FOUR_HOUR):
    """Return the weibo posts of a group's members within one time segment.

    The group is the task with id ``submit_user + task_name``. Posts are
    searched in the flow-text index named after the date of ``start_ts`` and
    restricted to ``start_ts <= timestamp < start_ts + time_segment``, sorted
    by timestamp.

    Returns the string "task name invalid" when the task cannot be read, the
    string "task uid list null" when it has no uid list, and otherwise a list
    of dicts with the keys "timestamp", "ip", "text" and "geo" (empty when
    the search fails).
    """
    results = []
    # step1: get task_name uid
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(
            index=group_index_name, doc_type=group_index_type, id=task_id, _source=False, fields=["uid_list"]
        )
    except Exception:
        group_result = {}
    if group_result == {}:
        return "task name invalid"
    try:
        uid_list = group_result["fields"]["uid_list"]
    except Exception:
        uid_list = []
    if uid_list == []:
        return "task uid list null"
    # step2: get uid2uname
    # NOTE(review): the mapping is built but not used in the returned records.
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={"ids": uid_list},
            _source=False,
            fields=["uname"],
        )["docs"]
    except Exception:
        user_portrait_result = []
    for item in user_portrait_result:
        # The name is recorded only for found docs. Previously the store ran
        # for every doc, raising UnboundLocalError when the first doc was
        # not found and reusing the previous user's name afterwards.
        if item["found"] == True:
            uid2uname[item["_id"]] = item["fields"]["uname"][0]
    # step3: search time_segment weibo
    end_ts = start_ts + time_segment
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = [
        {"terms": {"uid": uid_list}},
        {"range": {"timestamp": {"gte": start_ts, "lt": end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body={"query": {"bool": {"must": query}}, "sort": "timestamp", "size": MAX_VALUE},
        )["hits"]["hits"]
    except Exception:
        flow_text_es_result = []
    for item in flow_text_es_result:
        source = item["_source"]
        weibo = {}
        weibo["timestamp"] = ts2date(source["timestamp"])
        weibo["ip"] = source["ip"]
        weibo["text"] = source["text"]
        if source["geo"]:
            # NOTE(review): joins the elements of geo directly; if geo is a
            # string this tab-separates its characters -- confirm the type.
            weibo["geo"] = "\t".join(source["geo"])
        else:
            weibo["geo"] = ""
        results.append(weibo)

    return results
コード例 #19
0
def search_group_sentiment_weibo(task_name, submit_user, start_ts, sentiment):
    """Search one day of a group's weibo posts by sentiment.

    The group is the task with id ``submit_user + task_name``. The search
    runs on the flow-text index named after the date of ``start_ts``, is
    limited to ``start_ts <= timestamp < start_ts + DAY`` and is sorted by
    ascending timestamp. A ``sentiment`` of '2' matches every value in
    ``SENTIMENT_SECOND``; any other value is matched exactly.

    Returns the string 'task name invalid' when the task cannot be read, the
    string 'task uid list null' when it has no uid list, and otherwise a list
    of dicts with the keys 'uid', 'uname', 'ip', 'geo', 'text', 'timestamp'
    and 'sentiment' (empty when the search fails). 'uname' is 'unknown' for
    an author whose portrait name could not be loaded.
    """
    #step1:get task_name uid
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_id, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except Exception:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: get uid2uname
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                                     body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        # best effort: without portraits every author is reported as unknown
        user_portrait_result = []
    for item in user_portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: get query_body for the day of start_ts
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    # only the sentiment clause differs between the two cases
    if sentiment != '2':
        sentiment_clause = {'term': {'sentiment': sentiment}}
    else:
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    query_body = [{'terms': {'uid': uid_list}}, sentiment_clause,
                  {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}}]
    #step4: search weibo
    try:
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'bool': {'must': query_body}}, 'sort': [{'timestamp': {'order': 'asc'}}], 'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_result = []
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # .get(): the map only holds found portraits (and is empty when the
        # mget failed), so a plain lookup raised KeyError for other authors.
        weibo['uname'] = uid2uname.get(weibo['uid'], 'unknown')
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except Exception:
            # geo missing or not a string
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)

    return weibo_list
コード例 #20
0
    try:
        seed_user_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, \
                            body={'query':{'bool':{'must':query}}, 'size':1})['hits']['hits']
    except Exception, e:
        raise e
    try:
        seed_user_source = seed_user_result[0]['_source']
    except:
        return 'seed user invalid'

    #step2: identify the detect task name is valid----is not in group es
    task_information = input_dict['task_information']
    task_name = task_information['task_name']
    task_id = task_information['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)
    except:
        task_exist_result = {}
    if task_exist_result != {}:
        return 'task name invalid'

    #step3: save to es
    es_status = save_detect2es(input_dict)
    #step4: save to redis queue
    redis_status = save_detect2redis(input_dict)
    #identify the operation status
    if es_status==True and redis_status==True:
        status = True
    else:
        status = False