# Example #1
def compute_group_inner(task_name, task_user, start_ts):
    """Compute the in-group be-retweeted ranking for one monitor task.

    step1: read each task user's in-group retweet relations from the
           monitor_inner_r redis hash (15-minute buckets over the last day)
    step2: merge the buckets into one per-root-user retweet dict
    step3: sum be-retweet counts per root user and rank them
    step4: save the top5 plus the full inner-retweet graph to es
           (index=monitor_result, doc_type=task_name, _id='inner_'+date,
           e.g. 'inner_2013-09-01')

    Returns group_status: 1 on completion (including the nothing-to-rank
    case), 0 is never returned in practice but kept for the callers'
    original contract.
    """
    group_status = 0
    time_segment = 3600 * 24       # look back one full day
    iter_time_segment = 900        # relations are bucketed per 15 minutes
    inner_group_dict = {}
    user_count_dict = {}
    for root_uid in task_user:
        # NOTE(review): the original loop header was corrupted in this
        # file; reconstructed from the surviving body — each root user
        # starts with an empty merged dict and its own bucket cursor.
        inner_group_dict[root_uid] = {}
        iter_ts = start_ts - time_segment
        while iter_ts < start_ts:
            key = 'inner_' + str(iter_ts)
            inner_retweet_string = monitor_inner_r.hget(root_uid, key)
            if inner_retweet_string:
                inner_retweet_dict = json.loads(inner_retweet_string)
                inner_group_dict[root_uid] = merge_dict(
                    inner_group_dict[root_uid], inner_retweet_dict)
            iter_ts += iter_time_segment
        user_count_dict[root_uid] = sum(inner_group_dict[root_uid].values())
    all_be_retweet_count = sum(user_count_dict.values())
    if all_be_retweet_count == 0:
        # nothing was retweeted inside the group — nothing to index
        group_status = 1
        return group_status
    sort_user_inner_retweet_count = sorted(user_count_dict.items(),
                                           key=lambda x: x[1],
                                           reverse=True)
    top5_user = sort_user_inner_retweet_count[:5]

    # timestamp: '2013-09-01' — the day the window covers
    date = ts2datetime(start_ts - time_segment)
    index_body = {'date': date}
    # fix: only emit as many topN entries as users exist (the original
    # range(1, 6) raised IndexError with fewer than 5 ranked users)
    for rank in range(1, len(top5_user) + 1):
        index_body['top' + str(rank)] = json.dumps(top5_user[rank - 1])
    key = 'inner_' + date
    # save inner-retweet graph by dict {root_uid1:{uid1:count1, uid2:count2}, ...}
    index_body['inner_graph'] = json.dumps(inner_group_dict)

    es.index(index=monitor_index_name,
             doc_type=task_name,
             id=key,
             body=index_body)
    group_status = 1
    return group_status
def inner_group_retweet(item):
    """Record one in-group retweet event into redis.

    item carries 'root_uid', 'uid' and 'timestamp'. Counts are kept per
    root_uid in a redis hash whose field is 'inner_<slot_start_ts>',
    where slot_start_ts is the start of the 15-minute (900s) bucket the
    event falls into; the value is a JSON dict {uid: count}.
    """
    root_uid = str(item['root_uid'])
    uid = str(item['uid'])
    timestamp = item['timestamp']
    # start-of-day timestamp, then snap down to the 900s bucket boundary
    day_start_ts = datetime2ts(ts2datetime(timestamp))
    bucket_index = int((timestamp - day_start_ts) / 900)
    slot_start_ts = day_start_ts + bucket_index * 900
    field = 'inner_' + str(slot_start_ts)
    stored = monitor_inner_r.hget(root_uid, field)
    if stored:
        counts = json.loads(stored)
        counts[uid] = counts.get(uid, 0) + 1
    else:
        counts = {uid: 1}
    monitor_inner_r.hset(root_uid, field, json.dumps(counts))
def inner_group_retweet(item):
    """Count a single in-group retweet in redis.

    Expects item to be a dict with 'root_uid', 'uid' and 'timestamp'.
    The event is bucketed into the 15-minute (900s) slot containing its
    timestamp; per root_uid, the hash field 'inner_<slot_ts>' stores a
    JSON-encoded {uid: count} mapping.
    """
    root_uid = str(item['root_uid'])
    uid = str(item['uid'])
    ts = item['timestamp']
    midnight_ts = datetime2ts(ts2datetime(ts))
    # snap the event time down to its 900-second slot boundary
    slot_ts = midnight_ts + int((ts - midnight_ts) / 900) * 900
    hash_field = 'inner_' + str(slot_ts)
    raw = monitor_inner_r.hget(root_uid, hash_field)
    if not raw:
        payload = {uid: 1}
    else:
        payload = json.loads(raw)
        if uid in payload:
            payload[uid] += 1
        else:
            payload[uid] = 1
    monitor_inner_r.hset(root_uid, hash_field, json.dumps(payload))
def compute_group_inner(task_name, task_user, start_ts):
    """Compute the in-group be-retweeted ranking for one monitor task.

    step1: read each task user's in-group retweet relations from the
           monitor_inner_r redis hash (15-minute buckets over the last day)
    step2: merge the buckets into one per-root-user retweet dict
    step3: sum be-retweet counts per root user and rank them
    step4: save the top5 plus the full inner-retweet graph to es
           (index=monitor_result, doc_type=task_name, _id='inner_'+date,
           e.g. 'inner_2013-09-01')

    Returns group_status: 1 on completion (including the nothing-to-rank
    case), 0 is never returned in practice but kept for the callers'
    original contract.
    """
    group_status = 0
    time_segment = 3600 * 24       # look back one full day
    iter_time_segment = 900        # relations are bucketed per 15 minutes
    inner_group_dict = {}
    user_count_dict = {}
    for root_uid in task_user:
        # NOTE(review): the original loop header was corrupted in this
        # file; reconstructed from the surviving body — each root user
        # starts with an empty merged dict and its own bucket cursor.
        inner_group_dict[root_uid] = {}
        iter_ts = start_ts - time_segment
        while iter_ts < start_ts:
            key = 'inner_' + str(iter_ts)
            inner_retweet_string = monitor_inner_r.hget(root_uid, key)
            if inner_retweet_string:
                inner_retweet_dict = json.loads(inner_retweet_string)
                inner_group_dict[root_uid] = merge_dict(
                    inner_group_dict[root_uid], inner_retweet_dict)
            iter_ts += iter_time_segment
        user_count_dict[root_uid] = sum(inner_group_dict[root_uid].values())
    all_be_retweet_count = sum(user_count_dict.values())
    if all_be_retweet_count == 0:
        # nothing was retweeted inside the group — nothing to index
        group_status = 1
        return group_status
    sort_user_inner_retweet_count = sorted(user_count_dict.items(),
                                           key=lambda x: x[1],
                                           reverse=True)
    top5_user = sort_user_inner_retweet_count[:5]

    # timestamp: '2013-09-01' — the day the window covers
    date = ts2datetime(start_ts - time_segment)
    index_body = {'date': date}
    # fix: only emit as many topN entries as users exist (the original
    # range(1, 6) raised IndexError with fewer than 5 ranked users)
    for rank in range(1, len(top5_user) + 1):
        index_body['top' + str(rank)] = json.dumps(top5_user[rank - 1])
    key = 'inner_' + date
    # save inner-retweet graph by dict {root_uid1:{uid1:count1, uid2:count2}, ...}
    index_body['inner_graph'] = json.dumps(inner_group_dict)

    es.index(index=monitor_index_name,
             doc_type=task_name,
             id=key,
             body=index_body)
    group_status = 1
    return group_status