Example #1
def split_bulk_action(bulk_action, index_name):
    # Fallback for a failed bulk request: walk the action list two items at a
    # time (action metadata + document source) and index each pair on its own,
    # so one bad document no longer fails the whole batch.
    new_bulk_action = []
    for i in range(0, len(bulk_action)):
        if i % 2 == 0:
            new_bulk_action = [bulk_action[i], bulk_action[i + 1]]
            try:
                es.bulk(new_bulk_action, index=index_name, doc_type='user')
            except:
                print 'cron/flow3/scan_redis2es_comment.py&error-1&'
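
The commented-out except blocks in scan_comment below hint at how this helper is meant to be used: when a whole bulk request is rejected, fall back to re-indexing the (action metadata, document source) pairs one at a time. A minimal sketch of that fallback, assuming an elasticsearch-py client named es; the index name and sample pair are illustrative, not taken from the original script:

bulk_action = [{'index': {'_id': '123'}}, {'uid': '123', 'uid_comment': '{}'}]
index_name = 'comment_0'  # illustrative index name
try:
    es.bulk(bulk_action, index=index_name, doc_type='user')
except Exception:
    # retry pair by pair so one bad document does not sink the whole batch
    split_bulk_action(bulk_action, index_name)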
Example #3
def scan_comment():
    count = 0
    scan_cursor = 0
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #get redis db number
    db_number = get_db_num(now_date_ts)
    #comment/be_comment es mappings

    #get redis db
    comment_redis = comment_redis_dict[str(db_number)]

    # 1. check whether the db we are about to switch to already has data
    if RUN_TYPE:
        redis_host_list.remove(str(db_number))
        while 1:
            other_db_number = comment_redis_dict[redis_host_list[0]]
            current_dbsize = other_db_number.dbsize()
            if current_dbsize:
                break  # writing to the new db has started, so the previous day's data is complete
            else:
                time.sleep(60)

    # 2. delete the previous es index (recreate mappings)
    comment_es_mappings(str(db_number))
    be_comment_es_mappings(str(db_number))

    # 3. scan
    comment_bulk_action = []
    be_comment_bulk_action = []
    start_ts = time.time()
    #comment count/be_comment count
    comment_count = 0
    be_comment_count = 0
    while True:
        re_scan = comment_redis.scan(scan_cursor, count=100)
        re_scan_cursor = re_scan[0]
        for item in re_scan[1]:
            count += 1
            item_list = item.split('_')
            save_dict = {}
            if len(item_list) == 2:
                comment_count += 1
                uid = item_list[1]
                item_result = comment_redis.hgetall(item)
                save_dict['uid'] = uid
                save_dict['uid_comment'] = json.dumps(item_result)
                comment_bulk_action.extend([{
                    'index': {
                        '_id': uid
                    }
                }, save_dict])
            elif len(item_list) == 3:
                be_comment_count += 1
                uid = item_list[2]
                item_result = comment_redis.hgetall(item)
                save_dict['uid'] = uid
                save_dict['uid_be_comment'] = json.dumps(item_result)
                be_comment_bulk_action.extend([{
                    'index': {
                        '_id': uid
                    }
                }, save_dict])

        #try:
        if comment_bulk_action:
            es.bulk(comment_bulk_action,
                    index=comment_index_name_pre + str(db_number),
                    doc_type='user')
        #except:
        #    index_name = '1225_comment_'+str(db_number)
        #    split_bulk_action(comment_bulk_action, index_name)

        #try:
        if be_comment_bulk_action:
            es.bulk(be_comment_bulk_action,
                    index=be_comment_index_name_pre + str(db_number),
                    doc_type='user')
        #except:
        #    index_name = '1225_be_comment_'+str(db_number)
        #    split_bulk_action(be_comment_bulk_action, index_name)

        comment_bulk_action = []
        be_comment_bulk_action = []
        end_ts = time.time()
        #run_type
        #if RUN_TYPE == 1:
        print '%s sec scan %s count user' % (end_ts - start_ts, count)

        start_ts = end_ts
        scan_cursor = re_scan[0]
        if scan_cursor == 0:
            break

    # 4. flush redis
    if RUN_TYPE:
        comment_redis.flushdb()
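
The scan loop in scan_comment follows the standard Redis SCAN contract: start from cursor 0, feed the cursor returned by each call back into the next one, and stop once the cursor comes back around to 0. A minimal sketch of that pattern with redis-py; the connection settings are assumptions for illustration, not taken from the original configuration:

import redis

r = redis.StrictRedis(host='127.0.0.1', port=6379, db=0, decode_responses=True)  # assumed connection

cursor = 0
while True:
    # scan returns (next_cursor, keys); count is only a hint for the batch size
    cursor, keys = r.scan(cursor, count=100)
    for key in keys:
        fields = r.hgetall(key)  # each key in this workload is a redis hash
        # ... turn (key, fields) into an es bulk (action, source) pair here ...
    if cursor == 0:  # cursor wrapped back to 0: the keyspace has been fully scanned
        break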