def get_vec_list_norm_ndarray_redis(word_list):
    """Return the normalised vectors of ``word_list`` as numpy arrays.

    Vectors are looked up in the Redis cache (``Word2VecCache``) first. Words
    the cache does not know are fetched through ``get_vec_list``, normalised
    with ``norm_list`` and written back to the cache.

    The result lists the usable cached vectors first and the freshly fetched
    ones afterwards, so it is NOT aligned with ``word_list`` (the original
    note contrasts this with ``get_vec_list_norm_ndarray``). Words without a
    vector are left out.

    :param word_list: iterable of words to look up.
    :return: list of ``numpy.ndarray``, one per word that has a vector.
    """
    word_list = list(word_list)
    redis_res = Word2VecCache.get_vec_list(word_list)

    # While the underlying data is being replaced, every word can be treated
    # as missing from Redis so that it is re-fetched from the API:
    # redis_res = [NOT_IN_REDIS_SIGN] * len(word_list)

    # Step 1: split the cache reply into usable vectors and cache misses.
    # Misses are excluded explicitly (as in get_vec_list_redis) instead of
    # relying on BAD_VEC_SIGN happening to contain NOT_IN_REDIS_SIGN.
    vec_list = []
    not_in_redis_word_list = []
    for i, vec in enumerate(redis_res):
        if vec == NOT_IN_REDIS_SIGN:
            not_in_redis_word_list.append(word_list[i])
        elif vec not in BAD_VEC_SIGN:
            vec_list.append(vec)

    # Step 2: fetch the misses through get_vec_list and cache them. The
    # lookup is skipped entirely when the cache answered every word.
    if not_in_redis_word_list:
        vec_dict_from_api = get_vec_list(not_in_redis_word_list)
        for word in vec_dict_from_api:
            vec = norm_list(vec_dict_from_api[word])
            # Cached even when the word has no vector (original note:
            # vec == [] for such words).
            Word2VecCache.set_vec(word, vec)
            # NOTE(review): uses the same "no vector" sentinel as
            # get_vec_list_redis; the previous code compared against
            # NOT_IN_WORD2VEC_SIMILAR_SIGN_API - confirm both sentinels.
            if vec != NOT_IN_WORD2VEC_SIGN_API:
                vec_list.append(vec)

    return [np.array(vec) for vec in vec_list]
def test():
    """Manual smoke test for ``get_random_topn_news_from_redis``.

    Reads the user id from ``sys.argv[1]``, requests 2 news ids for that user
    and prints the pushed ids, the user's full id list, the shown-id list
    before and after the push, and the elapsed wall-clock time.
    """
    uid = sys.argv[1]
    t1 = time.time()
    # NOTE: despite the name, this list is read from Word2VecCache.
    solr_ids = Word2VecCache.get_user_all_news(uid)
    # The shown-news list is read before and after the push so the change
    # made by get_random_topn_news_from_redis can be compared in the output.
    before_redis_ids = Word2VecCache.get_user_showed_news(uid)
    push_ids = get_random_topn_news_from_redis(uid, 2)
    after_redis_ids = Word2VecCache.get_user_showed_news(uid)
    print 'push_ids', push_ids
    print 'solr_ids', solr_ids
    print 'before_redis_ids', before_redis_ids
    print 'after_redis_ids', after_redis_ids

    t2 = time.time()
    # Elapsed seconds for the whole run, including the prints above.
    print 't', t2 - t1
def get_random_topn_news_from_redis(uid, num):
    """Pick ``num`` news ids for ``uid`` from the id lists kept in the cache.

    Original note: this provides short-term storage for users that have not
    been processed by the offline computation yet.

    The user's id groups and the already shown ids are read from
    ``Word2VecCache``. If every stored id has already been shown, selection
    starts over and the shown list is replaced by the new picks. Otherwise
    the shown ids are passed to ``select_newsid`` (presumably so they are
    skipped - confirm against that helper) and the picks are appended to the
    shown list.

    :param uid: user id.
    :param num: number of ids requested from ``select_newsid``.
    :return: the ids chosen by ``select_newsid``; ``[]`` when nothing is
        stored for the user.
    """
    stored_groups = Word2VecCache.get_user_all_news(uid)
    shown_ids = Word2VecCache.get_user_showed_news(uid)
    if not stored_groups:
        return []

    # Flatten the per-group id lists into one set of candidate ids.
    candidate_ids = set(
        news_id for group in stored_groups for news_id in group
    )

    if candidate_ids <= set(shown_ids):
        # Everything was shown already: restart with only the new picks.
        push_ids = select_newsid(stored_groups, num)
        ids_to_store = push_ids
    else:
        push_ids = select_newsid(stored_groups, num, shown_ids)
        shown_ids.extend(push_ids)
        ids_to_store = shown_ids

    write_user_showed_news_to_redis(uid, ids_to_store)
    return push_ids
def get_vec_list_redis(word_list):
    """Redis-backed variant of ``get_vec_list``.

    Words are converted with ``ensure_unicode`` and looked up in the Redis
    cache (``Word2VecCache``). Words the cache does not know are fetched
    through ``get_vec_list``, normalised with ``norm_list`` and written back
    to the cache. Per the original notes, the vectors are normalised and are
    plain lists rather than ndarrays.

    :param word_list: iterable of words to look up.
    :return: dict mapping word -> vector; words without a vector are omitted.
    """
    word_list = [ensure_unicode(x) for x in word_list]
    redis_res = Word2VecCache.get_vec_list(word_list)

    # While the underlying data is being replaced, every word can be treated
    # as missing from Redis so that it is re-fetched from the API:
    # redis_res = [NOT_IN_REDIS_SIGN] * len(word_list)

    # Step 1: keep usable cached vectors (already normalised, per the
    # original note) and collect the words the cache does not know.
    vec_dict = {}
    not_in_redis_word_list = []
    for i, vec in enumerate(redis_res):
        if vec == NOT_IN_REDIS_SIGN:
            not_in_redis_word_list.append(word_list[i])
        elif vec not in BAD_VEC_SIGN:  # only words that have a vector
            vec_dict[word_list[i]] = vec

    # Nothing is missing: skip the get_vec_list lookup altogether.
    if not not_in_redis_word_list:
        return vec_dict

    # Step 2: fetch the misses through get_vec_list and cache them.
    vec_dict_from_api = get_vec_list(not_in_redis_word_list)
    for word in vec_dict_from_api:
        vec = norm_list(vec_dict_from_api[word])
        if vec != NOT_IN_WORD2VEC_SIGN_API:  # only words that have a vector
            vec_dict[word] = vec
        # Cached even when the word has no vector.
        Word2VecCache.set_vec(word, vec)

    return vec_dict
def get_similar_redis(word, num=30):
    # get_similar 的 redis 版
    # step 1 尝试从redis里边取
    print 'get_similar_redis', word
    similar_word_list = Word2VecCache.get_similar(word, num)
    # if isinstance(similar_word_list,list):
    #     for w,s in similar_word_list:
    #         print '-'*15
    #         print 'similar word from redis',w,s
    # else:
    #     print 'similar word from redis',similar_word_list

    if similar_word_list == NOT_IN_REDIS_SIGN:
        # 从api取
        similar_word_list = get_similar(word, num)
        # for w,s in similar_word_list:
        #     print '+'*15
        #     print 'similar word from api',w,s
        # 存入redis
        Word2VecCache.set_similar(word, similar_word_list)

    return similar_word_list
def get_ramdom_topn_news_from_solr(uid, num):
    """Pick ``num`` news ids for ``uid`` from the ids stored in solr.

    The recommended ids come from ``get_caled_user_topn_news_from_solr`` and
    the already shown ids from ``Word2VecCache``. If every recommended id has
    already been shown, selection starts over and the shown list is replaced
    by the new picks. Otherwise the shown ids are passed to ``select_newsid``
    (presumably so they are skipped - confirm against that helper) and the
    picks are appended to the shown list.

    :param uid: user id.
    :param num: number of ids requested from ``select_newsid``.
    :return: the ids chosen by ``select_newsid``.
    """
    recommended_ids = get_caled_user_topn_news_from_solr(uid)
    shown_ids = Word2VecCache.get_user_showed_news(uid)

    cleaned_ids, is_new_format = clean_caled_ids(recommended_ids)
    if is_new_format is False:
        # The cleaned ids are used directly as a flat collection of ids.
        candidate_ids = set(cleaned_ids)
    else:
        # The cleaned ids are a collection of id lists: flatten them.
        candidate_ids = set(
            news_id for group in cleaned_ids for news_id in group
        )

    if candidate_ids <= set(shown_ids):
        # Everything was shown already: restart with only the new picks.
        push_ids = select_newsid(cleaned_ids, num)
        ids_to_store = push_ids
    else:
        push_ids = select_newsid(cleaned_ids, num, shown_ids)
        shown_ids.extend(push_ids)
        ids_to_store = shown_ids

    write_user_showed_news_to_redis(uid, ids_to_store)
    return push_ids
def write_user_showed_news_to_redis(uid, ids):
    """Store ``ids`` as the shown news of user ``uid``.

    Delegates to ``Word2VecCache.set_user_showed_news`` and returns whatever
    that call returns.
    """
    outcome = Word2VecCache.set_user_showed_news(uid, ids)
    return outcome
def write_user_all_news_to_redis(uid, id_list):
    """Store ``id_list`` as the full news id list of user ``uid``.

    Delegates to ``Word2VecCache.set_user_all_news`` and returns whatever
    that call returns.
    """
    outcome = Word2VecCache.set_user_all_news(uid, id_list)
    return outcome