Example #1
0
def compute_cfss(top_n=20, min_sim=1e-5):
    '''
    Compute the shop-shop similarity matrix and save it as cfss.kv.

    Input:
        shop_actu.kv: the actions users performed on shops, read from
        the keys matched by the S<digits>_ACTU pattern.
    Process:
        Build one vector per shop from those actions, keep only its
        ``top_n`` largest entries, normalize it, then score every pair
        of shops with ``norm_dot_product``.
    Output:
        The nested dict {shop: {other_shop: similarity}} is handed to
        ``write_kv_dict`` with the key template 'S%s_CFSIMS' and the
        file name 'cfss.kv' (the original note describes the layout as
        shop<TAB>shop:weight).

    Args:
        top_n: number of highest-valued entries kept per shop vector
            before normalization; ``None`` keeps every entry.
        min_sim: pairs whose absolute similarity is below this value
            are not stored.
    '''
    kvg = KVEngine()
    kvg.load([full_path('shop_actu.kv')])

    # Build the truncated, normalized vector of every shop.
    shop_users = {}
    for skey in kvg.keymatch(r'S\d+_ACTU'):
        sid = key_id(skey)
        # Entries with an empty key or an empty value are skipped.
        weights = dict((int(raw_id), float(raw_weight))
                       for (raw_id, raw_weight) in kvg.getd(skey).items()
                       if raw_id and raw_weight)
        # sorted() accepts a Python 3 dict view as well as a Python 2 list,
        # and is stable, so ties keep the same relative order as before.
        ranked = sorted(weights.items(), key=lambda pair: pair[1],
                        reverse=True)
        vector = dict(ranked[:top_n])
        # The return value is unused; normalize() presumably works in place.
        normalize(vector)
        shop_users[sid] = vector

    # Pairwise similarity over the upper triangle; each score is stored in
    # both directions so the resulting matrix is symmetric.
    shop_similarity = {}
    sids = sorted(shop_users)
    total = len(sids)
    print("Calculating shop-shop similarity matrix, total %d..." % total)
    for i, sid in enumerate(sids):
        if i % 1000 == 0:
            # Progress marker, flushed so it shows up during long runs.
            print("%d" % i)
            sys.stdout.flush()
        vector = shop_users[sid]
        for other in sids[i + 1:]:
            sim = norm_dot_product(vector, shop_users[other])
            if abs(sim) < min_sim:
                continue
            shop_similarity.setdefault(sid, {})[other] = sim
            shop_similarity.setdefault(other, {})[sid] = sim

    # Save as a kv file.
    write_kv_dict(shop_similarity, 'S%s_CFSIMS', 'cfss.kv')
Example #2
0
def compute_cfgg(top_n=20, min_sim=1e-5):
    '''
    Compute the goods-goods similarity matrix and save it as cfgg.kv.

    Input:
        goods_actu.kv: the actions users performed on goods, read from
        the keys matched by the G<digits>_ACTU pattern (the original
        note gives the file's lineage as user_actg.kv -> goods_actu.kv).
    Process:
        Build one vector per goods item from those actions, keep only
        its ``top_n`` largest entries, normalize it, then score every
        pair of goods with ``norm_dot_product``.
    Output:
        The nested dict {goods: {other_goods: similarity}} is handed to
        ``write_kv_dict`` with the key template 'G%s_CFSIMG' and the
        file name 'cfgg.kv'.

    Args:
        top_n: number of highest-valued entries kept per goods vector
            before normalization; ``None`` keeps every entry.
        min_sim: pairs whose absolute similarity is below this value
            are not stored.
    '''
    kvg = KVEngine()
    kvg.load([full_path('goods_actu.kv')])

    # Build the truncated, normalized vector of every goods item.
    goods_users = {}
    for gkey in kvg.keymatch(r'G\d+_ACTU'):
        gid = key_id(gkey)
        # Entries with an empty key or an empty value are skipped.
        weights = dict((int(raw_id), float(raw_weight))
                       for (raw_id, raw_weight) in kvg.getd(gkey).items()
                       if raw_id and raw_weight)
        # sorted() accepts a Python 3 dict view as well as a Python 2 list,
        # and is stable, so ties keep the same relative order as before.
        ranked = sorted(weights.items(), key=lambda pair: pair[1],
                        reverse=True)
        vector = dict(ranked[:top_n])
        # The return value is unused; normalize() presumably works in place.
        normalize(vector)
        goods_users[gid] = vector

    # Pairwise similarity over the upper triangle; each score is stored in
    # both directions so the resulting matrix is symmetric.
    goods_similarity = {}
    gids = sorted(goods_users)
    total = len(gids)
    print("Calculating goods-goods similarity matrix, total %d..." % total)
    for i, gid in enumerate(gids):
        if i % 100 == 0:
            # Progress marker, flushed so it shows up during long runs.
            print("%d" % i)
            sys.stdout.flush()
        vector = goods_users[gid]
        for other in gids[i + 1:]:
            sim = norm_dot_product(vector, goods_users[other])
            if abs(sim) < min_sim:
                continue
            goods_similarity.setdefault(gid, {})[other] = sim
            goods_similarity.setdefault(other, {})[gid] = sim

    # Save as a kv file.
    write_kv_dict(goods_similarity, 'G%s_CFSIMG', 'cfgg.kv')
Example #3
0
def compute_cfss(top_n=20, min_sim=1e-5):
    '''
    Compute the shop-shop similarity matrix and save it as cfss.kv.

    Input:
        shop_actu.kv: the actions users performed on shops, read from
        the keys matched by the S<digits>_ACTU pattern.
    Process:
        Build one vector per shop from those actions, keep only its
        ``top_n`` largest entries, normalize it, then score every pair
        of shops with ``norm_dot_product``.
    Output:
        The nested dict {shop: {other_shop: similarity}} is handed to
        ``write_kv_dict`` with the key template 'S%s_CFSIMS' and the
        file name 'cfss.kv' (the original note describes the layout as
        shop<TAB>shop:weight).

    Args:
        top_n: number of highest-valued entries kept per shop vector
            before normalization; ``None`` keeps every entry.
        min_sim: pairs whose absolute similarity is below this value
            are not stored.
    '''
    kvg = KVEngine()
    kvg.load([full_path('shop_actu.kv')])

    # Build the truncated, normalized vector of every shop.
    shop_users = {}
    for skey in kvg.keymatch(r'S\d+_ACTU'):
        sid = key_id(skey)
        # Entries with an empty key or an empty value are skipped.
        weights = dict((int(raw_id), float(raw_weight))
                       for (raw_id, raw_weight) in kvg.getd(skey).items()
                       if raw_id and raw_weight)
        # sorted() accepts a Python 3 dict view as well as a Python 2 list,
        # and is stable, so ties keep the same relative order as before.
        ranked = sorted(weights.items(), key=lambda pair: pair[1],
                        reverse=True)
        vector = dict(ranked[:top_n])
        # The return value is unused; normalize() presumably works in place.
        normalize(vector)
        shop_users[sid] = vector

    # Pairwise similarity over the upper triangle; each score is stored in
    # both directions so the resulting matrix is symmetric.
    shop_similarity = {}
    sids = sorted(shop_users)
    total = len(sids)
    print("Calculating shop-shop similarity matrix, total %d..." % total)
    for i, sid in enumerate(sids):
        if i % 1000 == 0:
            # Progress marker, flushed so it shows up during long runs.
            print("%d" % i)
            sys.stdout.flush()
        vector = shop_users[sid]
        for other in sids[i + 1:]:
            sim = norm_dot_product(vector, shop_users[other])
            if abs(sim) < min_sim:
                continue
            shop_similarity.setdefault(sid, {})[other] = sim
            shop_similarity.setdefault(other, {})[sid] = sim

    # Save as a kv file.
    write_kv_dict(shop_similarity, 'S%s_CFSIMS', 'cfss.kv')
Example #4
0
def compute_cfgg(top_n=20, min_sim=1e-5):
    '''
    Compute the goods-goods similarity matrix and save it as cfgg.kv.

    Input:
        goods_actu.kv: the actions users performed on goods, read from
        the keys matched by the G<digits>_ACTU pattern (the original
        note gives the file's lineage as user_actg.kv -> goods_actu.kv).
    Process:
        Build one vector per goods item from those actions, keep only
        its ``top_n`` largest entries, normalize it, then score every
        pair of goods with ``norm_dot_product``.
    Output:
        The nested dict {goods: {other_goods: similarity}} is handed to
        ``write_kv_dict`` with the key template 'G%s_CFSIMG' and the
        file name 'cfgg.kv'.

    Args:
        top_n: number of highest-valued entries kept per goods vector
            before normalization; ``None`` keeps every entry.
        min_sim: pairs whose absolute similarity is below this value
            are not stored.
    '''
    kvg = KVEngine()
    kvg.load([full_path('goods_actu.kv')])

    # Build the truncated, normalized vector of every goods item.
    goods_users = {}
    for gkey in kvg.keymatch(r'G\d+_ACTU'):
        gid = key_id(gkey)
        # Entries with an empty key or an empty value are skipped.
        weights = dict((int(raw_id), float(raw_weight))
                       for (raw_id, raw_weight) in kvg.getd(gkey).items()
                       if raw_id and raw_weight)
        # sorted() accepts a Python 3 dict view as well as a Python 2 list,
        # and is stable, so ties keep the same relative order as before.
        ranked = sorted(weights.items(), key=lambda pair: pair[1],
                        reverse=True)
        vector = dict(ranked[:top_n])
        # The return value is unused; normalize() presumably works in place.
        normalize(vector)
        goods_users[gid] = vector

    # Pairwise similarity over the upper triangle; each score is stored in
    # both directions so the resulting matrix is symmetric.
    goods_similarity = {}
    gids = sorted(goods_users)
    total = len(gids)
    print("Calculating goods-goods similarity matrix, total %d..." % total)
    for i, gid in enumerate(gids):
        if i % 100 == 0:
            # Progress marker, flushed so it shows up during long runs.
            print("%d" % i)
            sys.stdout.flush()
        vector = goods_users[gid]
        for other in gids[i + 1:]:
            sim = norm_dot_product(vector, goods_users[other])
            if abs(sim) < min_sim:
                continue
            goods_similarity.setdefault(gid, {})[other] = sim
            goods_similarity.setdefault(other, {})[gid] = sim

    # Save as a kv file.
    write_kv_dict(goods_similarity, 'G%s_CFSIMG', 'cfgg.kv')