Code Example #1
def construct(path):
    # get the list of files under path
    files = get_filelist(path)
    files = [os.path.join(home, file) for home, file in files]
    # cfg.segment: number of games to sample
    train_num = int(cfg.data_num * cfg.train)
    val_num = int(cfg.data_num * cfg.val)
    test_num = int(cfg.data_num * cfg.test)
    sizes = [size // cfg.segment for size in [0, train_num, val_num, test_num]]
    # size: number of samples drawn from each game
    size = sum(sizes)
    # split points of the training/validation/test sets, e.g. 50000, 60000, 70000
    sizes = np.cumsum(sizes)
    # randomly pick cfg.segment games
    files = random.sample(files, cfg.segment)
    data = [[] for _ in range(3)]
    for file in files:
        state, cat = read_pickle(file)
        # randomly draw size samples from one game
        idx = np.random.choice(range(len(state)), size, replace=False)
        state, cat = state[idx], cat[idx]
        # distribute the selected samples into the training/validation/test sets by ratio
        for i in range(3):
            data[i].append(
                (state[sizes[i]:sizes[i + 1]], cat[sizes[i]:sizes[i + 1]]))
    data = [[np.concatenate(j) for j in zip(*i)] for i in data]
    return data
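
A minimal sketch of the split arithmetic above, with hypothetical stand-ins for cfg.data_num, cfg.segment and the split ratios:

import numpy as np

data_num, segment = 80000, 100      # hypothetical cfg.data_num, cfg.segment
train, val, test = 0.5, 0.25, 0.25  # hypothetical cfg.train, cfg.val, cfg.test

per_game = [int(data_num * r) // segment for r in (0, train, val, test)]
size = sum(per_game)          # 800 samples drawn from each game
bounds = np.cumsum(per_game)  # array([  0, 400, 600, 800]): the split points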
Code Example #2
File: IF.py Project: jiadede/INFUNE
def match(train_test: list, k: int, option: str = '') -> None:
    """
    Main function for training.

    Args:
        train_test: matched pairs for training and testing, [[(i, j), ...], [(i, j), ...]]
        k: number of candidates, default: 30
        option: a space-separated string indicating which kinds of user information to use; valid kinds are structure, profile and content
    """

    # validate the requested types of information
    options = set(option.split(' '))
    all_options = {'structure', 'profile', 'content'}
    if len(options - all_options) > 0 or len(options) == 0:
        raise ValueError('options: structure, profile, content')

    # create the data directory if it does not exist
    dir = 'data'
    if not os.path.exists(dir):
        os.makedirs(dir)

    # placeholders for pre-computed intra-network and inter-network similarity matrices
    sims = {'intra': [], 'inter': []}

    if 'structure' in options:
        # adjacency matrix of the source network
        adj_s = read_pickle('{}/adj_s.pkl'.format(dir))
        # adjacency matrix of the target network
        adj_t = read_pickle('{}/adj_t.pkl'.format(dir))
        # intra-network similarity matrices are assumed in pairs
        sims['intra'].append([adj_s, adj_t])
    if 'profile' in options:
        # inter-network profile similarity matrix
        sim = read_pickle('{}/sims_p.pkl'.format(dir))
        sims['inter'].append(sim)
    if 'content' in options:
        # inter-network content similarity matrix
        sim = read_pickle('{}/sims_c.pkl'.format(dir))
        sims['inter'].append(sim)

    # set the CUDA device; multiple GPUs are not supported yet
    torch.cuda.set_device(cfg.cuda)
    # initialize an instructor that makes necessary reports.
    ins = IFIns(sims, train_test, k)
    # train the model
    ins.train()
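
A hypothetical invocation of match(); the pickled matrices under data/ are assumed to already exist, and the pairs below are toy placeholders:

train_pairs = [(0, 3), (1, 7)]  # toy matched pairs for training
test_pairs = [(2, 5)]           # toy matched pairs for testing
match([train_pairs, test_pairs], k=30, option='structure profile')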
Code Example #3
File: NE.py Project: jiadede/INFUNE
def test_row_nei_index_mul():
    """
    Find potentially matched user pairs and split their neighborhoods into two disjoint subsets: similar neighbors and dissimilar neighbors.
    """
    global sims, gs
    path = 'data/sims_{:.1f}.pkl'.format(cfg.ratio)
    # compute the user similarity matrix if it has not been cached yet
    if not os.path.exists(path):
        # load the pre-trained embeddings of users in the source/target networks
        left, right = read_pickle('data/emb_{:.1f}.pkl'.format(cfg.ratio))
        # compute the pairwise cosine similarities among users from the source and target networks.
        sims = cosine(left, right)
        # save the similarity matrix as a dense numpy array.
        sims = sims.detach().numpy()
        write_pickle(sims, path)
    else:
        sims = read_pickle(path)
    train, test = read_pickle('data/train_test_{:.1f}.pkl'.format(cfg.ratio))
    row_train, col_train = list(zip(*train))
    # elements of the similarity matrix are in [-1, 1]
    # assign the largest value 1 to matched user pairs in the training set.
    sims[row_train, col_train] = 1.
    # candidate users for neighborhood enhancement.
    # candidate[:len(train), 0] are ground truth on training set.
    k = 250
    sims, candidate = row_k_max(sims, k)

    # potentially matched user pairs
    left, right = [i.tolist() for i in sims.nonzero()]
    # adjacency list
    gs = read_pickle('results/adj_list.pkl')
    # use multiple processes to find similar and dissimilar neighbors
    pool = mp.Pool(8)
    t = time.time()
    pairs = list(zip(left, right))
    idx = list(pool.map(sim_nei_index, pairs))
    pool.close()
    pool.join()
    print('time: {:.2f}'.format(time.time() - t))
    # idx: indices of matched and unmatched neighbors
    # pairs: potentially matched user pairs, [(i, j), ...]
    # candidate: candidate users, the same information as pairs in per-row form, [[i, ...], ...]
    write_pickle(idx, 'results/col_nei_idx_{:.1f}.pkl'.format(cfg.ratio))
    write_pickle(pairs, 'results/pairs_{:.1f}.pkl'.format(cfg.ratio))
    write_pickle(candidate, 'results/candidate_{:.1f}.pkl'.format(cfg.ratio))
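
row_k_max is not shown here; judging from how its outputs are used (nonzero() yields the pairs, and one candidate row is kept per user), it plausibly keeps the k largest entries of each row and zeroes the rest. A sketch under that assumption, not the project's actual implementation:

import numpy as np

def row_k_max_sketch(sims: np.ndarray, k: int):
    # column indices of the k largest entries per row (unordered within the top k)
    candidate = np.argpartition(sims, -k, axis=1)[:, -k:]
    # zero out everything outside the per-row top k
    mask = np.zeros_like(sims, dtype=bool)
    np.put_along_axis(mask, candidate, True, axis=1)
    return np.where(mask, sims, 0.), candidate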
Code Example #4
File: NE.py Project: jiadede/INFUNE
def load_data():
    """
    Load data for neighborhood enhancement.

    Returns:
        emb: user embeddings
        nei: potentially matched user pairs and their split neighborhoods
        candidate: candidate users
        train_test: ground truth user pairs for training and testing
    """
    path = 'results/nei_{:.1f}.pkl'.format(cfg.ratio)
    if not os.path.exists(path):
        pairs = read_pickle('results/pairs_{:.1f}.pkl'.format(cfg.ratio))
        nei_idx = read_pickle('results/col_nei_idx_{:.1f}.pkl'.format(
            cfg.ratio))
        # key: a potentially matched pair, value: its neighbor indices
        # building the dict is time-consuming, so the result is cached
        nei = dict(zip(pairs, nei_idx))
        write_pickle(nei, path)
    else:
        nei = read_pickle(path)
    return \
        read_pickle('results/emb_{:.1f}.pkl'.format(cfg.ratio)), \
        nei, \
        read_pickle('results/candidate_{:.1f}.pkl'.format(cfg.ratio)), \
        read_pickle('data/train_test_{:.1f}.pkl'.format(cfg.ratio))
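
Hypothetical usage; the pickles are assumed to have been produced by the preceding steps, and the layout of the nei values follows the comments in code example #3:

emb, nei, candidate, train_test = load_data()
pair = next(iter(nei))  # a potentially matched user pair (i, j)
idx = nei[pair]         # indices splitting its neighborhood (assumed layout)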
Code Example #5
    file = path.replace('raw', 'data_100seg.pkl')
    write_pickle(data, file)


def construct_H3D():
    pass


def construct_Stanford():
    pass


def normalize(x, a=100, b=50):
    x = torch.cat([x[..., [0]] / a, x[..., [1]] / b], dim=-1)
    x = x * 2 - 1
    return x


def denormalize(x, a=100, b=50):
    x = (x + 1) / 2
    x = torch.cat([x[..., [0]] * a, x[..., [1]] * b], dim=-1)
    return x


if __name__ == "__main__":
    path = 'data/NBA/raw'
    construct_NBA(path)
    data = read_pickle('data/NBA/data.pkl')
    # x = torch.rand(2, 4, 3, 2)
    pass
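
A quick round-trip check for normalize/denormalize: with the default scales a=100, b=50, coordinates in [0, a] x [0, b] map into [-1, 1]^2 and back:

import torch

x = torch.tensor([[100., 50.], [0., 0.], [50., 25.]])
y = normalize(x)  # tensor([[ 1.,  1.], [-1., -1.], [ 0.,  0.]])
assert torch.allclose(denormalize(y), x)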
Code Example #6
File: IF.py Project: jiadede/INFUNE
def load_data(ratio: float) -> list:
    """
    Given a ratio of training data, get the corresponding ground truth user pairs for training and testing.
    """
    return read_pickle('data/train_test_{:.1f}.pkl'.format(ratio))
Code Example #7
File: run.py Project: hilbert9221/NRI-MPM
def load_data(args):
    path = 'data/{}/{}.pkl'.format(args.dyn, args.size)
    train, val, test = read_pickle(path)
    data = {'train': train, 'val': val, 'test': test}
    return data
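
A hypothetical call; args normally comes from argparse, and dyn/size select the dynamics type and dataset size encoded in the pickle's path (the values below are placeholders):

from argparse import Namespace

data = load_data(Namespace(dyn='spring', size=5000))
train, val, test = data['train'], data['val'], data['test']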