Code Example #1
 def __init__(self,
              dataset: BasicDataset,
              student: PairWiseModel,
              teacher: PairWiseModel,
              dns_k: int,
              method: int = 3,
              beta=world.beta):
     """
         method 1 for convex combination
         method 2 for random indicator
         method 3 for simplified method 2
     """
     self.beta = beta
     self.W = torch.Tensor([world.p0])
     self.dataset = dataset
     self.student = student
     self.teacher = teacher
     # self.methods = {
     #     'combine' : self.convex_combine, # not yet
     #     'indicator' : self.random_indicator,
     #     'simple' : self.max_min,
     #     'weight' : self.weight_pair,
     # }
     self.method = 'combine'
     self.Sample = self.convex_combine
     cprint(f"Using {self.method}")
     self.dns_k = dns_k
     self.soft = Softmax(dim=1)
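
Note: the constructor above wires self.Sample to self.convex_combine, whose body is not shown in this excerpt. A minimal sketch of what a convex-combination negative sampler could look like — the blending rule, tensor shapes, and function name below are assumptions, not code from this project:

import torch

def convex_combine_sketch(student_scores: torch.Tensor,
                          teacher_scores: torch.Tensor,
                          beta: float) -> torch.Tensor:
    # student_scores / teacher_scores: (batch_size, dns_k) scores for
    # each user's dns_k candidate negative items
    blended = beta * student_scores + (1.0 - beta) * teacher_scores
    # dynamic negative sampling: pick the hardest (highest-scoring) candidate
    return torch.argmax(blended, dim=1)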
Code Example #2
File: model.py Project: dongwu92/lightgcn-torch
 def __init_weight(self):
     self.num_users = self.dataset.n_users
     self.num_items = self.dataset.m_items
     self.latent_dim = self.config['latent_dim_rec']
     self.n_layers = self.config['lightGCN_n_layers']
     self.keep_prob = self.config['keep_prob']
     self.A_split = self.config['A_split']
     self.embedding_user = torch.nn.Embedding(num_embeddings=self.num_users,
                                              embedding_dim=self.latent_dim)
     self.embedding_item = torch.nn.Embedding(num_embeddings=self.num_items,
                                              embedding_dim=self.latent_dim)
     if self.config['pretrain'] == 0:
         #             nn.init.xavier_uniform_(self.embedding_user.weight, gain=1)
         #             nn.init.xavier_uniform_(self.embedding_item.weight, gain=1)
         #             print('use xavier initilizer')
         # random normal init seems to be a better choice, since LightGCN doesn't use any non-linear activation function
         nn.init.normal_(self.embedding_user.weight, std=0.1)
         nn.init.normal_(self.embedding_item.weight, std=0.1)
         world.cprint('use NORMAL distribution initializer')
     else:
         self.embedding_user.weight.data.copy_(
             torch.from_numpy(self.config['user_emb']))
         self.embedding_item.weight.data.copy_(
             torch.from_numpy(self.config['item_emb']))
         print('use pretrained data')
     self.f = nn.Sigmoid()
     self.Graph = self.dataset.getSparseGraph()
     print(f"lgn is already to go(dropout:{self.config['dropout']})")
Code Example #3
File: model.py Project: huangniu1124/LGCACF
    def __init_weight(self):
        self.map_table = self.dataset.map_table

        self.num_users = self.dataset.n_users
        self.num_all_items = self.dataset.n_all_item
        self.latent_dim = self.config['latent_dim_rec']
        self.n_layers = self.config['GCN_n_layers']
        self.keep_prob = self.config['keep_prob']
        self.A_split = self.config['A_split']
        self.aspect = self.config['aspect']

        self.embedding_user = nn.ParameterList(
            nn.Parameter(torch.randn(self.num_users, self.latent_dim))
            for i in range(len(self.aspect)))
        self.embedding_item = nn.ParameterList()
        for i in range(len(self.aspect)):
            self.embedding_item.append(
                nn.Parameter(
                    torch.randn(self.num_all_items[i], self.latent_dim)))

        if self.config['pretrain'] == 0:
            # nn.init.xavier_uniform_(self.embedding_dict[''].weight, gain=0.1)
            # print('use xavier initializer')

            for i in range(len(self.aspect)):
                nn.init.normal_(self.embedding_user[i], std=0.1)
                nn.init.normal_(self.embedding_item[i], std=0.1)
            world.cprint('use NORMAL distribution initializer')
        else:
            # not implemented
            print('use pre-trained data')
        self.f = nn.Sigmoid()
        self.Graph = self.dataset.getSparseGraph()  # adjacency matrix
        print(f"multi-lgn is already to go(dropout:{self.config['dropout']}")
Code Example #4
    def __init__(self, config=world.config, path="../data/TaoBao"):
        super(Amazon, self).__init__()
        # train or test
        cprint(f"loading [{path}]")
        self.mode_dict = {'train': 0, 'test': 1}
        self.mode = self.mode_dict['train']
        self.aspect = config['aspect']
        self.split = config['A_split']
        self.folds = config['A_n_fold']
        self.path = path
        self.map_table = pd.read_csv(join(path, 'map-table.csv'),
                                     sep=',',
                                     header=0)
        self.n_user = 13201
        self.n_all_item = [14094, 2771, 15]
        train_data = pd.read_csv(join(path, 'train.txt'),
                                 sep=' ',
                                 header=None)
        test_data = pd.read_csv(join(path, 'test.txt'), sep=' ', header=None)
        self.trainData = train_data
        self.testData = test_data
        self.trainUser = np.array(train_data[:][0])  # train user list
        self.trainUniqueUsers = np.unique(self.trainUser)
        self.trainAllItem = []  # train item of multi aspect info
        for i in range(len(self.aspect)):
            self.trainAllItem.append(np.array(train_data[:][i + 1]))
        self.testUser = np.array(test_data[:][0])  # test user list
        self.testUniqueUsers = np.unique(self.testUser)
        self.testItem = np.array(test_data[:][1])  # test item list
        self.Graph = None

        print(f"{self.trainDataSize} interactions for training")
        print(f"{self.testDataSize} interactions for testing")
        print(
            f"{world.dataset} Sparsity: {(self.trainDataSize + self.testDataSize) / self.n_users / self.m_items}"
        )

        # bipartite graph
        self.InteractNet = []
        self.users_D = []
        self.all_items_D = []
        for i in range(len(self.aspect)):
            self.InteractNet.append(
                csr_matrix((np.ones(len(self.trainUser)),
                            (self.trainUser, self.trainAllItem[i]))))
            self.users_D.append(
                np.array(self.InteractNet[i].sum(axis=1)).squeeze())
            self.users_D[i][self.users_D[i] == 0.] = 1
            self.all_items_D.append(
                np.array(self.InteractNet[i].sum(axis=0)).squeeze())
            self.all_items_D[i][self.all_items_D[i] == 0.] = 1

        # pre-calculate
        self._allPos = self.getUserPosItems(list(range(self.n_users)))
        self.__testDict = self.__build_test()
Code Example #5
File: dataloader.py Project: dongwu92/lightgcn-torch
    def __init__(self, path="../data/lastfm"):
        # train or test
        cprint("loading [last fm]")
        self.mode_dict = {'train': 0, "test": 1}
        self.mode = self.mode_dict['train']
        # self.n_users = 1892
        # self.m_items = 4489
        trainData = pd.read_table(join(path, 'data1.txt'), header=None)
        # print(trainData.head())
        testData = pd.read_table(join(path, 'test1.txt'), header=None)
        # print(testData.head())
        trustNet = pd.read_table(join(path, 'trustnetwork.txt'),
                                 header=None).to_numpy()
        # print(trustNet[:5])
        trustNet -= 1
        trainData -= 1
        testData -= 1
        self.trustNet = trustNet
        self.trainData = trainData
        self.testData = testData
        self.trainUser = np.array(trainData[:][0])
        self.trainUniqueUsers = np.unique(self.trainUser)
        self.trainItem = np.array(trainData[:][1])
        # self.trainDataSize = len(self.trainUser)
        self.testUser = np.array(testData[:][0])
        self.testUniqueUsers = np.unique(self.testUser)
        self.testItem = np.array(testData[:][1])
        self.Graph = None
        print(
            f"LastFm Sparsity : {(len(self.trainUser) + len(self.testUser))/self.n_users/self.m_items}"
        )

        # (users,users)
        self.socialNet = csr_matrix(
            (np.ones(len(trustNet)), (trustNet[:, 0], trustNet[:, 1])),
            shape=(self.n_users, self.n_users))
        # (users,items), bipartite graph
        self.UserItemNet = csr_matrix(
            (np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
            shape=(self.n_users, self.m_items))

        # pre-calculate
        self._allPos = self.getUserPosItems(list(range(self.n_users)))
        self.allNeg = []
        allItems = set(range(self.m_items))
        for i in range(self.n_users):
            pos = set(self._allPos[i])
            neg = allItems - pos
            self.allNeg.append(np.array(list(neg)))
        self.__testDict = self.__build_test()
Code Example #6
    def __init_weight(self):
        self.num_users  = self.dataset.n_users
        self.num_items  = self.dataset.m_items
        self.latent_dim = self.config['latent_dim_rec']
        self.mini_latent_dim = self.config['mini_latent_dim_rec']
        self.n_layers = self.config['lightGCN_n_layers']
        self.keep_prob = self.config['keep_prob']
        self.A_split = self.config['A_split']
        
        # Full model params
        self.embedding_user = torch.nn.Embedding(
            num_embeddings=self.num_users, embedding_dim=self.latent_dim)
        self.embedding_item = torch.nn.Embedding(
            num_embeddings=self.num_items, embedding_dim=self.latent_dim)
        self.bias_user = nn.Embedding(self.num_users, 1)
        self.bias_item = nn.Embedding(self.num_items, 1)
        self.bias_user.weight.data.fill_(0.)
        self.bias_item.weight.data.fill_(0.)

        # Mini model params
        self.mm_embedding_user = torch.nn.Embedding(
            num_embeddings=self.num_users, embedding_dim=self.mini_latent_dim)
        self.mm_embedding_item = torch.nn.Embedding(
            num_embeddings=self.num_items, embedding_dim=self.mini_latent_dim)

        # project the mini embeddings into the full embedding space
        self.proj = nn.Upsample(size=self.latent_dim)

        nn.init.normal_(self.embedding_user.weight, std=0.1)
        nn.init.normal_(self.embedding_item.weight, std=0.1)
        nn.init.normal_(self.mm_embedding_user.weight, std=0.1)
        nn.init.normal_(self.mm_embedding_item.weight, std=0.1)
        world.cprint('use NORMAL distribution initializer')

        # Load state_dict for just the full model:
        self.embedding_user.weight.data.copy_(self.checkpt['embedding_user.weight'])
        self.embedding_item.weight.data.copy_(self.checkpt['embedding_item.weight'])
        self.bias_user.weight.data.copy_(self.checkpt['bias_user.weight'])
        self.bias_item.weight.data.copy_(self.checkpt['bias_item.weight'])

        self.f = nn.Sigmoid()
        self.Graph = self.dataset.getSparseGraph()
        print(f"lgn is already to go(dropout:{self.config['dropout']})")
Code Example #7
    def __init__(self, path="../data/skills-predictor"):
        self.path = path
        # train or test
        print(self.m_items)
        cprint("loading [last fm]")
        self.mode_dict = {'train': 0, "test": 1}
        self.mode = self.mode_dict['train']
        trainData = pd.read_csv(join(path, 'all_data.csv'))
        # print(trainData.head())
        testData = pd.read_csv(join(path, 'test.csv'))
        users, skills, rates = trainData["user"], trainData[
            "skill"], trainData["rate"]
        self.UserItemNet = csr_matrix(
            (rates, (users, skills)),
            dtype="float32",
            shape=(self.n_users, self.m_items),
        )
        # self.trustNet  = trustNet
        self.trainData = trainData
        self.testData = testData
        self.trainUser = np.array(trainData['user'])
        self.trainUniqueUsers = np.unique(self.trainUser)
        self.trainItem = np.array(trainData['skill'])
        # self.trainDataSize = len(self.trainUser)
        self.testUser = np.array(testData['user'])
        self.testUniqueUsers = np.unique(self.testUser)
        self.testItem = np.array(testData['skill'])
        self.Graph = None

        print(
            f"Sparsity : {(len(self.trainUser) + len(self.testUser))/self.n_users/self.m_items}"
        )

        # pre-calculate
        self._allPos = self.getUserPosItems(list(range(self.n_users)))
        self.allNeg = []
        allItems = set(range(self.m_items))
        for i in range(self.n_users):
            pos = set(self._allPos[i])
            neg = allItems - pos
            self.allNeg.append(np.array(list(neg)))
        self.__testDict = self.__build_test()
Code Example #8
 def __init_weight(self):
     self.num_users = self.dataset.n_users
     self.num_items = self.dataset.m_items
     self.latent_dim = self.config['latent_dim_rec']
     self.n_layers = self.config['lightGCN_n_layers']
     self.keep_prob = self.config['keep_prob']
     self.A_split = self.config['A_split']
     self.embedding_user = torch.nn.Embedding(num_embeddings=self.num_users,
                                              embedding_dim=self.latent_dim)
     self.embedding_item = torch.nn.Embedding(num_embeddings=self.num_items,
                                              embedding_dim=self.latent_dim)
     if self.config['pretrain'] == 0:
         nn.init.normal_(self.embedding_user.weight, std=0.1)
         nn.init.normal_(self.embedding_item.weight, std=0.1)
         world.cprint('use NORMAL distribution initializer')
     else:
         self.embedding_user.weight.data.copy_(
             torch.from_numpy(self.config['user_emb']))
         self.embedding_item.weight.data.copy_(
             torch.from_numpy(self.config['item_emb']))
         print('use pretrained data')
     self.f = nn.Sigmoid()
     self.Graph = self.dataset.getSparseGraph()
     print(f"lgn is already to go(dropout:{self.config['dropout']})")
Code Example #9
File: main.py Project: gusye1234/KD_on_Ranking
# ----------------------------------------------------------------------------
# init model
world.DISTILL = False
if len(world.comment) == 0:
    comment = f"{world.method}"
    if world.EMBEDDING:
        comment = comment + "-embed"
    world.comment = comment
import register
from register import dataset

if world.EMBEDDING:
    # embedding distillation
    print("distill")
    tea_config = utils.getTeacherConfig(world.config)
    world.cprint('teacher')
    teacher_model = register.MODELS[world.model_name](tea_config,
                                                      dataset,
                                                      fix=True)
    teacher_model.eval()
    teacher_file = utils.getFileName(world.model_name,
                                     world.dataset,
                                     world.config['teacher_dim'],
                                     layers=world.config['teacher_layer'])
    teacher_weight_file = os.path.join(world.FILE_PATH, teacher_file)
    print('-------------------------')
    world.cprint("loaded teacher weights from")
    print(teacher_weight_file)
    print('-------------------------')
    utils.load(teacher_model, teacher_weight_file)
    teacher_model = teacher_model.to(world.DEVICE)
Code Example #10
File: dataloader.py Project: dongwu92/lightgcn-torch
    def __init__(self, config=world.config, path="../data/gowalla"):
        # train or test
        cprint(f'loading [{path}]')
        self.split = config['A_split']
        self.folds = config['A_n_fold']
        self.mode_dict = {'train': 0, "test": 1}
        self.mode = self.mode_dict['train']
        self.n_user = 0
        self.m_item = 0
        train_file = path + '/train.txt'
        test_file = path + '/test.txt'
        self.path = path
        trainUniqueUsers, trainItem, trainUser = [], [], []
        testUniqueUsers, testItem, testUser = [], [], []
        self.traindataSize = 0
        self.testDataSize = 0

        with open(train_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    trainUniqueUsers.append(uid)
                    trainUser.extend([uid] * len(items))
                    trainItem.extend(items)
                    self.m_item = max(self.m_item, max(items))
                    self.n_user = max(self.n_user, uid)
                    self.traindataSize += len(items)
        self.trainUniqueUsers = np.array(trainUniqueUsers)
        self.trainUser = np.array(trainUser)
        self.trainItem = np.array(trainItem)

        with open(test_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    testUniqueUsers.append(uid)
                    testUser.extend([uid] * len(items))
                    testItem.extend(items)
                    self.m_item = max(self.m_item, max(items))
                    self.n_user = max(self.n_user, uid)
                    self.testDataSize += len(items)
        self.m_item += 1
        self.n_user += 1
        self.testUniqueUsers = np.array(testUniqueUsers)
        self.testUser = np.array(testUser)
        self.testItem = np.array(testItem)

        self.Graph = None
        print(f"{self.trainDataSize} interactions for training")
        print(f"{self.testDataSize} interactions for testing")
        print(
            f"{world.dataset} Sparsity : {(self.trainDataSize + self.testDataSize) / self.n_users / self.m_items}"
        )

        # (users,items), bipartite graph
        self.UserItemNet = csr_matrix(
            (np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
            shape=(self.n_user, self.m_item))
        self.users_D = np.array(self.UserItemNet.sum(axis=1)).squeeze()
        self.users_D[self.users_D == 0.] = 1
        self.items_D = np.array(self.UserItemNet.sum(axis=0)).squeeze()
        self.items_D[self.items_D == 0.] = 1.
        # pre-calculate
        self._allPos = self.getUserPosItems(list(range(self.n_user)))
        self.__testDict = self.__build_test()
        print(f"{world.dataset} is ready to go")
Code Example #11
bpr = utils.BPRLoss(Recmodel, world.config)
best_result = {
    'recall': np.array([0.0]),
    'precision': np.array([0.0]),
    'ndcg': np.array([0.0]),
    'auc': np.array([0.0])
}
weight_file = utils.getFileName()
print(f"load and save to {weight_file}")
print(Recmodel)

if world.LOAD:
    try:
        Recmodel.load_state_dict(
            torch.load(weight_file, map_location=torch.device('cpu')))
        world.cprint(f"loaded model weights from {weight_file}")
    except FileNotFoundError:
        print(f"{weight_file} not exists, start from beginning")

# init tensorboard
if world.tensorboard:
    w: SummaryWriter = SummaryWriter(
        join(world.BOARD_PATH,
             time.strftime("%m-%d-%Hh%Mm%Ss-") + "-" + world.comment))
else:
    w = None
    world.cprint("not enable tensorflowboard")

try:
    for epoch in range(world.TRAIN_epochs):
        print('======================')
Code Example #12
    def __init__(self, config=world.config, path="../data/gowalla"):
        # train or test
        cprint(f'loading [{path}]')
        print(config)
        self.split = config['A_split']
        self.folds = config['A_n_fold']
        self.mode_dict = {'train': 0, "test": 1}
        self.mode = self.mode_dict['train']
        self.n_user = 0
        self.m_item = 0
        train_file = path + '/train.txt'
        test_file = path + '/test.txt'
        self.path = path
        trainUniqueUsers, trainItem, trainUser = [], [], []
        testUniqueUsers, testItem, testUser = [], [], []
        self.traindataSize = 0
        self.testDataSize = 0

        with open(train_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')

                    items = [int(i) if i != '' else -1 for i in l[1:]]
                    uid = int(l[0])
                    trainUniqueUsers.append(uid)
                    trainUser.extend([uid] * len(items))
                    trainItem.extend(items)
                    self.m_item = max(self.m_item, max(items))
                    self.n_user = max(self.n_user, uid)
                    self.traindataSize += len(items)
        self.trainUniqueUsers = np.array(trainUniqueUsers)
        self.trainUser = np.array(trainUser)
        self.trainItem = np.array(trainItem)

        with open(test_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) if i != '' else -1 for i in l[1:]]
                    uid = int(l[0])
                    testUniqueUsers.append(uid)
                    testUser.extend([uid] * len(items))
                    testItem.extend(items)
                    self.m_item = max(self.m_item, max(items))
                    self.n_user = max(self.n_user, uid)
                    self.testDataSize += len(items)
        self.m_item += 1
        self.n_user += 1
        self.testUniqueUsers = np.array(testUniqueUsers)
        self.testUser = np.array(testUser)
        self.testItem = np.array(testItem)
        self.UserItemNet = csr_matrix(
            (np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
            shape=(self.n_user, self.m_item))

        self._allPos = self.getUserPosItems(list(range(self.n_user)))
        # bipartite graph: re-index items to come after users
        self.trainItem += self.n_user
        #         self.testItem = self.n_user
        print(self.trainItem, self.trainItem.shape)
        first_sub = np.stack([self.trainUser, self.trainItem])
        second_sub = np.stack([self.trainItem, self.trainUser])
        self.train_edge = np.concatenate(
            [first_sub.reshape(-1, 1),
             second_sub.reshape(-1, 1)], axis=-1)
        self.train_edge = sorted(self.train_edge, key=lambda x: x[0])
Code Example #13
    def __init__(self, config=world.config, path="../data/movielens"):
        super(Movie, self).__init__()
        # train or test
        cprint(f'loading [{path}]')
        self.aspect = config['aspect']
        self.split = config['A_split']
        self.folds = config['A_n_fold']
        self.mode_dict = {'train': 0, 'test': 1}
        self.mode = self.mode_dict['train']
        self.n_user = 0
        self.n_all_item = []
        test_file = path + '/test.txt'
        train_file = path + '/train.txt'
        item_info_file = path + '/map-table.csv'
        self.path = path
        train_unique_users, train_a_item, train_user = [], [], []
        for i in range(len(self.aspect)):
            train_a_item.append([])
        test_unique_users, test_item, test_user = [], [], []
        self.traindataSize = 0
        self.testdataSize = 0

        self.map_table = pd.read_csv(item_info_file, sep=',', header=0)
        for i in range(len(self.aspect)):
            aspect_i = set(list(self.map_table[self.aspect[i]].values))
            self.n_all_item.append(len(aspect_i))
        self.n_all_item[1] += 1

        with open(train_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    train_unique_users.append(uid)
                    train_user.extend([uid] * len(items))
                    train_a_item[0].extend(items)
                    self.n_user = max(self.n_user, uid)
                    self.traindataSize += len(items)
        self.trainUniqueUsers = np.array(train_unique_users)
        self.trainUser = np.array(train_user)
        for i in range(1, len(self.aspect)):
            train_aspect_i = list(
                self.map_table.iloc[train_a_item[0]][self.aspect[i]].values)
            train_a_item[i].extend(train_aspect_i)
        self.trainAllItem = []
        for i in range(len(self.aspect)):
            self.trainAllItem.append(np.array(train_a_item[i]))
        # self.trainAllItem = np.array(train_a_item)

        with open(test_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    test_unique_users.append(uid)
                    test_user.extend([uid] * len(items))
                    test_item.extend(items)
                    self.n_user = max(self.n_user, uid)
                    self.testdataSize += len(items)
        self.testUniqueUsers = np.array(test_unique_users)
        self.testUser = np.array(test_user)
        self.testItem = np.array(test_item)
        self.n_user += 1

        self.Graph = None
        print(f"{self.traindataSize} interactions for training")
        print(f"{self.testdataSize} interactions for testing")
        print(
            f"{world.dataset} Sparsity: "
            f"{(self.traindataSize + self.testdataSize) / self.n_users / self.n_all_item[0]}"
        )

        # bipartite graph
        self.InteractNet = []
        self.users_D = []
        self.all_items_D = []
        for i in range(len(self.aspect)):
            self.InteractNet.append(
                csr_matrix((np.ones(len(self.trainUser)),
                            (self.trainUser, self.trainAllItem[i]))))
            self.users_D.append(
                np.array(self.InteractNet[i].sum(axis=1)).squeeze())
            self.users_D[i][self.users_D[i] == 0.] = 1
            self.all_items_D.append(
                np.array(self.InteractNet[i].sum(axis=0)).squeeze())
            self.all_items_D[i][self.all_items_D[i] == 0.] = 1

        # pre-calculate
        self._allPos = self.getUserPosItems(list(range(self.n_user)))
        self.__testDict = self.__build_test()
        print(f"{world.dataset} is ready to go")
Code Example #14
from time import time
from utils import shapes, combinations, timer
from world import cprint
from model import PairWiseModel, LightGCN
from dataloader import BasicDataset
from torch.nn import Softmax, Sigmoid
import torch.nn.functional as F
import world  # used below for SEED and cprint
try:
    from cppimport import imp_from_filepath
    from os.path import join, dirname
    path = join(dirname(__file__), "sources/sampling.cpp")
    sampling = imp_from_filepath(path)
    sampling.seed(world.SEED)
    sample_ext = True
except Exception:
    world.cprint("Cpp ext not loaded")
    sample_ext = False

ALLPOS = None
# ----------------------------------------------------------------------------
# distill


def userAndMatrix(batch_users, batch_items, model):
    """cal scores between user vector and item matrix

    Args:
        batch_users (tensor): vector (batch_size)
        batch_items (tensor): matrix (batch_size, dim_item)
        model (PairWiseModel):
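
The excerpt cuts off before the body of userAndMatrix. A minimal sketch of what such a scoring helper might do, assuming plain embedding lookups — the argument names below are hypothetical:

import torch

def user_and_matrix_sketch(batch_users, batch_items, user_emb, item_emb):
    # batch_users: (batch_size,) user ids
    # batch_items: (batch_size, dim_item) candidate item ids per user
    u = user_emb[batch_users]                 # (batch_size, d)
    i = item_emb[batch_items]                 # (batch_size, dim_item, d)
    return torch.einsum('bd,bkd->bk', u, i)   # per-user candidate scores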
Code Example #15
File: utils.py Project: mjhough/LGCN-mod
from dataloader import BasicDataset
from time import time
from model import LightGCN
from model import PairWiseModel
from sklearn.metrics import roc_auc_score
import random
import os
import world  # used below for seed and cprint
from torch import optim  # used by BPRLoss below
try:
    from cppimport import imp_from_filepath
    from os.path import join, dirname
    path = join(dirname(__file__), "sources/sampling.cpp")
    sampling = imp_from_filepath(path)
    sampling.seed(world.seed)
    sample_ext = True
except Exception:
    world.cprint("Cpp extension not loaded")
    sample_ext = False



class BPRLoss:
    def __init__(self, recmodel: PairWiseModel, config: dict):
        self.model = recmodel
        self.weight_decay = config['decay']
        self.lr = config['lr']
        self.opt = optim.Adam(recmodel.parameters(), lr=self.lr)

    def stageOne(self, users, pos, neg):
        t1 = time()
        loss, reg_loss = self.model.bpr_loss(users, pos, neg)
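
The excerpt stops right after the loss call. A plausible continuation of stageOne, following the usual BPR recipe of scaling the L2 term by the decay and taking one Adam step — the project's actual body may differ:

def bpr_stage_one_sketch(bpr, users, pos, neg):
    # bpr: a BPRLoss instance as defined above
    loss, reg_loss = bpr.model.bpr_loss(users, pos, neg)
    loss = loss + bpr.weight_decay * reg_loss
    bpr.opt.zero_grad()
    loss.backward()
    bpr.opt.step()
    return loss.item()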
Code Example #16
print(f"[SEED:{world.SEED}]")
# ----------------------------------------------------------------------------
# init model
import register
from register import dataset

# ----------------------------------------------------------------------------
# loading teacher
teacher_file = utils.getFileName(world.model_name,
                                 world.dataset,
                                 world.config['teacher_dim'],
                                 layers=world.config['teacher_layer'])
teacher_file = "teacher-" + teacher_file
teacher_weight_file = os.path.join(world.FILE_PATH, teacher_file)
print('-------------------------')
world.cprint("loaded teacher weights from")
print(teacher_weight_file)
print('-------------------------')
teacher_config = utils.getTeacherConfig(world.config)
world.cprint('teacher')
teacher_model = register.MODELS[world.model_name](teacher_config,
                                                  dataset,
                                                  fix=True)
teacher_model.eval()
utils.load(teacher_model, teacher_weight_file)
# ----------------------------------------------------------------------------

# ----------------------------------------------------------------------------
# loading student
world.cprint('student')
if world.EMBEDDING:
Code Example #17
    def __init__(self, config=world.config, path="../data/gowalla"):
        # train or test
        cprint(f'loading [{path}]')
        self.split = config['A_split']
        self.folds = config['A_n_fold']
        self.mode_dict = {'train': 0, "test": 1}
        self.mode = self.mode_dict['train']
        self.__n_users = 0
        self.__m_items = 0
        train_file = path + '/train.txt'
        valid_file = path + '/valid.txt'
        test_file = path + '/test.txt'
        self.path = path
        trainUniqueUsers, trainItem, trainUser = [], [], []
        validUniqueUsers, validItem, validUser = [], [], []
        testUniqueUsers, testItem, testUser = [], [], []
        self.__trainsize = 0
        self.validDataSize = 0
        self.testDataSize = 0

        with open(train_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    trainUniqueUsers.append(uid)
                    trainUser.extend([uid] * len(items))
                    trainItem.extend(items)
                    self.__m_items = max(self.__m_items, max(items))
                    self.__n_users = max(self.__n_users, uid)
                    self.__trainsize += len(items)
        self.trainUniqueUsers = np.array(trainUniqueUsers)
        self.trainUser = np.array(trainUser)
        self.trainItem = np.array(trainItem)

        with open(valid_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    validUniqueUsers.append(uid)
                    validUser.extend([uid] * len(items))
                    validItem.extend(items)
                    self.__m_items = max(self.__m_items, max(items))
                    self.__n_users = max(self.__n_users, uid)
                    self.validDataSize += len(items)
        self.validUniqueUsers = np.array(validUniqueUsers)
        self.validUser = np.array(validUser)
        self.validItem = np.array(validItem)

        with open(test_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    try:
                        items = [int(i) for i in l[1:]]
                    except ValueError:
                        print("user data error", l)
                    uid = int(l[0])
                    testUniqueUsers.append(uid)
                    testUser.extend([uid] * len(items))
                    testItem.extend(items)
                    self.__m_items = max(self.__m_items, max(items))
                    self.__n_users = max(self.__n_users, uid)
                    self.testDataSize += len(items)
        self.__m_items += 1
        self.__n_users += 1
        self.testUniqueUsers = np.array(testUniqueUsers)
        self.testUser = np.array(testUser)
        self.testItem = np.array(testItem)

        # if world.ALLDATA:
        #     self._trainUser = self.trainUser
        #     self._trainItem = self.trainItem
        #     self.trainUser = np.concatenate([self.trainUser, self.testUser])
        #     self.trainItem = np.concatenate([self.trainItem, self.testItem])
        #     self.__trainsize += self.testDataSize
        # elif world.TESTDATA:
        #     self.__trainsize = self.testDataSize
        #     self.trainUser = self.testUser
        #     self.trainItem  = self.testItem

        self.Graph = None
        print(f"({self.n_users} X {self.m_items})")
        print(f"{self.trainDataSize} interactions for training")
        print(f"{self.validDataSize} interactions for training")
        print(f"{self.testDataSize} interactions for testing")
        print(
            f"{world.dataset} Sparsity : {(self.trainDataSize + self.validDataSize + self.testDataSize) / self.n_users / self.m_items}"
        )

        # (users,items), bipartite graph
        self.UserItemNet = csr_matrix(
            (np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
            shape=(self.__n_users, self.__m_items),
            dtype='int')
        # pre-calculate
        self.__allPos = self.getUserPosItems(list(range(self.__n_users)))
        self.__testDict = self.build_dict(self.testUser, self.testItem)
        self.__validDict = self.build_dict(self.validUser, self.validItem)
        if world.ALLDATA:
            # NOTE: self._trainUser / self._trainItem are only set in the
            # commented-out ALLDATA block above, so this branch would fail as written
            self.UserItemNet = csr_matrix(
                (np.ones(len(self._trainUser)),
                 (self._trainUser, self._trainItem)),
                shape=(self.__n_users, self.__m_items),
                dtype='int')
        print(f"{world.dataset} is ready to go")
Code Example #18
    def __init__(self, config=world.config, path="../data/gowalla_one"):
        cprint(f'loading [{path}]')
        self.path = path
        self.split = False
        self.__n_users = 0
        self.__m_items = 0
        train_file = path + '/train.txt'
        valid_file = path + '/valid.txt'
        test_file = path + '/test.txt'
        trainUser, trainItem = [], []
        validUser, validItem = [], []
        testUser, testItem = [], []
        with open(train_file) as f:
            for line in f.readlines():
                user, item, _ = line.strip().split()
                trainUser.append(int(user))
                trainItem.append(int(item))
        with open(valid_file) as f:
            for line in f.readlines():
                user, item, _ = line.strip().split()
                validUser.append(int(user))
                validItem.append(int(item))
        with open(test_file) as f:
            for line in f.readlines():
                user, item, _ = line.strip().split()
                testUser.append(int(user))
                testItem.append(int(item))
        self.__n_users = len(testUser)
        self.__m_items = max(max(trainItem), max(testItem))
        self.__trainsize = len(trainUser)
        min_index = np.min(trainUser)
        self.trainUser = np.array(trainUser) - min_index
        self.trainItem = np.array(trainItem) - min_index
        self.validUser = np.array(validUser) - min_index
        self.validItem = np.array(validItem) - min_index
        self.testUser = np.array(testUser) - min_index
        self.testItem = np.array(testItem) - min_index
        self.__m_items += 1 - min_index
        assert len(testUser) == (max(trainUser) + 1 - min_index)
        if world.ALLDATA:
            self._trainUser = self.trainUser
            self._trainItem = self.trainItem
            self.trainUser = np.concatenate([self.trainUser, self.testUser])
            self.trainItem = np.concatenate([self.trainItem, self.testItem])
            self.__trainsize += len(testUser)
        elif world.TESTDATA:
            self.__trainsize = len(testUser)
            self.trainUser = self.testUser
            self.trainItem = self.testItem

        self.Graph = None
        print(f"({self.n_users} X {self.m_items})")
        print(f"{self.trainDataSize} interactions for training")
        print(f"{len(testUser)} interactions for testing")
        print(f"{len(validUser)} interactions for validating")
        print(
            f"{world.dataset} Sparsity : {(self.trainDataSize + len(validUser) + len(testUser)) / self.n_users / self.m_items}"
        )

        self.UserItemNet = csr_matrix(
            (np.ones(len(self.trainUser)), (self.trainUser, self.trainItem)),
            shape=(self.n_users, self.m_items))
        self.users_D = np.array(self.UserItemNet.sum(axis=1)).squeeze()
        self.users_D[self.users_D == 0.] = 1
        self.items_D = np.array(self.UserItemNet.sum(axis=0)).squeeze()
        self.items_D[self.items_D == 0.] = 1.
        # pre-calculate
        self.__allPos = self.getUserPosItems(list(range(self.__n_users)))
        self.__testDict = self.build_dict(self.testUser, self.testItem)
        self.__validDict = self.build_dict(self.validUser, self.validItem)
        if world.ALLDATA:
            self.UserItemNet = csr_matrix((np.ones(len(self._trainUser)),
                                           (self._trainUser, self._trainItem)),
                                          shape=(self.n_users, self.m_items))
        print(f"{world.dataset} is ready to go")
Code Example #19
    # NOTE: assigning into the dict returned by state_dict() does not update
    # the module's parameters; Tensor.copy_() or load_state_dict() would be
    # needed for that
    Recmodel.state_dict()[key] = val

# FREEZE HERE
#  Recmodel.embedding_user.weight.requires_grad = False
#  Recmodel.embedding_item.weight.requires_grad = False

Neg_k = 1

# init tensorboard
if world.tensorboard:
    w: SummaryWriter = SummaryWriter(
        join(world.BOARD_PATH,
             time.strftime("%m-%d-%Hh%Mm%Ss-") + "-" + world.comment))
else:
    w = None
    world.cprint("not enable tensorflowboard")

# Store the model size in a variable:
#mb_params = 1e-6*sum([param.nelement()*param.element_size() for param in Recmodel.parameters()])

try:
    for epoch in range(world.TRAIN_epochs):
        start = time.time()
        if epoch % 60 == 0:
            cprint("[TEST]")

            #torch.cuda.reset_max_memory_allocated() # reset max memory stats for next iter
            test_t0 = time.time()
            Procedure.Test(dataset, Recmodel, epoch, w,
                           world.config['multicore'])
            test_t1 = time.time()