def build_data_loader(self):
    """Prepare the sampler and feature-similarity graphs for training.

    Converts the user/item feature-similarity adjacency matrices to torch
    sparse tensors and records batching/dataset statistics in ``self.config``.
    """
    # ToDo: Please define the directory to store the adjacent matrix
    u_fea_adj, i_fea_adj = self.dataset.make_fea_sim_mat()
    self.sample_generator = SampleGenerator(ratings=self.dataset.train)
    cfg = self.config
    cfg["user_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(u_fea_adj)
    cfg["item_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(i_fea_adj)
    # One extra batch covers the remainder when n_train is not an exact
    # multiple of batch_size.
    cfg["num_batch"] = self.dataset.n_train // cfg["batch_size"] + 1
    cfg["n_users"] = self.dataset.n_users
    cfg["n_items"] = self.dataset.n_items
def build_data_loader(self):
    """Prepare the sampler and multi-relational graphs for training.

    Loads the user/item edge lists and edge types from the dataset's
    multi-graph, stores them as ``torch.LongTensor`` entries in
    ``self.config``, and records batching/dataset statistics.
    """
    # ToDo: Please define the directory to store the adjacent matrix
    graph = self.dataset.make_multi_graph()
    u_edges, u_types, i_edges, i_types, n_user_fea, n_item_fea = graph
    self.config["n_user_fea"] = n_user_fea
    self.config["n_item_fea"] = n_item_fea
    self.sample_generator = SampleGenerator(ratings=self.dataset.train)
    self.config["user_edge_list"] = torch.LongTensor(u_edges)
    self.config["user_edge_type"] = torch.LongTensor(u_types)
    self.config["item_edge_list"] = torch.LongTensor(i_edges)
    self.config["item_edge_type"] = torch.LongTensor(i_types)
    # One extra batch covers the remainder of n_train / batch_size.
    self.config["num_batch"] = self.dataset.n_train // self.config["batch_size"] + 1
    self.config["n_users"] = self.dataset.n_users
    self.config["n_items"] = self.dataset.n_items
def build_data_loader(self):
    """Prepare the sampler, multi-relational graphs, and interaction adjacency.

    Stores edge lists/types from the dataset multi-graph, batching and
    dataset statistics, plus the normalized user-item adjacency matrix
    (as a torch sparse tensor) in ``self.config``.
    """
    graph = self.dataset.make_multi_graph()
    u_edges, u_types, i_edges, i_types, n_user_fea, n_item_fea = graph
    self.config["n_user_fea"] = n_user_fea
    self.config["n_item_fea"] = n_item_fea
    self.sample_generator = SampleGenerator(ratings=self.dataset.train)
    self.config["user_edge_list"] = torch.LongTensor(u_edges)
    self.config["user_edge_type"] = torch.LongTensor(u_types)
    self.config["item_edge_list"] = torch.LongTensor(i_edges)
    self.config["item_edge_type"] = torch.LongTensor(i_types)
    # One extra batch covers the remainder of n_train / batch_size.
    self.config["num_batch"] = self.dataset.n_train // self.config["batch_size"] + 1
    self.config["n_users"] = self.dataset.n_users
    self.config["n_items"] = self.dataset.n_items
    # Only the normalized adjacency is kept; plain/mean variants are unused.
    _plain_adj, norm_adj, _mean_adj = self.dataset.get_adj_mat()
    self.config["norm_adj"] = sparse_mx_to_torch_sparse_tensor(norm_adj)
class NCF_train(TrainEngine):
    """Train engine for NeuMF with optional CompGCN pre-training.

    An instance class from the TrainEngine base class.
    """

    def __init__(self, config):
        """Constructor.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(NCF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.gpu_id, self.config["device_str"] = self.get_device()

    def build_data_loader(self):
        """Build the sample generator and store graph data in the config.

        Loads the multi-relational user/item graphs from the dataset and
        records edge lists, edge types, and dataset statistics in
        ``self.config`` for later use by the engines.
        """
        # ToDo: Please define the directory to store the adjacent matrix
        (
            user_edge_list,
            user_edge_type,
            item_edge_list,
            item_edge_type,
            self.config["n_user_fea"],
            self.config["n_item_fea"],
        ) = self.dataset.make_multi_graph()
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)
        self.config["user_edge_list"] = torch.LongTensor(user_edge_list)
        self.config["user_edge_type"] = torch.LongTensor(user_edge_type)
        self.config["item_edge_list"] = torch.LongTensor(item_edge_list)
        self.config["item_edge_type"] = torch.LongTensor(item_edge_type)
        # One extra batch covers the remainder of n_train / batch_size.
        self.config["num_batch"] = self.dataset.n_train // self.config["batch_size"] + 1
        self.config["n_users"] = self.dataset.n_users
        self.config["n_items"] = self.dataset.n_items

    def _train(self, engine, train_loader, save_dir):
        """Run the epoch loop for ``engine`` with per-epoch evaluation.

        Args:
            engine: Model engine exposing ``train_an_epoch`` and ``model``.
            train_loader: Data source forwarded to the engine.
            save_dir (str): Checkpoint path used by early stopping.
        """
        self.eval_engine.flush()
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch_id=epoch)
            # Evaluate model on validation and test sets.
            if self.config["validate"]:
                self.eval_engine.train_eval(
                    self.dataset.valid[0], self.dataset.test[0], engine.model, epoch
                )
            else:
                self.eval_engine.train_eval(
                    None, self.dataset.test[0], engine.model, epoch
                )

    def train(self):
        """Main training navigator: optional CompGCN pre-train, then NeuMF.

        Returns:
            None
        """
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        print("Start Pre-training")
        # pre_train == 0 triggers the CompGCN pre-training stage; any other
        # value skips straight to fine-tuning.
        if self.config["pre_train"] == 0:
            self.train_compgcn()
        print("Start fine-tuning")
        self.train_ncf()
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    def train_ncf(self):
        """Train NeuMF.

        Returns:
            None
        """
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"]
        )
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["neumf_config"]["save_name"]
        )
        self._train(self.engine, train_loader, self.neumf_save_dir)

    def train_compgcn(self):
        """Train the CompGCN engine as the pre-training stage.

        Returns:
            None
        """
        train_loader = self.dataset
        # Train CompGCN.
        self.engine = CompGCNEngine(self.config)
        self.model_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["compgcn_config"]["save_name"]
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        # BUG FIX: the original loop printed without sleeping, busy-spinning
        # the CPU while async eval workers drained. Sibling implementations
        # in this file all sleep 15s between checks; do the same here.
        while self.eval_engine.n_worker:
            print("Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait for async evaluation workers to finish
class MF_train(TrainEngine):
    """Train engine for MF whose embeddings are initialized by RESCAL.

    An instance class from the TrainEngine base class.
    """

    def __init__(self, config):
        """Constructor.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(MF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.gpu_id, self.config["device_str"] = self.get_device()

    def build_data_loader(self):
        """Build the sample generator and store graph data in the config."""
        # ToDo: Please define the directory to store the adjacent matrix
        (
            user_edge_list,
            user_edge_type,
            item_edge_list,
            item_edge_type,
            self.config["n_user_fea"],
            self.config["n_item_fea"],
        ) = self.dataset.make_multi_graph()
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)
        self.config["user_edge_list"] = torch.LongTensor(user_edge_list)
        self.config["user_edge_type"] = torch.LongTensor(user_edge_type)
        self.config["item_edge_list"] = torch.LongTensor(item_edge_list)
        self.config["item_edge_type"] = torch.LongTensor(item_edge_type)
        # One extra batch covers the remainder of n_train / batch_size.
        self.config["num_batch"] = self.dataset.n_train // self.config["batch_size"] + 1
        self.config["n_users"] = self.dataset.n_users
        self.config["n_items"] = self.dataset.n_items

    def _train(self, engine, train_loader, save_dir):
        """Run the epoch loop for ``engine`` with per-epoch evaluation.

        Args:
            engine: Model engine exposing ``train_an_epoch`` and ``model``.
            train_loader: Data source forwarded to the engine.
            save_dir (str): Checkpoint path used by early stopping.
        """
        self.eval_engine.flush()
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch_id=epoch)
            # Evaluate model on validation and test sets.
            if self.config["validate"]:
                self.eval_engine.train_eval(
                    self.dataset.valid[0], self.dataset.test[0], engine.model, epoch
                )
            else:
                self.eval_engine.train_eval(
                    None, self.dataset.test[0], engine.model, epoch
                )

    def train(self):
        """Main training navigator.

        Returns:
            None
        """
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.train_rescal()
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    def train_rescal(self):
        """Train MF with user/item embeddings pre-computed by RESCAL ALS.

        Builds relation tensors from the multi-graph, factorizes them with
        RESCAL to obtain entity embeddings, copies those embeddings into the
        MF model, and then trains with the configured loss.

        Returns:
            None
        """
        # NOTE(fix): removed unused `loadmat` import, the redundant local
        # `import sys` (module-level `sys` is already used in `_train`), and
        # two no-op `emb.astype(np.float64)` statements whose results were
        # discarded (`ndarray.astype` returns a copy, it is not in-place).
        sys.path.append("../")
        from scipy.sparse import lil_matrix

        from rescal.rescal import als as rescal_als

        def edge2Tensor(edge_list, edge_type, n_type):
            # Build a dense (n_ent, n_ent, n_type) indicator tensor and
            # slice it into one sparse matrix per relation type.
            n_ent = edge_list.max() + 1
            r_tensor = np.zeros((n_ent, n_ent, n_type))
            for idx, e in enumerate(edge_type):
                i = edge_list[0][idx]
                j = edge_list[1][idx]
                r_tensor[i][j][e] = 1
            print(f"n values: {np.count_nonzero(r_tensor)}")
            return [lil_matrix(r_tensor[:, :, k]) for k in range(r_tensor.shape[2])]

        def get_emb(r_tensor, dim=64, lambda_A=10, lambda_R=10):
            # RESCAL ALS returns (A, R, fit, itr, exectimes); only the
            # entity embedding matrix A is needed here.
            A = rescal_als(
                r_tensor, dim, init="nvecs", lambda_A=lambda_A, lambda_R=lambda_R
            )[0]
            return A

        (
            user_edge_list,
            user_edge_type,
            item_edge_list,
            item_edge_type,
            n_user_fea,
            n_item_fea,
        ) = self.dataset.make_multi_graph()
        user_r = edge2Tensor(user_edge_list, user_edge_type, n_user_fea)
        item_r = edge2Tensor(item_edge_list, item_edge_type, n_item_fea)
        lambda_A = self.config["lambda_A"]
        lambda_R = self.config["lambda_R"]
        u_emb = get_emb(
            user_r, dim=self.config["emb_dim"], lambda_A=lambda_A, lambda_R=lambda_R
        )
        i_emb = get_emb(
            item_r, dim=self.config["emb_dim"], lambda_A=lambda_A, lambda_R=lambda_R
        )
        if self.config["loss"] == "bpr":
            train_loader = self.sample_generator.pairwise_negative_train_loader(
                self.config["batch_size"], self.config["device_str"]
            )
        elif self.config["loss"] == "bce":
            train_loader = self.sample_generator.uniform_negative_train_loader(
                self.config["num_negative"],
                self.config["batch_size"],
                self.config["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['loss']}, try other options: 'bpr' or 'bce'"
            )
        self.engine = MFEngine(self.config)
        # NOTE(review): this writes float64 weights into the embedding
        # tables; torch modules default to float32 — confirm MFEngine casts.
        self.engine.model.user_emb.weight.data = torch.tensor(
            u_emb.astype(np.float64)
        ).to(self.engine.device)
        self.engine.model.item_emb.weight.data = torch.tensor(
            i_emb.astype(np.float64)
        ).to(self.engine.device)
        self.model_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["save_name"]
        )
        self._train(self.engine, train_loader, self.model_save_dir)
class NCF_train(TrainEngine):
    """Train engine for GCN / MLP / NeuMF models.

    An instance class from the TrainEngine base class.
    """

    def __init__(self, config):
        """Constructor.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(NCF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.gpu_id, self.config["device_str"] = self.get_device()

    def build_data_loader(self):
        """Prepare the sampler and feature-similarity graphs for training."""
        # ToDo: Please define the directory to store the adjacent matrix
        u_fea_adj, i_fea_adj = self.dataset.make_fea_sim_mat()
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)
        cfg = self.config
        cfg["user_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(u_fea_adj)
        cfg["item_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(i_fea_adj)
        # One extra batch covers the remainder of n_train / batch_size.
        cfg["num_batch"] = self.dataset.n_train // cfg["batch_size"] + 1
        cfg["n_users"] = self.dataset.n_users
        cfg["n_items"] = self.dataset.n_items

    def _wait_workers(self):
        """Block until all asynchronous evaluation workers have drained."""
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # poll again after 15s

    def _train(self, engine, train_loader, save_dir):
        """Run the epoch loop for ``engine`` with per-epoch evaluation.

        Args:
            engine: Model engine exposing ``train_an_epoch`` and ``model``.
            train_loader: Data source forwarded to the engine.
            save_dir (str): Checkpoint path used by early stopping.
        """
        self.eval_engine.flush()
        progress = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in progress:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch_id=epoch)
            # Evaluate model on validation and test sets.
            valid_split = self.dataset.valid[0] if self.config["validate"] else None
            self.eval_engine.train_eval(
                valid_split, self.dataset.test[0], engine.model, epoch
            )

    def train(self):
        """Main training navigator.

        Dispatches on ``config["model"]``; options are 'gcn', 'mlp', 'ncf',
        and 'ncf_gcn' (which pre-trains GCN and MLP before NeuMF).

        Returns:
            None
        """
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        model = self.config["model"]
        if model == "ncf":
            self.train_ncf()
        elif model == "gcn":
            self.train_gcn()
        elif model == "mlp":
            self.train_mlp()
        elif model == "ncf_gcn":
            self.train_gcn()
            self._wait_workers()
            self.train_mlp()
            self._wait_workers()
            self.train_ncf()
        else:
            raise ValueError(
                "Model type error: Options are: 'gcn', 'mlp', 'ncf', and 'ncf_gcn'."
            )
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    def train_ncf(self):
        """Train NeuMF.

        Returns:
            None
        """
        loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"]
        )
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["neumf_config"]["save_name"]
        )
        self._train(self.engine, loader, self.neumf_save_dir)

    def train_gcn(self):
        """Train GCN.

        Returns:
            None
        """
        self.engine = GCN_SEngine(self.config)
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["gcn_config"]["save_name"]
        )
        # The GCN engine consumes the dataset object directly.
        self._train(self.engine, self.dataset, self.gcn_save_dir)
        self._wait_workers()

    def train_mlp(self):
        """Train MLP.

        Returns:
            None
        """
        loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"]
        )
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["mlp_config"]["save_name"]
        )
        self._train(self.engine, loader, self.mlp_save_dir)
        self._wait_workers()
class MF_train(TrainEngine):
    """Train engine for MF with optional GCN pre-training.

    An instance class from the TrainEngine base class.
    """

    def __init__(self, config):
        """Constructor.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(MF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.gpu_id, self.config["device_str"] = self.get_device()

    def build_data_loader(self):
        """Prepare sampler, feature graphs, and interaction adjacency."""
        # ToDo: Please define the directory to store the adjacent matrix
        u_fea_adj, i_fea_adj = self.dataset.make_fea_sim_mat()
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)
        cfg = self.config
        cfg["user_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(u_fea_adj)
        cfg["item_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(i_fea_adj)
        # Only the normalized adjacency is kept; plain/mean are unused.
        _plain_adj, norm_adj, _mean_adj = self.dataset.get_adj_mat()
        cfg["norm_adj"] = sparse_mx_to_torch_sparse_tensor(norm_adj)
        # One extra batch covers the remainder of n_train / batch_size.
        cfg["num_batch"] = self.dataset.n_train // cfg["batch_size"] + 1
        cfg["n_users"] = self.dataset.n_users
        cfg["n_items"] = self.dataset.n_items

    def _train(self, engine, train_loader, save_dir):
        """Run the epoch loop for ``engine`` with per-epoch evaluation.

        Args:
            engine: Model engine exposing ``train_an_epoch`` and ``model``.
            train_loader: Data source forwarded to the engine.
            save_dir (str): Checkpoint path used by early stopping.
        """
        self.eval_engine.flush()
        progress = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in progress:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch_id=epoch)
            # Evaluate model on validation and test sets.
            valid_split = self.dataset.valid[0] if self.config["validate"] else None
            self.eval_engine.train_eval(
                valid_split, self.dataset.test[0], engine.model, epoch
            )

    def train(self):
        """Main training navigator: optional GCN pre-train, then MF.

        Returns:
            None
        """
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        # pre_train == 0 triggers the GCN pre-training stage.
        if self.config["pre_train"] == 0:
            print(self.config["pre_train"])
            self.train_gcn()
        self.train_mf()
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    def train_mf(self):
        """Train MF with the configured loss ('bpr' or 'bce').

        Returns:
            None

        Raises:
            ValueError: If ``config["loss"]`` is neither 'bpr' nor 'bce'.
        """
        loss = self.config["loss"]
        if loss == "bpr":
            loader = self.sample_generator.pairwise_negative_train_loader(
                self.config["batch_size"], self.config["device_str"]
            )
        elif loss == "bce":
            loader = self.sample_generator.uniform_negative_train_loader(
                self.config["num_negative"],
                self.config["batch_size"],
                self.config["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['loss']}, try other options: 'bpr' or 'bce'"
            )
        self.engine = MFEngine(self.config)
        self.model_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["save_name"]
        )
        self._train(self.engine, loader, self.model_save_dir)

    def train_gcn(self):
        """Train GCN as the pre-training stage.

        Returns:
            None
        """
        self.engine = GCN_SEngine(self.config)
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["gcn_config"]["save_name"]
        )
        # The GCN engine consumes the dataset object directly.
        self._train(self.engine, self.dataset, self.gcn_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # poll again after 15s
def build_data_loader(self):
    """Create the training sample generator and record dataset statistics."""
    self.sample_generator = SampleGenerator(ratings=self.dataset.train)
    # One extra batch covers the remainder when n_train is not an exact
    # multiple of batch_size.
    n_batches = self.dataset.n_train // self.config["batch_size"] + 1
    self.config["num_batch"] = n_batches
    self.config["n_users"] = self.dataset.n_users
    self.config["n_items"] = self.dataset.n_items
def build_data_loader(self):
    """Instantiate the sample generator over the training ratings."""
    # ToDo: Please define the directory to store the adjacent matrix
    self.sample_generator = SampleGenerator(ratings=self.dataset.train)
class MF_train(TrainEngine):
    """Train engine for plain MF.

    An instance class from the TrainEngine base class.
    """

    def __init__(self, config):
        """Constructor.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(MF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.gpu_id, self.config["device_str"] = self.get_device()

    def build_data_loader(self):
        """Instantiate the sample generator over the training ratings."""
        # ToDo: Please define the directory to store the adjacent matrix
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)

    def _train(self, engine, train_loader, save_dir):
        """Run the epoch loop for ``engine`` with per-epoch evaluation.

        Args:
            engine: Model engine exposing ``train_an_epoch`` and ``model``.
            train_loader: Data source forwarded to the engine.
            save_dir (str): Checkpoint path used by early stopping.
        """
        self.eval_engine.flush()
        progress = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in progress:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch_id=epoch)
            # Evaluate model on validation and test sets.
            valid_split = self.dataset.valid[0] if self.config["validate"] else None
            self.eval_engine.train_eval(
                valid_split, self.dataset.test[0], engine.model, epoch
            )

    def train(self):
        """Main training navigator.

        Returns:
            The best validation performance recorded by the eval engine.
        """
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.train_mf()
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)
        return self.eval_engine.best_valid_performance

    def train_mf(self):
        """Train MF with the configured loss ('bpr' or 'bce').

        Returns:
            None

        Raises:
            ValueError: If ``config["loss"]`` is neither 'bpr' nor 'bce'.
        """
        loss = self.config["loss"]
        if loss == "bpr":
            loader = self.sample_generator.pairwise_negative_train_loader(
                self.config["batch_size"], self.config["device_str"]
            )
        elif loss == "bce":
            loader = self.sample_generator.uniform_negative_train_loader(
                self.config["num_negative"],
                self.config["batch_size"],
                self.config["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['loss']}, try other options: 'bpr' or 'bce'"
            )
        self.engine = MFEngine(self.config)
        self.model_save_dir = os.path.join(
            self.config["model_save_dir"], self.config["save_name"]
        )
        print(self.model_save_dir)
        self._train(self.engine, loader, self.model_save_dir)