def train(self):
    """Train the model."""
    self.monitor = Monitor(
        log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    self.model_save_dir = os.path.join(
        self.config["system"]["model_save_dir"], self.config["model"]["save_name"]
    )
    for epoch in range(self.config["model"]["max_epoch"]):
        print(f"Epoch {epoch} starts !")
        print("-" * 80)
        if epoch > 0 and self.eval_engine.n_no_update == 0:
            # the previous epoch produced a better result, so checkpoint it
            self.engine.save_checkpoint(model_dir=self.model_save_dir)
        if self.eval_engine.n_no_update >= MAX_N_UPDATE:
            print(
                "Early stop criterion triggered: no performance update for {:} consecutive epochs".format(
                    MAX_N_UPDATE
                )
            )
            break
        train_loader = self.sample_generator.pairwise_negative_train_loader(
            self.config["model"]["batch_size"], self.config["model"]["device_str"]
        )
        self.engine.train_an_epoch(epoch_id=epoch, train_loader=train_loader)
        self.eval_engine.train_eval(
            self.data.valid[0], self.data.test[0], self.engine.model, epoch
        )
    self.config["run_time"] = self.monitor.stop()
def train(self): """Train the model.""" self.monitor = Monitor(log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id) self.model_save_dir = os.path.join( self.config["system"]["model_save_dir"], self.config["model"]["save_name"]) if self.config["model"]["loss"] == "bpr": train_loader = self.data.instance_bpr_loader( batch_size=self.config["model"]["batch_size"], device=self.config["model"]["device_str"], ) elif self.config["model"]["loss"] == "bce": train_loader = self.data.instance_bce_loader( num_negative=self.config["model"]["num_negative"], batch_size=self.config["model"]["batch_size"], device=self.config["model"]["device_str"], ) else: raise ValueError( f"Unsupported loss type {self.config['loss']}, try other options: 'bpr' or 'bce'" ) self.engine = LCFNEngine(self.config) self._train(self.engine, train_loader, self.model_save_dir) self.config["run_time"] = self.monitor.stop() return self.eval_engine.best_valid_performance
def train(self): """Train the model.""" self.load_dataset() self.gpu_id, self.config["device_str"] = self.get_device() """ Main training navigator Returns: """ self.monitor = Monitor(log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id) train_loader = self.data.instance_vae_loader( batch_size=self.config["model"]["batch_size"], device=self.config["model"]["device_str"], ) self.config["model"]["n_items"] = self.data.n_items self.config["model"]["n_users"] = self.data.n_users self.engine = VAECFEngine(self.config) self.model_save_dir = os.path.join( self.config["system"]["model_save_dir"], self.config["model"]["save_name"]) self._train(self.engine, train_loader, self.model_save_dir) self.config["run_time"] = self.monitor.stop() return self.eval_engine.best_valid_performance
def train(self):
    """Train the model."""
    if self.config["pretrain"] == "gmf":
        user_embed, item_embed = self.train_gmf()
    else:
        # the flow shown here only provides embeddings for the "gmf" option
        raise ValueError("CMN training expects config['pretrain'] == 'gmf'.")
    model = self.cmnengine(
        self.config, user_embed, item_embed, self.data.item_users_list
    )
    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    self.model_dir = os.path.join(
        self.config["model_save_dir"], self.config["save_name"]
    )
    for epoch in range(self.config["max_epoch"]):
        print(f"Epoch {epoch} starts !")
        print("-" * 80)
        if epoch > 0 and self.eval_engine.n_no_update == 0:
            # the previous epoch produced a better result, so checkpoint it
            model.save_checkpoint(model_dir=self.model_dir)
        if self.eval_engine.n_no_update >= MAX_N_UPDATE:
            print(
                "Early stop criterion triggered: no performance update for {:} consecutive epochs".format(
                    MAX_N_UPDATE
                )
            )
            break
        train_loader = self.data
        model.train_an_epoch(epoch_id=epoch, train_loader=train_loader)
        self.eval_engine.train_eval(
            self.data.valid[0], self.data.test[0], model.model, epoch
        )
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.data.test, model.model)
def train_mlp(self):
    """Train the MLP component.

    Returns:
        None.
    """
    # Train MLP
    self.monitor = Monitor(
        log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    train_loader = self.sample_generator.instance_a_train_loader(
        self.config["model"]["num_negative"], self.config["model"]["batch_size"]
    )
    self.engine = MLPEngine(self.config)
    self.mlp_save_dir = os.path.join(
        self.config["system"]["model_save_dir"],
        self.config["model"]["mlp_config"]["save_name"],
    )
    self._train(self.engine, train_loader, self.mlp_save_dir)
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.data.test, self.engine.model)
def train(self): """Default train implementation """ self.load_dataset() self.train_data = self.data.sample_triple() self.config["model"]["alpha_step"] = ( 1 - self.config["model"]["alpha"]) / ( self.config["model"]["max_epoch"]) self.config["user_fea"] = self.data.user_feature self.config["item_fea"] = self.data.item_feature self.engine = VBCAREngine(self.config) self.engine.data = self.data assert hasattr(self, "engine"), "Please specify the exact model engine !" self.monitor = Monitor(log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id) print("Start training... ") epoch_bar = tqdm(range(self.config["model"]["max_epoch"]), file=sys.stdout) for epoch in epoch_bar: print(f"Epoch {epoch} starts !") print("-" * 80) if epoch > 0 and self.eval_engine.n_no_update == 0: # previous epoch have already obtained better result self.engine.save_checkpoint(model_dir=os.path.join( self.config["system"]["model_save_dir"], "model.cpk")) if self.eval_engine.n_no_update >= MAX_N_UPDATE: print( "Early stop criterion triggered, no performance update for {:} times" .format(MAX_N_UPDATE)) break data_loader = DataLoader( torch.LongTensor(self.train_data.to_numpy()).to( self.engine.device), batch_size=self.config["model"]["batch_size"], shuffle=True, drop_last=True, ) self.engine.train_an_epoch(data_loader, epoch_id=epoch) self.eval_engine.train_eval(self.data.valid[0], self.data.test[0], self.engine.model, epoch) # anneal alpha self.engine.model.alpha = min( self.config["model"]["alpha"] + math.exp(epoch - self.config["model"]["max_epoch"] + 20), 1, ) """Sets the learning rate to the initial LR decayed by 10 every 10 epochs""" lr = self.config["model"]["lr"] * (0.5**(epoch // 10)) for param_group in self.engine.optimizer.param_groups: param_group["lr"] = lr self.config["run_time"] = self.monitor.stop() return self.eval_engine.best_valid_performance
def train(self):
    """Train the model."""
    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    # Train GCN
    self.engine = GCN_SEngine(self.config["gcn_config"])
    train_loader = self.dataset
    self.gcn_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["gcn_config"]["save_name"]
    )
    self._train(
        engine=self.engine, train_loader=train_loader, save_dir=self.gcn_save_dir
    )
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    # Train MLP
    train_loader = self.sample_generator.instance_a_train_loader(
        self.config["num_negative"], self.config["batch_size"]
    )
    self.engine = MLPEngine(
        self.config["mlp_config"], gcn_config=self.config["gcn_config"]
    )
    self.mlp_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["mlp_config"]["save_name"]
    )
    self._train(
        engine=self.engine, train_loader=train_loader, save_dir=self.mlp_save_dir
    )
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    # Train NeuMF
    self.engine = NeuMFEngine(
        self.config["neumf_config"],
        mlp_config=self.config["mlp_config"],
        gcn_config=self.config["gcn_config"],
    )
    self.neumf_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["neumf_config"]["save_name"]
    )
    self._train(
        engine=self.engine,
        train_loader=train_loader,
        save_dir=self.neumf_save_dir,
    )
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
def train(self): """Train and test NARM.""" self.monitor = Monitor(log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id) train_loader = self.load_train_data self.engine = NARMEngine(self.config) self.narm_save_dir = os.path.join( self.config["system"]["model_save_dir"], self.config["model"]["save_name"]) self._train(self.engine, train_loader, self.narm_save_dir) self.config["run_time"] = self.monitor.stop() self.seq_eval_engine.test_eval_seq(self.test_data, self.engine)
def train(self): """Train the model.""" self.monitor = Monitor(log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id) self.model_save_dir = os.path.join( self.config["system"]["model_save_dir"], self.config["model"]["save_name"]) self.engine = LightGCNEngine(self.config) train_loader = self.data.instance_bpr_loader( batch_size=self.config["model"]["batch_size"], device=self.config["model"]["device_str"], ) self._train(self.engine, train_loader, self.model_save_dir) self.config["run_time"] = self.monitor.stop() return self.eval_engine.best_valid_performance
def train_ncf(self):
    """Train NeuMF."""
    self.monitor = Monitor(
        log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    train_loader = self.sample_generator.instance_a_train_loader(
        self.config["model"]["num_negative"], self.config["model"]["batch_size"]
    )
    self.engine = NeuMFEngine(self.config)
    self.neumf_save_dir = os.path.join(
        self.config["system"]["model_save_dir"],
        self.config["model"]["neumf_config"]["save_name"],
    )
    self._train(self.engine, train_loader, self.neumf_save_dir)
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.data.test, self.engine.model)
def train(self):
    """Train the model."""
    self.load_dataset()
    self.engine = Triple2vecEngine(self.config)
    self.engine.data = self.data
    self.train_data = self.data.sample_triple()
    train_loader = DataLoader(
        torch.LongTensor(self.train_data.to_numpy()).to(self.engine.device),
        batch_size=self.config["model"]["batch_size"],
        shuffle=True,
        drop_last=True,
    )
    self.monitor = Monitor(
        log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    self.model_save_dir = os.path.join(
        self.config["system"]["model_save_dir"], self.config["model"]["save_name"]
    )
    self._train(self.engine, train_loader, self.model_save_dir)
    self.config["run_time"] = self.monitor.stop()
    return self.eval_engine.best_valid_performance
def train(self): """Default train implementation """ assert hasattr(self, "engine"), "Please specify the exact model engine !" self.monitor = Monitor(log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id) self.engine.data = self.dataset print("Start training... ") epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout) for epoch in epoch_bar: print(f"Epoch {epoch} starts !") print("-" * 80) if epoch > 0 and self.eval_engine.n_no_update == 0: # previous epoch have already obtained better result self.engine.save_checkpoint(model_dir=os.path.join( self.config["model_save_dir"], "model.cpk")) if self.eval_engine.n_no_update >= MAX_N_UPDATE: print( "Early stop criterion triggered, no performance update for {:} times" .format(MAX_N_UPDATE)) break self.engine.train_an_epoch(self.train_data, epoch_id=epoch) self.eval_engine.train_eval(self.dataset.valid[0], self.dataset.test[0], self.engine.model, epoch) # anneal alpha self.engine.model.alpha = min( self.config["alpha"] + math.exp(epoch - self.config["max_epoch"] + 20), 1, ) """Sets the learning rate to the initial LR decayed by 10 every 10 epochs""" lr = self.config["lr"] * (0.5**(epoch // 10)) for param_group in self.engine.optimizer.param_groups: param_group["lr"] = lr self.config["run_time"] = self.monitor.stop() return self.eval_engine.best_valid_performance
def train_gmf(self):
    """Train GMF."""
    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    self.model_dir = os.path.join(
        self.config["model_save_dir"], self.config["save_name"]
    )
    for epoch in range(self.config["max_epoch"]):
        print(f"Epoch {epoch} starts !")
        print("-" * 80)
        if epoch > 0 and self.eval_engine.n_no_update == 0:
            # the previous epoch produced a better result, so checkpoint it
            self.gmfengine.save_checkpoint(model_dir=self.model_dir)
        if self.eval_engine.n_no_update >= MAX_N_UPDATE:
            print(
                "Early stop criterion triggered: no performance update for {:} consecutive epochs".format(
                    MAX_N_UPDATE
                )
            )
            break
        train_loader = self.data
        self.gmfengine.train_an_epoch(epoch_id=epoch, train_loader=train_loader)
    print("Saving embeddings to: %s" % self.config["model_save_dir"])
    user_embed, item_embed, v = (
        self.gmfengine.model.user_memory.weight.detach().cpu(),
        self.gmfengine.model.item_memory.weight.detach().cpu(),
        self.gmfengine.model.v.weight.detach().cpu(),
    )
    embed_dir = os.path.join(self.config["model_save_dir"], "pretain/embeddings")
    ensureDir(embed_dir)
    np.savez(embed_dir, user=user_embed, item=item_embed, v=v)
    self.config["run_time"] = self.monitor.stop()
    return np.array(user_embed), np.array(item_embed)
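# A minimal sketch of how the arrays written by np.savez in train_gmf() above
# could be read back. It assumes the same "pretain/embeddings" literal used
# there (np.savez appends ".npz" when the target has no extension); the helper
# name and path handling are illustrative, not library API.
import os

import numpy as np


def load_gmf_embeddings(model_save_dir):
    """Load the pretrained GMF user/item embeddings saved by train_gmf()."""
    path = os.path.join(model_save_dir, "pretain/embeddings.npz")
    data = np.load(path)
    return data["user"], data["item"], data["v"]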
def train(self):
    """Train the model."""
    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    train_loader = self.sample_generator.instance_a_train_loader(
        self.config["num_negative"], self.config["batch_size"]
    )
    # Train NCF without pretraining
    self.config["pretrain"] = None
    self.config["model"] = "NCF_wo_pre"
    self.engine = NeuMFEngine(self.config)
    self.neumf_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["neumf_config"]["save_name"]
    )
    self._train(self.engine, train_loader, self.neumf_save_dir)
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    # Train GCN
    self.config["pretrain"] = None
    self.config["model"] = "GCN"
    self.engine = GCN_SEngine(self.config)
    self.gcn_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["gcn_config"]["save_name"]
    )
    self._train(
        engine=self.engine, train_loader=self.dataset, save_dir=self.gcn_save_dir
    )
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    # Train GMF
    self.config["pretrain"] = None
    self.config["model"] = "GMF"
    self.engine = GMFEngine(self.config)
    self.gmf_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["gmf_config"]["save_name"]
    )
    self._train(self.engine, train_loader, self.gmf_save_dir)
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    # Train MLP
    self.config["pretrain"] = None
    self.config["model"] = "mlp"
    self.engine = MLPEngine(self.config)
    self.mlp_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["mlp_config"]["save_name"]
    )
    self._train(self.engine, train_loader, self.mlp_save_dir)
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    # Train NCF initialized from pretrained GMF
    self.config["pretrain"] = "gmf"
    self.config["model"] = "ncf_gmf"
    self.engine = NeuMFEngine(self.config)
    self.neumf_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["neumf_config"]["save_name"]
    )
    self._train(self.engine, train_loader, self.neumf_save_dir)
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.dataset.test, self.engine.model)

    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    # Train NCF initialized from pretrained GCN
    self.config["pretrain"] = "gcn"
    self.config["model"] = "ncf_gcn"
    self.engine = NeuMFEngine(self.config)
    self.neumf_save_dir = os.path.join(
        self.config["model_save_dir"], self.config["neumf_config"]["save_name"]
    )
    self._train(self.engine, train_loader, self.neumf_save_dir)
    while self.eval_engine.n_worker:
        print("Waiting 15s for eval_engine workers to finish ...")
        time.sleep(15)  # wait for the evaluation workers to complete
    self.config["run_time"] = self.monitor.stop()
    self.eval_engine.test_eval(self.dataset.test, self.engine.model)