def sample_triple(self, dump=True, load_save=False):
    """Sample triples, or load previously sampled triples from a file.

    This method is only applicable for basket based Recommender.

    The sample file is named
    ``triple_<dataset>[_<percent * 100>]_<n_sample>.csv`` and is placed in
    ``self.config["system"]["process_dir"]``. As a side effect,
    ``self.process_path`` is set to that directory.

    Args:
        dump: Forwarded unchanged to ``Sampler``.
        load_save: Forwarded unchanged to ``Sampler``.

    Returns:
        Whatever ``Sampler.sample()`` returns.
    """
    dataset_config = self.config["dataset"]
    n_sample = self.config["model"]["n_sample"]

    # The percent value is read from the dataset section, so the membership
    # test has to look at that same section.
    if "percent" in dataset_config:
        percent_suffix = "_" + str(dataset_config["percent"] * 100)
    else:
        percent_suffix = ""

    # Build the optional suffix separately: a trailing conditional expression
    # binds looser than ``+`` and would swallow the whole file name, yielding
    # either a name without ".csv" or just ".csv".
    sample_file_name = (
        "triple_"
        + dataset_config["dataset"]
        + percent_suffix
        + "_"
        + str(n_sample)
        + ".csv"
    )

    self.process_path = self.config["system"]["process_dir"]
    ensureDir(self.process_path)
    sample_file = os.path.join(self.process_path, sample_file_name)

    my_sampler = Sampler(
        self.train,
        sample_file,
        n_sample,
        dump=dump,
        load_save=load_save,
    )
    return my_sampler.sample()
def get_adj_mat(self):
    """Return the adjacency matrices used by the NGCF model.

    The three matrices are cached as ``.npz`` files in a directory derived
    from the dataset name, the data split and (when configured) the percent
    value. If loading any of them fails, all three are rebuilt with
    ``self.create_adj_mat()`` and written back to that directory.

    Returns:
        tuple: ``(adj_mat, norm_adj_mat, mean_adj_mat)``.
    """
    self.init_train_items()

    cfg = self.config
    name_parts = ["ngcf_", cfg["dataset"], "_", cfg["data_split"]]
    if "percent" in cfg:
        name_parts.append("_" + str(cfg["percent"] * 100))
    cache_name = "".join(name_parts)

    self.process_path = os.path.join(cfg["root_dir"], cfg["process_dir"])
    cache_dir = os.path.join(self.process_path, cache_name)
    ensureDir(cache_dir)
    print(cache_dir)

    adj_file = os.path.join(cache_dir, "s_adj_mat.npz")
    norm_file = os.path.join(cache_dir, "s_norm_adj_mat.npz")
    mean_file = os.path.join(cache_dir, "s_mean_adj_mat.npz")

    try:
        started = time()
        adj_mat = sp.load_npz(adj_file)
        norm_adj_mat = sp.load_npz(norm_file)
        mean_adj_mat = sp.load_npz(mean_file)
        print("already load adj matrix", adj_mat.shape, time() - started)
    except Exception:
        # Any load failure is treated as a cache miss: rebuild and store all three.
        adj_mat, norm_adj_mat, mean_adj_mat = self.create_adj_mat()
        for target, matrix in (
            (adj_file, adj_mat),
            (norm_file, norm_adj_mat),
            (mean_file, mean_adj_mat),
        ):
            sp.save_npz(target, matrix)

    return adj_mat, norm_adj_mat, mean_adj_mat
def sample_triple_time(self, dump=True, load_save=False):
    """Sample triples by time step, or load previously sampled triples.

    Only applicable for basket based Recommender.

    The sample file is named
    ``triple_<dataset>[_<percent * 100>]_<time_step>_<n_sample>.csv`` and is
    placed in ``<root_dir>/<process_dir>``. As a side effect,
    ``self.process_path`` is set to that directory.

    Args:
        dump: Forwarded unchanged to ``Sampler``.
        load_save: Forwarded unchanged to ``Sampler``.

    Returns:
        Whatever ``Sampler.sample_by_time(time_step)`` returns.
    """
    config = self.config
    n_sample = config["n_sample"]

    # The file name falls back to "_10" when no time step is configured, so
    # the sampler call uses the same fallback instead of raising KeyError.
    time_step = config["time_step"] if "time_step" in config else 10

    if "percent" in config:
        percent_suffix = "_" + str(config["percent"] * 100)
    else:
        percent_suffix = ""

    # Build the optional suffix separately: a trailing conditional expression
    # binds looser than ``+`` and would swallow the whole file name, yielding
    # either a name without ".csv" or just ".csv".
    sample_file_name = (
        "triple_"
        + config["dataset"]
        + percent_suffix
        + "_"
        + str(time_step)
        + "_"
        + str(n_sample)
        + ".csv"
    )

    self.process_path = os.path.join(config["root_dir"], config["process_dir"])
    ensureDir(self.process_path)
    sample_file = os.path.join(self.process_path, sample_file_name)

    my_sampler = Sampler(
        self.train,
        sample_file,
        n_sample,
        dump=dump,
        load_save=load_save,
    )
    return my_sampler.sample_by_time(time_step)
def train_gmf(self):
    """Train GMF and export the learned embeddings.

    Runs up to ``self.config["max_epoch"]`` epochs through ``self.gmfengine``,
    saving a checkpoint whenever ``self.eval_engine.n_no_update`` is 0 at the
    start of a later epoch and stopping once it reaches ``MAX_N_UPDATE``.
    After training, the user, item and ``v`` weights are written with
    ``np.savez`` and the monitor's result is stored in
    ``self.config["run_time"]``.

    Returns:
        tuple: ``(user_embed, item_embed)`` converted to numpy arrays.
    """
    self.monitor = Monitor(
        log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    self.model_dir = os.path.join(
        self.config["model_save_dir"], self.config["save_name"]
    )

    # Read the epoch limit from the instance config, like every other setting
    # in this method; a bare ``config`` name is not defined in this scope.
    for epoch in range(self.config["max_epoch"]):
        print(f"Epoch {epoch} starts !")
        print("-" * 80)
        if epoch > 0 and self.eval_engine.n_no_update == 0:
            # previous epoch have already obtained better result
            self.gmfengine.save_checkpoint(model_dir=self.model_dir)

        if self.eval_engine.n_no_update >= MAX_N_UPDATE:
            print(
                "Early stop criterion triggered, no performance update for {:} times"
                .format(MAX_N_UPDATE))
            break

        train_loader = self.data
        self.gmfengine.train_an_epoch(epoch_id=epoch, train_loader=train_loader)

    print("Saving embeddings to: %s" % self.config["model_save_dir"])
    # Detach and move to CPU so the weights can be serialised with numpy.
    user_embed, item_embed, v = (
        self.gmfengine.model.user_memory.weight.detach().cpu(),
        self.gmfengine.model.item_memory.weight.detach().cpu(),
        self.gmfengine.model.v.weight.detach().cpu(),
    )
    embed_dir = os.path.join(self.config["model_save_dir"], "pretain/embeddings")
    ensureDir(embed_dir)
    np.savez(embed_dir, user=user_embed, item=item_embed, v=v)

    self.config["run_time"] = self.monitor.stop()
    return np.array(user_embed), np.array(item_embed)
def get_adj_mat(self, config):
    """Get the adjacency matrices, creating and caching them if necessary.

    This method is for NGCF model.

    The matrices are cached as ``.npz`` files under
    ``<process_dir>/<dataset>/ngcf_<dataset>_<data_split>[_<percent * 100>]``.
    If loading any of them fails, all three are rebuilt with
    ``self.create_adj_mat()`` and written back to that directory.

    Args:
        config (dict): Configuration with a ``"dataset"`` section (keys
            ``"dataset"``, ``"data_split"`` and optionally ``"percent"``) and
            a ``"system"`` section (key ``"process_dir"``).

    Returns:
        tuple: ``(adj_mat, norm_adj_mat, mean_adj_mat)``.
    """
    dataset_config = config["dataset"]

    # The percent value lives in the dataset section, so test for it there.
    # Testing the top-level dict would drop the suffix and make runs with
    # different percentages share one cache directory.
    if "percent" in dataset_config:
        percent_suffix = "_" + str(dataset_config["percent"] * 100)
    else:
        percent_suffix = ""

    process_file_name = (
        "ngcf_"
        + dataset_config["dataset"]
        + "_"
        + dataset_config["data_split"]
        + percent_suffix
    )
    process_path = os.path.join(
        config["system"]["process_dir"],
        dataset_config["dataset"] + "/",
    )
    process_file_name = os.path.join(process_path, process_file_name)
    ensureDir(process_file_name)
    print(process_file_name)

    try:
        adj_mat = sp.load_npz(os.path.join(process_file_name, "s_adj_mat.npz"))
        norm_adj_mat = sp.load_npz(
            os.path.join(process_file_name, "s_norm_adj_mat.npz")
        )
        mean_adj_mat = sp.load_npz(
            os.path.join(process_file_name, "s_mean_adj_mat.npz")
        )
        print("already load adj matrix", adj_mat.shape)
    except Exception:
        # Any load failure is treated as a cache miss: rebuild and store all three.
        adj_mat, norm_adj_mat, mean_adj_mat = self.create_adj_mat()
        sp.save_npz(os.path.join(process_file_name, "s_adj_mat.npz"), adj_mat)
        sp.save_npz(
            os.path.join(process_file_name, "s_norm_adj_mat.npz"), norm_adj_mat
        )
        sp.save_npz(
            os.path.join(process_file_name, "s_mean_adj_mat.npz"), mean_adj_mat
        )
    return adj_mat, norm_adj_mat, mean_adj_mat
def prepare_env(self):
    """Set up the running environment and return the resolved config.

    Steps performed, in order:

    * Load the JSON config named by ``self.args.config_file`` and apply
      ``update_args`` with the command-line arguments.
    * Make ``root_dir`` absolute and build a unique ``model_run_id`` from the
      model name, config id, a timestamp and six random lowercase letters.
    * Set the random seed (2020 when none is configured).
    * Initialize the working folders and the standard logger.
    * Resolve the process, log, run, tune, checkpoint and result paths
      relative to ``root_dir`` and store them back into the config.
    * Replace ``ray.utils.get_user_temp_dir`` with a function that returns
      ``<root_dir>/tmp``.

    Returns:
        dict: The loaded and updated configuration.
    """
    # Load config file from json
    config_path = self.args.config_file
    with open(config_path) as config_fh:
        print(f"loading config file {self.args.config_file}")
        config = json.load(config_fh)

    # Command-line arguments are applied on top of the file contents.
    update_args(config, self.args)

    # obtain abspath for the project
    config["system"]["root_dir"] = os.path.abspath(config["system"]["root_dir"])

    def under_root(*parts):
        # Resolve path components against the (absolute) project root.
        return os.path.join(config["system"]["root_dir"], *parts)

    # Unique run id: model name, config id, timestamp and a random suffix.
    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
    random_str = "".join(random.choice(string.ascii_lowercase) for _ in range(6))
    config["system"]["model_run_id"] = "_".join(
        (
            config["model"]["model"],
            config["model"]["config_id"],
            timestamp_str,
            random_str,
        )
    )

    # Initialize random seeds
    set_seed(config["system"].get("seed", 2020))

    # Initialize working folders
    self.initialize_folders(config)
    config["system"]["process_dir"] = under_root(config["system"]["process_dir"])

    # Initialize log file
    config["system"]["log_file"] = under_root(
        config["system"]["log_dir"], config["system"]["model_run_id"]
    )
    logger.init_std_logger(config["system"]["log_file"])

    print("Python version:", sys.version)
    print("pytorch version:", torch.__version__)

    # File paths to be saved; the same run directory is stored in both sections.
    run_dir = under_root(
        config["system"]["run_dir"], config["system"]["model_run_id"]
    )
    config["model"]["run_dir"] = run_dir
    config["system"]["run_dir"] = run_dir
    print(
        "The intermediate running statuses will be reported in folder:",
        config["system"]["run_dir"],
    )

    config["system"]["tune_dir"] = under_root(config["system"]["tune_dir"])

    def get_user_temp_dir():
        tempdir = os.path.join(config["system"]["root_dir"], "tmp")
        print(f"ray temp dir {tempdir}")
        return tempdir

    # Point ray's temp-dir lookup at the project-local tmp folder.
    ray.utils.get_user_temp_dir = get_user_temp_dir

    # Model checkpoints paths to be saved
    config["system"]["model_save_dir"] = under_root(
        config["system"]["checkpoint_dir"], config["system"]["model_run_id"]
    )
    ensureDir(config["system"]["model_save_dir"])
    print("Model checkpoint will save in file:", config["system"]["model_save_dir"])

    config["system"]["result_file"] = under_root(
        config["system"]["result_dir"], config["system"]["result_file"]
    )
    print("Performance result will save in file:", config["system"]["result_file"])

    print_dict_as_table(config["system"], "System configs")
    return config
def prepare_env(config):
    """Set up the running environment and return the resolved config.

    Steps performed, in order:

    - Default ``root_dir`` to the parent of this file's directory when the
      caller did not provide one.
    - Load the JSON file named by ``config["config_file"]`` and overlay the
      received ``config`` on top of it.
    - Build a unique ``model_run_id`` from the model name, config id, a
      timestamp and six random lowercase letters.
    - Set the random seed (2020 when none is configured).
    - Initialize the system folders and the standard logger.
    - Resolve the log, run, checkpoint and result paths relative to
      ``root_dir`` and store them back into the config.

    Args:
        config (dict): Global configs; must contain ``"config_file"``.

    Returns:
        dict: The merged configuration.
    """
    # obtain abspath for the project
    # You need specified it if it is running in the container.
    if "root_dir" not in config:
        this_dir = os.path.dirname(os.path.abspath(__file__))
        config["root_dir"] = os.path.abspath(os.path.join(this_dir, ".."))

    # load config file from json
    with open(config["config_file"]) as json_fh:
        print("loading config file", config["config_file"])
        merged = json.load(json_fh)

    # Values already present in the received config win over the file contents.
    merged.update(config)
    config = merged

    def under_root(*parts):
        # Resolve path components against the project root.
        return os.path.join(config["root_dir"], *parts)

    # Unique run id: model name, config id, timestamp and a random suffix.
    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
    random_str = "".join(random.choice(string.ascii_lowercase) for _ in range(6))
    config["model_run_id"] = "_".join(
        (config["model"], config["config_id"], timestamp_str, random_str)
    )

    set_seed(config.get("seed", 2020))
    initialize_folders(config["root_dir"])

    # Initialize log file
    config["log_file"] = under_root(config["log_dir"], config["model_run_id"])
    logger.init_std_logger(config["log_file"])

    print("python version:", sys.version)
    print("pytorch version:", torch.__version__)

    # File paths to be saved
    config["run_dir"] = under_root(config["run_dir"], config["model_run_id"])
    print(
        "The intermediate running statuses will be reported in folder:",
        config["run_dir"],
    )

    # Model checkpoints paths to be saved
    config["model_save_dir"] = under_root(
        config["checkpoint_dir"], config["model_run_id"]
    )
    ensureDir(config["model_save_dir"])
    print("Model checkpoint will save in file:", config["model_save_dir"])

    config["result_file"] = under_root(config["result_dir"], config["result_file"])
    print("Performance result will save in file:", config["result_file"])

    print_dict_as_table(config, "Model configs")
    return config