Example #1
    def train(self):
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        for epoch in range(self.config["model"]["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # The previous epoch already obtained a better result.
                self.engine.save_checkpoint(model_dir=self.model_save_dir)

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print("Early stopping triggered: no performance improvement "
                      "for {} epochs.".format(MAX_N_UPDATE))
                break

            train_loader = self.sample_generator.pairwise_negative_train_loader(
                self.config["model"]["batch_size"],
                self.config["model"]["device_str"])
            self.engine.train_an_epoch(epoch_id=epoch,
                                       train_loader=train_loader)
            self.eval_engine.train_eval(self.data.valid[0], self.data.test[0],
                                        self.engine.model, epoch)
        self.config["run_time"] = self.monitor.stop()
Example #2
    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])

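        # Build the training loader according to the configured loss type
        # (pairwise BPR or pointwise BCE with sampled negatives).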
        if self.config["model"]["loss"] == "bpr":
            train_loader = self.data.instance_bpr_loader(
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        elif self.config["model"]["loss"] == "bce":
            train_loader = self.data.instance_bce_loader(
                num_negative=self.config["model"]["num_negative"],
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['model']['loss']}; "
                "expected 'bpr' or 'bce'."
            )

        self.engine = LCFNEngine(self.config)
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()

        return self.eval_engine.best_valid_performance
Example #3
    def train(self):
        """Train the model."""
        self.load_dataset()
        self.gpu_id, self.config["device_str"] = self.get_device()
        """ Main training navigator

        Returns:

        """

        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.data.instance_vae_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )

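        # Expose the dataset dimensions to the model config before building the engine.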
        self.config["model"]["n_items"] = self.data.n_items
        self.config["model"]["n_users"] = self.data.n_users
        self.engine = VAECFEngine(self.config)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #4
    def train(self):
        if self.config["pretrain"] == "gmf":
            user_embed, item_embed = self.train_gmf()
            model = self.cmnengine(self.config, user_embed, item_embed,
                                   self.data.item_users_list)
            self.monitor = Monitor(log_dir=self.config["run_dir"],
                                   delay=1,
                                   gpu_id=self.gpu_id)
            self.model_dir = os.path.join(self.config["model_save_dir"],
                                          self.config["save_name"])
            for epoch in range(self.config["max_epoch"]):
                print(f"Epoch {epoch} starts !")
                print("-" * 80)
                if epoch > 0 and self.eval_engine.n_no_update == 0:
                    # The previous epoch already obtained a better result.
                    model.save_checkpoint(model_dir=self.model_dir)

                if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                    print("Early stopping triggered: no performance improvement "
                          "for {} epochs.".format(MAX_N_UPDATE))
                    break

                train_loader = self.data
                model.train_an_epoch(epoch_id=epoch, train_loader=train_loader)

                self.eval_engine.train_eval(self.data.valid[0],
                                            self.data.test[0], model.model,
                                            epoch)
            self.config["run_time"] = self.monitor.stop()
            self.eval_engine.test_eval(self.data.test, model.model)
Example #5
    def train_mlp(self):
        """ Train MLP

        Returns:
            None
        """
        # Train MLP
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["model"]["num_negative"],
            self.config["model"]["batch_size"])
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["mlp_config"]["save_name"],
        )
        self._train(self.engine, train_loader, self.mlp_save_dir)

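        # Block until asynchronous evaluation workers finish before the final test evaluation.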
        while self.eval_engine.n_worker:
            print("Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.data.test, self.engine.model)
Example #6
    def train(self):
        """Default train implementation

        """
        self.load_dataset()
        self.train_data = self.data.sample_triple()
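        # Step size for annealing alpha from its initial value up to 1 over max_epoch epochs.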
        self.config["model"]["alpha_step"] = (
            1 - self.config["model"]["alpha"]) / (
                self.config["model"]["max_epoch"])
        self.config["user_fea"] = self.data.user_feature
        self.config["item_fea"] = self.data.item_feature
        self.engine = VBCAREngine(self.config)
        self.engine.data = self.data
        assert hasattr(self,
                       "engine"), "Please specify the exact model engine !"
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["model"]["max_epoch"]),
                         file=sys.stdout)
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # The previous epoch already obtained a better result.
                self.engine.save_checkpoint(model_dir=os.path.join(
                    self.config["system"]["model_save_dir"], "model.cpk"))

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print("Early stopping triggered: no performance improvement "
                      "for {} epochs.".format(MAX_N_UPDATE))
                break
            data_loader = DataLoader(
                torch.LongTensor(self.train_data.to_numpy()).to(
                    self.engine.device),
                batch_size=self.config["model"]["batch_size"],
                shuffle=True,
                drop_last=True,
            )
            self.engine.train_an_epoch(data_loader, epoch_id=epoch)
            self.eval_engine.train_eval(self.data.valid[0], self.data.test[0],
                                        self.engine.model, epoch)
            # anneal alpha
            self.engine.model.alpha = min(
                self.config["model"]["alpha"] +
                math.exp(epoch - self.config["model"]["max_epoch"] + 20),
                1,
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["model"]["lr"] * (0.5**(epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #7
    def train(self):
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)

        # Train GCN
        self.engine = GCN_SEngine(self.config["gcn_config"])
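        # The GCN stage trains directly on the dataset object rather than a sampled loader.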
        train_loader = self.dataset
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gcn_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=train_loader,
                    save_dir=self.gcn_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        # Train MLP
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"])
        self.engine = MLPEngine(self.config["mlp_config"],
                                gcn_config=self.config["gcn_config"])
        self.mlp_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["mlp_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=train_loader,
                    save_dir=self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the

        # Train NeuMF
        self.engine = NeuMFEngine(
            self.config["neumf_config"],
            mlp_config=self.config["mlp_config"],
            gcn_config=self.config["gcn_config"],
        )
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(
            engine=self.engine,
            train_loader=train_loader,
            save_dir=self.neumf_save_dir,
        )

        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
Example #8
    def train(self):
        """Train and test NARM."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.load_train_data
        self.engine = NARMEngine(self.config)
        self.narm_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self._train(self.engine, train_loader, self.narm_save_dir)
        self.config["run_time"] = self.monitor.stop()
        self.seq_eval_engine.test_eval_seq(self.test_data, self.engine)
Example #9
    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self.engine = LightGCNEngine(self.config)
        train_loader = self.data.instance_bpr_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #10
    def train_ncf(self):
        """Train NeuMF."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["model"]["num_negative"],
            self.config["model"]["batch_size"])
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["neumf_config"]["save_name"],
        )
        self._train(self.engine, train_loader, self.neumf_save_dir)
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.data.test, self.engine.model)
Example #11
    def train(self):
        self.load_dataset()
        self.engine = Triple2vecEngine(self.config)
        self.engine.data = self.data
        self.train_data = self.data.sample_triple()
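        # Wrap the sampled triples in a shuffled, batched DataLoader on the engine's device.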
        train_loader = DataLoader(
            torch.LongTensor(self.train_data.to_numpy()).to(self.engine.device),
            batch_size=self.config["model"]["batch_size"],
            shuffle=True,
            drop_last=True,
        )
        self.monitor = Monitor(
            log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"], self.config["model"]["save_name"]
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #12
    def train(self):
        """Default train implementation

        """
        assert hasattr(self,
                       "engine"), "Please specify the exact model engine !"
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.engine.data = self.dataset
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # The previous epoch already obtained a better result.
                self.engine.save_checkpoint(model_dir=os.path.join(
                    self.config["model_save_dir"], "model.cpk"))

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print("Early stopping triggered: no performance improvement "
                      "for {} epochs.".format(MAX_N_UPDATE))
                break
            self.engine.train_an_epoch(self.train_data, epoch_id=epoch)
            self.eval_engine.train_eval(self.dataset.valid[0],
                                        self.dataset.test[0],
                                        self.engine.model, epoch)
            # anneal alpha
            self.engine.model.alpha = min(
                self.config["alpha"] +
                math.exp(epoch - self.config["max_epoch"] + 20),
                1,
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["lr"] * (0.5**(epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #13
    def train_gmf(self):
        """Train GMF."""
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_dir = os.path.join(self.config["model_save_dir"],
                                      self.config["save_name"])
        for epoch in range(self.config["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # The previous epoch already obtained a better result.
                self.gmfengine.save_checkpoint(model_dir=self.model_dir)

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print("Early stopping triggered: no performance improvement "
                      "for {} epochs.".format(MAX_N_UPDATE))
                break

            train_loader = self.data
            self.gmfengine.train_an_epoch(epoch_id=epoch,
                                          train_loader=train_loader)

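        # Export the learned GMF embeddings so they can be reused for pretraining.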
        print("Saving embeddings to: %s" % self.config["model_save_dir"])
        user_embed, item_embed, v = (
            self.gmfengine.model.user_memory.weight.detach().cpu(),
            self.gmfengine.model.item_memory.weight.detach().cpu(),
            self.gmfengine.model.v.weight.detach().cpu(),
        )
        embed_dir = os.path.join(self.config["model_save_dir"],
                                 "pretain/embeddings")
        ensureDir(embed_dir)
        np.savez(embed_dir, user=user_embed, item=item_embed, v=v)
        self.config["run_time"] = self.monitor.stop()

        return np.array(user_embed), np.array(item_embed)
Example #14
    def train(self):
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"])

        # Train NeuMF without pretraining
        self.config["pretrain"] = None
        self.config["model"] = "NCF_wo_pre"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train GCN
        self.config["pretrain"] = None
        self.config["model"] = "GCN"
        self.engine = GCN_SEngine(self.config)
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gcn_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=self.dataset,
                    save_dir=self.gcn_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the

        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train GMF
        self.config["pretrain"] = None
        self.config["model"] = "GMF"
        self.engine = GMFEngine(self.config)
        self.gmf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gmf_config"]["save_name"])
        self._train(self.engine, train_loader, self.gmf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train MLP
        self.config["pretrain"] = None
        self.config["model"] = "mlp"
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["mlp_config"]["save_name"])
        self._train(self.engine, train_loader, self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train ncf_gmf
        self.config["pretrain"] = "gmf"
        self.config["model"] = "ncf_gmf"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train ncf_gcn
        self.config["pretrain"] = "gcn"
        self.config["model"] = "ncf_gcn"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)