Example 1
    def load_model(self, folder_path, file_name = None):

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Loading model from file '{}'".format(folder_path + file_name))

        dataIO = DataIO(folder_path=folder_path)
        data_dict = dataIO.load_data(file_name=file_name)

        for attrib_name in data_dict.keys():
            self.__setattr__(attrib_name, data_dict[attrib_name])


        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues()
        self.model.build_graph()



        saver = tf.train.Saver()
        self.sess = tf.Session()

        saver.restore(self.sess, folder_path + file_name + "_session")


        self._print("Loading complete")
Example 2
    def loadModel(self, folder_path, file_name=None):

        import pickle

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Loading model from file '{}'".format(
            self.RECOMMENDER_NAME, folder_path + file_name))

        data_dict = pickle.load(open(folder_path + file_name, "rb"))

        for attrib_name in data_dict.keys():
            self.__setattr__(attrib_name, data_dict[attrib_name])

        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues(lamda=self.model_lamda, U=self.model_U)
        self.model.build_graph()

        saver = tf.train.Saver()
        self.sess = tf.Session()

        saver.restore(self.sess, folder_path + file_name + "_session")

        print("{}: Loading complete".format(self.RECOMMENDER_NAME))
Example 3
# Imports needed by the snippet below; the framework classes (BaseRecommender,
# Incremental_Training_Early_Stopping, BaseTempFolder, DataIO, Data, SpectralCF)
# come from the surrounding repository and their module paths are not shown here.
import math

import numpy as np
import scipy.sparse as sps
import tensorflow as tf


class SpectralCF_RecommenderWrapper(BaseRecommender, Incremental_Training_Early_Stopping, BaseTempFolder):


    RECOMMENDER_NAME = "SpectralCF_RecommenderWrapper"

    def __init__(self, URM_train):
        super(SpectralCF_RecommenderWrapper, self).__init__(URM_train)

        self._train = sps.dok_matrix(self.URM_train)


    def _compute_item_score(self, user_id_array, items_to_compute=None):

        if len(user_id_array) < self.batch_size:
            user_batch = np.zeros((self.batch_size), dtype=np.int64)
            user_batch[0:len(user_id_array)] = user_id_array

        else:
            user_batch = user_id_array


        # If user id batch is too long, slice the current user_id_array in blocks that the model can handle
        if len(user_id_array) > self.batch_size:

            n_split = math.ceil(len(user_id_array)/ self.batch_size)

            split_list = np.array_split(user_id_array, n_split)

            item_scores_to_compute = - np.ones((len(user_id_array), self.n_items)) * np.inf

            start_pos = 0
            for user_id_array_split in split_list:
                item_scores_to_compute[start_pos:start_pos+len(user_id_array_split),:] = self._compute_item_score(user_id_array_split, items_to_compute=items_to_compute)
                start_pos+=len(user_id_array_split)

        else:

            item_scores_to_compute = self.sess.run(self.model.all_ratings, {self.model.users: user_batch})


        if len(user_id_array) < self.batch_size:
            item_scores_to_compute = item_scores_to_compute[0:len(user_id_array),:]


        if items_to_compute is not None:
            item_scores = - np.ones((len(user_id_array), self.n_items)) * np.inf
            item_scores[:, items_to_compute] = item_scores_to_compute[:, items_to_compute]
        else:
            item_scores = item_scores_to_compute

        return item_scores




    def fit(self,
            epochs = 200,
            batch_size = 1024,
            embedding_size = 16,
            decay = 0.001,
            k = 3,
            learning_rate = 1e-3,
            temp_file_folder = None,
            **earlystopping_kwargs
            ):


        self.temp_file_folder = self._get_unique_temp_folder(input_temp_file_folder=temp_file_folder)


        self.k = k
        self.embedding_size = embedding_size
        self.learning_rate = learning_rate
        self.decay = decay
        self.batch_size = batch_size



        print("SpectralCF_RecommenderWrapper: Instantiating model...")

        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues()

        # Keep it to avoid recomputing every time the model is loaded
        # self.model_lamda = self.model.lamda.copy()
        # self.model_U = self.model.U

        self.model.build_graph()


        print("SpectralCF_RecommenderWrapper: Instantiating model... done!")
        # print(self.model.model_name)

        # config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # self.sess = tf.Session(config=config)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())


        print("SpectralCF_RecommenderWrapper: Training SpectralCF...")

        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name = self.RECOMMENDER_NAME,
                                        **earlystopping_kwargs)

        self.sess.close()
        self.sess = tf.Session()

        self.load_model(self.temp_file_folder, file_name="_best_model")


        print("SpectralCF_RecommenderWrapper: Tranining complete")


        self._clean_temp_folder(temp_file_folder=self.temp_file_folder)



    def _prepare_model_for_validation(self):
        pass


    def _update_best_model(self):
        self.save_model(self.temp_file_folder, file_name="_best_model")



    def _run_epoch(self, currentEpoch):

        users, pos_items, neg_items = self.data_generator.sample()

        _, loss = self.sess.run([self.model.updates, self.model.loss],
                                           feed_dict={self.model.users: users, self.model.pos_items: pos_items,
                                                      self.model.neg_items: neg_items})

        print("SpectralCF_RecommenderWrapper: Epoch {}, loss {:.2E}".format(currentEpoch+1, loss))

        if not np.isfinite(loss):
            self._clean_temp_folder(temp_file_folder=self.temp_file_folder)

            assert False, "SpectralCF_RecommenderWrapper: loss is not a finite number, terminating!"


    def save_model(self, folder_path, file_name = None):


        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path + file_name))

        data_dict_to_save = {"k": self.k,
                              "embedding_size": self.embedding_size,
                              "learning_rate": self.learning_rate,
                              "decay": self.decay,
                              "batch_size": self.batch_size,
                              # "model_lamda": self.model_lamda,
                              # "model_U": self.model_U,
                              }

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save = data_dict_to_save)

        saver = tf.train.Saver()

        saver.save(self.sess, folder_path + file_name + "_session")


        self._print("Saving complete")




    def load_model(self, folder_path, file_name = None):

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Loading model from file '{}'".format(folder_path + file_name))

        dataIO = DataIO(folder_path=folder_path)
        data_dict = dataIO.load_data(file_name=file_name)

        for attrib_name in data_dict.keys():
            self.__setattr__(attrib_name, data_dict[attrib_name])


        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues()
        self.model.build_graph()



        saver = tf.train.Saver()
        self.sess = tf.Session()

        saver.restore(self.sess, folder_path + file_name + "_session")


        self._print("Loading complete")
Example 4
    def fit(self,
            epochs = 200,
            batch_size = 1024,
            embedding_size = 16,
            decay = 0.001,
            k = 3,
            learning_rate = 1e-3,
            temp_file_folder = None,
            **earlystopping_kwargs
            ):


        self.temp_file_folder = self._get_unique_temp_folder(input_temp_file_folder=temp_file_folder)


        self.k = k
        self.embedding_size = embedding_size
        self.learning_rate = learning_rate
        self.decay = decay
        self.batch_size = batch_size



        print("SpectralCF_RecommenderWrapper: Instantiating model...")

        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues()

        # Keep it to avoid recomputing every time the model is loaded
        # self.model_lamda = self.model.lamda.copy()
        # self.model_U = self.model.U

        self.model.build_graph()


        print("SpectralCF_RecommenderWrapper: Instantiating model... done!")
        # print(self.model.model_name)

        # config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # self.sess = tf.Session(config=config)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())


        print("SpectralCF_RecommenderWrapper: Training SpectralCF...")

        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name = self.RECOMMENDER_NAME,
                                        **earlystopping_kwargs)

        self.sess.close()
        self.sess = tf.Session()

        self.load_model(self.temp_file_folder, file_name="_best_model")


        print("SpectralCF_RecommenderWrapper: Tranining complete")


        self._clean_temp_folder(temp_file_folder=self.temp_file_folder)
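The fit method above delegates the epoch loop to _train_with_early_stopping and only exposes the hooks _prepare_model_for_validation, _update_best_model and _run_epoch. The framework internals are not shown in these examples, but the overall pattern is roughly the sketch below; the evaluate callback and the periodic-validation schedule are assumptions, not the library's actual code.

import numpy as np

def train_with_early_stopping_sketch(recommender, epochs, evaluate, validation_every_n=5):
    # Rough stand-in for _train_with_early_stopping: run epochs, periodically
    # validate, and snapshot the model whenever the validation metric improves.
    best_metric = -np.inf
    for current_epoch in range(epochs):
        recommender._run_epoch(current_epoch)
        if (current_epoch + 1) % validation_every_n == 0:
            recommender._prepare_model_for_validation()
            metric = evaluate(recommender)          # hypothetical evaluation callback
            if metric > best_metric:
                best_metric = metric
                recommender._update_best_model()    # writes "_best_model" to the temp folder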
Example 5
class SpectralCF_RecommenderWrapper(BaseRecommender,
                                    Incremental_Training_Early_Stopping):

    RECOMMENDER_NAME = "SpectralCF_RecommenderWrapper"
    DEFAULT_TEMP_FILE_FOLDER = './result_experiments/__Temp_SpectralCF_RecommenderWrapper/'

    def __init__(self, URM_train):
        super(SpectralCF_RecommenderWrapper, self).__init__(URM_train)

        self._train = sps.dok_matrix(self.URM_train)

    def _compute_item_score(self, user_id_array, items_to_compute=None):

        if len(user_id_array) < self.batch_size:
            user_batch = np.zeros((self.batch_size), dtype=np.int64)
            user_batch[0:len(user_id_array)] = user_id_array

        elif len(user_id_array) > self.batch_size:
            assert False, "not supported"
        else:
            user_batch = user_id_array

        item_scores_to_compute = self.sess.run(self.model.all_ratings,
                                               {self.model.users: user_batch})

        if len(user_id_array) < self.batch_size:
            item_scores_to_compute = item_scores_to_compute[
                0:len(user_id_array), :]

        if items_to_compute is not None:
            item_scores = - np.ones((len(user_id_array), self.n_items)) * np.inf
            item_scores[:, items_to_compute] = item_scores_to_compute[:, items_to_compute]
        else:
            item_scores = item_scores_to_compute

        return item_scores

    def fit(self,
            epochs=200,
            batch_size=1024,
            embedding_size=16,
            decay=0.001,
            k=3,
            learning_rate=1e-3,
            temp_file_folder=None,
            **earlystopping_kwargs):

        if temp_file_folder is None:
            print("{}: Using default Temp folder '{}'".format(
                self.RECOMMENDER_NAME, self.DEFAULT_TEMP_FILE_FOLDER))
            self.temp_file_folder = self.DEFAULT_TEMP_FILE_FOLDER
        else:
            print("{}: Using Temp folder '{}'".format(self.RECOMMENDER_NAME,
                                                      temp_file_folder))
            self.temp_file_folder = temp_file_folder

        if not os.path.isdir(self.temp_file_folder):
            os.makedirs(self.temp_file_folder)

        self.k = k
        self.embedding_size = embedding_size
        self.learning_rate = learning_rate
        self.decay = decay
        self.batch_size = batch_size

        print("SpectralCF_RecommenderWrapper: Instantiating model...")

        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues()

        # Keep it to avoid recomputing every time the model is loaded
        self.model_lamda = self.model.lamda.copy()
        self.model_U = self.model.U

        self.model.build_graph()

        print("SpectralCF_RecommenderWrapper: Instantiating model... done!")
        # print(self.model.model_name)

        # config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # self.sess = tf.Session(config=config)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        print("SpectralCF_RecommenderWrapper: Training SpectralCF...")

        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name=self.RECOMMENDER_NAME,
                                        **earlystopping_kwargs)

        self.sess.close()
        self.sess = tf.Session()

        self.loadModel(self.temp_file_folder, file_name="_best_model")

        print("SpectralCF_RecommenderWrapper: Tranining complete")

        if self.temp_file_folder == self.DEFAULT_TEMP_FILE_FOLDER:
            print("{}: cleaning temporary files".format(self.RECOMMENDER_NAME))
            shutil.rmtree(self.DEFAULT_TEMP_FILE_FOLDER, ignore_errors=True)

    def _prepare_model_for_validation(self):
        pass

    def _update_best_model(self):
        self.saveModel(self.temp_file_folder, file_name="_best_model")

    def _run_epoch(self, currentEpoch):

        users, pos_items, neg_items = self.data_generator.sample()

        _, loss = self.sess.run(
            [self.model.updates, self.model.loss],
            feed_dict={
                self.model.users: users,
                self.model.pos_items: pos_items,
                self.model.neg_items: neg_items
            })

        print("SpectralCF_RecommenderWrapper: Epoch {}, loss {:.2E}".format(
            currentEpoch + 1, loss))

    def saveModel(self, folder_path, file_name=None):

        import pickle

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     folder_path + file_name))

        dictionary_to_save = {
            "k": self.k,
            "embedding_size": self.embedding_size,
            "learning_rate": self.learning_rate,
            "decay": self.decay,
            "batch_size": self.batch_size,
            "model_lamda": self.model_lamda,
            "model_U": self.model_U,
        }

        pickle.dump(dictionary_to_save,
                    open(folder_path + file_name, "wb"),
                    protocol=pickle.HIGHEST_PROTOCOL)

        saver = tf.train.Saver()

        saver.save(self.sess, folder_path + file_name + "_session")

        print("{}: Saving complete".format(self.RECOMMENDER_NAME,
                                           folder_path + file_name))

    def loadModel(self, folder_path, file_name=None):

        import pickle

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Loading model from file '{}'".format(
            self.RECOMMENDER_NAME, folder_path + file_name))

        data_dict = pickle.load(open(folder_path + file_name, "rb"))

        for attrib_name in data_dict.keys():
            self.__setattr__(attrib_name, data_dict[attrib_name])

        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues(lamda=self.model_lamda, U=self.model_U)
        self.model.build_graph()

        saver = tf.train.Saver()
        self.sess = tf.Session()

        saver.restore(self.sess, folder_path + file_name + "_session")

        print("{}: Loading complete".format(self.RECOMMENDER_NAME))
Example 6
    def fit(self,
            epochs=200,
            batch_size=1024,
            embedding_size=16,
            decay=0.001,
            k=3,
            learning_rate=1e-3,
            temp_file_folder=None,
            **earlystopping_kwargs):

        if temp_file_folder is None:
            print("{}: Using default Temp folder '{}'".format(
                self.RECOMMENDER_NAME, self.DEFAULT_TEMP_FILE_FOLDER))
            self.temp_file_folder = self.DEFAULT_TEMP_FILE_FOLDER
        else:
            print("{}: Using Temp folder '{}'".format(self.RECOMMENDER_NAME,
                                                      temp_file_folder))
            self.temp_file_folder = temp_file_folder

        if not os.path.isdir(self.temp_file_folder):
            os.makedirs(self.temp_file_folder)

        self.k = k
        self.embedding_size = embedding_size
        self.learning_rate = learning_rate
        self.decay = decay
        self.batch_size = batch_size

        print("SpectralCF_RecommenderWrapper: Instantiating model...")

        tf.reset_default_graph()

        self.data_generator = Data(self.URM_train, batch_size=self.batch_size)

        self.model = SpectralCF(K=self.k,
                                graph=self.URM_train.toarray(),
                                n_users=self.n_users,
                                n_items=self.n_items,
                                emb_dim=self.embedding_size,
                                lr=self.learning_rate,
                                decay=self.decay,
                                batch_size=self.batch_size)

        self.model.compute_eigenvalues()

        # Keep it to avoid recomputing every time the model is loaded
        self.model_lamda = self.model.lamda.copy()
        self.model_U = self.model.U

        self.model.build_graph()

        print("SpectralCF_RecommenderWrapper: Instantiating model... done!")
        # print(self.model.model_name)

        # config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # self.sess = tf.Session(config=config)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        print("SpectralCF_RecommenderWrapper: Training SpectralCF...")

        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name=self.RECOMMENDER_NAME,
                                        **earlystopping_kwargs)

        self.sess.close()
        self.sess = tf.Session()

        self.loadModel(self.temp_file_folder, file_name="_best_model")

        print("SpectralCF_RecommenderWrapper: Tranining complete")

        if self.temp_file_folder == self.DEFAULT_TEMP_FILE_FOLDER:
            print("{}: cleaning temporary files".format(self.RECOMMENDER_NAME))
            shutil.rmtree(self.DEFAULT_TEMP_FILE_FOLDER, ignore_errors=True)
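This older fit also manages the temporary checkpoint folder explicitly: it creates DEFAULT_TEMP_FILE_FOLDER if needed, saves the best model there during early stopping, reloads it after training, and finally removes the folder with shutil.rmtree. A minimal sketch of that lifecycle, using tempfile instead of the hard-coded default path:

import shutil
import tempfile

temp_folder = tempfile.mkdtemp(prefix="SpectralCF_")   # stands in for DEFAULT_TEMP_FILE_FOLDER

# ... train, save "_best_model" into temp_folder, then reload it ...

shutil.rmtree(temp_folder, ignore_errors=True)          # clean up once the best model is restored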