예제 #1
0
from skopt.space import Real, Integer, Categorical
from Utils.Evaluator import EvaluatorHoldout
from Utils.DataSplitter import DataSplitter
from Utils.DataReader import DataReader
import os

# Model to be tuned
from hybrid import Hybrid

################################# READ DATA #################################

# Load the user-rating matrix and the item-content matrix through the
# project's DataReader, then split the URM into train/validation/test parts.
reader = DataReader()
splitter = DataSplitter()

urm = reader.load_urm()
# NOTE(review): ICM is loaded here but not used anywhere in the visible
# script — confirm whether later code needs it.
ICM = reader.load_icm()
# presumably validation/testing are the fractions of interactions held out
# for each split — TODO confirm against DataSplitter.split
URM_train, URM_val, URM_test = splitter.split(urm, validation=0.2, testing=0.1)

################################ EVALUATORS ##################################

# One hold-out evaluator per split; the second argument is presumably the
# list of recommendation cutoffs (here only 10) — verify against
# EvaluatorHoldout.
evaluator_validation = EvaluatorHoldout(URM_val, [10])
evaluator_test = EvaluatorHoldout(URM_test, [10])

############################### OPTIMIZER SETUP ###############################

# The recommender class whose hyperparameters will be searched.
recommender_class = Hybrid
# NOTE(review): SearchBayesianSkopt is not imported in the visible import
# block — confirm it is brought into scope elsewhere, otherwise this line
# raises NameError.
parameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test)
# Search space, left empty here; expected to be filled in before the search
# is run.
hyperparameters_range_dictionary = {}
    def fit(self,
            topK=50,
            shrink=100,
            similarity='cosine',
            normalization="none",
            feature_weighting="none",
            **similarity_args):
        """Compute a similaripy similarity matrix and store it in ``W_sparse``.

        The transposed ``self.URM_train`` is horizontally stacked with the ICM
        loaded through ``DataReader`` and the selected similarity is computed
        on the stacked matrix (rows are presumably items — TODO confirm the
        URM/ICM orientation against ``DataReader``).

        Args:
            topK: forwarded to similaripy as ``k``; also stored on ``self``.
            shrink: forwarded to similaripy as ``shrink``; also stored on
                ``self``.
            similarity: one of ``"cosine"``, ``"dice"``, ``"jaccard"``,
                ``"asym"`` (asymmetric cosine) or ``"rp3beta"``.
            normalization: ``"bm25"``, ``"tfidf"`` or ``"bm25plus"`` to
                reweight ``self.URM_train``; any other value (including the
                default ``"none"``) leaves it untouched.
            feature_weighting: same choices as ``normalization``, applied to
                the ICM instead.
            **similarity_args: accepted for interface compatibility; not used
                by this implementation.

        Raises:
            ValueError: if ``similarity`` is not one of the supported names.
        """
        # Maps the public option name to the similaripy function name. The
        # functions are resolved lazily so only the selected one is looked up.
        similarity_functions = {
            "cosine": "cosine",
            "dice": "dice",
            "jaccard": "jaccard",
            "asym": "asymmetric_cosine",
            "rp3beta": "rp3beta",
        }
        # Fail fast, before any state is touched: previously an unknown name
        # fell through every branch and crashed later with UnboundLocalError,
        # after self.URM_train had already been overwritten.
        if similarity not in similarity_functions:
            raise ValueError(
                "Unsupported similarity {!r}; expected one of {}".format(
                    similarity, sorted(similarity_functions)))

        # Option names that coincide with similaripy.normalization functions.
        weightings = ("bm25", "tfidf", "bm25plus")

        self.topK = topK
        self.shrink = shrink

        icm = DataReader().load_icm()

        # NOTE: the reweighted matrix replaces self.URM_train, so calling
        # fit() again with a normalization reweights the already-reweighted
        # matrix.
        if normalization in weightings:
            reweight = getattr(similaripy.normalization, normalization)
            self.URM_train = reweight(self.URM_train, axis=1)

        if feature_weighting in weightings:
            reweight = getattr(similaripy.normalization, feature_weighting)
            icm = reweight(icm, axis=1)

        matrix = sps.hstack((self.URM_train.transpose().tocsr(), icm))

        # rp3beta is the only similarity that takes extra, fixed parameters.
        extra_args = {}
        if similarity == "rp3beta":
            extra_args = {"alpha": 0.3, "beta": 0.61}

        compute_similarity = getattr(similaripy,
                                     similarity_functions[similarity])
        similarity_matrix = compute_similarity(matrix,
                                               k=self.topK,
                                               shrink=self.shrink,
                                               binary=False,
                                               threshold=0,
                                               **extra_args)

        self.W_sparse = check_matrix(similarity_matrix, format='csr')