예제 #1
0
 def __init__(self,
              d,
              gamma,
              C,
              batch_size=10,
              positive_thresh=0.00001,
              max_iters=1000,
              validation_iters=100,
              max_trials=50,
              sample_item_rate=1.0,
              model_type='WARP'):
     self.d = d
     self.gamma = gamma
     self.C = C
     self.batch_size = batch_size
     self.positive_thresh = positive_thresh
     self.max_iters = max_iters
     self.validation_iters = validation_iters
     self.max_trials = max_trials
     self.sample_item_rate = sample_item_rate
     self.model_type = model_type
     self.model = None
     if self.model_type == 'WARP':
         self.model = WARP(self.d, self.gamma, self.C, self.max_iters,
                           self.validation_iters, self.batch_size,
                           self.positive_thresh, self.max_trials)
     elif self.model_type == 'WARPLimitedItem':
         self.model = WARPLimitedItem(self.d, self.gamma, self.C,
                                      self.max_iters, self.validation_iters,
                                      self.batch_size, self.positive_thresh,
                                      self.max_trials,
                                      self.sample_item_rate)
     else:
         print "Invalid model type:[%s]" % self.model_type
         sys.exit(2)
예제 #2
0
파일: warp.py 프로젝트: xiaop1987/mrec
 def __init__(self,d,gamma,C,batch_size=10,positive_thresh=0.00001,max_iters=1000,
              validation_iters = 100, max_trials = 50, sample_item_rate = 1.0,
              model_type = 'WARP'):
     self.d = d
     self.gamma = gamma
     self.C = C
     self.batch_size = batch_size
     self.positive_thresh = positive_thresh
     self.max_iters = max_iters
     self.validation_iters = validation_iters
     self.max_trials = max_trials
     self.sample_item_rate = sample_item_rate
     self.model_type = model_type
     self.model = None
     if self.model_type == 'WARP':
         self.model = WARP(self.d, self.gamma, self.C, self.max_iters,
                           self.validation_iters,self.batch_size, self.positive_thresh,
                           self.max_trials)
     elif self.model_type == 'WARPLimitedItem':
         self.model = WARPLimitedItem(self.d, self.gamma, self.C, self.max_iters,
                                      self.validation_iters,self.batch_size, self.positive_thresh,
                                      self.max_trials, self.sample_item_rate)
     else:
         print "Invalid model type:[%s]" % self.model_type
         sys.exit(2)
예제 #3
0
class WARPMFRecommender(MatrixFactorizationRecommender):
    """
    Learn matrix factorization optimizing the WARP loss.

    Parameters
    ==========
    d : int
        Dimensionality of factors.
    gamma : float
        Learning rate.
    C : float
        Regularization constant.
    batch_size : int
        Mini batch size for SGD updates.
    positive_thresh: float
        Consider an item to be "positive" i.e. liked if its rating is at least this.
    max_iters : int
        Number of attempts allowed to find a violating negative example during updates.
        In practice it means that we optimize for ranks 1 to max_iters-1.
    """
    def __init__(self,
                 d,
                 gamma,
                 C,
                 batch_size=10,
                 positive_thresh=0.00001,
                 max_iters=1000,
                 validation_iters=100,
                 max_trials=50,
                 sample_item_rate=1.0,
                 model_type='WARP'):
        self.d = d
        self.gamma = gamma
        self.C = C
        self.batch_size = batch_size
        self.positive_thresh = positive_thresh
        self.max_iters = max_iters
        self.validation_iters = validation_iters
        self.max_trials = max_trials
        self.sample_item_rate = sample_item_rate
        self.model_type = model_type
        self.model = None
        if self.model_type == 'WARP':
            self.model = WARP(self.d, self.gamma, self.C, self.max_iters,
                              self.validation_iters, self.batch_size,
                              self.positive_thresh, self.max_trials)
        elif self.model_type == 'WARPLimitedItem':
            self.model = WARPLimitedItem(self.d, self.gamma, self.C,
                                         self.max_iters, self.validation_iters,
                                         self.batch_size, self.positive_thresh,
                                         self.max_trials,
                                         self.sample_item_rate)
        else:
            print "Invalid model type:[%s]" % self.model_type
            sys.exit(2)

    def recommend(self, users, topK):
        return self.model.recommend(users, topK)

    def fit(self, train, item_features=None):
        """
        Learn factors from training set.

        Parameters
        ==========
        train : scipy.sparse.csr_matrix
            User-item matrix.
        item_features : array_like, shape = [num_items, num_features]
            Features for each item in the dataset, ignored here.
        """
        max_iters, validation_iters, validation = self.create_validation_set(
            train)
        self.description = 'WARPMF({0})'.format(self.model)
        print "Start model fit with param:[%s]" % self.description
        self.model.fit(train, validation)

        self.U = self.model.U_
        self.V = self.model.V_

    @staticmethod
    def create_validation_set(train):
        """
        Hide and return half of the known items for a sample of users,
        and estimate the number of sgd iterations to run.

        Parameters
        ==========
        train : scipy.sparse.csr_matrix
            User-item matrix.

        Returns
        =======
        max_iters : int
            Total number of sgd iterations to run.
        validation_iters : int
            Check progress after this many iterations.
        validation : dict
            Validation set.
        """
        # use 1% of users for validation, with a floor
        num_users = train.shape[0]
        num_validation_users = max(num_users / 100, 10)
        # ensure reasonable expected number of updates per validation user
        validation_iters = 100 * num_users / num_validation_users
        # and reasonable number of validation cycles
        max_iters = 30 * validation_iters

        print num_validation_users, 'validation users'
        print validation_iters, 'validation iters'
        print max_iters, 'max_iters'

        validation = dict()
        for u in xrange(num_validation_users):
            positive = np.where(train[u].data > 0)[0]
            hidden = random.sample(positive, positive.shape[0] / 2)
            if hidden:
                train[u].data[hidden] = 0
                #index_value_list = []
                #for index in train[u].indices[hidden]:
                #    index_value_list.append((index, 0))
                validation[u] = train[u].indices[hidden]
            #validation[u] = index_value_list

        return max_iters, validation_iters, validation
예제 #4
0
파일: warp.py 프로젝트: xiaop1987/mrec
class WARPMFRecommender(MatrixFactorizationRecommender):
    """
    Learn matrix factorization optimizing the WARP loss.

    Parameters
    ==========
    d : int
        Dimensionality of factors.
    gamma : float
        Learning rate.
    C : float
        Regularization constant.
    batch_size : int
        Mini batch size for SGD updates.
    positive_thresh: float
        Consider an item to be "positive" i.e. liked if its rating is at least this.
    max_iters : int
        Number of attempts allowed to find a violating negative example during updates.
        In practice it means that we optimize for ranks 1 to max_iters-1.
    """

    def __init__(self,d,gamma,C,batch_size=10,positive_thresh=0.00001,max_iters=1000,
                 validation_iters = 100, max_trials = 50, sample_item_rate = 1.0,
                 model_type = 'WARP'):
        self.d = d
        self.gamma = gamma
        self.C = C
        self.batch_size = batch_size
        self.positive_thresh = positive_thresh
        self.max_iters = max_iters
        self.validation_iters = validation_iters
        self.max_trials = max_trials
        self.sample_item_rate = sample_item_rate
        self.model_type = model_type
        self.model = None
        if self.model_type == 'WARP':
            self.model = WARP(self.d, self.gamma, self.C, self.max_iters,
                              self.validation_iters,self.batch_size, self.positive_thresh,
                              self.max_trials)
        elif self.model_type == 'WARPLimitedItem':
            self.model = WARPLimitedItem(self.d, self.gamma, self.C, self.max_iters,
                                         self.validation_iters,self.batch_size, self.positive_thresh,
                                         self.max_trials, self.sample_item_rate)
        else:
            print "Invalid model type:[%s]" % self.model_type
            sys.exit(2)

    def recommend(self, users, topK):
        return self.model.recommend(users, topK)

    def fit(self,train,item_features=None):
        """
        Learn factors from training set.

        Parameters
        ==========
        train : scipy.sparse.csr_matrix
            User-item matrix.
        item_features : array_like, shape = [num_items, num_features]
            Features for each item in the dataset, ignored here.
        """
        max_iters,validation_iters,validation = self.create_validation_set(train)
        self.description = 'WARPMF({0})'.format(self.model)
        print "Start model fit with param:[%s]" % self.description
        self.model.fit(train, validation)

        self.U = self.model.U_
        self.V = self.model.V_


    @staticmethod
    def create_validation_set(train):
        """
        Hide and return half of the known items for a sample of users,
        and estimate the number of sgd iterations to run.

        Parameters
        ==========
        train : scipy.sparse.csr_matrix
            User-item matrix.

        Returns
        =======
        max_iters : int
            Total number of sgd iterations to run.
        validation_iters : int
            Check progress after this many iterations.
        validation : dict
            Validation set.
        """
        # use 1% of users for validation, with a floor
        num_users = train.shape[0]
        num_validation_users = max(num_users/100,10)
        # ensure reasonable expected number of updates per validation user
        validation_iters = 100*num_users/num_validation_users
        # and reasonable number of validation cycles
        max_iters = 30*validation_iters

        print num_validation_users,'validation users'
        print validation_iters,'validation iters'
        print max_iters,'max_iters'

        validation = dict()
        for u in xrange(num_validation_users):
            positive = np.where(train[u].data > 0)[0]
            hidden = random.sample(positive,positive.shape[0]/2)
            if hidden:
                train[u].data[hidden] = 0
               #index_value_list = []
               #for index in train[u].indices[hidden]:
               #    index_value_list.append((index, 0))
                validation[u] = train[u].indices[hidden]
               #validation[u] = index_value_list

        return max_iters,validation_iters,validation