def update(self, user_ix, item_i, item_j):
        """Compute one pairwise update step for a user's ``alpha`` and ``beta``.

        ``alpha`` is moved by ``lambda_iter`` times the difference between the
        two items' entries in ``self.mf_rate``; ``beta`` is moved by
        ``lambda_iter`` times the difference between ``exp`` of the values
        that ``MyGaussian.get_prob_from_gaussian`` returns for the two item
        prices.  A penalty of ``2 * lambda_regular * weight ** 2`` is then
        subtracted from each weight.

        Nothing on ``self`` is modified; the caller stores the result.

        NOTE(review): the penalty is not scaled by ``lambda_iter`` and uses
        the squared weight -- confirm this is the intended regulariser.

        :param user_ix: internal index of the user being updated.
        :param item_i: internal index of the first item of the pair.
        :param item_j: internal index of the second item of the pair.
        :return: tuple ``(new_alpha, new_beta)``.
        """

        def price_prob(item_ix):
            # Value returned by the Gaussian helper for this user's price of
            # the given item in the 'phones' category.
            return MyGaussian.get_prob_from_gaussian(
                self.dataModel.getUserByUid(user_ix), 'phones',
                self.personal_item_price[user_ix][item_ix],
                self.param_mu, self.param_sigm)

        old_alpha = self.alpha[user_ix]
        old_beta = self.beta[user_ix]
        user_rates = self.mf_rate[user_ix]
        rate_gap = user_rates[item_i] - user_rates[item_j]

        first_prob = price_prob(item_i)
        second_prob = price_prob(item_j)
        prob_gap = np.exp(first_prob) - np.exp(second_prob)

        updated_alpha = (old_alpha + self.lambda_iter * rate_gap
                         - 2 * self.lambda_regular * old_alpha ** 2)
        updated_beta = (old_beta + self.lambda_iter * prob_gap
                        - 2 * self.lambda_regular * old_beta ** 2)
        return updated_alpha, updated_beta
Esempio n. 2
0
    def fit(self, trainSamples, trainTargets):
        """Fit the per-user ``alpha`` / ``beta`` / ``gamma`` weights.

        Steps, in order:

        1. Wrap the training data in a ``MemeryDataModel`` and run ``isvd``
           on its dense matrix.  Entries of the first reconstruction that are
           below 1 are replaced with NaN, ``isvd`` is run again, and the
           second reconstruction is stored as ``self.mf_rate``.
        2. Build ``self.personal_item_price``, a users x items float array
           in which every row holds the items' prices from ``trainSamples``.
        3. Fit ``self.param_mu`` / ``self.param_sigm`` with
           ``MyGaussian.gaussian_curve_fit``.
        4. Draw random ``alpha`` / ``beta``, zero ``gamma`` and, per user,
           apply ``self.update`` to each pair from ``self.sample``, shrinking
           ``self.lambda_iter`` by a factor of 0.9 after every pair.

        ``self.lambda_iter`` is restored to its initial value on exit, even
        if training raises.

        :param trainSamples: rows of ``(user, item, price)``.
        :param trainTargets: forwarded unchanged to ``MemeryDataModel``.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()

        dense = np.array(self.dataModel.getData().todense())
        u, s, v = isvd(dense)
        approx = np.dot(u, np.dot(s, v))
        # Entries reconstructed below 1 are marked NaN before the second
        # isvd pass.
        approx[approx < 1] = np.nan
        u, s, v = isvd(approx)
        self.mf_rate = np.dot(u, np.dot(s, v))

        train_data = pd.DataFrame(trainSamples,
                                  columns=['user', 'item', 'price'])
        # Plain column access instead of the removed ``.ix`` indexer.  If an
        # item occurs with several prices, the last row wins.
        item_price = dict(zip(train_data['item'], train_data['price']))

        # The stored price depends only on the item, so build one row and
        # repeat it for every user rather than looping users x items.
        price_row = np.empty(n_items)
        for item_ix in range(n_items):
            price_row[item_ix] = item_price[
                self.dataModel.getItemByIid(item_ix)]
        self.personal_item_price = np.tile(price_row, (n_users, 1))

        price_df = Construction.get_user_category_buy_price(
            {'phones': train_data})
        self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(
            price_df)

        self.alpha = np.random.rand(n_users)
        self.beta = np.random.rand(n_users)
        self.gamma = np.zeros(n_users)

        origin_lambda_iter = self.lambda_iter
        try:
            for user_ix in range(n_users):
                samples = self.sample(user_ix, self.max_iter)
                # Every user starts again from the initial step size.
                self.lambda_iter = origin_lambda_iter
                for item_1, item_2 in samples:
                    new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                    self.alpha[user_ix] = new_alpha
                    self.beta[user_ix] = new_beta
                    # Geometric decay of the step size within one user.
                    self.lambda_iter = self.lambda_iter * 0.9
        finally:
            self.lambda_iter = origin_lambda_iter

        # NOTE(review): the learned beta is discarded here, so the
        # ``beta * exp(prob)`` term in model_score is always zero -- confirm
        # this is intentional.
        self.beta = np.zeros(n_users)
    def model_score(self, user_ix, item_ix):
        """Return the model score of one (user, item) pair.

        The score is ``alpha * rate + beta * exp(prob) + gamma``, where
        ``rate`` is the pair's entry in ``self.mf_rate`` and ``prob`` is the
        value ``MyGaussian.get_prob_from_gaussian`` returns for the pair's
        entry in ``self.personal_item_price``.

        :param user_ix: internal index of the user.
        :param item_ix: internal index of the item.
        :return: the score computed by the formula above.
        """
        price = self.personal_item_price[user_ix][item_ix]
        price_prob = MyGaussian.get_prob_from_gaussian(
            self.dataModel.getUserByUid(user_ix), 'phones', price,
            self.param_mu, self.param_sigm)

        rating_term = self.alpha[user_ix] * self.mf_rate[user_ix, item_ix]
        price_term = self.beta[user_ix] * np.exp(price_prob)
        return rating_term + price_term + self.gamma[user_ix]
def construct_price_feature(df, dataModel):
    """Build one price-index x feature matrix per user.

    For every user in ``df`` a zero matrix of shape
    ``(dataModel.getPriceIxNum(), dataModel.getFeaturesNum())`` is created
    and each of the user's rows is accumulated into it by
    ``_construct_mention_feature_apply``.

    The rows are visited with an explicit loop, not ``DataFrame.apply``:
    the helper works purely by side effect, and pandas releases before 1.1
    call the applied function twice on the first row, which would add that
    row to the matrix twice.

    :param df: DataFrame with a ``'user'`` column plus the columns read by
        ``_construct_mention_feature_apply``.
    :param dataModel: object providing ``getUidByUser``, ``getPriceIxNum``
        and ``getFeaturesNum``.
    :return: dict mapping internal user id to that user's matrix.
    """
    price_df = get_user_category_buy_price(df, dataModel)
    # Only the second fitted parameter is used below.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    matrix_shape = (dataModel.getPriceIxNum(), dataModel.getFeaturesNum())
    result = {}
    for user, user_df in df.groupby('user'):
        uid = dataModel.getUidByUser(user)
        # presumably param_sigm holds variances, hence the square root --
        # TODO confirm against MyGaussian.gaussian_curve_fit
        user_sigm = np.sqrt(param_sigm[uid])
        user_price_feature = np.zeros(matrix_shape)
        for _, row in user_df.iterrows():
            _construct_mention_feature_apply(dataModel, user_price_feature,
                                             row, user_sigm)
        result[uid] = user_price_feature
    return result
Esempio n. 5
0
    def update(self, user_ix, item_i, item_j):
        """Compute one pairwise update step for a user's ``alpha`` and ``beta``.

        ``alpha`` is moved by ``lambda_iter`` times the difference between the
        two items' entries in ``self.mf_rate``; ``beta`` is moved by
        ``lambda_iter`` times the difference between ``exp`` of the values
        that ``MyGaussian.get_prob_from_gaussian`` returns for the two item
        prices.  A penalty of ``2 * lambda_regular * weight ** 2`` is then
        subtracted from each weight.

        Nothing on ``self`` is modified; the caller stores the result.

        NOTE(review): the penalty is not scaled by ``lambda_iter`` and uses
        the squared weight -- confirm this is the intended regulariser.

        :param user_ix: internal index of the user being updated.
        :param item_i: internal index of the first item of the pair.
        :param item_j: internal index of the second item of the pair.
        :return: tuple ``(new_alpha, new_beta)``.
        """

        def price_prob(item_ix):
            # Value returned by the Gaussian helper for this user's price of
            # the given item in the 'phones' category.
            return MyGaussian.get_prob_from_gaussian(
                self.dataModel.getUserByUid(user_ix), 'phones',
                self.personal_item_price[user_ix][item_ix],
                self.param_mu, self.param_sigm)

        old_alpha = self.alpha[user_ix]
        old_beta = self.beta[user_ix]
        user_rates = self.mf_rate[user_ix]
        rate_gap = user_rates[item_i] - user_rates[item_j]

        first_prob = price_prob(item_i)
        second_prob = price_prob(item_j)
        prob_gap = np.exp(first_prob) - np.exp(second_prob)

        updated_alpha = (old_alpha + self.lambda_iter * rate_gap
                         - 2 * self.lambda_regular * old_alpha ** 2)
        updated_beta = (old_beta + self.lambda_iter * prob_gap
                        - 2 * self.lambda_regular * old_beta ** 2)
        return updated_alpha, updated_beta
    def fit(self, trainSamples, trainTargets):
        """Fit the per-user ``alpha`` / ``beta`` / ``gamma`` weights.

        Steps, in order:

        1. Wrap the training data in a ``MemeryDataModel`` and run ``isvd``
           on its dense matrix.  Entries of the first reconstruction that are
           below 1 are replaced with NaN, ``isvd`` is run again, and the
           second reconstruction is stored as ``self.mf_rate``.
        2. Build ``self.personal_item_price``, a users x items float array
           in which every row holds the items' prices from ``trainSamples``.
        3. Fit ``self.param_mu`` / ``self.param_sigm`` with
           ``MyGaussian.gaussian_curve_fit``.
        4. Draw random ``alpha`` / ``beta``, zero ``gamma`` and, per user,
           apply ``self.update`` to each pair from ``self.sample``, shrinking
           ``self.lambda_iter`` by a factor of 0.9 after every pair.

        ``self.lambda_iter`` is restored to its initial value on exit, even
        if training raises.

        :param trainSamples: rows of ``(user, item, price)``.
        :param trainTargets: forwarded unchanged to ``MemeryDataModel``.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()

        dense = np.array(self.dataModel.getData().todense())
        u, s, v = isvd(dense)
        approx = np.dot(u, np.dot(s, v))
        # Entries reconstructed below 1 are marked NaN before the second
        # isvd pass.
        approx[approx < 1] = np.nan
        u, s, v = isvd(approx)
        self.mf_rate = np.dot(u, np.dot(s, v))

        train_data = pd.DataFrame(trainSamples,
                                  columns=['user', 'item', 'price'])
        # Plain column access instead of the removed ``.ix`` indexer.  If an
        # item occurs with several prices, the last row wins.
        item_price = dict(zip(train_data['item'], train_data['price']))

        # The stored price depends only on the item, so build one row and
        # repeat it for every user rather than looping users x items.
        price_row = np.empty(n_items)
        for item_ix in range(n_items):
            price_row[item_ix] = item_price[
                self.dataModel.getItemByIid(item_ix)]
        self.personal_item_price = np.tile(price_row, (n_users, 1))

        price_df = Construction.get_user_category_buy_price(
            {'phones': train_data})
        self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(
            price_df)

        self.alpha = np.random.rand(n_users)
        self.beta = np.random.rand(n_users)
        self.gamma = np.zeros(n_users)

        origin_lambda_iter = self.lambda_iter
        try:
            for user_ix in range(n_users):
                samples = self.sample(user_ix, self.max_iter)
                # Every user starts again from the initial step size.
                self.lambda_iter = origin_lambda_iter
                for item_1, item_2 in samples:
                    new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                    self.alpha[user_ix] = new_alpha
                    self.beta[user_ix] = new_beta
                    # Geometric decay of the step size within one user.
                    self.lambda_iter = self.lambda_iter * 0.9
        finally:
            self.lambda_iter = origin_lambda_iter

        # NOTE(review): the learned beta is discarded here, so the
        # ``beta * exp(prob)`` term in model_score is always zero -- confirm
        # this is intentional.
        self.beta = np.zeros(n_users)
Esempio n. 7
0
    def model_score(self, user_ix, item_ix):
        """Return the model score of one (user, item) pair.

        The score is ``alpha * rate + beta * exp(prob) + gamma``, where
        ``rate`` is the pair's entry in ``self.mf_rate`` and ``prob`` is the
        value ``MyGaussian.get_prob_from_gaussian`` returns for the pair's
        entry in ``self.personal_item_price``.

        :param user_ix: internal index of the user.
        :param item_ix: internal index of the item.
        :return: the score computed by the formula above.
        """
        price = self.personal_item_price[user_ix][item_ix]
        price_prob = MyGaussian.get_prob_from_gaussian(
            self.dataModel.getUserByUid(user_ix), 'phones', price,
            self.param_mu, self.param_sigm)

        rating_term = self.alpha[user_ix] * self.mf_rate[user_ix, item_ix]
        price_term = self.beta[user_ix] * np.exp(price_prob)
        return rating_term + price_term + self.gamma[user_ix]
Esempio n. 8
0
def construct_price_feature(df, dataModel):
    """Build one price-index x feature matrix per user.

    For every user in ``df`` a zero matrix of shape
    ``(dataModel.getPriceIxNum(), dataModel.getFeaturesNum())`` is created
    and each of the user's rows is accumulated into it by
    ``_construct_mention_feature_apply``.

    The rows are visited with an explicit loop, not ``DataFrame.apply``:
    the helper works purely by side effect, and pandas releases before 1.1
    call the applied function twice on the first row, which would add that
    row to the matrix twice.

    :param df: DataFrame with a ``'user'`` column plus the columns read by
        ``_construct_mention_feature_apply``.
    :param dataModel: object providing ``getUidByUser``, ``getPriceIxNum``
        and ``getFeaturesNum``.
    :return: dict mapping internal user id to that user's matrix.
    """
    price_df = get_user_category_buy_price(df, dataModel)
    # Only the second fitted parameter is used below.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    matrix_shape = (dataModel.getPriceIxNum(), dataModel.getFeaturesNum())
    result = {}
    for user, user_df in df.groupby('user'):
        uid = dataModel.getUidByUser(user)
        # presumably param_sigm holds variances, hence the square root --
        # TODO confirm against MyGaussian.gaussian_curve_fit
        user_sigm = np.sqrt(param_sigm[uid])
        user_price_feature = np.zeros(matrix_shape)
        for _, row in user_df.iterrows():
            _construct_mention_feature_apply(dataModel, user_price_feature,
                                             row, user_sigm)
        result[uid] = user_price_feature
    return result
Esempio n. 9
0
def _construct_mention_feature_apply(dataModel, origin_user_price_feature, row,
                                     user_sigm):
    """Accumulate one row into a user's price-index x feature matrix.

    The row's price index is mapped through ``dataModel.getPidByPriceIx`` and
    its ``'feature'`` entry is parsed into feature ids.  For every price
    index ``i`` in ``[int(price_ix - user_sigm), int(price_ix + user_sigm))``,
    clamped to ``[0, dataModel.getPriceIxNum()]``, the value of
    ``MyGaussian.get_prob_from_gaussian(i, price_ix, user_sigm)`` is added to
    the row's feature columns of ``origin_user_price_feature``.

    :param dataModel: object providing ``getPidByPriceIx``,
        ``getFidByFeature`` and ``getPriceIxNum``.
    :param origin_user_price_feature: matrix updated in place.
    :param row: mapping with ``'price_ix'`` and ``'feature'`` entries;
        ``'feature'`` is the text of a sequence whose items carry the
        feature in position 0.
    :param user_sigm: half-width of the window of price indices to update.
    :return: ``None``; the matrix is modified in place.
    """
    price_ix = dataModel.getPidByPriceIx(row['price_ix'])
    # SECURITY: eval() executes arbitrary code found in the 'feature' column.
    # Keep this only for trusted input; if the column always holds plain
    # literals, ast.literal_eval is the safe replacement.
    feature = [dataModel.getFidByFeature(i[0]) for i in eval(row['feature'])]

    # Clamp the window to the valid range of price indices.
    min_ix = max(int(price_ix - user_sigm), 0)
    max_ix = min(int(price_ix + user_sigm), dataModel.getPriceIxNum())

    # range() rather than xrange() so the loop runs on Python 2 and 3.
    for i in range(min_ix, max_ix):
        origin_user_price_feature[i][feature] = origin_user_price_feature[i][
            feature] + MyGaussian.get_prob_from_gaussian(
                i, price_ix, user_sigm)
Esempio n. 10
0
def main():
    """Print the smallest and largest fitted sigma for the 'phones' category.

    Reads ``filter_phones_format.csv``, derives the price table with
    ``Construction.get_user_category_buy_price`` and fits it with
    ``MyGaussian.gaussian_curve_fit``.
    """
    purchases = pd.read_csv(
        '../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': purchases})
    # Only the second fitted parameter is reported.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)
    phone_sigm = param_sigm['phones']
    # A formatted string prints the same text under Python 2 and Python 3.
    print('%s %s' % (min(phone_sigm), max(phone_sigm)))
Esempio n. 11
0
def main():
    """Print the smallest and largest fitted sigma for the 'phones' category.

    Reads ``filter_phones_format.csv``, derives the price table with
    ``Construction.get_user_category_buy_price`` and fits it with
    ``MyGaussian.gaussian_curve_fit``.
    """
    purchases = pd.read_csv(
        '../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': purchases})
    # Only the second fitted parameter is reported.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)
    phone_sigm = param_sigm['phones']
    # A formatted string prints the same text under Python 2 and Python 3.
    print('%s %s' % (min(phone_sigm), max(phone_sigm)))
def _construct_mention_feature_apply(dataModel, origin_user_price_feature, row, user_sigm):
    """Accumulate one row into a user's price-index x feature matrix.

    The row's price index is mapped through ``dataModel.getPidByPriceIx`` and
    its ``'feature'`` entry is parsed into feature ids.  For every price
    index ``i`` in ``[int(price_ix - user_sigm), int(price_ix + user_sigm))``,
    clamped to ``[0, dataModel.getPriceIxNum()]``, the value of
    ``MyGaussian.get_prob_from_gaussian(i, price_ix, user_sigm)`` is added to
    the row's feature columns of ``origin_user_price_feature``.

    :param dataModel: object providing ``getPidByPriceIx``,
        ``getFidByFeature`` and ``getPriceIxNum``.
    :param origin_user_price_feature: matrix updated in place.
    :param row: mapping with ``'price_ix'`` and ``'feature'`` entries;
        ``'feature'`` is the text of a sequence whose items carry the
        feature in position 0.
    :param user_sigm: half-width of the window of price indices to update.
    :return: ``None``; the matrix is modified in place.
    """
    price_ix = dataModel.getPidByPriceIx(row['price_ix'])
    # SECURITY: eval() executes arbitrary code found in the 'feature' column.
    # Keep this only for trusted input; if the column always holds plain
    # literals, ast.literal_eval is the safe replacement.
    feature = [dataModel.getFidByFeature(i[0]) for i in eval(row['feature'])]

    # Clamp the window to the valid range of price indices.
    min_ix = max(int(price_ix - user_sigm), 0)
    max_ix = min(int(price_ix + user_sigm), dataModel.getPriceIxNum())

    # range() rather than xrange() so the loop runs on Python 2 and 3.
    for i in range(min_ix, max_ix):
        origin_user_price_feature[i][feature] = (
            origin_user_price_feature[i][feature]
            + MyGaussian.get_prob_from_gaussian(i, price_ix, user_sigm))