Beispiel #1
0
def main():
    """Split the ``load_100k`` ratings per user and fit a ``ListRankMF`` model.

    Users with fewer than ``nums_train + 10`` rows are discarded.  For every
    remaining user, ``nums_train`` rows are drawn at random for training and
    all of that user's other rows go to the test set.  The model is then
    fitted on ``(user_id, item_id, timestamp)`` features with ``rating`` as
    the target, using the test split as the evaluation set.
    """
    from recommend.data.datasets import load_100k
    import random

    nums_train = 10
    min_rows = nums_train + 10
    feature_cols = ['user_id', 'item_id', 'timestamp']

    # Dataset preparation: keep only users with enough rows to split.
    ratings = load_100k(type='pd').alldata
    eligible = ratings.groupby('user_id').filter(
        lambda grp: len(grp) >= min_rows)

    train_parts, test_parts = [], []
    for _, user_rows in eligible.groupby('user_id'):
        # Drop the original index so that row positions and index labels
        # coincide; the sampled positions can then be used with both
        # ``iloc`` (positional) and ``drop`` (label based).
        user_rows = user_rows.reset_index(drop=True)
        picked = random.sample(range(len(user_rows)), nums_train)
        train_parts.append(user_rows.iloc[picked])
        test_parts.append(user_rows.drop(index=picked))

    train_df = pd.concat(train_parts)
    test_df = pd.concat(test_parts)

    x_train = np.array(train_df[feature_cols])
    y_train = np.array(train_df['rating'])
    x_test = np.array(test_df[feature_cols])
    y_test = np.array(test_df['rating'])

    model = ListRankMF(0.01, 10, 0.01, 800)
    model.fit(x_train, y_train, evals=(x_test, y_test), top=10)
Beispiel #2
0
        if len(cusers) == 0: return 0
        users_i = self.__item_user[i]
        users_j = self.__item_user[j]

        sum_up = 0.
        sum_l, sum_r = 0., 0.
        for u in cusers:
            sum_up += ((users_i[u] - self.__item_scores[i][0]) *
                       (users_j[u] - self.__item_scores[j][0]))
            sum_l += (users_i[u] - self.__item_scores[i][0])**2
            sum_r += (users_j[u] - self.__item_scores[j][0])**2
        sum_lr = np.sqrt(sum_l * sum_r)
        result = sum_up / sum_lr if sum_lr > 0 else 0
        return result

    def __commonUsers(self, i, j):
        """Return the keys shared by the ``i`` and ``j`` entries of the item map.

        Both ``self.__item_user[i]`` and ``self.__item_user[j]`` are looked
        up and the intersection of their keys (presumably user ids) is
        returned as a new ``set``; it is empty when nothing is shared.
        """
        raters_i = self.__item_user[i].keys()
        raters_j = self.__item_user[j].keys()
        return set(raters_i).intersection(raters_j)


if __name__ == '__main__':
    # Demo run: load the ratings, split them, fit the item-based model and
    # print its report on the held-out features.
    from recommend.data import datasets

    ratings = datasets.load_100k('pd').alldata
    split = datasets.filter_split(ratings, 20, 20, 0.2)
    train_x, test_x, train_y, test_y = split

    model = ItemCR(10, 'cosine', 'origin')
    model.fit(train_x, train_y)
    model.report(test_x, 10)
Beispiel #3
0
        for u in recommend_dict:
            cm_users = set(user_item[u]) & set(recommend_dict[u])
            p += len(cm_users) / top_n
            r += len(cm_users) / len(user_item[u])
        precision = p / len(recommend_dict)
        recall = r / len(recommend_dict)
        print("precision=%f\nrecall=%f" % (precision, recall))


if __name__ == '__main__':
    # Script entry point: build a per-user random train/test split of the
    # ratings returned by load_100k and convert it to NumPy arrays.
    from recommend.data.datasets import load_100k
    import random
    import pandas as pd

    # Dataset preparation.
    # NOTE(review): `np` is not imported in this block -- presumably it is
    # imported at module level; confirm.
    df = load_100k(type='pd').alldata
    nums_train = 10  # number of rows sampled per user for training
    # Keep only users that have at least nums_train + 10 rows.
    df = df.groupby('user_id').filter(lambda x: len(x) >= nums_train + 10)
    data_train = []
    data_test = []
    for gb, dtfm in df.groupby('user_id'):
        # Drop the original index so positions and labels coincide; the
        # sampled positions can then be passed to both iloc and drop.
        dtfm = dtfm.reset_index(drop=True)
        # Pick nums_train distinct row positions at random for training.
        index_sample = random.sample(range(len(dtfm)), nums_train)
        data_train.append(dtfm.iloc[index_sample, :])
        # Every row that was not sampled goes to the test set.
        data_test.append(dtfm.drop(index_sample))
    data_train = pd.concat(data_train)
    data_test = pd.concat(data_test)
    # Features are (user_id, item_id, timestamp); the target is the rating.
    x_train = np.array(data_train[['user_id', 'item_id', 'timestamp']])
    y_train = np.array(data_train['rating'])
    x_test = np.array(data_test[['user_id', 'item_id', 'timestamp']])
    y_test = np.array(data_test['rating'])
Beispiel #4
0
                    tmp1.append(tmp3)
                self.H[i] *= np.array(tmp0) / np.array(tmp1)

            results = []
            for u in test_uiMatrix:
                for i in test_uiMatrix[u]:
                    r = test_uiMatrix[u][i]
                    results.append((r - np.dot(self.W[u], self.H[i]))**2)
            rmse = np.sqrt(np.sum(results) / len(testMatrix))

            print("%d iter finish, rmse: %f" % (iter, rmse))

    def predict(self):
        """Placeholder: prediction is not implemented; returns ``None``."""
        return None


if __name__ == '__main__':
    # Demo run: random 80/20 split of the ratings, then fit NMF and let it
    # evaluate on the test split.
    from recommend.data import datasets
    from sklearn.model_selection import train_test_split

    ratings = datasets.load_100k(type='pd').alldata
    df_train, df_test = train_test_split(
        ratings, test_size=0.2, random_state=0)

    # Remove test rows whose user never appears in the training split.
    unseen_users = set(df_test.user_id).difference(df_train.user_id)
    if unseen_users:
        df_test = df_test[~df_test["user_id"].isin(unseen_users)]

    # Likewise remove test rows whose item never appears in training.
    unseen_items = set(df_test.item_id).difference(df_train.item_id)
    if unseen_items:
        df_test = df_test[~df_test["item_id"].isin(unseen_items)]

    model = NMF(10, 0.1, 20)
    model.fit(df_train, df_test, "rmse")