예제 #1
0
def test_fitting():
    """Fit a ``Dataset`` built with no arguments and check its shapes.

    After fitting on integer user and item ids, the user/item feature shapes
    are expected to be square (one column per fitted id), and feature
    matrices built from an empty iterable are expected to store exactly one
    entry per id.
    """
    num_users = 10
    num_items = 100
    expected_shape = (num_users, num_items)

    ds = Dataset()
    ds.fit(range(num_users), range(num_items))

    # Shapes reported by the fitted dataset itself.
    assert ds.interactions_shape() == expected_shape
    assert ds.user_features_shape() == (num_users, num_users)
    assert ds.item_features_shape() == (num_items, num_items)

    # Matrices built from empty inputs still have the fitted dimensions.
    interactions = ds.build_interactions([])[0]
    assert interactions.shape == expected_shape

    user_features = ds.build_user_features([])
    assert user_features.getnnz() == num_users

    item_features = ds.build_item_features([])
    assert item_features.getnnz() == num_items
예제 #2
0
def test_fitting_no_identity():
    """Fit a ``Dataset`` with both identity-feature flags switched off.

    With ``user_identity_features`` and ``item_identity_features`` set to
    ``False`` and no explicit features fitted, the feature shapes are
    expected to have zero columns, and un-normalized feature matrices built
    from an empty iterable are expected to store no entries.
    """
    num_users = 10
    num_items = 100
    expected_shape = (num_users, num_items)

    ds = Dataset(user_identity_features=False, item_identity_features=False)
    ds.fit(range(num_users), range(num_items))

    # Shapes reported by the fitted dataset itself.
    assert ds.interactions_shape() == expected_shape
    assert ds.user_features_shape() == (num_users, 0)
    assert ds.item_features_shape() == (num_items, 0)

    # Matrices built from empty inputs.
    interactions = ds.build_interactions([])[0]
    assert interactions.shape == expected_shape

    user_features = ds.build_user_features([], normalize=False)
    assert user_features.getnnz() == 0

    item_features = ds.build_item_features([], normalize=False)
    assert item_features.getnnz() == 0
예제 #3
0
def test_fitting():
    """Check the shapes a default-constructed ``Dataset`` reports after fit.

    The dataset is fitted on ``range`` ids for users and items. The
    assertions cover the interaction shape, the square user/item feature
    shapes, and the number of stored entries in feature matrices built from
    empty input.
    """
    user_count, item_count = 10, 100

    fitted = Dataset()
    fitted.fit(range(user_count), range(item_count))

    # Reported shapes.
    assert fitted.interactions_shape() == (user_count, item_count)
    assert fitted.user_features_shape() == (user_count, user_count)
    assert fitted.item_features_shape() == (item_count, item_count)

    # Built matrices: the first element of the build_interactions result is
    # the interaction matrix whose shape is checked here.
    built = fitted.build_interactions([])
    assert built[0].shape == (user_count, item_count)
    assert fitted.build_user_features([]).getnnz() == user_count
    assert fitted.build_item_features([]).getnnz() == item_count
예제 #4
0
def test_fitting_no_identity():
    """Check the shapes of a ``Dataset`` fitted without identity features.

    Both identity-feature flags are passed as ``False``. The assertions
    cover the interaction shape, zero-column user/item feature shapes, and
    empty un-normalized feature matrices built from empty input.
    """
    user_count, item_count = 10, 100

    fitted = Dataset(
        user_identity_features=False,
        item_identity_features=False,
    )
    fitted.fit(range(user_count), range(item_count))

    # Reported shapes.
    assert fitted.interactions_shape() == (user_count, item_count)
    assert fitted.user_features_shape() == (user_count, 0)
    assert fitted.item_features_shape() == (item_count, 0)

    # Built matrices.
    built = fitted.build_interactions([])
    assert built[0].shape == (user_count, item_count)
    assert fitted.build_user_features([], normalize=False).getnnz() == 0
    assert fitted.build_item_features([], normalize=False).getnnz() == 0
예제 #5
0
# Load the per-user feature table; every column after the first one is used
# as a feature name.
user_feature_df = pd.read_csv('./input/user_feature.csv')
user_feature_names = list(user_feature_df.columns)[1:]

# Keep only the rows whose userCode appears in ``unique_user``.
user_feature_df = user_feature_df.loc[
    user_feature_df['userCode'].isin(unique_user)
]

# Lazy stream of (userCode, {feature name: value}) pairs, one per kept row.
# It is consumed later by ``build_user_features``.
user_feature_iterable = (
    (record['userCode'], {name: record[name] for name in user_feature_names})
    for _row_label, record in user_feature_df.iterrows()
)

# Fit the dataset on the user/item ids and the feature names.
dataset.fit(
    users=user_iterable,
    items=iteam_iterable,
    user_features=user_feature_names,
    item_features=item_feature_names,
)

# Report the fitted shapes.
num_users, num_items = dataset.interactions_shape()
print(
    'Num users: {}, num_items: {}.'.format(num_users, num_items)
)
_, num_users_feature = dataset.user_features_shape()
_, num_items_feature = dataset.item_features_shape()
print(
    'Num users feature: {}, num_items feature: {}.'.format(
        num_users_feature, num_items_feature
    )
)

# Build the normalized user and item feature matrices.
user_feature_matrix = dataset.build_user_features(
    user_feature_iterable, normalize=True
)
item_feature_matrix = dataset.build_item_features(
    item_feature_iterable, normalize=True
)

# Build the interactions from (userCode, project_id, interaction value)
# triples, skipping every row whose project_id is in ``ignore_project``.
(train_interactions, weights) = dataset.build_interactions(
    data=(
        (record['userCode'], record['project_id'], record[interaction_col_name])
        for _row_label, record in train.iterrows()
        if record['project_id'] not in ignore_project
    )
)

from lightfm import LightFM

model = LightFM(
    learning_schedule='adagrad',
    loss='warp',
    random_state=44,
)
예제 #6
0
# Load the book metadata and keep only the columns used below.
item_meta = pd.read_csv('data/books.csv')
item_meta = item_meta[['book_id', 'authors', 'average_rating', 'original_title']]

# One (book_id, [authors, average_rating]) pair per row, later passed to
# ``build_item_features``. The columns are walked in lockstep instead of
# looking every cell up by label ``i``: this drops the index loop and no
# longer relies on the frame having a default 0..n-1 index.
item_features_source = [
    (book_id, [authors, average_rating])
    for book_id, authors, average_rating in zip(
        item_meta['book_id'],
        item_meta['authors'],
        item_meta['average_rating'],
    )
]

# Construct the data set.
# Sets, lists and pandas Series are all accepted as inputs.
# First map the user/item indices, add the user/item features, and then
# fit the occurrence data.
# Alternatively, a scipy.csr_matrix can be fit directly.
# Caution: all null values must be filled in before this point.
dataset = Dataset()
dataset.fit(
    users=ratings['user_id'].unique(),
    items=ratings['book_id'].unique(),
    # Every value of the non-id columns is registered as an item feature.
    item_features=item_meta[item_meta.columns[1:]].values.flatten(),
)

print("Num Users: {}, Num Items: {}".format(*dataset.interactions_shape()))
print(dataset.user_features_shape(), dataset.item_features_shape())

interactions, weights = dataset.build_interactions(ratings_source)
item_features = dataset.build_item_features(item_features_source)
# mappings = dataset.mapping()

# Save
# mmwrite('data/interactions.mtx', interactions)
# mmwrite('data/item_features.mtx', item_features)
# mmwrite('data/weights.mtx', weights)