Ejemplo n.º 1
0
def main(args):
    """Predict a rating for every test row and write the results to a CSV.

    The user and movie description files are loaded only to report how many
    rows they contain. The collaborative-filtering model is rebuilt with
    ``build_cf_model``, its weights are restored from ``MODEL_WEIGHTS_FILE``,
    and one rating per test row is written to ``args.output`` with the
    columns ``TestDataID`` and ``Rating``.

    Args:
        args: Object exposing an ``output`` attribute (destination CSV path).

    NOTE(review): ``max_userid`` and ``max_movieid`` are not assigned inside
    this function; they are presumably module-level values -- confirm.
    """
    users = pd.read_csv(
        USERS_CSV,
        sep='::',
        engine='python',  # multi-character separators need the python engine
        usecols=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'])
    print('{} description of {} users loaded'.format(len(users), max_userid))

    movies = pd.read_csv(MOVIES_CSV,
                         sep='::',
                         engine='python',
                         usecols=['movieID', 'Title', 'Genres'])
    print('{} descriptions of {} movies loaded'.format(len(movies),
                                                       max_movieid))

    # Parse the test file a single time; the id column is reused below for
    # the output frame instead of reading the same file again.
    test_data = pd.read_csv(TEST_CSV,
                            usecols=['TestDataID', 'UserID', 'MovieID'])
    print('{} testing data loaded.'.format(test_data.shape[0]))

    trained_model = build_cf_model(max_userid, max_movieid, DIM, isBest=True)
    print('Loading model weights...')
    trained_model.load_weights(MODEL_WEIGHTS_FILE)
    print('Loading model done!!!')

    recommendations = test_data[['TestDataID']].copy()
    # Apply over the two id columns only, so the per-row Series dtype does
    # not depend on the dtype of ``TestDataID``.
    recommendations['Rating'] = test_data[['UserID', 'MovieID']].apply(
        lambda row: predict_rating(trained_model, row['UserID'],
                                   row['MovieID']),
        axis=1)

    ensure_dir(args.output)
    recommendations.to_csv(args.output,
                           index=False,
                           columns=['TestDataID', 'Rating'])
Ejemplo n.º 2
0
def main(args):
    """Plot a 2-D t-SNE projection of the movie embeddings grouped by genre.

    Loads the description files, restores the trained model, projects the
    weights of ``trained_model.layers[3]`` to two dimensions with ``TSNE``
    and hands the points, grouped per entry of ``classes``, to ``draw``,
    which is called with the output name ``'graph.png'``.

    Args:
        args: Not read by this function; kept for a uniform entry point.

    Raises:
        KeyError: If an entry of ``classes`` names a genre that is not the
            first listed genre of any movie.

    NOTE(review): ``max_userid``, ``max_movieid`` and ``classes`` are not
    assigned here and are presumably module-level values; ``layers[3]`` is
    presumably the movie embedding layer -- confirm against
    ``build_cf_model``.
    """
    users = pd.read_csv(
        USERS_CSV,
        sep='::',
        engine='python',  # multi-character separators need the python engine
        usecols=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'])
    print('{} description of {} users loaded'.format(len(users), max_userid))

    movies = pd.read_csv(MOVIES_CSV,
                         sep='::',
                         engine='python',
                         usecols=['movieID', 'Title', 'Genres'])
    print('{} descriptions of {} movies loaded'.format(len(movies),
                                                       max_movieid))

    test_data = pd.read_csv(TEST_CSV, usecols=['UserID', 'MovieID'])
    print('{} testing data loaded.'.format(test_data.shape[0]))

    trained_model = build_cf_model(max_userid, max_movieid, DIM, isBest=True)
    print('Loading model weights...')
    trained_model.load_weights(MODEL_WEIGHTS_FILE)
    print('Loading model done!!!')

    # Group zero-based movie indices by the first genre listed for each
    # movie. Columns are addressed by name: ``DataFrame.as_matrix()`` no
    # longer exists in current pandas, and positional access would depend on
    # the column order of the file.
    genres_map = {}
    for movie_id, genres in zip(movies['movieID'], movies['Genres']):
        primary_genre = genres.split('|')[0]
        genres_map.setdefault(primary_genre, []).append(movie_id - 1)

    movie_emb = np.array(trained_model.layers[3].get_weights()).squeeze()
    # Fixed random_state keeps the projection reproducible between runs.
    movie_emb = TSNE(n_components=2, random_state=0).fit_transform(movie_emb)

    # Swap each genre's index list for the matching rows of the projection.
    genre_points = {
        genre: movie_emb[indices] for genre, indices in genres_map.items()
    }

    new_genres_map = {}
    for c in classes:
        # A class may combine several genres separated by '|'. The empty
        # (0, 2) seed fixes the result's shape and float dtype.
        parts = [np.empty(shape=(0, 2))]
        parts.extend(genre_points[g] for g in c.split('|'))
        new_genres_map[c] = np.concatenate(parts, axis=0)
    draw(new_genres_map, 'graph.png')
Ejemplo n.º 3
0
def main(args):
    """Train the collaborative-filtering model on the ratings in ``args.train``.

    Steps, in order:
      1. Read ``UserID``, ``MovieID`` and ``Rating`` from ``args.train``.
      2. Write the largest user id, the largest movie id and ``DIM`` to
         ``MAX_FILE``.
      3. Shuffle the rows and fit the model built by ``build_cf_model``,
         holding out 10% of the samples for validation.

    Args:
        args: Object exposing a ``train`` attribute (path of the ratings CSV).
    """
    ratings = pd.read_csv(args.train, usecols=['UserID', 'MovieID', 'Rating'])
    max_userid = ratings['UserID'].drop_duplicates().max()
    max_movieid = ratings['MovieID'].drop_duplicates().max()
    # Embedding indices are the raw ids shifted down by one.
    ratings['User_emb_id'] = ratings['UserID'] - 1
    ratings['Movie_emb_id'] = ratings['MovieID'] - 1
    print('{} ratings loaded.'.format(len(ratings)))

    # Persist the sizes used to build the model alongside the weights file.
    size_info = {
        'max_userid': [max_userid],
        'max_movieid': [max_movieid],
        'dim': [DIM],
    }
    pd.DataFrame(data=size_info).to_csv(MAX_FILE, index=False)
    print('max info save to {}'.format(MAX_FILE))

    # Shuffle every row before the arrays are extracted for training.
    ratings = ratings.sample(frac=1)
    user_idx = ratings['User_emb_id'].values
    movie_idx = ratings['Movie_emb_id'].values
    targets = ratings['Rating'].values
    print('Users: {}, shape = {}'.format(user_idx, user_idx.shape))
    print('Movies: {}, shape = {}'.format(movie_idx, movie_idx.shape))
    print('Ratings: {}, shape = {}'.format(targets, targets.shape))

    model = build_cf_model(max_userid, max_movieid, DIM)
    model.compile(loss='mse', optimizer='adamax', metrics=[rmse])

    # Early stopping watches 'val_rmse' with a patience of 2; the checkpoint
    # callback writes to MODEL_WEIGHTS_FILE with save_best_only enabled.
    early_stopping = EarlyStopping('val_rmse', patience=2)
    checkpoint = ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)
    callbacks = [early_stopping, checkpoint]

    history = model.fit(
        [user_idx, movie_idx],
        targets,
        epochs=1000,
        batch_size=256,
        validation_split=0.1,
        verbose=1,
        callbacks=callbacks,
    )