Ejemplo n.º 1
0
if __name__ == "__main__":
    # Small sample file for quick runs; the full Last.fm dump is expected at
    # "data/userid-timestamp-artid-artname-traid-traname.tsv".
    chunks = file_io.read_lastfm_user_art_file("data/test_shorter.tsv")

    # Wrap the chunked reader in a Preprocessor backed by a hashing
    # vectorizer, then load and show the songs.
    vectorizer = FeatureHasher()
    pre = Preprocessor(chunks, vectorizer)
    # NOTE(review): the meaning of the 20 (rows? chunks?) is not visible
    # here -- confirm against Preprocessor.read_songs.
    songs = pre.read_songs(20)
    print(songs)

    # NOTE(review): the file reader is not reset between read_songs and
    # read_user_songs; verify that continuing from the same reader is intended.

    # Load the user -> song mapping and convert it to the user-song matrix.
    pre.read_user_songs(1000)
    X = pre.get_user_song_matrix()

    # Time the fit plus a single prediction.
    start_time = time.time()
    # One distinct label per matrix row, so every row is its own class.
    labels = list(range(X.shape[0]))
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(X, labels)

    query = pre.user_song_dict["user_000001"]
    prediction = clf.predict(query)
    print(prediction)

    elapsed = time.time() - start_time
    print("training and predict using {0}".format(elapsed))

    #song_content = [ (artist,song) for (artist,song) in zip(list(songs['artname']), list(songs['traname']))]
    # temporary song_list
    song_content = [('Underworld', 'Boy, Boy, Boy'),
                    ('Underworld', 'Crocodile'),
                    ('Led Zeppelin', 'Stairway to heaven'),
Ejemplo n.º 2
0
if __name__ == "__main__":
    # Local import: sys.maxsize is only needed for the print-options call.
    import sys

    start_time = time.time()
    # Print arrays in full. NaN is rejected as a threshold by current NumPy
    # releases (ValueError); sys.maxsize is the documented way to turn off
    # summarization.
    np.set_printoptions(threshold=sys.maxsize)

    vectorizer = DictVectorizer()
    chunks = file_io.read_lastfm_user_art_file("data/halfid_20%_train.tsv")

    # valid_songs is handed to the Preprocessor below.
    # NOTE(review): the original author's note suggests that passing an empty
    # list instead disables filtering by valid songs -- confirm against
    # Preprocessor before relying on it.
    valid_songs = file_io.get_all_valid_songs('data/song_word2vec_whole_truncate_60000_new.csv')

    pre = Preprocessor(chunks, vectorizer, valid_songs)
    pre.reset_file_reader(chunks)

    # Load the user -> song mapping and convert it to the user-song matrix.
    pre.read_user_songs(3000000)
    X = pre.get_user_song_matrix()
    print("non zeros: {0}".format(X.count_nonzero()))
    print("pre-processed in {0:.2f} sec".format(time.time() - start_time))

    # Time the prediction phase on its own so the "predict in" line below
    # does not also include the preprocessing time reported above.
    predict_start = time.time()
    pred = recommendation.predict_by_user(X)
    recommended = recommendation.recommend_all(X, pred, masked=False)

    # Number of songs to look up per user from the recommendation result.
    recommend_numbers = 5
    songs = pre.get_songs_by_indices(recommended, recommend_numbers)
    # songs[0] is reported as "user 1", i.e. the first entry of the result.
    print("user 1 top recommended songs: {0}".format(songs[0]))
    print("predict in {0:.2f} sec".format(time.time() - predict_start))