Esempio n. 1
0
def train_and_predict(train_filepath, test_filepath):
    """Fit one ALS model over songs and tags and recommend both for test playlists.

    Both JSON files are loaded with pandas, encoded against a vocabulary built
    from their concatenation, and stacked (test rows first) into a single
    playlist x item matrix.  The fitted item factors are then split at
    ``vocab.num_songs`` into a song-only and a tag-only model that share the
    same user factors.

    Args:
        train_filepath: Path of the training JSON file.
        test_filepath: Path of the test JSON file; its ``id`` column supplies
            the playlist ids of the returned records.

    Returns:
        A list with one dict per test playlist, holding ``"id"``, ``"songs"``
        (100 recommendations) and ``"tags"`` (10 recommendations), mapped back
        through ``vocab.id_to_song`` / ``vocab.id_to_tag``.
    """
    train_df = pd.read_json(train_filepath)
    test_df = pd.read_json(test_filepath)

    vocab = Vocabulary(pd.concat([train_df, test_df], ignore_index=True))

    train_data = encode_features(train_df, vocab)
    test_data = encode_features(test_df, vocab)

    # Shuffle train data
    train_data = shuffle(train_data)

    def lil_to_csr(indices, shape):
        """Turn a list of per-row column-index lists into a CSR matrix of ones."""
        row_ind = []
        col_ind = []
        for row_idx, row in enumerate(indices):
            cols = list(row)
            col_ind.extend(cols)
            row_ind.extend([row_idx] * len(cols))
        data = [1] * len(col_ind)
        return csr_matrix((data, (row_ind, col_ind)), shape=shape)

    train_csr = lil_to_csr(train_data, (len(train_data), vocab.size))
    test_csr = lil_to_csr(test_data, (len(test_data), vocab.size))

    # Test rows come first so that row i of test_csr is also row i of the
    # fitted matrix; request CSR directly instead of converting afterwards.
    r = scipy.sparse.vstack([test_csr, train_csr], format="csr")

    factors = 512
    alpha = 500.0
    als_model = ALS(factors=factors, regularization=0.1)
    als_model.fit(r.T * alpha)

    # Split the joint model: item rows before vocab.num_songs go to the song
    # model, the remaining rows to the tag model.
    song_model = ALS(factors=factors)
    tag_model = ALS(factors=factors)
    song_model.user_factors = als_model.user_factors
    tag_model.user_factors = als_model.user_factors
    song_model.item_factors = als_model.item_factors[:vocab.num_songs]
    tag_model.item_factors = als_model.item_factors[vocab.num_songs:]

    song_rec_csr = test_csr[:, :vocab.num_songs]
    tag_rec_csr = test_csr[:, vocab.num_songs:]

    song_rec = song_model.recommend_all(song_rec_csr, N=100)
    tag_rec = tag_model.recommend_all(tag_rec_csr, N=10)
    # The tag model indexes its items from 0; shift back into vocabulary ids.
    tag_rec += vocab.num_songs

    return [{
        "id": test_playlist_id,
        "songs": list(map(vocab.id_to_song, song_rec[test_row_idx])),
        "tags": list(map(vocab.id_to_tag, tag_rec[test_row_idx])),
    } for test_row_idx, test_playlist_id in enumerate(tqdm(test_df.id))]
Esempio n. 2
0
    def _fit_for_tag(self, train, val):
        """Fit an ALS model on the tag preference data and cache the top tags.

        The preference frame returned by ``self._t_data.get_preference`` is
        turned into a user x item CSR matrix, the model is fitted on its
        scaled transpose, and the recommendations restricted to the first
        ``get_tag_length()`` items are stored in ``self._t_best``.
        """
        prefs = self._t_data.get_preference(train, val)
        n_tags = self._t_data.get_tag_length()

        # Rows are users, columns are items.
        values = prefs['preference'].astype(float)
        coords = (prefs['user_id'], prefs['item_id'])
        interactions = sparse.csr_matrix((values, coords))

        model = AlternatingLeastSquares(factors=1800)
        model.fit(interactions.T * 65)

        # Keep only the first n_tags item factors and the matching columns
        # (presumably the tag items; verify against get_preference).
        model.item_factors = model.item_factors[:n_tags]
        tag_columns = interactions[:, :n_tags]

        self._t_best = model.recommend_all(tag_columns, N=self._t_topk)
Esempio n. 3
0
    def _fit_for_song(self, train, val):
        """Fit an ALS model on the song preference data and cache the top songs.

        Builds a user x item CSR matrix from the frame returned by
        ``self._s_data.get_preference``, fits on its scaled transpose, then
        stores the recommendations over the first ``get_song_length()`` items
        in ``self._s_best``.
        """
        pref_frame = self._s_data.get_preference(train, val)
        song_count = self._s_data.get_song_length()

        # Rows are users, columns are items.
        weights = pref_frame['preference'].astype(float)
        positions = (pref_frame['user_id'], pref_frame['item_id'])
        matrix = sparse.csr_matrix((weights, positions))

        recommender = AlternatingLeastSquares(factors=2500)
        recommender.fit(matrix.T * 160)

        # Restrict both the item factors and the interaction columns to the
        # first song_count items (presumably the songs; verify against
        # get_preference).
        recommender.item_factors = recommender.item_factors[:song_count]
        song_columns = matrix[:, :song_count]

        self._s_best = recommender.recommend_all(song_columns, N=self._s_topk)
Esempio n. 4
0
    def multi_mf_(self, train_songs_A, train_tags_A, test_songs_A, test_tags_A,
                  song_ntop, tag_ntop, iteration):
        """Fit one ALS model over songs and tags and recommend for the test rows.

        Test rows are stacked above the train rows, songs and tags are placed
        side by side, and a single model is fitted on the scaled transpose.
        Its item factors are split at ``self.n_songs`` into a song model and a
        tag model that share the user factors.

        Args:
            train_songs_A, train_tags_A: Sparse playlist x song / playlist x tag
                matrices of the training playlists.
            test_songs_A, test_tags_A: The same for the test playlists.
            song_ntop: Number of songs to recommend per test playlist.
            tag_ntop: Number of tags to recommend per test playlist.
            iteration: Number of ALS iterations.

        Returns:
            A list of dicts with ``"id"``, ``"songs"`` and ``"tags"``, one per
            test playlist, with indices mapped through ``self.song_sid_id`` and
            ``self.tag_tid_id``.
        """
        print(f'Multi_MF... iters:{iteration}')

        # Force CSR so the row slices below work whatever the input formats.
        songs_A = spr.vstack([test_songs_A, train_songs_A], format='csr')
        tags_A = spr.vstack([test_tags_A, train_tags_A], format='csr')

        A = spr.hstack([songs_A, tags_A])

        als_model = ALS(factors=256,
                        regularization=0.08,
                        use_gpu=True,
                        iterations=iteration)  # epoch
        als_model.fit(A.T * 15)

        song_model = ALS(use_gpu=False)
        tag_model = ALS(use_gpu=False)

        song_model.user_factors = als_model.user_factors
        tag_model.user_factors = als_model.user_factors

        # Item rows follow the column order of A: songs first, then tags.
        song_model.item_factors = als_model.item_factors[:self.n_songs]
        tag_model.item_factors = als_model.item_factors[self.n_songs:]

        # The test playlists are the first n_test rows of the stacked matrices.
        song_rec_csr = songs_A[:self.n_test, :]
        tag_rec_csr = tags_A[:self.n_test, :]

        # Honour the requested list lengths instead of fixed 500 / 50.
        cand_song = song_model.recommend_all(song_rec_csr, N=song_ntop)
        cand_tag = tag_model.recommend_all(tag_rec_csr, N=tag_ntop)

        res = [{
            "id": self.plylst_nid_id[self.n_train + row],
            "songs": [self.song_sid_id.get(x) for x in song_idx.tolist()],
            "tags": [self.tag_tid_id.get(x) for x in tag_idx.tolist()]
        } for row, (song_idx, tag_idx) in enumerate(zip(cand_song, cand_tag))]

        print("DONE")

        return res
Esempio n. 5
0
        split_matrix(rating_matrix, user2idx, movie2idx)

    print(
        f'Train: {rating_matrix_train.count_nonzero()}\t',
        f'Validation Size: {rating_matrix_val.count_nonzero()}'
    )

    # Train ALS Model
    model = AlternatingLeastSquares(
        factors=20,
        iterations=50,
        calculate_training_loss=True,
        num_threads=4
    )
    model.fit(rating_matrix_train.T)

    # Make Prediction
    recommendations = model.recommend_all(
        user_items=rating_matrix_train,
        N=100
    )

    # Evaluate
    precison_100 = n_precision(recommendations, rating_matrix_val, 100)
    recall_100 = n_recall(recommendations, rating_matrix_val, 100)
    print(f'P@100 : {precison_100:.2%}')
    print(f'R@100 : {recall_100:.2%}')

    # Save Recommendation
    np.savez('./output/rec_mf.npz', recommendations)
Esempio n. 6
0
    def multi_mf_(self,
                  train_songs_A,
                  train_tags_A,
                  val_songs_A,
                  val_tags_A,
                  test_songs_A,
                  test_tags_A,
                  meta=True,
                  song_ntop=500,
                  tag_ntop=50,
                  iteration=20,
                  score=False):
        """Fit one ALS model over songs, tags (and meta) and recommend for val/test.

        Rows are stacked in the order validation, test, train; columns are
        songs, then tags, then (when ``meta`` is truthy) the columns returned
        by ``self.mkspr_for_meta()``.  The fitted item factors are split into
        a song model and a tag model that share the user factors.

        Args:
            train_songs_A, train_tags_A: Sparse playlist x song / playlist x tag
                matrices of the training playlists.
            val_songs_A, val_tags_A: The same for the validation playlists.
            test_songs_A, test_tags_A: The same for the test playlists.
            meta: Whether to append the meta columns before fitting.
            song_ntop: Number of songs to recommend per playlist.
            tag_ntop: Number of tags to recommend per playlist.
            iteration: Number of ALS iterations.
            score: When ``True`` no recommendations are produced and two empty
                lists are returned.

        Returns:
            ``(val_res, test_res)``: lists of dicts with ``"id"``, ``"songs"``
            and ``"tags"``.
        """
        print(f'Multi_MF... iters:{iteration}')

        val_res = []
        test_res = []

        # Force CSR so the row slices below work whatever the input formats.
        songs_A = spr.vstack([val_songs_A, test_songs_A, train_songs_A],
                             format='csr')
        tags_A = spr.vstack([val_tags_A, test_tags_A, train_tags_A],
                            format='csr')

        print(val_songs_A.shape, test_songs_A.shape, train_songs_A.shape)

        if meta:
            s_meta = self.mkspr_for_meta()
            print(songs_A.shape, tags_A.shape, s_meta.shape)
            A = spr.hstack([songs_A, tags_A, s_meta])
        else:
            A = spr.hstack([songs_A, tags_A])

        als_model = ALS(factors=256,
                        regularization=0.08,
                        use_gpu=True,
                        iterations=iteration)
        als_model.fit(A.T * 100)

        song_model = ALS(use_gpu=True)
        tag_model = ALS(use_gpu=True)

        song_model.user_factors = als_model.user_factors
        tag_model.user_factors = als_model.user_factors

        # Item rows follow the column order of A.  Bound the tag slice to the
        # tag columns so that, with meta enabled, the meta factors are not
        # treated as recommendable tags.
        n_tags = tags_A.shape[1]
        song_model.item_factors = als_model.item_factors[:self.n_songs]
        tag_model.item_factors = als_model.item_factors[
            self.n_songs:self.n_songs + n_tags]

        # for val
        val_song_rec_csr = songs_A[:self.n_val, :]
        val_tag_rec_csr = tags_A[:self.n_val, :]

        # for test
        test_song_rec_csr = songs_A[self.n_val:self.n_val + self.n_test, :]
        test_tag_rec_csr = tags_A[self.n_val:self.n_val + self.n_test, :]

        def recommend_records(song_csr, tag_csr, id_offset):
            """Recommend songs and tags for the given rows and build the records."""
            cand_song = song_model.recommend_all(song_csr, N=song_ntop)
            cand_tag = tag_model.recommend_all(tag_csr, N=tag_ntop)
            return [{
                "id": self.plylst_nid_id[id_offset + row],
                "songs": [self.song_sid_id.get(x) for x in song_idx.tolist()],
                "tags": [self.tag_tid_id.get(x) for x in tag_idx.tolist()]
            } for row, (song_idx, tag_idx) in enumerate(zip(cand_song, cand_tag))]

        # Score mode has no implementation here: the empty lists are returned.
        if score is not True:
            val_res = recommend_records(val_song_rec_csr, val_tag_rec_csr,
                                        self.n_train)
            # NOTE(review): only the sliced test rows are passed in; confirm
            # that recommend_all pairs them with user factors n_val.. of the
            # fitted matrix rather than with the first rows.
            test_res = recommend_records(test_song_rec_csr, test_tag_rec_csr,
                                         self.n_train + self.n_val)

        return val_res, test_res
Esempio n. 7
0
    def mf_(self,
            train_songs_A,
            train_tags_A,
            val_songs_A,
            val_tags_A,
            test_songs_A,
            test_tags_A,
            song_ntop=500,
            tag_ntop=50,
            iteration=20,
            score=False):
        """Fit separate song and tag ALS models and recommend for val/test playlists.

        Each model is fitted on the validation, test and train rows stacked in
        that order.  Four result lists are produced: validation songs,
        validation tags, test songs and test tags.

        Args:
            train_songs_A, train_tags_A: Sparse playlist x song / playlist x tag
                matrices of the training playlists.
            val_songs_A, val_tags_A: The same for the validation playlists.
            test_songs_A, test_tags_A: The same for the test playlists.
            song_ntop: Number of songs to recommend per playlist.
            tag_ntop: Number of tags to recommend per playlist.
            iteration: Number of ALS iterations for both models.
            score: When ``True``, recommend one row at a time for rows
                ``0 .. self.n_val_song - 1`` of every set and also store the
                scores under ``"songs_score"`` / ``"tags_score"``; rows that
                raise ``IndexError`` are skipped.  Otherwise recommend in bulk
                and store only the item ids.

        Returns:
            ``(val_song_res, val_tag_res, test_song_res, test_tag_res)``, each a
            list of dicts keyed by ``"id"`` plus ``"songs"`` or ``"tags"``.
        """
        print(f'MF... iters:{iteration}')

        songs_A = spr.vstack([val_songs_A, test_songs_A, train_songs_A])
        tags_A = spr.vstack([val_tags_A, test_tags_A, train_tags_A])

        als_model = ALS(factors=256,
                        regularization=0.08,
                        use_gpu=True,
                        iterations=iteration)  # epoch
        als_model.fit(songs_A.T * 100)
        als_model_tag = ALS(factors=32,
                            regularization=0.08,
                            use_gpu=True,
                            iterations=iteration)
        als_model_tag.fit(tags_A.T * 100)

        # One entry per output list, in return order:
        # (model, user-item matrix, N, item index -> id map, playlist index, key)
        # NOTE(review): the test matrices are passed on their own with row ids
        # starting at 0, while their rows sit after the validation rows in the
        # fitted matrices; confirm the user factors line up as intended.
        jobs = [
            (als_model, val_songs_A, song_ntop, self.song_sid_id,
             self.plylst_val_song.index, "songs"),
            (als_model_tag, val_tags_A, tag_ntop, self.tag_tid_id,
             self.plylst_val_tag.index, "tags"),
            (als_model, test_songs_A, song_ntop, self.song_sid_id,
             self.plylst_test_song.index, "songs"),
            (als_model_tag, test_tags_A, tag_ntop, self.tag_tid_id,
             self.plylst_test_tag.index, "tags"),
        ]

        def recommend_scored(row, model, matrix, n_top, nid_id, id_index, key):
            """Recommend with scores for one row; return None if the row is out of range."""
            try:
                cands = model.recommend(
                    row, matrix, N=n_top, filter_already_liked_items=True)
                playlist_id = self.plylst_nid_id[id_index[row]]
            except IndexError:
                # Best effort: every set is walked over the same row range, so
                # a set may simply not have this row.
                return None
            return {
                "id": playlist_id,
                key: [nid_id.get(x[0]) for x in cands],
                key + "_score": [x[1] for x in cands],
            }

        def recommend_batch(model, matrix, n_top, nid_id, id_index, key):
            """Recommend for every row of ``matrix`` at once, without scores."""
            cands = model.recommend_all(
                matrix, N=n_top, filter_already_liked_items=True)
            return [{
                "id": self.plylst_nid_id[id_index[row]],
                key: [nid_id.get(x) for x in rec_idx.tolist()]
            } for row, rec_idx in enumerate(cands)]

        if score is True:
            results = [[] for _ in jobs]
            for row in tqdm(range(self.n_val_song)):
                for job, out in zip(jobs, results):
                    record = recommend_scored(row, *job)
                    if record is not None:
                        out.append(record)
        else:  # Score > False
            results = [recommend_batch(*job) for job in jobs]

        val_song_res, val_tag_res, test_song_res, test_tag_res = results

        print("DONE")

        return val_song_res, val_tag_res, test_song_res, test_tag_res