def split_only_sequential(self, URM, URM_df):
        """Build a train/test split using only the first 5000 target playlists.

        For each of those playlists the last 20% of its tracks, in the order
        returned by ``Helper.get_sorted_tracks_in_playlist``, are held out as
        test items and removed from the training interactions.

        Args:
            URM: Unused here; kept so the signature matches the other
                split methods.
            URM_df: DataFrame with ``playlist_id`` and ``track_id`` columns.

        Side effects:
            Sets ``self.target_playlists`` (the selected playlist ids),
            ``self.URM_train`` (CSR matrix of float64) and ``self.dict_test``
            (playlist id -> list of held-out tracks).
        """
        helper = Helper()

        # NOTE(review): the first 5000 targets are presumably the sequential
        # playlists -- confirm against Helper.get_target_playlists_list.
        sequential_playlists = helper.get_target_playlists_list()[:5000]
        self.target_playlists = sequential_playlists

        grouped = URM_df.groupby(
            'playlist_id', as_index=True).apply(lambda x: list(x['track_id']))

        relevant_items = defaultdict(list)
        for playlist_id in sequential_playlists:
            # Tracks of this playlist as found in the interaction data.
            tracks = np.array(grouped[playlist_id])
            to_be_removed = int(len(tracks) * 0.2)

            # Guard the zero case explicitly: seq[-0:] is seq[0:], i.e. the
            # WHOLE list, which would hold out every track of a playlist
            # with fewer than 5 tracks and leave its training row empty.
            if to_be_removed > 0:
                to_be_removed_tracks = helper.get_sorted_tracks_in_playlist(
                    playlist_id)[-to_be_removed:]
            else:
                to_be_removed_tracks = []

            for track in to_be_removed_tracks:
                relevant_items[playlist_id].append(track)
                # Drops every occurrence of the held-out track.
                tracks = np.delete(tracks, np.where(tracks == track))
            grouped[playlist_id] = tracks

        # One column per known track, one row per entry of ``grouped``.
        all_tracks = self.tracks_df["track_id"].unique()
        matrix = MultiLabelBinarizer(classes=all_tracks,
                                     sparse_output=True).fit_transform(grouped)
        self.URM_train = matrix.tocsr().astype(np.float64)
        self.dict_test = relevant_items
    def split_sequential(self, URM, URM_df):
        """Build a train/test split mixing sequential and random hold-out.

        The first 5000 target playlists are always selected; further
        playlists are drawn at random so that their train-set lengths follow
        the distribution returned by
        ``TargetAnalyzer.get_distribution_array_only_last``. For each
        selected playlist 20% of its tracks are held out: the last ones (in
        the order given by ``Helper.get_sorted_tracks_in_playlist``) for the
        first 5000 targets, randomly chosen ones for the others.

        Args:
            URM: Interaction matrix; must support ``URM.shape[0]`` and
                ``URM[row].data`` (presumably a scipy sparse matrix --
                confirm with callers).
            URM_df: DataFrame with ``playlist_id`` and ``track_id`` columns.

        Side effects:
            Sets ``self.target_playlists`` (int array of selected playlist
            ids), ``self.URM_train`` (CSR matrix of float64) and
            ``self.dict_test`` (playlist id -> list of held-out tracks).
            The distribution array returned by the analyzer is decremented
            in place.
        """
        segment = 1
        target_analyzer = TargetAnalyzer()

        # Gets distribution of only last 5000 playlists
        dist = target_analyzer.get_distribution_array_only_last(segment)

        helper = Helper()
        target_playlists = helper.get_target_playlists_list()[:5000]

        # The sequential targets are always part of the selection, so take
        # them out of the pool the random sampling below draws from.
        # ``dist`` is intentionally NOT adjusted for them.
        target_ids = [int(playlist_id) for playlist_id in target_playlists]
        target_id_set = set(target_ids)
        available_playlists = np.arange(URM.shape[0])
        available_playlists = available_playlists[
            ~np.isin(available_playlists, target_ids)]
        # Plain ints, so ids stay usable as index/dict keys (accumulating
        # them with np.append on an empty array would turn them into floats).
        selected_playlists = list(target_ids)

        print("Clustering with segment = " + str(segment))
        for key in tqdm(range(len(dist))):
            # Rejection sampling: draw random playlists until enough with a
            # train length of ``key`` have been found.
            # NOTE(review): this never terminates if dist[key] is negative or
            # fewer than dist[key] matching playlists remain available.
            while dist[key] != 0:
                random_index = randint(0, len(available_playlists) - 1)
                playlist_id = available_playlists[random_index]
                target_segment = int(0.8 * len(URM[playlist_id].data))
                if target_segment == key:
                    # Ids in the pool are unique, so deleting by position is
                    # the same as deleting by value.
                    available_playlists = np.delete(available_playlists,
                                                    random_index)
                    selected_playlists.append(int(playlist_id))
                    dist[key] -= 1

        self.target_playlists = np.array(selected_playlists, dtype=int)
        grouped = URM_df.groupby(
            'playlist_id', as_index=True).apply(lambda x: list(x['track_id']))

        relevant_items = defaultdict(list)

        for playlist_id in selected_playlists:
            # Tracks of this playlist as found in the interaction data.
            tracks = np.array(grouped[playlist_id])
            to_be_removed = int(len(tracks) * 0.2)

            if playlist_id in target_id_set:
                # Sequential target: hold out the LAST tracks. Guard the zero
                # case: seq[-0:] is the whole list and would hold out every
                # track of a playlist with fewer than 5 tracks.
                if to_be_removed > 0:
                    to_be_removed_tracks = (
                        helper.get_sorted_tracks_in_playlist(
                            playlist_id)[-to_be_removed:])
                else:
                    to_be_removed_tracks = []
                for track in to_be_removed_tracks:
                    relevant_items[playlist_id].append(track)
                    # Drops every occurrence of the held-out track.
                    tracks = np.delete(tracks, np.where(tracks == track))
            else:
                # Any other playlist: hold out randomly chosen positions.
                for _ in range(to_be_removed):
                    index = randint(0, len(tracks) - 1)
                    removed_track = tracks[index]
                    relevant_items[playlist_id].append(removed_track)
                    tracks = np.delete(tracks, index)
            grouped[playlist_id] = tracks

        # One column per known track, one row per entry of ``grouped``.
        all_tracks = self.tracks_df["track_id"].unique()
        matrix = MultiLabelBinarizer(classes=all_tracks,
                                     sparse_output=True).fit_transform(grouped)
        self.URM_train = matrix.tocsr().astype(np.float64)
        self.dict_test = relevant_items