def getPublicPlaylistDashboard():
    """Render the playlist dashboard for the hard-coded user directory.

    Reads the optional ``playlistName`` query parameter. When it is given,
    the playlist with that name is looked up in
    ``library['playlists-tracks']`` and ``playlist`` is ``None`` if no
    playlist has that name. Without the parameter the playlist count is
    shown. Falls back to the data-load page when no library can be loaded.
    """
    library = analyze.loadLibraryFromFiles(
        str(DATA_DIRECTORY) + "/users/127108998/")
    if library is None:
        return render_template('dataload.html',
                               subheader_message="",
                               library=library,
                               **session)

    playlistName = request.args.get('playlistName')
    playlist = None
    if playlistName is not None:
        subheader_message = "Playlist " + playlistName
        # Stay at None when nothing matches; a plain for/break would leave
        # the *last* playlist selected and the wrong one would be rendered.
        playlist = next(
            (candidate for candidate in library['playlists-tracks']
             if candidate['name'] == playlistName),
            None)
    else:
        subheader_message = "Playlists count: " + str(
            len(library['playlists-tracks']))

    return render_template('playlistDashboard.html',
                           playlistName=playlistName,
                           playlist=playlist,
                           subheader_message=subheader_message,
                           library=library,
                           **session)
def getAudioFeatures(file_path='data/'):
    """Render the audio-feature time series page for the current user.

    Args:
        file_path: Path whose directory part is created if it does not exist
            yet.

    Returns:
        The rendered ``timeseries.html`` page; ``dataload.html`` when no
        library is available; ``index.html`` with an error message when the
        library carries no audio features.
    """
    library = analyze.loadLibraryFromFiles(_getDataPath())

    if library is None:
        return render_template('dataload.html',
                               subheader_message="",
                               library={},
                               **session)

    # Missing features used to trigger a call to this very view with the
    # track list as ``file_path``; that reloads the same library and recurses
    # until RecursionError. Report the failure instead.
    audioFeatures = library.get('audio_features')
    if audioFeatures is None:
        return render_template(
            'index.html',
            subheader_message="Failed to get audio features ",
            library=library,
            **session)

    directory = os.path.dirname(file_path)
    # dirname is '' for a bare file name and os.makedirs('') would raise.
    if directory and not os.path.exists(directory):
        os.makedirs(directory)

    bar = saagraph.create_dataseries(_getDataPath())

    return render_template('timeseries.html',
                           sortedA=audioFeatures,
                           subheader_message=str(len(audioFeatures)),
                           plot=bar,
                           **session)
def getTopGenres():
    """Render the top-genres page for the current user's library.

    The genre set is computed over the library after its ``'tracks'`` entry
    has been replaced by the result of ``analyze.getOrphanedTracks``, which
    is fed a second, freshly loaded copy of the library.
    """
    library = analyze.loadLibraryFromFiles(_getDataPath())

    if library is not None:
        fresh_copy = analyze.loadLibraryFromFiles(_getDataPath())
        orphaned = analyze.getOrphanedTracks(fresh_copy)
        library['tracks'] = orphaned
        top_genres = analyze.getTopGenreSet(library)
        header = f"Top Genres {len(library['tracks'])}"

        return render_template('topgenres.html',
                               sortedA=orphaned,
                               subheader_message=header,
                               library=library,
                               genres=top_genres,
                               **session)

    # No stored library yet: send the user to the data-load page.
    return render_template('dataload.html',
                           subheader_message="",
                           library=library,
                           **session)
def getlibrary():
    """Process the current user's library and render the time series page.

    Returns the data-load page when no library has been stored yet.
    """
    library = analyze.loadLibraryFromFiles(_getDataPath())

    if library is None:
        # A missing library used to be logged and then handed to
        # analyze.process anyway; stop here like the other views do.
        logging.info(" library is None")
        return render_template('dataload.html',
                               subheader_message="",
                               library={},
                               **session)

    analyze.process(library)

    bar = saagraph.create_dataseries()
    return render_template('timeseries.html', plot=bar)
def dataload():
    """Render the data-load page with the stored library's timestamp and size.

    ``sizetext`` is only computed when ``analyze.getUpdateDtStr`` reports a
    timestamp; otherwise it is passed to the template as ``None``.
    """
    last_update = analyze.getUpdateDtStr(_getDataPath())

    if last_update is None:
        size_text = None
    else:
        stored = analyze.loadLibraryFromFiles(_getDataPath())
        size_text = analyze.getLibrarySize(stored)

    return render_template('dataload.html',
                           sortedA=None,
                           subheader_message="",
                           library={},
                           lastmodified=last_update,
                           sizetext=size_text,
                           **session)
def getOrphanedTracks():
    """Render the list of orphaned tracks.

    Uses the library of the user named by the optional ``username`` query
    parameter, or the current session's library when the parameter is
    absent. Renders the data-load page when the name is not a plain
    directory name or when no library can be loaded.
    """
    username = request.args.get('username')

    # The name comes straight from the query string and becomes a path
    # component; refuse anything that could point outside the users directory.
    if username is not None and (
            '/' in username or '\\' in username or '\x00' in username
            or username in ('', '.', '..')):
        return render_template('dataload.html',
                               subheader_message="",
                               library=None,
                               **session)

    if username is None:
        dataPath = _getDataPath()
    else:
        dataPath = str(DATA_DIRECTORY) + "/users/" + username + "/"

    library = analyze.loadLibraryFromFiles(dataPath)
    if library is None:
        return render_template('dataload.html',
                               subheader_message="",
                               library=library,
                               **session)

    # The loaded library is only an input here: the template receives a
    # fresh dict holding just the orphaned tracks, so no second load is needed.
    tracks = analyze.getOrphanedTracks(library)
    library = {'tracks': tracks}

    return render_template('orphanedTracks.html',
                           sortedA=tracks,
                           subheader_message="Orphaned tracks count " +
                           str(len(tracks)),
                           library=library,
                           **session)
def getPlaylistDashboard():
    """Render the playlist dashboard, optionally for a single playlist.

    Query parameters:
        username: When present, the library is read from the fixed user
            directory below instead of the session's data path.
        playlistId: When present, only that playlist is shown; otherwise
            all playlists are resolved into ``library['playlists-tracks']``.

    Renders the data-load page when the library or the requested playlist
    data is unavailable.
    """
    username = request.args.get('username')

    if username is None:
        dataPath = _getDataPath()
    else:
        # Any explicit username currently maps to this fixed directory.
        dataPath = str(DATA_DIRECTORY) + "/users/127108998/"

    library = analyze.loadLibraryFromFiles(dataPath)
    if library is None:
        return render_template('dataload.html',
                               subheader_message="",
                               library=library,
                               **session)

    playlistId = request.args.get('playlistId')
    playlistName = None
    playlist = None
    if playlistId is not None:
        matches = analyze.getPlaylist(session['id'], library['playlists'],
                                      playlistId)
        # getPlaylist may come back empty (or None) for an id it cannot
        # resolve; indexing [0] would then raise instead of showing a page.
        if matches is None or len(matches) == 0:
            return render_template('dataload.html',
                                   subheader_message="",
                                   library=library,
                                   **session)
        playlist = matches[0]
        playlistName = playlist['name']
        subheader_message = "Playlist " + playlistName
    else:
        subheader_message = "Playlists count: " + str(len(
            library['playlists']))
        library['playlists-tracks'] = analyze.getPlaylist(
            session['id'], library['playlists'])
        if library['playlists-tracks'] is None:
            return render_template('dataload.html',
                                   subheader_message="",
                                   library=library,
                                   **session)

    return render_template('playlistDashboard.html',
                           playlistName=playlistName,
                           playlist=playlist,
                           subheader_message=subheader_message,
                           library=library,
                           **session)
def generate(sessionLocal):
    """Yield the user's progress message as ``data:`` frames.

    Runs inside ``app.app_context()`` and emits one frame (the message
    prefixed with ``'data: '`` and terminated by a blank line) about every
    0.3 seconds until ``_getUserSessionMsg()`` returns an empty value.

    Args:
        sessionLocal: Not used by the generator; kept for existing callers.
    """
    with app.app_context():
        while True:
            # Read the message once per iteration: it can be reset to None
            # between a check and a second read, which made the string
            # concatenation raise TypeError.
            message = _getUserSessionMsg()
            if not message:
                break

            yield 'data: ' + message + "\n\n"
            time.sleep(0.3)
def _getTrackTuples(dataPath):
    """Build one ``[added_at, label, weekday]`` row per saved track.

    Args:
        dataPath: Directory passed to ``analyze.loadLibraryFromFiles``.

    Returns:
        A list with one three-element list per entry of the library's
        ``'tracks'``: the ``added_at`` value parsed as a datetime, a label of
        the form ``"<timestamp> - <index> - <artist> - <track name>"`` (first
        listed artist only), and the weekday name from ``calendar.day_name``.
    """
    saved_tracks = analyze.loadLibraryFromFiles(dataPath)['tracks']

    rows = []
    for position, entry in enumerate(saved_tracks):
        # added_at looks like '2020-02-16T19:54:58Z'
        added = datetime.datetime.strptime(entry['added_at'],
                                           '%Y-%m-%dT%H:%M:%SZ')
        day_name = calendar.day_name[added.weekday()]

        track_info = entry['track']
        first_artist = track_info['artists'][0]['name']
        title = track_info['name']

        label = " - ".join((str(added), str(position), first_artist, title))
        rows.append([added, label, day_name])

    return rows
def getFavoriteArtistsOverTime(file_path='data/'):
    """Render the "favorite artists over time" chart page.

    Args:
        file_path: Path whose directory part is created if it does not exist
            yet.

    Returns:
        ``favorite_artists_over_time.html`` with the graph produced by
        ``saagraph.create_top_artists_graph``, or ``dataload.html`` when the
        library is missing or has no ``'topartists_short_term'`` entry.
    """
    library = analyze.loadLibraryFromFiles(_getDataPath())

    has_top_artists = (library is not None
                       and library.get('topartists_short_term') is not None)
    if not has_top_artists:
        return render_template('dataload.html',
                               subheader_message="",
                               library={},
                               **session)

    target_dir = os.path.dirname(file_path)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    graph = saagraph.create_top_artists_graph(_getDataPath())

    return render_template('favorite_artists_over_time.html',
                           sortedA=None,
                           subheader_message='',
                           plot=graph,
                           **session)
def index():
    """Render the landing page.

    Resets the user's progress message, then either greets an anonymous
    visitor with an empty library or, when the session holds a username,
    shows genres, library size and last-modified date of the stored library.
    The profile image URL is cached in the session when the stored profile
    provides one.
    """
    session['hellomsg'] = 'Welcome to Spotify Analyzer'
    hellomsg = 'Welcome to Additive Spotify Analyzer'
    _setUserSessionMsg(None)
    library = analyze.loadLibraryFromFiles(_getDataPath())

    username = session.get('username')
    if not username:
        return render_template('index.html',
                               subheader_message=hellomsg,
                               genres={},
                               library={},
                               **session)

    genres = analyze.getTopGenreSet(library)
    hellomsg = 'Welcome ' + username

    profile = library.get('profile') if library is not None else None

    display_name = username
    if profile is not None:
        # Spotify profiles may carry a null display name and an empty
        # ``images`` list (no profile picture); indexing [0] would raise.
        display_name = profile.get('display_name') or username
        images = profile.get('images') or []
        if images:
            session['profileimageurl'] = images[0]['url']

    sizetext = analyze.getLibrarySize(library)
    lastModifiedDt = analyze.getUpdateDtStr(_getDataPath())
    return render_template('index.html',
                           subheader_message=hellomsg,
                           genres=genres,
                           library=library,
                           sizetext=sizetext,
                           lastmodified=lastModifiedDt,
                           display_name=display_name,
                           **session)
def analyzeLocal():
    """Process the locally stored library and render the track list.

    Returns:
        ``trackslist.html`` with the result of ``analyze.process`` and a
        summary line (CPU time in milliseconds, artist count, track count);
        ``dataload.html`` when no library is stored; ``index.html`` when the
        stored library is empty.
    """
    library = analyze.loadLibraryFromFiles(_getDataPath())

    if library is None:
        return render_template('dataload.html',
                               subheader_message="",
                               library=library,
                               **session)
    if not library:
        return render_template(
            'index.html',
            subheader_message="Local data not found. Click to retrieve.",
            library={},
            **session)

    start = time.process_time()
    sortedA = analyze.process(library)
    trackCount = sum(len(sortedA[artist]) for artist in sortedA)

    # process_time() counts seconds; convert so the value matches the "ms"
    # label in the message.
    elapsed_ms = round((time.process_time() - start) * 1000, 1)

    return render_template('trackslist.html',
                           subheader_message="Local data processed in " +
                           str(elapsed_ms) +
                           "ms. Artist count " + str(len(sortedA)) +
                           ". Track count " + str(trackCount),
                           sortedA=sortedA,
                           diagramVersion="test",
                           library=library,
                           **session)
def create_sol_multiview():
    """Cluster the stored audio features and plot them as a scatter matrix.

    Loads the audio features with ``analyze.loadAudioFeatures()`` (a list of
    per-song dicts), clusters the songs with k-means (7 clusters) on
    standardized feature values and builds a Plotly ``Splom`` whose points
    are coloured by cluster label.

    Returns:
        The ``go.Figure``; it is also displayed through ``fig.show()``.
    """
    dataOrig = analyze.loadAudioFeatures()

    keys = ['danceability', 'energy', 'key', 'loudness', 'valence',
            'speechiness', 'tempo', 'time_signature']

    # Build the feature rows in the order of ``keys`` so that every axis
    # label matches its data. Following the feature dict's own key order
    # instead put speechiness under the 'valence' label and vice versa.
    plotted_keys = [key for key in keys if key in dataOrig[0]]

    # One row per audio feature, one column per song.
    dataArray = np.array([[song[key] for song in dataOrig]
                          for key in plotted_keys])

    # NOTE(review): a single mean/std over all features lets large-valued
    # features (tempo, loudness) dominate the distances; per-feature scaling
    # is probably what is wanted - confirm before changing cluster output.
    allsongsstandardized = (dataArray - np.mean(dataArray)) / np.std(dataArray)

    # One row per song, songs in reverse input order. This is the same
    # layout np.flip(np.rot90(a, 3)) produces for a 2-D array.
    X_train_norm = allsongsstandardized.T[::-1]
    dataToDisplay = dataArray.T[::-1]

    kmeans = KMeans(n_clusters=7)
    kmeans.fit(X_train_norm)

    # Labels are row-aligned with X_train_norm and therefore dataToDisplay.
    cs2 = kmeans.labels_.astype(float)

    fig = go.Figure(data=go.Splom(
        dimensions=[dict(label=key, values=dataToDisplay[:, column])
                    for column, key in enumerate(plotted_keys)],
        marker=dict(color=cs2,
                    showscale=False,  # colors encode categorical variables
                    line_color='white', line_width=0.5)
    ))
    fig.show()

    return fig
def create_figure_backup():
    """Backup variant of the clustered scatter matrix with extra line traces.

    Loads the audio features with ``analyze.loadAudioFeatures()`` (a list of
    per-song dicts), clusters the songs with k-means (7 clusters) on
    standardized feature values, shows a Plotly ``Splom`` coloured by
    cluster, then adds one line trace per cluster centre and shows the
    figure again.

    Returns:
        The ``go.Figure`` including the additional line traces.
    """
    dataOrig = analyze.loadAudioFeatures()

    keys = ['danceability', 'energy', 'key', 'loudness', 'valence',
            'speechiness', 'tempo', 'time_signature']

    # Build the feature rows in the order of ``keys`` so that every axis
    # label matches its data. Following the feature dict's own key order
    # instead put speechiness under the 'valence' label and vice versa.
    plotted_keys = [key for key in keys if key in dataOrig[0]]

    # One row per audio feature, one column per song.
    dataArray = np.array([[song[key] for song in dataOrig]
                          for key in plotted_keys])

    # NOTE(review): a single mean/std over all features lets large-valued
    # features (tempo, loudness) dominate the distances; per-feature scaling
    # is probably what is wanted - confirm before changing cluster output.
    allsongsstandardized = (dataArray - np.mean(dataArray)) / np.std(dataArray)

    # One row per song, songs in reverse input order. This is the same
    # layout np.flip(np.rot90(a, 3)) produces for a 2-D array.
    X_train_norm = allsongsstandardized.T[::-1]
    dataToDisplay = dataArray.T[::-1]

    kmeans = KMeans(n_clusters=7)
    kmeans.fit(X_train_norm)

    # Labels are row-aligned with X_train_norm and therefore dataToDisplay.
    # The former per-cluster song grouping was dropped: its result was never
    # used and it matched labels against the un-reversed song order.
    cs2 = kmeans.labels_.astype(float)

    fig = go.Figure(data=go.Splom(
        dimensions=[dict(label=key, values=dataToDisplay[:, column])
                    for column, key in enumerate(plotted_keys)],
        marker=dict(color=cs2,
                    showscale=False,  # colors encode categorical variables
                    line_color='white', line_width=0.5)
    ))
    fig.show()

    # One extra line trace per cluster centre, cycling through the feature
    # columns when there are more centres than features.
    feature_count = X_train_norm.shape[1]
    for i in range(len(kmeans.cluster_centers_)):
        column = X_train_norm[:, i % feature_count]
        fig.add_trace(go.Scatter(x=column,
                                 y=np.arange(column.min(), column.max()),
                                 mode='lines'))

    fig.show()

    return fig
def create_top_artists_graph(dataPath):
    """Build the "change of artist popularity over time" chart as JSON.

    Args:
        dataPath: Directory passed to ``analyze.loadLibraryFromFiles``.

    Returns:
        The Plotly figure serialized with ``to_json()``. Each trace is one
        artist; x holds the three time ranges and y the artist's 1-based rank
        in each range (``None`` where the artist is not ranked). A missing
        library or missing top-artist lists yield a chart without traces for
        the missing ranges.
    """
    fullLib = analyze.loadLibraryFromFiles(dataPath) or {}

    # (library key, number of leading entries used); list position doubles as
    # the x position: long term, medium term, short term.
    term_sources = (('topartists_long_term', 50),
                    ('topartists_medium_term', 50),
                    ('topartists_short_term', 20))
    topartists = [[art['name'] for art in (fullLib.get(key) or [])[:limit]]
                  for key, limit in term_sources]

    # artist name -> [rank long, rank medium, rank short]. Insertion order
    # (long-term artists first) determines trace order.
    topartistsrankingM = {}
    for term, names in enumerate(topartists):
        for position, name in enumerate(names):
            ranks = topartistsrankingM.setdefault(
                name, [None] * len(topartists))
            # +1 so that the most favorite artist is at position 1, not 0
            ranks[term] = position + 1

    xaxis = ['Long Term (several years)','Medium Term (last six months)', 'Very recent (last four weeks)']
    label_font_size = 10
    artist_count = len(topartistsrankingM)
    data = []
    for index, (artist, yaxis) in enumerate(topartistsrankingM.items()):
        # Red fades from 200 towards 0 along the trace order; green and blue
        # are random. The alpha value and closing bracket are appended below.
        red = 200 * ((artist_count - index) / artist_count)
        color = ('rgba(' + str(red) + ',' +
                 str(np.random.randint(0, high=100)) + ',' +
                 str(np.random.randint(0, high=120)))

        rank_labels = ['--' if rank is None else str(rank) for rank in yaxis]
        hovertemplate = '<b>' + artist + ' </b><br>All time popularity: ' + rank_labels[0] + \
               '<br>In the last six months: ' + rank_labels[1] + '<br>Recently: ' + rank_labels[2] + \
               '<extra></extra>'

        ranked_terms = sum(rank is not None for rank in yaxis)
        if ranked_terms > 1:
            # Ranked in several ranges: label the points with the artist name.
            mode = "markers+text+lines"
            text = ['<b>'+artist+'</b>','<b>'+artist+'</b>','<b>'+artist+'</b>']
            marker = {'opacity': 0.1, 'size': 11, 'symbol': 'diamond-wide-dot'}
        else:
            mode = "markers+lines"
            text = ''
            marker = {'opacity': 0.7, 'size': 11, 'symbol': 'diamond-wide-dot'}

        data.append(go.Scatter(x=xaxis, y=yaxis,
                               mode=mode,
                               marker=marker,
                               line={'color':color+',0.5)', 'width':1, 'dash':'dot'},
                               name=artist,
                               text=text,
                               hovertemplate=hovertemplate,
                               textfont={'size': label_font_size, 'color': color+',1)'},
                               visible=True
                               )
                    )

    tickvals = [1,5,10,15,20,25,30,35,40,45,50]

    layout = go.Layout({'title':'Change of artist popularity over time',
                        'font':{ 'size':15},
                        'xaxis': {'showgrid': False, 'showticklabels': True, 'zeroline': False},
                        'yaxis': {'showgrid': True, 'showticklabels': True, 'zeroline': False,
                                  # rank 1 at the top
                                  'autorange':'reversed',
                                  'tickvals': tickvals,
                                  },
                        'legend':{ 'font':{ 'size':11 }},
                        'hoverlabel_align' : 'right',
                        'height' : 750,
                        'margin' : { 'l':50, 'r':50, 'b':50, 't':80,'pad':4 }
    })

    fig = go.FigureWidget(data=data, layout=layout)
    fig.layout.hovermode = 'closest'
    fig.layout.hoverdistance = -1  # ensures no "gaps" for selecting sparse data

    graphJSON = fig.to_json()
    return graphJSON