# print(len(dp.get_all()))
    # print(sum(1 if song['id3'] else 0
    # for song in map(dp.get_by_id, dp.get_all())))
    # for song in dp.get_all():
    #     song_data = dp.get_by_id(song)
    #     if song_data['id3']:
    #         pass
    #     exit()
    all_parsers = broadcast([id3_v2_update(),
                             last_fm_update(),
                             echo_nest_update()
                             ])
    # , ])
    # , librosa_update()])
    for i, song in enumerate(dp.get_all(), 1):
        song_data = dp.get_by_id(song)
        # songs_to_print.add((song_data['id3']['artist'],
        #                     song_data['id3']['album'],
        #                     song_data['id3']['title']))
        all_parsers.send(song_data)
        if not (i % 50):
            dp.save_data()
            print(i)
    try:
        all_parsers.send(STOP)
    except StopIteration:
        pass
    dp.save_data()
    print('{} songs updated'.format(len(dp.get_all())))

# for i in sorted(songs_to_print):
import matplotlib.pyplot as plt
from collections import defaultdict
from dataproviders import HardDriveProvider
from scipy import cluster

# Walk every song known to the 'music' provider and collect genre tags and
# album statistics into module-level lookup tables.
dp = HardDriveProvider('music')

# Genre tags, keyed by artist name and by song path respectively.
artist_tags = defaultdict(list)
song_tags = defaultdict(list)

# Integer counters (a missing key reads as 0). Only album_listeners is filled
# in by the loop below; the others are just initialised here.
artist_plays = defaultdict(int)
track_plays = defaultdict(int)
track_listeners = defaultdict(int)
album_listeners = defaultdict(int)
album_plays = defaultdict(int)

# Song path -> album title; reads as '' for songs without a known album.
song_to_album = defaultdict(str)

for song_path in dp.get_all():
    song = dp.get_by_id(song_path)

    song_id3 = song['id3']
    if song_id3:
        artist_title = song_id3['artist']
        # The genre field is a ';'-separated list. Splitting an empty field
        # (or one with a trailing ';') produces '' entries, which would be
        # recorded as a bogus blank tag, so those are dropped.
        s_tags = [tag for tag in
                  (raw.strip() for raw in song_id3['genre'].split(';'))
                  if tag]
        song_tags[song_path].extend(s_tags)
        artist_tags[artist_title].extend(s_tags)

    song_fm = song['lastfm']
    if song_fm:
        album_title = song_fm['album']
        if album_title:
            song_to_album[song_path] = album_title
            # Added once per song, so an album's total grows with the number
            # of its songs that carry last.fm data.
            album_listeners[album_title] += int(song_fm['albumlisteners'])
    # print(len(dp.get_all()))
    # print(sum(1 if song['id3'] else 0
    # for song in map(dp.get_by_id, dp.get_all())))
    # for song in dp.get_all():
    #     song_data = dp.get_by_id(song)
    #     if song_data['id3']:
    #         pass
    #     exit()
    # Build one sink from the three updaters and feed every song record to it.
    # NOTE(review): broadcast, the *_update factories and STOP are defined
    # outside this excerpt; presumably broadcast forwards each sent item to
    # all of the updaters -- confirm.
    # NOTE(review): at this indentation the section runs inside the enclosing
    # per-song loop and rebinds `song`; it looks like a function body pasted
    # from elsewhere -- confirm the intended nesting.
    all_parsers = broadcast(
        [id3_v2_update(),
         last_fm_update(),
         echo_nest_update()])
    # , ])
    # , librosa_update()])
    # i counts from 1 so the checkpoint below fires after every 50th song.
    for i, song in enumerate(dp.get_all(), 1):
        song_data = dp.get_by_id(song)
        # songs_to_print.add((song_data['id3']['artist'],
        #                     song_data['id3']['album'],
        #                     song_data['id3']['title']))
        all_parsers.send(song_data)
        # Periodic checkpoint: call dp.save_data() and print the running count.
        if not (i % 50):
            dp.save_data()
            print(i)
    # Send the STOP sentinel; a StopIteration raised by that send is expected
    # and deliberately ignored.
    try:
        all_parsers.send(STOP)
    except StopIteration:
        pass
    # Final save, then report the total number of songs held by the provider
    # (the printed figure is len(dp.get_all()), not a count of changed songs).
    dp.save_data()
    print('{} songs updated'.format(len(dp.get_all())))

# for i in sorted(songs_to_print):
from gensim import corpora, models
from itertools import chain
from scipy import cluster

# Walk every song known to the 'music' provider and collect genre-tag counts
# and album statistics into module-level lookup tables.
dp = HardDriveProvider('music')

# Tag -> occurrence count, keyed by artist name and by song path respectively.
artist_tags = defaultdict(Counter)
song_tags = defaultdict(Counter)

# Integer counters (a missing key reads as 0). Only album_listeners is filled
# in by the loop below; the others are just initialised here.
artist_plays = defaultdict(int)
track_plays = defaultdict(int)
track_listeners = defaultdict(int)
album_listeners = defaultdict(int)
album_plays = defaultdict(int)

# Song path -> album title; reads as '' for songs without a known album.
song_to_album = defaultdict(str)

for song_path in dp.get_all():
    song = dp.get_by_id(song_path)

    song_id3 = song['id3']
    if song_id3:
        artist_title = song_id3['artist']
        # The genre field is a ';'-separated list. Splitting an empty field
        # (or one with a trailing ';') produces '' entries, which would be
        # counted as a bogus blank tag, so those are dropped.
        s_tags = [tag for tag in
                  (raw.strip() for raw in song_id3['genre'].split(';'))
                  if tag]
        # Counter.update adds one occurrence per listed tag.
        song_tags[song_path].update(s_tags)
        artist_tags[artist_title].update(s_tags)

    song_fm = song['lastfm']
    if song_fm:
        album_title = song_fm['album']
        if album_title:
            song_to_album[song_path] = album_title
            # Added once per song, so an album's total grows with the number
            # of its songs that carry last.fm data.
            album_listeners[album_title] += int(song_fm['albumlisteners'])