# Disabled debugging snippet: counted the songs that carry id3 data, then quit.
# print(len(dp.get_all()))
# print(sum(1 if song['id3'] else 0
#           for song in map(dp.get_by_id, dp.get_all())))
# for song in dp.get_all():
#     song_data = dp.get_by_id(song)
#     if song_data['id3']:
#         pass
# exit()

# One sink that receives each song record on behalf of every enabled updater
# (presumably broadcast() forwards what it is sent to each of them -- confirm
# against its definition).  librosa_update() is currently not in the list.
all_parsers = broadcast([
    id3_v2_update(),
    last_fm_update(),
    echo_nest_update(),
])

for count, song_id in enumerate(dp.get_all(), start=1):
    song_data = dp.get_by_id(song_id)
    # songs_to_print.add((song_data['id3']['artist'],
    #                     song_data['id3']['album'],
    #                     song_data['id3']['title']))
    all_parsers.send(song_data)
    if count % 50 == 0:
        # Checkpoint every 50 songs and report how far we have got.
        dp.save_data()
        print(count)

# Send the STOP sentinel once all songs were handed over; a StopIteration
# raised by send() at that point is expected and deliberately ignored.
try:
    all_parsers.send(STOP)
except StopIteration:
    pass

# Final save, then report the size of the library.
dp.save_data()
song_total = len(dp.get_all())
print('{} songs updated'.format(song_total))
# for i in sorted(songs_to_print):
from collections import defaultdict

import matplotlib.pyplot as plt
from scipy import cluster

from dataproviders import HardDriveProvider

# Provider over the song collection; get_all() yields song ids (paths) and
# get_by_id() returns the stored record for one of them.
dp = HardDriveProvider('music')

# Genre tags gathered per artist and per song path (duplicates are kept).
artist_tags = defaultdict(list)
song_tags = defaultdict(list)

# Numeric accumulators; a key that was never seen reads as 0.
artist_plays = defaultdict(int)
track_plays = defaultdict(int)
track_listeners = defaultdict(int)
album_listeners = defaultdict(int)
album_plays = defaultdict(int)

# Song path -> album title; unknown songs read as ''.
song_to_album = defaultdict(str)

for song_path in dp.get_all():
    song = dp.get_by_id(song_path)

    song_id3 = song['id3']
    if song_id3:
        artist_title = song_id3['artist']
        # The genre field is a ';'-separated list.  Blank entries are dropped:
        # ''.split(';') returns [''], and a trailing ';' also produces '', so
        # without the filter an empty string would be recorded as a tag.
        stripped_tags = (tag_str.strip()
                         for tag_str in song_id3['genre'].split(';'))
        s_tags = [tag for tag in stripped_tags if tag]
        song_tags[song_path].extend(s_tags)
        artist_tags[artist_title].extend(s_tags)

    song_fm = song['lastfm']
    if song_fm:
        album_title = song_fm['album']
        if album_title:
            song_to_album[song_path] = album_title
            # 'albumlisteners' is converted with int() before being summed
            # per album title.
            album_listeners[album_title] += int(song_fm['albumlisteners'])
# Disabled debugging snippet: counted the songs that carry id3 data, then quit.
# print(len(dp.get_all()))
# print(sum(1 if song['id3'] else 0
#           for song in map(dp.get_by_id, dp.get_all())))
# for song in dp.get_all():
#     song_data = dp.get_by_id(song)
#     if song_data['id3']:
#         pass
# exit()

# Single sink that is sent every song record for the enabled updaters
# (presumably broadcast() relays each item to all of them -- confirm against
# its definition).  librosa_update() is currently not in the list.
enabled_updaters = [
    id3_v2_update(),
    last_fm_update(),
    echo_nest_update(),
]
all_parsers = broadcast(enabled_updaters)

for position, song_id in enumerate(dp.get_all(), start=1):
    song_data = dp.get_by_id(song_id)
    # songs_to_print.add((song_data['id3']['artist'],
    #                     song_data['id3']['album'],
    #                     song_data['id3']['title']))
    all_parsers.send(song_data)
    if position % 50 == 0:
        # Persist progress every 50 songs and print the running count.
        dp.save_data()
        print(position)

# After the last song, send the STOP sentinel; send() raising StopIteration
# here is expected and intentionally swallowed.
try:
    all_parsers.send(STOP)
except StopIteration:
    pass

# Save once more and report how many songs the provider holds.
dp.save_data()
updated_total = len(dp.get_all())
print('{} songs updated'.format(updated_total))
# for i in sorted(songs_to_print):
from gensim import corpora, models
from itertools import chain
from scipy import cluster

# NOTE(review): Counter, defaultdict and HardDriveProvider are used below but
# not imported in this part of the file -- presumably imported above; confirm.

# Provider over the song collection; get_all() yields song ids (paths) and
# get_by_id() returns the stored record for one of them.
dp = HardDriveProvider('music')

# Genre tag frequencies per artist and per song path.
artist_tags = defaultdict(Counter)
song_tags = defaultdict(Counter)

# Numeric accumulators; a key that was never seen reads as 0.
artist_plays = defaultdict(int)
track_plays = defaultdict(int)
track_listeners = defaultdict(int)
album_listeners = defaultdict(int)
album_plays = defaultdict(int)

# Song path -> album title; unknown songs read as ''.
song_to_album = defaultdict(str)

for song_path in dp.get_all():
    song = dp.get_by_id(song_path)

    song_id3 = song['id3']
    if song_id3:
        artist_title = song_id3['artist']
        # The genre field is a ';'-separated list.  Blank entries are dropped:
        # ''.split(';') returns [''], and a trailing ';' also produces '', so
        # without the filter the empty string would be counted as a tag.
        stripped_tags = (tag_str.strip()
                         for tag_str in song_id3['genre'].split(';'))
        s_tags = [tag for tag in stripped_tags if tag]
        song_tags[song_path].update(s_tags)
        artist_tags[artist_title].update(s_tags)

    song_fm = song['lastfm']
    if song_fm:
        album_title = song_fm['album']
        if album_title:
            song_to_album[song_path] = album_title
            # 'albumlisteners' is converted with int() before being summed
            # per album title.
            album_listeners[album_title] += int(song_fm['albumlisteners'])