@coroutine def broadcast(targets): while True: message = yield for it in targets: try: it.send(message) except StopIteration: targets.remove(it) continue if message == STOP: break # songs_to_print = set() for dataset_path in ['music']: dp = HardDriveProvider(dataset_path, True) # print(len(dp.get_all())) # print(sum(1 if song['id3'] else 0 # for song in map(dp.get_by_id, dp.get_all()))) # for song in dp.get_all(): # song_data = dp.get_by_id(song) # if song_data['id3']: # pass # exit() all_parsers = broadcast([id3_v2_update(), last_fm_update(), echo_nest_update() ]) # , ]) # , librosa_update()]) for i, song in enumerate(dp.get_all(), 1):
from dataproviders import HardDriveProvider


def id3_tabbed(j):
    """Return the tab-separated ID3 columns for *j*.

    No columns are selected yet, so the result is always an empty string.
    """
    columns = ()
    return '\t'.join(columns)


def echonest_tabbed(j):
    """Return the tab-separated Echo Nest columns for *j*.

    No columns are selected yet, so the result is always an empty string.
    """
    columns = ()
    return '\t'.join(columns)


def lastfm_tabbed(j):
    """Return the tab-separated Last.fm columns for *j*.

    No columns are selected yet, so the result is always an empty string.
    """
    columns = ()
    return '\t'.join(columns)


NAME = 'music'
data = HardDriveProvider(NAME)

# Write one row per item returned by the provider: the ID3, Last.fm and
# Echo Nest column groups, in that order, separated by tabs.
with open(NAME + '.tsv', 'w') as w:
    for d in data.get_all():
        row = [id3_tabbed(d), lastfm_tabbed(d), echonest_tabbed(d)]
        w.write('\t'.join(row) + '\n')
import matplotlib.pyplot as plt
from collections import defaultdict
from dataproviders import HardDriveProvider
from scipy import cluster

dp = HardDriveProvider('music')

# Tag lists keyed by artist title and by song path; missing keys start
# out as an empty list.
artist_tags = defaultdict(list)
song_tags = defaultdict(list)

# Numeric accumulators; missing keys start out at 0.
artist_plays = defaultdict(int)
track_plays = defaultdict(int)
track_listeners = defaultdict(int)
album_listeners = defaultdict(int)
album_plays = defaultdict(int)

# Song path -> album title; missing keys yield an empty string.
song_to_album = defaultdict(str)

for song_path in dp.get_all():
    song = dp.get_by_id(song_path)

    # ID3 part: the genre field is split on ';' and each piece is stripped
    # of surrounding whitespace before being recorded as a tag.
    song_id3 = song['id3']
    if song_id3:
        artist_title = song_id3['artist']
        s_tags = [piece.strip() for piece in song_id3['genre'].split(';')]
        song_tags[song_path] += s_tags
        artist_tags[artist_title] += s_tags

    # Last.fm part: only songs with a non-empty album title contribute to
    # the album statistics.
    song_fm = song['lastfm']
    if song_fm:
        album_title = song_fm['album']
        if album_title:
            song_to_album[song_path] = album_title
            album_listeners[album_title] += int(song_fm['albumlisteners'])
def broadcast(targets): while True: message = yield for it in targets: try: it.send(message) except StopIteration: targets.remove(it) continue if message == STOP: break # songs_to_print = set() for dataset_path in ['VkDataset #5']: dp = HardDriveProvider(dataset_path) # print(len(dp.get_all())) # print(sum(1 if song['id3'] else 0 # for song in map(dp.get_by_id, dp.get_all()))) # for song in dp.get_all(): # song_data = dp.get_by_id(song) # if song_data['id3']: # pass # exit() all_parsers = broadcast( [id3_v2_update(), last_fm_update(), echo_nest_update()]) # , ]) # , librosa_update()]) for i, song in enumerate(dp.get_all(), 1):
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
from dataproviders import HardDriveProvider
from gensim import corpora, models
from itertools import chain
from scipy import cluster

dp = HardDriveProvider('music')

# Tag frequency counters keyed by artist title and by song path; missing
# keys start out as an empty Counter.
artist_tags = defaultdict(Counter)
song_tags = defaultdict(Counter)

# Numeric accumulators; missing keys start out at 0.
artist_plays = defaultdict(int)
track_plays = defaultdict(int)
track_listeners = defaultdict(int)
album_listeners = defaultdict(int)
album_plays = defaultdict(int)

# Song path -> album title; missing keys yield an empty string.
song_to_album = defaultdict(str)

for song_path in dp.get_all():
    song = dp.get_by_id(song_path)

    # ID3 part: the genre field is split on ';' and each piece is stripped
    # of surrounding whitespace before being counted as a tag.
    song_id3 = song['id3']
    if song_id3:
        artist_title = song_id3['artist']
        s_tags = [piece.strip() for piece in song_id3['genre'].split(';')]
        song_tags[song_path].update(s_tags)
        artist_tags[artist_title].update(s_tags)

    # Last.fm part.
    song_fm = song['lastfm']
    if song_fm:
        album_title = song_fm['album']