def read_msd_index(path='/Users/andreasj/phd/data/msd/index.txt'):
    """Build lookup tables from a '<SEP>'-delimited track index file.

    Every non-blank line must consist of exactly four '<SEP>'-separated
    fields. The first is used as the track id, the second is ignored, and
    the third and fourth are used as artist and title respectively.

    Args:
        path: Location of the index file. Defaults to the original
            hard-coded location so existing callers keep working.

    Returns:
        A tuple ``(index, name_index)`` of two dicts sharing the same keys,
        ``(fuzzy(artist), fuzzy(title))``:
          * ``index`` maps the key to the track id;
          * ``name_index`` maps the key to the string ``'artist - title'``
            built from the un-normalised field values.
        When several lines produce the same key, the last one wins.

    Raises:
        ValueError: If a non-blank line does not split into exactly four
            fields.
    """
    index = {}
    name_index = {}
    with open(path) as f:
        for line in f:
            record = line.strip()
            if not record:
                # Tolerate blank lines (e.g. a stray trailing newline)
                # instead of failing on the 4-way unpack below.
                continue
            track_id, _, artist, title = record.split('<SEP>')
            # `fuzzy` is defined elsewhere in this module; presumably it
            # normalises names for approximate matching -- confirm there.
            key = (fuzzy(artist), fuzzy(title))
            index[key] = track_id
            name_index[key] = '%s - %s' % (artist, title)
    return index, name_index
def read_k400(path='/Users/andreasj/phd/data/400k/index.txt'):
    """Group the file names listed in an index file by (artist, title).

    Each line has its surrounding whitespace and then its first two
    characters removed (presumably a leading './' from a directory
    listing -- confirm against the index file). The remainder must have
    exactly three '/'-separated components: the first is ignored, the
    second is taken as the artist and the third as the title. The title is
    cut off at the first occurrence of a known variant suffix ('_tab.',
    '_btab_', '_crd', '_ver<digit>', '_acoustic', ...).

    Args:
        path: Location of the index file. Defaults to the original
            hard-coded location so existing callers keep working.

    Returns:
        A ``defaultdict(list)`` mapping ``(fuzzy(artist), fuzzy(title))``
        to the list of matching file names (with the two-character prefix
        already removed), in file order.

    Raises:
        ValueError: If a listed file name does not have exactly three
            '/'-separated components.
    """
    index = defaultdict(list)
    regexp = re.compile(r'_(b?tab[\._]|crd|ver[0-9]|acoustic)')
    with open(path) as f:
        for line in f:
            filename = line.strip()[2:]
            # Skip empty entries (blank lines) and the index file's own
            # entry in the listing.
            if not filename or filename == 'index.txt':
                continue
            _, artist, title = filename.split('/')
            # Element 0 of the split is everything before the first
            # variant suffix, or the whole title when nothing matches.
            title = regexp.split(title)[0]
            # `fuzzy` is defined elsewhere in this module; presumably it
            # normalises names for approximate matching -- confirm there.
            index[(fuzzy(artist), fuzzy(title))].append(filename)
    return index