-
Notifications
You must be signed in to change notification settings - Fork 2
/
lookup.py
82 lines (71 loc) · 2.64 KB
/
lookup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import time
import datetime
import argparse
import sqlite3
import json
import logging
# Script-wide logger: INFO and above go to a stream handler, formatted as
# "<timestamp> - <level> - <message>".
# NOTE(review): the logger is instantiated directly instead of through
# logging.getLogger(), so it is presumably not registered in the global
# logger hierarchy -- confirm this is intended before changing it.
lookuplog = logging.Logger('lookup')
lookuplog.setLevel(logging.INFO)

logfmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(logfmt)

lookuplog.addHandler(ch)
import echonest
import util
def save(folder, id, data):
    """Write *data* as JSON to ``<folder>/<id[:4]>/<id>.json``.

    Files are sharded into sub-directories named after the first four
    characters of the id.

    Args:
        folder: Root output directory.
        id: Identifier string; used for both the shard directory and the
            file name.
        data: JSON-serialisable object to store.
    """
    outdir = os.path.join(folder, id[:4])
    outfile = os.path.join(outdir, "%s.json" % id)
    # presumably behaves like `mkdir -p` (creates missing parents, tolerates
    # an existing directory) -- verify against util.mkdir_p
    util.mkdir_p(outdir)
    # Use a context manager so the file is flushed and closed deterministically
    # instead of whenever the anonymous file object happens to be collected.
    with open(outfile, "w") as fp:
        json.dump(data, fp)
def load_songs(sqlfname, offset):
    """Return the distinct ``song_id`` values from the ``songs`` table.

    Args:
        sqlfname: Path to the sqlite database file.
        offset: Number of leading result rows to skip.

    Returns:
        A list holding the first column of every remaining row.
    """
    # A negative LIMIT means "no upper bound" in SQLite; it is only present
    # because OFFSET has to follow a LIMIT clause. The offset is bound as a
    # parameter rather than interpolated into the SQL text.
    query = "select distinct(song_id) from songs limit -1 offset ?"
    conn = sqlite3.connect(sqlfname)
    try:
        return [row[0] for row in conn.execute(query, (offset,))]
    finally:
        # The original never closed the connection.
        conn.close()
def lookup_song(songid):
    """Fetch and store Echo Nest data for the songs not yet saved on disk.

    A song counts as already saved when ``songs/<id[:4]>/<id>.json`` exists.
    Only the missing ids are sent to ``echonest.song_by_enid``; every song in
    the response is written out with ``save``.

    Args:
        songid: Iterable of song id strings. Despite the singular name,
            ``main`` passes this function to ``status_iter``, which calls it
            with one chunk of ids at a time.
    """
    output_dir = "songs"
    missing = [
        s for s in songid
        if not os.path.exists(os.path.join(output_dir, s[:4], "%s.json" % s))
    ]
    if not missing:
        return

    # Query only the ids that are missing. The original built this filtered
    # list but then passed the full input, re-fetching songs already on disk.
    data = echonest.song_by_enid(missing)
    for song in data["response"].get("songs", []):
        save(output_dir, song["id"], song)
def status_iter(iterable, callback, chunksize=1, reportsize=10):
    """Apply *callback* to chunks of *iterable*, logging progress and an ETA.

    Args:
        iterable: Sized collection of items (``len()`` is taken up front).
        callback: Called once per chunk produced by ``util.chunks``.
        chunksize: Number of items per chunk.
        reportsize: Log a progress line after every this many chunks.
    """
    itersize = len(iterable)
    starttime = time.time()
    # presumably util.chunks yields successive groups of up to `chunksize`
    # items -- verify against util
    for i, item in enumerate(util.chunks(iterable, chunksize), 1):
        callback(item)
        if i % reportsize != 0:
            continue

        # The final chunk may be shorter than chunksize, so cap the count at
        # the real total. Otherwise the log shows e.g. "Done 100/95" and the
        # remaining-time estimate goes negative.
        done = min(i * chunksize, itersize)
        # Fraction of the work completed; float() keeps this a true division
        # on Python 2 as well.
        position = float(done) / itersize

        duration = round(time.time() - starttime)
        # Linear extrapolation: total estimated time minus time already spent.
        remaining = round((duration / position) - duration)

        durdelta = datetime.timedelta(seconds=duration)
        remdelta = datetime.timedelta(seconds=remaining)
        lookuplog.info("Done %s/%s in %s; %s remaining", done, itersize, str(durdelta), str(remdelta))
    lookuplog.info("Finished")
def main(args):
    """Load song ids from the database and look them up in chunks.

    Args:
        args: Parsed command-line namespace; ``sqldb`` and ``offset`` are read.
    """
    lookuplog.info("loading...")
    pending = load_songs(args.sqldb, args.offset)
    lookuplog.info("got %s songs to process", len(pending))
    # Ten ids per lookup call, one progress line every ten calls.
    status_iter(pending, lookup_song, chunksize=10, reportsize=10)
if __name__ == "__main__":
    # Command-line entry point: a required database path and an optional
    # integer offset (default 0) are parsed and handed to main().
    cli = argparse.ArgumentParser(description="Look up echonest data")
    cli.add_argument("sqldb", type=str, help="The MSD sqlite metadata file")
    cli.add_argument("offset", type=int, nargs='?', default=0, help="offset into the song db")
    main(cli.parse_args())