# lmatch.py
# Forked from echonest/msd-examples.
"""
A map-reduce job that turns each track's segment loudness into a
fixed-size profile and emits either that profile or its distance
to a reference curve built with tools.sinwave.
"""
from mrjob.job import MRJob
import track
from itertools import imap
import math
import tools
import sys
# NOTE(review): YIELD_ALL is never read in the code shown here, and the
# "densities" it describes are not computed by this job -- presumably a
# leftover from a sibling example; confirm before removing.
# Original intent: if YIELD_ALL is true, yield all densities; otherwise
# yield just the extremes.
YIELD_ALL = False
class MRlmatch(MRJob):
    """Map-only job that reduces each track to a fixed-size loudness profile.

    For every input line the mapper builds a loudness curve from the track's
    segments, resamples it to ``SIZE`` points and emits, keyed by
    ``(artist_name, title, track_id)``, either the profile itself
    (``VECTOR`` true) or its ``tools.distance`` to ``MATCH`` (``VECTOR``
    false).
    """

    # Debug switch: when true, the mapper prints MATCH next to each profile.
    DUMP = False
    # Number of points passed to tools.sample and tools.sinwave.
    SIZE = 64
    # True -> emit the profile; False -> emit its distance to MATCH.
    VECTOR = True
    # Reference curve the profiles are compared against.  The commented-out
    # line is the alternative target built from tools.sin2wave.
    #MATCH = tools.rnormalize(tools.scale(tools.sin2wave(SIZE), 60, -60), -60, 0)
    MATCH = tools.rnormalize(tools.scale(tools.sinwave(SIZE), 60, -60), -60, 0)

    def mapper(self, _, line):
        """Yield at most one ``(key, value)`` pair for the track in *line*.

        Args:
            _: input key supplied by the framework; ignored.
            line: one input record, handed to ``track.load_track``.

        Yields:
            ``((artist_name, title, track_id), value)`` where ``value`` is
            the resampled profile when ``VECTOR`` is true, otherwise
            ``tools.distance(MATCH, profile)``.  Nothing is yielded when the
            track's ``duration`` is 20 or less (presumably seconds --
            TODO confirm against the track schema).
        """
        t = track.load_track(line)
        segments = t['segments']
        duration = t['duration']

        # One (time, loudness) point per segment: the peak loudness, placed
        # at the segment start plus the offset of that peak.
        xdata = []
        ydata = []
        for seg in segments:
            sloudness = seg['loudness_max']
            sstart = seg['start'] + seg['loudness_max_time']
            xdata.append(sstart)
            ydata.append(sloudness)

        if duration > 20:
            # Resample onto int(duration) * 10 points, smooth, then cut the
            # curve down to SIZE points so it lines up with MATCH.
            idata = tools.interpolate(xdata, ydata, int(duration) * 10)
            smooth = tools.smooth(idata, 20)
            samp = tools.sample(smooth, self.SIZE)
            ndata = tools.rnormalize(samp, -60, 0)

            if self.DUMP:
                # A single formatted string prints the same text as the
                # former ``print i, x, y`` and parses on Python 2 and 3.
                for i, (x, y) in enumerate(zip(self.MATCH, ndata)):
                    print("%s %s %s" % (i, x, y))

            key = (t['artist_name'], t['title'], t['track_id'])
            if self.VECTOR:
                yield key, ndata
            else:
                yield key, tools.distance(self.MATCH, ndata)

    # No reducer is defined: the mapper output is the job output.
def dump():
    """Print the curve built from ``tools.sin2wave(256)``, one value per line.

    The wave is passed through ``tools.scale(..., 60, -60)`` and
    ``tools.rnormalize(..., -60, 0)`` before printing, the same steps used
    to build the job's reference curve.
    """
    wave = tools.sin2wave(256)
    scaled = tools.scale(wave, 60, -60)
    for value in tools.rnormalize(scaled, -60, 0):
        print(value)
if __name__ == '__main__':
    # Script entry point: start the job through run(), which MRlmatch
    # inherits from MRJob.  Uncomment dump() to also print the sin2wave
    # reference curve before the job starts.
    #dump()
    MRlmatch.run()