def setUp(self):
    """Create a one-attribute session and fill it with ``self.N`` songs.

    The 'genre' values are inserted in descending order (N, N-1, ..., 1)
    inside a single transaction.
    """
    genre_mask = {'genre': (None, DummyDistanceFunction(), 1.0)}
    self._session = Session('test', genre_mask)
    self.N = 10

    with self._session.transaction():
        for genre_value in range(self.N, 0, -1):
            self._session.add({'genre': genre_value})
def create_session(name):
    """Create a new session called ``name`` and fill it from MY_DATABASE.

    The first three entries of MY_DATABASE are added to the session and
    ``session.mapping`` records, for every added song, its index in
    MY_DATABASE (keyed by the value returned from ``session.add``).

    :param name: Name the new session is created under. Previously this
        argument was ignored and the session was always named 'demo'.
    :returns: The freshly populated Session.
    """
    print('-- No saved session found, loading new.')
    session = Session(
        name=name,
        mask={
            # Each entry goes like this:
            'Genre': pairup(
                # Practice: Go look up what this Provider does.
                GenreTreeProvider(),
                # Practice: Same for the DistanceFunction.
                GenreTreeDistance(),
                # This has the highest rating of the three attributes:
                8
            ),
            'Title': pairup(
                # We can also compose Providers, so that the left one
                # gets the input value, and the right one the value
                # the left one processed.
                # In this case we first split the title in words,
                # then we stem each word.
                WordlistProvider() | StemProvider(),
                WordlistDistance(),
                1
            ),
            'Artist': pairup(
                # If no Provider (None) is given the value is forwarded as-is.
                # Here we use an ArtistNormalizeProvider with compression
                # enabled. Values are saved once and are given an ID.
                # Duplicate items always get the same ID.
                # You can trade off memory vs. speed with this.
                ArtistNormalizeProvider(compress=True),
                # If no DistanceFunction is given, all values are
                # compared with __eq__ - which might give bad results.
                None,
                1
            )
        }
    )

    # As in our first example we fill the session, but we don't insert the
    # full database; only the first three entries are added:
    with session.transaction():
        for idx, (artist, title, genre) in enumerate(MY_DATABASE[:3]):
            # Notice how we use the uppercase keys like above:
            session.mapping[session.add({
                'Genre': genre,
                'Title': title,
                'Artist': artist,
            })] = idx

    return session
def __init__(self, name='EasySession', disabled_attrs=None):
    """Set up a Session with a predefined attribute mask.

    :param name: Name passed on to ``Session.__init__``.
    :param disabled_attrs: Optional iterable of mask keys to leave out;
        keys that are not part of the mask are ignored.

    'moodbar' and 'bpm' are removed from the mask (with a warning) when
    ``check_for_moodbar()`` / ``check_for_bpmtools()`` return a falsy value.
    """
    # Entries are created in a fixed order: artist, album, title, date,
    # bpm, lyrics, rating, genre, moodbar.
    mask = {}
    mask['artist'] = pairup(ArtistNormalizeProvider(compress=True), None, 0.5)
    mask['album'] = pairup(AlbumNormalizeProvider(compress=True), None, 0.5)
    mask['title'] = pairup(
        TitleNormalizeProvider(compress=False) | StemProvider(),
        LevenshteinDistance(),
        1,
    )
    mask['date'] = pairup(DateProvider(), DateDistance(), 2)
    mask['bpm'] = pairup(BPMCachedProvider(), BPMDistance(), 3)
    mask['lyrics'] = pairup(KeywordsProvider(), KeywordsDistance(), 3)
    mask['rating'] = pairup(None, RatingDistance(), 2)
    mask['genre'] = pairup(GenreTreeProvider(), GenreTreeAvgDistance(), 4)
    mask['moodbar'] = pairup(MoodbarAudioFileProvider(), MoodbarDistance(), 5)

    moodbar_available = check_for_moodbar()
    if not moodbar_available:
        logging.warning('Disabling moodbar attr, no binary found in PATH.')
        del mask['moodbar']

    bpmtools_available = check_for_bpmtools()
    if not bpmtools_available:
        logging.warning("Disabling bpm attr, no binary found in PATH.")
        del mask['bpm']

    # Unknown (or already removed) attribute names are silently skipped.
    for attr_name in disabled_attrs or []:
        mask.pop(attr_name, None)

    Session.__init__(self, name, mask)
def create_session(name):
    """Create a new session called ``name`` and fill it from MY_DATABASE.

    The first three entries of MY_DATABASE are added to the session and
    ``session.mapping`` records, for every added song, its index in
    MY_DATABASE (keyed by the value returned from ``session.add``).

    :param name: Name the new session is created under. Previously this
        argument was ignored and the session was always named 'demo'.
    :returns: The freshly populated Session.
    """
    print('-- No saved session found, loading new.')
    session = Session(
        name=name,
        mask={
            # Each entry goes like this:
            'Genre': pairup(
                # Practice: Go look up what this Provider does.
                GenreTreeProvider(),
                # Practice: Same for the DistanceFunction.
                GenreTreeDistance(),
                # This has the highest rating of the three attributes:
                8),
            'Title': pairup(
                # We can also compose Providers, so that the left one
                # gets the input value, and the right one the value
                # the left one processed.
                # In this case we first split the title in words,
                # then we stem each word.
                WordlistProvider() | StemProvider(),
                WordlistDistance(),
                1),
            'Artist': pairup(
                # If no Provider (None) is given the value is forwarded as-is.
                # Here we use an ArtistNormalizeProvider with compression
                # enabled. Values are saved once and are given an ID.
                # Duplicate items always get the same ID.
                # You can trade off memory vs. speed with this.
                ArtistNormalizeProvider(compress=True),
                # If no DistanceFunction is given, all values are
                # compared with __eq__ - which might give bad results.
                None,
                1)
        })

    # As in our first example we fill the session, but we don't insert the
    # full database; only the first three entries are added:
    with session.transaction():
        for idx, (artist, title, genre) in enumerate(MY_DATABASE[:3]):
            # Notice how we use the uppercase keys like above:
            session.mapping[session.add({
                'Genre': genre,
                'Title': title,
                'Artist': artist,
            })] = idx

    return session
def test_find_matching_attributes_numeric(self):
    """Check ``database.find_matching_attributes_numeric`` on three songs.

    Each query is a dict of attribute values plus a second numeric
    argument (presumably the allowed deviation - confirm against the
    database implementation). The result is compared as an ordered list.
    """
    # Only pairup is needed here; the previously imported
    # GenreTreeProvider / GenreTreeDistance were never used.
    from munin.helper import pairup

    session = Session('session_find_test', {
        'x': pairup(None, None, 1),
        'y': pairup(None, None, 1)
    })

    a = session[session.add({
        'x': 21,
        'y': 42,
    })]
    b = session[session.add({
        'x': 0,
        'y': 100,
    })]
    # This song never appears in any of the expected results below.
    session[session.add({
        'x': 51,
        'y': 50,
    })]

    def find(query, tolerance):
        """Run the numeric search and materialize its result."""
        return list(
            session.database.find_matching_attributes_numeric(
                query, tolerance))

    self.assertEqual(find({'x': 10}, 20), [a, b])
    self.assertEqual(find({'y': 100}, 0), [b])
    self.assertEqual(find({'x': 10, 'y': 40}, 20), [a])
    self.assertEqual(find({'x': 10, 'y': 10}, 0), [])
def test_find_matching_attributes_numeric(self):
    """Check ``database.find_matching_attributes_numeric`` on three songs.

    Each query is a dict of attribute values plus a second numeric
    argument (presumably the allowed deviation - confirm against the
    database implementation). The result is compared as an ordered list.
    """
    # Only pairup is needed here; the previously imported
    # GenreTreeProvider / GenreTreeDistance were never used.
    from munin.helper import pairup

    session = Session('session_find_test', {
        'x': pairup(None, None, 1),
        'y': pairup(None, None, 1)
    })

    a = session[session.add({
        'x': 21,
        'y': 42,
    })]
    b = session[session.add({
        'x': 0,
        'y': 100,
    })]
    # This song never appears in any of the expected results below.
    session[session.add({
        'x': 51,
        'y': 50,
    })]

    def find(query, tolerance):
        """Run the numeric search and materialize its result."""
        return list(
            session.database.find_matching_attributes_numeric(
                query, tolerance))

    self.assertEqual(find({'x': 10}, 20), [a, b])
    self.assertEqual(find({'y': 100}, 0), [b])
    self.assertEqual(find({'x': 10, 'y': 40}, 20), [a])
    self.assertEqual(find({'x': 10, 'y': 10}, 0), [])
def setUp(self):
    """Create a session whose mask has the six attributes 'a' to 'f'.

    Every attribute uses no provider, no distance function and a
    weight of 1.0.
    """
    attribute_mask = {key: (None, None, 1.0) for key in 'abcdef'}
    self._session = Session('test', attribute_mask)
def setUp(self):
    """Create a one-attribute session and fill it with ``self.N`` songs.

    The 'genre' values are inserted in descending order (N, N-1, ..., 1)
    inside a single transaction.
    """
    genre_mask = {'genre': (None, DummyDistanceFunction(), 1.0)}
    self._session = Session('test', genre_mask)
    self.N = 10

    with self._session.transaction():
        for genre_value in range(self.N, 0, -1):
            self._session.add({'genre': genre_value})
def main():
    """Build a small demo session, optionally with extra songs and a plot.

    Command line flags (looked up in ``sys.argv``):

    * ``--euler``: additionally add one deterministic pseudo-random song
      to the database per loop iteration.
    * ``--plot``: plot the database at 1000x500.
    """
    import math

    from munin.testing import DummyDistanceFunction

    # Route all debug output of LOGGER to a stream handler.
    LOGGER.setLevel(logging.DEBUG)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
    stream_handler.setFormatter(formatter)
    LOGGER.addHandler(stream_handler)

    session = Session('session_test', {
        'genre': (None, DummyDistanceFunction(), 0.2),
        'artist': (None, DummyDistanceFunction(), 0.3)
    })

    def euler(x):
        """Fractional part of e**x: pseudo-random, but deterministic."""
        return math.fmod(math.e ** x, 1.0)

    with_euler = '--euler' in sys.argv

    with session.transaction():
        N = 100
        for i in range(N // 2 + 1):
            value = 1.0 - i / N
            session.add({'genre': value, 'artist': value})

            if with_euler:
                session.database.add({
                    'genre': euler((i + 1) % 30),
                    'artist': euler((N - i + 1) % 30)
                })

    LOGGER.debug('+ Step #3: Layouting and Plotting')
    if '--plot' in sys.argv:
        session.database.plot(1000, 500)
class TestNeighborsFrom(unittest.TestCase):
    """Tests for ``Session.recommend_from_seed`` with and without rules."""

    def setUp(self):
        """Fill a session with ``self.N`` songs, 'genre' running N..1."""
        genre_mask = {'genre': (None, DummyDistanceFunction(), 1.0)}
        self._session = Session('test', genre_mask)
        self.N = 10

        with self._session.transaction():
            for genre_value in range(self.N, 0, -1):
                self._session.add({'genre': genre_value})
        # self._session.database.plot()

    def test_neighbors_sorted(self):
        """Without rules, recommendations come back in uid order."""
        # Since no rules available, sorted_breadth_first_search will be called.
        seed = self._session[0]

        all_recs = list(self._session.recommend_from_seed(seed, number=self.N))
        self.assertEqual(len(all_recs), self.N - 1)
        self.assertEqual([r.uid for r in all_recs], list(range(1, self.N)))

        seed = self._session[0]
        five_recs = list(self._session.recommend_from_seed(seed, number=5))
        self.assertEqual(len(five_recs), 5)
        self.assertEqual([r.uid for r in five_recs], list(range(1, 6)))

    def test_recommend_with_rules(self):
        """Inserted rules change the order of the recommendations."""
        # Add two rules (each a tuple of two frozensets and two numbers):
        #   first song <-> last song    [0.75]
        #   first song <-> middle song  [0.50]
        rule_index = self._session.rule_index

        strong_rule = (
            frozenset([self._session[0]]),
            frozenset([self._session[-1]]),
            self.N // 10,
            0.75,
        )
        rule_index.insert_rule(strong_rule)

        weak_rule = (
            frozenset([self._session[0]]),
            frozenset([self._session[self.N // 2]]),
            self.N // 15,
            0.50,
        )
        rule_index.insert_rule(weak_rule)

        seed = self._session[0]
        all_recs = list(self._session.recommend_from_seed(seed, number=self.N))
        self.assertEqual(len(all_recs), self.N - 1)
        self.assertEqual(
            [1, 9, 2, 5, 3, 8, 4, 7, 6],
            [r.uid for r in all_recs]
        )

        seed = self._session[0]
        five_recs = list(self._session.recommend_from_seed(seed, number=5))
        self.assertEqual(len(five_recs), 5)
        self.assertEqual([1, 9, 2, 5, 3], [r.uid for r in five_recs])
def main():
    """Build a small demo session, optionally with extra songs and a plot.

    Command line flags (looked up in ``sys.argv``):

    * ``--euler``: additionally add one deterministic pseudo-random song
      to the database per loop iteration.
    * ``--plot``: plot the database at 1000x500.
    """
    import math

    from munin.testing import DummyDistanceFunction

    # Route all debug output of LOGGER to a stream handler.
    LOGGER.setLevel(logging.DEBUG)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
    stream_handler.setFormatter(formatter)
    LOGGER.addHandler(stream_handler)

    session = Session('session_test', {
        'genre': (None, DummyDistanceFunction(), 0.2),
        'artist': (None, DummyDistanceFunction(), 0.3)
    })

    def euler(x):
        """Fractional part of e**x: pseudo-random, but deterministic."""
        return math.fmod(math.e**x, 1.0)

    with_euler = '--euler' in sys.argv

    with session.transaction():
        N = 100
        for i in range(N // 2 + 1):
            value = 1.0 - i / N
            session.add({'genre': value, 'artist': value})

            if with_euler:
                session.database.add({
                    'genre': euler((i + 1) % 30),
                    'artist': euler((N - i + 1) % 30)
                })

    LOGGER.debug('+ Step #3: Layouting and Plotting')
    if '--plot' in sys.argv:
        session.database.plot(1000, 500)
class TestNeighborsFrom(unittest.TestCase):
    """Tests for ``Session.recommend_from_seed`` with and without rules."""

    def setUp(self):
        """Fill a session with ``self.N`` songs, 'genre' running N..1."""
        genre_mask = {'genre': (None, DummyDistanceFunction(), 1.0)}
        self._session = Session('test', genre_mask)
        self.N = 10

        with self._session.transaction():
            for genre_value in range(self.N, 0, -1):
                self._session.add({'genre': genre_value})
        # self._session.database.plot()

    def test_neighbors_sorted(self):
        """Without rules, recommendations come back in uid order."""
        # Since no rules available, sorted_breadth_first_search will be called.
        seed = self._session[0]

        all_recs = list(self._session.recommend_from_seed(seed, number=self.N))
        self.assertEqual(len(all_recs), self.N - 1)
        self.assertEqual([r.uid for r in all_recs], list(range(1, self.N)))

        seed = self._session[0]
        five_recs = list(self._session.recommend_from_seed(seed, number=5))
        self.assertEqual(len(five_recs), 5)
        self.assertEqual([r.uid for r in five_recs], list(range(1, 6)))

    def test_recommend_with_rules(self):
        """Inserted rules change the order of the recommendations."""
        # Add two rules (each a tuple of two frozensets and two numbers):
        #   first song <-> last song    [0.75]
        #   first song <-> middle song  [0.50]
        rule_index = self._session.rule_index

        strong_rule = (
            frozenset([self._session[0]]),
            frozenset([self._session[-1]]),
            self.N // 10,
            0.75,
        )
        rule_index.insert_rule(strong_rule)

        weak_rule = (
            frozenset([self._session[0]]),
            frozenset([self._session[self.N // 2]]),
            self.N // 15,
            0.50,
        )
        rule_index.insert_rule(weak_rule)

        seed = self._session[0]
        all_recs = list(self._session.recommend_from_seed(seed, number=self.N))
        self.assertEqual(len(all_recs), self.N - 1)
        self.assertEqual(
            [1, 9, 2, 5, 3, 8, 4, 7, 6],
            [r.uid for r in all_recs]
        )

        seed = self._session[0]
        five_recs = list(self._session.recommend_from_seed(seed, number=5))
        self.assertEqual(len(five_recs), 5)
        self.assertEqual([1, 9, 2, 5, 3], [r.uid for r in five_recs])
def test_modify(self):
    """Modifying a song's rating updates its value and its distances.

    Six songs with ratings 0..5 are added; song 5 is then modified to
    rating 0 inside ``fix_graph()`` and must end up at distance 0.0 to
    song 0.
    """
    from munin.distance.rating import RatingDistance

    session = Session('session_test_modify', {
        'rating': (None, RatingDistance(), 1),
    })

    with session.transaction():
        for i in range(0, 6):
            session.add({'rating': i})

    self.assertAlmostEqual(
        session[5].distance_get(session[0]).distance, 0.5)
    # The stored value is a tuple, so compare with assertEqual:
    # assertAlmostEqual would raise TypeError (tuple subtraction) instead
    # of reporting a normal failure if the values ever differed.
    self.assertEqual(session[5]['rating'], (5, ))

    with session.fix_graph():
        session.modify(5, {'rating': 0})

    self.assertAlmostEqual(
        session[5].distance_get(session[0]).distance, 0.0)
    self.assertEqual(session[5]['rating'], (0, ))
def test_modify(self):
    """Modifying a song's rating updates its value and its distances.

    Six songs with ratings 0..5 are added; song 5 is then modified to
    rating 0 inside ``fix_graph()`` and must end up at distance 0.0 to
    song 0.
    """
    from munin.distance.rating import RatingDistance

    session = Session('session_test_modify', {
        'rating': (None, RatingDistance(), 1),
    })

    with session.transaction():
        for i in range(0, 6):
            session.add({'rating': i})

    self.assertAlmostEqual(session[5].distance_get(session[0]).distance, 0.5)
    # The stored value is a tuple, so compare with assertEqual:
    # assertAlmostEqual would raise TypeError (tuple subtraction) instead
    # of reporting a normal failure if the values ever differed.
    self.assertEqual(session[5]['rating'], (5, ))

    with session.fix_graph():
        session.modify(5, {'rating': 0})

    self.assertAlmostEqual(session[5].distance_get(session[0]).distance, 0.0)
    self.assertEqual(session[5]['rating'], (0, ))
def test_recommendation_history(self):
    """A fed song is disallowed until five other songs have been fed."""
    history = RecommendationHistory()
    mask = {'artist': (None, None, 1), 'album': (None, None, 1)}
    session = Session('test', mask)

    fst_song = Song(session, {'artist': 'A', 'album': 'B'})

    # Allowed before it was ever fed, disallowed right afterwards.
    self.assertEqual(history.allowed(fst_song), True)
    history.feed(fst_song)
    self.assertEqual(history.allowed(fst_song), False)

    # Only the fifth unrelated song makes the first one allowed again.
    expected_after_each_feed = (False, False, False, False, True)
    for expectation in expected_after_each_feed:
        other_song = Song(session, {'artist': 'X', 'album': 'Y'})
        history.feed(other_song)
        self.assertEqual(history.allowed(fst_song), expectation)
original_song = MY_DATABASE[session.mapping[munin_song.uid]] print(' original values:') print(' Artist :', original_song[0]) print(' Album :', original_song[1]) print(' Genre :', original_song[2]) print() if __name__ == '__main__': print('The database:') for idx, song in enumerate(MY_DATABASE): print(' #{} {}'.format(idx, song)) print() # Perhaps we already had an prior session? session = Session.from_name('demo') or create_session('demo') rules = list(session.rule_index) if rules: print('Association Rules:') for left, right, support, rating in rules: print(' {:>10s} <-> {:<10s} [supp={:>5d}, rating={:.5f}]'.format( str([song.uid for song in left]), str([song.uid for song in right]), support, rating)) print() print_recommendations(session) # Let's add some history: for munin_uid in [0, 2, 0, 0, 2]: session.feed_history(munin_uid)
class SongTests(unittest.TestCase):
    """Tests for the mapping interface and distance handling of ``Song``."""

    def setUp(self):
        """Create a session with the attributes 'genre' and 'artist'."""
        mask = {
            'genre': (None, None, 0.1),
            'artist': (None, None, 0.1),
        }
        self._session = Session('test', mask)

    def test_song_basic_mapping(self):
        """Values are readable via get() and [], but cannot be deleted."""
        song = Song(self._session, {
            'genre': 'alpine brutal death metal',
            'artist': 'Herbert'
        })

        via_get = song.get('artist')
        via_item = song['artist']
        self.assertTrue(via_get == via_item == 'Herbert')

        with self.assertRaises(TypeError):
            del song['genre']

    def test_song_missing_attr(self):
        """Unknown keys raise KeyError on creation and on item access."""
        # This should already fail at creation:
        with self.assertRaises(KeyError):
            Song(self._session, {'a': 'b'})

        song = Song(self._session, {'genre': 'berta'})

        with self.assertRaises(KeyError):
            song['berta']

        # get() gives the same result for any unknown key.
        self.assertEqual(song.get('berta'), song.get('barghl'))

    def test_song_iter(self):
        """Iteration, items(), keys() and values() mirror the input dict."""
        input_dict = {
            'genre': 'alpine brutal death metal',
            'artist': 'Herbert'
        }
        song = Song(self._session, input_dict)

        self.assertEqual(dict(iter(song)), input_dict)
        self.assertEqual(dict(iter(song.items())), input_dict)
        self.assertEqual(set(song.keys()), {'genre', 'artist'})
        self.assertEqual(
            set(song.values()),
            {'alpine brutal death metal', 'Herbert'}
        )

    def test_song_distance_indirect_iter(self):
        """Add 40 pseudo-random songs inside one transaction."""
        import math

        def euler(x):
            """Fractional part of e**x: pseudo-random, but deterministic."""
            return math.fmod(math.e ** x, 1.0)

        with self._session.transaction():
            N = 40
            for i in range(N):
                self._session.add({
                    'genre': euler(i + 1),
                    'artist': euler(N - i + 1)
                })

    def test_song_add(self):
        """A song with max_neighbors=5 keeps exactly five distances."""
        song_one = Song(self._session, {
            'genre': 'alpine brutal death metal',
            'artist': 'herbert'
        }, max_neighbors=5)

        N = 100

        # First pass uses rising distances, second pass falling ones.
        for off in (False, True):
            for i in range(N):
                v = 1.0 - i / N if off else i / N
                neighbor = Song(self._session, {
                    'genre': str(i),
                    'artist': str(i)
                }, max_neighbors=5)
                song_one.distance_add(neighbor, DistanceDummy(v))

            self.assertEqual(len(list(song_one.distance_iter())), 5)

    def test_distances(self):
        """Check distance_add/distance_get and the neighbor limit."""
        song_one = Song(self._session, {
            'genre': 'alpine brutal death metal',
            'artist': 'herbert'
        })
        song_two = Song(self._session, {
            'genre': 'tirolian brutal death metal',
            'artist': 'Gustl'
        })
        song_one.uid = 'base1'
        song_two.uid = 'base2'

        self.assertTrue(song_one.distance_add(song_two, DistanceDummy(0.7)))
        self.assertTrue(song_two.distance_add(song_one, DistanceDummy(0.1)))

        zero = DistanceDummy(0.0)
        self.assertEqual(song_one.distance_get(song_one), zero)
        self.assertEqual(song_two.distance_get(song_two), DistanceDummy(0.0))
        self.assertEqual(song_one.distance_get(song_two), DistanceDummy(0.1))

        # Check if max_distance works correctly
        prev_len = song_one.distance_len()
        self.assertFalse(song_one.distance_add(song_two, DistanceDummy(1.0)))
        self.assertEqual(song_one.distance_len(), prev_len)

        # Test "only keep the best songs"
        song_base = Song(self._session, {
            'genre': 0,
            'artist': 0
        }, max_neighbors=10)

        N = 20
        for idx in range(N):
            label = str(idx)
            song = Song(self._session, {
                'genre': label,
                'artist': label
            }, max_neighbors=10)
            song.uid = idx
            song_base.distance_add(song, DistanceDummy(idx / N))

        values = list(song_base.distance_iter())
        closest, farthest = values[0], values[-1]
        self.assertAlmostEqual(closest[1].distance, 0.0)
        self.assertAlmostEqual(farthest[1].distance, (N / 2 - 1) / N)

    def test_disconnect(self):
        """disconnect() drops the center and links its former neighbors."""
        def make_song(uid):
            """Create a song with constant attributes and the given uid."""
            song = Song(self._session, {'genre': [0], 'artist': [0]})
            song.uid = uid
            return song

        center, left, right, top, down = [
            make_song(uid) for uid in ('c', 'l', 'r', 't', 'd')
        ]
        outer = (left, right, top, down)

        for neighbor in outer:
            self.assertTrue(center.distance_add(neighbor, DistanceDummy(0.5)))

        center.disconnect()

        for neighbor in outer:
            self.assertTrue(center.distance_get(neighbor) is None)

        for a, b in combinations(outer, 2):
            self.assertTrue(a.distance_get(b))
            self.assertAlmostEqual(a.distance_get(b).distance, 0.0)
def test_find_matching_attributes_generic(self):
    """Check ``session.find_matching_attributes`` for several queries.

    Three songs are added (uids 0, 1, 2 in insertion order, as the
    assertions below rely on) and looked up by genre, artist or both.
    """
    from munin.provider import GenreTreeProvider
    from munin.distance import GenreTreeDistance
    from munin.helper import pairup

    mask = {
        'genre': pairup(GenreTreeProvider(), GenreTreeDistance(), 5),
        'artist': pairup(None, None, 1)
    }
    session = Session('session_find_test', mask)

    for artist, genre in (
        ('Berta', 'death metal'),
        ('Hans', 'metal'),
        ('Berta', 'pop'),
    ):
        session.add({'artist': artist, 'genre': genre})

    def lookup(query):
        """Run the search and materialize its result."""
        return list(session.find_matching_attributes(query))

    found = lookup({'genre': 'metal'})
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0], session[1])

    found = lookup({'genre': 'metal', 'artist': 'Berta'})
    self.assertEqual(len(found), 0)

    found = lookup({'genre': 'metal', 'artist': 'Hans'})
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0], session[1])

    found = lookup({'genre': 'pop', 'artist': 'Berta'})
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0], session[2])

    found = lookup({'artist': 'Berta'})
    self.assertEqual(len(found), 2)
    self.assertEqual(found[0], session[0])
    self.assertEqual(found[1], session[2])
def setUp(self):
    """Create the 'session_test' session with 'genre' and 'artist'."""
    mask = {
        'genre': (None, None, 0.2),
        'artist': (None, None, 0.3),
    }
    self._session = Session('session_test', mask)
def from_name(name='EasySession'):
    """Delegate to ``Session.from_name`` and hand back its result.

    :param name: Session name to look up; defaults to 'EasySession'.
    :returns: Whatever ``Session.from_name(name)`` returns.
    """
    loaded_session = Session.from_name(name)
    return loaded_session
def setUp(self):
    """Create the 'test' session with 'genre' and 'artist' (0.1 each)."""
    mask = {
        'genre': (None, None, 0.1),
        'artist': (None, None, 0.1),
    }
    self._session = Session('test', mask)
class DatabaseTests(unittest.TestCase):
    """Tests for adding, modifying, removing and searching songs."""

    def setUp(self):
        """Create the 'session_test' session with 'genre' and 'artist'."""
        self._session = Session('session_test', {
            'genre': (None, None, 0.2),
            'artist': (None, None, 0.3)
        })

    def test_modify(self):
        """Modifying a song's rating updates its value and its distances."""
        from munin.distance.rating import RatingDistance

        session = Session('session_test_modify', {
            'rating': (None, RatingDistance(), 1),
        })

        with session.transaction():
            for i in range(0, 6):
                session.add({'rating': i})

        self.assertAlmostEqual(session[5].distance_get(session[0]).distance, 0.5)
        # The stored value is a tuple, so compare with assertEqual:
        # assertAlmostEqual would raise TypeError (tuple subtraction)
        # instead of reporting a normal failure on a mismatch.
        self.assertEqual(session[5]['rating'], (5, ))

        with session.fix_graph():
            session.modify(5, {'rating': 0})

        self.assertAlmostEqual(session[5].distance_get(session[0]).distance, 0.0)
        self.assertEqual(session[5]['rating'], (0, ))

    def test_basics(self):
        """Add 20 songs directly to the database inside a transaction."""
        with self._session.transaction():
            N = 20
            for i in range(N):
                self._session.database.add({
                    'genre': i / N,
                    'artist': i / N
                })

    def test_no_match(self):
        """Adding a key that is not in the mask raises a KeyError."""
        with self.assertRaisesRegex(KeyError, '.*mask.*'):
            self._session.database.add({
                'not_in_session': 42
            })

    def test_insert_remove_song(self):
        """Insert into and remove from an already built graph."""
        # Four identical songs form the initial graph.
        with self._session.transaction():
            for _ in range(4):
                self._session.add({'genre': [0], 'artist': [0]})
        # self._session.database.plot(250, 250)

        with self._session.fix_graph():
            self._session.insert({'genre': [0], 'artist': [0]})
        # self._session.database.plot(250, 250)

        for song in self._session.database:
            for other in self._session.database:
                # Compare each song with every *other* song. This used to
                # read ``self is not other``, which compared the TestCase
                # to a song and was therefore always true.
                if song is not other:
                    self.assertAlmostEqual(song.distance_get(other).distance, 0.0)

        self._session.remove(4)
        # self._session.database.plot(250, 250)

        with self._session.fix_graph():
            self._session.insert({'genre': [0], 'artist': [0]})
        # self._session.database.plot(250, 250)

    def test_find_matching_attributes_numeric(self):
        """Check ``database.find_matching_attributes_numeric``.

        The second argument is presumably the allowed deviation per
        attribute - confirm against the database implementation.
        """
        # Only pairup is needed; the unused provider/distance imports
        # were dropped.
        from munin.helper import pairup

        session = Session('session_find_test', {
            'x': pairup(None, None, 1),
            'y': pairup(None, None, 1)
        })

        a = session[session.add({
            'x': 21,
            'y': 42,
        })]
        b = session[session.add({
            'x': 0,
            'y': 100,
        })]
        # This song never appears in any of the expected results below.
        session[session.add({
            'x': 51,
            'y': 50,
        })]

        self.assertEqual(list(session.database.find_matching_attributes_numeric(
            {'x': 10}, 20
        )), [a, b])
        self.assertEqual(list(session.database.find_matching_attributes_numeric(
            {'y': 100}, 0
        )), [b])
        self.assertEqual(list(session.database.find_matching_attributes_numeric(
            {'x': 10, 'y': 40}, 20
        )), [a])
        self.assertEqual(list(session.database.find_matching_attributes_numeric(
            {'x': 10, 'y': 10}, 0,
        )), [])

    def test_find_matching_attributes_generic(self):
        """Check ``session.find_matching_attributes`` for several queries."""
        from munin.provider import GenreTreeProvider
        from munin.distance import GenreTreeDistance
        from munin.helper import pairup

        session = Session('session_find_test', {
            'genre': pairup(GenreTreeProvider(), GenreTreeDistance(), 5),
            'artist': pairup(None, None, 1)
        })

        session.add({
            'artist': 'Berta',
            'genre': 'death metal'
        })
        session.add({
            'artist': 'Hans',
            'genre': 'metal'
        })
        session.add({
            'artist': 'Berta',
            'genre': 'pop'
        })

        found = list(session.find_matching_attributes({'genre': 'metal'}))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], session[1])

        found = list(session.find_matching_attributes(
            {'genre': 'metal', 'artist': 'Berta'}
        ))
        self.assertEqual(len(found), 0)

        found = list(session.find_matching_attributes(
            {'genre': 'metal', 'artist': 'Hans'}
        ))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], session[1])

        found = list(session.find_matching_attributes(
            {'genre': 'pop', 'artist': 'Berta'}
        ))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], session[2])

        found = list(session.find_matching_attributes({'artist': 'Berta'}))
        self.assertEqual(len(found), 2)
        self.assertEqual(found[0], session[0])
        self.assertEqual(found[1], session[2])
def test_find_matching_attributes_generic(self):
    """Check ``session.find_matching_attributes`` for several queries.

    Three songs are added (uids 0, 1, 2 in insertion order, as the
    assertions below rely on) and looked up by genre, artist or both.
    """
    from munin.provider import GenreTreeProvider
    from munin.distance import GenreTreeDistance
    from munin.helper import pairup

    mask = {
        'genre': pairup(GenreTreeProvider(), GenreTreeDistance(), 5),
        'artist': pairup(None, None, 1)
    }
    session = Session('session_find_test', mask)

    for artist, genre in (
        ('Berta', 'death metal'),
        ('Hans', 'metal'),
        ('Berta', 'pop'),
    ):
        session.add({'artist': artist, 'genre': genre})

    def lookup(query):
        """Run the search and materialize its result."""
        return list(session.find_matching_attributes(query))

    found = lookup({'genre': 'metal'})
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0], session[1])

    found = lookup({'genre': 'metal', 'artist': 'Berta'})
    self.assertEqual(len(found), 0)

    found = lookup({'genre': 'metal', 'artist': 'Hans'})
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0], session[1])

    found = lookup({'genre': 'pop', 'artist': 'Berta'})
    self.assertEqual(len(found), 1)
    self.assertEqual(found[0], session[2])

    found = lookup({'artist': 'Berta'})
    self.assertEqual(len(found), 2)
    self.assertEqual(found[0], session[0])
    self.assertEqual(found[1], session[2])
def __init__(self, name='EasySession', disabled_attrs=None):
    """Set up a Session with a predefined attribute mask.

    :param name: Name passed on to ``Session.__init__``.
    :param disabled_attrs: Optional iterable of mask keys to leave out;
        keys that are not part of the mask are ignored.

    'moodbar' and 'bpm' are removed from the mask (with a warning) when
    ``check_for_moodbar()`` / ``check_for_bpmtools()`` return a falsy value.
    """
    # Entries are created in a fixed order: artist, album, title, date,
    # bpm, lyrics, rating, genre, moodbar.
    mask = {}
    mask['artist'] = pairup(ArtistNormalizeProvider(compress=True), None, 0.5)
    mask['album'] = pairup(AlbumNormalizeProvider(compress=True), None, 0.5)
    mask['title'] = pairup(
        TitleNormalizeProvider(compress=False) | StemProvider(),
        LevenshteinDistance(),
        1,
    )
    mask['date'] = pairup(DateProvider(), DateDistance(), 2)
    mask['bpm'] = pairup(BPMCachedProvider(), BPMDistance(), 3)
    mask['lyrics'] = pairup(KeywordsProvider(), KeywordsDistance(), 3)
    mask['rating'] = pairup(None, RatingDistance(), 2)
    mask['genre'] = pairup(GenreTreeProvider(), GenreTreeAvgDistance(), 4)
    mask['moodbar'] = pairup(MoodbarAudioFileProvider(), MoodbarDistance(), 5)

    moodbar_available = check_for_moodbar()
    if not moodbar_available:
        logging.warning('Disabling moodbar attr, no binary found in PATH.')
        del mask['moodbar']

    bpmtools_available = check_for_bpmtools()
    if not bpmtools_available:
        logging.warning("Disabling bpm attr, no binary found in PATH.")
        del mask['bpm']

    # Unknown (or already removed) attribute names are silently skipped.
    for attr_name in disabled_attrs or []:
        mask.pop(attr_name, None)

    Session.__init__(self, name, mask)
class DatabaseTests(unittest.TestCase):
    """Tests for adding, modifying, removing and searching songs."""

    def setUp(self):
        """Create the 'session_test' session with 'genre' and 'artist'."""
        self._session = Session('session_test', {
            'genre': (None, None, 0.2),
            'artist': (None, None, 0.3)
        })

    def test_modify(self):
        """Modifying a song's rating updates its value and its distances."""
        from munin.distance.rating import RatingDistance

        session = Session('session_test_modify', {
            'rating': (None, RatingDistance(), 1),
        })

        with session.transaction():
            for i in range(0, 6):
                session.add({'rating': i})

        self.assertAlmostEqual(
            session[5].distance_get(session[0]).distance, 0.5)
        # The stored value is a tuple, so compare with assertEqual:
        # assertAlmostEqual would raise TypeError (tuple subtraction)
        # instead of reporting a normal failure on a mismatch.
        self.assertEqual(session[5]['rating'], (5, ))

        with session.fix_graph():
            session.modify(5, {'rating': 0})

        self.assertAlmostEqual(
            session[5].distance_get(session[0]).distance, 0.0)
        self.assertEqual(session[5]['rating'], (0, ))

    def test_basics(self):
        """Add 20 songs directly to the database inside a transaction."""
        with self._session.transaction():
            N = 20
            for i in range(N):
                self._session.database.add({
                    'genre': i / N,
                    'artist': i / N
                })

    def test_no_match(self):
        """Adding a key that is not in the mask raises a KeyError."""
        with self.assertRaisesRegex(KeyError, '.*mask.*'):
            self._session.database.add({'not_in_session': 42})

    def test_insert_remove_song(self):
        """Insert into and remove from an already built graph."""
        # Four identical songs form the initial graph.
        with self._session.transaction():
            for _ in range(4):
                self._session.add({'genre': [0], 'artist': [0]})
        # self._session.database.plot(250, 250)

        with self._session.fix_graph():
            self._session.insert({'genre': [0], 'artist': [0]})
        # self._session.database.plot(250, 250)

        for song in self._session.database:
            for other in self._session.database:
                # Compare each song with every *other* song. This used to
                # read ``self is not other``, which compared the TestCase
                # to a song and was therefore always true.
                if song is not other:
                    self.assertAlmostEqual(
                        song.distance_get(other).distance, 0.0)

        self._session.remove(4)
        # self._session.database.plot(250, 250)

        with self._session.fix_graph():
            self._session.insert({'genre': [0], 'artist': [0]})
        # self._session.database.plot(250, 250)

    def test_find_matching_attributes_numeric(self):
        """Check ``database.find_matching_attributes_numeric``.

        The second argument is presumably the allowed deviation per
        attribute - confirm against the database implementation.
        """
        # Only pairup is needed; the unused provider/distance imports
        # were dropped.
        from munin.helper import pairup

        session = Session('session_find_test', {
            'x': pairup(None, None, 1),
            'y': pairup(None, None, 1)
        })

        a = session[session.add({
            'x': 21,
            'y': 42,
        })]
        b = session[session.add({
            'x': 0,
            'y': 100,
        })]
        # This song never appears in any of the expected results below.
        session[session.add({
            'x': 51,
            'y': 50,
        })]

        self.assertEqual(
            list(
                session.database.find_matching_attributes_numeric(
                    {'x': 10}, 20)), [a, b])
        self.assertEqual(
            list(
                session.database.find_matching_attributes_numeric(
                    {'y': 100}, 0)), [b])
        self.assertEqual(
            list(
                session.database.find_matching_attributes_numeric(
                    {
                        'x': 10,
                        'y': 40
                    }, 20)), [a])
        self.assertEqual(
            list(
                session.database.find_matching_attributes_numeric(
                    {
                        'x': 10,
                        'y': 10
                    },
                    0,
                )), [])

    def test_find_matching_attributes_generic(self):
        """Check ``session.find_matching_attributes`` for several queries."""
        from munin.provider import GenreTreeProvider
        from munin.distance import GenreTreeDistance
        from munin.helper import pairup

        session = Session(
            'session_find_test', {
                'genre': pairup(GenreTreeProvider(), GenreTreeDistance(), 5),
                'artist': pairup(None, None, 1)
            })

        session.add({'artist': 'Berta', 'genre': 'death metal'})
        session.add({'artist': 'Hans', 'genre': 'metal'})
        session.add({'artist': 'Berta', 'genre': 'pop'})

        found = list(session.find_matching_attributes({'genre': 'metal'}))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], session[1])

        found = list(
            session.find_matching_attributes({
                'genre': 'metal',
                'artist': 'Berta'
            }))
        self.assertEqual(len(found), 0)

        found = list(
            session.find_matching_attributes({
                'genre': 'metal',
                'artist': 'Hans'
            }))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], session[1])

        found = list(
            session.find_matching_attributes({
                'genre': 'pop',
                'artist': 'Berta'
            }))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0], session[2])

        found = list(session.find_matching_attributes({'artist': 'Berta'}))
        self.assertEqual(len(found), 2)
        self.assertEqual(found[0], session[0])
        self.assertEqual(found[1], session[2])
def setUp(self):
    """Create the 'test' session with the attributes 'genre' and 'random'."""
    mask = {
        'genre': (None, None, 0.5),
        'random': (None, None, 0.1),
    }
    self._session = Session('test', mask)
class SongTests(unittest.TestCase):
    """Tests for the mapping interface and distance handling of ``Song``."""

    def setUp(self):
        """Create a session with the attributes 'genre' and 'artist'."""
        mask = {
            'genre': (None, None, 0.1),
            'artist': (None, None, 0.1),
        }
        self._session = Session('test', mask)

    def test_song_basic_mapping(self):
        """Values are readable via get() and [], but cannot be deleted."""
        song = Song(self._session, {
            'genre': 'alpine brutal death metal',
            'artist': 'Herbert'
        })

        via_get = song.get('artist')
        via_item = song['artist']
        self.assertTrue(via_get == via_item == 'Herbert')

        with self.assertRaises(TypeError):
            del song['genre']

    def test_song_missing_attr(self):
        """Unknown keys raise KeyError on creation and on item access."""
        # This should already fail at creation:
        with self.assertRaises(KeyError):
            Song(self._session, {'a': 'b'})

        song = Song(self._session, {'genre': 'berta'})

        with self.assertRaises(KeyError):
            song['berta']

        # get() gives the same result for any unknown key.
        self.assertEqual(song.get('berta'), song.get('barghl'))

    def test_song_iter(self):
        """Iteration, items(), keys() and values() mirror the input dict."""
        input_dict = {
            'genre': 'alpine brutal death metal',
            'artist': 'Herbert'
        }
        song = Song(self._session, input_dict)

        self.assertEqual(dict(iter(song)), input_dict)
        self.assertEqual(dict(iter(song.items())), input_dict)
        self.assertEqual(set(song.keys()), {'genre', 'artist'})
        self.assertEqual(
            set(song.values()),
            {'alpine brutal death metal', 'Herbert'}
        )

    def test_song_distance_indirect_iter(self):
        """Add 40 pseudo-random songs inside one transaction."""
        import math

        def euler(x):
            """Fractional part of e**x: pseudo-random, but deterministic."""
            return math.fmod(math.e**x, 1.0)

        with self._session.transaction():
            N = 40
            for i in range(N):
                self._session.add({
                    'genre': euler(i + 1),
                    'artist': euler(N - i + 1)
                })

    def test_song_add(self):
        """A song with max_neighbors=5 keeps exactly five distances."""
        song_one = Song(self._session, {
            'genre': 'alpine brutal death metal',
            'artist': 'herbert'
        }, max_neighbors=5)

        N = 100

        # First pass uses rising distances, second pass falling ones.
        for off in (False, True):
            for i in range(N):
                v = 1.0 - i / N if off else i / N
                neighbor = Song(self._session, {
                    'genre': str(i),
                    'artist': str(i)
                }, max_neighbors=5)
                song_one.distance_add(neighbor, DistanceDummy(v))

            self.assertEqual(len(list(song_one.distance_iter())), 5)

    def test_distances(self):
        """Check distance_add/distance_get and the neighbor limit."""
        song_one = Song(self._session, {
            'genre': 'alpine brutal death metal',
            'artist': 'herbert'
        })
        song_two = Song(self._session, {
            'genre': 'tirolian brutal death metal',
            'artist': 'Gustl'
        })
        song_one.uid = 'base1'
        song_two.uid = 'base2'

        self.assertTrue(song_one.distance_add(song_two, DistanceDummy(0.7)))
        self.assertTrue(song_two.distance_add(song_one, DistanceDummy(0.1)))

        zero = DistanceDummy(0.0)
        self.assertEqual(song_one.distance_get(song_one), zero)
        self.assertEqual(song_two.distance_get(song_two), DistanceDummy(0.0))
        self.assertEqual(song_one.distance_get(song_two), DistanceDummy(0.1))

        # Check if max_distance works correctly
        prev_len = song_one.distance_len()
        self.assertFalse(song_one.distance_add(song_two, DistanceDummy(1.0)))
        self.assertEqual(song_one.distance_len(), prev_len)

        # Test "only keep the best songs"
        song_base = Song(self._session, {
            'genre': 0,
            'artist': 0
        }, max_neighbors=10)

        N = 20
        for idx in range(N):
            label = str(idx)
            song = Song(self._session, {
                'genre': label,
                'artist': label
            }, max_neighbors=10)
            song.uid = idx
            song_base.distance_add(song, DistanceDummy(idx / N))

        values = list(song_base.distance_iter())
        closest, farthest = values[0], values[-1]
        self.assertAlmostEqual(closest[1].distance, 0.0)
        self.assertAlmostEqual(farthest[1].distance, (N / 2 - 1) / N)

    def test_disconnect(self):
        """disconnect() drops the center and links its former neighbors."""
        def make_song(uid):
            """Create a song with constant attributes and the given uid."""
            song = Song(self._session, {'genre': [0], 'artist': [0]})
            song.uid = uid
            return song

        center, left, right, top, down = [
            make_song(uid) for uid in ('c', 'l', 'r', 't', 'd')
        ]
        outer = (left, right, top, down)

        for neighbor in outer:
            self.assertTrue(center.distance_add(neighbor, DistanceDummy(0.5)))

        center.disconnect()

        for neighbor in outer:
            self.assertTrue(center.distance_get(neighbor) is None)

        for a, b in combinations(outer, 2):
            self.assertTrue(a.distance_get(b))
            self.assertAlmostEqual(a.distance_get(b).distance, 0.0)
original_song = MY_DATABASE[session.mapping[munin_song.uid]] print(' original values:') print(' Artist :', original_song[0]) print(' Album :', original_song[1]) print(' Genre :', original_song[2]) print() if __name__ == '__main__': print('The database:') for idx, song in enumerate(MY_DATABASE): print(' #{} {}'.format(idx, song)) print() # Perhaps we already had an prior session? session = Session.from_name('demo') or create_session('demo') rules = list(session.rule_index) if rules: print('Association Rules:') for left, right, support, rating in rules: print(' {:>10s} <-> {:<10s} [supp={:>5d}, rating={:.5f}]'.format( str([song.uid for song in left]), str([song.uid for song in right]), support, rating )) print() print_recommendations(session) # Let's add some history: for munin_uid in [0, 2, 0, 0, 2]: