def process(user):
    """Print BIC/AIC scores of Gaussian mixture fits on a user's track features.

    Every track of ``user.tracks`` that has features is decoded with
    ``uutils.decode_features`` and each feature value is multiplied by 5.
    For each tested dimensionality ``dim`` (2..15, then 20, 25, 30, 40) the
    vectors are truncated to their first ``dim`` components, and a
    full-covariance ``mixture.GMM`` is fitted for every component count ``k``
    in 1..20. One line per (dim, k) pair is printed; nothing is returned.

    NOTE(review): ``mixture`` is presumably ``sklearn.mixture``; ``GMM`` was
    removed from recent scikit-learn releases -- confirm the pinned version.
    """
    tracks = list()
    for track in user.tracks:
        if track.features is not None:
            features = uutils.decode_features(track.features.encode('utf-8'))
            tracks.append([5 * x for x in features])

    # list(...) keeps the concatenation valid where range() is not a list.
    dims = list(range(2, 16)) + [20, 25, 30, 40]
    for dim in dims:
        # The truncated points only depend on dim: build them once per
        # dimensionality instead of once per (dim, k) pair.
        pts = np.array([x[:dim] for x in tracks])
        for k in range(1, 21):
            clf = mixture.GMM(n_components=k, covariance_type='full')
            clf.fit(pts)
            print("dim = %d, k = %d, cov = full, bic = %f, aic = %f, n = %d"
                  % (dim, k, clf.bic(pts), clf.aic(pts), len(tracks)))
def process(user):
    """Print BIC/AIC scores of Gaussian mixture fits on a user's track features.

    Tracks of ``user.tracks`` without features are skipped; the others are
    decoded with ``uutils.decode_features`` and every feature value is scaled
    by 5. For each dimensionality ``dim`` in 2..15 plus 20, 25, 30 and 40,
    the vectors are cut to their first ``dim`` components and a
    full-covariance ``mixture.GMM`` is fitted for each ``k`` in 1..20.
    The scores are printed, one line per (dim, k); nothing is returned.

    NOTE(review): ``mixture`` is presumably ``sklearn.mixture``; ``GMM`` was
    removed from recent scikit-learn releases -- confirm the pinned version.
    """
    tracks = list()
    for track in user.tracks:
        if track.features is not None:
            features = uutils.decode_features(track.features.encode('utf-8'))
            tracks.append([5 * x for x in features])

    # list(...) keeps the concatenation valid where range() is not a list.
    for dim in list(range(2, 16)) + [20, 25, 30, 40]:
        # Loop-invariant for the k loop below, so it is computed only once
        # per dimensionality.
        pts = np.array([x[:dim] for x in tracks])
        for k in range(1, 21):
            clf = mixture.GMM(n_components=k, covariance_type='full')
            clf.fit(pts)
            print("dim = %d, k = %d, cov = full, bic = %f, aic = %f, n = %d"
                  % (dim, k, clf.bic(pts), clf.aic(pts), len(tracks)))
def get_points(dbname, username, start_dim=0, nb_dim=2):
    """Collect feature points and their labels for one user's tracks.

    Opens the SQLite database ``dbname``, looks up the user called
    ``username`` and, for every track that has features, keeps the
    ``nb_dim`` feature components starting at index ``start_dim``.

    Returns a ``(pts, labels)`` pair: ``pts`` is the list of points (tuples)
    and ``labels`` maps each point to ``u"<artist> - <title>"``. Tracks that
    share the same point share one label entry (the last one wins).

    Raises LookupError when the user does not exist.
    """
    pts = list()
    labels = dict()
    # Initialize the user / track DB.
    store = Store(create_database('sqlite:%s' % dbname))
    try:
        user = store.find(User, User.name == username).one()
        if user is None:
            raise LookupError('username not found in database')
        for track in user.tracks:
            if track.features is None:
                continue
            features = uutils.decode_features(track.features.encode('utf-8'))
            point = tuple(features[start_dim:start_dim + nb_dim])
            pts.append(point)
            labels[point] = u"%s - %s" % (track.artist, track.title)
    finally:
        # The store is private to this call and only freshly built tuples
        # and strings are returned, so release the connection on every path.
        store.close()
    return pts, labels
def get_points(dbname, username, start_dim=0, nb_dim=2):
    """Collect feature points and their labels for one user's tracks.

    The SQLite database ``dbname`` is opened, the user named ``username`` is
    fetched, and each of the user's tracks that has features contributes one
    point made of ``nb_dim`` feature components starting at ``start_dim``.

    Returns ``(pts, labels)``: the list of points (tuples) and a dict mapping
    each point to ``u"<artist> - <title>"``. When several tracks produce the
    same point, the label of the last one is kept.

    Raises LookupError when the user does not exist.
    """
    pts = list()
    labels = dict()
    # Initialize the user / track DB.
    store = Store(create_database('sqlite:%s' % dbname))
    try:
        user = store.find(User, User.name == username).one()
        if user is None:
            raise LookupError('username not found in database')
        for track in user.tracks:
            if track.features is None:
                continue
            features = uutils.decode_features(track.features.encode('utf-8'))
            point = tuple(features[start_dim:start_dim + nb_dim])
            pts.append(point)
            labels[point] = u"%s - %s" % (track.artist, track.title)
    finally:
        # Nothing returned refers back to the store, so its connection is
        # closed whether the lookup succeeded or raised.
        store.close()
    return pts, labels
def get_points(uid, store, start_dim=0, nb_dim=2):
    """Collect feature points and labels from a user's local, valid library.

    ``uid`` is looked up in ``store``; every LibEntry of that user flagged
    both local and valid whose track has features yields one point made of
    the ``nb_dim`` feature components starting at ``start_dim``.

    Returns ``(pts, labels)``: the list of points (tuples) and a dict mapping
    each point to ``u"<artist> - <title>"``.

    Raises LookupError when ``store`` holds no user with this uid.
    """
    user = store.get(User, uid)
    if user is None:
        raise LookupError('uid %d not found in database' % uid)

    # "== True" builds a query expression here; it is not a Python truth
    # test and must not be rewritten as "is True".
    wanted = ((LibEntry.user == user)
              & (LibEntry.is_local == True)
              & (LibEntry.is_valid == True))
    stop_dim = start_dim + nb_dim

    pts = []
    labels = {}
    for entry in store.find(LibEntry, wanted):
        track = entry.track
        if track.features is None:
            continue
        coords = tuple(uutils.decode_features(track.features)[start_dim:stop_dim])
        pts.append(coords)
        labels[coords] = u"%s - %s" % (track.artist, track.title)
    return pts, labels
def get_points(uid, store, start_dim=0, nb_dim=2):
    """Build the feature points and labels of a user's local, valid tracks.

    The user ``uid`` is fetched from ``store``. For each of that user's
    LibEntry rows that is both local and valid, and whose track carries
    features, the slice ``[start_dim:start_dim + nb_dim]`` of the decoded
    features becomes one point.

    Returns a pair ``(pts, labels)`` where ``pts`` lists the points (tuples)
    and ``labels`` maps a point to ``u"<artist> - <title>"``.

    Raises LookupError when the uid is unknown to ``store``.
    """
    user = store.get(User, uid)
    if user is None:
        raise LookupError('uid %d not found in database' % uid)

    # These comparisons build the query condition (expression objects), so
    # the explicit "== True" is required.
    is_users = LibEntry.user == user
    is_local = LibEntry.is_local == True
    is_valid = LibEntry.is_valid == True

    pts, labels = [], {}
    for lib_entry in store.find(LibEntry, is_users & is_local & is_valid):
        track = lib_entry.track
        if track.features is None:
            continue
        decoded = uutils.decode_features(track.features)
        key = tuple(decoded[start_dim:start_dim + nb_dim])
        pts.append(key)
        labels[key] = u"%s - %s" % (track.artist, track.title)
    return pts, labels
# Minimum rating required by each supported value of the 'filter' option.
_PL_RATING_THRESHOLDS = {'rating>=3': 3, 'rating>=4': 4, 'rating>=5': 5}


def _pl_seed_vector(seeds):
    """Sum, component by component, the feature vectors of all usable seeds.

    Only seeds of type 'tags' are supported; seeds of any other type are
    reported and skipped. Returns an empty list when no seed gave a vector.
    """
    vectors = list()
    for seed_type, seedslist in seeds.items():
        if seedslist is None:
            continue
        for seed in seedslist:
            if seed_type == 'tags':
                # tag_features also returns a weight, which is not used here.
                vect, _weight = utils.tag_features(seed)
                vectors.append(vect)
            # Selection by tracks is not available for now.
            else:
                # TODO handle error: undefined seed type
                print('solo_views.pl_generator: unknown type of tag: %s' % seed_type)
    return [sum(column) for column in zip(*vectors)]


def _pl_rating_ok(rating, rating_filter, unrated):
    """Tell whether an entry with this rating passes the rating filter."""
    if rating_filter is None:
        # No filtering
        return True
    if rating is None or rating <= 0:
        # Unrated tracks are only kept on request.
        return bool(unrated)
    try:
        threshold = _PL_RATING_THRESHOLDS.get(rating_filter)
    except TypeError:
        # Unhashable JSON value (list/object): not a known filter.
        threshold = None
    # An unknown filter value lets no rated track through.
    return threshold is not None and rating >= threshold


def pl_generator(user_id, seeds, options=None):
    """Generate, store and describe a playlist based on the given seeds.

    The playlist is generated by fetching the tracks with the nearest tags
    to the seeds.

    The seeds are a JSON object of the form {<type>: <seeds>[, ...]}.
    Supported types:
      * tags

    The options are a JSON object of the form {<option>: <value>[, ...]}.
    Supported options:
      * filter: 'rating>=3' [default], 'rating>=4' or 'rating>=5'
      * size (to be extended): 'probabilistic' [default]; with any other
        value every track passing the filter is kept
      * sort: 'natural' [default], 'ratings' or 'proximity'
      * unrated: true [default] or false (expected as a JSON boolean)
      * title: the title to be set to the playlist

    Returns the dict built by ``to_dict`` for the new playlist library entry.

    Raises:
      ValueError: no seeds were given, or no seed produced a feature vector.
      helpers.NotFound: the user's library holds no matching entry
        (NO_TAGGED_TRACKS) or no track was selected (IS_EMPTY).
    """
    # Set some default values
    default_filter = 'rating>=3'  # [rating>=3|rating>=4|rating>=5]
    default_size = 'probabilistic'  # [probabilistic]
    default_sort = 'natural'  # [natural|ratings|proximity]
    default_unrated = True  # [True|False]
    default_title = '__unnamed__><((()>'  # Reduce probability user chooses the same name

    # Check the input
    if not seeds:
        print('solo_views.pl_generator: seeds is None')
        raise ValueError('pl_generator: no seeds were given')
    seeds = json.loads(seeds)
    if not isinstance(seeds, dict):
        raise ValueError('pl_generator: seeds must be a JSON object')

    # Reference vector: sum of the seed vectors, then normalization.
    refvect = _pl_seed_vector(seeds)
    if refvect:
        refvect = normalize(refvect)
    if not refvect:
        print('solo_views.pl_generator: refvect is None')
        raise ValueError('pl_generator: the seeds produced no feature vector')

    # Get options from input; anything missing falls back to its default.
    given = {}
    if options:
        options = json.loads(options)
        if isinstance(options, dict):
            given = options
    rating_filter = given.get('filter')
    size = given.get('size')
    sort = given.get('sort')
    unrated = given.get('unrated')
    title = given.get('title')
    if rating_filter is None:
        rating_filter = default_filter
    if size is None:
        size = default_size
    if sort is None:
        sort = default_sort
    if unrated is None:
        unrated = default_unrated
    if title is None:
        title = default_title

    # Fetch LibEntries
    if unrated:
        entries = g.store.find(LibEntry, (LibEntry.user_id == user_id) & LibEntry.is_valid & LibEntry.is_local)
    else:
        # "!= None" builds a query expression; it must not become "is not None".
        entries = g.store.find(LibEntry, (LibEntry.user_id == user_id) & LibEntry.is_valid & LibEntry.is_local & (LibEntry.rating != None) & (LibEntry.rating > 0))
    if not entries.any():
        raise helpers.NotFound(errors.NO_TAGGED_TRACKS, "Could not generate a playlist: no tagged tracks were found.")

    # Playlist with a probability associated to each track.
    probpl = list()
    for entry in entries:
        # Filter first, so rejected tracks cost no feature decoding.
        if not _pl_rating_ok(entry.rating, rating_filter, unrated):
            continue
        proximity = 0  # 0=far away, 1=identical
        if entry.track.features is not None:
            # Not sure if tagvect is normalized, so in doubt normalize it.
            tagvect = normalize(utils.decode_features(entry.track.features))
            # Cosine similarity (dot product), mapped as (|cos| + 1) / 2.
            # NOTE(review): with fabs() the value lies in [0.5, 1], not
            # [0, 1] -- confirm whether the absolute value is intended.
            dot = sum(ref * tag for ref, tag in zip(refvect, tagvect))
            proximity = (fabs(dot) + 1) / 2
        # The proximity doubles as the probability of being picked.
        if size != 'probabilistic' or proximity >= random():
            probpl.append((entry, proximity))

    if not probpl:
        raise helpers.NotFound(errors.IS_EMPTY, "Could not generate a playlist: no tracks were found in user library.")

    # Randomize the order before reshaping
    probpl = pl_randomizer(probpl)
    # Here should happen the size reshaping (not implemented yet).

    # Sorting; the default 'natural' keeps the random order.
    if sort == 'ratings':
        probpl = sorted(probpl, key=lambda pair: pair[0].rating)
    elif sort == 'proximity':
        probpl = sorted(probpl, key=lambda pair: pair[1])

    # Remove the probabilities
    playlist = [entry for entry, _prob in probpl]

    # Keep only the relevant fields from the tracks
    tracks = [
        {
            'artist': entry.track.artist,
            'title': entry.track.title,
            'local_id': entry.local_id,
            'play_order': index,  # Position of the track in the playlist, used by android
        }
        for index, entry in enumerate(playlist, 1)
    ]

    # Store the playlist in the playlist table
    jsonify(tracks=tracks)  # return value unused; call kept from the original
    pldb = Playlist(user_id, unicode(title), len(playlist), seeds, options, unicode(refvect), tracks)
    g.store.add(pldb)
    g.store.flush()  # See Storm Tutorial: https://storm.canonical.com/Tutorial#Flushing
    if title == default_title:
        # The id only exists after the flush above.
        g.store.find(Playlist, Playlist.id == pldb.id).set(title=u"playlist_%s" % pldb.id)

    # Add it to the user playlist library
    pledb = PllibEntry(user_id, pldb.id)
    g.store.add(pledb)
    g.store.flush()

    # Make the changes persistent in the DB, see Storm Tutorial: https://storm.canonical.com/Tutorial#Committing
    g.store.commit()

    # Craft JSON
    return to_dict(pledb)