def _reset_history(self):
    """Replace all history bookkeeping with newly constructed objects.

    Rebinds the set of revoked uids, the listen history, the rule index and
    the playcount counter. The limits handed to ``ListenHistory`` and
    ``RuleIndex`` are read from the session's config mapping.
    """
    self._revoked_uids = set()

    # Bind the config mapping once instead of spelling out the full
    # attribute chain for every key.
    config = self._session.config

    self._listen_history = ListenHistory(
        maxlen=config['history_max_pkg'],
        time_threshold_sec=config['history_timeout'],
        max_group_size=config['history_pkg_size'],
    )
    self._rule_index = RuleIndex(maxlen=config['history_max_rules'])
    self._playcounts = Counter()
def _reset_history(self):
    """Throw away the recorded history and start with fresh containers.

    The revoked-uid set, the listen history, the rule index and the
    playcounts are all rebound to newly created objects; the sizing and
    timeout values come from ``self._session.config``.
    """
    self._revoked_uids = set()

    settings = self._session.config
    history_options = {
        'maxlen': settings['history_max_pkg'],
        'time_threshold_sec': settings['history_timeout'],
        'max_group_size': settings['history_pkg_size'],
    }
    self._listen_history = ListenHistory(**history_options)

    self._rule_index = RuleIndex(maxlen=settings['history_max_rules'])
    self._playcounts = Counter()
class Database:
    """Class managing Database concerns.

    Songs are stored in a list that is indexed by their uid. Removing a song
    leaves a ``None`` hole in that list; the uid of every hole is remembered
    in ``_revoked_uids`` so that the next :func:`add` can hand it out again.
    """

    def __init__(self, session):
        """Usually you access this as ``.database`` attribute of
        :class:`munin.session.Session`.

        You can do the following tasks with it:

        * Trigger updates (:func:`rebuild`)
        * Get a plot of the graph for debugging purposes.
        * Iterate over the database (``for song in database``).
        * Get a song by its uid. (``database[song.uid]``)

        .. note::

            The division of :class:`munin.session.Session` and
            :class:`Database` is purely cosmetic. Neither class can exist
            on its own.

        :param session: The owning session. Its ``config`` mapping and its
            ``provider_for_key`` method are used by this class.
        """
        self._session = session
        self._song_list = []
        self._reset_history()

    def _reset_history(self):
        """Replace all history bookkeeping with newly constructed objects.

        The limits passed to ``ListenHistory`` and ``RuleIndex`` are read
        from the session's config.
        """
        # NOTE(review): this also forgets the revoked uids although the
        # matching ``None`` holes stay in ``_song_list``. Since rebuild()
        # calls this, ``len(self)`` looks like it overcounts after a
        # remove() followed by a rebuild() - confirm whether intended.
        self._revoked_uids = set()
        self._listen_history = ListenHistory(
            maxlen=self._session.config['history_max_pkg'],
            time_threshold_sec=self._session.config['history_timeout'],
            max_group_size=self._session.config['history_pkg_size']
        )
        self._rule_index = RuleIndex(
            maxlen=self._session.config['history_max_rules']
        )
        self._playcounts = Counter()

    def __iter__(self):
        """Iterate over the stored songs, skipping falsy entries
        (i.e. the ``None`` holes left behind by :func:`remove`).
        """
        return filter(None, self._song_list)

    def __len__(self):
        """Return the number of list slots minus the number of revoked uids."""
        return len(self._song_list) - len(self._revoked_uids)

    def __getitem__(self, idx):
        """Lookup a certain song by its uid.

        :param idx: A uid previously returned by :func:`add` or
            :func:`insert`.
        :returns: The entry stored for that uid. This is ``None`` if the
            slot was emptied by :func:`remove`.
        :raises IndexError: If ``idx`` is outside of the song list.
        """
        try:
            return self._song_list[idx]
        except IndexError:
            raise IndexError('song uid #{} is invalid'.format(idx))

    def _current_uid(self):
        """Return the uid the next added song should get.

        Revoked uids are reused first (the chosen one is taken out of the
        revoked set); otherwise the uid is the next free list index.
        """
        if self._revoked_uids:
            return self._revoked_uids.pop()
        return len(self._song_list)

    def plot(self, width=1000, height=1000, **kwargs):
        """Plot the current graph for debugging purpose.

        Will try to open an installed image viewer - does not return an image.

        :param width: Width of the plotted image in pixel.
        :param height: Height of the plotted image in pixel.
        :param kwargs: Passed through unchanged to ``munin.plot.plot``.
        """
        munin.plot.plot(self, width, height, **kwargs)

    def playcount(self, song):
        """Return how often ``song`` was fed to :func:`feed_history`
        since the last history reset (0 if never).
        """
        return self._playcounts.get(song, 0)

    def playcounts(self, n=0):
        """Return the recorded playcounts.

        :param n: If smaller than 1 the internal counter itself is
            returned, otherwise only the ``n`` most common entries.
        :returns: The counter mapping, or a list of ``(song, count)`` pairs
            as produced by ``Counter.most_common``.
        """
        if n < 1:
            return self._playcounts
        else:
            return self._playcounts.most_common(n)

    def feed_history(self, song):
        """Record that ``song`` was listened to.

        Whenever the listen history's ``feed`` returns a true value, rules
        are (re)computed from the history and inserted into the rule index.
        The playcount of ``song`` is incremented in any case.
        """
        if self._listen_history.feed(song):
            rules = self._listen_history.find_rules()
            self._rule_index.insert_rules(rules)
        self._playcounts[song] += 1

    def find_matching_attributes(self, subset, max_numeric_offset=None):
        """Yield the songs matching the attribute values in ``subset``.

        :param subset: Mapping of attribute key to an (unprocessed) value.
        :param max_numeric_offset: If ``None`` the generic matching is used,
            otherwise the numeric matching with this maximal offset.
        :returns: A generator of songs.
        """
        if max_numeric_offset is None:
            return self.find_matching_attributes_generic(subset)
        else:
            return self.find_matching_attributes_numeric(subset, max_numeric_offset)

    def find_matching_attributes_numeric(self, subset, max_offset):
        """Yield songs whose values lie within ``max_offset`` of ``subset``.

        Every value in ``subset`` is run through the provider of its key.
        For each song, only keys present in both ``subset`` and the song are
        checked; the first element of the song's value is compared against
        the first element of the processed value. Songs with a ``None``
        value for such a key are rejected.

        :raises KeyError: If a key lookup fails while matching.
        """
        try:
            numerics = {}
            for key, value in subset.items():
                provider = self._session.provider_for_key(key)
                numerics[key] = provider.process(value)

            for song in self:
                for key in (numerics.keys() & song.keys()):
                    value = song.get(key)
                    if value is None:
                        break

                    compar = numerics[key][0]
                    if not (compar - max_offset <= value[0] <= compar + max_offset):
                        break
                else:
                    # No key rejected this song.
                    yield song
        except KeyError:
            raise KeyError('key "{k}" is not in mask'.format(k=key))

    def find_matching_attributes_generic(self, subset):
        """Yield songs whose values for the keys of ``subset`` all occur
        among the processed values of ``subset``.

        :raises KeyError: If a key lookup fails while matching.
        """
        try:
            # NOTE(review): the processed values of all keys are pooled into
            # one set, so a song's value for key A also matches when it
            # equals the requested value of key B - confirm this is wanted.
            value_set = set()
            for key, value in subset.items():
                provider = self._session.provider_for_key(key)
                value_set.add(provider.process(value))

            for song in self:
                if all((song[key] in value_set for key in subset.keys())):
                    yield song
        except KeyError:
            raise KeyError('key "{k}" is not in mask'.format(k=key))

    def _rebuild_step_base(self, mean_counter, window_size, step_size):
        """Do the Base Iterations.

        This involves three iterations over the songs:

        * :func:`munin.helper.sliding_window` with ``window_size`` and
          ``step_size``.
        * :func:`munin.helper.centering_window` with half the window size.
        * :func:`munin.helper.centering_window` with half the window size
          and ``parallel=False``.

        Within every window the distance of each pair of songs is computed,
        added to both songs and sampled into ``mean_counter``.

        :param mean_counter: A RunningMean counter to sample the initial mean/sd
        :param window_size: The max. size of the window in which combinations
            are taken. ``None`` reads ``rebuild_window_size`` from the config.
        :param step_size: The movement of the window per iteration.
            ``None`` reads ``rebuild_step_size`` from the config.
        """
        if window_size is None:
            window_size = self._session.config['rebuild_window_size']

        if step_size is None:
            step_size = self._session.config['rebuild_step_size']

        # Base Iteration:
        slider = sliding_window(self, window_size, step_size)
        center = centering_window(self, window_size // 2)
        anticn = centering_window(self, window_size // 2, parallel=False)

        # Prebind the functions for performance reasons.
        compute = Song.distance_compute
        add = Song.distance_add

        # Select the iterator:
        for idx, iterator in enumerate((slider, center, anticn)):
            LOGGER.debug('|-- Applying iteration #{}: {}'.format(idx + 1, iterator))

            # Iterate over the list:
            for window in iterator:
                # Calculate the combination set:
                for song_a, song_b in combinations(window, 2):
                    distance = compute(song_a, song_b)
                    add(song_a, song_b, distance)

                    # Sample the newly calculated distance.
                    mean_counter.add(distance.distance)

    def _rebuild_step_refine(self, mean_counter, num_passes=None, mean_scale=None):
        """Do the refinement step.

        .. seealso:: :func:`rebuild`

        :param mean_counter: RunningMean Counter
        :param num_passes: How many times the song list shall be iterated at
            most. ``None`` reads ``rebuild_refine_passes`` from the config.
        :param mean_scale: Scaling used to derive the distance threshold
            from mean and sd. ``None`` reads ``rebuild_mean_scale`` from the
            config.
        """
        if num_passes is None:
            num_passes = self._session.config['rebuild_refine_passes']

        if mean_scale is None:
            mean_scale = self._session.config['rebuild_mean_scale']

        # Prebind the functions for performance reasons:
        add = Song.distance_add
        compute = Song.distance_compute

        # Counted separately from the loop variable so that the log line
        # below is correct for zero passes and reports completed passes
        # (not the zero-based index of the last one).
        passes_done = 0

        # Do the whole thing `num_passes` times...
        for _ in range(num_passes):
            threshold = (mean_counter.mean * mean_scale - mean_counter.sd) / mean_scale
            newly_found = 0

            # Go through the song_list...
            for song in self:
                # ..and remember each calculated distance
                # we got from comparing the song with its indirect neighbors.
                result_set = deque()

                # Iterate over the indirect neighbors (those having a certain
                # distance lower than threshold):
                for ind_ngb in set(song.distance_indirect_iter(threshold)):
                    distance = compute(song, ind_ngb)
                    result_set.append((ind_ngb, distance))
                    mean_counter.add(distance.distance)

                # Add the distances (we should not do this during iteration)
                # and sum up what distance_add() returned for each of them.
                for ind_ngb, dist in result_set:
                    newly_found += add(song, ind_ngb, dist)

            passes_done += 1

            # Stop iteration when not enough new distances were gathered.
            # This usually only triggers for high num_passes
            if newly_found < len(self) // 2:
                break

        LOGGER.debug('Did {}x (of max. {}) refinement steps.'.format(passes_done, num_passes))

    def rebuild_stupid(self):
        """(Re)build the graph by calculating the combination of all songs.

        This is a *very* expensive operation which takes quadratic time and
        only should be ever used for a small amount of songs where accuracy
        matters even more than time.
        """
        # Iterate over ``self`` rather than the raw list: the raw list
        # contains ``None`` holes for removed songs.
        for song_a, song_b in combinations(self, 2):
            distance = Song.distance_compute(song_a, song_b)
            Song.distance_add(song_a, song_b, distance)

    def rebuild(self, window_size=None, step_size=None, refine_passes=None, stupid_threshold=None):
        """Rebuild all distances and the associated graph.

        This will be triggered for you automatically after a transaction.

        :param int window_size: The size of the sliding window in the base iteration.
        :param int step_size: The amount to move the window per iteration.
        :param int refine_passes: How often step #2 should be repeated.
        :param int stupid_threshold: If there are fewer songs than this, just
            calculate all combinations of songs by brute force. ``None``
            reads ``rebuild_stupid_threshold`` from the config.
        """
        if stupid_threshold is None:
            stupid_threshold = self._session.config['rebuild_stupid_threshold']

        if len(self) < stupid_threshold:
            LOGGER.debug('+ Step #1 + 2: Brute Force calculation due to few songs')
            self.rebuild_stupid()
        else:
            # Average and Standard Deviation Counter:
            mean_counter = RunningMean()

            LOGGER.debug('+ Step #1: Calculating base distance (sliding window)')
            self._rebuild_step_base(
                mean_counter,
                window_size=window_size,
                step_size=step_size
            )

            LOGGER.debug('|-- Mean Distane: {:f} (sd: {:f})'.format(
                mean_counter.mean, mean_counter.sd
            ))
            LOGGER.debug('+ Step #2: Applying refinement:')
            self._rebuild_step_refine(
                mean_counter,
                num_passes=refine_passes
            )

            LOGGER.debug('|-- Mean Distane: {:f} (sd: {:f})'.format(
                mean_counter.mean, mean_counter.sd
            ))

        self._reset_history()

    def add(self, value_dict):
        """Create a song from ``value_dict`` and store it (without linking
        it into the graph).

        Each non-``None`` value is replaced, in ``value_dict`` itself, by
        the result of its provider's ``process``.

        :param value_dict: Mapping of attribute key to raw value.
        :returns: The uid of the new song.
        :raises KeyError: If a key lookup fails while processing the values.
        """
        for key, value in value_dict.items():
            try:
                provider = self._session.provider_for_key(key)
                if value is None:
                    value_dict[key] = None
                else:
                    value_dict[key] = provider.process(value)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

        new_song = Song(
            self._session, value_dict,
            max_neighbors=self._session.config['max_neighbors'],
            max_distance=self._session.config['max_distance']
        )

        new_song.uid = self._current_uid()
        if new_song.uid >= len(self._song_list):
            self._song_list.append(new_song)
        else:
            # Fill the hole of a previously removed song.
            self._song_list[new_song.uid] = new_song

        return new_song.uid

    def fix_graph(self):
        """Call ``distance_finalize`` on every song and log a critical
        message for each song whose distances are not in ascending order.
        """
        for song in self:
            song.distance_finalize()

            # This is just some sort of assert and has no functionality:
            last = None
            for other, dist in song.distance_iter():
                if last is not None and last > dist:
                    LOGGER.critical('!! warning: unsorted elements: !({} < {})'.format(dist, last))
                last = dist

    def modify(self, song, sub_value_dict, star_threshold=0.75, iterstep_threshold=50):
        """Replace ``song`` by a new song with updated attributes.

        The new song takes over the uid of ``song``; ``song`` itself is
        removed (and disconnected) and the new song is linked into the graph
        like a freshly inserted one.

        Each non-``None`` value is replaced, in ``sub_value_dict`` itself,
        by the result of its provider's ``process``.

        :param song: The song to modify.
        :param sub_value_dict: Mapping of the attributes to change.
        :param star_threshold: See :func:`_insert_song_to_graph`.
        :param iterstep_threshold: See :func:`_insert_song_to_graph`.
        :returns: The uid of the new song.
        :raises KeyError: If a key lookup fails while processing the values.
        """
        value_dict = song.to_dict()
        for key, value in sub_value_dict.items():
            try:
                provider = self._session.provider_for_key(key)
                if value is None:
                    sub_value_dict[key] = None
                else:
                    sub_value_dict[key] = provider.process(value)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

        value_dict.update(sub_value_dict)
        new_song = Song(
            self._session, value_dict,
            max_neighbors=self._session.config['max_neighbors'],
            max_distance=self._session.config['max_distance']
        )

        new_song.uid = self.remove(song.uid)
        self._song_list[song.uid] = new_song

        # remove() marked the uid as revoked, but the slot is occupied again.
        # Without this, len() undercounts and the next add() would reuse the
        # uid and overwrite the song we just stored.
        self._revoked_uids.discard(song.uid)

        # Clear all known distances:
        new_song.distance_reset()

        return self._insert_song_to_graph(
            new_song, star_threshold, iterstep_threshold
        )

    def insert(self, value_dict, star_threshold=0.75, iterstep_threshold=50):
        """Add a song (see :func:`add`) and link it into the graph.

        :returns: The uid of the new song.
        """
        new_song = self._song_list[self.add(value_dict)]
        return self._insert_song_to_graph(
            new_song, star_threshold, iterstep_threshold
        )

    def _insert_song_to_graph(self, new_song, star_threshold=0.75, iterstep_threshold=50):
        """Compute distances between ``new_song`` and a sample of the
        stored songs.

        :param new_song: The song to link.
        :param star_threshold: For every sampled song whose distance to
            ``new_song`` is above this value, the neighbors of that song are
            compared to ``new_song`` as well.
        :param iterstep_threshold: With fewer songs than this every slot is
            sampled, otherwise only every ``round(log(n))``-th slot, where
            ``n`` is the length of the song list.
        :returns: The uid of ``new_song``.
        """
        next_len = len(self._song_list)
        if len(self) < iterstep_threshold:
            iterstep = 1
        else:
            iterstep = round(max(1, math.log(max(next_len, 1))))

        # Step 1: Find samples with similar songs (similar to the base step)
        distances = deque()
        for song in self._song_list[::iterstep]:
            # Skip the holes of removed songs.
            if song is not None:
                distance = Song.distance_compute(song, new_song)
                distances.append((song, distance))
                new_song.distance_add(song, distance)

        # Step 2: Short refinement step
        for song, distance in distances:
            if distance.distance > star_threshold:
                for neighbor in song.neighbors():
                    distance = new_song.distance_compute(neighbor)
                    new_song.distance_add(neighbor, distance)

        return new_song.uid

    def remove(self, uid):
        """Remove the song with the given uid and disconnect it.

        The slot of the song becomes a ``None`` hole and the uid is
        remembered as revoked so it can be reused.

        :param uid: The uid of the song to remove.
        :returns: The uid that was passed in.
        :raises ValueError: If the uid is beyond the end of the song list or
            refers to a song that was removed already.
        """
        if len(self._song_list) <= uid:
            raise ValueError('Invalid UID #{}'.format(uid))

        song = self._song_list[uid]
        if song is None:
            # Already removed: fail like any other invalid uid instead of
            # crashing on ``None.disconnect()`` below.
            raise ValueError('Invalid UID #{}'.format(uid))

        self._song_list[uid] = None
        self._revoked_uids.add(uid)

        # Patch the hole:
        song.disconnect()

        return uid
class Database:
    """Class managing Database concerns.

    Songs are stored in a list that is indexed by their uid. Removing a song
    leaves a ``None`` hole in that list; the uid of every hole is remembered
    in ``_revoked_uids`` so that the next :func:`add` can hand it out again.
    """

    def __init__(self, session):
        """Usually you access this as ``.database`` attribute of
        :class:`munin.session.Session`.

        You can do the following tasks with it:

        * Trigger updates (:func:`rebuild`)
        * Get a plot of the graph for debugging purposes.
        * Iterate over the database (``for song in database``).
        * Get a song by its uid. (``database[song.uid]``)

        .. note::

            The division of :class:`munin.session.Session` and
            :class:`Database` is purely cosmetic. Neither class can exist
            on its own.

        :param session: The owning session. Its ``config`` mapping and its
            ``provider_for_key`` method are used by this class.
        """
        self._session = session
        self._song_list = []
        self._reset_history()

    def _reset_history(self):
        """Replace all history bookkeeping with newly constructed objects.

        The limits passed to ``ListenHistory`` and ``RuleIndex`` are read
        from the session's config.
        """
        # NOTE(review): this also forgets the revoked uids although the
        # matching ``None`` holes stay in ``_song_list``. Since rebuild()
        # calls this, ``len(self)`` looks like it overcounts after a
        # remove() followed by a rebuild() - confirm whether intended.
        self._revoked_uids = set()
        self._listen_history = ListenHistory(
            maxlen=self._session.config['history_max_pkg'],
            time_threshold_sec=self._session.config['history_timeout'],
            max_group_size=self._session.config['history_pkg_size'])
        self._rule_index = RuleIndex(
            maxlen=self._session.config['history_max_rules'])
        self._playcounts = Counter()

    def __iter__(self):
        """Iterate over the stored songs, skipping falsy entries
        (i.e. the ``None`` holes left behind by :func:`remove`).
        """
        return filter(None, self._song_list)

    def __len__(self):
        """Return the number of list slots minus the number of revoked uids."""
        return len(self._song_list) - len(self._revoked_uids)

    def __getitem__(self, idx):
        """Lookup a certain song by its uid.

        :param idx: A uid previously returned by :func:`add` or
            :func:`insert`.
        :returns: The entry stored for that uid. This is ``None`` if the
            slot was emptied by :func:`remove`.
        :raises IndexError: If ``idx`` is outside of the song list.
        """
        try:
            return self._song_list[idx]
        except IndexError:
            raise IndexError('song uid #{} is invalid'.format(idx))

    def _current_uid(self):
        """Return the uid the next added song should get.

        Revoked uids are reused first (the chosen one is taken out of the
        revoked set); otherwise the uid is the next free list index.
        """
        if self._revoked_uids:
            return self._revoked_uids.pop()
        return len(self._song_list)

    def plot(self, width=1000, height=1000, **kwargs):
        """Plot the current graph for debugging purpose.

        Will try to open an installed image viewer - does not return an image.

        :param width: Width of the plotted image in pixel.
        :param height: Height of the plotted image in pixel.
        :param kwargs: Passed through unchanged to ``munin.plot.plot``.
        """
        munin.plot.plot(self, width, height, **kwargs)

    def playcount(self, song):
        """Return how often ``song`` was fed to :func:`feed_history`
        since the last history reset (0 if never).
        """
        return self._playcounts.get(song, 0)

    def playcounts(self, n=0):
        """Return the recorded playcounts.

        :param n: If smaller than 1 the internal counter itself is
            returned, otherwise only the ``n`` most common entries.
        :returns: The counter mapping, or a list of ``(song, count)`` pairs
            as produced by ``Counter.most_common``.
        """
        if n < 1:
            return self._playcounts
        else:
            return self._playcounts.most_common(n)

    def feed_history(self, song):
        """Record that ``song`` was listened to.

        Whenever the listen history's ``feed`` returns a true value, rules
        are (re)computed from the history and inserted into the rule index.
        The playcount of ``song`` is incremented in any case.
        """
        if self._listen_history.feed(song):
            rules = self._listen_history.find_rules()
            self._rule_index.insert_rules(rules)
        self._playcounts[song] += 1

    def find_matching_attributes(self, subset, max_numeric_offset=None):
        """Yield the songs matching the attribute values in ``subset``.

        :param subset: Mapping of attribute key to an (unprocessed) value.
        :param max_numeric_offset: If ``None`` the generic matching is used,
            otherwise the numeric matching with this maximal offset.
        :returns: A generator of songs.
        """
        if max_numeric_offset is None:
            return self.find_matching_attributes_generic(subset)
        else:
            return self.find_matching_attributes_numeric(
                subset, max_numeric_offset)

    def find_matching_attributes_numeric(self, subset, max_offset):
        """Yield songs whose values lie within ``max_offset`` of ``subset``.

        Every value in ``subset`` is run through the provider of its key.
        For each song, only keys present in both ``subset`` and the song are
        checked; the first element of the song's value is compared against
        the first element of the processed value. Songs with a ``None``
        value for such a key are rejected.

        :raises KeyError: If a key lookup fails while matching.
        """
        try:
            numerics = {}
            for key, value in subset.items():
                provider = self._session.provider_for_key(key)
                numerics[key] = provider.process(value)

            for song in self:
                for key in (numerics.keys() & song.keys()):
                    value = song.get(key)
                    if value is None:
                        break

                    compar = numerics[key][0]
                    if not (compar - max_offset <= value[0]
                            <= compar + max_offset):
                        break
                else:
                    # No key rejected this song.
                    yield song
        except KeyError:
            raise KeyError('key "{k}" is not in mask'.format(k=key))

    def find_matching_attributes_generic(self, subset):
        """Yield songs whose values for the keys of ``subset`` all occur
        among the processed values of ``subset``.

        :raises KeyError: If a key lookup fails while matching.
        """
        try:
            # NOTE(review): the processed values of all keys are pooled into
            # one set, so a song's value for key A also matches when it
            # equals the requested value of key B - confirm this is wanted.
            value_set = set()
            for key, value in subset.items():
                provider = self._session.provider_for_key(key)
                value_set.add(provider.process(value))

            for song in self:
                if all((song[key] in value_set for key in subset.keys())):
                    yield song
        except KeyError:
            raise KeyError('key "{k}" is not in mask'.format(k=key))

    def _rebuild_step_base(self, mean_counter, window_size, step_size):
        """Do the Base Iterations.

        This involves three iterations over the songs:

        * :func:`munin.helper.sliding_window` with ``window_size`` and
          ``step_size``.
        * :func:`munin.helper.centering_window` with half the window size.
        * :func:`munin.helper.centering_window` with half the window size
          and ``parallel=False``.

        Within every window the distance of each pair of songs is computed,
        added to both songs and sampled into ``mean_counter``.

        :param mean_counter: A RunningMean counter to sample the initial
            mean/sd
        :param window_size: The max. size of the window in which combinations
            are taken. ``None`` reads ``rebuild_window_size`` from the config.
        :param step_size: The movement of the window per iteration.
            ``None`` reads ``rebuild_step_size`` from the config.
        """
        if window_size is None:
            window_size = self._session.config['rebuild_window_size']

        if step_size is None:
            step_size = self._session.config['rebuild_step_size']

        # Base Iteration:
        slider = sliding_window(self, window_size, step_size)
        center = centering_window(self, window_size // 2)
        anticn = centering_window(self, window_size // 2, parallel=False)

        # Prebind the functions for performance reasons.
        compute = Song.distance_compute
        add = Song.distance_add

        # Select the iterator:
        for idx, iterator in enumerate((slider, center, anticn)):
            LOGGER.debug('|-- Applying iteration #{}: {}'.format(
                idx + 1, iterator))

            # Iterate over the list:
            for window in iterator:
                # Calculate the combination set:
                for song_a, song_b in combinations(window, 2):
                    distance = compute(song_a, song_b)
                    add(song_a, song_b, distance)

                    # Sample the newly calculated distance.
                    mean_counter.add(distance.distance)

    def _rebuild_step_refine(self, mean_counter, num_passes=None,
                             mean_scale=None):
        """Do the refinement step.

        .. seealso:: :func:`rebuild`

        :param mean_counter: RunningMean Counter
        :param num_passes: How many times the song list shall be iterated at
            most. ``None`` reads ``rebuild_refine_passes`` from the config.
        :param mean_scale: Scaling used to derive the distance threshold
            from mean and sd. ``None`` reads ``rebuild_mean_scale`` from the
            config.
        """
        if num_passes is None:
            num_passes = self._session.config['rebuild_refine_passes']

        if mean_scale is None:
            mean_scale = self._session.config['rebuild_mean_scale']

        # Prebind the functions for performance reasons:
        add = Song.distance_add
        compute = Song.distance_compute

        # Counted separately from the loop variable so that the log line
        # below is correct for zero passes and reports completed passes
        # (not the zero-based index of the last one).
        passes_done = 0

        # Do the whole thing `num_passes` times...
        for _ in range(num_passes):
            threshold = (mean_counter.mean * mean_scale
                         - mean_counter.sd) / mean_scale
            newly_found = 0

            # Go through the song_list...
            for song in self:
                # ..and remember each calculated distance
                # we got from comparing the song with its indirect neighbors.
                result_set = deque()

                # Iterate over the indirect neighbors (those having a certain
                # distance lower than threshold):
                for ind_ngb in set(song.distance_indirect_iter(threshold)):
                    distance = compute(song, ind_ngb)
                    result_set.append((ind_ngb, distance))
                    mean_counter.add(distance.distance)

                # Add the distances (we should not do this during iteration)
                # and sum up what distance_add() returned for each of them.
                for ind_ngb, dist in result_set:
                    newly_found += add(song, ind_ngb, dist)

            passes_done += 1

            # Stop iteration when not enough new distances were gathered.
            # This usually only triggers for high num_passes
            if newly_found < len(self) // 2:
                break

        LOGGER.debug('Did {}x (of max. {}) refinement steps.'.format(
            passes_done, num_passes))

    def rebuild_stupid(self):
        """(Re)build the graph by calculating the combination of all songs.

        This is a *very* expensive operation which takes quadratic time and
        only should be ever used for a small amount of songs where accuracy
        matters even more than time.
        """
        # Iterate over ``self`` rather than the raw list: the raw list
        # contains ``None`` holes for removed songs.
        for song_a, song_b in combinations(self, 2):
            distance = Song.distance_compute(song_a, song_b)
            Song.distance_add(song_a, song_b, distance)

    def rebuild(self, window_size=None, step_size=None, refine_passes=None,
                stupid_threshold=None):
        """Rebuild all distances and the associated graph.

        This will be triggered for you automatically after a transaction.

        :param int window_size: The size of the sliding window in the base
            iteration.
        :param int step_size: The amount to move the window per iteration.
        :param int refine_passes: How often step #2 should be repeated.
        :param int stupid_threshold: If there are fewer songs than this, just
            calculate all combinations of songs by brute force. ``None``
            reads ``rebuild_stupid_threshold`` from the config.
        """
        if stupid_threshold is None:
            stupid_threshold = self._session.config['rebuild_stupid_threshold']

        if len(self) < stupid_threshold:
            LOGGER.debug(
                '+ Step #1 + 2: Brute Force calculation due to few songs')
            self.rebuild_stupid()
        else:
            # Average and Standard Deviation Counter:
            mean_counter = RunningMean()

            LOGGER.debug(
                '+ Step #1: Calculating base distance (sliding window)')
            self._rebuild_step_base(mean_counter, window_size=window_size,
                                    step_size=step_size)

            LOGGER.debug('|-- Mean Distane: {:f} (sd: {:f})'.format(
                mean_counter.mean, mean_counter.sd))
            LOGGER.debug('+ Step #2: Applying refinement:')
            self._rebuild_step_refine(mean_counter, num_passes=refine_passes)

            LOGGER.debug('|-- Mean Distane: {:f} (sd: {:f})'.format(
                mean_counter.mean, mean_counter.sd))

        self._reset_history()

    def add(self, value_dict):
        """Create a song from ``value_dict`` and store it (without linking
        it into the graph).

        Each non-``None`` value is replaced, in ``value_dict`` itself, by
        the result of its provider's ``process``.

        :param value_dict: Mapping of attribute key to raw value.
        :returns: The uid of the new song.
        :raises KeyError: If a key lookup fails while processing the values.
        """
        for key, value in value_dict.items():
            try:
                provider = self._session.provider_for_key(key)
                if value is None:
                    value_dict[key] = None
                else:
                    value_dict[key] = provider.process(value)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

        new_song = Song(self._session, value_dict,
                        max_neighbors=self._session.config['max_neighbors'],
                        max_distance=self._session.config['max_distance'])

        new_song.uid = self._current_uid()
        if new_song.uid >= len(self._song_list):
            self._song_list.append(new_song)
        else:
            # Fill the hole of a previously removed song.
            self._song_list[new_song.uid] = new_song

        return new_song.uid

    def fix_graph(self):
        """Call ``distance_finalize`` on every song and log a critical
        message for each song whose distances are not in ascending order.
        """
        for song in self:
            song.distance_finalize()

            # This is just some sort of assert and has no functionality:
            last = None
            for other, dist in song.distance_iter():
                if last is not None and last > dist:
                    LOGGER.critical(
                        '!! warning: unsorted elements: !({} < {})'.format(
                            dist, last))
                last = dist

    def modify(self, song, sub_value_dict, star_threshold=0.75,
               iterstep_threshold=50):
        """Replace ``song`` by a new song with updated attributes.

        The new song takes over the uid of ``song``; ``song`` itself is
        removed (and disconnected) and the new song is linked into the graph
        like a freshly inserted one.

        Each non-``None`` value is replaced, in ``sub_value_dict`` itself,
        by the result of its provider's ``process``.

        :param song: The song to modify.
        :param sub_value_dict: Mapping of the attributes to change.
        :param star_threshold: See :func:`_insert_song_to_graph`.
        :param iterstep_threshold: See :func:`_insert_song_to_graph`.
        :returns: The uid of the new song.
        :raises KeyError: If a key lookup fails while processing the values.
        """
        value_dict = song.to_dict()
        for key, value in sub_value_dict.items():
            try:
                provider = self._session.provider_for_key(key)
                if value is None:
                    sub_value_dict[key] = None
                else:
                    sub_value_dict[key] = provider.process(value)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

        value_dict.update(sub_value_dict)
        new_song = Song(self._session, value_dict,
                        max_neighbors=self._session.config['max_neighbors'],
                        max_distance=self._session.config['max_distance'])

        new_song.uid = self.remove(song.uid)
        self._song_list[song.uid] = new_song

        # remove() marked the uid as revoked, but the slot is occupied again.
        # Without this, len() undercounts and the next add() would reuse the
        # uid and overwrite the song we just stored.
        self._revoked_uids.discard(song.uid)

        # Clear all known distances:
        new_song.distance_reset()

        return self._insert_song_to_graph(new_song, star_threshold,
                                          iterstep_threshold)

    def insert(self, value_dict, star_threshold=0.75, iterstep_threshold=50):
        """Add a song (see :func:`add`) and link it into the graph.

        :returns: The uid of the new song.
        """
        new_song = self._song_list[self.add(value_dict)]
        return self._insert_song_to_graph(new_song, star_threshold,
                                          iterstep_threshold)

    def _insert_song_to_graph(self, new_song, star_threshold=0.75,
                              iterstep_threshold=50):
        """Compute distances between ``new_song`` and a sample of the
        stored songs.

        :param new_song: The song to link.
        :param star_threshold: For every sampled song whose distance to
            ``new_song`` is above this value, the neighbors of that song are
            compared to ``new_song`` as well.
        :param iterstep_threshold: With fewer songs than this every slot is
            sampled, otherwise only every ``round(log(n))``-th slot, where
            ``n`` is the length of the song list.
        :returns: The uid of ``new_song``.
        """
        next_len = len(self._song_list)
        if len(self) < iterstep_threshold:
            iterstep = 1
        else:
            iterstep = round(max(1, math.log(max(next_len, 1))))

        # Step 1: Find samples with similar songs (similar to the base step)
        distances = deque()
        for song in self._song_list[::iterstep]:
            # Skip the holes of removed songs.
            if song is not None:
                distance = Song.distance_compute(song, new_song)
                distances.append((song, distance))
                new_song.distance_add(song, distance)

        # Step 2: Short refinement step
        for song, distance in distances:
            if distance.distance > star_threshold:
                for neighbor in song.neighbors():
                    distance = new_song.distance_compute(neighbor)
                    new_song.distance_add(neighbor, distance)

        return new_song.uid

    def remove(self, uid):
        """Remove the song with the given uid and disconnect it.

        The slot of the song becomes a ``None`` hole and the uid is
        remembered as revoked so it can be reused.

        :param uid: The uid of the song to remove.
        :returns: The uid that was passed in.
        :raises ValueError: If the uid is beyond the end of the song list or
            refers to a song that was removed already.
        """
        if len(self._song_list) <= uid:
            raise ValueError('Invalid UID #{}'.format(uid))

        song = self._song_list[uid]
        if song is None:
            # Already removed: fail like any other invalid uid instead of
            # crashing on ``None.disconnect()`` below.
            raise ValueError('Invalid UID #{}'.format(uid))

        self._song_list[uid] = None
        self._revoked_uids.add(uid)

        # Patch the hole:
        song.disconnect()

        return uid