def process(self, activations, **kwargs):
    """
    Detect the tempi from the (beat) activations.

    Parameters
    ----------
    activations : numpy array
        Beat activation function.
    **kwargs
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    tempi : numpy array
        Array with the dominant tempi [bpm] (first column) and their
        relative strengths (second column).

    """
    # smooth the activations; `self.act_smooth` is scaled by the frame rate
    # to obtain the kernel size in frames
    act_smooth = int(round(self.fps * self.act_smooth))
    activations = smooth_signal(activations, act_smooth)
    # generate a histogram of beat intervals
    # Note: the builtin `float` is used because the `np.float` alias was
    #       deprecated in NumPy 1.20 and removed in NumPy 1.24; both denote
    #       the same (double precision) dtype
    histogram = self.interval_histogram(activations.astype(float))
    # smooth the histogram
    histogram = smooth_histogram(histogram, self.hist_smooth)
    # detect the tempi and return them
    return detect_tempo(histogram, self.fps)
def smooth_histogram(histogram, smooth):
    """
    Apply a smoothing kernel to the bins of a histogram.

    Parameters
    ----------
    histogram : tuple
        Histogram given as a tuple of 2 numpy arrays: the strengths of the
        bins come first, the corresponding delay values second.
    smooth : int or numpy array
        Smoothing kernel (size).

    Returns
    -------
    histogram_bins : numpy array
        Bins of the smoothed histogram.
    histogram_delays : numpy array
        Corresponding delays (passed through unchanged).

    Notes
    -----
    If `smooth` is an integer, a Hamming window of that length will be used
    as a smoothing kernel.

    """
    # only the bin strengths are filtered ...
    smoothed_bins = smooth_signal(histogram[0], smooth)
    # ... whereas the delays are handed back exactly as they were given
    delays = histogram[1]
    return smoothed_bins, delays
def process(self, activations):
    """
    Detect the beats in the given activation function.

    If `self.look_ahead` is None, a single (global) dominant interval is
    determined for the whole signal. Otherwise the dominant interval is
    re-estimated in a window of `look_ahead` seconds to either side of each
    predicted beat position, allowing the tempo to vary.

    :param activations: beat activation function
    :return:            detected beat positions [seconds]

    """
    estimator = self.tempo_estimator
    # smooth the activations first
    kernel_size = int(self.fps * estimator.act_smooth)
    activations = smooth_signal(activations, kernel_size)
    # TODO: refactor interval stuff to use TempoEstimation
    if self.look_ahead is None:
        # no look-ahead defined: assume a global tempo, i.e. derive one
        # dominant interval from the histogram of the whole signal
        histogram = estimator.interval_histogram(activations)
        interval = estimator.dominant_interval(histogram)
        # detect the beats based on this interval
        detections = detect_beats(activations, interval, self.look_aside)
    else:
        # varying tempo: track the beats one after another
        half_window = int(self.look_ahead * self.fps)
        num_frames = len(activations)
        detections = []
        pos = 0
        # TODO: make this _much_ faster!
        while pos < num_frames:
            # consider `half_window` frames to either side of the position
            start = pos - half_window
            end = pos + half_window
            if start < 0:
                # window begins before the signal: prepend zeros
                window = np.append(np.zeros(-start), activations[:end])
            elif end > num_frames:
                # window exceeds the signal: append zeros accordingly
                padding = np.zeros(end - num_frames)
                window = np.append(activations[start:], padding)
            else:
                window = activations[start:end]
            # dominant interval inside this window
            histogram = estimator.interval_histogram(window)
            interval = estimator.dominant_interval(histogram)
            # beats relative to the window ...
            positions = detect_beats(window, interval, self.look_aside)
            # ... shifted (in place) to positions relative to the signal
            positions += start
            # the beat closest to the predicted position is the next beat
            closest = np.abs(positions - pos).argmin()
            pos = positions[closest]
            detections.append(pos)
            # predict the position of the following beat
            pos += interval
    # convert the detected beats (frames) to timestamps
    timestamps = np.array(detections) / float(self.fps)
    # drop beats with negative times and return the rest
    first_valid = np.searchsorted(timestamps, 0)
    return timestamps[first_valid:]
def smooth_histogram(histogram, smooth):
    """
    Smooth the bins of the given histogram.

    :param histogram: histogram (indexable; the bins are expected at index 0
                      and the corresponding delays at index 1)
    :param smooth:    smoothing kernel [numpy array or int]
    :return:          smoothed histogram as a tuple (smoothed bins, delays)

    Note: If 'smooth' is an integer, a Hamming window of that length will be
          used as a smoothing kernel.

    """
    # filter the histogram bins only
    bins = smooth_signal(histogram[0], smooth)
    # the corresponding delays are returned untouched
    return bins, histogram[1]
def process(self, activations):
    """
    Detect the beats in the given activation function.

    Candidate beat intervals are derived from the tempo estimate, one beat
    sequence is computed per candidate interval via `_process_crf`, and the
    sequence with the highest length-normalised probability is returned.

    :param activations: beat activation function
    :return:            detected beat positions [seconds]
    :raises ValueError: if no candidate interval is left to evaluate

    """
    import itertools as it
    # estimate the tempo and convert the tempi [bpm] to intervals [frames]
    tempi = self.tempo_estimator.process(activations)
    intervals = self.fps * 60. / tempi[:, 0]
    # compute possible intervals
    if self.use_factors:
        # use the dominant interval with different factors and keep only
        # those within the interval range of the tempo estimator
        min_interval = self.tempo_estimator.min_interval
        max_interval = self.tempo_estimator.max_interval
        scaled = (int(intervals[0] * f) for f in self.factors)
        possible_intervals = [i for i in scaled
                              if min_interval <= i <= max_interval]
    else:
        # take the top n intervals from the tempo estimator
        possible_intervals = intervals[:self.num_intervals]
    # sort and start from the greatest interval
    possible_intervals = [int(i) for i in
                          sorted(possible_intervals, reverse=True)]
    if not possible_intervals:
        # previously this surfaced as an obscure ValueError raised by
        # `argmax` on an empty array further down
        raise ValueError('no possible beat intervals to evaluate')
    # smooth activations
    act_smooth = int(self.fps * self.tempo_estimator.act_smooth)
    activations = smooth_signal(activations, act_smooth)
    # since the cython code uses memory views, we need to make sure that
    # the activations are C-contiguous and of C-type float (np.float32)
    contiguous_act = np.ascontiguousarray(activations, dtype=np.float32)
    # Note: `itertools.izip` exists only in Python 2, the builtin `zip`
    #       stops at the shortest (finite) input on both Python 2 and 3;
    #       the results are indexed below, so a lazy iterator returned by
    #       `self.map` must be materialised first
    results = list(self.map(
        _process_crf, zip(it.repeat(contiguous_act), possible_intervals,
                          it.repeat(self.interval_sigma))))
    # normalize the probabilities by the length of the sequences
    normalized_seq_probabilities = np.array([r[1] / r[0].shape[0]
                                             for r in results])
    # pick the best one
    best_seq = results[normalized_seq_probabilities.argmax()][0]
    # convert the detected beat positions to seconds and return them
    # Note: the `np.float` alias was removed in NumPy 1.24, the builtin
    #       `float` denotes the same dtype
    return best_seq.astype(float) / self.fps
def process(self, activations):
    """
    Detect the tempi from the beat activations.

    :param activations: RNN beat activation function
    :return:            numpy array with the dominant tempi (first column)
                        and their relative strengths (second column)

    """
    # smooth the activations; `self.act_smooth` is scaled by the frame rate
    # to obtain the kernel size in frames
    act_smooth = int(round(self.fps * self.act_smooth))
    activations = smooth_signal(activations, act_smooth)
    # generate a histogram of beat intervals
    # Note: the builtin `float` is used because the `np.float` alias was
    #       deprecated in NumPy 1.20 and removed in NumPy 1.24; both denote
    #       the same (double precision) dtype
    histogram = self.interval_histogram(activations.astype(float))
    # smooth the histogram
    histogram = smooth_histogram(histogram, self.hist_smooth)
    # detect the tempi and return them
    return detect_tempo(histogram, self.fps)
def peak_picking(activations, threshold, smooth=None, pre_avg=0, post_avg=0,
                 pre_max=1, post_max=1):
    """
    Perform thresholding and peak-picking on the given activation function.

    Parameters
    ----------
    activations : numpy array
        Activation function (1D or 2D).
    threshold : float
        Threshold for peak-picking
    smooth : int or numpy array, optional
        Smooth the activation function with the kernel (size). No smoothing
        is applied if `smooth` is None or 0.
    pre_avg : int, optional
        Use `pre_avg` frames past information for moving average.
    post_avg : int, optional
        Use `post_avg` frames future information for moving average.
    pre_max : int, optional
        Use `pre_max` frames past information for moving maximum.
    post_max : int, optional
        Use `post_max` frames future information for moving maximum.

    Returns
    -------
    peak_idx : numpy array or tuple of numpy arrays
        Indices of the detected peaks. For 1D `activations` a single index
        array is returned, for 2D `activations` the tuple of index arrays
        (one per dimension) as returned by `np.nonzero`.

    Raises
    ------
    ValueError
        If `activations` is neither 1D nor 2D.

    See Also
    --------
    :func:`smooth_signal`

    Notes
    -----
    If no moving average is needed (e.g. the activations are independent of
    the signal's level as for neural network activations), set `pre_avg` and
    `post_avg` to 0.
    For peak picking of local maxima, set `pre_max` and `post_max` to 1.
    For online peak picking, set all `post_` parameters to 0.

    References
    ----------
    .. [1] Sebastian Böck, Florian Krebs and Markus Schedl,
           "Evaluating the Online Capabilities of Onset Detection Methods",
           Proceedings of the 13th International Society for Music
           Information Retrieval Conference (ISMIR), 2012.

    """
    # smooth activations
    # Note: `smooth not in (None, 0)` compares element-wise if `smooth` is an
    #       array and then fails on the ambiguous truth value of the result,
    #       thus kernels with more than one element are recognised first
    is_kernel = isinstance(smooth, np.ndarray) and smooth.size > 1
    if is_kernel or smooth not in (None, 0):
        activations = smooth_signal(activations, smooth)
    # validate the dimensionality once instead of in every branch below
    if activations.ndim not in (1, 2):
        raise ValueError('`activations` must be either 1D or 2D')
    is_1d = activations.ndim == 1
    # compute a moving average
    avg_length = pre_avg + post_avg + 1
    if avg_length > 1:
        # TODO: make the averaging function exchangeable (mean/median/etc.)
        # shift the window according to the past/future frames to be used
        avg_origin = int(np.floor((pre_avg - post_avg) / 2))
        # 2D activations are filtered along the first dimension only
        filter_size = avg_length if is_1d else [avg_length, 1]
        mov_avg = uniform_filter(activations, filter_size, mode='constant',
                                 origin=avg_origin)
    else:
        # do not use a moving average
        mov_avg = 0
    # detections are those activations above the moving average + the
    # threshold, everything else is set to 0 (this creates a new array, the
    # given activations are not altered)
    detections = activations * (activations >= mov_avg + threshold)
    # peak-picking
    max_length = pre_max + post_max + 1
    if max_length > 1:
        # compute a moving maximum
        max_origin = int(np.floor((pre_max - post_max) / 2))
        filter_size = max_length if is_1d else [max_length, 1]
        mov_max = maximum_filter(detections, filter_size, mode='constant',
                                 origin=max_origin)
        # detections are peak positions, i.e. equal to the moving maximum
        detections *= (detections == mov_max)
    # return indices
    if is_1d:
        return np.nonzero(detections)[0]
    return np.nonzero(detections)