def predictOne(self, samples: Signal):
    """
    Detect onsets in the audio with librosa.

    Reads the "hopLength" and "backtrack" parameters, runs
    librosa.onset.onset_detect on the raw samples and returns a 1-tuple
    holding a sparse Signal with one entry per detected onset.
    """
    hop = self.parameters["hopLength"].value
    onsetIndexes = librosa.onset.onset_detect(y=samples.values,
                                              sr=samples.sampleRate,
                                              hop_length=hop,
                                              backtrack=self.parameters["backtrack"].value)
    # NOTE(review): the values are read at samples[index] while the times are
    # read at index * hop - confirm that both are meant to use the same position.
    onsetValues = samples[onsetIndexes]
    onsetTimes = [samples.getTime(index * hop) for index in onsetIndexes]
    return (Signal(onsetValues, times=onsetTimes, sparse=True), )
def _getOnsets(self, signal):
    """
    Compute the first order difference of the signal.

    Returns a pair of Signals: the signed difference and its absolute value.
    Both use the times of the input signal without the first entry.
    """
    delta = np.diff(signal)
    signed = Signal(delta, times=signal.getTimes()[1:])
    magnitude = Signal(np.abs(delta), times=signal.getTimes()[1:])
    return signed, magnitude
def predictOne(self, samples: Signal) -> List[Signal]:
    """
    Split the audio into harmonic and percussive parts with librosa's hpss.

    Returns a tuple (harmonic, percussive) of Signals that keep the sample
    rate of the input.
    """
    harmonic, percussive = librosa.effects.hpss(samples.values)
    rate = samples.sampleRate
    harmonicSignal = Signal(np.array(harmonic), sampleRate=rate)
    percussiveSignal = Signal(np.array(percussive), sampleRate=rate)
    return (harmonicSignal, percussiveSignal)
def deserializeTrack(path, agreement=0.51, distanceAgreement=0.5, minimalAnnotator=0, minimalConfidence=0):
    """instantiate a Track from the jams encoding. https://github.com/marl/jams/

    Args:
    ----
        path (str): path to the .JAMS file
        agreement (float, optional): minimal ratio of annotators agreeing to keep the point. Defaults to 0.51.
        distanceAgreement (float, optional): distance between annotations to cluster them to the same point. Defaults to 0.5.
        minimalAnnotator (int, optional): minimal number of annotators to keep the annotation. Defaults to 0.
        minimalConfidence (int, optional): not read by this function at the moment; kept so existing callers
            keep working. Defaults to 0.

    Returns:
    -------
        Track: a track with annotations in its features
    """
    track = Track()
    # JAMS files are JSON documents; read them as UTF-8 instead of relying on
    # the platform default encoding, which differs between systems.
    with open(path, encoding="utf-8") as file:
        reference = json.load(file)

    # meta
    track.path = path
    track.features["duration"] = reference["file_metadata"]["duration"]
    track.name = reference["file_metadata"]["title"]

    switchsIn = []
    for annotation in reference["annotations"]:
        annotator = annotation["annotation_metadata"]["annotator"]["name"]
        namespace = annotation["namespace"]

        if namespace == "segment_open":
            # old format segment_open
            segments = annotation["data"]
            track.features["boundaries"] = Signal(1, times=[segment["time"] for segment in segments], sparse=True)
            track.features["labels"] = [segment["value"] for segment in segments]
        elif namespace == "tempo":
            track.features["tempo"] = annotation["data"][0]["value"]
        elif namespace == "cue_point":
            # Current format with confidence, segment, and multiple annotators.
            # Only the "IN" cue points are used; "OUT" cue points are not aggregated.
            cuesIn = [segment for segment in annotation["data"] if segment["value"]["label"] == "IN"]
            switchsIn.append(cuesIn)
            track.features["switchIn-" + annotator] = Signal(1,
                                                             times=[segment["time"] for segment in cuesIn],
                                                             sparse=True)

    # Merge the "IN" cue points of every annotator into a single feature.
    track.features["switchIn"] = JamsSerializer.aggregateAnnotations(switchsIn,
                                                                     agreementThreshold=agreement,
                                                                     distanceAgreement=distanceAgreement,
                                                                     minimalAnnotator=minimalAnnotator)
    return track
def diff(grid, values, maxThreshold=-1):
    """
    Get the difference between the values and the ground truth ticks (grid).

    The values are quantized to the grid with Signal.quantizeTo, keeping out of
    bound and duplicated entries, and the returned array is the quantized time
    minus the original value.
    According to the original notes, a difference above maxThreshold counts as
    zero, and the default threshold is half the mean distance between two ticks
    of the grid (behaviour of quantizeTo - TODO confirm).
    This is useful for looking at the difference between events in two tracks.
    TODO: Include that in signal class ?
    """
    reference = Signal(1, times=grid)
    quantized = Signal(1, times=values)
    quantized.quantizeTo(reference,
                         maxThreshold=maxThreshold,
                         removeOutOfBound=False,
                         removeDuplicatedValues=False)
    offsets = quantized.times - values
    return offsets
def predictOne(self, samples: Signal, grid: Signal):
    """
    Compute the replay gain of the samples.

    Without a grid, one gain is computed for the whole signal and returned at
    time 0. With a grid, one gain is computed between each pair of consecutive
    ticks and timed with the first tick of the pair.
    Returns a 1-tuple holding the resulting Signal.
    """
    if grid is None:
        wholeGain = standard.ReplayGain(sampleRate=samples.sampleRate)(samples.values)
        return (Signal(wholeGain, times=[0]), )

    gains = []
    for index in range(len(grid.times) - 1):
        # A new algorithm instance is created for every window, as before.
        replayGain = standard.ReplayGain(sampleRate=samples.sampleRate)
        window = samples.getValues(grid.times[index], grid.times[index + 1])
        gains.append(replayGain(window))
    return (Signal(gains, times=grid.times[:-1]), )
def getSalience(self, point, features: List[Signal], grid: Signal, window):
    """
    Return the salience of the window following the point.

    For each feature, the values between the point and the tick located
    `window` ticks later in the grid are averaged. The result is the mean of
    these averages, or 0 when there is no feature.
    """
    if len(features) == 0:
        return 0

    total = 0
    for feature in features:
        try:
            windowEnd = grid.getTime(grid.getIndex(point) + window)
            segment = feature.getValues(point, windowEnd)
        except IndexError:
            # TODO sometimes the position is beyond the grid ?
            segment = [0]
        if len(segment):
            total += np.mean(segment)
        else:
            total += 0
    return total / len(features)
def predictOne(self, path):
    """
    Track the beats and downbeats of the file with madmom.

    Returns a tuple (beats, downbeats, strongBeats, tempo): three sparse
    Signals of ones located at every beat, at the beats numbered 1, and at the
    beats numbered 1 or 3, followed by the tempo derived from the mean
    interval between consecutive beats.
    """
    fps = 100
    activations = madmom.features.RNNDownBeatProcessor()(str(path))
    tracker = madmom.features.DBNDownBeatTrackingProcessor(
        beats_per_bar=[3, 4],
        fps=fps,
        transition_lambda=self.parameters["transitionLambda"].value,
        correct=self.parameters["correctToActivation"].value)
    beats = tracker(activations)

    # Each row is (time, position in the bar). The positions are expected to
    # cycle over 4 values, starting from the position of the first beat.
    if any((index + beats[0][1] - 1) % 4 + 1 != beat[1] for index, beat in enumerate(beats)):
        logging.error("Beat detection skipped a beat")

    # The tempo comes from the raw beat times; the even-grid extension that
    # used to be sketched here is disabled.
    tempo = 60 / np.mean(np.diff(np.array(beats)[:, 0]))

    beatsT = []
    downbeatsT = []
    strongBeatsT = []
    for beat in beats:
        beatsT.append(beat[0])
        if beat[1] == 1:
            downbeatsT.append(beat[0])
        if beat[1] == 1 or beat[1] == 3:
            strongBeatsT.append(beat[0])

    return (Signal(np.ones(len(beatsT)), times=beatsT, sparse=True),
            Signal(np.ones(len(downbeatsT)), times=downbeatsT, sparse=True),
            Signal(np.ones(len(strongBeatsT)), times=strongBeatsT, sparse=True),
            tempo)
def chromagram(self, samples: Signal):
    """
    Compute the STFT chromagram of the samples with librosa.

    Returns a 1-tuple holding a Signal with one chroma vector per frame,
    sampled at sampleRate / hopLength.
    """
    sr = samples.sampleRate
    hop_length = self.parameters["hopLength"].value
    # The hop length has to be handed to librosa: otherwise librosa uses its
    # own default and the sample rate computed below does not describe the
    # frames that are actually returned.
    result = librosa.feature.chroma_stft(y=samples.values, sr=sr, hop_length=hop_length)
    pcp_sr = sr / hop_length
    return (Signal(result.T, sampleRate=pcp_sr), )
def predictOne(self, inputFeatures: List[Signal], inputGrid: Signal):
    """
    Keep one tick of the grid every "period" ticks.

    The offset of the first kept tick is the phase returned by self.getPhase.
    Returns a 1-tuple holding the decimated grid as a sparse Signal.
    """
    period = self.parameters["period"].value
    phase = self.getPhase(period, inputFeatures, inputGrid)
    kept = slice(phase, None, period)
    decimated = Signal(inputGrid.values[kept], times=inputGrid.times[kept], sparse=True)
    return (decimated, )
def run(self, mix, boundaries):
    """
    Score the amount of near-silence of the mix between the boundaries.

    The "barMSE" feature of each track, after its effects and moved to deck
    time, is added to a master signal. When more than 10% of the master values
    inside the boundaries are below 0.1, the score is 1 minus that proportion,
    otherwise it is 1.
    A ZeroDivisionError is raised when there is no value inside the boundaries.
    """
    tracks = Rule.getTracks(mix, boundaries)
    noiseThreshold = 0.1
    silenceRatio = 0.1

    masterSignal = Signal([], times=[])
    for track in tracks:
        deckSignal = track.applyEffects(track.getFeature("barMSE"))
        deckSignal.times = track.getDeckTime(deckSignal.times)
        masterSignal.addSignal(deckSignal)

    values = masterSignal.getValues(*boundaries)
    quietCount = sum(1 for value in values if value < noiseThreshold)
    proportion = float(quietCount) / len(values)
    if proportion <= silenceRatio:
        return 1
    return 1 - proportion
def predictOne(self, peakSignals: List[Signal], grid: Signal, salienceSignals: List[Signal]):
    """
    Select the peaks out of several peak signals.

    The peaks are clustered, split into salient and non salient ones, filtered
    by their distance from the start, and finally cut to the first
    "absoluteTop" entries.
    Returns a tuple (selected peaks, non salient peaks).
    """
    # Cluster the peaks to remove close outliers
    clustered = Signal.clusterSignals(peakSignals,
                                      minDistance=self.parameters["clusterDistance"].value,
                                      mergeValue=self.parameters["mergeFunction"].value)

    # Keep the peaks followed by a salient segment
    salient, nonSalientPeaks = self.getSalientPoints(salienceSignals, grid, clustered)

    # Drop the peaks too far away from the start of the track
    early = self.getEarlyPeaks(salient, grid)

    # Keep the first k peaks. TODO: Set the selection to an "or" ? It would
    # prevent disabling the position filtering.
    limit = self.parameters["absoluteTop"].value
    selected = Signal(early.values[:limit], times=early.times[:limit], sparse=True)
    return (selected, nonSalientPeaks)
def predictOne(self, path: str):
    """
    Segment the file with the `segmenter` module.

    Returns a 1-tuple holding a sparse Signal whose values are the labels and
    whose times are the start of the beat interval of each boundary but the
    last one.
    """
    cqt, timbre, beatIntervals = segmenter.features(path)
    boundaries, beatIntervals, labels = segmenter.lsd(cqt, timbre, beatIntervals, {"num_types": False})
    startTimes = [beatIntervals[index][0] for index in boundaries[:-1]]
    return (Signal(labels, times=startTimes, sparse=True), )
def _getWindows(self, signal: Signal, grid: Signal, addAnacrusis=False, addAfterLastBeat=False, window="square", aggregation='rmse'):
    """
    Aggregate the signal between each pair of consecutive ticks of the grid (in seconds).

    Every tick is first moved back by the "panning" parameter times the median
    distance between ticks. The values between two moved ticks are then reduced
    by self._getWindow with the given window and aggregation.
    addAnacrusis and addAfterLastBeat are accepted but not used at the moment:
    the parts before the first tick and after the last tick are left out.
    Returns a Signal with one value per interval, timed with the original
    (not moved) start tick, eg values [0.1,0.2,0.1,0.2,0.8,0.9,0.8,0.9]
    """
    # pan times
    offset = self.parameters["panning"].value * np.median(np.diff(grid.times))
    shifted = [tick - offset for tick in grid.times]

    windows = [
        self._getWindow(signal.getValues(shifted[index], shifted[index + 1]), signal.sampleRate, window, aggregation)
        for index in range(len(grid) - 1)
    ]
    return Signal(windows, times=grid.times[:-1])
def _subdivide(self, grid, steps):
    """
    Split each interval of the grid into `steps` parts of equal length.

    Every pair of consecutive ticks contributes its first tick followed by
    steps - 1 evenly spaced intermediate ticks; the last tick of the grid is
    appended at the end.
    Returns a Signal of ones located at the new ticks.
    """
    times = np.asarray(grid.times, dtype=float)
    # Positions are computed as start + length * k / steps with an integer k.
    # Walking with a float step (np.arange(start, stop, step)) can add or drop
    # a tick because of rounding errors.
    fractions = np.arange(steps) / steps
    pieces = [start + (stop - start) * fractions for start, stop in zip(times[:-1], times[1:])]
    pieces.append(times[-1:])
    # One concatenation at the end instead of growing the array in the loop.
    newTimes = np.concatenate(pieces)
    return Signal(np.ones(len(newTimes)), times=newTimes)
def predictOne(self, values: Signal):
    """
    Pick the peaks of the signal.

    When the "relativeThreshold" parameter is set, the threshold is that ratio
    of the maximum found in the first "thresholdIndex" fraction of the values,
    and self.staticThreshold is used. Otherwise self.adaptiveThreshold is used.
    Returns a 1-tuple holding a sparse Signal of the peak values at the peak times.
    """
    samples = np.array(values.values)
    relativeThreshold = self.parameters["relativeThreshold"].value
    if relativeThreshold:
        # compute the threshold at x times the maximum value
        searchEnd = int(len(samples) * self.parameters["thresholdIndex"].value)
        threshold = np.max(samples[:searchEnd]) * relativeThreshold
        peaks, peaksValues = self.staticThreshold(samples, threshold, self.parameters["minDistance"].value)
    else:
        peaks, peaksValues = self.adaptiveThreshold(samples, L=self.parameters["medianSize"].value)

    peakTimes = [values.times[peak] for peak in peaks]
    return (Signal(peaksValues, times=peakTimes, sparse=True), )
def getEarlyPeaks(self, peaks, grid):
    """
    Filter the peaks by relative distance from the start.

    When the "relativeDistance" parameter is below 1, only the peaks located at
    or before that fraction of the grid duration are kept, in a new Signal.
    Otherwise the peaks are returned untouched.
    """
    ratio = self.parameters["relativeDistance"].value
    if not ratio < 1:
        return peaks

    kept = [index for index, position in enumerate(peaks.times) if position <= grid.duration * ratio]
    keptValues = [peaks.values[index] for index in kept]
    keptTimes = [peaks.times[index] for index in kept]
    return Signal(keptValues, times=keptTimes)
def nietoPCP(self, samples: Signal):
    """
    Compute the pitch class profile of the harmonic part of the samples.

    The harmonic component is extracted with hpss, turned into a power hybrid
    CQT and folded into a chromagram. Returns a 1-tuple holding a Signal with
    one chroma vector per frame, sampled at sampleRate / hopLength.
    """
    sr = samples.sampleRate
    hop_length = self.parameters["hopLength"].value
    harmonic, _ = librosa.effects.hpss(samples.values)

    # The original author checked that these parameters are the ones used in
    # MSAF: 7 octaves in the cqt and 6 octaves in the pcp.
    cqtPower = np.abs(
        librosa.hybrid_cqt(harmonic, sr=sr, hop_length=hop_length, n_bins=7 * 12, norm=np.inf, fmin=27.5))**2
    chroma = librosa.feature.chroma_cqt(C=cqtPower, sr=sr, hop_length=hop_length, n_octaves=6, fmin=27.5)
    return (Signal(chroma.T, sampleRate=sr / hop_length), )
def _getRatioThresholdBoundaries(self, signal):
    """
    Find the positions where the signal changes by a large ratio.

    The ratio between each value and the previous one is computed (10000 when
    the previous value is 0). A position is a boundary when its ratio is at
    least the "ratioThreshold" parameter, or at most its inverse.
    Returns a tuple (indexes of the boundaries in the signal, Signal of the ratios).
    """
    ratios = []
    for index in range(len(signal) - 1):
        if signal[index] != 0:
            ratios.append(signal[index + 1] / signal[index])
        else:
            ratios.append(10000)
    onsets = Signal(ratios, times=signal.getTimes()[1:])

    incTH = self.parameters["ratioThreshold"].value
    decTH = 1. / incTH
    boundaries = []
    for index, ratio in enumerate(onsets):
        if ratio >= incTH or ratio <= decTH:
            # The ratio at index i describes the change arriving at sample i + 1
            boundaries.append(index + 1)
    return boundaries, onsets
def predictOne(self, values: Signal, grid: Signal):
    """
    Tell, for each segment of the grid, if it is louder than the whole signal.

    A segment lies between two consecutive ticks. With the "includeBorders"
    parameter, a segment from 0 to the first tick and one from the last tick
    to 99999 are added. Each segment is True when its rms is above the rms of
    all the values.
    Returns a 1-tuple holding a SparseSegmentSignal of the booleans.
    """
    globalRms = self._rms(values)
    ticks = grid.times
    if self.parameters["includeBorders"].value:
        ticks = [0, *ticks, 99999]

    segments = list(zip(ticks[:-1], ticks[1:]))
    isLoud = [self._rms(values.getValues(start, stop)) > globalRms for start, stop in segments]
    return (SparseSegmentSignal(isLoud, list(segments)), )
def predictOne(self, path: str):
    """
    Transcribe the drums of the file with the vendored madmom DrumTranscriptor.

    The transcriptor is run in a separate python environment. Each valid line
    of its output holds a time and a drum code separated by a tabulation.
    Returns a tuple (kicks, snares, hihats) of sparse Signals of ones, located
    at the times of the events coded "35"/"0", "38"/"1" and "42"/"2".
    """
    # TODO: Is it possible to install both version of madmom ?
    args = [
        resource_filename(__name__, "../../../vendors/madmomDrumsEnv/bin/python"),
        resource_filename(__name__, "../../../vendors/madmom-0.16.dev0/bin/DrumTranscriptor"),
        "-m",
        self.parameters["model"].value,
        "single",
        path
    ]
    # Calling python from python. subprocess.run waits for the child process
    # and closes its pipe, so no process or file descriptor is left behind.
    # The exit status is not checked, a failed run gives empty signals.
    # TODO read stderr as well and log it
    completed = subprocess.run(args, stdout=subprocess.PIPE)
    output = completed.stdout.decode()

    rows = [event.split("\t") for event in output.split("\n") if event]
    rows = [row for row in rows if len(row) == 2 and self.is_number(row[0]) and self.is_number(row[1])]

    def onsetTimes(codes):
        # Times of the events whose drum code is one of the given codes
        return [float(row[0]) for row in rows if row[1] in codes]

    kicks = onsetTimes(("35", "0"))
    snares = onsetTimes(("38", "1"))
    hihats = onsetTimes(("42", "2"))
    return (Signal(np.ones(len(kicks)), times=kicks, sparse=True),
            Signal(np.ones(len(snares)), times=snares, sparse=True),
            Signal(np.ones(len(hihats)), times=hihats, sparse=True))
def getSalientPoints(self, salienceSignals, grid, peaks):
    """
    Split the peaks signal into two: salient points, and non-salient points.

    A peak is salient when its salience (see getSalience, computed over
    "salienceWindow" ticks) reaches the "salienceTreshold" parameter. When no
    peak reaches it, the most salient one is kept alone. Without a threshold,
    all the peaks are returned with an empty Signal.
    """
    threshold = self.parameters["salienceTreshold"].value
    if not threshold:
        return peaks, Signal([], times=[])

    window = self.parameters["salienceWindow"].value
    salience = [self.getSalience(position, salienceSignals, grid, window) for position in peaks.times]

    salientPoints = []
    nonSalientPoints = []
    for index, score in enumerate(salience):
        if score >= threshold:
            salientPoints.append(index)
        elif score < threshold:
            nonSalientPoints.append(index)

    # if there is no point above the threshold of salience, just return the most salient one
    if len(salientPoints) == 0 and len(salience) > 0:
        salientPoints = [np.argmax(salience)]
        nonSalientPoints = [index for index in nonSalientPoints if index not in salientPoints]

    nonSalient = Signal([peaks.values[index] for index in nonSalientPoints],
                        times=[peaks.times[index] for index in nonSalientPoints],
                        sparse=True)
    salient = Signal([peaks.values[index] for index in salientPoints],
                     times=[peaks.times[index] for index in salientPoints])
    return salient, nonSalient
def predictOne(self, path: str):
    """
    Extract the vocal melody of the file with the vendored Vocal-Melody-Extraction project.

    Method copied from the main file of that project. Returns a tuple of two
    Signals sampled at 50, built from the first and the second column of the
    parsed output.
    """
    # The imports stay local and in their original order: the project is
    # vendored and loading its modules may have side effects.
    from project.MelodyExt import feature_extraction
    from project.utils import load_model, save_model, matrix_parser
    from project.test import inference
    from project.model import seg, seg_pnn, sparse_loss
    from project.train import train_audio

    # Feature extraction
    features = np.transpose(feature_extraction(path)[0:4], axes=(2, 1, 0))

    # load model
    modelPath = resource_filename(
        __name__, "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" + self.parameters["model"].value)
    model = load_model(modelPath)

    # Inference
    firstChannel = features[:, :, 0]
    print(firstChannel.shape)
    extract_result = inference(feature=firstChannel, model=model, batch_size=10)

    # Output
    parsed = matrix_parser(extract_result)
    return (Signal(parsed[:, 0], sampleRate=50), Signal(parsed[:, 1], sampleRate=50))
def predictOne(self, samples: Signal):
    """Calculates the cqt of the given audio using librosa.

    The magnitude of the cqt is scaled according to the "scale" parameter:
    "Amplitude", "Power", "MSAF", "Power dB" or "Perceived dB". The three dB
    scales are shifted so that their minimum is 0.

    Args:
        samples (Signal): The samples of the audio.

    Returns:
        tuple of Signal: The scaled cqt, one frame per entry, sampled at sampleRate / hopLength.

    Raises:
        ValueError: when the "scale" parameter is none of the supported names.
    """
    sr = samples.sampleRate
    hop_length = self.parameters["hopLength"].value
    n_bins = self.parameters["binNumber"].value
    magnitude = np.abs(librosa.cqt(samples.values, sr=sr, hop_length=hop_length, n_bins=n_bins))

    scale = self.parameters["scale"].value
    if scale == "Amplitude":
        result = magnitude
    elif scale == "Power":
        result = magnitude**2
    elif scale == "MSAF":
        result = librosa.amplitude_to_db(magnitude**2, ref=np.max)
        # Inverting the db scale (the original author was not sure this is correct)
        result -= np.min(result)
    elif scale == "Power dB":
        # Based on Librosa, standard power spectrum in dB
        result = librosa.amplitude_to_db(magnitude, ref=np.max)
        result -= np.min(result)
    elif scale == "Perceived dB":
        freqs = librosa.cqt_frequencies(magnitude.shape[0], fmin=librosa.note_to_hz('C1'))
        result = librosa.perceptual_weighting(magnitude**2, freqs, ref=np.max)
        result -= np.min(result)
    else:
        raise ValueError("parameterScale is not a correct value")

    return (Signal(result.T, sampleRate=sr / hop_length), )
def findPhase(signal: Signal, grid: Signal, period: int, toleranceWindow=0):
    """
    find the phase of the signal based on its amplitude at the grid positions and the number of peaks
    - signal: works best with a discrete signal as no agglomeration is done
    - grid: positions of the beats
    - period: the periodicity to test
    - tolerance window: passed to signal.getValue. If not at 0, returns the closest value in the signal to the grid,
      within the tolerance window

    Returns the phase, between 0 and period - 1, with the highest score. The score of a phase is the sum of the
    values found at its ticks times the number of ticks tested.

    test:
    # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), period=4)
    # print(result) = 1
    """
    phases = []
    for phase in range(period):
        # The tolerance given by the caller is forwarded; it used to be
        # replaced by a hard-coded 0, which made the argument useless.
        values = [
            signal.getValue(grid.times[i], toleranceWindow=toleranceWindow) for i in range(phase, len(grid), period)
        ]
        found = [v for v in values if v is not None]
        phases.append(np.sum(found) * len(values))

    bestPhase = np.argmax(phases)
    return bestPhase
def recursiveMap(obj):
    """
    recursively map all the fields of the json decoded object to class from the model

    Dictionaries whose "type" entry names Signal, SparseSignal or SparseSegmentSignal are deserialized to that class.
    Other dictionaries and lists are walked and updated in place. Anything else is returned as it is.
    The mapping is best effort: when something fails, a warning is logged and the object is returned in its
    current state.
    """
    import logging

    try:
        from automix.model.classes.signal import Signal, SparseSignal, SparseSegmentSignal

        if isinstance(obj, dict):
            if u'type' in obj and (obj[u"type"] == str(Signal) or obj[u"type"] == str(SparseSignal)):
                obj = Signal.jsonDeserialize(obj)
            elif u'type' in obj and obj[u"type"] == str(SparseSegmentSignal):
                obj = SparseSegmentSignal.jsonDeserialize(obj)
            else:
                # Only existing keys are reassigned, the size of the dict does not change
                for key in obj:
                    obj[key] = recursiveMap(obj[key])
        elif isinstance(obj, list):
            for index, item in enumerate(obj):
                obj[index] = recursiveMap(item)
    except Exception:
        # Keep the best effort behaviour, but leave a trace instead of hiding the failure
        logging.getLogger(__name__).warning("recursiveMap could not map an object, it is returned as it is",
                                            exc_info=True)
    return obj
def findPhaseLocal(self, period: int, signal: Signal, grid: Signal, toleranceWindow=0.1):
    """
    Score each phase of the signal based on its amplitude at the grid positions
    - period: the periodicity to test
    - signal: works best with a discrete signal as no agglomeration is done
    - grid: positions of the beats
    - tolerance window: passed to signal.getValue. If not at 0, returns the closest value in the signal to the grid,
      within the tolerance window

    The values found at the ticks of a phase are combined according to the "distanceMetric" parameter:
    "RMS", "sum", or "Veire" (sum times the number of values found). Any other metric raises an Exception.
    Returns the list of the scores, one per phase. The best phase is not selected here.

    test:
    # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), period=4)
    # print(result) = 1
    """
    scores = []
    for phase in range(period):
        found = []
        for index in range(phase, len(grid), period):
            value = signal.getValue(grid.times[index], toleranceWindow=toleranceWindow)
            if value is not None:
                found.append(value)

        metric = self.parameters["distanceMetric"].value
        if metric == "RMS":
            score = np.sqrt(np.mean(np.array(found)**2))
        elif metric == "sum":
            score = np.sum(found)
        elif metric == "Veire":
            score = np.sum(found) * len(found)
        else:
            raise Exception("Bad distance metric parameter" + metric)
        scores.append(score)
    return scores
def predictOne(self, path):
    """
    Load the audio file with librosa at the "sampleRate" parameter.

    Returns a 1-tuple holding the samples as a Signal, with the sample rate reported by librosa.
    """
    targetRate = self.parameters["sampleRate"].value
    audio, rate = librosa.load(path, sr=targetRate)
    return (Signal(audio, sampleRate=rate), )
def predictOne(self, samples: Signal):
    """
    Separate the spectrogram of the samples into a foreground and a background.

    The repeating part of the magnitude spectrogram is estimated with a nearest
    neighbours filter, then two soft masks split the spectrogram.
    Returns a tuple (foreground, background) of Signals, one spectrum per frame.
    """
    y, sr = samples.values, samples.sampleRate

    # Spectrogram magnitude; the phase is not needed
    S_full, _ = librosa.magphase(librosa.stft(y))
    # NOTE(review): presumably the default window and hop of librosa.stft - confirm
    hopLength = 2048 / 4
    newSampleRate = sr / hopLength

    # Frames are compared using cosine similarity, and similar frames are
    # aggregated by taking their (per-frequency) median value.
    # To avoid being biased by local continuity, similar frames have to be
    # separated by at least 2 seconds.
    # This suppresses sparse/non-repetitive deviations from the average
    # spectrum, and works well to discard vocal elements.
    minimalSeparation = int(librosa.time_to_frames(2, sr=sr))
    S_filter = librosa.decompose.nn_filter(S_full, aggregate=np.median, metric='cosine', width=minimalSeparation)

    # The output of the filter should not be greater than the input if the
    # signals are additive. The pointwise minimum with the input forces this.
    S_filter = np.minimum(S_full, S_filter)

    # A margin reduces the bleed between the vocals and instrumentation masks.
    # Note: the margins need not be equal for foreground and background separation
    margin_i, margin_v = 2, 10
    power = 2
    residual = S_full - S_filter
    mask_i = librosa.util.softmask(S_filter, margin_i * residual, power=power)
    mask_v = librosa.util.softmask(S_full - S_filter, margin_v * S_filter, power=power)

    # Multiply the masks with the input spectrum to separate the components
    S_foreground = mask_v * S_full
    S_background = mask_i * S_full

    return (Signal(S_foreground.T, sampleRate=newSampleRate), Signal(S_background.T, sampleRate=newSampleRate))
def predictOne(self, samples: Signal) -> Signal:
    """
    Compute a novelty curve of the features from their structural features.

    The features are padded with zeros, embedded over "m_embedded" frames,
    turned into a recurrence matrix with "k_nearest" times the number of frames
    as neighbours, circularly shifted, filtered and reduced to a novelty curve.
    Returns a 1-tuple holding the curve as a Signal, or (None, ) when the padded
    features have 20 frames or less.
    """
    M = self.parameters["M_gaussian"].value  # Size of gaussian kernel in beats
    m = self.parameters["m_embedded"].value  # Number of embedded dimensions
    k = self.parameters["k_nearest"].value  # k*N-nearest neighbors for the recurrence plot

    # Preprocess to obtain the features as a (frames, dimensions) matrix
    F = np.array(samples.values)
    if F.ndim == 1:
        F = np.array([F]).T

    # Pad both ends with m // 2 frames of zeros
    half = m // 2
    if len(F.shape) == 2:
        padding = np.zeros((half, F.shape[1]))
        F = np.concatenate((padding, F, padding))
    else:
        F = np.concatenate((np.zeros(half), F, np.zeros(half)))

    # Check size in case the track is too short
    if not F.shape[0] > 20:
        return (None, )

    # Embedding the feature space (i.e. shingle)
    E = embedded_space(F, m)

    # Recurrence matrix
    R = librosa.segment.recurrence_matrix(
        E.T,
        k=k * int(F.shape[0]),
        width=1,  # zeros from the diagonal
        metric="euclidean",
        sym=True).astype(np.float32)

    # Circular shift
    L = circular_shift(R)

    # Obtain structural features by filtering the lag matrix
    # NOTE(review): the second call filters L.T again and replaces the result
    # of the first one, so M is not used by the curve - confirm this is intended.
    SF = gaussian_filter(L.T, M=M, axis=1)
    SF = gaussian_filter(L.T, M=1, axis=0)

    # Compute the novelty curve and trim its ends
    nc = compute_nc(SF)
    nc = nc[m // 2:-m // 2]
    times = samples.times[:-m]
    return (Signal(nc, times=times), )