Example #1
 def predictOne(self, samples: Signal):
     """TODO
     """
     hopLength = self.parameters["hopLength"].value
     onsets = librosa.onset.onset_detect(y=samples.values, sr=samples.sampleRate, hop_length=hopLength, backtrack=self.parameters["backtrack"].value)
     result = Signal(samples[onsets], times=[samples.getTime(onset * hopLength) for onset in onsets], sparse=True)
     return (result, )
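For context, the underlying librosa call in isolation; a minimal standalone sketch, assuming a hypothetical mono file audio.wav:

import librosa

y, sr = librosa.load("audio.wav", sr=None)  # hypothetical input file
hop_length = 512
# Frame indices of detected onsets; backtrack snaps each one to the preceding energy minimum.
onsets = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length, backtrack=True)
# Convert frame indices to seconds, mirroring samples.getTime(onset * hopLength) above.
onset_times = librosa.frames_to_time(onsets, sr=sr, hop_length=hop_length)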
Example #2
    def _getOnsets(self, signal):
        """
        Returns a first order difference of the signal and the absolute first order difference

        """
        diff = np.diff(signal)
        return Signal(diff, times=signal.getTimes()[1:]), Signal(
            np.abs(diff), times=signal.getTimes()[1:])
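Note the alignment convention: np.diff shortens the array by one, so the difference at index i belongs to the time of sample i + 1, hence times[1:]. A quick numpy check, independent of the Signal class:

import numpy as np

times = np.array([0.0, 0.5, 1.0, 1.5])
values = np.array([1.0, 3.0, 2.0, 5.0])
diff = np.diff(values)               # length 3
assert len(diff) == len(times[1:])   # diff[i] is the change arriving at times[i + 1]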
Example #3
    def predictOne(self, samples: Signal) -> List[Signal]:
        """
        Computes the hpss of the given audio using librosa.
        """

        y_harmonic, y_percussive = librosa.effects.hpss(samples.values)

        return (Signal(np.array(y_harmonic), sampleRate=samples.sampleRate),
                Signal(np.array(y_percussive), sampleRate=samples.sampleRate))
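The separation itself in isolation; a minimal sketch, again assuming a hypothetical input file:

import librosa

y, sr = librosa.load("audio.wav", sr=None)  # hypothetical input file
# Harmonic/percussive separation by median-filtering the spectrogram.
y_harmonic, y_percussive = librosa.effects.hpss(y)
# A larger margin gives a harder separation, at the cost of artifacts.
y_harm_hard, y_perc_hard = librosa.effects.hpss(y, margin=4.0)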
Example #4
    def deserializeTrack(path, agreement=0.51, distanceAgreement=0.5, minimalAnnotator=0, minimalConfidence=0):
        """instantiate a Track from the jams encoding. https://github.com/marl/jams/
        
        Args:
        ----
            path (list[str]): path to the .JAMS file
            agreement (float, optional): minimal ratio of annotators agreeing to keep the point. Defaults to 0.51.
            distanceAgreement (float, optional): distance between annotations to cluster them to the same point. Defaults to 0.5.
            minimalAnnotator (int, optional): minimal number of annotators to keep the annotation. Defaults to 0.
            minimalConfidence (int, optional): minimal confidence to keep the annotation. Defaults to 0.
        
        Returns:
        -------
            Track: a track with annotations in it's features
        """
        reference = None
        track = Track()
        with open(path) as file:
            reference = json.load(file)

        # meta
        track.path = path
        track.features["duration"] = reference["file_metadata"]["duration"]
        track.name = reference["file_metadata"]["title"]

        switchsIn = []
        switchsOut = []
        for annotation in reference["annotations"]:
            # meta
            annotator = annotation["annotation_metadata"]["annotator"]["name"]
            # if annotator == "Marco":
            #     continue
            # old format segment_open
            if annotation["namespace"] == "segment_open":
                segments = annotation["data"]
                track.features["boundaries"] = Signal(1, times=[segment["time"] for segment in segments], sparse=True)
                track.features["labels"] = [segment["value"] for segment in segments]
            # tempo
            elif annotation["namespace"] == "tempo":
                track.features["tempo"] = annotation["data"][0]["value"]
            # Current format with confidence, segment, and multiple annotators
            elif annotation["namespace"] == "cue_point":
                segments = annotation["data"]
                switchsIn.append([segment for segment in segments if segment["value"]["label"] == "IN"])
                switchsOut.append([segment for segment in segments if segment["value"]["label"] == "OUT"])
                track.features["switchIn-" + annotator] = Signal(
                    1, times=[segment["time"] for segment in segments if segment["value"]["label"] == "IN"], sparse=True)

        track.features["switchIn"] = JamsSerializer.aggregateAnnotations(switchsIn,
                                                                         agreementThreshold=agreement,
                                                                         distanceAgreement=distanceAgreement,
                                                                         minimalAnnotator=minimalAnnotator)
        # track.features["switchOut"] = JamsSerializer.aggregateAnnotations(switchsOut,
        #                                                                   agreementThreshold=agreement,
        #                                                                   distanceAgreement=distanceAgreement,
        #                                                                   minimalAnnotator=minimalAnnotator)
        return track
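As an aside, the jams package this encoding comes from can also load the files directly; a minimal sketch, assuming jams is installed (validate=False because cue_point is a custom namespace whose schema may not be registered locally):

import jams

jam = jams.load("track.jams", validate=False)  # hypothetical path
print(jam.file_metadata.duration, jam.file_metadata.title)
# Fetch annotations by namespace, as done manually above.
for annotation in jam.search(namespace="tempo"):
    print(annotation.data[0].value)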
Example #5
def diff(grid, values, maxThreshold=-1):
    """
    get the difference between the ground truth values (grid) and the values.
    if the difference is above the maxThreshold, then the difference is considered to be zero.
    By default the maxThreshold is going to be the half the mean distance between to ticks in the GT values (grid)
    This is usefull for looking at the difference between events in two tracks.
    TODO: Include that in signal class ?
    """
    gridSignal = Signal(1, times=grid)
    valuesSignal = Signal(1, times=values)
    valuesSignal.quantizeTo(gridSignal, maxThreshold=maxThreshold, removeOutOfBound=False, removeDuplicatedValues=False)
    return valuesSignal.times - values
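The same nearest-tick difference can be sketched in plain numpy with np.searchsorted, without the Signal class (a sketch of the idea, not of quantizeTo itself):

import numpy as np

def diff_to_grid(grid, values):
    grid, values = np.asarray(grid), np.asarray(values)
    # Index of the grid tick just right of each value, clipped to a valid range.
    right = np.clip(np.searchsorted(grid, values), 1, len(grid) - 1)
    left = right - 1
    # Pick whichever neighbour is closer.
    nearest = np.where(values - grid[left] <= grid[right] - values, grid[left], grid[right])
    return nearest - values

print(diff_to_grid([0.0, 1.0, 2.0], [0.1, 0.9, 2.2]))  # [-0.1  0.1 -0.2]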
Example #6
    def predictOne(self, samples: Signal, grid: Signal):
        """
        Compute the ReplayGain of the samples: per grid segment if a grid is given,
        globally otherwise.
        """
        if grid is not None:
            values = [
                standard.ReplayGain(sampleRate=samples.sampleRate)(
                    samples.getValues(grid.times[i], grid.times[i + 1]))
                for i in range(len(grid.times) - 1)
            ]
            return (Signal(values, times=grid.times[:-1]), )
        else:
            values = standard.ReplayGain(sampleRate=samples.sampleRate)(
                samples.values)
            return (Signal(values, times=[0]), )
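For reference, Essentia's ReplayGain algorithm on its own; a minimal sketch, assuming essentia is installed and a hypothetical input file:

from essentia import standard

audio = standard.MonoLoader(filename="audio.wav", sampleRate=44100)()  # hypothetical path
# Returns the ReplayGain of the whole buffer, in dB.
gain = standard.ReplayGain(sampleRate=44100)(audio)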
Example #7
    def getSalience(self, point, features: List[Signal], grid: Signal, window):
        """
        Return a salience of the window following the point
        """
        score = 0
        for feature in features:
            try:
                amount = feature.getValues(point, grid.getTime(grid.getIndex(point) + window))
            except IndexError as e:
                amount = [0]  # TODO: sometimes the position is beyond the grid?

            score += np.mean(amount) if len(amount) else 0
        return score / len(features) if len(features) != 0 else 0
Example #8
    def predictOne(self, path):
        """
        Track beats and downbeats with madmom; return beats, downbeats,
        strong beats, and the estimated tempo.
        """
        # call madmom to get beats
        fps = 100
        act = madmom.features.RNNDownBeatProcessor()(str(path))
        proc = madmom.features.DBNDownBeatTrackingProcessor(
            beats_per_bar=[3, 4],
            fps=fps,
            transition_lambda=self.parameters["transitionLambda"].value,
            correct=self.parameters["correctToActivation"].value)
        beats = proc(act)
        if len([
                beat for i, beat in enumerate(beats)
                if (i + beats[0][1] - 1) % 4 + 1 != beat[1]
        ]):
            logging.error("Beat detection skipped a beat")
        # get the tempo
        # evenGrids = quantization.separateInEvenGrids(beats[:, 0], regularityThreshold=self.parameters["snapDistance"].value)
        # longuestEvenGridIndex = np.argmax([len(grid) for grid in evenGrids])
        # tau = np.average([(evenGrid[-1] - evenGrid[0]) / (len(evenGrid) - 1) for evenGrid in evenGrids if len(evenGrid) > 1],
        #                  weights=[len(evenGrid)
        #                           for evenGrid in evenGrids if len(evenGrid) > 1]) * fps  # TODO: use only the longest portion ?
        # tempo = 60 * fps / tau
        # beatLength = tau / fps  # i.e 0.5s
        # refBeat = [beat for beat in beats if beat[0] == evenGrids[longuestEvenGridIndex][0]][0]

        # # extend the grid of beats to remove holes in it
        # trackLength = float(len(act)) / fps
        # extendedBeats = quantization.extendGrid(refBeat,
        #                                         beats,
        #                                         trackLength,
        #                                         beatLength,
        #                                         SnapDistance=self.parameters["snapDistance"].value)
        tempo = 60 / np.mean(np.diff(np.array(beats)[:, 0]))

        # Get the confidence as the mean of the activation at each GT beat. Sums the two outputs of the NN
        # beat = self._getConfidence(act, beat, fps, extendedBeats)
        beatsT = [beat[0] for beat in beats]
        downbeatsT = [beat[0] for beat in beats if beat[1] == 1]
        strongBeatsT = [
            beat[0] for beat in beats if beat[1] == 1 or beat[1] == 3
        ]
        return (Signal(np.ones(len(beatsT)), times=beatsT, sparse=True),
                Signal(np.ones(len(downbeatsT)), times=downbeatsT,
                       sparse=True),
                Signal(np.ones(len(strongBeatsT)),
                       times=strongBeatsT,
                       sparse=True), tempo)
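The madmom pipeline in isolation; a minimal sketch, assuming madmom is installed and a hypothetical input file:

import madmom

# Activation function of the downbeat RNN, sampled at fps frames per second.
act = madmom.features.RNNDownBeatProcessor()("audio.wav")  # hypothetical path
proc = madmom.features.DBNDownBeatTrackingProcessor(beats_per_bar=[3, 4], fps=100)
beats = proc(act)  # array of (time in seconds, position in bar); position 1 is the downbeat
downbeats = beats[beats[:, 1] == 1, 0]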
Example #9
    def chromagram(self, samples: Signal):
        sr = samples.sampleRate
        hop_length = self.parameters["hopLength"].value
        # Pass the hop length to librosa so that the frame rate computed below is correct.
        result = librosa.feature.chroma_stft(y=samples.values, sr=sr, hop_length=hop_length)
        pcp_sr = sr / hop_length

        return (Signal(result.T, sampleRate=pcp_sr), )
Example #10
    def predictOne(self, inputFeatures: List[Signal], inputGrid: Signal):
        # for period in self.parameters["period"].value:
        period = self.parameters["period"].value
        phase = self.getPhase(period, inputFeatures, inputGrid)

        return (Signal(inputGrid.values[phase::period],
                       times=inputGrid.times[phase::period],
                       sparse=True), )
Example #11
    def run(self, mix, boundaries):
        """
        Score the segment: return 1 - proportion of near-silent values in the
        master signal if that proportion exceeds silenceRatio, else 1.
        """
        tracks = Rule.getTracks(mix, boundaries)
        noiseThreshold = 0.1
        silenceRatio = 0.1

        masterSignal = Signal([], times=[])
        for track in tracks:
            postFXSignal = track.applyEffects(track.getFeature("barMSE"))
            postFXSignal.times = track.getDeckTime(postFXSignal.times)
            masterSignal.addSignal(postFXSignal)

        values = masterSignal.getValues(*boundaries)
        proportion = float(len([value for value in values if value < noiseThreshold])) / len(values)
        if proportion > silenceRatio:
            return 1 - proportion
        else:
            return 1
Example #12
    def predictOne(self, peakSignals: List[Signal], grid: Signal, salienceSignals: List[Signal]):
        # Cluster the peaks to remove close outliers
        peaks = Signal.clusterSignals(peakSignals,
                                      minDistance=self.parameters["clusterDistance"].value,
                                      mergeValue=self.parameters["mergeFunction"].value)

        # Get the Salience of the following segment
        peaks, nonSalientPeaks = self.getSalientPoints(salienceSignals, grid, peaks)

        # Filter the peaks too far away from the start of the track
        peaks = self.getEarlyPeaks(peaks, grid)

        # Keep only the first absoluteTop peaks. TODO: make the selection an "or"?
        # Not ideal, because an "or" would prevent disabling the position filtering.
        peaks = Signal(peaks.values[:self.parameters["absoluteTop"].value],
                       times=peaks.times[:self.parameters["absoluteTop"].value],
                       sparse=True)

        return (peaks, nonSalientPeaks)
Example #13
    def predictOne(self, path: str):

        X_cqt, X_timbre, beat_intervals = segmenter.features(path)

        boundaries, beat_intervals, labels = segmenter.lsd(
            X_cqt, X_timbre, beat_intervals, {"num_types": False})
        result = Signal(labels,
                        times=[beat_intervals[i][0] for i in boundaries[:-1]],
                        sparse=True)
        return (result, )
Example #14
    def _getWindows(self,
                    signal: Signal,
                    grid: Signal,
                    addAnacrusis=False,
                    addAfterLastBeat=False,
                    window="square",
                    aggregation='rmse'):
        """
        Get the root mean square amplitude between each tick of the grid (in seconds).
        addAnacrusis add also the energy from the first sample in the signal to the first tick of the grid,
        and the last tick of the grid to the last sample of the signal.
        return eg [0.1,0.2,0.1,0.2,0.8,0.9,0.8,0.9]
        """

        result = []
        times = copy.copy(grid.times)
        # pan times
        panning = self.parameters["panning"].value * np.median(np.diff(times))
        times = [time - panning for time in times]
        # if addAnacrusis:
        #     times = np.insert(times, 0, 0)  # TODO make it faster by not creating a new array
        #     annacrusisValues = signal.getValues(0, times[])
        #     if len(annacrusisValues):
        #         result.append(self._getWindow(annacrusisValues, window, aggregation))
        #     else:  # If the first tick is at 0, then the anacrusis is 0, or [0 ,..., 0] if the signal is multidimensional
        #         result.append(signal.values[0] * 0.)

        for i in range(len(grid) - 1):
            result.append(
                self._getWindow(signal.getValues(times[i], times[i + 1]),
                                signal.sampleRate, window, aggregation))

        # if addAfterLastBeat:
        #     afterValues = signal.getValues(grid.times[-1], signal.duration)
        #     if len(afterValues):
        #         result.append(self._getWindow(afterValues, window, aggregation))
        #     else:
        #         result.append(signal.values[0] * 0.)
        # else:
        #     times = times[:-1]

        return Signal(result, times=grid.times[:-1])
Example #15
    def _subdivide(self, grid, steps):
        newTimes = []
        for i in range(len(grid.times) - 1):
            newTimes = np.concatenate(
                (newTimes,
                 np.arange(grid.times[i], grid.times[i + 1],
                           (grid.times[i + 1] - grid.times[i]) / steps)))

        newTimes = np.concatenate(
            (newTimes, [grid.times[-1]]))  # TODO: clean that

        return Signal(np.ones(len(newTimes)), times=newTimes)
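np.arange with a float step can drift and occasionally emit an extra point at a segment end; a sketch of the same subdivision with np.linspace, which pins both endpoints:

import numpy as np

def subdivide(times, steps):
    # steps points per interval, endpoint excluded to avoid duplicating ticks.
    parts = [np.linspace(times[i], times[i + 1], steps, endpoint=False)
             for i in range(len(times) - 1)]
    return np.concatenate(parts + [times[-1:]])

print(subdivide(np.array([0.0, 1.0, 2.0]), 4))
# [0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.  ]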
Example #16
    def predictOne(self, values: Signal):
        listV = np.array(values.values)
        if self.parameters["relativeThreshold"].value:
            # compute the threshold as relativeThreshold times the maximum value in the first thresholdIndex fraction of the signal
            threshold = np.max(
                listV[:int(len(listV) * self.parameters["thresholdIndex"].value)]) * self.parameters["relativeThreshold"].value
            peaks, peaksValues = self.staticThreshold(listV, threshold, self.parameters["minDistance"].value)
        else:
            peaks, peaksValues = self.adaptiveThreshold(listV, L=self.parameters["medianSize"].value)

        result = Signal(peaksValues, times=[values.times[peak] for peak in peaks], sparse=True)
        return (result, )
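The static-threshold branch maps closely onto scipy's general peak picker; a sketch of the same selection with scipy.signal.find_peaks (the keyword arguments here are scipy's, not the project's):

import numpy as np
from scipy.signal import find_peaks

x = np.array([0.0, 1.0, 0.2, 0.8, 0.1, 1.5, 0.0])
threshold = 0.5 * np.max(x)  # relative threshold, as above
peaks, props = find_peaks(x, height=threshold, distance=2)
print(peaks, props["peak_heights"])  # [1 3 5] [1.  0.8 1.5]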
Example #17
 def getEarlyPeaks(self, peaks, grid):
     """
     Filter the peaks by relative distance from the start
     """
     if self.parameters["relativeDistance"].value < 1:
         earlyPeaks = [
             i for i, pos in enumerate(peaks.times) if pos <= grid.duration * self.parameters["relativeDistance"].value
         ]
         # if len(earlyPeaks) == 0:
         #     earlyPeaks = [peaks[0]]
         peaks = Signal([peaks.values[i] for i in earlyPeaks], times=[peaks.times[i] for i in earlyPeaks])
     return peaks
Example #18
    def nietoPCP(self, samples: Signal):
        sr = samples.sampleRate
        hop_length = self.parameters["hopLength"].value
        pcp_sr = sr / hop_length

        audio_harmonic, _ = librosa.effects.hpss(samples.values)
        # I double-checked: the parameters are the ones used in MSAF. 7 octaves in pcp_cqt and 6 octaves in pcp.
        pcp_cqt = np.abs(librosa.hybrid_cqt(audio_harmonic, sr=sr, hop_length=hop_length, n_bins=7 * 12, norm=np.inf,
                                            fmin=27.5))**2
        pcp = librosa.feature.chroma_cqt(C=pcp_cqt, sr=sr, hop_length=hop_length, n_octaves=6, fmin=27.5).T

        return (Signal(pcp, sampleRate=pcp_sr), )
Example #19
 def _getRatioThresholdBoundaries(self, signal):
     onsets = Signal([
         signal[i + 1] / signal[i] if signal[i] != 0 else 10000
         for i in range(len(signal) - 1)
     ],
                     times=signal.getTimes()[1:])
     incTH = self.parameters["ratioThreshold"].value
     decTH = 1. / incTH
     return [
         i + 1 for i, ratio in enumerate(onsets)
         if ratio >= incTH or ratio <= decTH
     ], onsets
Example #20
    def predictOne(self, values: Signal, grid: Signal):
        mean = self._rms(values)
        times = grid.times
        if self.parameters["includeBorders"].value:
            times = [0] + list(times) + [99999]
        positionTuples = [(times[i], times[i + 1])
                          for i in range(len(times) - 1)]

        result = SparseSegmentSignal([
            self._rms(values.getValues(start, stop)) > mean
            for start, stop in positionTuples
        ], positionTuples)
        return (result, )
Example #21
    def predictOne(self, path: str):
        # TODO: Is it possible to install both versions of madmom?
        args = [
            resource_filename(__name__,
                              "../../../vendors/madmomDrumsEnv/bin/python"),
            resource_filename(
                __name__,
                "../../../vendors/madmom-0.16.dev0/bin/DrumTranscriptor"),
            "-m", self.parameters["model"].value, "single", path
        ]  # Calling python from python, Yay...
        process = subprocess.Popen(args, stdout=subprocess.PIPE)
        output = process.stdout.read().decode()

        # TODO read  stderr=subprocess.STDOUT
        # err = process.stderr.read().decode()
        # if err:
        #     log.error(err)

        result = [event.split("\t") for event in output.split("\n") if event]
        result = [
            row for row in result if len(row) == 2 and self.is_number(row[0])
            and self.is_number(row[1])
        ]
        kicks = [
            float(row[0]) for row in result if row[1] == "35" or row[1] == "0"
        ]
        snares = [
            float(row[0]) for row in result if row[1] == "38" or row[1] == "1"
        ]
        hihats = [
            float(row[0]) for row in result if row[1] == "42" or row[1] == "2"
        ]

        return (Signal(np.ones(len(kicks)), times=kicks, sparse=True),
                Signal(np.ones(len(snares)), times=snares, sparse=True),
                Signal(np.ones(len(hihats)), times=hihats, sparse=True))
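The commented TODO about stderr is simpler with subprocess.run, which collects both streams and avoids reading one pipe at a time; a sketch with a stand-in command:

import logging
import subprocess

args = ["echo", "hello"]  # stand-in for the DrumTranscriptor command built above
process = subprocess.run(args, capture_output=True, text=True)
output = process.stdout
if process.stderr:
    logging.error(process.stderr)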
Example #22
    def getSalientPoints(self, salienceSignals, grid, peaks):
        """
        split peaks signal into two: 
        Salient points, and non-salient points
        """
        if self.parameters["salienceTreshold"].value:
            salience = [
                self.getSalience(pos, salienceSignals, grid, self.parameters["salienceWindow"].value) for pos in peaks.times
            ]
            salientPoints = [i for i, v in enumerate(salience) if v >= self.parameters["salienceTreshold"].value]
            nonSalientPoints = [i for i, v in enumerate(salience) if v < self.parameters["salienceTreshold"].value]

            # if there is no point above the threshold of salience, just return the most salient one
            if len(salientPoints) == 0 and len(salience) > 0:
                salientPoints = [np.argmax(salience)]
                nonSalientPoints = [p for p in nonSalientPoints if p not in salientPoints] 
            
            nonSalient = Signal([peaks.values[i] for i in nonSalientPoints],
                                times=[peaks.times[i] for i in nonSalientPoints],
                                sparse=True)
            peaks = Signal([peaks.values[i] for i in salientPoints], times=[peaks.times[i] for i in salientPoints])
            return peaks, nonSalient
        else:
            return peaks, Signal([], times=[])
Example #23
    def predictOne(self, path: str):
        """
        method copied from the main file in the project
        """
        # project = importlib.import_module("vendors.Vocal-Melody-Extraction.project")
        from project.MelodyExt import feature_extraction
        from project.utils import load_model, save_model, matrix_parser
        from project.test import inference
        from project.model import seg, seg_pnn, sparse_loss
        from project.train import train_audio

        # load wav
        song = path

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model

        model = load_model(
            resource_filename(
                __name__,
                "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" +
                self.parameters["model"].value))
        batch_size_test = 10
        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=batch_size_test)

        # Output
        r = matrix_parser(extract_result)
        return (Signal(r[:, 0], sampleRate=50), Signal(r[:, 1], sampleRate=50))
Example #24
File: cqt.py Project: jarey/Automix
    def predictOne(self, samples: Signal):
        """Calculates the cqt of the given audio using librosa.

        Args:
            samples (Signal): The samples of the audio.
            grid (list of float): The .

        Returns:
            tuple of List[float]: The cqt of the audio.

        """
        sr = samples.sampleRate
        hop_length = self.parameters["hopLength"].value
        n_bins = self.parameters["binNumber"].value
        cqt_sr = sr / hop_length
        cqt = librosa.cqt(samples.values,
                          sr=sr,
                          hop_length=hop_length,
                          n_bins=n_bins)
        linear_cqt = np.abs(cqt)

        if self.parameters["scale"].value == "Amplitude":
            result = linear_cqt
        elif self.parameters["scale"].value == "Power":
            result = linear_cqt**2
        elif self.parameters["scale"].value == "MSAF":
            result = librosa.amplitude_to_db(linear_cqt**2, ref=np.max)
            result += np.min(
                result
            ) * -1  # Inverting the db scale (don't know if this is correct)
        elif self.parameters["scale"].value == "Power dB":
            result = librosa.amplitude_to_db(
                linear_cqt,
                ref=np.max)  # Based on Librosa, standard power spectrum in dB
            result += np.min(result) * -1
        elif self.parameters["scale"].value == "Perceived dB":
            freqs = librosa.cqt_frequencies(linear_cqt.shape[0],
                                            fmin=librosa.note_to_hz('C1'))
            result = librosa.perceptual_weighting(linear_cqt**2,
                                                  freqs,
                                                  ref=np.max)
            result += np.min(result) * -1
        else:
            raise ValueError("parameterScale is not a correct value")

        return (Signal(result.T, sampleRate=cqt_sr), )
Example #25
def findPhase(signal: Signal, grid: Signal, period: int, toleranceWindow=0):
    """
    find the phase of the signal based on it's amplitude at the grid positions and the number of peaks
    - signal: works best with a discrete signal as no aglomeration is done
    - grid: positions of the beats
    - period: the periodicity to test
    - tolerance window: if not at 0, returns the closest value in the signal to the grid, within the tolerance window
    
    test:
    # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), period=4)
    # print(result) = 1
    """
    phases = []
    for phase in range(period):
        values = [signal.getValue(grid.times[i], toleranceWindow=toleranceWindow) for i in range(phase, len(grid), period)]
        phases.append((np.sum([v for v in values if v is not None]) * len(values)))

    bestPhase = np.argmax(phases)
    return bestPhase
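The phase search reduces to modular arithmetic over grid indices; a pure-numpy sketch of the same scoring on an integer grid, reproducing the commented test (independent of the Signal class):

import numpy as np

def find_phase(event_times, grid_times, period):
    grid_times = np.asarray(grid_times)
    # Mark which grid ticks carry an event (exact matches only, for the sketch).
    hits = np.isin(grid_times, event_times).astype(float)
    # Score each phase class: sum of values at its ticks times the tick count, as above.
    scores = [hits[phase::period].sum() * hits[phase::period].size
              for phase in range(period)]
    return int(np.argmax(scores))

# Mirrors the commented test: events at 1, 5, 9, 10, 13 on a 16-tick grid.
print(find_phase(np.array([0, 4, 8, 9, 12]) + 1, np.arange(16), period=4))  # 1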
Example #26
def recursiveMap(obj):
    """
    recursively map all the fields of the json decoded object to class from the model
    """
    try:
        from automix.model.classes.signal import Signal, SparseSignal, SparseSegmentSignal
        if isinstance(obj, dict):
            if u'type' in obj and (obj[u"type"] == str(Signal) or obj[u"type"] == str(SparseSignal)):
                obj = Signal.jsonDeserialize(obj)
            elif u'type' in obj and obj[u"type"] == str(SparseSegmentSignal):
                obj = SparseSegmentSignal.jsonDeserialize(obj)
            else:
                for key, value in obj.items():
                    obj[key] = recursiveMap(obj[key])
        elif isinstance(obj, list):
            for key, value in enumerate(obj):
                obj[key] = recursiveMap(obj[key])
    except Exception:
        pass

    return obj
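The same walk pattern on plain JSON, with the type dispatch factored into a callback; a generic sketch independent of the model classes:

import json

def recursive_map(obj, transform):
    # Same traversal as recursiveMap: descend into dicts and lists, transform leaves.
    if isinstance(obj, dict):
        return {k: recursive_map(v, transform) for k, v in obj.items()}
    if isinstance(obj, list):
        return [recursive_map(v, transform) for v in obj]
    return transform(obj)

data = json.loads('{"a": [1, 2, {"b": 3}]}')
print(recursive_map(data, lambda v: v * 2 if isinstance(v, int) else v))
# {'a': [2, 4, {'b': 6}]}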
Example #27
    def findPhaseLocal(self,
                       period: int,
                       signal: Signal,
                       grid: Signal,
                       toleranceWindow=0.1):
        """
        find the phase of the signal based on it's amplitude at the grid positions and the number of peaks
        - signal: works best with a discrete signal as no aglomeration is done
        - grid: positions of the beats
        - period: the periodicity to test
        - tolerance window: if not at 0, returns the closest value in the signal to the grid, within the tolerance window

        test:
        # result = findPhase(Signal(np.ones(5), times=np.array([0, 4, 8, 9, 12])+1), Signal(np.ones(16), times=range(16)), 
            period=4)
        # print(result) = 1
        """
        phases = []
        for phase in range(period):
            values = [
                signal.getValue(grid.times[i], toleranceWindow=toleranceWindow)
                for i in range(phase, len(grid), period)
            ]
            values = [v for v in values if v is not None]
            if self.parameters["distanceMetric"].value == "RMS":
                value = np.sqrt(np.mean(np.array(values)**2))
            elif self.parameters["distanceMetric"].value == "sum":
                value = np.sum(values)
            elif self.parameters["distanceMetric"].value == "Veire":
                value = np.sum(values) * len(values)
            else:
                raise ValueError("Bad distance metric parameter: " +
                                 self.parameters["distanceMetric"].value)
            phases.append(value)

        # bestPhase = np.argmax(phases)
        return phases
Example #28
 def predictOne(self, path):
     y, sr = librosa.load(path, sr=self.parameters["sampleRate"].value)
     return (Signal(y, sampleRate=sr), )
Example #29
    def predictOne(self, samples: Signal):
        """
        """
        y, sr = samples.values, samples.sampleRate

        # And compute the spectrogram magnitude and phase
        S_full, phase = librosa.magphase(librosa.stft(y))

        hopLength = 2048 // 4  # n_fft / 4, the default hop of librosa.stft
        newSampleRate = sr / hopLength

        # We'll compare frames using cosine similarity, and aggregate similar frames
        # by taking their (per-frequency) median value.
        #
        # To avoid being biased by local continuity, we constrain similar frames to be
        # separated by at least 2 seconds.
        #
        # This suppresses sparse/non-repetitive deviations from the average spectrum,
        # and works well to discard vocal elements.
        S_filter = librosa.decompose.nn_filter(
            S_full,
            aggregate=np.median,
            metric='cosine',
            width=int(librosa.time_to_frames(2, sr=sr)))

        # The output of the filter shouldn't be greater than the input
        # if we assume signals are additive. Taking the pointwise minimum
        # with the input spectrum forces this.
        S_filter = np.minimum(S_full, S_filter)

        # We can also use a margin to reduce bleed between the vocals and instrumentation masks.
        # Note: the margins need not be equal for foreground and background separation
        margin_i, margin_v = 2, 10
        power = 2

        mask_i = librosa.util.softmask(S_filter,
                                       margin_i * (S_full - S_filter),
                                       power=power)

        mask_v = librosa.util.softmask(S_full - S_filter,
                                       margin_v * S_filter,
                                       power=power)

        # Once we have the masks, simply multiply them with the input spectrum
        # to separate the components
        S_foreground = mask_v * S_full
        S_background = mask_i * S_full

        # # sphinx_gallery_thumbnail_number = 2
        # idx = slice(*librosa.time_to_frames([30, 35], sr=sr))
        # plt.figure(figsize=(12, 8))
        # plt.subplot(3, 1, 1)
        # librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max),
        #                         y_axis='log', sr=sr)
        # plt.title('Full spectrum')
        # plt.colorbar()

        # plt.subplot(3, 1, 2)
        # librosa.display.specshow(librosa.amplitude_to_db(S_background[:, idx], ref=np.max),
        #                         y_axis='log', sr=sr)
        # plt.title('Background')
        # plt.colorbar()
        # plt.subplot(3, 1, 3)
        # librosa.display.specshow(librosa.amplitude_to_db(S_foreground[:, idx], ref=np.max),
        #                         y_axis='log', x_axis='time', sr=sr)
        # plt.title('Foreground')
        # plt.colorbar()
        # plt.tight_layout()
        # plt.show()

        return (Signal(S_foreground.T, sampleRate=newSampleRate),
                Signal(S_background.T, sampleRate=newSampleRate))
Example #30
    def predictOne(self, samples: Signal) -> Signal:
        """
        TODO
        """
        # Structural Features params
        # Mp = self.parameters["Mp_adaptive"].value  # Size of the adaptive threshold for
        # peak picking
        # od = self.parameters["offset_thres"].value  # Offset coefficient for adaptive
        # thresholding

        M = self.parameters[
            "M_gaussian"].value  # Size of gaussian kernel in beats
        m = self.parameters[
            "m_embedded"].value  # Number of embedded dimensions
        k = self.parameters["k_nearest"].value  # k*N-nearest neighbors for the
        # recurrence plot

        # Preprocess to obtain features, times, and input boundary indices
        F = np.array(samples.values)
        if F.ndim == 1:
            F = np.array([F]).T

        if len(F.shape) == 2:
            F = np.concatenate((np.zeros(
                (m // 2, F.shape[1])), F, np.zeros((m // 2, F.shape[1]))))
        else:
            F = np.concatenate((np.zeros(m // 2), F, np.zeros(m // 2)))
        # Normalize
        # F_norm = Normalize().predictOne(F)
        # F = U.normalize(F, norm_type=self.parameters["bound_norm_feats"].value)

        # Check size in case the track is too short
        if F.shape[0] > 20:

            # if self.framesync: # Whether to use frame-synchronous or beat-synchronous features.
            #     red = 0.1
            #     F_copy = np.copy(F)
            #     F = librosa.util.utils.sync(F.T, np.linspace(0, F.shape[0], num=F.shape[0] * red), pad=False).T

            # Embedding the feature space (i.e. shingle)
            # E[i] = F[i]+F[i+1]+F[i+2]
            E = embedded_space(F, m)
            # plt.imshow(E.T, interpolation="nearest", aspect="auto"); plt.show()

            # Recurrence matrix
            R = librosa.segment.recurrence_matrix(
                E.T,
                k=k * int(F.shape[0]),
                width=1,  # zeros from the diagonal
                metric="euclidean",
                sym=True).astype(np.float32)

            # Circular shift
            L = circular_shift(R)

            # Obtain structural features by filtering the lag matrix
            SF = gaussian_filter(L.T, M=M, axis=1)
            SF = gaussian_filter(SF, M=1, axis=0)  # smooth the result of the previous filter, not L.T again

            # Compute the novelty curve
            nc = compute_nc(SF)
            nc = nc[m // 2:-m // 2]
            times = samples.times[:-m]
            return (Signal(nc, times=times), )
        else:
            return (None, )
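The recurrence-matrix step on its own; a minimal sketch on toy data, mirroring the call above (librosa's API, random features):

import numpy as np
import librosa

F = np.random.rand(100, 12)  # toy feature matrix: 100 frames, 12 dimensions
R = librosa.segment.recurrence_matrix(
    F.T,                        # librosa expects one feature vector per column
    k=int(0.04 * F.shape[0]),   # k nearest neighbours per frame
    width=1,                    # exclude a band around the diagonal
    metric="euclidean",
    sym=True).astype(np.float32)
print(R.shape)  # (100, 100)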