Python MonoLoaderの例、essentia.standard.MonoLoader Pythonの例

コード例 #1

0

ファイルを表示

def eval_hpss(
    harm_estimates, harm_references, perc_estimates, perc_references, n_segs, seg_len
):
    """Summarise BSS metrics for harmonic/percussive separation estimates.

    For every algorithm key of ``perc_estimates`` at most ``n_segs`` tracks
    are evaluated: the harmonic and percussive reference/estimate files are
    loaded with ``MonoLoader``, handed to ``evaluate`` as a two-source pair
    and the result is reduced with a NaN-aware median.

    Args:
        harm_estimates: ``{algo_name: {track_prefix: filename}}`` of harmonic
            estimates.
        harm_references: ``{track_prefix: filename}`` of harmonic references.
        perc_estimates: ``{algo_name: {track_prefix: filename}}`` of
            percussive estimates; its keys drive both loops.
        perc_references: ``{track_prefix: filename}`` of percussive
            references.
        n_segs: maximum number of tracks evaluated per algorithm.
        seg_len: number of samples each loaded file is reshaped to.

    Returns:
        ``{"harmonic_bss": {algo: {metric: float}},
        "percussive_bss": {algo: {metric: float}}}`` with one entry per name
        in ``bss_metric_names``.
    """

    def load_column(path):
        # Every file must hold exactly seg_len samples or reshape() raises.
        return MonoLoader(filename=path)().reshape(seg_len, 1)

    total = {"harmonic_bss": {}, "percussive_bss": {}}

    for algo_name in perc_estimates:
        print()
        print("\tEVALUATING ALGO {0}".format(algo_name))

        # One (4 metrics x 2 sources) slot per evaluated track.
        # NOTE(review): slots of tracks that are never evaluated stay at zero
        # and still take part in the medians below - confirm this is intended.
        bss_results = numpy.zeros(dtype=numpy.float32, shape=(n_segs, 4, 2))

        for n_seg, track_prefix in enumerate(perc_estimates[algo_name]):
            if n_seg >= n_segs:
                break

            # Index 0 holds the harmonic source, index 1 the percussive one.
            estimates = numpy.zeros(dtype=numpy.float32, shape=(2, seg_len, 1))
            references = numpy.zeros(dtype=numpy.float32, shape=(2, seg_len, 1))

            harm_ref_path = harm_references[track_prefix]
            harm_est_path = harm_estimates[algo_name][track_prefix]
            references[0] = load_column(harm_ref_path)
            estimates[0] = load_column(harm_est_path)

            perc_ref_path = perc_references[track_prefix]
            perc_est_path = perc_estimates[algo_name][track_prefix]
            references[1] = load_column(perc_ref_path)
            estimates[1] = load_column(perc_est_path)

            # presumably `evaluate` returns per-window metrics on axis 2;
            # the median collapses that axis - verify against its definition.
            per_window = evaluate(references, estimates)
            bss_results[n_seg] = numpy.asarray(numpy.nanmedian(per_window, axis=2))

        harm_bss = numpy.nanmedian(bss_results[:, :, 0], axis=0)
        total["harmonic_bss"][algo_name] = {
            metric: float(harm_bss[idx])
            for idx, metric in enumerate(bss_metric_names)
        }

        perc_bss = numpy.nanmedian(bss_results[:, :, 1], axis=0)
        total["percussive_bss"][algo_name] = {
            metric: float(perc_bss[idx])
            for idx, metric in enumerate(bss_metric_names)
        }

    return total

コード例 #2

0

ファイルを表示

ファイル: Preprocessing.py プロジェクト: jdavibedoya/SE_Wave-U-Net

def VCTK(model_config):
    """Build the band-limited ("8k") version of the VCTK noisy sets.

    The clean directories are copied unchanged. Every ``.wav`` file of the
    noisy directories is decoded at 8000 Hz, resampled back to 44100 Hz,
    trimmed or zero-padded to the length of the same file decoded at
    44100 Hz, and written as ``<name>_8k.wav`` into the matching
    ``noisy8k`` directory.

    Args:
        model_config: mapping providing ``"raw_data_path"`` and
            ``"preprocessed_data_path"``.
    """
    print("Preprocessing VCTK dataset")
    source_root = model_config["raw_data_path"] + "/VCTK"
    target_root = model_config["preprocessed_data_path"] + "/VCTK_8k_DBE"

    # Clean material is only copied (copytree raises if the target exists).
    for clean_dir in ("/clean_trainset_wav", "/clean_testset_wav"):
        shutil.copytree(source_root + clean_dir, target_root + clean_dir)

    for noisy_dir in ("/noisy_trainset_wav", "/noisy_testset_wav"):
        noisy8k_dir = target_root + noisy_dir.replace("noisy", "noisy8k")
        os.makedirs(noisy8k_dir)

        for root, _subdirs, names in os.walk(source_root + noisy_dir):
            for wav_name in names:
                if not wav_name.endswith('.wav'):
                    continue

                wav_path = os.path.join(root, wav_name)
                full_band = MonoLoader(filename=wav_path, sampleRate=44100)()
                narrow_band = MonoLoader(filename=wav_path, sampleRate=8000)()

                # Back to 44100 Hz so both versions share one time base.
                restored = Resample(
                    inputSampleRate=8000, outputSampleRate=44100)(narrow_band)

                # Force the restored signal to the full-band length.
                target_len = len(full_band)
                surplus = len(restored) - target_len
                if surplus > 0:
                    restored = restored[:target_len]
                elif surplus < 0:
                    restored = np.pad(restored,
                                      (0, -surplus),
                                      'constant',
                                      constant_values=(0, 0))

                # Only the part of the name before the first dot is kept.
                stem = wav_name.split(".")[0]
                output_name = noisy8k_dir + "/" + stem + "_8k.wav"
                MonoWriter(filename=output_name, sampleRate=44100)(restored)

コード例 #3

0

ファイルを表示

def thresholdAudio(audioPath='testDownload/', t=-30, fs=44100):
    '''
    Trim leading and trailing low-level samples of every mp3 file found in
    audioPath/<queryText>/<track>/ and write the result next to the source
    file as "<source name>computed.wav".
    It was thought to be used after soundDownload.py from the sms-tools
    package (Freesound source).

    :param audioPath: path where the sounds were downloaded (possibly the path used for soundDownload.py). Default: testDownload/
    :param t: threshold (in dB) relative to the absolute peak of each file. Default: -30
    :param fs: sample rate used to decode the input and to write the output. Default: 44100
    :return: None; prints "Done!!" when finished
    '''

    # dB to linear gain; 20.0 keeps the division exact under Python 2 too
    # (-30/20 would floor to -2 there).
    thTimes = 10 ** (t / 20.0)
    instrument = ls(audioPath)                           # queryText folders inside the given path
    audioTrack = [ls(str(key)) for key in instrument]    # track folders inside each queryText folder
    a, b = np.shape(audioTrack)                          # requires the same number of tracks per query
    finalArray = [os.path.join(str(audioTrack[i][j]), arch.name)
                  for i in np.arange(a) for j in np.arange(b) for arch in
                  Path(str(audioTrack[i][j])).iterdir()
                  if arch.name.endswith('.mp3')]         # one path per mp3 file

    for key in finalArray:
        track = MonoLoader(filename=key, sampleRate=fs)()    # decode and downmix to mono
        n = len(track)
        if n == 0:
            continue                                     # nothing to trim in an empty file
        threshold = np.max(abs(track)) * thTimes

        i = 0
        while i < n and abs(track[i]) < threshold:       # first significant sample
            i += 1
        if i == n:
            continue                                     # no sample reaches the threshold (t > 0)

        j = n - 1
        while abs(track[j]) < threshold:                 # last significant sample; stops at i at the latest
            j -= 1

        # j + 1 so that the last significant sample itself is kept.
        shortTrack = track[i:j + 1]
        # Write with the rate the audio was decoded at, not the writer default.
        MonoWriter(filename=key + 'computed.wav', sampleRate=fs)(shortTrack)
    print('Done!!')

コード例 #4

0

ファイルを表示

ファイル: test_onsetdetectionglobal.py プロジェクト: vishalbelsare/essentia

    def testRegression(self):
        # Regression check of OnsetDetectionGlobal against stored reference
        # vectors for the 'infogain' and 'beat_emphasis' methods.
        loop_path = join(testdata.audio_dir, 'recorded', 'techno_loop.wav')
        audio = MonoLoader(filename=loop_path)()

        infogain_algo = stdOnsetDetectionGlobal(method='infogain')
        beat_emphasis_algo = stdOnsetDetectionGlobal(method='beat_emphasis')

        # NOTE(review): the result names are crossed with respect to the
        # algorithm that produced them (the 'infogain' output is stored as
        # calculated_beat_emphasis and vice versa). Kept as-is because the
        # stored references may have been generated with the same naming -
        # confirm against the .npy files.
        calculated_beat_emphasis = infogain_algo(audio).tolist()
        calculated_infogain = beat_emphasis_algo(audio).tolist()

        # The reference files were originally produced with:
        #   save('input_infogain.npy', calculated_beat_emphasis)
        #   save('input_beat_emphasis.npy', calculated_infogain)

        # Reference samples are loaded as expected values
        reference_dir = filedir()
        expected_infogain = load(
            join(reference_dir, 'onsetdetectionglobal/infogain.npy')).tolist()
        expected_beat_emphasis = load(
            join(filedir(), 'onsetdetectionglobal/beat_emphasis.npy')).tolist()

        self.assertAlmostEqualVectorFixedPrecision(
            calculated_beat_emphasis, expected_beat_emphasis, 2)
        self.assertAlmostEqualVectorFixedPrecision(
            calculated_infogain, expected_infogain, 2)

コード例 #5

0

ファイルを表示

    def testRegression(self):
        # Regression check of PercivalBpmEstimator on the techno loop and on
        # three sub-sections of it.
        loop_path = join(testdata.audio_dir, 'recorded', 'techno_loop.wav')
        audio = MonoLoader(filename=loop_path)()

        # Tolerance tuned to 0.1 based on an empirical run giving
        # BPM = 125.28408813476562.
        # NOTE(review): the meaning of the third argument depends on the
        # assertAlmostEqual of the test base class (stock unittest would read
        # it as `places`) - confirm.
        full_estimate = PercivalBpmEstimator()(audio)
        self.assertAlmostEqual(125, full_estimate, 0.1)

        # Markers proportional to the audio length, the same strategy that is
        # used for LoopBpmEstimator().
        total = len(audio)
        len90 = int(0.9 * total)   # end point for 90% of the loop
        len75 = int(0.75 * total)  # 75% point
        len50 = int(0.5 * total)   # mid point

        # If a future change breaks these asserts, something in the algorithm
        # has changed. Earlier runs printed 124.90558624267578 for the first
        # two sections.
        expectedEstimate = 124.9
        sections = (audio[0:len90], audio[5000:len75], audio[0:len50])
        for section in sections:
            estimate = PercivalBpmEstimator()(section)
            self.assertAlmostEqual(expectedEstimate, estimate, 0.1)

コード例 #6

0

ファイルを表示

def spectral_features(filelist):
    """
    Download every URL of `filelist`, analyse the first 100 ms of each file
    and return one feature row per file.

    Columns: 0 = zero crossing rate, 1 = spectral centroid, 2-4 = the three
    outputs of DistributionShape computed on the spectrum's central moments.
    Each download overwrites /tmp/localfile.wav.
    """
    sample_rate = 44100
    number_of_features = 5
    frame_size = int(round(0.100 * sample_rate))   # 100ms
    features = np.zeros([len(filelist), number_of_features])

    for row, url in enumerate(filelist):
        print(url)
        urllib.urlretrieve(url, filename='/tmp/localfile.wav')
        audio = MonoLoader(filename='/tmp/localfile.wav', sampleRate=sample_rate)()

        zero_crossings = ZeroCrossingRate()
        # The frame is windowed before the FFT to avoid spectral artifacts.
        window = Windowing(type='hamming')
        to_spectrum = Spectrum()
        moments = CentralMoments()
        shape = DistributionShape()
        to_centroid = Centroid()

        # Only the first frame is analysed for now.
        # TODO: generate values for the entire file, probably by averaging.
        first_frame = audio[0:frame_size]
        features[row, 0] = zero_crossings(first_frame)

        magnitude = to_spectrum(window(first_frame))
        centroid_value = to_centroid(magnitude)
        shape_values = shape(moments(magnitude))
        features[row, 1] = centroid_value
        features[row, 2:5] = shape_values

    return features

コード例 #7

0

ファイルを表示

ファイル: extract_feats.py プロジェクト: gary109/ddc

def extract_mel_feats(audio_fp,
                      analyzers,
                      fs=44100.0,
                      nhop=512,
                      nffts=[1024, 2048, 4096],
                      log_scale=True):
    """Compute one mel feature sequence per FFT size and stack them.

    Args:
        audio_fp: path of the audio file to decode.
        analyzers: one ``(window, spectrum, mel)`` triple of callables per
            entry of ``nffts``; pairs are matched with ``zip``.
        fs: sample rate handed to the loader.
        nhop: hop size passed to ``FrameGenerator``.
        nffts: frame sizes passed to ``FrameGenerator``.
        log_scale: when true, return ``log(features + 1e-16)``.

    Returns:
        Array whose last axis indexes the FFT sizes (the stacked channel
        axis is moved from position 0 to position 2).
    """
    samples = MonoLoader(filename=audio_fp, sampleRate=fs)()

    per_channel = []
    for nfft, (window, spectrum, mel) in zip(nffts, analyzers):
        per_channel.append([
            mel(spectrum(window(frame)))
            for frame in FrameGenerator(samples, nfft, nhop)
        ])

    # Move channels to axis 2 instead of axis 0.
    stacked = np.transpose(np.stack(per_channel), (1, 2, 0))

    if not log_scale:
        return stacked

    # Numerically stable log scaling: 1e-16 was chosen from a histogram of
    # raw values as an epsilon more than 2 std dev left of the mean.
    return np.log(stacked + 1e-16)

コード例 #8

0

ファイルを表示

def file_to_hpcp(filename):
    """Return the HPCP vector of `filename` averaged over all frames.

    The file is decoded with MonoLoader and analysed in frames of 1024
    samples with a hop of 512: window -> spectrum -> spectral peaks -> HPCP.

    :param filename: path of the audio file.
    :return: element-wise mean of the per-frame HPCP vectors.
    """
    audio = MonoLoader(filename=filename)()
    windowing = Windowing(type='blackmanharris62')
    spectrum = Spectrum()
    spectral_peaks = SpectralPeaks(orderBy='magnitude',
                                   magnitudeThreshold=0.001,
                                   maxPeaks=20,
                                   minFrequency=20,
                                   maxFrequency=8000)
    # TODO: check whether normalized='unitSum' is what makes sense here.
    hpcp = HPCP(maxFrequency=8000)

    # Only the HPCP vectors are needed for the result; the spectra are not
    # retained, so memory stays proportional to the HPCP size per frame.
    hpcp_group = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        frequencies, magnitudes = spectral_peaks(spectrum(windowing(frame)))
        hpcp_group.append(hpcp(frequencies, magnitudes))

    # Rows become HPCP bins after the transpose; average across frames.
    mean_hpcp = np.mean(np.array(hpcp_group).T, axis=1)
    return mean_hpcp

コード例 #9

0

ファイルを表示

ファイル: libod.py プロジェクト: siyarvurucu/SAAT

def hfc(filename):
    """Detect onsets in `filename` from an HFC onset detection function.

    The file is decoded at 44100 Hz and analysed in frames of 1024 samples
    with a hop of 512. The per-frame detection values are passed to
    ``Onsets`` as a single detection function with weight 1.

    :param filename: path of the audio file.
    :return: whatever ``Onsets()`` returns for the detection function.
    """
    audio = MonoLoader(filename=filename, sampleRate=44100)()

    # Build the algorithms once instead of once per frame.
    window = Windowing(type='hann')
    fft = FFT()
    to_polar = CartesianToPolar()
    detection = OnsetDetection(method='hfc')

    features = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = to_polar(fft(window(frame)))
        features.append(detection(mag, phase))
    return Onsets()(array([features]), [1])

コード例 #10

0

ファイルを表示

    def testRegression(self):
        # The first output of the rhythm extractor (the BPM) for the techno
        # loop must round to 126; an earlier run gave 125.726791382.
        loop_path = join(testdata.audio_dir, 'recorded', 'techno_loop.wav')
        audio = MonoLoader(filename=loop_path)()

        extractor = stdRhythmExtractor()
        bpm, _, _, _ = extractor(audio)

        self.assertAlmostEqualFixedPrecision(bpm, 126, 0)

コード例 #11

0

ファイルを表示

ファイル: libod.py プロジェクト: siyarvurucu/SAAT

def noveltycurve(filename):
    """Detect onsets in `filename` from a novelty curve of band energies.

    The file is decoded at 44100 Hz and analysed in frames of 1024 samples
    with a hop of 512. The ``FrequencyBands`` output of every frame feeds
    ``NoveltyCurve``, whose result is passed to ``Onsets`` as a single
    detection function with weight 1.

    :param filename: path of the audio file.
    :return: whatever ``Onsets()`` returns for the novelty curve.
    """
    audio = MonoLoader(filename=filename, sampleRate=44100)()

    # Build the algorithms once instead of once per frame.
    window = Windowing(type='hann')
    fft = FFT()
    to_polar = CartesianToPolar()
    bands = FrequencyBands()

    band_energy = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        # Only the magnitude is used; the phase is discarded.
        mag, _phase = to_polar(fft(window(frame)))
        band_energy.append(bands(mag))
    novelty = NoveltyCurve()(band_energy)
    return Onsets()(np.array([novelty]), [1])

コード例 #12

0

ファイルを表示

def is_single_event(audiofile, max_duration=7):
    '''
    Tell whether the audio signal contains one single event, based on
    'estimate_number_of_events'. The answer is stored in the module-level
    variable '_is_single_event_cache' and returned as-is by every later call,
    whatever arguments that call receives.
    '''
    global _is_single_event_cache
    if _is_single_event_cache is not None:
        return _is_single_event_cache

    sample_rate = 44100
    audio = MonoLoader(filename=audiofile, sampleRate=sample_rate).compute()

    # A file longer than max_duration is never considered a single event.
    if len(audio)/sample_rate > max_duration:
        _is_single_event_cache = False
    else:
        event_count = estimate_number_of_events(audiofile, audio, sample_rate=sample_rate)
        _is_single_event_cache = event_count == 1
    return _is_single_event_cache

コード例 #13

0

ファイルを表示

    def convert_to_wav(audiofile, samplerate=44100):
        """Decode `audiofile` to mono and write it as a new WAV file in /tmp.

        The conversion goes through Essentia so that the timbral models
        always read a WAV file.

        :param audiofile: path of the file to convert.
        :param samplerate: rate used both for decoding and for writing.
        :return: path of the written file (random UUID based name).
        """
        logger.debug('{0}: converting to WAV'.format(audiofile))

        unique_id = str(uuid.uuid4())
        output_filename = '/tmp/{0}-converted.wav'.format(unique_id)

        loader = MonoLoader(filename=audiofile, sampleRate=samplerate)
        samples = loader()
        writer = MonoWriter(filename=output_filename, format='wav', sampleRate=samplerate)
        writer(samples)
        return output_filename

コード例 #14

0

ファイルを表示

    def estimate(cls, audio):
        """Return the BPM of `audio` estimated by RhythmExtractor2013.

        :param audio: object exposing the path of the file as ``filename``.
        :return: first output of the extractor (the BPM).
        """
        # The extractor consumes a single mono signal. MonoLoader downmixes
        # multi-channel files itself, so it is used for every input; the
        # former StereoLoader branch handed the loader's multi-output result
        # to the extractor, which cannot be processed as a mono signal.
        signal = MonoLoader(filename=audio.filename)()

        extractor = RhythmExtractor2013(method="multifeature")
        bpm, *_ = extractor(signal)
        return bpm

コード例 #15

0

ファイルを表示

ファイル: preprocess.py プロジェクト: kiminh/self-attention-music-tagging

 def get_melspectrogram(self, fn):
     """Load `fn` and return ``(samples, mel spectrogram in dB)``.

     The file is decoded to mono at ``self.fs``; the mel spectrogram uses
     ``self.window`` as FFT size, ``self.hop`` as hop length and
     ``self.mel`` mel bands.
     """
     x = MonoLoader(filename=fn, sampleRate=self.fs)()
     # The signal is passed by keyword: recent librosa releases accept the
     # audio argument of melspectrogram only as `y=...`, and older releases
     # accept the keyword form as well.
     mel_spec = librosa.feature.melspectrogram(y=x,
                                               sr=self.fs,
                                               n_fft=self.window,
                                               hop_length=self.hop,
                                               n_mels=self.mel)
     # NOTE(review): amplitude_to_db is applied to melspectrogram's default
     # output - confirm that an amplitude (not power) scale is intended.
     return x, librosa.core.amplitude_to_db(mel_spec)

コード例 #16

0

ファイルを表示

    def load(self, fname):
        '''
        Decode `fname` to mono into self.audio and derive self.title from the
        last '/'-separated path component with every '.mp3' removed.
        '''
        self.audio = MonoLoader(filename=fname)()

        last_component = fname.split('/')[-1]
        self.title = last_component.replace('.mp3', '')

コード例 #17

0

ファイルを表示

 def plot(self):
     """Load the file selected in ``self.filelocation`` and display it.

     The call order matters: the previous state is discarded first, then
     the file is opened and decoded, and only then is the view refreshed.
     """
     # presumably clears the state of the previous plot - verify in discard()
     self.discard()
     inputFile = self.filelocation.get()
     # Keep a wave reader of the file on the instance.
     self.wf = wave.open(inputFile, 'rb')
     self.featurename.set("Audio")
     # Mono samples of the same file, decoded at 44100 Hz.
     self.audio = MonoLoader(filename=inputFile, sampleRate=44100)()
     self.showfeature()
     # Move the cursor back to x = 0 and give the canvas the focus.
     self.cursor.set_xdata(0)
     self.canvaswidget.focus_set()
     # Annotations are loaded only when the ALoad variable holds 1.
     if (self.ALoad.get() == 1):
         self.loadAnnotations()

コード例 #18

0

ファイルを表示

    def testSilentEdge(self):
        # Non-musical silence whose length is not a beat period is put in
        # front of the loop. The confidence must still exceed the benchmark
        # because the start of the loop is a hard transient.
        loop_path = join(testdata.audio_dir, 'recorded', 'techno_loop.wav')
        audio = MonoLoader(filename=loop_path)()

        bpmEstimate = 125
        # Arrived at empirically: minimum confidence observed in test runs.
        benchmarkConfidence = 0.96

        # N.B. the beat period is 21168 samples for 125 bpm at 44.1 kHz, so
        # 30000 samples of silence do not line up with it.
        leadingSilence = zeros(30000)
        paddedLoop = numpy.append(leadingSilence, audio)

        confidence = LoopBpmConfidence()(paddedLoop, bpmEstimate)
        self.assertGreater(confidence, benchmarkConfidence)

コード例 #19

0

ファイルを表示

ファイル: Evaluate.py プロジェクト: jdavibedoya/SE_Wave-U-Net

def produce_estimate(model_config, model_path, input_path, output_path):
    """Run the model on a band-limited copy of `input_path` and save the result.

    The input is decoded once at ``model_config['expected_sr']`` (used only
    as length reference) and once at 8000 Hz; the 8 kHz version is resampled
    back to the expected rate, trimmed or zero-padded to the reference
    length, passed to ``predict`` and written to
    ``<output_path>/<input file name>_prediction.wav``.

    Args:
        model_config: mapping providing at least ``'expected_sr'``; also
            forwarded to ``predict``.
        model_path: forwarded to ``predict``.
        input_path: path of the audio file to process.
        output_path: directory for the prediction; created when missing.
    """
    print("Producing estimate for file " + input_path)
    target_sr = model_config['expected_sr']

    # Read audio
    audio = MonoLoader(filename=input_path, sampleRate=target_sr)()
    audio_8k = MonoLoader(filename=input_path, sampleRate=8000)()

    # Resample to the rate the reference was decoded at and the prediction
    # is written with; a fixed 44100 only matched when expected_sr was 44100.
    audio_nb = Resample(inputSampleRate=8000,
                        outputSampleRate=target_sr)(audio_8k)

    # Trimming/appending so that both signals have the same length
    len_audio = len(audio)
    len_diff = len(audio_nb) - len_audio
    if len_diff > 0:
        audio_nb = audio_nb[:len_audio]
    elif len_diff < 0:
        audio_nb = np.pad(audio_nb, (0, abs(len_diff)),
                          'constant',
                          constant_values=(0, 0))

    # Prediction
    audio_nb = np.expand_dims(audio_nb, axis=0).T  # (n_frames, n_channels)
    prediction_audio = predict(audio_nb, model_config,
                               model_path)  # Get estimate
    prediction_file_name = os.path.join(
        output_path,
        input_path.split("/")[-1]) + "_prediction.wav"

    # Save estimate as audio file
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    librosa.output.write_wav(prediction_file_name, prediction_audio,
                             target_sr)

コード例 #20

0

ファイルを表示

ファイル: analyze.py プロジェクト: trostli/ac-audio-extractor

def is_single_event(audiofile, max_duration=7):
    '''
    Estimate if the audio signal contains one single event using
    'estimate_number_of_events'. The result is stored in the module-level
    variable '_is_single_event_cache' so it can be reused by later calls of
    'is_single_event'.

    If creating the loader fails with the "more than 2 channels" error, the
    file is converted with 'convert_to_wav' and loaded from the converted
    copy. Any other RuntimeError is propagated to the caller.
    '''
    global _is_single_event_cache
    if _is_single_event_cache is None:
        sample_rate = 44100
        try:
            audio_file = MonoLoader(filename=audiofile, sampleRate=sample_rate)
        except RuntimeError as e:
            if MORE_THAN_2_CHANNELS_EXCEPTION_MATCH_TEXT not in str(e):
                # Unknown failure: re-raise instead of continuing without a
                # loader, which would only surface later as a NameError.
                raise
            converted_audiofile = convert_to_wav(audiofile)
            audio_file = MonoLoader(filename=converted_audiofile,
                                    sampleRate=sample_rate)
        audio = audio_file.compute()
        if len(audio) / sample_rate > max_duration:
            # If file is longer than max duration, we don't consider it to be single event
            _is_single_event_cache = False
        else:
            _is_single_event_cache = estimate_number_of_events(
                audiofile, audio, sample_rate=sample_rate) == 1
    return _is_single_event_cache

コード例 #21

0

ファイルを表示

def split_audio(audio_path, split_list):
    """Cut the decoded audio into consecutive chunks.

    Each entry of `split_list` is truncated to int and used as the length
    (in samples) of the next chunk. Splitting stops at the first entry that
    would run past the end of the audio or that is not positive.

    NOTE(review): the original comment described split_list as an increasing
    sequence of seconds, but the values are applied as sample counts -
    confirm with the callers.

    :return: (list of chunks, full decoded audio)
    """
    audio = MonoLoader(filename=audio_path)()
    chunks = []
    begin = 0
    for length in split_list:
        stop = begin + int(length)
        if not (begin < stop <= audio.size):
            break
        chunks.append(audio[begin:stop])
        begin = stop
    return chunks, audio

# segmentate("/home/migue/sonidos animales")

コード例 #22

0

ファイルを表示

def lowLevel(songName):
    """Return the feature list of `songName`, extracting it when unknown.

    The key is the last '/'-separated component of `songName` without ','
    and ';'. A key already present in the global `dataset` returns the
    stored features. Otherwise the features are extracted, appended as one
    CSV line to data.csv (guarded by the global `lock`) and returned.

    Every numpy array in the extracted list is replaced by the mean returned
    by `stdDev`, and the matching std value is appended at the end.
    """
    global dataset
    global lock
    print(songName)
    # Remove ; and , from song names so they cannot break the CSV line.
    key = re.sub(r',', "", songName.split('/')[-1])
    key = re.sub(r';', "", key)
    # No extraction needed when the song is already in the dataset.
    if key in dataset:
        return dataset[key]

    audio = MonoLoader(filename=songName)()

    # NOTE(review): the positions deleted/overwritten below depend on the
    # output order of the two extractors - confirm against their docs.
    feature = list(LowLevelSpectralEqloudExtractor()(audio))
    del feature[1]
    del feature[1]
    featureTwo = list(LowLevelSpectralExtractor()(audio))
    del featureTwo[0]
    del featureTwo[-2]
    featureTwo[4] = feature[4][1]
    feature.extend(featureTwo)

    feature.append(Loudness()(audio))
    feature.append(LogAttackTime()(audio)[0])
    feature.append(KeyExtractor()(audio)[2])
    data = RhythmExtractor2013()(audio)
    feature.append(data[0])
    feature.append(data[2])

    # Index over the length taken before the loop: the std values appended
    # inside the loop must not be visited themselves.
    for x in range(len(feature)):
        if type(feature[x]) is np.ndarray:
            mean, std = stdDev(feature[x])
            feature[x] = mean
            feature.append(std)

    arr = key + "," + str(feature)[1:-1] + "\n"
    # Open, write and close while holding the lock: the buffered line
    # reaches the file on close, so closing must happen before the release.
    # try/finally releases the lock even when the write fails.
    lock.acquire()
    try:
        with open('data.csv', 'a') as f:
            f.write(arr)
    finally:
        lock.release()
    return feature

コード例 #23

0

ファイルを表示

def rms_centroids(filename, frameSize=1024, hopSize=512, sampleRate=44100):
    """Compute the per-frame RMS and centroid of the spectrum of a file.

    :param filename: path of the audio file.
    :param frameSize: frame size passed to FrameGenerator.
    :param hopSize: hop size passed to FrameGenerator.
    :param sampleRate: rate used to decode the file and to set the centroid
        range (``int(sampleRate / 2)``).
    :return: ``(rms values, centroid values)`` as two numpy arrays with one
        entry per frame.
    """
    # Decode at the requested rate so that it agrees with the centroid
    # range below (the rate used to be fixed at 44100 here).
    audio = MonoLoader(filename=filename, sampleRate=sampleRate)()

    w = Windowing()
    spec = Spectrum()
    rms = RMS()
    centroid = Centroid(range=int(sampleRate / 2))

    cs = []
    rmss = []
    # Both descriptors are computed on the spectrum of each windowed frame.
    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
        sf = spec(w(frame))
        cs.append(centroid(sf))
        rmss.append(rms(sf))
    return np.array(rmss), np.array(cs)

コード例 #24

0

ファイルを表示

    def testRegressionTechnoloop(self):
        # Regression check of SuperFluxExtractor on techno_loop.wav with peak
        # parameters slightly different from the defaults.
        loop_path = join(testdata.audio_dir, 'recorded', 'techno_loop.wav')
        audio = MonoLoader(filename=loop_path)()

        extractor = SuperFluxExtractor(combine=20,
                                       frameSize=2048,
                                       hopSize=256,
                                       ratioThreshold=8,
                                       sampleRate=44100,
                                       threshold=0.25)
        onsets = extractor(audio)

        # The reference file was produced once with:
        #   save('superfluxtechno', onsets)
        reference_path = join(filedir(), 'superflux/superfluxtechno.npy')
        expected_superflux = load(reference_path)

        self.assertAlmostEqualVector(onsets, expected_superflux, 1e-5)

コード例 #25

0

ファイルを表示

    def testRegressionDubstep(self):
        # Regression check of SuperFluxExtractor on dubstep.wav using the
        # documented default parameters.
        clip_path = join(testdata.audio_dir, 'recorded', 'dubstep.wav')
        audio = MonoLoader(filename=clip_path)()

        extractor = SuperFluxExtractor(combine=30,
                                       frameSize=2048,
                                       hopSize=256,
                                       ratioThreshold=16,
                                       sampleRate=44100,
                                       threshold=0.5)
        onsets = extractor(audio)

        # The reference file was produced once with:
        #   save('superfluxdub', onsets)
        reference_path = join(filedir(), 'superflux/superfluxdub.npy')
        expected_superflux = load(reference_path)

        self.assertAlmostEqualVector(onsets, expected_superflux, 1e-5)

コード例 #26

0

ファイルを表示

def get_mono_loaded_song(song_path: str):
    """Decode the song at the given path and return its mono audio data.

    Parameters
    ----------
    song_path : str
        The file path of the song; it is resolved with
        ``get_absolute_path`` before loading.

    Returns
    -------
    vector_real
        The file's audio downmixed to mono

    """
    absolute_path = get_absolute_path(song_path)
    return MonoLoader(filename=absolute_path)()

コード例 #27

0

ファイルを表示

ファイル: main.py プロジェクト: SwarajKR/MIR

def lowLevel(songName):
    """Return the feature list of `songName`, extracting it when unknown.

    The key is the last '/'-separated component of `songName` without ','.
    A key already present in the global `dataset` returns the stored
    features. Otherwise the features are extracted, appended as one CSV line
    to data.csv (guarded by the global `lock`) and returned.

    Every numpy array in the extracted list is replaced by `avg` of it.
    """
    global dataset
    global lock
    print(songName)
    key = re.sub(r',', "", songName.split('/')[-1])
    # If already present in the dataset, do not extract again.
    # (`in` works on every Python version; dict.has_key is Python 2 only.)
    if key in dataset:
        return dataset[key]

    # Load the song and run the extractors.
    audio = MonoLoader(filename=songName)()

    # NOTE(review): the positions deleted/overwritten below depend on the
    # output order of the two extractors - confirm against their docs.
    feature = list(LowLevelSpectralEqloudExtractor()(audio))
    del feature[1]
    del feature[1]
    featureTwo = list(LowLevelSpectralExtractor()(audio))
    del featureTwo[0]
    del featureTwo[-2]
    featureTwo[4] = feature[4][1]
    feature.extend(featureTwo)

    feature.append(Loudness()(audio))
    feature.append(LogAttackTime()(audio)[0])
    feature.append(KeyExtractor()(audio)[2])
    data = RhythmExtractor2013()(audio)
    feature.append(data[0])
    feature.append(data[2])

    for x in range(len(feature)):
        if type(feature[x]) is np.ndarray:
            feature[x] = avg(feature[x])

    arr = key + "," + str(feature)[1:-1] + "\n"
    # Open, write and close while holding the lock: the buffered line
    # reaches the file on close, so closing must happen before the release.
    # try/finally releases the lock even when the write fails.
    lock.acquire()
    try:
        with open('data.csv', 'a') as f:
            f.write(arr)
    finally:
        lock.release()
    return feature

コード例 #28

0

ファイルを表示

def hcdf(filename):
    """Print the windowed frame and the 12-bin chromagram of every frame.

    The file is decoded with MonoLoader and cut into frames of 32768 samples
    with a hop of 4096. Nothing is returned; the values are only printed.

    :param filename: path of the audio file.
    """
    audio = MonoLoader(filename=filename)()
    windowing = Windowing(type='hann')

    # 12-bin tuned chromagram, built once instead of once per frame.
    # maxFrequency=3520 is left unset.
    # TODO: a direct ConstantQ transform (binsPerOctave=36, minFrequency=110,
    # maxFrequency=3520, sampleRate=11025) was tried here and left disabled;
    # the original note says to ask for it to be added.
    chroma = Chromagram(numberBins=12,
                        binsPerOctave=36,
                        minFrequency=110,
                        windowType='hann')

    for frame in FrameGenerator(audio, frameSize=32768, hopSize=4096):
        windowed = windowing(frame)
        print('window', windowed)

        # The chromagram gets the raw frame; it is configured with its own
        # 'hann' window type.
        pitch_class_vectors = chroma(frame)
        print('pitch_class_vectors', pitch_class_vectors)

コード例 #29

0

ファイルを表示

    def testRegression(self):
        # Regression test of PercivalEvaluatePulseTrains on the flux of the
        # techno loop, once on the raw flux and once on a low-passed copy.
        inputSize = 21168  # N.B The beat period is 21168 samples for 125 bpm @ 44.1k samp. rate
        audio = MonoLoader(
            filename=join(testdata.audio_dir, 'recorded', 'techno_loop.wav'))()

        # Calculates the positions and peaks
        pdetect = PeakDetection()

        # Algorithms used to calculate the flux of every frame
        windower = Windowing(type='blackmanharris62')
        specAlg = Spectrum(size=4096)
        fluxAlg = Flux()

        # Collect the flux of every non-overlapping frame of the audio.
        # (A separate FrameCutter used to be called here as well, but its
        # output was never consumed; FrameGenerator alone drives the loop.)
        fluxArray = []
        for frame in FrameGenerator(audio,
                                    frameSize=inputSize,
                                    hopSize=inputSize):
            spectrum = specAlg(windower(frame))
            fluxArray.append(fluxAlg(spectrum))
        filteredSignal = LowPass(cutoffFrequency=8000)(fluxArray)

        # Calculate PercivalEvaluatePulseTrains on fluxArray
        # NOTE(review): the second output of PeakDetection is what gets
        # passed on as the positions argument - confirm the output order.
        aSignal = AutoCorrelation()(fluxArray)
        pHarm = PercivalEnhanceHarmonics()(aSignal)
        oss, posis = pdetect(pHarm)
        lag = PercivalEvaluatePulseTrains()(fluxArray, posis)
        # Based on previous observations with fluxArray output originating from techno_loop
        self.assertEqual(8.0, lag)

        # Calculate PercivalEvaluatePulseTrains on the filtered fluxArray
        aSignal = AutoCorrelation()(filteredSignal)
        pHarm = PercivalEnhanceHarmonics()(aSignal)
        oss, posis = pdetect(pHarm)
        lag = PercivalEvaluatePulseTrains()(filteredSignal, posis)
        # Based on previous observations with the filtered fluxArray output originating from techno_loop
        self.assertEqual(7.0, lag)

コード例 #30

0

ファイルを表示

    def testExactAudioLengthMatch(self):
        # Silence of exactly one beat period added at either end (or both)
        # of the loop must leave the confidence at 1.0.
        # assertEqual is used because the assertEquals alias is deprecated
        # and no longer exists in current unittest versions.
        audio = MonoLoader(filename=join(testdata.audio_dir, 'recorded', 'techno_loop.wav'))()
        bpmEstimate = 125
        beatPeriod = 21168 # N.B The beat period is 21168 samples for 125 bpm @ 44.1k samp. rate
        silentAudio = zeros(beatPeriod)

        # Add non-musical silence to the beginning of the audio
        signal1 = numpy.append(silentAudio, audio)
        confidence = LoopBpmConfidence()(signal1, bpmEstimate)
        self.assertEqual(confidence, 1.0)

        # Add non-musical silence to the end of the audio
        signal2 = numpy.append(audio, silentAudio)
        confidence = LoopBpmConfidence()(signal2, bpmEstimate)
        self.assertEqual(confidence, 1.0)

        # Concatenate silence at both ends
        signal3 = numpy.append(signal1, silentAudio)
        confidence = LoopBpmConfidence()(signal3, bpmEstimate)
        self.assertEqual(confidence, 1.0)