def shortTermAnalyses(sound_type, filename, patient_name):
    fs, signal = wavfile.read(filename)
    window.refresh()
    if sound_type == 'speech':
        s = audioSegmentation.silence_removal(signal, fs, 0.5, 0.1, weight=0.2)
        signal2 = np.concatenate([signal[int((i[0]+0.1)*fs):int((i[1]+0.1)*fs)] for i in s])
        wavfile.write("database/{0}/speechFileSegmented.wav".format(patient_name), fs, signal2)
        s1 = ShortTermFeatures.feature_extraction(signal[:, 0], fs, 0.05*fs, 0.025*fs, deltas=True)[0][:8]
        window.refresh()
        s2 = ShortTermFeatures.feature_extraction(signal[:, 1], fs, 0.05*fs, 0.025*fs, deltas=True)[0][:8]
        window.refresh()
        filename = filename[:-4] + "1.wav"
        fs, signal = wavfile.read(filename)
        s = audioSegmentation.silence_removal(signal, fs, 0.5, 0.1, weight=0.2)
        signal2 = np.concatenate([signal[int((i[0]+0.1)*fs):int((i[1]+0.1)*fs)] for i in s])
        wavfile.write("database/{0}/speechFileSegmented1.wav".format(patient_name), fs, signal2)
        s3 = ShortTermFeatures.feature_extraction(signal[:, 0], fs, 0.05*fs, 0.025*fs, deltas=True)[0][:8]
        window.refresh()
        s4 = ShortTermFeatures.feature_extraction(signal[:, 1], fs, 0.05*fs, 0.025*fs, deltas=True)[0][:8]
        window.refresh()
        n = min(s1.shape[0], s2.shape[0], s3.shape[0], s4.shape[0])
        m = min(s1.shape[1], s2.shape[1], s3.shape[1], s4.shape[1])
        return (s1[:n, :m]+s2[:n, :m]+s3[:n, :m]+s4[:n, :m])/4
    else:
        return ShortTermFeatures.feature_extraction(signal, fs, 0.05*fs, 0.025*fs, deltas=True)[0][:8]
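The snippet above relies on module-level state; a minimal sketch of the imports it assumes (the GUI `window` object is an assumption and would come from the surrounding application, e.g. a PySimpleGUI window):

import numpy as np
from scipy.io import wavfile
from pyAudioAnalysis import audioSegmentation, ShortTermFeatures
# `window` is assumed to be a GUI element with a refresh() method,
# e.g. window = sg.Window(...) in PySimpleGUI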
Example #2
def extract(x, sr=16000):
    f_global = []

    # 34D short-term feature
    f = ShortTermFeatures.feature_extraction(x, sr, globalvars.frame_size * sr,
                                             globalvars.step * sr)

    # newer pyAudioAnalysis versions (Python 3) return a (features, feature_names) tuple
    if type(f) is tuple:
        f = f[0]

    # Harmonic ratio and pitch, 2D
    hr_pitch = ShortTermFeatures.speed_feature(x, int(sr),
                                               int(globalvars.frame_size * sr),
                                               int(globalvars.step * sr))
    f = np.append(f, hr_pitch.transpose(), axis=0)

    # Z-normalized
    f = stats.zscore(f, axis=0)

    f = f.transpose()

    f_global.append(f)

    f_global = sequence.pad_sequences(
        f_global,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    return f_global
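A hypothetical usage sketch, assuming `globalvars` defines the frame_size, step, max_len and masking_value used above, and that `sequence` comes from the Keras preprocessing utilities:

from pyAudioAnalysis import audioBasicIO

sr, x = audioBasicIO.read_audio_file("sample.wav")  # "sample.wav" is a placeholder
x = audioBasicIO.stereo_to_mono(x)
f = extract(x, sr=sr)  # shape (1, globalvars.max_len, 36): 34 features + harmonic ratio + pitch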
Example #3
def preprocess_audio(data_type):
    files_dir = os.path.join(path, data_type)
    files_name = os.listdir(files_dir)
    mp3_files = filter(lambda file: file.split(".")[-1] == "mp3",
                       files_name)  # keep only the .mp3 files
    mp3_files = list(mp3_files)

    # pdb.set_trace()

    data = dict()
    lens = []
    for file in mp3_files:
        [Fs, x] = audioBasicIO.read_audio_file(os.path.join(files_dir, file))
        try:
            F0, _ = ShortTermFeatures.feature_extraction(
                x[:, 0], Fs, 0.050 * Fs, 0.025 * Fs)
            F1, _ = ShortTermFeatures.feature_extraction(
                x[:, 1], Fs, 0.050 * Fs, 0.025 * Fs)
        except IndexError:
            F0, _ = ShortTermFeatures.feature_extraction(
                x, Fs, 0.050 * Fs, 0.025 * Fs)
            F1 = np.zeros(F0.shape)
        feature = np.concatenate([F0, F1], axis=0)

        seq_len = feature.shape[1]
        lens.append(seq_len)
        if seq_len < 611:  # if seq_len < 611, pad to 611
            new_feature = np.zeros((68, 611))
            new_feature[:, :seq_len] = feature
            feature = new_feature.transpose(1, 0)  # (611, 68)

        utterance_id = file[:-4]
        data[utterance_id] = {'feature': feature, 'seq_len': seq_len}

    return lens, data
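A hypothetical call, assuming a module-level `path` directory with a "dev" subfolder of mp3 files (both names are placeholders):

lens, data = preprocess_audio("dev")
uid, entry = next(iter(data.items()))
print(uid, entry['seq_len'], entry['feature'].shape)  # (68, seq_len), or (611, 68) if padded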
Example #4
def generate_data(output_csv):
    '''
    Read the entire list of audio files, extract features from each, and append them to output_csv.
    '''
    l1 = []
    for i in range(1, 822):
        print("f", i)
        l = []
        [Fs, x] = audioBasicIO.read_audio_file("f" + str(i) + ".wav")
        F, fm = ShortTermFeatures.feature_extraction(x, Fs, 0.05 * Fs,
                                                     0.025 * Fs)
        for j in range(34):
            l.append(min(F[j]))
            l.append(max(F[j]))
            l.append(mean(F[j]))
            l.append(stdev(F[j]))
        l.append(1)
        l1.append(l)
    for i in range(1, 822):
        print("m", i)
        l = []
        [Fs, x] = audioBasicIO.read_audio_file("m" + str(i) + ".wav")
        F, fm = ShortTermFeatures.feature_extraction(x, Fs, 0.05 * Fs,
                                                     0.025 * Fs)
        for j in range(34):
            l.append(min(F[j]))
            l.append(max(F[j]))
            l.append(mean(F[j]))
            l.append(stdev(F[j]))
        l.append(0)
        l1.append(l)
    with open(output_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(l1)
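Each row written above holds four statistics (min, max, mean, stdev) for each of the 34 short-term features plus a gender label (1 for the "f" files, 0 for the "m" files), 137 columns in total. A hypothetical way to load the result for training ("features.csv" is a placeholder):

import pandas as pd

df = pd.read_csv("features.csv", header=None)
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values  # 136 feature columns, 1 label column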
Example #5
    def _get_batches_of_transformed_samples(self, index_array):
        batch_x = []
        for i, j in enumerate(index_array):
            x = self.x[j]

            # Augmentation
            if self.audio_data_generator.white_noise_:
                x = self.audio_data_generator.white_noise(x)
            if self.audio_data_generator.shift_:
                x = self.audio_data_generator.shift(x)
            if self.audio_data_generator.stretch_:
                x = self.audio_data_generator.stretch(x)

            # 34D short-term feature
            f = ShortTermFeatures.feature_extraction(
                x, self.sr, globalvars.frame_size * self.sr,
                globalvars.step * self.sr)

            # Harmonic ratio and pitch, 2D
            hr_pitch = ShortTermFeatures.speed_feature(
                x, self.sr, globalvars.frame_size * self.sr,
                globalvars.step * self.sr)
            x = np.append(f, hr_pitch.transpose(), axis=0)

            # Z-normalized
            x = stats.zscore(x, axis=0)

            x = x.transpose()

            batch_x.append(x)

        batch_x = sequence.pad_sequences(
            batch_x,
            maxlen=globalvars.max_len,
            dtype="float32",
            padding="post",
            value=globalvars.masking_value,
        )

        batch_u = np.full(
            (
                len(index_array),
                globalvars.nb_attention_param,
            ),
            globalvars.attention_init_value,
            dtype=np.float32,
        )

        if self.y is None:
            return [batch_u, batch_x]
        batch_y = self.y[index_array]

        return [batch_u, batch_x], batch_y
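Note that batch_u seeds an attention mechanism: one constant vector of globalvars.nb_attention_param values, initialized to globalvars.attention_init_value, is returned per sample alongside the padded feature sequences.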
Example #6
def feature_extraction(INPUTPATH, OUTPATH):
    try:
        [Fs, x] = audioBasicIO.read_audio_file(INPUTPATH)
        try:
            CH = x.shape[1]
        except IndexError:  # mono signals are one-dimensional
            CH = 1
        
        if CH == 1:
            c1 = ShortTermFeatures.feature_extraction(x, Fs, 0.050*Fs, 0.025*Fs)

            channel1 = {}
            for i in range(0, len(c1[1])):
                channel1[c1[1][i]] = c1[0][i]

            channel1 = pd.DataFrame(channel1)
            channel1.to_json(OUTPATH + "channel1_features.json")
            result = {
                'channel1': json.loads(channel1.to_json())
            }
        
        if CH == 2:
            c1 = ShortTermFeatures.feature_extraction(x[:,0], Fs, 0.050*Fs, 0.025*Fs)
            c2 = ShortTermFeatures.feature_extraction(x[:,1], Fs, 0.050*Fs, 0.025*Fs)

            channel1 = {}
            channel2 = {}
            for i in range(0, len(c1[1])):
                channel1[c1[1][i]] = c1[0][i]
            
            for i in range(0, len(c2[1])):
                channel2[c2[1][i]] = c2[0][i]

            channel1 = pd.DataFrame(channel1)
            channel1.to_json(OUTPATH + "channel1_features.json")

            channel2 = pd.DataFrame(channel2)
            channel2.to_json(OUTPATH + "channel2_features.json")
            result = {
                'channel1': json.loads(channel1.to_json()),
                'channel2': json.loads(channel2.to_json())
            }


        return json.dumps(result)

    except Exception as e:
        return "Error: " + str(e)
Example #7
def generateFeaturesData(outputData):
    l1 = []
    for i in range(1, 1501, 1):
        print("Rej", i)
        try:
            [Fs, x] = audioBasicIO.read_audio_file("rej_" + str(i) + ".wav")
            F, f_names = ShortTermFeatures.feature_extraction(
                x, Fs, 0.05 * Fs, 0.025 * Fs)
        except Exception:  # skip files that are missing or unreadable
            continue
        k = 0
        while k < len(F[0]):
            l = []
            for j in range(34):
                l.append(np.percentile(F[j, k:k + 399], 25))
                l.append(np.percentile(F[j, k:k + 399], 50))
                l.append(np.percentile(F[j, k:k + 399], 75))
                l.append(np.percentile(F[j, k:k + 399], 95))

            l.append(len(F[0]) / 399)  # total frame count relative to the 399-frame chunk size
            l.append(1)
            l1.append(l)
            k = k + 399
    for i in range(1, 1501, 1):
        print("Acc", i)
        try:
            [Fs, x] = audioBasicIO.read_audio_file("acc_" + str(i) + ".wav")
            F, f_names = ShortTermFeatures.feature_extraction(
                x, Fs, 0.05 * Fs, 0.025 * Fs)
        except Exception:  # skip files that are missing or unreadable
            continue
        k = 0
        while k < len(F[0]):
            l = []
            for j in range(34):
                l.append(np.percentile(F[j, k:k + 399], 25))
                l.append(np.percentile(F[j, k:k + 399], 50))
                l.append(np.percentile(F[j, k:k + 399], 75))
                l.append(np.percentile(F[j, k:k + 399], 95))

            l.append(len(F[0]) / 399)  # total frame count relative to the 399-frame chunk size
            l.append(2)
            l1.append(l)
            k = k + 399

    with open(outputData, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(l1)
Example #8
def zcr_sigenergy(INPUTPATH, OUTPATH):
    try:
        [Fs, x] = audioBasicIO.read_audio_file(INPUTPATH)

        try:
            CH = x.shape[1]
        except IndexError:  # mono signals are one-dimensional
            CH = 1
        
        if CH == 1:
            F_0, f_names_0 = ShortTermFeatures.feature_extraction(x, Fs, 0.050*Fs, 0.025*Fs)
            fig = plt.figure(figsize=(18, 8), dpi=200)
            ax1 = fig.add_subplot(111)
            ax1.plot(F_0[0,:], label=f_names_0[0])
            ax1.plot(F_0[1,:], label=f_names_0[1])
            ax1.legend()
            # Set common labels
            fig.text(0.5, 0.01, 'Frame no.', ha='center', va='center')
            fig.text(0.004, 0.5, 'Zero Crossing Rate / Signal Energy', ha='center', va='center', rotation='vertical')
            ax1.set_title('Channel 1')
            fig.tight_layout()
            plt.savefig(OUTPATH + 'zcr_energy.png')
            plt.close()
            return "Complete"

        if CH==2:
            F_0, f_names_0 = ShortTermFeatures.feature_extraction(x[:,0], Fs, 0.050*Fs, 0.025*Fs)
            F_1, f_names_1 = ShortTermFeatures.feature_extraction(x[:,1], Fs, 0.050*Fs, 0.025*Fs)
            fig = plt.figure(figsize=(18, 8), dpi=200)
            ax1 = fig.add_subplot(211)
            ax2 = fig.add_subplot(212)
            ax1.plot(F_0[0,:], label=f_names_0[0])
            ax1.plot(F_0[1,:], label=f_names_0[1])
            ax2.plot(F_1[0,:], label=f_names_1[0])
            ax2.plot(F_1[1,:], label=f_names_1[1])
            ax1.legend()
            ax2.legend()
            # Set common labels
            fig.text(0.5, 0.01, 'Frame no.', ha='center', va='center')
            fig.text(0.004, 0.5, 'Zero Crossing Rate / Signal Energy', ha='center', va='center', rotation='vertical')
            ax1.set_title('Channel 1')
            ax2.set_title('Channel 2')
            fig.tight_layout()
            plt.savefig(OUTPATH + 'zcr_energy.png')
            plt.close()
            return "Complete"
    except Exception as e:
        return "Error: " + str(e)
Example #9
def test_shortTermFeatures(wav_file, plot):
    [fs, data] = audioBasicIO.read_audio_file(wav_file)
    print(f'FS={fs} win={0.050*fs} step={0.025*fs}')
    F, f = STF.feature_extraction_lengthwise(data, fs, 0.050 * fs, 0.025 * fs)

    fig = None
    if plot:
        fig = plt.figure(figsize=(12, 6))
        ax1 = fig.subplots()
        ax2 = ax1.twinx()
        ax3 = ax2.twinx()

        ax1.plot(F[1, :], color='red', label=f[1])
        ax2.plot(F[0, :], color='green', label=f[0])
        ax3.plot(data, color='blue', label='data', alpha=0.5)

        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        lines3, labels3 = ax3.get_legend_handles_labels()
        ax3.set_xlabel('time (s)')
        ax3.legend(lines1 + lines2 + lines3,
                   labels1 + labels2 + labels3,
                   loc=0)
        ax1.axis('off')
        ax2.axis('off')
        #fig.savefig('recording1_shortTermFeatures.png', dpi=200)
        plt.show()

    return fig
Example #10
def exp1():
    fs, s = aIO.read_audio_file(AfeExp.wav_file)
    #IPython.display.display(IPython.display.Audio(wav_file))
    duration = len(s) / float(fs)
    print(f'duration = {duration} seconds')
    win, step = 0.050, 0.050
    [f, fn] = aSF.feature_extraction(s, fs, int(fs * win), int(fs * step))
    print(f'{f.shape[1]} frames, {f.shape[0]} short-term features')
    print('Feature names:')
    for i, nam in enumerate(fn):
        print(f'{i}:{nam}')
    time = np.arange(0, duration - step, win)
    energy = f[fn.index('energy'), :]
    mylayout = go.Layout(yaxis=dict(title="frame energy value"),
                         xaxis=dict(title="time (sec)"))
    '''
    plotly.offline.iplot(go.Figure(data=[go.Scatter(x=time,
                                                    y=energy)],
                                   layout=mylayout))
    '''
    plotly.offline.plot(
        {
            'data': [go.Scatter(x=time, y=energy)],
            'layout': mylayout
        },
        auto_open=True)
Example #11
def get_spectrogram(path, win, step, disable_caching=True, smooth=True):
    """
    get_spectrogram() is a wrapper to
    pyAudioAnalysis.ShortTermFeatures.spectrogram() with a caching functionality

    :param path: path of the WAV file to analyze
    :param win: short-term window to be used in spectrogram calculation
    :param step: short-term step to be used in spectrogram calculation
    :param disable_caching: if True, do not read or write the .npz cache file
    :param smooth: if True, apply a median filter to the spectrogram
    :return: spectrogram matrix, time array, freq array and sampling freq
    """
    fs, s = io.read_audio_file(path)
    cache_name = path + "_{0:.6f}_{1:.6f}.npz".format(win, step)
    if not disable_caching and os.path.isfile(cache_name):
        print("Loading cached spectrogram")
        npzfile = np.load(cache_name)
        spec_val = npzfile["arr_0"]
        spec_time = npzfile["arr_1"]
        spec_freq = npzfile["arr_2"]
    else:
        print("Computing spectrogram")
        spec_val, spec_time, spec_freq = sF.spectrogram(
            s, fs, round(fs * win), round(fs * step), False, True)
        if not disable_caching:
            np.savez(cache_name, spec_val, spec_time, spec_freq)
    #    f, f_n  = sF.feature_extraction(s, fs, win * fs / 1000.0,
    #                                    step * fs / 1000.0, deltas=True)
    if smooth:
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    return spec_val, np.array(spec_time), np.array(spec_freq), fs
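A sketch of a call with caching enabled; the 40 ms window and 20 ms step are placeholder values:

spec, t, fq, fs = get_spectrogram("sample.wav", win=0.040, step=0.020,
                                  disable_caching=False)
print(spec.shape, len(t), len(fq), fs)  # a second call would load the cached .npz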
Example #12
def getTXT(file):
    pattern = re.compile(r'([^<>/\\\|:""\*\?]+)\.\w+$')
    fileName = pattern.findall(file)[0]

    # mp4 to wav
    wav_filename = fileName + '.wav'
    AudioSegment.from_file(file).export('store/audioStore/' + wav_filename,
                                        format='wav')

    # wav to txt
    Fs, x = loadAudio('store/audioStore/' + wav_filename)
    print(Fs, x)
    st_features, st_features_name = sF.feature_extraction(x,
                                                          Fs,
                                                          0.050 * Fs,
                                                          0.025 * Fs,
                                                          deltas=False)
    outPath = 'store/audioEvaluationTxt/' + fileName + '.txt'
    with open(outPath, 'w') as outputFile:
        for col in range(st_features.shape[1]):
            sampleFeature = [st_features[row][col]
                             for row in range(st_features.shape[0])]
            sampleString = str(sampleFeature).replace('[', '').replace(']', '')
            outputFile.write(sampleString + '\n')
    return outPath
Example #13
def fileChromagramWrapper(wav_file):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.read_audio_file(wav_file)
    x = audioBasicIO.stereo_to_mono(x)
    specgram, TimeAxis, FreqAxis = sF.chromagram(x, fs, round(fs * 0.040),
                                                 round(fs * 0.040), True)
Example #14
def get_features(input_file):
    '''
    Given an input .wav file, return a list of lists holding the features of each of its chunks.
    reject is 1, accept is 2; no target label is appended here, svm_score decides
    accept (2) or reject (1). There is no need to break the file into chunks;
    that was only required when runtime was a priority.
    '''
    data, samplerate = sf.read(input_file)
    l1 = []
    [Fs, x] = audioBasicIO.read_audio_file(input_file)
    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.05*Fs, 0.025*Fs)
    l = []
    for j in range(34):
        l.append(np.percentile(F[j, :], 25))
        l.append(np.percentile(F[j, :], 50))
        l.append(np.percentile(F[j, :], 75))
        l.append(np.percentile(F[j, :], 95))

    l.append(len(F[0]) / 399)
    # if fname.startswith("acc"):
    #     l.append(2)
    # else:
    #     l.append(1)
    l1.append(l)
    return l1
Example #15
def ExtractSpec(id):
    [Fs, x] = audioBasicIO.read_audio_file(
        "/Volumes/Macintosh HD - Data/Users/admin/Documents/HD Drive/DataProjects/DepressionData/audio/{}_AUDIO.wav"
        .format(id))
    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.050 * Fs,
                                                      0.025 * Fs)
    return F
Example #16
def ExtractSpec(id):
    with ZipFile("/Users/aravind/Downloads/{}_P.zip".format(id), 'r') as zf:
        zf.extract("{}_AUDIO.wav".format(id), 'audio')
        [Fs, x] = audioBasicIO.read_audio_file("audio/{}_AUDIO.wav".format(id))
        F, time_axis, freq_axis = ShortTermFeatures.spectrogram(
            x, Fs, 0.050 * Fs, 0.025 * Fs)
        return F
Example #17
def test_feature_extraction_short():
    [fs, x] = audioBasicIO.read_audio_file("test_data/1_sec_wav.wav")
    F, f_names = ShortTermFeatures.feature_extraction(x, fs, 0.050 * fs,
                                                      0.050 * fs)
    assert F.shape[1] == 20, "Wrong number of short-term windows"
    assert F.shape[0] == len(f_names), "Number of features and feature " \
                                       "names are not the same"
Example #18
def extract_feature(file_name):
    [Fs, x] = audioBasicIO.read_audio_file(file_name)
    if x.ndim == 2:
        x = x[:, 0]
    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.025 * Fs,
                                                      0.010 * Fs)
    return F.T
Example #19
def extract_extract_audioAnalysis(audio_file, chuncksize=1):
    [Fs, x] = audioBasicIO.read_audio_file(audio_file)
    x = audioBasicIO.stereo_to_mono(x)
    overlap = chuncksize * Fs
    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, Fs, overlap)  # takes approx. 2.5 mins to complete

    # return Zero Crossing Rate, Spectral Centroid, Spectral Spread, Spectral Entropy, Spectral Flux, Spectral Rolloff
    return F[0], F[3], F[4], F[5], F[6], F[7]
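Hypothetical usage with the default 1-second chunks ("sample.wav" is a placeholder):

zcr, centroid, spread, entropy, flux, rolloff = extract_extract_audioAnalysis("sample.wav")
print(len(zcr))  # roughly one value per second of audio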
Example #20
def beatExtractionWrapper(wav_file, plot):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.read_audio_file(wav_file)
    F, _ = sF.feature_extraction(x, fs, 0.050 * fs, 0.050 * fs)
    bpm, ratio = aF.beat_extraction(F, 0.050, plot)
    print("Beat: {0:d} bpm ".format(int(bpm)))
    print("Ratio: {0:.2f} ".format(ratio))
Example #21
def extract_dataset(data, nb_samples, dataset, save=True):
    f_global = []

    i = 0
    for (x, Fs) in data:
        # 34D short-term feature
        f = ShortTermFeatures.feature_extraction(x, Fs,
                                                 globalvars.frame_size * Fs,
                                                 globalvars.step * Fs)

        # newer pyAudioAnalysis versions (Python 3) return a (features, feature_names) tuple
        if type(f) is tuple:
            f = f[0]

        # Harmonic ratio and pitch, 2D
        hr_pitch = ShortTermFeatures.speed_feature(x, Fs,
                                                   globalvars.frame_size * Fs,
                                                   globalvars.step * Fs)
        f = np.append(f, hr_pitch.transpose(), axis=0)

        # Z-normalized
        f = stats.zscore(f, axis=0)

        f = f.transpose()

        f_global.append(f)

        sys.stdout.write("\033[F")
        i = i + 1
        print("Extracting features " + str(i) + "/" + str(nb_samples) +
              " from data set...")

    f_global = sequence.pad_sequences(
        f_global,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    if save:
        print("Saving features to file...")
        pickle.dump(f_global, open(dataset + "_features.p", "wb"))

    return f_global
Example #22
def pyaudioextraction(path, fs_factor, overlap_factor, stereo=False):
    try:
        [Fs, x] = audioBasicIO.read_audio_file(path)
        if stereo:
            x = audioBasicIO.stereo_to_mono(x)
        F, f_names = ShortTermFeatures.feature_extraction(x, Fs, fs_factor * Fs, overlap_factor * Fs)
        return F.T.flatten()
    except Exception:
        return None
Example #23
def function(row, column):
    global interval
    interval += 1

    url = df["Episode {}".format(column)][row]

    if url is None:
        return

    if row == 11861:
        return

    mp3 = '{}{}.mp3'.format(row, column)
    wav = '{}{}.wav'.format(row, column)

    r = requests.get(url, allow_redirects=True)
    with open(mp3, 'wb') as mp3_file:
        mp3_file.write(r.content)

    # Export mp3 to wav and remove mp3
    sound = AudioSegment.from_mp3(mp3)
    sound.export(wav, format="wav")
    os.remove(mp3)

    # Read wav info and remove it
    [Fs, x] = audioBasicIO.read_audio_file(wav)
    if len(x.shape) == 2:
        x = np.mean(x, axis=1)
    os.remove(wav)

    # Extract features
    print("Start {}{} at {}".format(row, column,
                                    datetime.datetime.now().time()))
    if len(x) > 6 * Fs * 60:
        x = x[5 * Fs * 60:6 * Fs * 60]  # analyze only the sixth minute

    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.050 * Fs,
                                                      0.025 * Fs)

    _var = []
    _mean = []
    for f in F:
        _var.append(f.var())
        _mean.append(f.mean())

    var_list[row - offset] = _var
    mean_list[row - offset] = _mean

    print("End {}{} at {}".format(row, column, datetime.datetime.now().time()))

    if interval % 2 == 0:
        pd.DataFrame(var_list, columns=column_names).to_csv(
            r'./vars{}.csv'.format(offset), index=False, header=True)
        pd.DataFrame(mean_list, columns=column_names).to_csv(
            r'./means{}.csv'.format(offset), index=False, header=True)
Example #24
def extract_feature(file_name):
    [Fs, x] = audioBasicIO.read_audio_file(file_name)
    if x.ndim == 2:
        x = x[:, 0]
    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.025 * Fs,
                                                      0.010 * Fs)
    F_mean = F.mean(axis=1)
    F_std = F.std(axis=1)
    hfs = np.hstack([F_mean, F_std])
    return hfs.T
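Unlike Example #18, which returns the per-frame feature matrix, this variant pools over time into a single 68-dimensional vector (34 means followed by 34 standard deviations). Hypothetical usage ("sample.wav" is a placeholder):

vec = extract_feature("sample.wav")
print(vec.shape)  # (68,)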
Example #25
def AnalyzeData(data):
    F, f_names = ShortTermFeatures.feature_extraction(data,
                                                      RATE,
                                                      SAMPLE_DUR * RATE,
                                                      (SAMPLE_DUR / 2) * RATE,
                                                      deltas=False)
    # F is the data, so save that to the CSV file for audio data
    #for i in range(len(F)):
    #    print(f_names[i],F[i])
    return F, f_names
Example #26
def preProcess(fileName):

    [Fs, x] = audioBasicIO.read_audio_file(fileName)

    if (len(x.shape) > 1 and x.shape[1] == 2):
        x = np.mean(x, axis=1, keepdims=True)
    else:
        x = x.reshape(x.shape[0], 1)

    F, f_names = ShortTermFeatures.feature_extraction(x[:, 0], Fs, 0.050 * Fs,
                                                      0.025 * Fs)
    return (f_names, F)
Example #27
def get_spectrogram_buffer(s, fs, win, step, smooth=True):
    """
    get_spectrogram_buffer() same as get_spectrogram() but input is an audio
    buffer, instead of an audio file
    """
    spec_val, spec_time, spec_freq = sF.spectrogram(s, fs, round(fs * win),
                                                    round(fs * step), False,
                                                    True)
    if smooth:
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    return spec_val, np.array(spec_time), np.array(spec_freq), fs
Example #28
def generate_CompareGraph():

    [Fs, x] = audio.audioBasicIO.read_audio_file(
        "/Users/zhouhan/Downloads/河图 - 风起天阑.mp3")
    # first mix down to mono
    x = aio.stereo_to_mono(x)
    # F is n x num_frames: one row per feature
    F, _ = short.feature_extraction(x, Fs, 0.50 * Fs, 0.25 * Fs)
    F = np.transpose(F)

    frame = pd.DataFrame(F)
    frame.head()
    fig = px.line(frame)
    fig.show()
Example #29
def FindAudioShots(framechange_array, audio_path):
    features = [1]
    [Fs, x] = audioBasicIO.read_audio_file(audio_path)
    x = audioBasicIO.stereo_to_mono(x)
    frame_size = (Fs // 30)
    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, frame_size, frame_size, deltas=False)

    astd = []
    aave = []
    for i in range(len(features)):
        astd.append(np.std(F[features[i],:]))
        aave.append(np.average(F[features[i],:]))

    which_shots = np.zeros(len(F[features[0],:])).flatten()
    # print(which_shots.shape)

    for i in range(len(F[features[0],:])):
        for j in range(len(features)):
            if (abs(F[features[j],:][i]-aave[j]) > astd[j] * 3.5):
                which_shots[i] += F[features[j],:][i]
    
    audioshotchange_list = []

    prev_val = 0.0
    last_start = 0
    for i in range(len(F[1,:])):
        # print(which_shots[i])
        if (prev_val == 0.0 and which_shots[i] > 0.0):
            last_start = i
        if (prev_val > 0.0 and which_shots[i] == 0.0):
            audioshotchange_list.append([last_start, i, which_shots[last_start]])

        prev_val = which_shots[i]
    
    audio_array = np.zeros(len(framechange_array)-1)

    for i in range(len(framechange_array) - 1):
        first_frame = framechange_array[i]
        last_frame = framechange_array[i + 1]
        for y in range(len(audioshotchange_list)):
            if audioshotchange_list[y][0] >= first_frame and audioshotchange_list[y][0] < last_frame:
                audio_array[i] += audioshotchange_list[y][2]
        audio_array[i] /= (last_frame - first_frame)

    audio_array = preprocessing.minmax_scale(audio_array, feature_range=(0, 1))
    audio_array = [round(num, 3) for num in audio_array]
    return audio_array
Example #30
    def get_short_features(self, file_path):
        try:
            Fs, x = self.read_wav(file_path)
        except Exception:
            print('failed to read ' + file_path + ' for short-term features, skipped.')
            return pd.DataFrame()
            return pd.DataFrame()
        audio_name = file_path.split('/')[-1]
        #emotion=file_path.split('/')[-2]
        #outpath=out_path+emotion+'_'+audio_name[:-4]+'.txt'
        #[Fs, x] = audioBasicIO.read_audio_file(file_path)
        F_s, F_name = ShortTermFeatures.feature_extraction(
            x, Fs, 0.05 * Fs, 0.025 * Fs)
        #F_m,F_s,F_name=self.mid_feature_extraction(x,Fs,1.0*Fs,0.5*Fs,0.05*Fs,0.025*Fs)

        short = pd.DataFrame(F_s.T)
        short['id'] = file_path.split('/')[-1]
        return short