Example #1
 def spawn(self,
           data,
           overrides: dict = {}):  # just make pydub and PEP8 happy :P
     if isinstance(data, list):
         data = array(get_array_type(self.sample_width * 8), data)
     if isinstance(data, np.ndarray):
         data = Audio.get_flatten_samples(data)
         data = stretch_samples(data, self.sample_width).tolist()
         data = array(get_array_type(self.sample_width * 8), data)
     return self._spawn(data, overrides)
Example #2
    def createSpectrogram(sceneAudioSegment, freqResolution, timeResolution,
                          windowLength, windowOverlap):
        highestFreq = sceneAudioSegment.frame_rate / 2
        height = highestFreq // freqResolution
        width = sceneAudioSegment.duration_seconds * 1000 // timeResolution

        # Set figure settings to remove all axes
        spectrogram = plt.figure(frameon=False)
        spectrogram.set_size_inches(width / 100, height / 100)
        ax = plt.Axes(spectrogram, [0., 0., 1., 1.])
        ax.set_axis_off()
        spectrogram.add_axes(ax)

        # Generate the spectrogram
        # See https://matplotlib.org/api/_as_gen/matplotlib.pyplot.specgram.html?highlight=matplotlib%20pyplot%20specgram#matplotlib.pyplot.specgram
        Pxx, freqs, bins, im = ax.specgram(
            x=np.frombuffer(sceneAudioSegment._data,
                            dtype=get_array_type(
                                8 * sceneAudioSegment.frame_width)),
            Fs=sceneAudioSegment.frame_rate,
            window=matplotlib.mlab.window_hanning,
            NFFT=windowLength,
            noverlap=windowOverlap,
            scale='dB')

        return spectrogram
Example #3
    def split_song(self, song):
        mydict = []
        convers = []

        for i in range(3000, len(song) + 3000, 3000):
            # print i
            try:
                splitting = song[i - 3000:i]  # current 3-second slice
                bit_depth = splitting.sample_width * self.BIT_PRECISION
                # print splitting.frame_rate
                array_type = get_array_type(bit_depth)
                print(len(splitting._data))
                print(array_type)
                numeric_array = array.array(array_type, splitting._data)
                numeric_array = numeric_array.tolist()
                #print(splitting.frame_rate)
                features = self.extract_features2(self.sampling_rate,
                                                  np.asarray(numeric_array))[0]
                features_transformed = (features -
                                        self.mean_train) / self.sd_train
                convers.append(features_transformed)
            except Exception:
                continue
            # if len(convers) == 3:
            #     prediction = self.my_attention_network.predict(np.array([convers]))[0]
            #     # print prediction
            #     mydict.append({"Anger": prediction[0], "Disgust": prediction[1], "Fear": prediction[3],
            #                    "Happiness": prediction[5], "Neutral": prediction[6], "Sadness": prediction[2],
            #                    "Surprise": prediction[4]})
            #     convers.pop(0)

        return convers
Example #4
        def split_song_get_emotion(self, song,len_sequence,byte_depth):
            mydict = []
            convers = []
            
            increment = int(len(song) / len_sequence)  # integer length in ms so range() gets ints

            for i in range(increment, len(song)+increment, increment):
                #print i
                splitting = song[i - increment:i]  # current increment-length slice (ms)
                bit_depth = splitting.sample_width * byte_depth
                # print splitting.frame_rate
                array_type = get_array_type(bit_depth)
                numeric_array = array.array(array_type, splitting._data)
                numeric_array = numeric_array.tolist()
                features = self.extract_features3(splitting.frame_rate, np.asarray(numeric_array))[0]
                features_transformed = (features - self.mean_train) / self.sd_train
                convers.append(features_transformed)
                #print len(convers)
                if len(convers) == len_sequence:
                    prediction = self.my_attention_network.predict(np.array([convers]))[0]
                    #print prediction
                    mydict.append({"Anger": prediction[0], "Disgust": prediction[1], "Fear": prediction[3],
                                   "Happiness": prediction[5], "Neutral": prediction[6], "Sadness": prediction[2],
                                   "Surprise": prediction[4]})
                    convers.pop(0)

            data_frame_emotions = pd.DataFrame.from_dict(mydict)
            return data_frame_emotions
Example #5
        def split_song_get_features(self, song):
          
            mydict = []
            convers = []
           
            #increment = 3000
            #if len(song)< 9000:
            increment = int(float(len(song))/3)

            for i in range(increment,len(song)+increment, increment):
                # print i
                splitting = song[i-increment:i]  # current increment-length slice (ms)
                bit_depth = splitting.sample_width * 8
                
                # print splitting.frame_rate
                array_type = get_array_type(bit_depth)
              
                
                numeric_array = array.array(array_type, splitting._data)
                numeric_array = numeric_array.tolist()
                features = self.extract_features2(splitting.frame_rate, np.asarray(numeric_array))[0]
                features_transformed = (features - self.mean_train) / self.sd_train
                convers.append(features_transformed)

            return convers
Example #6
def frequency_spectrum(sample, max_frequency=800):
    # Convert pydub.AudioSegment to raw audio data
    bit_depth = sample.sample_width * 8
    array_type = get_array_type(bit_depth)
    raw_audio_data = array.array(array_type, sample._data)
    n = len(raw_audio_data)

    # Compute FFT and frequency value for each index in FFT array
    freq_array = np.arange(n) * (float(sample.frame_rate) / n
                                 )  # two sides frequency range
    freq_array = freq_array[:(n // 2)]  # one side frequency range

    raw_audio_data = raw_audio_data - np.average(
        raw_audio_data)  # zero-centering
    freq_magnitude = np.fft.fft(
        raw_audio_data)  # fft computing and normalization
    freq_magnitude = freq_magnitude[:(n // 2)]  # one side

    if max_frequency:
        max_index = int(max_frequency * n / sample.frame_rate) + 1
        freq_array = freq_array[:max_index]
        freq_magnitude = freq_magnitude[:max_index]

    freq_magnitude = abs(freq_magnitude)
    freq_magnitude = freq_magnitude / np.sum(freq_magnitude)
    return freq_array, freq_magnitude
Example #7
def frequency_spectrum(sample, max_frequency=800):
    """
    Derive the frequency spectrum of a pydub.AudioSegment signal
    Returns an array of frequencies and an array of how prevalent each frequency is in the sample
    """
    # Convert pydub.AudioSegment to raw audio data
    # Copied from Jiaaro's answer on https://stackoverflow.com/questions/32373996/pydub-raw-audio-data
    bit_depth = sample.sample_width * 8
    array_type = get_array_type(bit_depth)
    raw_audio_data = array.array(array_type, sample._data)
    n = len(raw_audio_data)

    # Compute FFT and frequency value for each index in FFT array
    # Inspired by Reveille's answer on https://stackoverflow.com/questions/53308674/audio-frequencies-in-python
    freq_array = np.arange(n) * (float(sample.frame_rate) / n
                                 )  # two sides frequency range
    freq_array = freq_array[:(n // 2)]  # one side frequency range

    raw_audio_data = raw_audio_data - np.average(
        raw_audio_data)  # zero-centering
    freq_magnitude = scipy.fft.fft(
        raw_audio_data)  # fft computing and normalization
    freq_magnitude = freq_magnitude[:(n // 2)]  # one side

    if max_frequency:
        max_index = int(max_frequency * n / sample.frame_rate) + 1
        freq_array = freq_array[:max_index]
        freq_magnitude = freq_magnitude[:max_index]

    freq_magnitude = abs(freq_magnitude)
    freq_magnitude = freq_magnitude / np.sum(freq_magnitude)
    return freq_array, freq_magnitude
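
A minimal usage sketch for the frequency_spectrum() helper above; the file name "tone.wav", the 3-second slice, and the variable names are illustrative, and numpy (plus scipy for this variant) are assumed to be imported as in the examples above.

song = AudioSegment.from_file("tone.wav")            # hypothetical input file
sample = song[:3000].split_to_mono()[0]              # first 3 seconds, single channel
freq_array, freq_magnitude = frequency_spectrum(sample, max_frequency=800)
dominant_hz = freq_array[np.argmax(freq_magnitude)]  # bin with the largest normalised magnitude
print("dominant frequency: {:.1f} Hz".format(dominant_hz))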
Example #8
 def configure(self, sample_rate, buffer_size):
     self.mono = self.sound.split_to_mono()[0]
     self.sample_rate = sample_rate
     self.mono = self.mono.set_frame_rate(sample_rate)
     self.buffer_size = buffer_size
     bit_depth = self.mono.sample_width * 8
     array_type = get_array_type(bit_depth)
     self.numeric_array = np.array(array.array(array_type, self.mono._data))
Example #9
def getMixedChannels(sound):
    # Combines two channels of a loaded song into a single array
    (left, right) = (sound.split_to_mono()[0], sound.split_to_mono()[1])
    bit_depth = left.sample_width * 8
    array_type = get_array_type(bit_depth)
    (signalL, signalR) = (array.array(array_type, left._data),
                          array.array(array_type, right._data))
    mix = [signalL[i] + signalR[i] for i in range(len(signalL))]
    return mix
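
Summing two full-scale channels sample by sample can exceed the range of the original bit depth. A hedged variant (the function name is illustrative) that averages the channels with numpy instead, so the mix stays within range:

import array
import numpy as np
from pydub.utils import get_array_type

def get_mixed_channels_averaged(sound):
    # Average the two channels so the mix stays inside the original sample range.
    left, right = sound.split_to_mono()
    array_type = get_array_type(left.sample_width * 8)
    signal_l = np.array(array.array(array_type, left._data), dtype=np.float64)
    signal_r = np.array(array.array(array_type, right._data), dtype=np.float64)
    return ((signal_l + signal_r) / 2).astype(array_type)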
Example #10
    def __init__(self, file):
        self.filename = os.path.splitext(os.path.basename(file))[0]
        sound = AudioSegment.from_file(
            file=file, format=file.split('.')[1]).set_channels(1)
        self.sound_raw = np.frombuffer(
            sound._data,
            dtype=get_array_type(sound.sample_width * 8)).astype(np.float64,
                                                                 copy=False)
        self.sound_raw.setflags(write=1)

        self.raw_length = len(self.sound_raw)
        self.raw_increment = int(
            MS_INCREMENT *
            (len(self.sound_raw) / sound.duration_seconds / 1000))
        self.sample_rate = sound.frame_rate
        self.mpm = Mpm()
Example #11
def generate_random_noise(duration, gain, frame_width, sample_rate):
    bit_depth = 8 * frame_width
    minval, maxval = get_min_max_value(bit_depth)
    sample_width = get_frame_width(bit_depth)
    array_type = get_array_type(bit_depth)

    gain = db_to_float(gain)
    sample_count = int(sample_rate * (duration / 1000.0))

    data = ((np.random.rand(sample_count, 1) * 2) - 1.0) * maxval * gain

    return AudioSegment(data=data.astype(array_type).tobytes(),
                        metadata={
                            "channels": 1,
                            "sample_width": sample_width,
                            "frame_rate": sample_rate,
                            "frame_width": sample_width,
                        })
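
A hedged usage sketch for the noise generator above; the parameter values and the output path are illustrative:

noise = generate_random_noise(duration=1000, gain=-6.0, frame_width=2, sample_rate=44100)  # 1 s of 16-bit noise at -6 dB
noise.export("noise.wav", format="wav")  # hypothetical output path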
Example #12
    def split_song2(self,song,padding_length):
        bit_depth = song.sample_width * self.BIT_PRECISION
        array_type = get_array_type(bit_depth)
        numeric_array = array.array(array_type, song._data)
        numeric_array = numeric_array.tolist()
        features = self.extract_features3(song.frame_rate, np.asarray(numeric_array))
        #print("$$")
        #print(len(features))

        while len(features)<padding_length:
            features=np.append(features,(np.zeros((1,34)))) #padding

        if len(features)>padding_length:
            features= features[len(features)-padding_length:len(features)]
        #print(np.shape(features))

        print(len(features))
        #print("$$")
        return features
Example #13
def mp3preprocess(path):
    try:
        print(path)
        sound = AudioSegment.from_file(file=path)
        mono = sound.split_to_mono()[0] # TODO maybe concat both sides to have more mono data
        mono = mono.set_frame_rate(SAMPLE_RATE)
        bit_depth = mono.sample_width * 8
        array_type = get_array_type(bit_depth)
        numeric_array = np.array(array.array(array_type, mono._data))
        remainder = len(numeric_array) % NSAMPLES
        if remainder:  # trimming by [:-0] would empty the array
            numeric_array = numeric_array[:-remainder]
        frames = np.array_split(numeric_array, len(numeric_array)/NSAMPLES)
        frames = map(partial(np.fft.fft, norm="ortho"), frames)
        frames = [f[:NSAMPLES//2 +1] for f in frames]
        frames = map(np.absolute, frames)
        frames = [f/NSAMPLES for f in frames]
        return np.array(list(frames))
    except Exception:
        print("Error "+path)
        return None
Example #14
def create_folder_raw_array(folder):
    folder_files = listdir(os.path.join(CUT_DIR, folder))
    folder_files = natural_sort(folder_files)

    create_directory(os.path.join(UNFILTERED_PATH))

    array_file = h5py.File(os.path.join(UNFILTERED_PATH, folder + '.hdf5'),
                           'w')

    number_of_images = len(folder_files)
    all_data = []
    for i, file in enumerate(folder_files):

        # print progress
        print('\rFolder: %s %d/%d\r' % (folder, i, number_of_images))

        # read in a wav file
        data = AudioSegment.from_file(os.path.join(CUT_DIR, folder, file),
                                      format='wav')

        bit_depth = data.sample_width * 8
        array_type = get_array_type(bit_depth)

        numeric_array = array.array(array_type, data._data)
        all_data.append(numeric_array)

    amplitudes = array_file.create_dataset('waveform',
                                           data=all_data,
                                           dtype='i')

    folder_index = get_folder_class_index(CUT_DIR, folder)
    if is_random_forest:
        # PY3 unicode
        dt = h5py.special_dtype(vlen=str)
        array_file.create_dataset('labels',
                                  data=np.transpose([folder.encode('utf8')] *
                                                    len(all_data)),
                                  dtype=dt)
    else:
        array_file.create_dataset('labels',
                                  data=np.transpose([folder_index] *
                                                    len(all_data)),
                                  dtype='i')
Example #15
    def split_single_song(self, song, splits):

        mydict = []
        convers = []

        #increment = 3000
        #if len(song)< 9000:
        increment = int(float(len(song)) / splits)

        for i in range(increment, len(song) + increment, increment):
            # print i
            splitting = song[i - increment:i]  # current increment-length slice (ms)
            bit_depth = splitting.sample_width * 8

            # print splitting.frame_rate
            array_type = get_array_type(bit_depth)

            numeric_array = array.array(array_type, splitting._data)
            numeric_array = numeric_array.tolist()
            features = self.extract_features2(splitting.frame_rate,
                                              np.asarray(numeric_array))[0]
            features_transformed = (features - self.mean_train) / self.sd_train
            convers.append(features_transformed)

            if len(convers) == 3:

                prediction = self.my_attention_network.predict(
                    np.array([convers]))[0]

                mydict.append({
                    "Anger": prediction[0],
                    "Disgust": prediction[1],
                    "Fear": prediction[3],
                    "Happiness": prediction[5],
                    "Neutral": prediction[6],
                    "Sadness": prediction[2],
                    "Surprise": prediction[4]
                })
                #convers.pop(0)

        data_frame_emotions = pd.DataFrame.from_dict(mydict)
        return data_frame_emotions
Example #16
    def load_sample(file_path, frame_rate_output_Hz=None):
        """
        :param file_path:              Full path to the audio file.
        :param frame_rate_output_Hz:   Change the frame rate of the audio. Keep original if None.
        :return:                       Normalised raw waveform [-1, 1].
        """
        audio_seg = AudioSegment.from_file(file_path)
        bit_depth = audio_seg.sample_width * 8
        array_type = get_array_type(bit_depth)

        if frame_rate_output_Hz is not None and frame_rate_output_Hz != audio_seg.frame_rate:
            audio_seg = audio_seg.set_frame_rate(frame_rate_output_Hz)

        raw = np.array(array.array(array_type, audio_seg.raw_data),
                       dtype=np.float64)
        raw /= math.pow(
            2, bit_depth
        ) / 2  # Divide through maximum possible positive or negative number.

        return raw
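
For reference, the final division normalizes by half the full integer range: with 16-bit audio, bit_depth is 16 and math.pow(2, 16) / 2 equals 32768.0, so int16 samples in [-32768, 32767] end up roughly in [-1.0, 1.0].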
Example #17
    def __init__(self,
                 path,
                 pitch_detector=None,
                 plotter=None,
                 ms_increment=100):
        self.filename = os.path.splitext(os.path.basename(path))[0]
        sound = AudioSegment.from_file(file=path, format="wav").set_channels(1)
        self.sound_raw = numpy.frombuffer(
            sound._data,
            dtype=get_array_type(sound.sample_width * 8)).astype(numpy.float64,
                                                                 copy=False)
        self.sound_raw.setflags(write=1)

        self.raw_length = len(self.sound_raw)
        self.ms_increment = ms_increment
        self.raw_increment = int(
            self.ms_increment *
            (len(self.sound_raw) / sound.duration_seconds / 1000))
        self.sample_rate = sound.frame_rate
        self.pitch_detector = pitch_detector
        self.plotter = plotter
Example #18
import array
from pydub import AudioSegment
from pydub.utils import get_array_type

sound = AudioSegment.from_file(file="a.mp3")
left = sound.split_to_mono()[0]

bit_depth = left.sample_width * 8
array_type = get_array_type(bit_depth)

numeric_array = array.array(array_type, left._data)
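
As an aside, the same samples can be read without touching the private left._data attribute: pydub's public get_array_of_samples() already returns an equivalent array.array, and the matching typecode is exposed as the array_type property.

numeric_array = left.get_array_of_samples()  # array.array with typecode left.array_type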
Example #19
def getNumbericArray(sound_clip):
    bit_depth = sound_clip.sample_width * 8
    array_type = get_array_type(bit_depth)

    numeric_array = array.array(array_type, sound_clip._data)
    return numeric_array
Example #20
def cleanPredictions(array_type, pred):
    if array_type == 'h':
        for i, x in enumerate(pred):
            if x < -32768:
                pred[i] = -32768
            elif (x > 32767):
                pred[i] = 32767


#encoder matters!!
#load raw audio data
audio_X = AudioSegment.from_mp3("lq_spaceoddity.mp3")
audio_y = AudioSegment.from_mp3("hq_spaceoddity.mp3")
audio_Z = AudioSegment.from_mp3("lq_cc.mp3")

array_type = utils.get_array_type(audio_X.sample_width * 8)
#get sample array turn into numpy array
X = np.array(audio_X.get_array_of_samples())
y = np.array(audio_y.get_array_of_samples())
#make 2d
X = np.reshape(X, (-1, 1))
y = np.reshape(y, (-1, 1))

#split the data 60/40
split = np.round(X.size * .6).astype(int)

# we need to split so that we know how to reassemble
X_train = X[0:split]
X_test = X[split:]
y_train = y[0:split]
y_test = y[split:]
Example #22
    def _split_recording(self, segments: pd.DataFrame) -> list:

        #from raw sound data and sampling rate, build the spectrogram as a matplotlib figure and return it
        def _create_spectrogram(data, sr):
            snd = Sound(data, sampling_frequency=sr)
            # these parameters were chosen to output a spectrogram useful for zooniverse applications (short sounds from babies); we did not feel the need to make them flexible
            spectrogram = snd.to_spectrogram(
                window_length=0.0075,
                maximum_frequency=8000,
                time_step=0.0001,
                frequency_step=0.1,
                window_shape=SpectralAnalysisWindowShape.GAUSSIAN)

            fig = plt.figure(
                figsize=(12, 6.75)
            )  #size of the image, we chose 1200x675 pixels for a better display on zooniverse
            gs = fig.add_gridspec(2, hspace=0, height_ratios=[
                1, 3
            ])  #2 plots (spectrogram 3x bigger than oscillogram)
            axs = gs.subplots(sharex=True)

            #spectrogram plot
            dynamic_range = 65
            X, Y = spectrogram.x_grid(), spectrogram.y_grid()
            sg_db = 10 * log10(spectrogram.values)
            axs[1].pcolormesh(X,
                              Y,
                              sg_db,
                              vmin=sg_db.max() - dynamic_range,
                              cmap='Greys')
            axs[1].set_ylim([spectrogram.ymin, spectrogram.ymax])
            axs[1].set_xlabel("time [s]")
            axs[1].set_ylabel("frequency [Hz]")
            axs[1].tick_params(labelright=True)
            axs[1].set_xlim([snd.xmin, snd.xmax])

            #oscillogram plot
            axs[0].plot(snd.xs(), snd.values.T, linewidth=0.5)
            axs[0].set_xlim([snd.xmin, snd.xmax])
            axs[0].set_ylabel("amplitude")

            #remove overlapping labels
            ticks = axs[0].yaxis.get_major_ticks()
            if len(ticks): ticks[0].label1.set_visible(False)
            if len(ticks) > 1: ticks[1].label1.set_visible(False)

            fig.tight_layout()

            return fig

        segments = segments.to_dict(orient="records")
        chunks = []

        recording = segments[0]["recording_filename"]
        source = self.project.get_recording_path(recording, self.profile)

        audio = AudioSegment.from_file(source)

        print("extracting chunks from {}...".format(source))

        for segment in segments:
            original_onset = int(segment["segment_onset"])
            original_offset = int(segment["segment_offset"])
            onset = original_onset
            offset = original_offset

            if self.chunks_length > 0:
                onset, offset = pad_interval(onset, offset, self.chunks_length,
                                             self.chunks_min_amount)

                if onset < 0:
                    print("skipping chunk with negative onset ({})".format(
                        onset))
                    continue

                intervals = [(a, a + self.chunks_length)
                             for a in range(onset, offset, self.chunks_length)]
            else:
                intervals = [(onset, offset)]

            for (onset, offset) in intervals:
                chunk = Chunk(
                    segment["recording_filename"],
                    onset,
                    offset,
                    original_onset,
                    original_offset,
                )
                chunk_audio = audio[chunk.onset:chunk.offset].fade_in(
                    10).fade_out(10)

                wav = os.path.join(self.destination, "chunks",
                                   chunk.getbasename("wav"))
                mp3 = os.path.join(self.destination, "chunks",
                                   chunk.getbasename("mp3"))

                if os.path.exists(wav) and os.path.getsize(wav) > 0:
                    print(
                        "{} already exists, exportation skipped.".format(wav))
                else:
                    chunk_audio.export(wav, format="wav")

                if os.path.exists(mp3) and os.path.getsize(mp3) > 0:
                    print(
                        "{} already exists, exportation skipped.".format(mp3))
                else:
                    chunk_audio.export(mp3, format="mp3")

                if self.spectro:
                    png = os.path.join(self.destination, "chunks",
                                       chunk.getbasename("png"))

                    #convert pydub sound data into raw data that the parselmouth library can use
                    bit_depth = chunk_audio.sample_width * 8
                    array_type = get_array_type(bit_depth)

                    sound = array.array(array_type, chunk_audio._data)
                    sr = chunk_audio.frame_rate
                    fig = _create_spectrogram(sound,
                                              sr)  #create the plot figure

                    if os.path.exists(png) and os.path.getsize(png) > 0:
                        print("{} already exists, exportation skipped.".format(
                            png))
                    else:
                        fig.savefig(png)
                    plt.close(fig)

                chunks.append(chunk)

        return chunks
Example #23
def getAudioData(name):
    '''
    sound._data is a bytestring. I'm not sure what input Mpm expects, but you may need to convert the bytestring to an array like so:
    '''
    sound = AudioSegment.from_mp3(retrieveBeat[name])

    bytes_per_sample = sound.sample_width  #1 means 8 bit, 2 means 16 bit
    print("BYTES PER SAMPLE: ")
    print(bytes_per_sample)

    bit_depth = sound.sample_width * 8

    frame_rate = sound.frame_rate
    print("FRAME RATE IS: " + str(frame_rate))

    number_of_frames_in_sound = sound.frame_count()
    number_of_frames_in_sound_200ms = sound.frame_count(ms=200)

    print("NUMBER OF FRAMES IS " + str(number_of_frames_in_sound))
    print("NUMBER OF FRAMES IN SOUND PER 200 MS: " +
          str(number_of_frames_in_sound_200ms))

    array_type = get_array_type(bit_depth)
    print(array_type)
    numeric_array = array.array(array_type, sound.raw_data)
    channel_count = sound.channels
    print("Number of channels in the audio is: ")
    print(channel_count)

    #audio get array of samples

    samples = sound.get_array_of_samples()
    print("SAMPLES ARE")
    print(len(samples))

    left_sound, right_sound = sound.split_to_mono()  #Split it
    print("FRAMES IN LEFT SOUND " + str(left_sound.frame_count()))
    print("FRAMES IN Right SOUND " + str(right_sound.frame_count()))

    print("LEngth of sample left: " +
          str(len(left_sound.get_array_of_samples())))
    print("LEngth of sample right: " +
          str(len(right_sound.get_array_of_samples())))

    #number_of_frames_in_sound_for_every_20s = sound.frame_count(ms=20000)
    #print("length of song is: " + str(len(samples)/number_of_frames_in_sound_for_every_20s * 20) + " seconds")
    '''
    COLLECTED DATA:
    BYTES PER SAMPLE: 
    2
    FRAME RATE IS: 48000
    NUMBER OF FRAMES IS 7688495.0
    NUMBER OF FRAMES IN SOUND PER 200 MS: 9600.0
    h
    Number of channels in the audio is: 
    2
    SAMPLES ARE
    15376990
    FRAMES IN LEFT SOUND 7688495.0
    FRAMES IN Right SOUND 7688495.0
    LEngth of sample left: 7688495
    LEngth of sample right: 7688495
    15376990
    '''

    counter = 0
    for i in range(0, len(samples) - 1):
        if (samples[i] + counter < 10000):
            samples[i] += counter
        if (counter < 500):
            counter += 2
        else:
            counter = 0

        #print(samples[i])
    #  if(i % 2 == 0):
    #     samples[i] = samples[len(samples) - i] #int(samples[i]/2)
    # else:
    #    samples[i] = samples[len(samples) - i] #int(samples[i] - 0.7*samples[i])
    #samples[i] = 10000    #This mutes the sound
    #samples[i+1] = 500

    new_sound = sound._spawn(samples)
    new_sound.export("aaay", format='mp3')
    '''
    note that when using numpy or scipy you will need to convert back to an array before you spawn:

    import array
    import numpy as np
    from pydub import AudioSegment

    sound = AudioSegment.from_file("sound1.wav")
    samples = sound.get_array_of_samples()

    shifted_samples = np.right_shift(samples, 1)

    # now you have to convert back to an array.array
    shifted_samples_array = array.array(sound.array_type, shifted_samples)

    new_sound = sound._spawn(shifted_samples_array)
    '''

    return numeric_array
Example #24
def get_array_from_pydub_obj(pydub_obj):
    bit_depth = pydub_obj.sample_width * 8      # bit depth in bits, e.g. 2 * 8
    array_type = get_array_type(bit_depth)
    numeric_array = array.array(array_type, pydub_obj._data)
    return numeric_array
Example #25
# decoding
decoded_bytes = dft_decode(
    DFT_marked_with_audio, {
        'type': 'DFT',
        'key': {
            'random_key': DFT_marked_with_audio.key['key']['random_key'],
            'original_audio': sound
        }
    })
mark_sample_width = DFT_marked_with_audio.key['key']['metadata'][
    'sample_width']
mark_frame_rate = DFT_marked_with_audio.key['key']['metadata']['frame_rate']
mark_channels = DFT_marked_with_audio.key['key']['metadata']['channels']
mark_tags = mark.tags
decoded_samples = array(get_array_type(mark_sample_width * 8),
                        decoded_bytes)  # should be packaged into Audio
audiowrite(decoded_samples, DFT_marked_with_audio.key, mark_frame_rate,
           mark_sample_width, mark_channels, "./test/DFT_AUDIO.json",
           "./test/extracted.flac", "flac", mark_tags)

# calculate BER
from adwtmk.utilities import get_all_bits

total_bits = get_all_bits(decoded_bytes)

total_bits_len = len(total_bits)

BER = np.sum(
    np.array(
        np.array(total_bits) -
Example #26
#!/usr/bin/env python3

import numpy
from pydub import AudioSegment
from pydub.utils import get_array_type
import sys

if __name__ == '__main__':
    try:
        inputfile = sys.argv[1]
        outputfile = sys.argv[2]
    except IndexError:
        print('usage: get_raw_audio.py infile.ext outfile.txt',
              file=sys.stderr)
        sys.exit(1)

    sound = AudioSegment.from_file(
        file=inputfile, format=inputfile.split('.')[-1]).set_channels(1)

    sound_raw = numpy.frombuffer(sound._data,
                                 dtype=get_array_type(sound.sample_width * 8))

    print('sample rate: {0}'.format(sound.frame_rate), file=sys.stderr)

    with open(outputfile, 'w') as fwrite:
        for sample in sound_raw:
            fwrite.write('{0}\n'.format(sample))