def harmoniccomplex(f0, duration, amplitude=1, phase=0, samplerate=None, nchannels=1):
    '''
    Returns a harmonic complex composed of pure tones at integer multiples
    of the fundamental frequency ``f0``.

    The ``amplitude`` and ``phase`` keywords can be set to either a single
    value or an array of values. In the former case the value is set for all
    harmonics, and harmonics up to the Nyquist frequency (half the sampling
    frequency) are generated. In the latter each harmonic parameter is set
    separately, and the number of harmonics generated corresponds to the
    length of the array.

    Raises ``ValueError`` if ``amplitude`` and ``phase`` are both arrays of
    different lengths, or if no harmonic fits below the Nyquist frequency.
    '''
    samplerate = get_samplerate(samplerate)
    phases = np.array(phase).flatten()
    amplitudes = np.array(amplitude).flatten()
    if len(phases) > 1 or len(amplitudes) > 1:
        if (len(phases) > 1 and len(amplitudes) > 1) and (len(phases) != len(amplitudes)):
            raise ValueError('Please specify the same number of phases and amplitudes')
        Nharmonics = max(len(phases), len(amplitudes))
    else:
        # Single values: generate every harmonic up to samplerate/2.
        Nharmonics = int(np.floor(samplerate / (2 * f0)))
    if Nharmonics < 1:
        # Without this guard the code below fails with an opaque IndexError.
        raise ValueError('No harmonics to generate: f0 must be positive and '
                         'at most half the samplerate')
    # Broadcast a single value to all harmonics (tile the flattened array so
    # that nested one-element inputs also yield a 1-d result).
    if len(phases) == 1:
        phases = np.tile(phases, Nharmonics)
    if len(amplitudes) == 1:
        amplitudes = np.tile(amplitudes, Nharmonics)
    x = amplitudes[0] * tone(f0, duration, phase=phases[0],
                             samplerate=samplerate, nchannels=nchannels)
    for i in range(1, Nharmonics):
        # Harmonic number i+1 sits at (i+1)*f0.
        x += amplitudes[i] * tone((i + 1) * f0, duration, phase=phases[i],
                                  samplerate=samplerate, nchannels=nchannels)
    return Sound(x, samplerate)
def tone(frequency, duration, phase=0, samplerate=None, nchannels=1):
    '''
    Returns a pure tone at frequency for duration, using the default
    samplerate or the given one.

    The ``frequency`` and ``phase`` parameters can be single values, in
    which case multiple channels can be specified with the ``nchannels``
    argument, or they can be sequences (lists/tuples/arrays) in which case
    there is one frequency or phase for each channel.
    '''
    samplerate = get_samplerate(samplerate)
    duration = get_duration(duration, samplerate)
    frequency = array(frequency)
    phase = array(phase)
    # A sequence argument determines the channel count when the caller left
    # ``nchannels`` at its default of 1.
    if frequency.size > nchannels and nchannels == 1:
        nchannels = frequency.size
    if phase.size > nchannels and nchannels == 1:
        nchannels = phase.size
    # Per-channel values are laid out as row vectors so that they broadcast
    # against the (samples, nchannels) time grid built below.
    if frequency.size == nchannels:
        frequency.shape = (1, nchannels)
    if phase.size == nchannels:
        # Was (nchannels, 1), which cannot broadcast against
        # (samples, nchannels) for more than one channel.
        phase.shape = (1, nchannels)
    t = arange(0, duration, 1) / samplerate
    t.shape = (t.size, 1)  # ensures C-order (in contrast to tile(...).T )
    x = sin(phase + 2.0 * pi * frequency * tile(t, (1, nchannels)))
    return Sound(x, samplerate)
def silence(duration, samplerate=None, nchannels=1):
    '''
    Returns an all-zero (silent) sound lasting ``duration``.

    ``nchannels`` sets the number of channels; if ``samplerate`` is not
    given it is resolved by ``get_samplerate``.
    '''
    rate = get_samplerate(samplerate)
    nsamples = get_duration(duration, rate)
    # One row per sample, one column per channel.
    return Sound(numpy.zeros((nsamples, nchannels)), rate)
def powerlawnoise(duration, alpha, samplerate=None, nchannels=1, normalise=False):
    '''
    Returns a power-law noise for the given duration. Spectral density per
    unit of bandwidth scales as 1/(f**alpha).

    Sample usage::

        noise = powerlawnoise(200*ms, 1, samplerate=44100*Hz)

    Arguments:

    ``duration``
        Duration of the desired output.
    ``alpha``
        Power law exponent.
    ``samplerate``
        Desired output samplerate
    ``nchannels``
        Number of channels; each channel uses its own random spectrum.
    ``normalise``
        If true, each channel is rescaled (min-max) to the range [-1, 1].
    '''
    samplerate = get_samplerate(samplerate)
    duration = get_duration(duration, samplerate)

    # Adapted from http://www.eng.ox.ac.uk/samp/software/powernoise/powernoise.m
    # Little MA et al. (2007), "Exploiting nonlinear recurrence and fractal
    # scaling properties for voice disorder detection", Biomed Eng Online, 6:23
    n = duration
    # Must be an integer: it is used as an array dimension and slice bound.
    n2 = int(n // 2)

    f = array(fftfreq(n, d=1.0 / samplerate), dtype=complex)
    f.shape = (len(f), 1)
    f = tile(f, (1, nchannels))

    if n % 2 == 1:
        z = randn(n2, nchannels) + 1j * randn(n2, nchannels)
        a2 = 1.0 / (f[1:(n2 + 1), :] ** (alpha / 2.0))
        a2 *= z
        # DC term, positive frequencies, mirrored conjugates.
        d = vstack((ones((1, nchannels)), a2, flipud(conj(a2))))
    else:
        z = randn(n2 - 1, nchannels) + 1j * randn(n2 - 1, nchannels)
        a2 = 1.0 / (f[1:n2, :] ** (alpha / 2.0))
        a2 *= z
        # Even length: an extra real-valued term at index n2 sits between
        # the positive frequencies and their mirrored conjugates.
        d = vstack((ones((1, nchannels)),
                    a2,
                    1.0 / (abs(f[n2]) ** (alpha / 2.0)) * randn(1, nchannels),
                    flipud(conj(a2))))

    # Transform each channel (column) separately. Flattening ``d`` first
    # would interleave the channels into a single long spectrum, which is
    # only equivalent when nchannels == 1.
    x = real(ifft(d, axis=0))

    if normalise:
        for i in range(nchannels):
            lo = amin(x[:, i])
            hi = amax(x[:, i])
            x[:, i] = ((x[:, i] - lo) / (hi - lo) - 0.5) * 2
    return Sound(x, samplerate)
def whitenoise(duration, samplerate=None, nchannels=1):
    '''
    Returns a white noise made of ``randn`` samples, one column per channel.

    If the samplerate is not specified, the global default value will be
    used.
    '''
    rate = get_samplerate(samplerate)
    nsamples = get_duration(duration, rate)
    samples = randn(nsamples, nchannels)
    return Sound(samples, rate)
def __new__(cls, data, samplerate=None, duration=None):
    '''
    Builds a Sound from one of several kinds of ``data``:

    * a numpy array: copied as floats; ``duration`` must not be given.
    * a string: loaded with ``Sound.load``; neither ``duration`` nor
      ``samplerate`` may be given.
    * a callable: evaluated on an array of sample times; ``duration`` is
      required.
    * a list or tuple: each item is turned into a Sound and the results are
      stacked side by side as channels.

    Any other type raises ``TypeError``.
    '''
    if isinstance(data, numpy.ndarray):
        samplerate = get_samplerate(samplerate)
        if duration is not None:
            raise ValueError('Cannot specify duration when initialising Sound with array.')
        x = array(data, dtype=float)
    elif isinstance(data, str):
        if duration is not None:
            raise ValueError('Cannot specify duration when initialising Sound from file.')
        if samplerate is not None:
            raise ValueError('Cannot specify samplerate when initialising Sound from a file.')
        x = Sound.load(data)
        samplerate = x.samplerate
    elif callable(data):
        samplerate = get_samplerate(samplerate)
        if duration is None:
            raise ValueError('Must specify duration to initialise Sound with function.')
        # Sample times 0, 1/samplerate, 2/samplerate, ...
        nsamples = int(rint(duration * samplerate))
        times = arange(nsamples, dtype=float) / samplerate
        x = data(times)
    elif isinstance(data, (list, tuple)):
        # Forward only the keywords the caller actually supplied.
        supplied = (('samplerate', samplerate), ('duration', duration))
        kwds = dict((key, value) for key, value in supplied if value is not None)
        channels = tuple(Sound(item, **kwds) for item in data)
        x = hstack(channels)
        samplerate = channels[0].samplerate
    else:
        raise TypeError('Cannot initialise Sound with data of class ' + str(data.__class__))
    # 1-d data becomes a single-channel column.
    if x.ndim == 1:
        x.shape = (len(x), 1)
    x = x.view(cls)
    x.samplerate = samplerate
    x.buffer_init()
    return x
def click(duration, peak=None, samplerate=None, nchannels=1):
    '''
    Returns a click (constant-amplitude sound) of the given duration.

    If ``peak`` is not specified, the amplitude will be 1, otherwise
    ``peak`` refers to the peak dB SPL of the click, according to the
    formula ``28e-6*10**(peak/20.)``. A ``peak`` that is not a dB value
    raises ``dB_error``.
    '''
    rate = get_samplerate(samplerate)
    nsamples = get_duration(duration, rate)
    if peak is None:
        amplitude = 1
    elif isinstance(peak, dB_type):
        amplitude = 28e-6 * 10 ** (float(peak) / 20.)
    else:
        raise dB_error('Peak must be given in dB')
    samples = amplitude * ones((nsamples, nchannels))
    return Sound(samples, rate)
def irno(delay, gain, niter, duration, samplerate=None, nchannels=1):
    '''
    Returns an IRN_O noise. The iterated ripple noise is obtained by adding
    many attenuated and delayed versions of the original broadband noise.
    For more details: see Yost 1996 or chapter 15 in Hartman Sound Signal
    Sensation.

    Only a single channel is supported; ``nchannels != 1`` raises
    ``ValueError``.
    '''
    if nchannels != 1:
        # Previously ignored silently; rejected explicitly, as irns does.
        raise ValueError("nchannels!=1 not supported.")
    samplerate = get_samplerate(samplerate)
    # Generate the noise at the requested samplerate; otherwise the samples
    # would be produced at the default rate but labelled with ``samplerate``.
    noise = Sound.whitenoise(duration, samplerate=samplerate)
    splrate = noise.samplerate
    x = array(noise.T)[0]
    IRNadd = np.fft.fft(x)
    Nspl, spl_dur = len(IRNadd), float(1.0 / splrate)
    w = 2 * pi * fftfreq(Nspl, spl_dur)  # angular frequency of each FFT bin
    d = float(delay)
    # NOTE(review): the right-hand side uses the running spectrum, so each
    # pass multiplies it by (1 + gain**k * exp(-1j*w*k*d)) rather than
    # adding a delayed copy of the original noise. Kept as-is; confirm
    # against the reference before changing.
    for k in range(1, niter + 1):
        IRNadd += (gain ** k) * IRNadd * exp(-1j * w * k * d)
    IRNadd = np.fft.ifft(IRNadd)
    x = real(IRNadd)
    return Sound(x, samplerate)
def irns(delay, gain, niter, duration, samplerate=None, nchannels=1):
    '''
    Returns an IRN_S noise. The iterated ripple noise is obtained through a
    cascade of gain and delay filtering. For more details: see Yost 1996 or
    chapter 15 in Hartman Sound Signal Sensation.

    Only a single channel is supported; ``nchannels != 1`` raises
    ``ValueError``.
    '''
    if nchannels != 1:
        raise ValueError("nchannels!=1 not supported.")
    samplerate = get_samplerate(samplerate)
    # Generate the noise at the requested samplerate; otherwise the samples
    # would be produced at the default rate but labelled with ``samplerate``.
    noise = Sound.whitenoise(duration, samplerate=samplerate)
    splrate = noise.samplerate
    x = array(noise.T)[0]
    IRNfft = np.fft.fft(x)
    Nspl, spl_dur = len(IRNfft), float(1.0 / splrate)
    w = 2 * pi * fftfreq(Nspl, spl_dur)  # angular frequency of each FFT bin
    d = float(delay)
    # NOTE(review): the right-hand side uses the running spectrum, so each
    # pass multiplies it by (1 + nchoosek * gain**k * exp(-1j*w*k*d)) rather
    # than accumulating a binomial sum over the original noise. Kept as-is;
    # confirm against the reference before changing.
    for k in range(1, niter + 1):
        nchoosek = factorial(niter) / (factorial(niter - k) * factorial(k))
        IRNfft += nchoosek * (gain ** k) * IRNfft * exp(-1j * w * k * d)
    IRNadd = np.fft.ifft(IRNfft)
    x = real(IRNadd)
    return Sound(x, samplerate)
def vowel(vowel=None, formants=None, pitch=100*Hz, duration=1*second, samplerate=None, nchannels=1):
    '''
    Returns an artifically created spoken vowel sound (following the
    source-filter model of speech production) with a given ``pitch``.

    The vowel can be specified by either providing ``vowel`` as a string
    ('a', 'i' or 'u') or by setting ``formants`` to a sequence of formant
    frequencies.

    The returned sound is normalized to a maximum amplitude of 1. With
    ``nchannels`` greater than 1 the same waveform is copied into every
    channel.

    The implementation is based on the MakeVowel function written by Richard
    O. Duda, part of the Auditory Toolbox for Matlab by Malcolm Slaney:
    http://cobweb.ecn.purdue.edu/~malcolm/interval/1998-010/
    '''
    samplerate = get_samplerate(samplerate)
    duration = get_duration(duration, samplerate)

    if not (vowel or formants):
        raise ValueError('Need either a vowel or a list of formants')
    elif (vowel and formants):
        raise ValueError('Cannot use both vowel and formants')

    if vowel:
        if vowel == 'a' or vowel == '/a/':
            formants = (730.0*Hz, 1090.0*Hz, 2440.0*Hz)
        elif vowel == 'i' or vowel == '/i/':
            formants = (270.0*Hz, 2290.0*Hz, 3010.0*Hz)
        elif vowel == 'u' or vowel == '/u/':
            formants = (300.0*Hz, 870.0*Hz, 2240.0*Hz)
        else:
            raise ValueError('Unknown vowel: "%s"' % (vowel))

    # Pulse positions (in samples, generally fractional), one per pitch
    # period; each pulse is split linearly between its two nearest samples.
    points = np.arange(0, duration - 1, samplerate / pitch)
    indices = np.floor(points).astype(int)

    y = np.zeros(duration)
    y[indices] = (indices + 1) - points
    y[indices + 1] = points - indices

    # model the sound source (periodic glottal excitation)
    a = np.exp(-250.*Hz * 2 * np.pi / samplerate)
    y = lfilter([1], [1, 0, -a * a], y.copy())

    # model the filtering by the vocal tract: one two-pole filter per formant
    bandwidth = 50.*Hz
    for f in formants:
        cft = f / samplerate
        q = f / bandwidth
        rho = np.exp(-np.pi * cft / q)
        theta = 2 * np.pi * cft * np.sqrt(1 - 1/(4.0 * q * q))
        a2 = -2 * rho * np.cos(theta)
        a3 = rho * rho
        y = lfilter([1 + a2 + a3], [1, a2, a3], y.copy())

    # normalize sound
    data = y / np.max(np.abs(y), axis=0)
    data = data.reshape(data.size, 1)
    # Repeat the column across channels -> (samples, nchannels). Tiling with
    # (nchannels, 1) would instead concatenate copies along the time axis.
    return Sound(np.tile(data, (1, nchannels)), samplerate=samplerate)
def powerlawnoise(duration, alpha, samplerate=None, nchannels=1, normalise=False):
    '''
    Returns a power-law noise for the given duration. Spectral density per
    unit of bandwidth scales as 1/(f**alpha).

    Sample usage::

        noise = powerlawnoise(200*ms, 1, samplerate=44100*Hz)

    Arguments:

    ``duration``
        Duration of the desired output.
    ``alpha``
        Power law exponent.
    ``samplerate``
        Desired output samplerate
    ``nchannels``
        Number of channels; each channel uses its own random spectrum.
    ``normalise``
        If true, each channel is rescaled (min-max) to the range [-1, 1].
    '''
    samplerate = get_samplerate(samplerate)
    duration = get_duration(duration, samplerate)

    # Adapted from http://www.eng.ox.ac.uk/samp/software/powernoise/powernoise.m
    # Little MA et al. (2007), "Exploiting nonlinear recurrence and fractal
    # scaling properties for voice disorder detection", Biomed Eng Online, 6:23
    n = duration
    n2 = int(n / 2)
    odd_length = (n % 2 == 1)

    f = array(fftfreq(n, d=1.0 / samplerate), dtype=complex)
    f.shape = (len(f), 1)
    f = tile(f, (1, nchannels))

    # Random complex amplitudes for the positive frequencies, shaped by the
    # power law.
    if odd_length:
        z = (randn(n2, nchannels) + 1j * randn(n2, nchannels))
        a2 = 1.0 / (f[1:(n2 + 1), :]**(alpha / 2.0))
    else:
        z = (randn(n2 - 1, nchannels) + 1j * randn(n2 - 1, nchannels))
        a2 = 1.0 / (f[1:n2, :]**(alpha / 2.0))
    a2 *= z

    # Assemble the full spectrum: DC term, positive frequencies, (for even
    # lengths) a real-valued term at index n2, then mirrored conjugates.
    if odd_length:
        d = vstack((ones((1, nchannels)), a2, flipud(conj(a2))))
    else:
        d = vstack(
            (ones((1, nchannels)), a2,
             1.0 / (abs(f[n2])**(alpha / 2.0)) * randn(1, nchannels),
             flipud(conj(a2))))

    # Transform each channel (column) separately. Flattening ``d`` first
    # would interleave the channels into a single long spectrum, which is
    # only equivalent when nchannels == 1.
    x = real(ifft(d, axis=0))

    if normalise:
        for i in range(nchannels):
            x[:, i] = ((x[:, i] - amin(x[:, i])) /
                       (amax(x[:, i]) - amin(x[:, i])) - 0.5) * 2
    return Sound(x, samplerate)
def vowel(vowel=None, formants=None, pitch=100 * Hz, duration=1 * second, samplerate=None, nchannels=1):
    '''
    Returns an artifically created spoken vowel sound (following the
    source-filter model of speech production) with a given ``pitch``.

    The vowel can be specified by either providing ``vowel`` as a string
    ('a', 'i' or 'u') or by setting ``formants`` to a sequence of formant
    frequencies.

    The returned sound is normalized to a maximum amplitude of 1. With
    ``nchannels`` greater than 1 the same waveform is copied into every
    channel.

    The implementation is based on the MakeVowel function written by Richard
    O. Duda, part of the Auditory Toolbox for Matlab by Malcolm Slaney:
    http://cobweb.ecn.purdue.edu/~malcolm/interval/1998-010/
    '''
    samplerate = get_samplerate(samplerate)
    duration = get_duration(duration, samplerate)

    if not (vowel or formants):
        raise ValueError('Need either a vowel or a list of formants')
    elif (vowel and formants):
        raise ValueError('Cannot use both vowel and formants')

    if vowel:
        if vowel == 'a' or vowel == '/a/':
            formants = (730.0 * Hz, 1090.0 * Hz, 2440.0 * Hz)
        elif vowel == 'i' or vowel == '/i/':
            formants = (270.0 * Hz, 2290.0 * Hz, 3010.0 * Hz)
        elif vowel == 'u' or vowel == '/u/':
            formants = (300.0 * Hz, 870.0 * Hz, 2240.0 * Hz)
        else:
            raise ValueError('Unknown vowel: "%s"' % (vowel))

    # Pulse positions (in samples, generally fractional), one per pitch
    # period; each pulse is split linearly between its two nearest samples.
    points = np.arange(0, duration - 1, samplerate / pitch)
    indices = np.floor(points).astype(int)

    y = np.zeros(duration)
    y[indices] = (indices + 1) - points
    y[indices + 1] = points - indices

    # model the sound source (periodic glottal excitation)
    a = np.exp(-250. * Hz * 2 * np.pi / samplerate)
    y = lfilter([1], [1, 0, -a * a], y.copy())

    # model the filtering by the vocal tract: one two-pole filter per formant
    bandwidth = 50. * Hz
    for f in formants:
        cft = f / samplerate
        q = f / bandwidth
        rho = np.exp(-np.pi * cft / q)
        theta = 2 * np.pi * cft * np.sqrt(1 - 1 / (4.0 * q * q))
        a2 = -2 * rho * np.cos(theta)
        a3 = rho * rho
        y = lfilter([1 + a2 + a3], [1, a2, a3], y.copy())

    # normalize sound
    data = y / np.max(np.abs(y), axis=0)
    data = data.reshape(data.size, 1)
    # Repeat the column across channels -> (samples, nchannels). Tiling with
    # (nchannels, 1) would instead concatenate copies along the time axis.
    return Sound(np.tile(data, (1, nchannels)), samplerate=samplerate)