def auditory_spectrum(audio_file_path):
    '''Compute an auditory spectrogram from an audio file.'''
    import numpy as np
    import dear.io as io
    from dear.spectrum import auditory

    decoder = io.get_decoder(name='audioread')
    audio = decoder.Audio(audio_file_path)
    st = 0
    graph = 'Y5'               # auditory-model stage to use
    N = 64                     # number of frequency channels
    win = 0.025                # analysis window, seconds
    hop = 0.010                # hop size, seconds
    freqs = [110., 2 * 4435.]  # frequency range, Hz
    combine = False
    spec = [[]]
    gram = getattr(auditory, graph)
    gram = gram(audio)
    # each step of walk() yields one spectral frame
    for t, frame in enumerate(
            gram.walk(N=N, freq_base=freqs[0], freq_max=freqs[1],
                      start=st, end=None, combine=combine,
                      twin=win, thop=hop)):
        spec[0].append(frame)
    return np.array(spec), audio._duration
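# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original source): compute and plot
# the spectrogram. matplotlib and the array orientation are assumptions.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import sys
    import numpy as np
    import matplotlib.pyplot as plt

    spec, duration = auditory_spectrum(sys.argv[1])
    frames = np.abs(spec[0])        # (n_frames, N) magnitude frames
    plt.imshow(frames.T, origin='lower', aspect='auto',
               extent=[0, duration, 0, frames.shape[1]])
    plt.xlabel('time (s)')
    plt.ylabel('channel index')
    plt.show()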
try:
    # option values (output, algorithm, start, end, ...) are parsed above;
    # CROP_ALGORITHMS, FFMPEG, print_exc and exit_with_usage are defined
    # earlier in the script
    assert output is not None
    assert algorithm in CROP_ALGORITHMS
    assert 0 < length
    assert (end is None) or 0 <= start < end
    assert 0 < samplerate
    assert 0 < bitrate
except Exception as ex:
    print_exc()
    exit_with_usage()

if len(args) != 0:
    exit_with_usage()

func = CROP_ALGORITHMS.get(algorithm)

import dear.io as io
decoder = io.get_decoder(name='audioread')
audio = decoder.Audio(inputf)
print "SampleRate: %d Hz\nChannel(s): %d\nDuration: %d sec" \
        % (audio.samplerate, audio.channels, audio.duration)

if start >= audio.duration:
    print "[error] Start time is beyond song duration. Not cropping."
else:
    start, end = func(audio, start, end, length)
    duration = end - start
    print start, end, duration
    tmpfile = output + '.crop.tmp.wav'
    # crop to [start, start+duration], downmix to mono, resample
    cmd = [FFMPEG, '-i', inputf, '-ss', str(start), '-t', str(duration),
           '-ac', '1', '-ar', str(samplerate), '-ab', bitrate, tmpfile]
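    # ------------------------------------------------------------------
    # Hedged continuation sketch: the original fragment stops at `cmd`.
    # Running it via subprocess and swapping the temp file for the final
    # output is an assumption about what follows, not the source itself.
    # ------------------------------------------------------------------
    import os
    import subprocess

    if subprocess.call(cmd) != 0:
        print "[error] ffmpeg exited with a non-zero status."
    else:
        # encode the temporary mono WAV to the requested output bitrate,
        # then remove the temp file
        subprocess.call([FFMPEG, '-i', tmpfile, '-ab', bitrate, output])
        os.remove(tmpfile)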
        [-h] step size, default 1024
        [-f] frequency boundary, default (0, 7040)
    """
    exit()

try:
    opts, args = getopt.getopt(sys.argv[1:], "g:s:t:o:h:w:q:n:f:b:rc")
except getopt.GetoptError as ex:
    print ex
    exit_with_usage()
if len(args) != 1:
    #print args
    exit_with_usage()

import dear.io as io
decoder = io.get_decoder(name='audioread')
audio = decoder.Audio(args[0])
print "SampleRate: %d Hz\nChannel(s): %d\nDuration: %d sec" \
        % (audio.samplerate, audio.channels, audio.duration)

graph = 'dft'
st = 0
to = None
outfile = None
# colo is presumably matplotlib.colors, imported earlier in the script
norm = colo.LogNorm(vmin=0.000001)
for o, a in opts:
    if o == '-s':
        st = float(a)
    elif o == '-t':
        to = float(a)
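# ---------------------------------------------------------------------------
# Hedged sketch (not the original script's drawing code): how the LogNorm
# created above is typically applied when drawing a spectrogram with
# matplotlib. `spec` (2-D array, frames x bins) is an assumed variable name.
# ---------------------------------------------------------------------------
def _plot_spectrogram_sketch(spec, norm, outfile=None):
    import matplotlib.pyplot as plt
    plt.imshow(spec.T, origin='lower', aspect='auto', norm=norm)
    plt.xlabel('frame')
    plt.ylabel('frequency bin')
    if outfile:
        plt.savefig(outfile)
    else:
        plt.show()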
import os
import time

import numpy as np
# imsave takes (path, array); matplotlib.pyplot.imsave fits the calls below,
# though the original import is not shown in this fragment
from matplotlib.pyplot import imsave
import dear.io as io
from dear.spectrum import dft, cqt, auditory
# iso (ISO 226 equal-loudness weight), draw_spectrum and convolve_quaternion
# are project helpers defined elsewhere in this module


def render_file(fin, outdir, shape=(512, 512), framerate=25, sym=6, inv=1,
                pad=True, mode='dft', preserve_alpha=False, params={},
                no_color=False, edge_filter=True):
    decoder = io.get_decoder(name='audioread')
    audio = decoder.Audio(fin)
    if not os.path.isdir(outdir):
        os.mkdir(outdir)
    fs = audio.samplerate
    nframes = audio.duration * framerate
    print 'fs: %d Hz, Duration: %d sec, Frames: %d' % (
            audio.samplerate, audio.duration, nframes)

    # pick a spectrum generator and its walk() arguments per mode
    gram = None
    log_index = True
    n_octaves = None
    gram_args = []
    gram_kwargs = {'start': 0, 'end': None}
    if mode == 'dft':
        win = 2 * fs / framerate
        hop = fs / framerate
        nfft = None
        if 'w' in params:
            win = params['w']
        if 'n' in params:
            nfft = params['n']
        gram = dft.PowerSpectrum(audio, nfft=nfft)
        gram_args = [win, hop]
    elif mode == 'cnt':
        n = 60
        hop = 1.0 / framerate
        n_octaves = 8
        log_index = False
        if 'n' in params:
            n = params['n']
        if 'o' in params:
            n_octaves = params['o']
        gram_args = [n]
        gram_kwargs['hop'] = hop
        gram_kwargs['freq_base'] = cqt.A0 * 4
        gram_kwargs['freq_max'] = cqt.A0 * 2**n_octaves
        gram = cqt.CNTPowerSpectrum(audio)
    elif mode == 'gmt':
        n = 60
        hop = 1.0 / framerate
        n_octaves = 9
        log_index = False
        if 'n' in params:
            n = params['n']
        if 'o' in params:
            n_octaves = params['o']
        n = n * (n_octaves - 1)
        gram_args = [n]
        gram_kwargs['thop'] = hop
        gram_kwargs['twin'] = 2 * hop
        gram_kwargs['freq_base'] = cqt.A0 * 4
        gram_kwargs['freq_max'] = cqt.A0 * 2**n_octaves
        gram_kwargs['combine'] = True
        gram = auditory.GammatoneSpectrum(audio)

    i = 0
    j = 0
    #print gram_args
    #print gram_kwargs
    tqcv = 0
    iso226_factors = None
    timesums = np.zeros(4)
    for spectrum in gram.walk(*gram_args, **gram_kwargs):
        iso_init = isinstance(iso226_factors, np.ndarray)
        i += 1
        # build the inverse equal-loudness weighting once the spectrum's
        # frequency axis (gram.fqs) is known
        if hasattr(gram, 'fqs') and not iso_init:
            iso226_factors = np.array(map(iso, gram.fqs))
            iso226_factors = 10 ** (iso226_factors / 10)
            iso226_factors = 1 / iso226_factors
        # skip frames already rendered; note outdir is assumed to end
        # with a path separator
        if os.path.isfile(outdir + 'conv%05d.png' % i):
            continue
        if iso_init:
            spectrum = np.multiply(spectrum, iso226_factors)
        j += 1
        im, times = draw_spectrum(spectrum, shape, sym, inv, log_index,
                                  n_octaves, mode=mode, no_color=no_color,
                                  edge=edge_filter)
        timesums += np.array(times)
        # save the pre-convolution spectrum image
        imsave(outdir + 'img%05d.png' % i, (im * 255).astype(np.uint8))
        t0 = time.time()
        im = convolve_quaternion(im, pad, preserve_alpha, no_color=no_color)
        tqcv += time.time() - t0
        imsave(outdir + 'conv%05d.png' % i, im.astype(np.uint8))
        if i % 100 == 0:
            # progress report: percent done, elapsed time, and an ETA based
            # on the rate of frames actually rendered this run
            ttot = tqcv + timesums.sum()
            actual_nframes = nframes - (i - j)
            pct = i / nframes
            pct_diff = j / actual_nframes
            eta = ttot / pct_diff - ttot
            print '%d %4.4f%% %6.2fs elapsed %6.2fs eta %6.2f convolution %s timing' \
                    % (i, 100.0 * pct, ttot, eta, tqcv, str(timesums))
    print 'done rendering', i, tqcv + timesums.sum(), tqcv, timesums
    return
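# ---------------------------------------------------------------------------
# Hedged usage sketch (assumptions, not part of the original module): render
# frames for one file, then stitch the convolved frames into a video with
# ffmpeg. Paths and the exact ffmpeg invocation are illustrative.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import sys
    render_file(sys.argv[1], 'frames/', mode='gmt')
    # conv%05d.png frames -> video; -pix_fmt yuv420p for broad player support
    os.system('ffmpeg -framerate 25 -i frames/conv%05d.png '
              '-pix_fmt yuv420p out.mp4')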