def plotstft(audiopath, generatefig=True, binsize=2**10, plotpath=None, colormap="jet"):
    """Draw the log-scaled spectrogram of an audio file and export it as an image.

    Args:
        audiopath: Path of the audio file handed to ``wav.read``.
        generatefig: When true (default), the rendered figure is converted with
            ``prepare_fig_to_img`` and written to ``plotpath``. When false, the
            figure is shown interactively together with a preview of the
            preprocessing stages, and the final preview is written to
            ``test.png`` in the working directory.
        binsize: Window size forwarded to ``stft``; also used to centre the
            time-axis tick labels.
        plotpath: Destination of the generated image. When ``None`` the image
            is stored next to the audio file, with the audio extension
            replaced by ``.png``.
        colormap: Colormap name passed to ``plt.imshow``.

    Returns:
        The figure object created by ``plt.figure``. The figure is left open
        on purpose (it is returned to the caller); callers that generate many
        plots should close it themselves to avoid accumulating figures.
    """
    import os  # local import so the block does not rely on the file header

    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    if plotpath is None:
        # Only the extension is swapped; splitext leaves directory names that
        # happen to contain ".wav" untouched and works for any path depth.
        plotpath = os.path.splitext(audiopath)[0] + ".png"

    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    # Amplitude to decibel; note the reference value 10e-6 equals 1e-5.
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)

    timebins, freqbins = np.shape(ims)

    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto",
               cmap=colormap, interpolation="none")
    plt.colorbar()

    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])

    # Ten evenly spaced time ticks, labelled in seconds.
    xlocs = np.float32(np.linspace(0, timebins - 1, 10))
    tick_seconds = ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate
    plt.xticks(xlocs, ["%.02f" % sec for sec in tick_seconds])

    # Twenty evenly spaced frequency ticks, labelled from the freq table.
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 20)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    # Important: renders the figure into the canvas (so its pixel data can be
    # extracted below) without displaying it to the user.
    fig.canvas.draw()

    if generatefig:
        # Data-set generation: preprocess the figure and store it on disk.
        # TODO: move this into a separate function.
        img_data = prepare_fig_to_img(fig)
        cv2.imwrite(plotpath, img_data)
    else:
        plt.show()

        # Temporary preview of what will be fed to network training;
        # to be removed later.
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        img_data = ImageTransform.remove_noise(img_data)  # closing: 1. dilate 2. erode
        # Original 350x165 scaled down by 5 -> 70x33, proportions preserved.
        img_data = ImageTransform.resize_graph(img_data, 70, 33)
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()

    return fig
def prepare_fig_to_img(graph_fig):
    """Turn a Matplotlib figure into an image ready for network training.

    The figure is first converted to image data and then passed through the
    ``ImageTransform`` stages in order. According to the original notes the
    pipeline performs: 1. cropping, 2. grayscale conversion, 3. binarization,
    4. noise removal, 5. resizing.

    Args:
        graph_fig: The figure object to convert.

    Returns:
        The processed image (a numpy matrix, per the original notes).
    """
    picture = ImageTransform.fig2data(graph_fig)

    stages = (
        ImageTransform.transform,     # presumably crop + grayscale -- TODO confirm
        ImageTransform.image_bin,     # binarization
        ImageTransform.invert,
        ImageTransform.remove_noise,  # closing: 1. dilate 2. erode
    )
    for stage in stages:
        picture = stage(picture)

    # Original 350x165 scaled down by 5 -> 70x33, proportions preserved.
    return ImageTransform.resize_graph(picture, 70, 33)
def prepare_fig_to_img(graph_fig):
    """Convert a Matplotlib figure into a small image for network training.

    Steps listed in the original notes: 1. cropping, 2. grayscale,
    3. binarization, 4. noise removal, 5. resize. Each step is delegated to
    an ``ImageTransform`` helper.

    Args:
        graph_fig: The figure object whose rendered content is processed.

    Returns:
        The image produced by the last stage (a numpy matrix, per the
        original notes).
    """
    # Original 350x165 scaled down by 5 -> 70x33, proportions preserved.
    scaled_size = (70, 33)

    rendered = ImageTransform.fig2data(graph_fig)
    transformed = ImageTransform.transform(rendered)
    inverted = ImageTransform.invert(ImageTransform.image_bin(transformed))
    # Closing: 1. dilate 2. erode.
    cleaned = ImageTransform.remove_noise(inverted)
    return ImageTransform.resize_graph(cleaned, *scaled_size)
def plotstft(audiopath, generatefig=True, binsize=2**10, plotpath=None, colormap="jet"):
    """Draw the log-scaled spectrogram of an audio file and export it as an image.

    Args:
        audiopath: Path of the audio file handed to ``wav.read``.
        generatefig: When true (default), the rendered figure is converted with
            ``prepare_fig_to_img`` and written to ``plotpath``. When false, the
            figure is shown interactively together with a preview of the
            preprocessing stages, and the final preview is written to
            ``test.png`` in the working directory.
        binsize: Window size forwarded to ``stft``; also used to centre the
            time-axis tick labels.
        plotpath: Destination of the generated image. When ``None`` the image
            is stored next to the audio file, with the audio extension
            replaced by ``.png``.
        colormap: Colormap name passed to ``plt.imshow``.

    Returns:
        The figure object created by ``plt.figure``. The figure is left open
        on purpose (it is returned to the caller); callers that generate many
        plots should close it themselves to avoid accumulating figures.
    """
    import os  # local import so the block does not rely on the file header

    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    if plotpath is None:
        # Only the extension is swapped; splitext leaves directory names that
        # happen to contain ".wav" untouched and works for any path depth.
        plotpath = os.path.splitext(audiopath)[0] + ".png"

    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    # Amplitude to decibel; note the reference value 10e-6 equals 1e-5.
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)

    timebins, freqbins = np.shape(ims)

    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto",
               cmap=colormap, interpolation="none")
    plt.colorbar()

    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])

    # Ten evenly spaced time ticks, labelled in seconds.
    xlocs = np.float32(np.linspace(0, timebins - 1, 10))
    tick_seconds = ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate
    plt.xticks(xlocs, ["%.02f" % sec for sec in tick_seconds])

    # Twenty evenly spaced frequency ticks, labelled from the freq table.
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 20)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    # Important: renders the figure into the canvas (so its pixel data can be
    # extracted below) without displaying it to the user.
    fig.canvas.draw()

    if generatefig:
        # Data-set generation: preprocess the figure and store it on disk.
        # TODO: move this into a separate function.
        img_data = prepare_fig_to_img(fig)
        cv2.imwrite(plotpath, img_data)
    else:
        plt.show()

        # Temporary preview of what will be fed to network training;
        # to be removed later.
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        img_data = ImageTransform.remove_noise(img_data)  # closing: 1. dilate 2. erode
        # Original 350x165 scaled down by 5 -> 70x33, proportions preserved.
        img_data = ImageTransform.resize_graph(img_data, 70, 33)
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()

    return fig