def plotstft(audiopath, generatefig=True, binsize=2**10, plotpath=None, colormap="jet"):
    """Draw the log-frequency STFT spectrogram of an audio file.

    The audio is read with ``wav.read``, transformed with ``stft`` and
    ``logscale_spec`` (both defined outside this block), converted to
    decibels and rendered with matplotlib.

    Args:
        audiopath: Path of the audio file to read.
        generatefig: When true (default), the figure is converted with
            ``prepare_fig_to_img`` and written to ``plotpath`` with
            ``cv2.imwrite``.  When false, the figure and the intermediate
            preprocessing results are shown interactively and the final
            image is written to ``test.png``.
        binsize: Passed to ``stft`` as its second argument (presumably the
            analysis window size); also used to offset the time-axis labels.
        plotpath: Destination of the generated image.  When ``None`` the
            image is stored next to the audio file, with the audio file's
            extension replaced by ``.png``.
        colormap: Matplotlib colormap used for the spectrogram.

    Returns:
        The matplotlib figure holding the spectrogram.  The figure is not
        closed here; callers that generate many plots should call
        ``plt.close(fig)`` themselves to avoid accumulating open figures.
    """
    import os  # local import so the block does not rely on a file-level one

    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    # Honour an explicitly requested output path.  Previously the argument
    # was always overwritten, and an unused "graphs" path computation raised
    # IndexError for paths containing exactly one '/'.
    if plotpath is None:
        # splitext only touches the real extension, unlike str.replace which
        # would also rewrite ".wav" inside directory names.
        plotpath = os.path.splitext(audiopath)[0] + '.png'

    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    ims = 20.*np.log10(np.abs(sshow)/10e-6)  # amplitude to decibel

    timebins, freqbins = np.shape(ims)

    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
    plt.colorbar()

    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins-1])
    plt.ylim([0, freqbins])

    # Ten evenly spaced time ticks, labelled in seconds.
    xlocs = np.float32(np.linspace(0, timebins-1, 10))
    tick_seconds = ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate
    plt.xticks(xlocs, ["%.02f" % t for t in tick_seconds])

    # Twenty evenly spaced frequency ticks.  A platform int is used instead
    # of int16 so the indices cannot wrap around for very large spectra.
    ylocs = np.round(np.linspace(0, freqbins-1, 20)).astype(int)
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    # Important: renders the figure into its canvas buffer without showing
    # it to the user, so the pixel data can be extracted afterwards.
    fig.canvas.draw()

    if not generatefig:
        plt.show()
        # Temporary preview of what will be fed to the network training;
        # to be removed later.
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        img_data = ImageTransform.remove_noise(img_data)  # closing: 1. dilate 2. erode
        # Original size 350x165; both sides divided by 5 (70x33) to keep the proportions.
        img_data = ImageTransform.resize_graph(img_data, 70, 33)
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()
    else:
        # Data-set generation path: store the preprocessed image on disk.
        img_data = prepare_fig_to_img(fig)
        cv2.imwrite(plotpath, img_data)

    return fig
def prepare_fig_to_img(graph_fig):
    """Turn a matplotlib figure into the small image used as network input.

    The figure is pushed through the ``ImageTransform`` helpers in this
    order: ``fig2data``, ``transform``, ``image_bin``, ``invert``,
    ``remove_noise`` and finally ``resize_graph(..., 70, 33)``.  According to
    the original notes these steps amount to cropping, grayscale conversion,
    binarisation, noise removal and resizing.

    Args:
        graph_fig: The matplotlib figure object to convert.

    Returns:
        The processed image, ready for training the network (described by
        the original notes as a numpy matrix).
    """
    single_arg_steps = (
        "fig2data",
        "transform",
        "image_bin",
        "invert",
        "remove_noise",  # closing: 1. dilate 2. erode
    )
    image = graph_fig
    for step_name in single_arg_steps:
        # Look each helper up right before calling it, one after another.
        image = getattr(ImageTransform, step_name)(image)
    # Original size 350x165; both sides divided by 5 (70x33) keeps the proportions.
    return ImageTransform.resize_graph(image, 70, 33)
def prepare_fig_to_img(graph_fig):
    """Convert a matplotlib figure into an image prepared for training.

    Applies the ``ImageTransform`` helpers one after another.  The original
    notes list the overall effect as: 1. crop, 2. grayscale, 3. binarise,
    4. remove noise, 5. resize.

    Args:
        graph_fig: The matplotlib figure object to convert.

    Returns:
        The resulting image (per the original notes, a numpy matrix) sized
        with ``resize_graph(..., 70, 33)``.
    """
    raw_pixels = ImageTransform.fig2data(graph_fig)
    transformed = ImageTransform.transform(raw_pixels)
    binary = ImageTransform.image_bin(transformed)
    inverted = ImageTransform.invert(binary)
    # Closing: 1. dilate 2. erode.
    cleaned = ImageTransform.remove_noise(inverted)
    # Original size 350x165; 350/5 = 70 and 165/5 = 33, so proportions are kept.
    return ImageTransform.resize_graph(cleaned, 70, 33)
# Example #4
# 0
# Per-channel mean and standard deviation handed to transforms.Normalize below.
img_mean = (0.4914, 0.4822, 0.4465)
img_std = (0.2023, 0.1994, 0.2010)

# Training-time transforms: random crop and horizontal flip, then tensor
# conversion and normalisation.
transform_train = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std),
]

# Test-time transforms: tensor conversion and normalisation only.
transform_test = [transforms.ToTensor(), transforms.Normalize(img_mean, img_std)]

# Both lists are handed to the project's ImageTransform.transform helper.
t_transform_train = ImageTransform.transform(transform_train)
t_transform_test = ImageTransform.transform(transform_test)

# Dataset class plus keyword arguments for the dataset and loader objects.
dataset_name = torchvision.datasets.CIFAR10
trainSet_dict = {
    'root': './data',
    'train': True,
    'download': True,
    'transform': t_transform_train,
}
trainLoad_dict = {'batch_size': 32, 'shuffle': True, 'num_workers': 4}
testSet_dict = {
    'root': './data',
    'train': False,
    'download': True,
    'transform': t_transform_test,
}
testLoad_dict = {'batch_size': 32, 'shuffle': False, 'num_workers': 4}

# Directory names for images and models.
IMAGE_PATH = "images/"
MODEL_PATH = "model/"
def main():
    """Program entry point; only its first statements are visible here.

    NOTE(review): the body appears to be cut off right after the device
    check - confirm against the complete source before relying on it.
    """
    # Device
    SEED = 1  # presumably a random seed; not used in the visible lines
    cuda = torch.cuda.is_available()  # whether torch reports CUDA as available
def plotstft(audiopath,
             generatefig=True,
             binsize=2**10,
             plotpath=None,
             colormap="jet"):
    """Draw the log-frequency STFT spectrogram of an audio file.

    The audio is read with ``wav.read``, transformed with ``stft`` and
    ``logscale_spec`` (both defined outside this block), converted to
    decibels and rendered with matplotlib.

    Args:
        audiopath: Path of the audio file to read.
        generatefig: When true (default), the figure is converted with
            ``prepare_fig_to_img`` and written to ``plotpath`` with
            ``cv2.imwrite``.  When false, the figure and the intermediate
            preprocessing results are shown interactively and the final
            image is written to ``test.png``.
        binsize: Passed to ``stft`` as its second argument (presumably the
            analysis window size); also used to offset the time-axis labels.
        plotpath: Destination of the generated image.  When ``None`` the
            image is stored next to the audio file, with the audio file's
            extension replaced by ``.png``.
        colormap: Matplotlib colormap used for the spectrogram.

    Returns:
        The matplotlib figure holding the spectrogram.  The figure is not
        closed here; callers that generate many plots should call
        ``plt.close(fig)`` themselves to avoid accumulating open figures.
    """
    import os  # local import so the block does not rely on a file-level one

    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    # Honour an explicitly requested output path.  Previously the argument
    # was always overwritten, and an unused "graphs" path computation raised
    # IndexError for paths containing exactly one '/'.
    if plotpath is None:
        # splitext only touches the real extension, unlike str.replace which
        # would also rewrite ".wav" inside directory names.
        plotpath = os.path.splitext(audiopath)[0] + '.png'

    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel

    timebins, freqbins = np.shape(ims)

    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims),
               origin="lower",
               aspect="auto",
               cmap=colormap,
               interpolation="none")
    plt.colorbar()

    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])

    # Ten evenly spaced time ticks, labelled in seconds.
    xlocs = np.float32(np.linspace(0, timebins - 1, 10))
    tick_seconds = ((xlocs * len(samples) / timebins) +
                    (0.5 * binsize)) / samplerate
    plt.xticks(xlocs, ["%.02f" % t for t in tick_seconds])

    # Twenty evenly spaced frequency ticks.  A platform int is used instead
    # of int16 so the indices cannot wrap around for very large spectra.
    ylocs = np.round(np.linspace(0, freqbins - 1, 20)).astype(int)
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    # Important: renders the figure into its canvas buffer without showing
    # it to the user, so the pixel data can be extracted afterwards.
    fig.canvas.draw()

    if not generatefig:
        plt.show()
        # Temporary preview of what will be fed to the network training;
        # to be removed later.
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        # Closing: 1. dilate 2. erode.
        img_data = ImageTransform.remove_noise(img_data)
        # Original size 350x165; both sides divided by 5 (70x33) to keep
        # the proportions.
        img_data = ImageTransform.resize_graph(img_data, 70, 33)
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()
    else:
        # Data-set generation path: store the preprocessed image on disk.
        img_data = prepare_fig_to_img(fig)
        cv2.imwrite(plotpath, img_data)

    return fig