Beispiel #1
0
def plot(data, title=None, x_label='Time (s)', y_label='Amplitude', size=(10, 6), caption=None,
         fig_name=None,
         show=False,
         close=True):
    """Draw ``data`` as a line plot against its element index.

    Args:
        data: Sized sequence of values; the x axis is ``0 .. len(data) - 1``.
        title: Axes title (``None`` is passed straight to matplotlib).
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        size: ``(width, height)`` for a new figure. A falsy value draws on
            the current figure/axes instead of creating a new one.
        caption: Optional text centred near the bottom of the figure.
        fig_name: Optional output path; when given the figure is saved there.
        show: Call ``plt.show()`` when true.
        close: Call ``plt.close()`` at the end when true.
    """
    if size:
        # Use the requested size; it used to be hard-coded to (10, 6),
        # which silently ignored the parameter.
        plt.figure(figsize=size, frameon=True)

    plt.plot(range(len(data)), data)
    plt.ylabel(y_label)
    plt.xlabel(x_label)
    plt.title(title)
    # Remove the margin on the x axis
    plt.margins(x=0)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True,
                    horizontalalignment='center')

    if fig_name:
        # Called with the file path and a truthy second argument before saving
        # (presumably creates the parent directory; confirm in Directory).
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    if close:
        plt.close()
def write(path, data, rate):
    """Save ``data`` to ``path`` with soundfile using the ``PCM_16`` subtype.

    ``Directory.create_directory(path, file=True)`` is called first
    (presumably to make sure the destination directory exists).

    Args:
        path: Destination file path.
        data: Audio samples handed to ``soundfile.write``.
        rate: Sample rate handed to ``soundfile.write``.
    """
    import soundfile
    from deep_audio import Directory

    Directory.create_directory(path, file=True)
    soundfile.write(path, data, rate, subtype='PCM_16')
Beispiel #3
0
def plot_subplots(audio, mfccs, lpccs, rate, title=None, size_multiplier=2, cmap='magma', caption=None,
                  fig_name=None, show=False):
    """Draw a 2x2 overview figure: waveform, spectrogram, LPCCs and MFCCs.

    Font sizes and the figure size are scaled by ``size_multiplier``. The
    matplotlib rc settings are reset to ``rcParamsDefault`` and the figure is
    closed when the function exits, including when drawing or saving raises.

    Args:
        audio: Signal passed to ``plot_audio`` and ``plot_spectrogram``.
        mfccs: Data passed to ``plot_cepstrals`` for the bottom-right panel.
        lpccs: Data passed to ``plot_cepstrals`` for the bottom-left panel.
        rate: Sample rate forwarded to the audio and spectrogram plots.
        title: Figure-level title.
        size_multiplier: Scale factor for the figure size and font sizes.
        cmap: Colormap forwarded to the spectrogram and cepstral plots.
        caption: Optional text centred near the bottom of the figure.
        fig_name: Optional output path; when given the figure is saved there.
        show: Call ``plt.show()`` when true.
    """
    from matplotlib import rcParams, rcParamsDefault

    small_size = 8
    medium_size = 10
    bigger_size = 12

    image_size = (10 * size_multiplier, 6 * size_multiplier)

    try:
        # controls default text sizes
        plt.rc('font', size=small_size * size_multiplier)
        # fontsize of the axes title
        plt.rc('axes', titlesize=small_size * size_multiplier)
        # fontsize of the x and y labels
        plt.rc('axes', labelsize=medium_size * size_multiplier)
        # fontsize of the tick labels
        plt.rc('xtick', labelsize=small_size * size_multiplier)
        plt.rc('ytick', labelsize=small_size * size_multiplier)
        # legend fontsize
        plt.rc('legend', fontsize=small_size * size_multiplier)
        # fontsize of the figure title
        plt.rc('figure', titlesize=bigger_size * size_multiplier)

        plt.subplots(2, 2, figsize=image_size)
        try:
            plt.subplots_adjust(left=0.125, right=0.9, bottom=0.1,
                                top=0.9, wspace=0.3, hspace=0.3)
            plt.suptitle(title)

            # size=None / close=False make the helpers draw on the selected
            # subplot instead of opening or closing their own figure.
            plt.subplot(2, 2, 1)
            plot_audio(audio, rate, close=False, size=None)

            plt.subplot(2, 2, 2)
            plot_spectrogram(
                audio, rate, cmap=cmap, close=False, size=None)

            plt.subplot(2, 2, 3)
            plot_cepstrals(
                data=lpccs, y_label='LPCC Index', cmap=cmap, size=None, close=False)

            plt.subplot(2, 2, 4)
            plot_cepstrals(
                data=mfccs, y_label='MFCC Index', cmap=cmap, size=None, close=False)

            if caption:
                plt.figtext(0.5, 0.01, caption, wrap=True,
                            horizontalalignment='center')

            if fig_name:
                Directory.create_directory(fig_name, True)
                plt.savefig(fig_name, transparent=False)

            if show:
                plt.show()
        finally:
            # Always release the figure, even if a panel failed to draw.
            plt.close()
    finally:
        # Reset every style configured at the start of the function so the
        # enlarged fonts never leak into later plots.
        rcParams.update(rcParamsDefault)
Beispiel #4
0
def plot_cepstrals(data, title=None, x_label='Frame Index', y_label='Index', cmap='magma', size=(10, 6),
                   caption=None,
                   fig_name=None,
                   show=False, close=True):
    """Render a 2-D coefficient matrix as an image.

    ``data`` is transposed before display, so its first axis runs along the
    x axis of the image and its second axis along the y axis.

    Args:
        data: Object exposing ``.T`` (e.g. a NumPy array).
        title: Axes title.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        cmap: Colormap passed to ``plt.imshow``.
        size: ``(width, height)`` for a new figure. A falsy value draws on
            the current figure/axes instead of creating a new one.
        caption: Optional text centred near the bottom of the figure.
        fig_name: Optional output path; when given the figure is saved there.
        show: Call ``plt.show()`` when true.
        close: Call ``plt.close()`` at the end when true.
    """
    if size:
        # Use the requested size; it used to be hard-coded to (10, 6),
        # which silently ignored the parameter.
        plt.figure(figsize=size, frameon=True)
    plt.imshow(data.T,
               origin='lower',
               aspect='auto',
               cmap=cmap,
               interpolation='nearest')
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True,
                    horizontalalignment='center')

    if fig_name:
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    if close:
        plt.close()
def create_json_file(file, data, indent=2, cls=None):
    """Serialise ``data`` as JSON into ``file``.

    Everything before the last ``/`` of ``file`` is passed to
    ``Directory.create_directory`` first; that prefix is an empty string
    when ``file`` contains no ``/``.

    Args:
        file: Destination path, using ``/`` as separator.
        data: Object to serialise.
        indent: Indentation forwarded to ``json.dump``.
        cls: Optional encoder class forwarded to ``json.dump``.
    """
    from deep_audio import Directory
    import json

    parent, _, _ = file.rpartition('/')
    Directory.create_directory(parent)

    with open(file, "w") as handle:
        json.dump(data, handle, indent=indent, cls=cls)
Beispiel #6
0
def plot_spectrogram(data, rate, n_fft=1024, title=None, x_label='Time (s)', y_label='Frequency (kHz)',
                     cmap='magma', size=(10, 6), caption=None, fig_name=None, show=False, close=True):
    """Draw a spectrogram of ``data`` with ``plt.specgram``.

    Args:
        data: Signal passed to ``plt.specgram``.
        rate: Sampling frequency, passed as ``Fs``.
        n_fft: FFT block length, passed as ``NFFT``.
        title: Axes title.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        cmap: Colormap passed to ``plt.specgram``.
        size: ``(width, height)`` for a new figure. A falsy value draws on
            the current figure/axes instead of creating a new one.
        caption: Optional text centred near the bottom of the figure.
        fig_name: Optional output path; when given the figure is saved there.
        show: Call ``plt.show()`` when true.
        close: Call ``plt.close()`` at the end when true.
    """
    if size:
        # Use the requested size; it used to be hard-coded to (10, 6),
        # which silently ignored the parameter.
        plt.figure(figsize=size, frameon=True)
    # NOTE(review): the default y label says kHz, but the axis follows the
    # unit of ``rate`` (Hz if ``rate`` is in Hz) - confirm the intended unit.
    plt.specgram(data, NFFT=n_fft, Fs=rate, cmap=cmap)
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel(x_label)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True,
                    horizontalalignment='center')

    if fig_name:
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    if close:
        plt.close()
Beispiel #7
0
def object_to_attention(filename, attrs, files):
    """Write labelled feature rows as ``<label> qid:<label> <idx>:<value> ...``.

    The ``attrs`` and ``labels`` lists of every entry in ``attrs`` are
    concatenated; each sample is flattened with ``flatten_matrix`` and
    emitted as one text row, then all rows go to ``Directory.create_file``.

    Args:
        filename: Destination passed to ``Directory.create_file``.
        attrs: Iterable of mappings with ``'attrs'`` and ``'labels'`` lists,
            assumed to be aligned sample-by-sample.
        files: Unused by the output; kept for interface compatibility.
    """
    from deep_audio import Directory

    labels = []
    samples = []
    for entry in attrs:
        samples.extend(entry['attrs'])
        labels.extend(entry['labels'])

    rows = []
    # enumerate yields (index, value). The original unpacked the pair the
    # other way round, so samples were looked up by label value and the
    # features were written as "value:index".
    for sample_index, label in enumerate(labels):
        features = flatten_matrix(samples[sample_index])
        pairs = ''.join(f'{feature_index}:{value} '
                        for feature_index, value in enumerate(features))
        rows.append(f'{label} qid:{label} ' + pairs)

    Directory.create_file(filename, rows)
Beispiel #8
0
def selection(folder,
              valid_size=0.25,
              test_size=0.2,
              random_state=42,
              flat=False,
              squeeze=False,
              mapping=False):
    """Load a dataset and split it into train / validation / test parts.

    Args:
        folder: Location passed to ``Directory.load_json_data``.
        valid_size: Fraction of the training part held out for validation;
            ``0`` skips the validation split.
        test_size: Fraction held out for testing; ``0`` skips all splitting.
        random_state: Seed forwarded to ``train_test_split``.
        flat: Apply ``flatten_matrix`` to the features when true.
        squeeze: Remove axis 3 of the features with ``numpy.squeeze`` when true.
        mapping: Only used when ``test_size == 0``: also return the labels
            object loaded from ``folder``.

    Returns:
        * ``test_size == 0``: ``(X, y)`` or ``(X, y, labels)`` if ``mapping``.
        * ``valid_size == 0``: ``(X_train, X_test, y_train, y_test)``.
        * otherwise: ``(X_train, X_valid, X_test, y_train, y_valid, y_test)``.
    """
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    # Imported under an alias: importing it as ``squeeze`` shadowed the
    # ``squeeze`` flag, so the squeeze step could never run.
    from numpy import squeeze as np_squeeze

    X, y, labels = Directory.load_json_data(folder)

    if flat:
        X = flatten_matrix(X)

    if squeeze:
        X = np_squeeze(X, axis=3)

    if test_size == 0:
        if mapping:
            return X, y, labels

        return X, y

    # Both splits are stratified so class proportions are preserved.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=test_size, random_state=random_state)
    if valid_size == 0:
        return X_train, X_test, y_train, y_test

    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train,
        y_train,
        stratify=y_train,
        test_size=valid_size,
        random_state=random_state)

    return X_train, X_valid, X_test, y_train, y_valid, y_test
Beispiel #9
0
import matplotlib.pyplot as plt
from numpy import lib, max
from deep_audio import Directory, JSON, Process, Terminal

# Script cell: read the run configuration and load the train/valid/test split.
args = Terminal.get_args()

# %%
# Run configuration; falsy CLI values fall back to the defaults on the right.
model_algo = 'perceptron'
language = args['language'] or 'portuguese'
library = args['representation'] or 'psf'
n_people = args['people'] or None
n_segments = args['segments'] or None
n_rate = 24000
random_state = 42

filename_ps = Directory.verify_people_segments(people=n_people,
                                               segments=n_segments)

# %%
# NOTE(review): a `global` statement at module level has no effect.
global X_train, X_valid, X_test, y_train, y_valid, y_test

DATASET_PATH = Directory.processed_filename(language, library, n_rate,
                                            n_people, n_segments)

# %%
# DATA SPLIT

X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    DATASET_PATH)

# Distinct label values present in the training split.
mapping = set(y_train)
# %%
Beispiel #10
0
# Run configuration taken from `args` (defined outside this fragment).
language = args['language']
method = args['method']
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
flat = args['flat']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

epochs = 2000
batch_size = 128
# %%
file_path = Directory.processed_filename(language, library, sampling_rate,
                                         people, segments, augment)
# %%
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    file_path, flat=flat)

param_grid = {}

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    # The scaler is fitted on the training data only; arrays are collapsed to
    # 2-D (rows x last axis) for scaling and then restored to their shape.
    # NOTE(review): X_valid is not transformed in the visible lines - confirm
    # it is scaled further down.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train.reshape(
        -1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(
        X_test.shape)
Beispiel #11
0
from deep_audio import Directory, Process, Terminal, Model
# %%
# Script cell: read the run configuration, load the flattened split and
# optionally min-max scale it.
args = Terminal.get_args()

language = args['language']
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42
# %%
# NOTE(review): a `global` statement at module level has no effect.
global X_train, X_valid, X_test, y_train, y_valid, y_test

file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments, augment)
# %%

X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    file_path, flat=True)

if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    # Fitted on the training data only; arrays are collapsed to 2-D for
    # scaling and then restored to their original shape.
    # NOTE(review): X_valid is not transformed in the visible lines.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(
        X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

elif normalization == 'standard':
# Inference script: load the stored model artefacts and the audio to classify.
args = Terminal.get_args()

language = 'portuguese'
method = 'svm'
library = 'psf'
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

filename_holder = Directory.model_filename(method=method,
                                           language=language,
                                           library=library,
                                           normalization=normalization,
                                           augmentation=augment,
                                           json=False,
                                           models=True)

# Context managers close the artefact files deterministically; the bare
# open() calls used before never closed their handles.
with open(filename_holder + 'info.json', 'r') as info_file:
    info = json.load(info_file)

# NOTE(review): `load` is imported outside this fragment; if it is a
# pickle-based loader, only use it on trusted files.
with open(filename_holder + 'scaler.pkl', 'rb') as scaler_file:
    scaler = load(scaler_file)

with open(filename_holder + 'model.h5', 'rb') as model_file:
    model = load(model_file)

signal, rate = librosa.load(args['inferencia'], sr=sampling_rate)

# signal = Audio.trim(signal)

# Drop the tail so the length is a whole number of `segment_time`-second
# segments.
segment_time = 5
signal = signal[:len(signal) - len(signal) % (rate * segment_time)]
Beispiel #13
0
def mixed_selection(first_folder,
                    second_folder,
                    third_folder,
                    fourth_folder,
                    lm_validation=False,
                    lm_test=False,
                    rm_validation=False,
                    rm_test=False,
                    valid_size=0.25,
                    test_size=0.2,
                    random_state=42):
    """Build train/valid/test sets from four datasets mixed along two axes.

    The first/second and the third/fourth datasets are joined feature-wise
    (axis 1); the two resulting groups are then stacked sample-wise (axis 0),
    with the second group's labels shifted past the first group's maximum
    label. Each dataset is flattened and split with the same stratified
    two-stage ``train_test_split``. The training set always uses every
    dataset; the flags control the validation and test sets.

    Args:
        first_folder, second_folder, third_folder, fourth_folder: Locations
            passed to ``Directory.load_json_data``.
        lm_validation: Stack the third/fourth group onto the validation set.
        lm_test: Stack the third/fourth group onto the test set.
        rm_validation: Join second/fourth features into the validation set.
        rm_test: Join second/fourth features into the test set.
        valid_size: Fraction of the training part held out for validation.
        test_size: Fraction held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_valid, X_test, y_train, y_valid, y_test)``; the same
        six names are also written as module-level globals.
    """
    global X_train, y_train, X_valid, y_valid, X_test, y_test
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import concatenate
    import numpy as np

    def split_three_ways(features, targets):
        # Hold out the test part first, then carve validation out of the rest.
        rest_x, held_x, rest_y, held_y = train_test_split(
            features, targets, stratify=targets,
            test_size=test_size, random_state=random_state)
        fit_x, val_x, fit_y, val_y = train_test_split(
            rest_x, rest_y, stratify=rest_y,
            test_size=valid_size, random_state=random_state)
        return fit_x, val_x, held_x, fit_y, val_y, held_y

    def join_features(left, right, enabled):
        # Feature-wise join, applied only when the matching flag is set.
        return concatenate((left, right), axis=1) if enabled else left

    feats_a, labels_a, _ = Directory.load_json_data(first_folder)
    feats_b, labels_b, _ = Directory.load_json_data(second_folder)
    feats_c, labels_c, _ = Directory.load_json_data(third_folder)
    feats_d, labels_d, _ = Directory.load_json_data(fourth_folder)

    feats_a = flatten_matrix(feats_a)
    feats_b = flatten_matrix(feats_b)
    feats_c = flatten_matrix(feats_c)
    feats_d = flatten_matrix(feats_d)

    # First group: datasets one and two; labels come from dataset one.
    tr_a, va_a, te_a, ytr_a, yva_a, yte_a = split_three_ways(feats_a, labels_a)
    tr_b, va_b, te_b, _, _, _ = split_three_ways(feats_b, labels_b)

    tr_a = concatenate((tr_a, tr_b), axis=1)
    y_train = ytr_a
    va_a = join_features(va_a, va_b, rm_validation)
    y_valid = yva_a
    te_a = join_features(te_a, te_b, rm_test)
    y_test = yte_a

    # Second group: datasets three and four; labels come from dataset three.
    tr_c, va_c, te_c, ytr_c, yva_c, yte_c = split_three_ways(feats_c, labels_c)
    tr_d, va_d, te_d, _, _, _ = split_three_ways(feats_d, labels_d)

    tr_c = concatenate((tr_c, tr_d), axis=1)
    va_c = join_features(va_c, va_d, rm_validation)
    te_c = join_features(te_c, te_d, rm_test)

    # Stack both groups; second-group labels are offset so they do not
    # collide with the first group's label values.
    X_train = concatenate((tr_a, tr_c), axis=0)
    y_train = concatenate(
        (ytr_a, ytr_c + np.max(ytr_a) + 1), axis=0)

    if lm_validation:
        X_valid = concatenate((va_a, va_c), axis=0)
        y_valid = concatenate(
            (yva_a, yva_c + np.max(yva_a) + 1), axis=0)
    else:
        X_valid = va_a
        y_valid = yva_a

    if lm_test:
        X_test = concatenate((te_a, te_c), axis=0)
        y_test = concatenate(
            (yte_a, yte_c + np.max(yte_a) + 1), axis=0)
    else:
        X_test = te_a
        y_test = yte_a

    return X_train, X_valid, X_test, y_train, y_valid, y_test
Beispiel #14
0
# %%
# Script cell: read the run configuration and load the whole dataset as a
# single (unsplit) training set.
args = Terminal.get_args()

language = args['language']
method = 'svm'
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

# %%
file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments, augment)
# %%
# test_size=0 makes Process.selection return the data unsplit; mapping=True
# adds the labels object as a third value.
X_train, y_train, mapping = Process.selection(
    file_path, valid_size=0, test_size=0, mapping=True, flat=True)

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    # Collapse to 2-D (rows x last axis) for scaling, then restore the shape.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)

elif normalization == 'standard':
    # NOTE(review): only the import is visible here; the rest of this branch
    # appears to be cut off.
    from sklearn.preprocessing import StandardScaler
# Run configuration taken from `args`. `language` is not assigned in the
# visible lines - it is expected to be defined earlier in the script.
library = args['representation']
people = args['people']
segments = args['segments']
sampling_rate = 24000
random_state = 42

# Hard-coded configuration kept for manual runs:
# language = 'mixed'
# library = 'psf'
# people = None
# segments = None
# sampling_rate = 24000
# random_state = 42
# %%
# NOTE(review): a `global` statement at module level has no effect.
global X_train, X_valid, X_test, y_train, y_valid, y_test

file_path = Directory.processed_filename(language, library, sampling_rate,
                                         people, segments)
# %%
if language == 'mixed' and library == 'mixed':
    first_folder = Directory.processed_filename('portuguese', 'psf',
                                                sampling_rate, None, None)
    second_folder = Directory.processed_filename('portuguese', 'melbanks',
                                                 sampling_rate, None, None)
    third_folder = Directory.processed_filename('english', 'psf',
                                                sampling_rate, people,
                                                segments)
    fourth_folder = Directory.processed_filename('english', 'melbanks',
                                                 sampling_rate, people,
                                                 segments)

    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection(
        first_folder,
Beispiel #16
0
# number of audios
n_audios = args['people'] or None
# libraries
library = args['representation']
# language
language = args['language'] or 'portuguese'
# signal normalization
normalization = args['normalization'] or 'nonorm'
# flat processing
flat = args['flat']
# path to the audios (`sampling_rate` is defined outside this fragment)
path = f'{language}/audios/{sampling_rate}'
# augmentation
augment = args['augmentation']

f = Directory.filenames(path)

# %%


def _noise(sample, rate):
    """Add noise to ``sample`` at a random level tied to its peak value.

    The level passed to ``Augmentation.noise_addition`` is drawn uniformly
    between 1% and 10% of ``np.max(sample)``. ``rate`` is unused.
    """
    peak = np.max(sample)
    level = random.uniform(peak * 0.01, peak * 0.1)
    return Augmentation.noise_addition(sample, level)


def _cut(sample, rate):
    """Cut a randomly placed, randomly sized piece out of ``sample``.

    The cut length is drawn between 20% and 60% of the number of samples and
    the cut position between 10% and 90% of it; both are passed to
    ``Augmentation.cut_signal``. ``rate`` is unused.

    NOTE(review): despite its name, ``cut_seconds`` is derived from the
    sample count, not from a duration in seconds - confirm what
    ``Augmentation.cut_signal`` expects.
    """
    n_samples = sample.shape[0]

    # randint requires integer bounds: the float products used before raise
    # for non-integral values (and for any float on Python 3.12+).
    cut_seconds = random.randint(int(n_samples * 0.2), int(n_samples * 0.6))
    pos_cut = random.randint(int(n_samples * 0.1), int(n_samples * 0.9))

    return Augmentation.cut_signal(sample, pos_cut, cut_seconds)
Beispiel #17
0
def mixed_selection_representation(first_folder,
                                   second_folder,
                                   validation=False,
                                   test=False,
                                   valid_size=0.25,
                                   test_size=0.2,
                                   random_state=42):
    """Build train/valid/test sets by joining two datasets feature-wise.

    Both datasets are flattened and split with the same stratified two-stage
    ``train_test_split``. Training features are always the axis-1
    concatenation of both datasets; labels always come from the first one.

    Args:
        first_folder, second_folder: Locations passed to
            ``Directory.load_json_data``.
        validation: Also join the second dataset into the validation features.
        test: Also join the second dataset into the test features.
        valid_size: Fraction of the training part held out for validation.
        test_size: Fraction held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_valid, X_test, y_train, y_valid, y_test)``; the same
        six names are also written as module-level globals.
    """
    global X_train, y_train, X_valid, y_valid, X_test, y_test
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import concatenate

    def split_three_ways(features, targets):
        # Hold out the test part first, then carve validation out of the rest.
        rest_x, held_x, rest_y, held_y = train_test_split(
            features, targets, stratify=targets,
            test_size=test_size, random_state=random_state)
        fit_x, val_x, fit_y, val_y = train_test_split(
            rest_x, rest_y, stratify=rest_y,
            test_size=valid_size, random_state=random_state)
        return fit_x, val_x, held_x, fit_y, val_y, held_y

    feats_a, labels_a, _ = Directory.load_json_data(first_folder)
    feats_b, labels_b, _ = Directory.load_json_data(second_folder)

    feats_a = flatten_matrix(feats_a)
    feats_b = flatten_matrix(feats_b)

    tr_a, va_a, te_a, ytr_a, yva_a, yte_a = split_three_ways(feats_a, labels_a)
    tr_b, va_b, te_b, _, _, _ = split_three_ways(feats_b, labels_b)

    X_train = concatenate((tr_a, tr_b), axis=1)
    y_train = ytr_a

    X_valid = concatenate((va_a, va_b), axis=1) if validation else va_a
    y_valid = yva_a

    X_test = concatenate((te_a, te_b), axis=1) if test else te_a
    y_test = yte_a

    return X_train, X_valid, X_test, y_train, y_valid, y_test
Beispiel #18
0
def mixed_selection_language(portuguese_folder,
                             english_folder,
                             validation=False,
                             test=False,
                             valid_size=0.25,
                             test_size=0.2,
                             random_state=42,
                             flat=False,
                             squeeze=False):
    """Build train/valid/test sets by stacking two datasets sample-wise.

    Each dataset is split with the same stratified two-stage
    ``train_test_split``. The training set always stacks both datasets
    (axis 0), with the English labels shifted past the largest Portuguese
    label of the same split.

    Args:
        portuguese_folder, english_folder: Locations passed to
            ``Directory.load_json_data``.
        validation: Also stack the English data onto the validation set.
        test: Also stack the English data onto the test set.
        valid_size: Fraction of the training part held out for validation.
        test_size: Fraction held out for testing.
        random_state: Seed forwarded to ``train_test_split``.
        flat: Apply ``flatten_matrix`` to both datasets when true.
        squeeze: Currently has no effect.

    Returns:
        ``(X_train, X_valid, X_test, y_train, y_valid, y_test)``; the same
        six names are also written as module-level globals.
    """
    global X_train, y_train, X_valid, y_valid, X_test, y_test
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import concatenate
    from numpy import max as np_max

    def split_three_ways(features, targets):
        # Hold out the test part first, then carve validation out of the rest.
        rest_x, held_x, rest_y, held_y = train_test_split(
            features, targets, stratify=targets,
            test_size=test_size, random_state=random_state)
        fit_x, val_x, fit_y, val_y = train_test_split(
            rest_x, rest_y, stratify=rest_y,
            test_size=valid_size, random_state=random_state)
        return fit_x, val_x, held_x, fit_y, val_y, held_y

    def stack(first_x, first_y, second_x, second_y):
        # Sample-wise stack; second labels are offset past the first ones.
        stacked_x = concatenate((first_x, second_x), axis=0)
        stacked_y = concatenate((first_y, second_y + np_max(first_y) + 1),
                                axis=0)
        return stacked_x, stacked_y

    feats_pt, labels_pt, _ = Directory.load_json_data(portuguese_folder)
    feats_en, labels_en, _ = Directory.load_json_data(english_folder)

    if flat:
        feats_pt = flatten_matrix(feats_pt)
        feats_en = flatten_matrix(feats_en)

    tr_pt, va_pt, te_pt, ytr_pt, yva_pt, yte_pt = split_three_ways(
        feats_pt, labels_pt)
    tr_en, va_en, te_en, ytr_en, yva_en, yte_en = split_three_ways(
        feats_en, labels_en)

    X_train, y_train = stack(tr_pt, ytr_pt, tr_en, ytr_en)

    if validation:
        X_valid, y_valid = stack(va_pt, yva_pt, va_en, yva_en)
    else:
        X_valid = va_pt
        y_valid = yva_pt

    if test:
        X_test, y_test = stack(te_pt, yte_pt, te_en, yte_en)
    else:
        X_test = te_pt
        y_test = yte_pt

    return X_train, X_valid, X_test, y_train, y_valid, y_test
Beispiel #19
0
#%%
# Script cell: read the run configuration and list the source audio files.
args = Terminal.get_args(sys.argv[1:])

# %%
num_cores = multiprocessing.cpu_count()
# The default used to be misspelled 'portguese', which pointed the paths
# below at folders that do not match the 'portuguese' name used elsewhere.
language = args['language'] or 'portuguese'
origin_path = f'base_{language}'
dest_path = f'{language}/audios'
s_rate = [24000]
n_audios = args['people'] or None

print(dest_path)
# %%

f = Directory.filenames_recursive(origin_path)


def process_directory(dir, n_rate):
    """Merge every audio file of one source directory into a single WAV.

    Each file listed in ``f[dir]`` is read at ``n_rate``, passed through
    ``Audio.trim(..., 20)`` and appended to one signal, which is written to
    ``{dest_path}/{n_rate}/{dir}.wav``.

    Args:
        dir: Key into the module-level ``f`` listing; also the folder name.
        n_rate: Sample rate used for both reading and writing.
    """
    samples = []

    for audioname in f[dir]:
        raw_signal, _ = Audio.read(f'{origin_path}/{dir}/{audioname}',
                                   sr=n_rate)
        samples.extend(Audio.trim(raw_signal, 20))

    Audio.write(f'{dest_path}/{n_rate}/{dir}.wav', array(samples), n_rate)
Beispiel #20
0
# Run configuration taken from `args` (defined outside this fragment); falsy
# CLI values fall back to the defaults on the right.
language = args['language'] or 'portuguese'
library = args['representation'] or 'stft'
people = args['people'] or None
segments = args['segments'] or None
sampling_rate = 24000
random_state = 42
normalization = args['normalization'] or 'nonorm'
flat = args['flat']

epochs = 500
batch_size = 128
# %%
# NOTE(review): a `global` statement at module level has no effect.
global X_train, X_valid, X_test, y_train, y_valid, y_test

file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments)
# %%
if language == 'mixed' and library == 'mixed':
    first_folder = Directory.processed_filename(
        'portuguese', 'psf', sampling_rate, None, None)
    second_folder = Directory.processed_filename(
        'portuguese', 'melbanks', sampling_rate, None, None)
    third_folder = Directory.processed_filename(
        'english', 'psf', sampling_rate, people, segments)
    fourth_folder = Directory.processed_filename(
        'english', 'melbanks', sampling_rate, people, segments)

    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection(
        first_folder, second_folder, third_folder, fourth_folder,
        lm_validation=False,
        lm_test=False,