def model_filename(method,
                   language,
                   library,
                   normalization,
                   accuracy=0,
                   n_people=None,
                   n_segments=None,
                   augmentation=None,
                   json=True,
                   models=False):
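    """Build the relative path under which a model's files are stored.

    Layout: '<language>[/models]/<normalization>/<people-segments>
    <augmentation><method>/<library>[/<accuracy>][/info.json]', where the
    bracketed pieces depend on the keyword flags and the helper functions
    are assumed to return '/'-terminated fragments.
    """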

    filename = f'{language}/'
    if models:
        filename += 'models/'
    filename += f'{normalization}/'
    filename += verify_people_segments(n_people, n_segments)
    filename += verify_augmentation(augmentation)
    filename += f'{method}/'
    filename += f'{library}/'
    if accuracy != 0:
        accuracy = Process.pad_accuracy(accuracy)
        filename += f'{accuracy}/'

    if json:
        filename += 'info.json'

    return filename
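A minimal usage sketch (the argument values below are made up for illustration; `Process.pad_accuracy` is assumed to format the accuracy as a fixed-width path segment):

# hypothetical call; the argument values are illustrative only
path = model_filename(method='svm',
                      language='portuguese',
                      library='psf',
                      normalization='minmax',
                      models=True)
# -> 'portuguese/models/minmax/<people-segments>/<augmentation>/svm/psf/info.json'
#    with the middle segments supplied by verify_people_segments and
#    verify_augmentation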
Example no. 2
            # Audio.write(
            #     f'portuguese/processed/psf/{dir}_{i}_{sample_index}.wav', sample, rate)

            m['attrs'].append(attr.tolist())

        del attr
    del signal
    return m


if __name__ == '__main__':
    filename = Directory.processed_filename(language, library, sampling_rate,
                                            n_audios, n_segments, augment)

    # serial fallback (handy for debugging a single directory):
    # m = []
    # for j, i in enumerate(f):
    #     if j < 1:
    #         m.append(process_directory(i, j, library))

    m = Parallel(n_jobs=-1,
                 verbose=len(f))(delayed(process_directory)(i, j, library)
                                 for j, i in enumerate(f)
                                 if n_audios is None or j < n_audios)

    Process.object_to_json(
        filename,
        m,
        f,
    )
    del m
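The `Parallel(...)(delayed(...)(...) for ...)` construction above is joblib's parallel-map idiom: `delayed(process_directory)` records each call lazily, and `Parallel(n_jobs=-1)` fans the calls out over all CPU cores, returning the results in input order. A self-contained sketch of the same pattern:

from joblib import Parallel, delayed

def square(x):
    return x * x

# one task per item; n_jobs=-1 uses every core, results keep input order
results = Parallel(n_jobs=-1)(delayed(square)(x) for x in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]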
Example no. 3
n_rate = 24000
random_state = 42

filename_ps = Directory.verify_people_segments(people=n_people,
                                               segments=n_segments)

# %%
global X_train, X_valid, X_test, y_train, y_valid, y_test

DATASET_PATH = Directory.processed_filename(language, library, n_rate,
                                            n_people, n_segments)

# %%
# DATA SPLIT

X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    DATASET_PATH)

mapping = set(y_train)
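# `mapping` collects the unique training labels (presumably one id per
# speaker); its length can size the model's output layer.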
# %%


def build_model(learning_rate=0.0001):
    # build the network architecture
    # flat feature vector for the 'mixed' representation,
    # (frames, coefficients) otherwise
    input_shape = ([X_train.shape[1]] if library == 'mixed' else
                   [X_train.shape[1], X_train.shape[2]])

    model = Sequential([
        Flatten(input_shape=input_shape),
        # 1st hidden layer
        Dense(512, activation='relu'),
Example no. 4
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
flat = args['flat']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

epochs = 2000
batch_size = 128
# %%
file_path = Directory.processed_filename(language, library, sampling_rate,
                                         people, segments, augment)
# %%
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    file_path, flat=flat)

param_grid = {}

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train.reshape(
        -1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(
        X_test.shape)

elif normalization == 'standard':
    from sklearn.preprocessing import StandardScaler
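The reshape dance above is the standard way to apply a 2-D scaler to 3-D sequence data: collapse (samples, frames, coefficients) into (samples x frames, coefficients), scale per coefficient, then restore the original shape. Note that the scaler is fit on the training set only and merely applied to the test set; a full pipeline would transform X_valid with the same fitted scaler. A standalone sketch with dummy data:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.random.rand(8, 100, 13)      # dummy (samples, frames, coefficients)
scaler = MinMaxScaler()
flat = X.reshape(-1, X.shape[-1])   # -> (800, 13): one row per frame
X_scaled = scaler.fit_transform(flat).reshape(X.shape)
assert X_scaled.shape == (8, 100, 13)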
Example no. 5
language = args['language']
method = 'svm'
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

# %%
file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments, augment)
# %%
X_train, y_train, mapping = Process.selection(
    file_path, valid_size=0, test_size=0, mapping=True, flat=True)
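# valid_size=0/test_size=0 keeps every sample in X_train (cross-validation
# handles splitting for SVM); mapping=True also returns the label mapping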

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)

elif normalization == 'standard':
    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)

# NOTE: the guard opening this branch was lost from the snippet;
# 'if library == "mixed":' is an assumed reconstruction (Example no. 3
# branches on the same check).
if library == 'mixed':
    first_folder = Directory.processed_filename('portuguese', 'psf',
                                                sampling_rate, None, None)
    second_folder = Directory.processed_filename('portuguese', 'melbanks',
                                                 sampling_rate, None, None)
    third_folder = Directory.processed_filename('english', 'psf',
                                                sampling_rate, people,
                                                segments)
    fourth_folder = Directory.processed_filename('english', 'melbanks',
                                                 sampling_rate, people,
                                                 segments)

    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection(
        first_folder,
        second_folder,
        third_folder,
        fourth_folder,
        lm_validation=False,
        lm_test=False,
        rm_validation=True,
        rm_test=True)
elif language == 'mixed':
    portuguese_folder = Directory.processed_filename('portuguese', library,
                                                     sampling_rate, people,
                                                     segments)
    english_folder = Directory.processed_filename('english', library,
                                                  sampling_rate, people,
                                                  segments)

    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection_language(
        portuguese_folder=portuguese_folder,
        english_folder=english_folder,