def model_filename(method, language, library, normalization, accuracy=0, n_people=None, n_segments=None, augmentation=None, json=True, models=False):
    """Build the relative path used to store/load a trained model.

    Layout: <language>/[models/]<normalization>/<people-segments>/
            <augmentation>/<method>/<library>/[<accuracy>/][info.json]

    Parameters
    ----------
    method : classifier name (e.g. 'svm'); becomes a path component.
    language : dataset language; first path component.
    library : feature-representation name (e.g. 'psf', 'melbanks').
    normalization : normalization strategy name.
    accuracy : when non-zero, padded via ``Process.pad_accuracy`` and
        appended as its own path component; 0 means "not recorded".
    n_people, n_segments : forwarded to ``verify_people_segments``, which
        returns the corresponding path fragment.
    augmentation : forwarded to ``verify_augmentation`` for its fragment.
    json : when True, append the ``info.json`` filename; otherwise return
        the directory path only. NOTE: shadows the stdlib ``json`` module
        name inside this function; kept for backward compatibility.
    models : when True, insert a ``models/`` component after the language.

    Returns
    -------
    str
        The assembled relative path.
    """
    filename = f'{language}/'
    if models:
        filename += 'models/'
    filename += f'{normalization}/'
    filename += verify_people_segments(n_people, n_segments)
    filename += verify_augmentation(augmentation)
    filename += f'{method}/'
    filename += f'{library}/'
    if accuracy != 0:
        # Normalize the accuracy to a fixed-width string so sibling
        # directories sort lexicographically.
        accuracy = Process.pad_accuracy(accuracy)
        filename += f'{accuracy}/'
    if json:
        filename += 'info.json'  # plain literal — no interpolation needed
    return filename
# Audio.write( # f'portuguese/processed/psf/{dir}_{i}_{sample_index}.wav', sample, rate) m['attrs'].append(attr.tolist()) del attr del signal return m if __name__ == '__main__': filename = Directory.processed_filename(language, library, sampling_rate, n_audios, n_segments, augment) # m = [] # for j, i in enumerate(f): # if j < 1: # m.append(process_directory(i, j, library)) m = Parallel(n_jobs=-1, verbose=len(f))(delayed(process_directory)(i, j, library) for j, i in enumerate(f) if n_audios == None or j < n_audios) Process.object_to_json( filename, m, f, ) del m
n_rate = 24000       # target sampling rate (Hz)
random_state = 42    # fixed seed for reproducibility
filename_ps = Directory.verify_people_segments(people=n_people,
                                               segments=n_segments)

# %%
# NOTE(review): a ``global`` statement at module level is a no-op — these
# names are already module globals here.
global X_train, X_valid, X_test, y_train, y_valid, y_test
DATASET_PATH = Directory.processed_filename(language, library, n_rate,
                                            n_people, n_segments)

# %%
# Split the data into train / validation / test sets.
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    DATASET_PATH)
# Distinct class labels present in the training split.
mapping = set(y_train)


# %%
def build_model(learning_rate=0.0001):
    """Assemble the dense classification network.

    NOTE(review): the definition continues past this excerpt; only the
    first layers are visible here.
    """
    # build the network architecture
    # 'mixed' features are flat (one trailing dimension); otherwise each
    # sample is 2-D and both trailing dimensions are kept — TODO confirm.
    input_shape = [X_train.shape[1]] if library == 'mixed' else [
        X_train.shape[1], X_train.shape[2]
    ]
    model = Sequential([
        # 1st hidden layer
        Flatten(input_shape=input_shape),
        Dense(512, activation='relu'),
# Experiment configuration taken from the parsed CLI arguments
# (``args`` and ``language`` are defined above this excerpt).
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
flat = args['flat']
augment = args['augmentation']
sampling_rate = 24000   # Hz
random_state = 42       # fixed seed for reproducibility
epochs = 2000
batch_size = 128

# %%
file_path = Directory.processed_filename(language, library, sampling_rate,
                                         people, segments, augment)

# %%
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    file_path, flat=flat)
param_grid = {}

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    # Fit on the training split only, then apply the same scaling to test.
    # NOTE(review): X_valid is never transformed here — if the validation
    # split is used downstream it stays on the raw scale; confirm intended.
    X_train = scaler.fit_transform(X_train.reshape(
        -1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(
        X_test.shape)
elif normalization == 'standard':
    from sklearn.preprocessing import StandardScaler
# %% Experiment configuration -------------------------------------------------
# Hyper-parameters and dataset descriptors for the SVM run; ``args`` comes
# from the CLI parsing above this excerpt.
method = 'svm'
sampling_rate = 24000   # Hz
random_state = 42       # fixed seed for reproducibility

language = args['language']
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']

# %% Load the processed dataset ------------------------------------------------
file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments, augment)

# %%
# The whole dataset goes into training (no validation/test split);
# ``mapping`` carries the label names back alongside the data.
X_train, y_train, mapping = Process.selection(
    file_path, valid_size=0, test_size=0, mapping=True, flat=True)

# %% Optional feature scaling --------------------------------------------------
# Flatten to 2-D for the scaler, then restore the original sample shape.
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    two_d = X_train.reshape(-1, X_train.shape[-1])
    X_train = scaler.fit_transform(two_d).reshape(X_train.shape)
elif normalization == 'standard':
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    two_d = X_train.reshape(-1, X_train.shape[-1])
    X_train = scaler.fit_transform(two_d).reshape(X_train.shape)
# NOTE(review): this excerpt starts inside an if/elif chain — the opening
# condition is above this chunk; body indentation assumed to be one level.
# TODO confirm against the full file.
    # Combine PSF and mel-bank features from both corpora. The Portuguese
    # folders pass no people/segments caps; the English ones do.
    first_folder = Directory.processed_filename('portuguese', 'psf',
                                                sampling_rate, None, None)
    second_folder = Directory.processed_filename('portuguese', 'melbanks',
                                                 sampling_rate, None, None)
    third_folder = Directory.processed_filename('english', 'psf',
                                                sampling_rate, people,
                                                segments)
    fourth_folder = Directory.processed_filename('english', 'melbanks',
                                                 sampling_rate, people,
                                                 segments)
    # lm_*/rm_* flags choose which folder pair supplies the validation and
    # test splits — semantics defined by Process.mixed_selection; TODO
    # confirm.
    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection(
        first_folder, second_folder, third_folder, fourth_folder,
        lm_validation=False, lm_test=False, rm_validation=True, rm_test=True)
elif language == 'mixed':
    # Same feature representation for both languages, merged across corpora.
    portuguese_folder = Directory.processed_filename('portuguese', library,
                                                     sampling_rate, people,
                                                     segments)
    english_folder = Directory.processed_filename('english', library,
                                                  sampling_rate, people,
                                                  segments)
    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection_language(
        portuguese_folder=portuguese_folder,
        english_folder=english_folder,