# Configure the recurrent encoder-decoder hyperparameters and its Adam
# optimizer, then overfit it on a single test sequence as a sanity check.
# NOTE(review): `network_data`, `tf`, `RecurrentEncoderDecoder`, `Database`,
# `project_data` and `plt` are defined/imported earlier in the file (not
# visible in this chunk).
network_data.encoding_features = 40
network_data.learning_rate = 0.001
network_data.adam_epsilon = 0.01
network_data.optimizer = tf.train.AdamOptimizer(
    learning_rate=network_data.learning_rate,
    epsilon=network_data.adam_epsilon)

###########################################################################################################

encoder_decoder = RecurrentEncoderDecoder(network_data)
encoder_decoder.create_graph()

# Load the encoder test database and keep only the first feature sequence.
test_database = Database.fromFile(project_data.TEST_ENCODER_DATABASE_FILE, project_data)
test_feats, _ = test_database.to_set()
test_feats = test_feats[0:1]

# Autoencoder-style training: the input sequence is also the target output.
encoder_decoder.train(input_seq=test_feats,
                      output_seq=test_feats,
                      restore_run=True,
                      save_partial=True,
                      save_freq=10,
                      use_tensorboard=True,
                      tensorboard_freq=5,
                      training_epochs=30,
                      batch_size=5)

# Reconstruct the single sequence; a figure is opened, presumably to plot
# the result further below (not visible in this chunk).
out = encoder_decoder.predict(test_feats[0])
f1 = plt.figure(0)
# Database-generation configuration: window function, label type, and the
# per-wav loop that pairs each audio file with its transcription file.
# NOTE(review): `feature_config`, `np`, `project_data`, `wav_dirs`, `os` and
# the label classes come from earlier in the file (not visible in this chunk).
feature_config.mfcc_window = np.hanning

label_type = "classic"  # "classic", "las", "optim"
use_embedding = False
word_level = False
vocab_file = project_data.VOCAB_FILE

# Select the label class matching the requested label type.
if label_type == "classic":
    label_class = ClassicLabel
elif label_type == "las":
    label_class = LASLabel
else:
    label_class = OptimalLabel

for wav_dir in wav_dirs:
    database = Database(project_data)

    # Get the names of each wav file in the directory
    wav_names = os.listdir(wav_dir)
    # wav_names = wav_names[0:100]

    for wav_index in range(len(wav_names)):
        # Transcriptions live in the directory matching the wav split.
        if wav_dir == project_data.WAV_TRAIN_DIR:
            label_dir = project_data.TRANSCRIPTION_TRAIN_DIR
        else:
            label_dir = project_data.TRANSCRIPTION_TEST_DIR

        # Get filenames: the label file shares the wav's base name with a
        # .TXT extension.
        wav_filename = wav_dir + '/' + wav_names[wav_index]
        label_filename = label_dir + '/' + wav_names[wav_index].split(
            ".")[0] + '.TXT'
network_data.rnn_regularizer = 0 network_data.use_dropout = True network_data.decoder_function = tf.nn.ctc_greedy_decoder network_data.learning_rate = 0.001 network_data.adam_epsilon = 0.0001 network_data.optimizer = tf.train.AdamOptimizer( learning_rate=network_data.learning_rate, epsilon=network_data.adam_epsilon) ########################################################################################################### network = ZorzNet(network_data) network.create_graph() train_database = Database.fromFile(project_data.TRAIN_DATABASE_FILE, project_data) test_database = Database.fromFile(project_data.TEST_DATABASE_FILE, project_data) train_feats, train_labels = train_database.to_set() test_feats, test_labels = test_database.to_set() train_feats = train_feats[2:3] train_labels = train_labels[2:3] network.train(train_features=train_feats, train_labels=train_labels, restore_run=False, save_partial=True, save_freq=10, use_tensorboard=True,
# Load each prompt transcription list (train and test splits), normalize the
# text, and collect {name, transcription} records for database construction.
# NOTE(review): `ProjectData`, `Database` and `remove_accents` are imported
# earlier in the file (not visible in this chunk).

# Load project data
project_data = ProjectData()

transcription_file_list = ['promptsl40.train', 'promptsl40.test']
is_train_flag = False

for file in transcription_file_list:
    # The split is inferred from the list file's name.
    if 'train' in file:
        print('Loading TRAIN database')
        is_train_flag = True
    else:
        print('Loading TEST database')
        is_train_flag = False

    database = Database(project_data)

    # Create a list of all labels
    transcription_list = []
    with open(project_data.TRANSCRIPTION_DIR+'/'+file, 'r', encoding='latin1') as f:
        for line in f.readlines():
            # Each line is "<name> <transcription>"; split on the first space.
            aux = line.split(' ', 1)
            name = aux[0].rstrip()
            # Remove accents
            transcription = remove_accents(aux[1].rstrip())
            # Fix: renamed from `dict`, which shadowed the builtin type.
            transcription_dict = {'name': name, 'transcription': transcription}
            transcription_list.append(transcription_dict)

    # Get the names of each wav file in the directory
    if is_train_flag:
        wav_dir = project_data.WAV_TRAIN_DIR
# MFCC feature configuration and the per-wav loop that computes audio
# features and locates the matching transcription file.
# NOTE(review): `feature_config`, `np`, `project_data`, `wav_dirs`, `os`,
# `AudioFeature` and the label classes come from earlier in the file (not
# visible in this chunk).
feature_config.preemph = 0.98
feature_config.num_filters = 40
feature_config.num_ceps = 26
feature_config.mfcc_window = np.hamming

label_type = "las"  # "classic", "las", "optim"

# Select the label class matching the requested label type.
if label_type == "classic":
    label_class = ClassicLabel
elif label_type == "las":
    label_class = LASLabel
else:
    label_class = OptimalLabel

for wav_dir in wav_dirs:
    database = Database(project_data)

    # Get the names of each wav file in the directory
    wav_names = os.listdir(wav_dir)

    for wav_index in range(len(wav_names)):
        # Transcriptions live in the directory matching the wav split.
        if wav_dir == project_data.WAV_TRAIN_DIR:
            label_dir = project_data.TRANSCRIPTION_TRAIN_DIR
        else:
            label_dir = project_data.TRANSCRIPTION_TEST_DIR

        # Get filenames: the label file shares the wav's base name with a
        # .TXT extension.
        wav_filename = wav_dir + '/' + wav_names[wav_index]
        label_filename = label_dir + '/' + wav_names[wav_index].split(".")[0] + '.TXT'

        # Compute the audio features for this wav file.
        audio_feature = AudioFeature.fromFile(wav_filename, feature_config)
# Preamble of a SMAC hyperparameter-search script for ZorzNet: imports,
# dataset loading, and the search/validation constants used further below.
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter, \
    UniformIntegerHyperparameter
from smac.stats.stats import Stats
from smac.utils.io.traj_logging import TrajLogger
from tensorflow.contrib.layers import l2_regularizer
from src.neural_network.data_conversion import indexToStr
from src.neural_network.ZorzNet.ZorzNetData import ZorzNetData
from src.neural_network.ZorzNet.ZorzNet import ZorzNet
from src.utils.Database import Database
from src.utils.ProjectData import ProjectData
from src.utils.smac_utils import wait_for_user_input_non_block, remove_if_exist

project_data = ProjectData()

train_database = Database.fromFile(project_data.TRAIN_DATABASE_FILE, project_data)
val_database = Database.fromFile(project_data.VAL_DATABASE_FILE, project_data)

# TODO Add a different method for this
# Use the whole of each database: split ratios are (1.0, 0.0, 0.0), so the
# remaining four return values are discarded.
train_feats, train_labels, _, _, _, _ = train_database.split_sets(1.0, 0.0, 0.0)
val_feats, val_labels, _, _, _, _ = val_database.split_sets(1.0, 0.0, 0.0)

# -----------------------------------------------------------------------------------------

# Search/validation budget and run-bookkeeping constants.
optimization_epochs = 100
validation_epochs = 200
# space_optimization_evals = 1
batch_size = 50
run_folder = 'run'
run_count_step = 1