import gc

import numpy as np

# Project-local names used below (settings, generate_pickle_file, AudioInput,
# load_pickle_data, ...) are assumed to be imported elsewhere in this module.


def _generate_spllited_encoder_input_data_partition(audio_data, word_level,
                                                    dataset_number,
                                                    partitions=8, test=False):
    """Split audio_data into `partitions` roughly equal chunks and pickle
    each chunk under the matching train/test, word/char-level directory."""
    # Cumulative slice boundaries: limits[i] is the end index of partition i;
    # the last limit equals len(audio_data), so every sample is covered.
    limits = [int(len(audio_data) * i / partitions)
              for i in range(1, partitions + 1)]
    audio_sets = [audio_data[0:limits[0]]]
    for i in range(1, partitions):
        audio_sets.append(audio_data[limits[i - 1]:limits[i]])
    # Release the original dataset to keep peak memory bounded
    audio_data = []
    gc.collect()
    # Pick the base directory once instead of rebuilding the path per branch
    if not test:
        base_path = (settings.AUDIO_WORD_SPLIT_TRAIN_PATH if word_level
                     else settings.AUDIO_CHAR_SPLIT_TRAIN_PATH)
    else:
        base_path = (settings.AUDIO_WORD_SPLIT_TEST_PATH if word_level
                     else settings.AUDIO_CHAR_SPLIT_TEST_PATH)
    for index, audio_set in enumerate(audio_sets):
        audio_set = np.array(audio_set)
        path = (base_path + "dataset" + str(dataset_number) +
                "/audio_set" + str(index) + ".pkl")
        generate_pickle_file(audio_set, path)
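# `generate_pickle_file` is a project helper whose implementation is not shown
# in this section. A minimal sketch of what it is assumed to do (serialize an
# object to disk, creating parent directories as needed); the name
# `generate_pickle_file_sketch` is illustrative, not the project's actual code:

import os
import pickle


def generate_pickle_file_sketch(data, path):
    """Hypothetical stand-in for the project's generate_pickle_file helper."""
    os.makedirs(os.path.dirname(path), exist_ok=True)  # ensure the target dir exists
    with open(path, "wb") as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)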
def generate_pickle_dataset():
    """Map audio files to their transcripts, pad the samples to a fixed
    size, and pickle the result."""
    mapped_audio = map_audio_transcripts()
    audio_input = []
    for path, transcription in mapped_audio.items():
        print(path + " ====> " + transcription)
        audio_input.append(AudioInput(path=path, transcript=transcription))
    updated_data = get_fixed_size_data(audioInput_data=audio_input)
    generate_pickle_file(updated_data, PICKLE_PAD_FILE_PATH)
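# `get_fixed_size_data` is another project helper not shown here. Judging by
# PICKLE_PAD_FILE_PATH, it is assumed to pad every sample's MFCC matrix to a
# common frame count. A hypothetical sketch of that padding step, under that
# assumption only:

import numpy as np


def pad_mfcc_features(mfcc_matrices):
    """Zero-pad each (frames, n_mfcc) matrix to the longest frame count."""
    max_frames = max(m.shape[0] for m in mfcc_matrices)
    return [np.pad(m, ((0, max_frames - m.shape[0]), (0, 0)), mode="constant")
            for m in mfcc_matrices]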
def on_epoch_end(self, epoch, logs=None):
    """Persist the accuracy/loss history, plot it, and checkpoint the model
    at the end of every epoch."""
    logs = logs or {}
    # Make sure the model's output directory exists
    directory_path = settings.TRAINED_MODELS_PATH + self.model_name
    if not file_exists(directory_path):
        create_dir(directory_path)
    if self.word_level:
        # Word-level history: average the accuracy over the six decoder outputs
        hist_path = (settings.TRAINED_MODELS_PATH + self.model_name + "/" +
                     self.model_name + "word.pkl")
        if file_exists(hist_path):
            acc_loss_history = load_pickle_data(hist_path)
        else:
            acc_loss_history = {"accuracy": [], "loss": []}
        average_accuracy = 0.0
        for i in range(6):
            average_accuracy += logs["decoder_dense" + str(i) + "_acc"]
        average_accuracy /= 6
        acc_loss_history["accuracy"].append(average_accuracy)
        acc_loss_history["loss"].append(logs["loss"])
    else:
        # Character-level history: a single accuracy metric
        hist_path = (settings.TRAINED_MODELS_PATH + self.model_name + "/" +
                     self.model_name + "char.pkl")
        if file_exists(hist_path):
            acc_loss_history = load_pickle_data(hist_path)
        else:
            acc_loss_history = {"accuracy": [], "loss": []}
        acc_loss_history["accuracy"].append(logs["acc"])
        acc_loss_history["loss"].append(logs["loss"])
    generate_pickle_file(acc_loss_history, hist_path)
    plot_train_loss_acc(hist_path, self.word_level)
    self.model.save(self.model_path)
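# This method is assumed to live on a keras.callbacks.Callback subclass. A
# hypothetical usage sketch; the class name TrainingHistoryCheckpoint and its
# constructor arguments are illustrative, not the project's actual API:
#
#   checkpoint = TrainingHistoryCheckpoint(model_name="seq2seq",
#                                          model_path="models/seq2seq.h5",
#                                          word_level=True)
#   model.fit([encoder_input, decoder_input], decoder_target,
#             epochs=50, callbacks=[checkpoint])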
def generate_pickle_dataset(threshold=10):
    """Generate pickled dataset partitions out of the mapped audio/transcript
    pairs, starting a new partition whenever the accumulated audio duration
    reaches the threshold.

    :param threshold: maximum audio duration per partition, in hours
    :return: None; partitions are written to settings.PICKLE_PARTITIONS_PATH
    """
    threshold = threshold * 3600  # convert hours to seconds
    mapped_audio = map_audio_transcripts_generic()
    special_characters = special_characters_table()  # build the table once
    audio_input = []
    timing = 0
    pickle_file_index = 0
    for path, transcription in mapped_audio.items():
        print(path + " ====> " + transcription)
        transcription = transcript_preprocessing(transcription,
                                                 special_characters)
        if transcription is not None:
            # Accumulate total audio length to decide partition boundaries
            audio_input_instance = AudioInput(path=path,
                                              transcript=transcription)
            audio_input.append(audio_input_instance)
            timing += audio_input_instance.audio_length
            print("Timing is : " + str(timing))
            if timing >= threshold:
                partition_path = (settings.PICKLE_PARTITIONS_PATH + "dataset" +
                                  str(pickle_file_index) + ".pkl")
                generate_pickle_file(audio_input, partition_path)
                pickle_file_index += 1
                timing = 0
                del audio_input
                gc.collect()
                audio_input = []
    # Flush the remainder that never reached the threshold; without this the
    # last partial partition would be silently dropped.
    if audio_input:
        partition_path = (settings.PICKLE_PARTITIONS_PATH + "dataset" +
                          str(pickle_file_index) + ".pkl")
        generate_pickle_file(audio_input, partition_path)
def normalize_encoder_input(dataset):
    """Min-max normalize every MFCC attribute of the dataset, processing it
    in four chunks to limit peak memory usage."""
    # Split the dataset into four consecutive quarters
    first_interval = int(len(dataset) / 4)
    second_interval = int(len(dataset) / 2)
    third_interval = int(len(dataset) * 3 / 4)
    splitted_dataset = [dataset[0:first_interval],
                        dataset[first_interval:second_interval],
                        dataset[second_interval:third_interval],
                        dataset[third_interval:len(dataset)]]
    min_attributes = []
    max_attributes = []
    print("into normalization")
    # Compute and persist the per-attribute min and max over the full dataset
    for attribute_index in range(settings.MFCC_FEATURES_LENGTH):
        attribute_values = get_attribute_values(dataset, attribute_index)
        min_attributes.append(np.min(attribute_values))
        max_attributes.append(np.max(attribute_values))
    print(min_attributes)
    print(max_attributes)
    generate_pickle_file(min_attributes, settings.ENCODER_INPUT_MIN_VALUES_PATH)
    generate_pickle_file(max_attributes, settings.ENCODER_INPUT_MAX_VALUES_PATH)
    print("generating new dataset")
    del dataset
    gc.collect()
    dataset_index = 0
    # Normalize one quarter at a time, pickle it, then drop it from memory
    while splitted_dataset:
        for i, encoder_input in enumerate(splitted_dataset[0]):
            normalized_encoder_input = []
            for line in encoder_input:
                normalized_line = [
                    min_max_normalization(value, index_column,
                                          min_attributes, max_attributes)
                    for index_column, value in enumerate(line)
                ]
                normalized_encoder_input.append(normalized_line)
            print("normalized " + str(i))
            splitted_dataset[0][i] = normalized_encoder_input
        generate_pickle_file(
            splitted_dataset[0],
            settings.NORMALIZED_ENCODER_INPUT_PATHS + "dataset" +
            str(dataset_index) + ".pkl")
        splitted_dataset.pop(0)
        gc.collect()
        dataset_index += 1
    # Note: splitted_dataset is empty at this point, so the grouped result is
    # empty too; the normalized partitions live in the pickle files above.
    final_dataset = _group_splitted_datasets(splitted_dataset)
    return final_dataset
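# `min_max_normalization` is a project helper not shown in this section. It is
# assumed to apply standard per-attribute min-max rescaling; a minimal sketch
# under that assumption (the `_sketch` suffix marks it as hypothetical):


def min_max_normalization_sketch(value, index_column, min_attributes,
                                 max_attributes):
    """Rescale `value` of attribute `index_column` into [0, 1]."""
    attr_min = min_attributes[index_column]
    attr_max = max_attributes[index_column]
    if attr_max == attr_min:  # guard against a constant-valued attribute
        return 0.0
    return (value - attr_min) / (attr_max - attr_min)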
def get_dataset_information(word_level, train_ratio):
    """Scan every pickled dataset partition, derive the global dataset
    statistics (feature length, sample count, vocabularies), store them in
    the settings module, and pickle them for training and inference."""
    print("GENERATING DATASET INFORMATION")
    list_datasets = get_files(settings.PICKLE_PARTITIONS_PATH)
    all_transcripts = []
    samples_number = 0
    if word_level:
        for dataset_file in list_datasets:
            train_data, test_data = _get_train_test_data_partition(
                dataset_path=dataset_file, train_ratio=train_ratio)
            samples_number += len(train_data)
            train_audio, train_transcripts = _get_audio_transcripts_word_level(
                train_data)
            test_audio, test_transcripts = _get_audio_transcripts_word_level(
                test_data)
            settings.MFCC_FEATURES_LENGTH = train_audio[0].shape[1]
            all_transcripts += train_transcripts
            all_transcripts += test_transcripts
        settings.TOTAL_SAMPLES_NUMBER = samples_number
        settings.WORD_SET = get_distinct_words(all_transcripts)
        settings.LONGEST_WORD_LENGTH = get_longest_word_length(
            settings.WORD_SET)
        settings.CHARACTER_SET = sorted(get_character_set(all_transcripts))
        # Target width: (character set size + 1) slots per character position
        settings.WORD_TARGET_LENGTH = ((len(settings.CHARACTER_SET) + 1) *
                                       settings.LONGEST_WORD_LENGTH)
        print("MFCC FEATURES : " + str(settings.MFCC_FEATURES_LENGTH))
        print("TOTAL SAMPLES : " + str(settings.TOTAL_SAMPLES_NUMBER))
        print("WORD SET : " + str(len(settings.WORD_SET)))
        print("LONGEST WORD LENGTH : " + str(settings.LONGEST_WORD_LENGTH))
        print("CHARACTER SET : " + str(settings.CHARACTER_SET))
        print("CHARACTER SET LENGTH : " + str(len(settings.CHARACTER_SET)))
        general_info = [settings.MFCC_FEATURES_LENGTH,
                        settings.TOTAL_SAMPLES_NUMBER,
                        settings.WORD_SET,
                        settings.LONGEST_WORD_LENGTH,
                        settings.CHARACTER_SET,
                        settings.WORD_TARGET_LENGTH]
        generate_pickle_file(general_info,
                             settings.DATASET_WORD_INFORMATION_PATH)
        generate_pickle_file(general_info,
                             settings.DATASET_WORD_INFERENCE_INFORMATION_PATH)
    else:
        for dataset_file in list_datasets:
            train_data, test_data = _get_train_test_data_partition(
                dataset_path=dataset_file, train_ratio=train_ratio)
            samples_number += len(train_data)
            train_audio, train_transcripts = \
                _get_audio_transcripts_character_level(train_data)
            test_audio, test_transcripts = \
                _get_audio_transcripts_character_level(test_data)
            settings.MFCC_FEATURES_LENGTH = train_audio[0].shape[1]
            all_transcripts += train_transcripts
            all_transcripts += test_transcripts
        settings.TOTAL_SAMPLES_NUMBER = samples_number
        settings.CHARACTER_SET = get_character_set(all_transcripts)
        general_info = [settings.MFCC_FEATURES_LENGTH,
                        settings.TOTAL_SAMPLES_NUMBER,
                        settings.CHARACTER_SET]
        generate_pickle_file(general_info,
                             settings.DATASET_CHAR_INFORMATION_PATH)
        generate_pickle_file(general_info,
                             settings.DATASET_CHAR_INFERENCE_INFORMATION_PATH)
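# A hypothetical sketch of how the pickled word-level information could be
# read back; the unpacking order mirrors the general_info list built above,
# and load_pickle_data is the project helper used elsewhere in this module:
#
#   general_info = load_pickle_data(settings.DATASET_WORD_INFORMATION_PATH)
#   (settings.MFCC_FEATURES_LENGTH, settings.TOTAL_SAMPLES_NUMBER,
#    settings.WORD_SET, settings.LONGEST_WORD_LENGTH,
#    settings.CHARACTER_SET, settings.WORD_TARGET_LENGTH) = general_info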