Code Example #1
def _generate_spllited_encoder_input_data_partition(audio_data,
                                                    word_level,
                                                    dataset_number,
                                                    partitions=8,
                                                    test=False):
    audio_sets = []
    limits = []
    # Compute the cumulative index boundaries of each partition
    for i in range(1, partitions + 1):
        limits.append(int(len(audio_data) * i / partitions))

    # Slice the audio data into the computed partitions
    audio_sets.append(audio_data[0:limits[0]])
    for i in range(1, partitions):
        audio_sets.append(audio_data[limits[i - 1]:limits[i]])

    # Drop the reference to the original dataset and reclaim memory
    audio_data = []
    gc.collect()
    # Pickle each partition under the matching train/test, word/character-level path
    for index, audio_set in enumerate(audio_sets):
        audio_set = np.array(audio_set)
        if not test:
            if word_level:
                path = settings.AUDIO_WORD_SPLIT_TRAIN_PATH + "dataset" + str(
                    dataset_number) + "/audio_set" + str(index) + ".pkl"
            else:
                path = settings.AUDIO_CHAR_SPLIT_TRAIN_PATH + "dataset" + str(
                    dataset_number) + "/audio_set" + str(index) + ".pkl"
        else:
            if word_level:
                path = settings.AUDIO_WORD_SPLIT_TEST_PATH + "dataset" + str(
                    dataset_number) + "/audio_set" + str(index) + ".pkl"
            else:
                path = settings.AUDIO_CHAR_SPLIT_TEST_PATH + "dataset" + str(
                    dataset_number) + "/audio_set" + str(index) + ".pkl"

        generate_pickle_file(audio_set, path)
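Every example on this page passes its result to a generate_pickle_file helper that is not itself shown. A minimal sketch of what such a helper could look like, assuming it simply wraps pickle.dump and creates the target directory when needed (the real implementation may differ):

import os
import pickle


def generate_pickle_file(data, path):
    # Assumed behaviour: serialize `data` to `path` with pickle, creating
    # the parent directory first so nested dataset folders can be written.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, "wb") as pickle_file:
        pickle.dump(data, pickle_file)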
Code Example #2
def generate_pickle_dataset():
    mapped_audio = map_audio_transcripts()
    audioInput = []
    for path, transcription in mapped_audio.items():
        print(path + "====>" + transcription)
        audioInput.append(AudioInput(path=path, transcript=transcription))

    updated_data = get_fixed_size_data(audioInput_data=audioInput)

    generate_pickle_file(updated_data, PICKLE_PAD_FILE_PATH)
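This example depends on map_audio_transcripts to pair each audio file with its transcript. A hypothetical sketch, assuming a flat corpus directory where every "x.wav" has a matching "x.txt" transcript next to it; the directory layout, file naming, and the corpus_dir parameter are assumptions:

import os


def map_audio_transcripts(corpus_dir="corpus"):
    # Hypothetical layout: one .txt transcript per .wav recording.
    mapped_audio = {}
    for file_name in sorted(os.listdir(corpus_dir)):
        if not file_name.endswith(".wav"):
            continue
        audio_path = os.path.join(corpus_dir, file_name)
        transcript_path = os.path.splitext(audio_path)[0] + ".txt"
        with open(transcript_path, "r", encoding="utf-8") as transcript_file:
            mapped_audio[audio_path] = transcript_file.read().strip()
    return mapped_audio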
Code Example #3
    def on_epoch_end(self, epoch, logs=None):
        # Saving training history
        # Check if directory exists
        directory_path = settings.TRAINED_MODELS_PATH + self.model_name
        if not file_exists(directory_path):
            create_dir(directory_path)

        # Word level history
        if self.word_level:
            hist_path = settings.TRAINED_MODELS_PATH + self.model_name + "/" + self.model_name + "word.pkl"
            average_accuracy = 0
            if file_exists(hist_path):
                acc_loss_history = load_pickle_data(hist_path)
            else:
                acc_loss_history = dict()
                acc_loss_history["accuracy"] = []
                acc_loss_history["loss"] = []

            # Average accuracy across the six decoder_dense output heads
            for i in range(0, 6):
                accuracy = "decoder_dense" + str(i) + "_acc"
                average_accuracy += logs[accuracy]

            average_accuracy = average_accuracy / 6.0

            acc_loss_history["accuracy"].append(average_accuracy)
            acc_loss_history["loss"].append(logs["loss"])

        # Character level history
        else:
            hist_path = settings.TRAINED_MODELS_PATH + self.model_name + "/" + self.model_name + "char.pkl"
            if file_exists(hist_path):
                acc_loss_history = load_pickle_data(hist_path)
            else:
                acc_loss_history = dict()
                acc_loss_history["accuracy"] = []
                acc_loss_history["loss"] = []

            acc_loss_history["accuracy"].append(logs["acc"])
            acc_loss_history["loss"].append(logs["loss"])

        generate_pickle_file(acc_loss_history, hist_path)
        plot_train_loss_acc(hist_path, self.word_level)

        self.model.save(self.model_path)
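After appending the new history entry, the callback hands the pickle over to plot_train_loss_acc, which is not shown here. A minimal sketch of such a plotting helper, assuming the pickle holds the {"accuracy": [...], "loss": [...]} dictionary built above and that the figure is saved next to the pickle (both assumptions):

import pickle

import matplotlib.pyplot as plt


def plot_train_loss_acc(hist_path, word_level):
    # Load the history dictionary written by the callback above.
    with open(hist_path, "rb") as pickle_file:
        history = pickle.load(pickle_file)

    epochs = range(1, len(history["loss"]) + 1)
    level = "word" if word_level else "character"

    # Plot loss and accuracy per epoch and save the figure beside the pickle.
    plt.figure()
    plt.plot(epochs, history["loss"], label="loss")
    plt.plot(epochs, history["accuracy"], label="accuracy")
    plt.xlabel("epoch")
    plt.title("Training history (" + level + " level)")
    plt.legend()
    plt.savefig(hist_path.replace(".pkl", ".png"))
    plt.close()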
Code Example #4
def generate_pickle_dataset(threshold=10):
    """
    Generates pickle dataset_out of transcription and
    :param threshold:
    :return:
    """

    threshold = threshold * 3600
    mapped_audio = map_audio_transcripts_generic()
    audioInput = []
    timing = 0
    pickle_file_index = 0
    for path, transcription in mapped_audio.items():
        print(path + "====>" + transcription)

        # Calling transcription preprocessing
        special_characters = special_characters_table()
        transcription = transcript_preprocessing(transcription,
                                                 special_characters)

        if transcription is not None:
            # Calculating Total audio length for partitions
            audioInput_instance = AudioInput(path=path,
                                             transcript=transcription)
            audioInput.append(audioInput_instance)

            timing += audioInput_instance.audio_length
            print("Timing is : " + str(timing))

        # Flush a partition once the accumulated audio duration reaches the threshold
        if timing >= threshold:
            path = settings.PICKLE_PARTITIONS_PATH + "dataset" + str(
                pickle_file_index) + ".pkl"
            generate_pickle_file(audioInput, path)
            pickle_file_index += 1
            timing = 0
            del audioInput
            gc.collect()
            audioInput = []
    # Note: any samples accumulated after the last flush (less than one
    # threshold's worth of audio) are not written out by this function.
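The partitioning above only works because AudioInput exposes an audio_length attribute. A hypothetical sketch of such a container, assuming WAV input and that audio_length is the clip duration in seconds (the real class presumably also extracts the MFCC features used in the other examples, which is omitted here):

import wave


class AudioInput:
    def __init__(self, path, transcript):
        self.path = path
        self.transcript = transcript
        # Assumed: audio_length is the duration in seconds, which is what
        # the hour-based threshold above is compared against.
        with wave.open(path, "rb") as wav_file:
            frames = wav_file.getnframes()
            rate = wav_file.getframerate()
        self.audio_length = frames / float(rate)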
Code Example #5
def normalize_encoder_input(dataset):
    # Split the dataset into four chunks so each can be normalized and freed independently
    first_interval = int(len(dataset) / 4)
    second_interval = int(len(dataset) / 2)
    third_interval = int(len(dataset) * 3 / 4)

    splitted_dataset = []
    splitted_dataset.append(dataset[0:first_interval])
    splitted_dataset.append(dataset[first_interval:second_interval])
    splitted_dataset.append(dataset[second_interval:third_interval])
    splitted_dataset.append(dataset[third_interval:len(dataset)])

    min_attributes = []
    max_attributes = []
    print("into normalization")
    # Calculating and saving min and max values of dataset
    for attribute_index in range(0, settings.MFCC_FEATURES_LENGTH):
        attribute_values = get_attribute_values(dataset, attribute_index)
        min_attributes.append(np.min(attribute_values))
        max_attributes.append(np.max(attribute_values))

    print(min_attributes)
    print(max_attributes)
    generate_pickle_file(min_attributes,
                         settings.ENCODER_INPUT_MIN_VALUES_PATH)
    generate_pickle_file(max_attributes,
                         settings.ENCODER_INPUT_MAX_VALUES_PATH)

    print("generating new dataset")

    del dataset
    gc.collect()
    dataset_index = 0
    # Normalize one chunk at a time, pickle it, then drop it to keep memory use bounded
    while splitted_dataset:
        for i, encoder_input in enumerate(splitted_dataset[0]):
            normalized_encoder_input = []
            for line in encoder_input:
                normalized_line = []
                for index_column, value in enumerate(line):
                    normalized_line.append(
                        min_max_normalization(value, index_column,
                                              min_attributes, max_attributes))
                normalized_encoder_input.append(normalized_line)
            print("normalized " + str(i))
            splitted_dataset[0][i] = normalized_encoder_input
        generate_pickle_file(
            splitted_dataset[0], settings.NORMALIZED_ENCODER_INPUT_PATHS +
            "dataset" + str(dataset_index) + ".pkl")

        splitted_dataset.pop(0)
        gc.collect()
        dataset_index += 1

    # splitted_dataset is empty at this point: every chunk was popped and
    # pickled in the loop above
    final_dataset = _group_splitted_datasets(splitted_dataset)

    return final_dataset
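The inner loop applies a min_max_normalization helper that is not shown on this page. A minimal sketch, assuming it performs the usual (value - min) / (max - min) scaling per MFCC column, with a guard against constant columns (the real helper may differ):

def min_max_normalization(value, index_column, min_attributes, max_attributes):
    # Scale the value into [0, 1] using the column-wise minima and maxima
    # computed and pickled above (assumed formula).
    minimum = min_attributes[index_column]
    maximum = max_attributes[index_column]
    if maximum == minimum:
        # Constant column: avoid division by zero.
        return 0.0
    return (value - minimum) / (maximum - minimum)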
Code Example #6
def get_dataset_information(word_level, train_ratio):
    print("GENERATING DATASET INFORMATION")

    list_datasets = get_files(settings.PICKLE_PARTITIONS_PATH)
    all_transcripts = []
    samples_number = 0
    # Word level information
    if word_level:
        for dataset_set, dataset_file in enumerate(list_datasets):
            train_data, test_data = _get_train_test_data_partition(
                dataset_path=dataset_file, train_ratio=train_ratio)
            samples_number += len(train_data)

            train_audio, train_transcripts = _get_audio_transcripts_word_level(
                train_data)
            test_audio, test_transcripts = _get_audio_transcripts_word_level(
                test_data)

            settings.MFCC_FEATURES_LENGTH = train_audio[0].shape[1]

            all_transcripts += train_transcripts
            all_transcripts += test_transcripts

        settings.TOTAL_SAMPLES_NUMBER = samples_number
        settings.WORD_SET = get_distinct_words(all_transcripts)
        settings.LONGEST_WORD_LENGTH = get_longest_word_length(
            settings.WORD_SET)
        settings.CHARACTER_SET = sorted(get_character_set(all_transcripts))
        settings.WORD_TARGET_LENGTH = (len(settings.CHARACTER_SET) +
                                       1) * settings.LONGEST_WORD_LENGTH

        general_info = []

        print("MFCC FEATURES : " + str(settings.MFCC_FEATURES_LENGTH))
        print("TOTAL SAMPLES : " + str(settings.TOTAL_SAMPLES_NUMBER))
        print("WORD SET : " + str(len(settings.WORD_SET)))
        print("LONGEST WORD LENGTH " + str(settings.LONGEST_WORD_LENGTH))
        print("CHARACTER SET : " + str(settings.CHARACTER_SET))
        print("CHARATER SET LENGTH " + str(len(settings.CHARACTER_SET)))

        general_info.append(settings.MFCC_FEATURES_LENGTH)
        general_info.append(settings.TOTAL_SAMPLES_NUMBER)
        general_info.append(settings.WORD_SET)
        general_info.append(settings.LONGEST_WORD_LENGTH)
        general_info.append(settings.CHARACTER_SET)
        general_info.append(settings.WORD_TARGET_LENGTH)

        generate_pickle_file(general_info,
                             settings.DATASET_WORD_INFORMATION_PATH)
        generate_pickle_file(general_info,
                             settings.DATASET_WORD_INFERENCE_INFORMATION_PATH)

    # Character level information
    else:
        for dataset_set, dataset_file in enumerate(list_datasets):
            train_data, test_data = _get_train_test_data_partition(
                dataset_path=dataset_file, train_ratio=train_ratio)
            samples_number += len(train_data)

            train_audio, train_transcripts = _get_audio_transcripts_character_level(
                train_data)
            test_audio, test_transcripts = _get_audio_transcripts_character_level(
                test_data)

            settings.MFCC_FEATURES_LENGTH = train_audio[0].shape[1]

            all_transcripts += train_transcripts
            all_transcripts += test_transcripts

        settings.TOTAL_SAMPLES_NUMBER = samples_number
        settings.CHARACTER_SET = get_character_set(all_transcripts)

        general_info = []
        general_info.append(settings.MFCC_FEATURES_LENGTH)
        general_info.append(settings.TOTAL_SAMPLES_NUMBER)
        general_info.append(settings.CHARACTER_SET)

        generate_pickle_file(general_info,
                             settings.DATASET_CHAR_INFORMATION_PATH)
        generate_pickle_file(general_info,
                             settings.DATASET_CHAR_INFERENCE_INFORMATION_PATH)
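The word-level branch relies on a few vocabulary helpers (get_character_set, get_distinct_words, get_longest_word_length) that are not shown on this page. Minimal sketches, assuming transcripts are plain strings (the real helpers may differ):

def get_character_set(transcripts):
    # Distinct characters across all transcripts.
    characters = set()
    for transcript in transcripts:
        characters.update(transcript)
    return characters


def get_distinct_words(transcripts):
    # Distinct whitespace-separated words across all transcripts.
    words = set()
    for transcript in transcripts:
        words.update(transcript.split())
    return words


def get_longest_word_length(word_set):
    # Length of the longest word in the vocabulary.
    return max(len(word) for word in word_set)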