예제 #1
0
    def create_embeddings(self, X_train, y_train, X_test, y_test):
        """Predict with the stored network and turn the outputs into embeddings.

        The train/test inputs are grouped by ``create_data_lists`` (which is
        given the ``validation.short_utterances`` config flag), every group is
        converted by ``self._generate_cluster_data``, and the network loaded
        from ``self.net_path`` predicts on each converted group.

        Returns:
            The three values returned by ``generate_embeddings`` followed by
            the result of ``TimeCalculator.calc_time_all_utterances``.
        """
        use_short = self.config.getboolean('validation', 'short_utterances')
        data_groups, label_groups, _ = create_data_lists(
            use_short, X_train, X_test, y_train, y_test)

        # Convert every (data, labels) group into its cluster representation.
        x_cluster_list, y_cluster_list = [], []
        for group_x, group_y in zip(data_groups, label_groups):
            clustered_x, clustered_y = self._generate_cluster_data(
                group_x, group_y)
            x_cluster_list.append(clustered_x)
            y_cluster_list.append(clustered_y)

        # Load the network and attach a test-time batch iterator (batch of 128).
        net = load(self.net_path)
        net.batch_iterator_test = BatchIterator(batch_size=128)

        # One prediction result per group, in group order.
        outputs = [net.predict_proba(group) for group in x_cluster_list]

        # The width of the first output is passed on as the vector size.
        embeddings, speakers, number_embeddings = generate_embeddings(
            outputs, y_cluster_list, outputs[0].shape[1])

        # Calculate the time per utterance.
        time = TimeCalculator.calc_time_all_utterances(
            y_cluster_list, self.config.getint('luvo', 'seg_size'))

        return embeddings, speakers, number_embeddings, time
예제 #2
0
    def get_embeddings(self):
        """Create embeddings for every checkpoint in ``self.checkpoints``.

        The validation train/test data is loaded with ``load_test_data``,
        grouped by ``create_data_lists`` and passed, group by group, through
        ``run_analysis_network`` using each checkpoint's network file.

        Returns:
            A tuple ``(checkpoints, set_of_embeddings, set_of_speakers,
            set_of_num_embeddings, set_of_total_times)``. Each of the four
            lists holds one entry per checkpoint, in checkpoint order.
        """
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')
        # Read through self.config, like the flag above, instead of relying on
        # a bare ``config`` name. The value is the same for every checkpoint,
        # so it is fetched once.
        seg_size = self.config.getint('pairwise_kldiv', 'seg_size')

        logger = get_logger('kldiv', logging.INFO)
        logger.info('Run pairwise_kldiv')
        checkpoints = self.checkpoints

        X_train, y_train, s_list_train = load_test_data(
            self.get_validation_train_data())
        X_test, y_test, s_list_test = load_test_data(
            self.get_validation_test_data())

        x_list, y_list, s_list = create_data_lists(short_utterance, X_train,
                                                   X_test, y_train, y_test,
                                                   s_list_train, s_list_test)

        # Prepare return values (one entry is appended per checkpoint).
        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_total_times = []

        for checkpoint in checkpoints:
            logger.info('Run checkpoint: ' + checkpoint)
            network_file = get_experiment_nets(checkpoint)

            # Run every data group through the network of this checkpoint.
            x_cluster_list = []
            y_cluster_list = []
            for x, y, s in zip(x_list, y_list, s_list):
                x_cluster, y_cluster = run_analysis_network(
                    network_file, x, y, s)
                x_cluster_list.append(x_cluster)
                y_cluster_list.append(y_cluster)

            # The width of the first group's output is used as vector size.
            embeddings, speakers, num_embeddings = generate_embeddings(
                x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

            # Fill return values
            set_of_embeddings.append(embeddings)
            set_of_speakers.append(speakers)
            set_of_num_embeddings.append(num_embeddings)

            # Calculate the time per utterance
            time = TimeCalculator.calc_time_all_utterances(
                y_cluster_list, seg_size)
            set_of_total_times.append(time)

        return (checkpoints, set_of_embeddings, set_of_speakers,
                set_of_num_embeddings, set_of_total_times)
예제 #3
0
def create_embeddings(config, checkpoints, x_list, y_list, out_layer=7, seg_size=100):
    """Build embeddings for each checkpoint from an intermediate layer.

    For every checkpoint the saved network is loaded and compiled, a second
    model ending at ``layers[out_layer]`` is derived from it, and that model
    predicts on each entry of ``x_list``. The predictions and the matching
    entries of ``y_list`` are handed to ``generate_embeddings``.

    Args:
        config: Passed to ``get_loss`` and ``get_custom_objects``.
        checkpoints: Checkpoint names; each is resolved with
            ``get_experiment_nets`` and returned unchanged as first result.
        x_list: Inputs, one entry per data group.
        y_list: Labels paired with ``x_list`` via ``zip``.
        out_layer: Index into ``model_full.layers`` whose output is used.
        seg_size: Forwarded to ``TimeCalculator.calc_time_all_utterances``.

    Returns:
        ``(checkpoints, set_of_embeddings, set_of_speakers,
        set_of_num_embeddings, set_of_total_times)`` with one list entry per
        checkpoint.
    """
    # Compile settings shared by all checkpoints.
    loss_fn = get_loss(config)
    extra_objects = get_custom_objects(config)
    compile_kwargs = {
        'loss': loss_fn,
        'optimizer': 'adadelta',
        'metrics': ['accuracy'],
    }

    all_embeddings, all_speakers = [], []
    all_counts, all_times = [], []

    for checkpoint in checkpoints:
        logger.info('Run checkpoint: ' + checkpoint)

        # Load and compile the trained network.
        model_full = load_model(get_experiment_nets(checkpoint),
                                custom_objects=extra_objects)
        model_full.compile(**compile_kwargs)

        # Derive a model whose output is the requested layer.
        embedding_layer = model_full.layers[out_layer]
        model_partial = Model(inputs=model_full.input,
                              outputs=embedding_layer.output)

        # Predict per group; labels are carried along unchanged.
        predicted, labels = [], []
        for group_x, group_y in zip(x_list, y_list):
            predicted.append(np.asarray(model_partial.predict(group_x)))
            labels.append(group_y)

        embeddings, speakers, num_embeddings = generate_embeddings(
            predicted, labels, predicted[0].shape[1])

        all_embeddings.append(embeddings)
        all_speakers.append(speakers)
        all_counts.append(num_embeddings)

        # Calculate the time per utterance.
        all_times.append(
            TimeCalculator.calc_time_all_utterances(labels, seg_size))

    return checkpoints, all_embeddings, all_speakers, all_counts, all_times
예제 #4
0
    def create_embeddings(self, X_train, y_train, X_test, y_test):
        """Create embeddings from an intermediate layer of the saved network.

        The train/test data is segmented with ``prepare_data`` (segment size
        from the ``luvo.seg_size`` config value) and grouped by
        ``create_data_lists``. The network stored as
        ``self.network_name + ".h5"`` is loaded, and a second model ending at
        the layer index given by ``luvo.out_layer`` predicts on every group.

        Returns:
            The three values returned by ``generate_embeddings`` followed by
            the result of ``TimeCalculator.calc_time_all_utterances``.
        """
        seg_size = self.config.getint('luvo', 'seg_size')
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')

        x_train, speakers_train = prepare_data(X_train, y_train, seg_size)
        x_test, speakers_test = prepare_data(X_test, y_test, seg_size)

        x_list, y_list, _ = create_data_lists(short_utterance, x_train, x_test,
                                              speakers_train, speakers_test)

        # Load and compile the trained network
        network_file = get_experiment_nets(self.network_name + ".h5")
        model_full = load_model(network_file)
        model_full.compile(loss='categorical_crossentropy',
                           optimizer='sgd',
                           metrics=['accuracy'])

        # Get a Model with the embedding layer as output and predict
        out_layer = self.config.getint('luvo', 'out_layer')
        model_partial = Model(inputs=model_full.input,
                              outputs=model_full.layers[out_layer].output)

        x_cluster_list = []
        y_cluster_list = []
        for x_data, y_data in zip(x_list, y_list):
            # Diagnostic output of each group's shape (kept from the original).
            print(x_data.shape)
            x_cluster = np.asarray(model_partial.predict(x_data))
            x_cluster_list.append(x_cluster)
            y_cluster_list.append(y_data)

        # The width of the first group's output is used as vector size.
        embeddings, speakers, num_embeddings = generate_embeddings(
            x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

        # Calculate the time per utterance, reusing the segment size read
        # above instead of querying the config a second time.
        time = TimeCalculator.calc_time_all_utterances(
            y_cluster_list, seg_size)

        return embeddings, speakers, num_embeddings, time
예제 #5
0
    def get_embeddings(self):
        """Create embeddings for every stored ``pairwise_lstm`` checkpoint.

        Validation train/test data is loaded with ``load_test_data``,
        segmented with ``prepare_data`` and grouped by ``create_data_lists``.
        Every file listed by ``list_all_files`` that matches the checkpoint
        pattern is loaded, and a model ending at ``layers[out_layer]``
        predicts on each data group.

        Returns:
            A tuple ``(checkpoints, set_of_embeddings, set_of_speakers,
            speaker_numbers, set_of_total_times)``. Each of the four lists
            holds one entry per checkpoint, in checkpoint order.
        """
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')
        out_layer = self.config.getint('pairwise_lstm', 'out_layer')
        seg_size = self.config.getint('pairwise_lstm', 'seg_size')
        vec_size = self.config.getint('pairwise_lstm', 'vec_size')

        logger = get_logger('lstm', logging.INFO)
        logger.info('Run pairwise_lstm test')
        logger.info('out_layer -> ' + str(out_layer))
        logger.info('seg_size -> ' + str(seg_size))
        logger.info('vec_size -> ' + str(vec_size))

        # Load and prepare train/test data
        x_train, speakers_train, s_list_train = load_test_data(
            self.get_validation_train_data())
        x_test, speakers_test, s_list_test = load_test_data(
            self.get_validation_test_data())
        x_train, speakers_train = prepare_data(x_train, speakers_train,
                                               seg_size)
        x_test, speakers_test = prepare_data(x_test, speakers_test, seg_size)

        x_list, y_list, s_list = create_data_lists(short_utterance, x_train,
                                                   x_test, speakers_train,
                                                   speakers_test, s_list_train,
                                                   s_list_test)

        # Prepare return values
        set_of_embeddings = []
        set_of_speakers = []
        speaker_numbers = []
        set_of_total_times = []
        # Raw string: ``\.`` is not a valid escape in a normal string literal
        # and triggers a warning; the pattern text itself is unchanged.
        checkpoints = list_all_files(get_experiment_nets(),
                                     r"^pairwise_lstm.*\.h5")

        # Values out of the loop
        metrics = [
            'accuracy',
            'categorical_accuracy',
        ]
        loss = pairwise_kl_divergence
        custom_objects = {'pairwise_kl_divergence': pairwise_kl_divergence}
        optimizer = 'rmsprop'

        # Fill return values
        for checkpoint in checkpoints:
            logger.info('Running checkpoint: ' + checkpoint)
            # Load and compile the trained network
            network_file = get_experiment_nets(checkpoint)
            model_full = load_model(network_file,
                                    custom_objects=custom_objects)
            model_full.compile(loss=loss, optimizer=optimizer, metrics=metrics)

            # Get a Model with the embedding layer as output and predict
            model_partial = Model(inputs=model_full.input,
                                  outputs=model_full.layers[out_layer].output)

            x_cluster_list = []
            y_cluster_list = []
            # s_list stays in the zip so the iteration length is unchanged,
            # although its items are not used here.
            for x, y, _ in zip(x_list, y_list, s_list):
                x_cluster = np.asarray(model_partial.predict(x))
                x_cluster_list.append(x_cluster)
                y_cluster_list.append(y)

            # The configured vec_size is passed on as the vector size.
            embeddings, speakers, num_embeddings = generate_embeddings(
                x_cluster_list, y_cluster_list, vec_size)

            # Fill the embeddings and speakers into the arrays
            set_of_embeddings.append(embeddings)
            set_of_speakers.append(speakers)
            speaker_numbers.append(num_embeddings)

            # Calculate the time per utterance
            time = TimeCalculator.calc_time_all_utterances(
                y_cluster_list, seg_size)
            set_of_total_times.append(time)

        logger.info('Pairwise_lstm test done.')
        return (checkpoints, set_of_embeddings, set_of_speakers,
                speaker_numbers, set_of_total_times)