Code Example #1
    def get_embeddings(self):
        # Load the MFCC train and test sets and the trained network
        X_train, y_train, speaker_train_names = load(
            get_speaker_pickle(self.get_validation_data_name() +
                               '_train_mfcc'))
        X_test, y_test, speaker_test_names = load(
            get_speaker_pickle(self.get_validation_data_name() + '_test_mfcc'))

        model = load(get_experiment_nets(self.name))

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []

        # Forward both data sets through the network
        train_outputs = self.generate_outputs(X_train, model)
        test_outputs = self.generate_outputs(X_test, model)

        # No per-utterance timing is recorded here, so report zeros
        set_of_times = [np.zeros((len(y_test) + len(y_train)), dtype=int)]

        outputs, y_list, s_list = create_data_lists(False, train_outputs,
                                                    test_outputs, y_train,
                                                    y_test)

        embeddings, speakers, number_embeddings = generate_embeddings(
            outputs, y_list, len(model))

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(number_embeddings)
        checkpoints = [self.network_file]

        return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
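Example #1, like all the examples below, delegates the train/test pairing to create_data_lists, which is not part of these snippets. The following is a minimal sketch inferred from the call sites, assuming the short_utterances flag only changes how the partitions are split; the actual implementation in the repository may differ:

    def create_data_lists(short_utterances, x_train, x_test, y_train, y_test,
                          s_train=None, s_test=None):
        # Sketch only: pair the long (train) and short (test) partitions so
        # callers can iterate over both with a single loop.
        x_list = [x_train, x_test]
        y_list = [y_train, y_test]
        s_list = [s_train, s_test]
        if short_utterances:
            # Presumably this variant filters or re-splits the test
            # partition; that logic is not reproduced in this sketch.
            pass
        return x_list, y_list, s_list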
Code Example #2
    def create_embeddings(self, X_train, y_train, X_test, y_test):
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')

        x_list, y_list, _ = create_data_lists(short_utterance, X_train, X_test,
                                              y_train, y_test)

        x_cluster_list = []
        y_cluster_list = []
        for x_data, y_data in zip(x_list, y_list):
            x_cluster, y_cluster = self._generate_cluster_data(x_data, y_data)
            x_cluster_list.append(x_cluster)
            y_cluster_list.append(y_cluster)

        # Load the network and add a BatchIterator for testing
        net = load(self.net_path)
        net.batch_iterator_test = BatchIterator(batch_size=128)

        # Predict the output for each partition
        outputs = [net.predict_proba(x_cluster)
                   for x_cluster in x_cluster_list]

        embeddings, speakers, number_embeddings = generate_embeddings(
            outputs, y_cluster_list, outputs[0].shape[1])

        # Calculate the time per utterance
        time = TimeCalculator.calc_time_all_utterances(
            y_cluster_list, self.config.getint('luvo', 'seg_size'))

        return embeddings, speakers, number_embeddings, time
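generate_embeddings appears in every example but is also not shown. A plausible minimal sketch, assuming it averages all output vectors that share a label into one embedding (the real aggregation in the repository may differ):

    import numpy as np

    def generate_embeddings(outputs_list, y_list, vector_size):
        # Sketch only: average all output vectors that share a label into a
        # single embedding; vector_size is the expected dimensionality
        # (call sites pass e.g. outputs[0].shape[1]).
        embeddings, speakers = [], []
        for outputs, y in zip(outputs_list, y_list):
            for label in np.unique(y):
                embeddings.append(np.mean(outputs[y == label], axis=0))
                speakers.append(label)
        return embeddings, speakers, len(embeddings)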
Code Example #3
    def get_embeddings(self):
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')
        logger = get_logger('kldiv', logging.INFO)
        logger.info('Run pairwise_kldiv')
        checkpoints = self.checkpoints

        X_train, y_train, s_list_train = load_test_data(
            self.get_validation_train_data())
        X_test, y_test, s_list_test = load_test_data(
            self.get_validation_test_data())

        x_list, y_list, s_list = create_data_lists(short_utterance, X_train,
                                                   X_test, y_train, y_test,
                                                   s_list_train, s_list_test)

        # Prepare return value
        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_total_times = []

        for checkpoint in checkpoints:
            logger.info('Run checkpoint: ' + checkpoint)
            network_file = get_experiment_nets(checkpoint)

            x_cluster_list = []
            y_cluster_list = []
            for x, y, s in zip(x_list, y_list, s_list):
                x_cluster, y_cluster = run_analysis_network(
                    network_file, x, y, s)
                x_cluster_list.append(x_cluster)
                y_cluster_list.append(y_cluster)

            embeddings, speakers, num_embeddings = generate_embeddings(
                x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])
            # Fill return values
            set_of_embeddings.append(embeddings)
            set_of_speakers.append(speakers)
            set_of_num_embeddings.append(num_embeddings)

            # Calculate the time per utterance
            time = TimeCalculator.calc_time_all_utterances(
                y_cluster_list,
                self.config.getint('pairwise_kldiv', 'seg_size'))
            set_of_total_times.append(time)

        return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_total_times
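Examples #2 and #3 (and #7 below) derive per-utterance times with TimeCalculator.calc_time_all_utterances. A minimal sketch of a consistent implementation, assuming each label in y_cluster_list covers one segment of seg_size frames and the result is a single array spanning all partitions, matching the zero arrays used in examples #1 and #6:

    import numpy as np

    class TimeCalculator:
        @staticmethod
        def calc_time_all_utterances(y_cluster_list, seg_size):
            # Sketch only: one time entry per label, each assumed to cover
            # exactly seg_size frames.
            return np.concatenate(
                [np.full(len(y), seg_size, dtype=int)
                 for y in y_cluster_list])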
Code Example #4
    def get_embeddings(self):
        # Get settings
        short_utterance = self.config.getboolean('test', 'short_utterances')
        out_layer = self.config.getint('pairwise_kldiv', 'out_layer')
        seg_size = self.config.getint('pairwise_kldiv', 'seg_size')
        self.logger.info('Run pairwise_kldiv')

        # Load and prepare test data
        X_long, y_long, s_list_long = load_test_data(self.get_validation_train_data())
        X_short, y_short, s_list_short = load_test_data(self.get_validation_test_data())
        X_long, y_long = self._prepare_data(X_long, y_long)
        X_short, y_short = self._prepare_data(X_short, y_short)

        x_list, y_list, _ = create_data_lists(short_utterance, X_long, X_short,
                                              y_long, y_short, s_list_long, s_list_short)

        return create_embeddings(self.config, self.checkpoints, x_list,
                                 y_list, out_layer, seg_size)
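Examples #3, #4 and #7 obtain their inputs from load_test_data. Judging from the three-way unpacking at the call sites, it presumably deserializes a (features, labels, speaker_names) tuple; a minimal sketch, assuming a pickle file:

    import pickle

    def load_test_data(data_path):
        # Sketch only: the call sites unpack three values, so the file is
        # assumed to hold a (features, labels, speaker_names) tuple.
        with open(data_path, 'rb') as f:
            X, y, s_list = pickle.load(f)
        return X, y, s_list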
Code Example #5
    def create_embeddings(self, X_train, y_train, X_test, y_test):
        seg_size = self.config.getint('luvo', 'seg_size')
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')

        x_train, speakers_train = prepare_data(X_train, y_train, seg_size)
        x_test, speakers_test = prepare_data(X_test, y_test, seg_size)

        x_list, y_list, _ = create_data_lists(short_utterance, x_train, x_test,
                                              speakers_train, speakers_test)

        # Load and compile the trained Keras model
        network_file = get_experiment_nets(self.network_name + ".h5")
        model_full = load_model(network_file)
        model_full.compile(loss='categorical_crossentropy',
                           optimizer='sgd',
                           metrics=['accuracy'])

        # Get a Model with the embedding layer as output and predict
        model_partial = Model(inputs=model_full.input,
                              outputs=model_full.layers[self.config.getint(
                                  'luvo', 'out_layer')].output)

        x_cluster_list = []
        y_cluster_list = []
        for x_data, y_data in zip(x_list, y_list):
            x_cluster = np.asarray(model_partial.predict(x_data))
            x_cluster_list.append(x_cluster)
            y_cluster_list.append(y_data)

        embeddings, speakers, num_embeddings = generate_embeddings(
            x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

        # Calculate the time per utterance
        time = TimeCalculator.calc_time_all_utterances(y_cluster_list,
                                                       seg_size)

        return embeddings, speakers, num_embeddings, time
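prepare_data (used in examples #4, #5 and #7) turns variable-length utterances into fixed-size segments for the network. A minimal sketch, assuming non-overlapping windows of seg_size frames along the last (time) axis and one speaker label per window:

    import numpy as np

    def prepare_data(X, y, seg_size):
        # Sketch only: slice each utterance into non-overlapping seg_size
        # windows and repeat its speaker label once per window.
        segments, labels = [], []
        for utterance, speaker in zip(X, y):
            for start in range(0, utterance.shape[-1] - seg_size + 1,
                               seg_size):
                segments.append(utterance[..., start:start + seg_size])
                labels.append(speaker)
        return np.asarray(segments), np.asarray(labels)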
Code Example #6
    def get_embeddings(self):
        '''
        Extract i-vectors for the long and short test utterances and return
        them as embeddings.
        '''
        speaker_list = self.get_validation_data_name()
        distrib_nb = self.config.getint('i_vector', 'distrib_nb')
        nbThread = self.config.getint('i_vector', 'nbThread')
        vector_size = self.config.getint('i_vector', 'vector_size')
        feature_extension = 'h5'

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_times = []
        checkpoints = ["/TV_{}".format(self.network_file)]

        # Load the UBM, the data lists and the total variability matrix
        ubm = sidekit.Mixture()
        ubm.read(get_experiment_nets() +
                 '/ubm_{}.h5'.format(self.network_file))
        ubm_list, test_list_long = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_train_data())[1])[0])
        ubm_list, test_list_short = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_test_data())[1])[0])
        tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
            get_experiment_nets() + "/TV_{}".format(self.network_file))

        fs = sidekit.FeaturesServer(feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=get_training('i_vector'),
                speaker_list=speaker_list,
                ext=feature_extension)),
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    keep_all_features=True,
                                    delta=True,
                                    double_delta=True,
                                    rasta=True,
                                    context=None)

        # Extract i-vectors
        test_stat_long = sidekit.StatServer(test_list_long,
                                            ubm=ubm,
                                            distrib_nb=distrib_nb,
                                            feature_size=0,
                                            index=None)
        test_stat_long.accumulate_stat(ubm=ubm,
                                       feature_server=fs,
                                       seg_indices=range(
                                           test_stat_long.segset.shape[0]),
                                       num_thread=nbThread)

        test_stat_short = sidekit.StatServer(test_list_short,
                                             ubm=ubm,
                                             distrib_nb=distrib_nb,
                                             feature_size=0,
                                             index=None)
        test_stat_short.accumulate_stat(ubm=ubm,
                                        feature_server=fs,
                                        seg_indices=range(
                                            test_stat_short.segset.shape[0]),
                                        num_thread=nbThread)

        test_iv_long = test_stat_long.estimate_hidden(tv_mean,
                                                      tv_sigma,
                                                      V=tv,
                                                      batch_size=100,
                                                      num_thread=nbThread)[0]
        test_iv_short = test_stat_short.estimate_hidden(tv_mean,
                                                        tv_sigma,
                                                        V=tv,
                                                        batch_size=100,
                                                        num_thread=nbThread)[0]

        iv_list, y_list, s_list = create_data_lists(
            False, test_iv_long.stat1, test_iv_short.stat1,
            test_list_long.leftids.astype(int),
            test_list_short.leftids.astype(int))

        # Generate embeddings
        embeddings, speakers, num_embeddings = generate_embeddings(
            iv_list, y_list, vector_size)

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)
        # No timing information is recorded for i-vectors, so report zeros
        set_of_times = [
            np.zeros(
                (len(test_list_long.leftids) + len(test_list_short.leftids), ),
                dtype=int)
        ]

        return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
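The i-vector example assumes self.load_data returns sidekit IdMap objects, whose leftids are later cast to integer speaker labels. load_data itself is not shown; for orientation, a hypothetical IdMap equivalent to test_list_long could be built like this:

    import numpy as np
    import sidekit

    # Hypothetical IdMap: leftids carry speaker labels, rightids the
    # corresponding feature file names; start/stop stay empty here.
    idmap = sidekit.IdMap()
    idmap.leftids = np.asarray(['1', '2'])
    idmap.rightids = np.asarray(['speaker1_seg0', 'speaker2_seg0'])
    idmap.start = np.empty(2, dtype='|O')
    idmap.stop = np.empty(2, dtype='|O')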
Code Example #7
    def get_embeddings(self):
        short_utterance = self.config.getboolean('validation',
                                                 'short_utterances')
        out_layer = self.config.getint('pairwise_lstm', 'out_layer')
        seg_size = self.config.getint('pairwise_lstm', 'seg_size')
        vec_size = self.config.getint('pairwise_lstm', 'vec_size')

        logger = get_logger('lstm', logging.INFO)
        logger.info('Run pairwise_lstm test')
        logger.info('out_layer -> ' + str(out_layer))
        logger.info('seg_size -> ' + str(seg_size))
        logger.info('vec_size -> ' + str(vec_size))

        # Load and prepare train/test data
        x_train, speakers_train, s_list_train = load_test_data(
            self.get_validation_train_data())
        x_test, speakers_test, s_list_test = load_test_data(
            self.get_validation_test_data())
        x_train, speakers_train = prepare_data(x_train, speakers_train,
                                               seg_size)
        x_test, speakers_test = prepare_data(x_test, speakers_test, seg_size)

        x_list, y_list, s_list = create_data_lists(short_utterance, x_train,
                                                   x_test, speakers_train,
                                                   speakers_test, s_list_train,
                                                   s_list_test)

        # Prepare return values
        set_of_embeddings = []
        set_of_speakers = []
        speaker_numbers = []
        set_of_total_times = []
        checkpoints = list_all_files(get_experiment_nets(),
                                     r"^pairwise_lstm.*\.h5")

        # Values out of the loop
        metrics = [
            'accuracy',
            'categorical_accuracy',
        ]
        loss = pairwise_kl_divergence
        custom_objects = {'pairwise_kl_divergence': pairwise_kl_divergence}
        optimizer = 'rmsprop'
        vector_size = vec_size  # e.g. 256 * 2

        # Fill return values
        for checkpoint in checkpoints:
            logger.info('Running checkpoint: ' + checkpoint)
            # Load and compile the trained network
            network_file = get_experiment_nets(checkpoint)
            model_full = load_model(network_file,
                                    custom_objects=custom_objects)
            model_full.compile(loss=loss, optimizer=optimizer, metrics=metrics)

            # Get a Model with the embedding layer as output and predict
            model_partial = Model(inputs=model_full.input,
                                  outputs=model_full.layers[out_layer].output)

            x_cluster_list = []
            y_cluster_list = []
            for x, y in zip(x_list, y_list):
                x_cluster = np.asarray(model_partial.predict(x))
                x_cluster_list.append(x_cluster)
                y_cluster_list.append(y)

            embeddings, speakers, num_embeddings = generate_embeddings(
                x_cluster_list, y_cluster_list, vector_size)

            # Fill the embeddings and speakers into the arrays
            set_of_embeddings.append(embeddings)
            set_of_speakers.append(speakers)
            speaker_numbers.append(num_embeddings)

            # Calculate the time per utterance
            time = TimeCalculator.calc_time_all_utterances(
                y_cluster_list, seg_size)
            set_of_total_times.append(time)

        logger.info('Pairwise_lstm test done.')
        return checkpoints, set_of_embeddings, set_of_speakers, speaker_numbers, set_of_total_times
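None of the snippets show how the returned values are consumed. A hypothetical downstream step, assuming the embeddings of one checkpoint are clustered agglomeratively and the dendrogram is cut at the true speaker count (the linkage method is an illustrative choice, not taken from the repository):

    import numpy as np
    from scipy.cluster.hierarchy import fcluster, linkage

    # Hypothetical usage of one checkpoint's results from get_embeddings():
    embeddings = np.asarray(set_of_embeddings[0])
    true_speakers = np.asarray(set_of_speakers[0])

    # Agglomerative clustering, cut at the known number of speakers.
    cluster_count = len(np.unique(true_speakers))
    links = linkage(embeddings, method='ward')
    predicted = fcluster(links, t=cluster_count, criterion='maxclust')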