Example 1
    def train_tv(self):
        # Create status servers
        self.__create_stats()

        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "stat",
                                                 tv_filename),
                                    ubm,
                                    tv_rank=self.TV_RANK,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)

        # Clear the per-iteration files produced during training
        filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
        lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
        for f in lst:
            os.remove(f)
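Once training finishes, the TV matrix written to outputPath can be loaded back for i-vector extraction, the same way Example 16 does with sidekit.sidekit_io.read_tv_hdf5. A minimal sketch, assuming the naming scheme above (the concrete path is an assumption):

import sidekit

# Hedged sketch: load the trained TV matrix back; the path is an assumption
# following the "tv_matrix_<NUM_GAUSSIANS>.h5" naming used above.
tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5("ivector/tv_matrix_512.h5")
print(tv.shape)  # roughly (distrib_nb * feature_dim, tv_rank)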
Example 2
    def evaluate(self, explain=True):
        """
        This method is used to score our trained model. 
        """
        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Load TV matrix
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser(outputPath + ".h5")

        # Extract i-vectors from enrollment data
        logging.info("Extracting i-vectors from enrollment data")
        filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        enroll_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', filename))
        enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                               stat_server=enroll_stat,
                                               uncertainty=False)

        # Extract i-vectors from test data
        logging.info("Extracting i-vectors from test data")
        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        test_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', filename))
        test_iv = fa.extract_ivectors_single(ubm=ubm,
                                             stat_server=test_stat,
                                             uncertainty=False)

        # Do cosine distance scoring and write results
        logging.info("Calculating cosine score")
        test_ndx = sidekit.Ndx.read(
            os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                       test_iv,
                                                       test_ndx,
                                                       wccn=None)
        # Write scores
        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GAUSSIANS)
        scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))

        # Explain the analysis by writing a more readable text file
        if explain:
            modelset = list(scores_cos.modelset)
            segset = list(scores_cos.segset)
            scores = np.array(scores_cos.scoremat)
            filename = "ivector_scores_explained_{}.txt".format(
                iv.NUM_GAUSSIANS)
            fout = open(os.path.join(iv.BASE_DIR, "result", filename), "a")
            fout.truncate(0)  #clear content
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
            fout.close()
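If a trial key is available, the saved scores can be turned into an equal error rate with sidekit's bosaris tools. A minimal sketch, assuming a bosaris Key file named test_trials.h5 (both paths are assumptions):

import sidekit
from sidekit.bosaris.detplot import rocch, rocch2eer

# Hedged sketch: compute an EER from the saved cosine scores.
scores = sidekit.Scores("result/ivector_scores_cos_512.h5")
key = sidekit.Key("task/test_trials.h5")  # assumed trial key
tar, non = scores.get_tar_non(key)  # split into target/non-target trial scores
pmiss, pfa = rocch(tar, non)  # ROC convex hull
print("EER: {:.2%}".format(rocch2eer(pmiss, pfa)))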
Example 3
    def train_ubm(self,
                  feature_dir,
                  speaker_list,
                  ubm_list,
                  distrib_nb,
                  feature_extension='h5',
                  num_threads=10):
        '''
        Train the GMM-UBM with the EM algorithm.
        '''

        self.logger.info('training UBM')

        fs = sidekit.FeaturesServer(feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=feature_dir,
                speaker_list=speaker_list,
                ext=feature_extension)),
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    keep_all_features=True,
                                    delta=True,
                                    double_delta=True,
                                    rasta=True,
                                    context=None)

        ubm = sidekit.Mixture()
        llk = ubm.EM_split(fs, ubm_list, distrib_nb, num_thread=num_threads)
        ubm.write(get_experiment_nets() +
                  '/ubm_{}.h5'.format(self.network_file))

        return ubm, fs
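For reference, a hypothetical call to this method; the instance name, directory layout, and sizes are placeholders, not taken from the original project:

# Hypothetical usage of train_ubm(); all values are placeholders.
ubm, fs = model.train_ubm(feature_dir="data/features",
                          speaker_list="train_speakers",
                          ubm_list=["spk001_utt001", "spk001_utt002"],  # feature-file basenames
                          distrib_nb=512,
                          num_threads=8)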
Example 4
def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    elif args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    # create feature server for loading feature from disk
    feature_server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=bool(args.delta),
        double_delta=bool(args.delta),
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)

    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')

    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')

    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
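A hypothetical command-line driver matching the attributes adaptation() reads (feat_type, delta, num_thread); the flag names are assumptions:

import argparse

# Hedged sketch of a CLI entry point for adaptation() above.
parser = argparse.ArgumentParser(description="GMM-UBM MAP adaptation")
parser.add_argument('--feat_type', choices=['mfcc', 'fb'], default='mfcc')
parser.add_argument('--delta', action='store_true')
parser.add_argument('--num_thread', type=int, default=4)
adaptation(parser.parse_args())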
Example 5
    def evaluate(self, explain=True):
        """
        This method is used to evaluate the test set. It does so by:
        - reading the test_ndx file that contains the test set
        - reading the trained UBM model and trained parameters (enroll_stat file)
        - evaluating the test set using gmm_scoring and writing the scores
        - if explain=True, writing the scores in a more readable way
        Args:
            explain (boolean): If True, write another text file that contains
            the same information as the ubm_scores file but in a more
            readable way.
        """
        ############################# READING ############################
        # Create Feature server
        server = self.createFeatureServer()
        # Read the index for the test data
        test_ndx = sidekit.Ndx.read(
            os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        # Read the UBM model
        ubm = sidekit.Mixture()
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
        enroll_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, "stat", filename))
        # MAP adaptation of enrollment speaker models
        enroll_sv = enroll_stat.adapt_mean_map_multisession(
            ubm=ubm,
            r=3  # MAP regulation factor
        )

        ############################ Evaluating ###########################
        # Compute scores
        scores_gmm_ubm = sidekit.gmm_scoring(ubm=ubm,
                                             enroll=enroll_sv,
                                             ndx=test_ndx,
                                             feature_server=server,
                                             num_thread=self.NUM_THREADS)
        # Save the model's Score object
        filename = "ubm_scores_{}.h5".format(self.NUM_GAUSSIANS)
        scores_gmm_ubm.write(os.path.join(self.BASE_DIR, "result", filename))

        # Explain the analysis by writing a more readable text file
        if explain:
            filename = "ubm_scores_{}_explained.txt".format(self.NUM_GAUSSIANS)
            fout = open(os.path.join(self.BASE_DIR, "result", filename), "w")
            modelset = list(scores_gmm_ubm.modelset)
            segset = list(scores_gmm_ubm.segset)
            scores = np.array(scores_gmm_ubm.scoremat)
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n"\
                        .format(speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
            fout.close()
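Note that the MAP regulation (relevance) factor here is r=3, whereas Example 4 uses 16: the smaller the factor, the more the adapted speaker means are pulled toward the enrollment data and away from the UBM.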
Example 6
    def train(self, SAVE_FLAG=True):
        #SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
        train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
        for i in range(len(train_list)):
            train_list[i] = train_list[i].split(".h5")[0]
        server = self.createFeatureServer("enroll")
        logging.info("Training...")
        ubm = sidekit.Mixture()
        # Expectation-Maximization estimation of the Mixture parameters.
        ubm.EM_split(
            features_server=server,  # sidekit.FeaturesServer used to load data
            feature_list=train_list,  # list of feature files to train the model
            distrib_nb=self.NUM_GUASSIANS,  # final number of Gaussian distributions
            iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),  # EM iterations at each split step
            num_thread=self.NUM_THREADS,  # number of threads for parallel computing
            save_partial=False  # if False, it only saves the last model
        )
        # -> 2 iterations of EM with 2   distributions
        # -> 2 iterations of EM with 4   distributions
        # -> 4 iterations of EM with 8   distributions
        # -> 4 iterations of EM with 16  distributions
        # -> 4 iterations of EM with 32  distributions
        # -> 4 iterations of EM with 64  distributions
        # -> 8 iterations of EM with 128 distributions
        # -> 8 iterations of EM with 256 distributions
        # -> 8 iterations of EM with 512 distributions
        # -> 8 iterations of EM with 1024 distributions
        model_dir = os.path.join(self.BASE_DIR, "ubm")
        modelname = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        logging.info("Saving the model {} at {}".format(modelname, model_dir))
        ubm.write(os.path.join(model_dir, modelname))

        # Read idmap for the enrolling data
        enroll_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
        # Create Statistic Server to store/process the enrollment data
        enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                         ubm=ubm)
        logging.debug(enroll_stat)

        # Compute the sufficient statistics for a list of sessions whose indices are segIndices.
        server.feature_filename_structure = os.path.join(
            self.BASE_DIR, "feat", "{}.h5")
        #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
        enroll_stat.accumulate_stat(ubm=ubm,
                                    feature_server=server,
                                    seg_indices=range(
                                        enroll_stat.segset.shape[0]))
        if SAVE_FLAG:
            # Save the status of the enroll data
            filename = "enroll_stat_{}.h5".format(self.NUM_GUASSIANS)
            enroll_stat.write(os.path.join(self.BASE_DIR, "ubm", filename))
Example 7
def train_ubm(**args):
    if args['feat_type'] == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'
    elif args['feat_type'] == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'

    utils.remove(features_folder)

    ubmlist = []
    if os.path.exists(os.getcwd() + '/log/aishell2_wavlist.log'):
        with open(os.getcwd() + '/log/aishell2_wavlist.log', 'r') as fobj:
            for i in fobj:
                ubmlist.append(i[0:-1])
    else:
        ubmlist = preprocess()

    # create feature server for loading feature from disk
    server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=args['delta'],
        double_delta=args['delta'],
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)
    # create Mixture object for training
    ubm = sidekit.Mixture()
    ubm.EM_split(server,
                 ubmlist,
                 args['distribNum'],
                 iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),
                 num_thread=args['num_thread'],
                 save_partial=True)
    # write trained ubm to disk
    ubm.write(os.getcwd() + '/model/ubm_512.h5')
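A minimal sketch of how this function might be invoked; the keyword names mirror the lookups inside train_ubm(), the values are placeholders:

# Hypothetical invocation with placeholder values.
train_ubm(feat_type='mfcc', delta=True, distribNum=512, num_thread=8)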
Example 8
def train_ubm(**args):
    if (args['feat_type'] == 'mfcc') or (args['feat_type'] == 'plp'):
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-19]"
    elif args['feat_type'] == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
    features_folder = os.getcwd() + '/{}_train_feature'.format(
        args['feat_type'])

    ubmlist = []
    try:
        with open(os.getcwd() + '/log/aishell2.log', 'r') as fobj:
            for i in fobj:
                ubmlist.append(i[0:-1])
    except FileNotFoundError:
        print('please generate the UBM wav list first')
        return  # nothing to train on

    # create feature server for loading feature from disk
    server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=args['delta'],
        double_delta=args['delta'],
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)
    # create Mixture object for training
    ubm = sidekit.Mixture()
    ubm.EM_split(server,
                 ubmlist,
                 args['distribNum'],
                 iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),
                 num_thread=args['num_thread'],
                 save_partial=True)
    # write trained ubm to disk
    ubm.write(os.getcwd() + '/model/ubm_512.h5')
Example 9
    def evaluate(self, explain=True):
        ############################# READING ############################
        # Create Feature server
        server = self.createFeatureServer()
        # Read the index for the test data
        test_ndx = sidekit.Ndx.read(
            os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        # Read the UBM model
        ubm = sidekit.Mixture()
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        ############################ Evaluating ###########################
        filename = "enroll_stat_{}.h5".format(self.NUM_GUASSIANS)
        enroll_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, "ubm", filename))
        # MAP adaptation of enrollment speaker models
        enroll_sv = enroll_stat.adapt_mean_map_multisession(
            ubm=ubm,
            r=3  # MAP regulation factor
        )
        # Compute scores
        scores_gmm_ubm = sidekit.gmm_scoring(ubm=ubm,
                                             enroll=enroll_sv,
                                             ndx=test_ndx,
                                             feature_server=server,
                                             num_thread=self.NUM_THREADS)
        # Save the model's Score object
        filename = "ubm_scores_{}.h5".format(self.NUM_GUASSIANS)
        scores_gmm_ubm.write(os.path.join(self.BASE_DIR, "result", filename))

        # Explain the analysis by writing a more readable text file
        if explain:
            filename = "ubm_scores_explained_{}.txt".format(self.NUM_GUASSIANS)
            fout = open(os.path.join(self.BASE_DIR, "result", filename), "w")
            modelset = list(scores_gmm_ubm.modelset)
            segset = list(scores_gmm_ubm.segset)
            scores = np.array(scores_gmm_ubm.scoremat)
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
            fout.close()
Example 10
    def data_init(self):
        # Read tv_idmap, and plda_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "idmap_tv.h5"))
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "idmap_plda.h5"))
        # Load UBM
        ubm = sidekit.Mixture()
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        # Create Feature Server
        fs = self.__createFeatureServer()

        # Create a joint StatServer for TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            logging.warning("Error merging tv_idmap & plda_idmap")
            return
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Jointly compute the sufficient statistics of TV and PLDA data
        #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "task", 'stat_back.h5'))
        # Load the sufficient statistics from TV training data
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "task",
                                                 'tv_stat.h5'),
                                    ubm,
                                    tv_rank=self.RANK_TV,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)
Example 11
    def evaluate(self):
        """
        This method is used to score our trained model. 
        """
        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Load TV matrix
        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser(outputPath + ".h5")

        # Extract i-vectors from enrollment data
        logging.info("Extracting i-vectors from enrollment data")
        enroll_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', 'enroll_stat_32.h5'))
        enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                               stat_server=enroll_stat,
                                               uncertainty=False)

        # Extract i-vectors from test data
        logging.info("Extracting i-vectors from test data")
        test_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', 'test_stat.h5'))
        test_iv = fa.extract_ivectors_single(ubm=ubm,
                                             stat_server=test_stat,
                                             uncertainty=False)

        # Do cosine distance scoring and write results
        logging.info("Calculating cosine score")
        test_ndx = sidekit.Ndx.read(
            os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                       test_iv,
                                                       test_ndx,
                                                       wccn=None)
        # Write scores
        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GUASSIANS)
        scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))
Example 12
    def train_tv(self):
        """
        This method is used to train the Total Variability (TV) matrix
        and save it into 'ivector' directory !! 
        """
        # Create status servers
        self.__create_stats()

        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "stat",
                                                 tv_filename),
                                    ubm,
                                    tv_rank=self.TV_RANK,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)
        # tv = fa.F # TV matrix
        # tv_mean = fa.mean # Mean vector
        # tv_sigma = fa.Sigma # Residual covariance matrix

        # Clear files produced at each iteration
        filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
        lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
        for f in lst:
            os.remove(f)
Example 13
train_feature_filename_structure = "./mfcc/train/{}.h5"
enroll_feature_filename_structure = "./mfcc/enroll/{}.h5"
test_feature_filename_structure = "./mfcc/test/{}.h5"

train_wavscp_path = os.path.join(project_dir, 'data/train/wav.scp')
enroll_wavscp_path = os.path.join(project_dir, 'data/enroll/wav.scp')
test_wavscp_path = os.path.join(project_dir, 'data/test/wav.scp')

print('get train feats')
# returns: uttId list, input filepath list, output feature list
ubmList, train_input_file_list, train_output_feats_list = basic_ops.get_info4mfcc(train_wavscp_path, project_dir, 'train')
basic_ops.make_mfcc_feats(ubmList, train_input_file_list, train_output_feats_list, nj)
server_train = basic_ops.get_feature_server(train_feature_filename_structure)

print('Train the UBM by EM')
ubm = sidekit.Mixture()
llk = ubm.EM_split(server_train, ubmList, components_num, num_thread=nj, save_partial=True)
ubm.write("/home/wcq/bird/task/ubm512.h5")


ubm = sidekit.Mixture('/home/wcq/bird/task/ubm512.h5')

print('get enroll feats')
enrollList, enroll_input_file_list, enroll_output_feats_list = basic_ops.get_info4mfcc(enroll_wavscp_path, project_dir, 'enroll')
basic_ops.make_mfcc_feats(enrollList, enroll_input_file_list, enroll_output_feats_list, nj)
server_enroll = basic_ops.get_feature_server(enroll_feature_filename_structure)

# prepare the IdMap for the enrollment models
models = []
segments = []
enroll_idmap = sidekit.IdMap()
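The snippet is cut off here; a hedged completion of the IdMap setup, following sidekit's usual leftids/rightids convention (deriving the speaker id from the utterance-id prefix is an assumption):

import numpy as np

# Hedged completion of the truncated IdMap setup: leftids hold model
# (speaker) names, rightids hold the session keys.
for utt in enrollList:
    models.append(utt.split('_')[0])  # assumed: speaker id is the prefix of the utterance id
    segments.append(utt)
enroll_idmap.leftids = np.array(models)
enroll_idmap.rightids = np.array(segments)
enroll_idmap.start = np.empty(len(segments), dtype="|O")  # no start/stop constraints
enroll_idmap.stop = np.empty(len(segments), dtype="|O")
assert enroll_idmap.validate()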
Example 14
    def __create_stats(self):
        """
        This private method is used to create Statistic Servers.
        TODO: post some more info
        """
        # Read tv_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
        back_idmap = tv_idmap
        # If PLDA is enabled
        if self.ENABLE_PLDA:
            # Read plda_idmap
            plda_idmap = sidekit.IdMap.read(
                os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
            # Create a joint StatServer for TV and PLDA training data
            back_idmap = plda_idmap.merge(tv_idmap)
            if not back_idmap.validate():
                raise RuntimeError("Error merging tv_idmap & plda_idmap")

        # Check UBM model
        ubm_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm_path = os.path.join(self.BASE_DIR, "ubm", ubm_name)
        if not os.path.exists(ubm_path):
            #if UBM model does not exist, train one
            logging.info("Training UBM-{} model".format(self.NUM_GAUSSIANS))
            ubm = UBM(self.conf_path)
            ubm.train()
        #load trained UBM model
        logging.info("Loading trained UBM-{} model".format(self.NUM_GAUSSIANS))
        ubm = sidekit.Mixture()
        ubm.read(ubm_path)
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Create Feature Server
        fs = self.createFeatureServer()

        # Jointly compute the sufficient statistics of TV and (if enabled) PLDA data
        back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(
                os.path.join(self.BASE_DIR, "stat", back_filename)):
            #BUG: don't use self.NUM_THREADS when assigning num_thread
            # as it's prone to race conditions
            back_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          back_stat.segset.shape[0]))
            back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))

        # Load the sufficient statistics from TV training data
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat",
                                           tv_filename)):
            tv_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
            tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))

        # Load sufficient statistics and extract i-vectors from PLDA training data
        if self.ENABLE_PLDA:
            plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
            if not os.path.isfile(
                    os.path.join(self.BASE_DIR, "stat", plda_filename)):
                plda_stat = sidekit.StatServer.read_subset(
                    os.path.join(self.BASE_DIR, "stat", back_filename),
                    plda_idmap)
                plda_stat.write(
                    os.path.join(self.BASE_DIR, "stat", plda_filename))

        # Load sufficient statistics from test data
        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
            test_idmap = sidekit.IdMap.read(
                os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
            test_stat = sidekit.StatServer(statserver_file_name=test_idmap,
                                           ubm=ubm)
            # Create Feature Server
            fs = self.createFeatureServer()
            # Jointly compute the sufficient statistics of TV and PLDA data
            #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
            test_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          test_stat.segset.shape[0]))
            test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
Example 15
    def train(self, SAVE=True):
        """
        This method is used to train our UBM model by doing the following:
        - create a FeaturesServer for the enroll features
        - use the EM algorithm to train our UBM over the enroll features
        - create a StatServer to save the trained parameters
        - if the SAVE argument is True (the default), save that StatServer
        Args:
            SAVE (boolean): if True, then it will save the StatServer. If
                False, then the StatServer will be discarded.
        """
        #SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
        train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
        for i in range(len(train_list)):
            train_list[i] = train_list[i].split(".h5")[0]
        server = self.createFeatureServer("enroll")
        logging.info("Training...")
        ubm = sidekit.Mixture()
        # Set the model name
        ubm.name = "ubm_{}.h5".format(self.NUM_GAUSSIANS) 
        # Expectation-Maximization estimation of the Mixture parameters.
        ubm.EM_split(
            features_server=server, #sidekit.FeaturesServer used to load data
            feature_list=train_list, #list of feature files to train the model
            distrib_nb=self.NUM_GAUSSIANS, #number of Gaussian distributions
            num_thread=self.NUM_THREADS, # number of parallel processes
            save_partial=False, # if False, it only saves the last model
            iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
            )
            # -> 2 iterations of EM with 2    distributions
            # -> 2 iterations of EM with 4    distributions
            # -> 4 iterations of EM with 8    distributions
            # -> 4 iterations of EM with 16   distributions
            # -> 4 iterations of EM with 32   distributions
            # -> 4 iterations of EM with 64   distributions
            # -> 8 iterations of EM with 128  distributions
            # -> 8 iterations of EM with 256  distributions
            # -> 8 iterations of EM with 512  distributions
            # -> 8 iterations of EM with 1024 distributions
        model_dir = os.path.join(self.BASE_DIR, "ubm")
        logging.info("Saving the model {} at {}".format(ubm.name, model_dir))
        ubm.write(os.path.join(model_dir, ubm.name))

        # Read idmap for the enrolling data
        enroll_idmap = sidekit.IdMap.read(os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
        # Create Statistic Server to store/process the enrollment data
        enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                         ubm=ubm)
        logging.debug(enroll_stat)

        server.feature_filename_structure = os.path.join(self.BASE_DIR, "feat", "{}.h5")
        # Compute the sufficient statistics for a list of sessions whose indices are segIndices.
        #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
        enroll_stat.accumulate_stat(ubm=ubm,
                                    feature_server=server,
                                    seg_indices=range(enroll_stat.segset.shape[0])
                                   )
        if SAVE:
            # Save the status of the enroll data
            filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
            enroll_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
Example 16
    def get_embeddings(self):
        '''
        Extract i-vector embeddings for the validation data (testing step).
        '''
        speaker_list = self.get_validation_data_name()
        distrib_nb = self.config.getint('i_vector', 'distrib_nb')
        nbThread = self.config.getint('i_vector', 'nbThread')
        vector_size = self.config.getint('i_vector', 'vector_size')
        feature_extension = 'h5'

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_times = []
        checkpoints = ["/TV_{}".format(self.network_file)]

        #load data:
        ubm = sidekit.Mixture()
        ubm.read(get_experiment_nets() +
                 '/ubm_{}.h5'.format(self.network_file))
        ubm_list, test_list_long = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_train_data())[1])[0])
        ubm_list, test_list_short = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_test_data())[1])[0])
        tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
            get_experiment_nets() + "/TV_{}".format(self.network_file))

        fs = sidekit.FeaturesServer(feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=get_training('i_vector'),
                speaker_list=speaker_list,
                ext=feature_extension)),
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    keep_all_features=True,
                                    delta=True,
                                    double_delta=True,
                                    rasta=True,
                                    context=None)

        # extract i-vectors
        test_stat_long = sidekit.StatServer(test_list_long,
                                            ubm=ubm,
                                            distrib_nb=distrib_nb,
                                            feature_size=0,
                                            index=None)
        test_stat_long.accumulate_stat(ubm=ubm,
                                       feature_server=fs,
                                       seg_indices=range(
                                           test_stat_long.segset.shape[0]),
                                       num_thread=nbThread)

        test_stat_short = sidekit.StatServer(test_list_short,
                                             ubm=ubm,
                                             distrib_nb=distrib_nb,
                                             feature_size=0,
                                             index=None)
        test_stat_short.accumulate_stat(ubm=ubm,
                                        feature_server=fs,
                                        seg_indices=range(
                                            test_stat_short.segset.shape[0]),
                                        num_thread=nbThread)

        test_iv_long = test_stat_long.estimate_hidden(tv_mean,
                                                      tv_sigma,
                                                      V=tv,
                                                      batch_size=100,
                                                      num_thread=nbThread)[0]
        test_iv_short = test_stat_short.estimate_hidden(tv_mean,
                                                        tv_sigma,
                                                        V=tv,
                                                        batch_size=100,
                                                        num_thread=nbThread)[0]

        iv_lis, y_list, s_list = create_data_lists(
            False, test_iv_long.stat1, test_iv_short.stat1,
            test_list_long.leftids.astype(int),
            test_list_short.leftids.astype(int))

        #generate embeddings
        embeddings, speakers, num_embeddings = generate_embeddings(
            iv_lis, y_list, vector_size)

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)
        set_of_times = [
            np.zeros(
                (len(test_list_long.leftids) + len(test_list_short.leftids), ),
                dtype=int)
        ]

        return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
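Note that estimate_hidden() returns a tuple whose first element is a StatServer of hidden variables, so the [0] index above selects it; its stat1 matrix holds one i-vector per row, which is why .stat1 is what gets passed on to create_data_lists().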
Example 17
test_wavscp_path = os.path.join(project_dir, 'data/test/wav.scp')

train_feature_filename_structure = "./mfcc/train/{}.h5"
enroll_feature_filename_structure = "./mfcc/enroll/{}.h5"
test_feature_filename_structure = "./mfcc/test/{}.h5"

train_ivecs_stat_path = './exp/train_ivecs_stat'
enroll_ivecs_stat_path = './exp/enroll_ivecs_stat'
test_ivecs_stat_path = './exp/test_ivecs_stat'

train_stats_path = './task/train_stat.h5'
enroll_stats_path = './task/enroll_stat.h5'
test_stats_path = './task/test_stat.h5'

ubm_path = 'task/ubm512.h5'
ubm = sidekit.Mixture(ubm_path)

print("Acc the train stats")
train_idmap = get_idmap(train_wavscp_path)
train_feature_server = basic_ops.get_feature_server(
    train_feature_filename_structure)
train_stat_server = get_stat_server(ubm, train_idmap, train_feature_server,
                                    train_stats_path)

print("Train the T")
# use multiple processes on one node to train the T (total variability) space
fa = sidekit.FactorAnalyser()
fa.total_variability(train_stats_path,
                     ubm,
                     tv_rank,
                     nb_iter=10,
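                     min_div=True,
                     tv_init=None,
                     save_init=False,
                     output_file_name='./task/tv_matrix',  # assumed output path
                     num_thread=nj)  # nj: job count defined earlier in this script (cf. Example 13)
# NOTE: the original snippet is truncated mid-call; the trailing keyword
# arguments above are an assumed completion mirroring the
# total_variability_single() calls in the earlier examples.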
Example 18
    def __create_stats(self):
        # Read tv_idmap, and plda_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
        # Create a joint StatServer for TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            raise RuntimeError("Error merging tv_idmap & plda_idmap")

        # Load UBM
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Create Feature Server
        fs = self.createFeatureServer()

        # Jointly compute the sufficient statistics of TV and PLDA data
        back_filename = 'back_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(
                os.path.join(self.BASE_DIR, "stat", back_filename)):
            #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
            back_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          back_stat.segset.shape[0]))
            back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))

        # Load the sufficient statistics from TV training data
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat",
                                           tv_filename)):
            tv_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
            tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))

        # Load sufficient statistics and extract i-vectors from PLDA training data
        plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(
                os.path.join(self.BASE_DIR, "stat", plda_filename)):
            plda_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), plda_idmap)
            plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))

        # Load sufficient statistics from test data
        filename = 'test_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
            test_idmap = sidekit.IdMap.read(
                os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
            test_stat = sidekit.StatServer(statserver_file_name=test_idmap,
                                           ubm=ubm)
            # Create Feature Server
            fs = self.createFeatureServer()
            # Jointly compute the sufficient statistics of TV and PLDA data
            #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
            test_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          test_stat.segset.shape[0]))
            test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
Example 19
									pre_emphasis=0.97,
									save_param=["energy", "cep", "fb"],
									keep_all_features=True)

server = sidekit.FeaturesServer(features_extractor=extractor,
								feature_filename_structure="chunks_features/all_train/{}.h5",
								# sources=None,
								dataset_list=["energy", "cep", "fb"],
								mask="[0-12]",
								feat_norm="cmvn",
								global_cmvn=None,
								dct_pca=False,
								dct_pca_config=None,
								sdc=False,
								# sdc_config=(1,3,7),
								delta=True,
								double_delta=True,
								delta_filter=None,
								context=None,
								traps_dct_nb=None,
								rasta=False,
								keep_all_features=True)


print('Train the UBM by EM')
# Extract all features and train a GMM without writing to disk
ubm = sidekit.Mixture()
llk = ubm.EM_split(server, utter_list, distribNb)  # , num_thread=nbThread)
with open("ubm_64.pkl", "wb") as f:
    pickle.dump(ubm, f)
ubm.write('gmm/ubm_train_64.h5')
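The pickled copy can be restored later without sidekit's HDF5 reader; a small sketch:

import pickle

# Load the pickled UBM back (counterpart to the dump above).
with open("ubm_64.pkl", "rb") as f:
    ubm = pickle.load(f)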
Example 20
import sidekit as skk
import os
import sys
import multiprocessing
import logging
import numpy as np
import soundfile as sf
import htkmfc as hhtk

from matplotlib import pyplot as plt

print('Start')
signal, samplerate = sf.read('sw02289.sph')

Time = np.linspace(0, len(signal) / samplerate, num=len(signal))

# plt.figure(1)
# plt.title('Signal Wave...')
# plt.plot(Time,signal)
# plt.show()

# datahtk = np.reshape(signal[:,0], (1,lala))

a = skk.mfcc(signal[:, 0])  # compute MFCC features from the first audio channel

ubm = skk.Mixture()

#x= skk.FeaturesExtractor(feature_filename_structure='sw02289.sph',shift=0.01,sampling_frequency=16000, window_size=0.025);

f, (ax1, ax2) = plt.subplots(2, sharex=True)
ax1.plot(a[0])
ax2.plot(a[1])
plt.show()