def evaluate(self, explain=True):
        """
        This method is used to score our trained model. 
        """
        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Load TV matrix
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser(outputPath + ".h5")

        # Extract i-vectors from enrollment data
        logging.info("Extracting i-vectors from enrollment data")
        filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        enroll_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', filename))
        enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                               stat_server=enroll_stat,
                                               uncertainty=False)

        # Extract i-vectors from test data
        logging.info("Extracting i-vectors from test data")
        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        test_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', filename))
        test_iv = fa.extract_ivectors_single(ubm=ubm,
                                             stat_server=test_stat,
                                             uncertainty=False)

        # Do cosine distance scoring and write results
        logging.info("Calculating cosine score")
        test_ndx = sidekit.Ndx.read(
            os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                       test_iv,
                                                       test_ndx,
                                                       wccn=None)
        # Write scores
        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GAUSSIANS)
        scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))

        # Explain the analysis by writing a more readable text file
        if explain:
            modelset = list(scores_cos.modelset)
            segset = list(scores_cos.segset)
            scores = np.array(scores_cos.scoremat)
            filename = "ivector_scores_explained_{}.txt".format(
                self.NUM_GAUSSIANS)
            # Open in write mode so any previous content is discarded
            fout = open(os.path.join(self.BASE_DIR, "result", filename), "w")
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
            fout.close()
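The cosine score matrix written above can be turned into an equal error rate (EER) with plain NumPy. The sketch below is a minimal, self-contained example; `target_mask` is a hypothetical boolean array of the same shape as `scoremat` that marks which (speaker, segment) trials are genuine, and is not produced by the code above.

import numpy as np

def compute_eer(scoremat, target_mask):
    """Equal error rate for a score matrix and a boolean target mask (assumed given)."""
    tar = scoremat[target_mask]      # scores of genuine trials
    non = scoremat[~target_mask]     # scores of impostor trials
    thresholds = np.sort(np.concatenate([tar, non]))
    # False-rejection / false-acceptance rates swept over every threshold
    frr = np.array([(tar < t).mean() for t in thresholds])
    far = np.array([(non >= t).mean() for t in thresholds])
    idx = np.argmin(np.abs(frr - far))
    return (frr[idx] + far[idx]) / 2.0

# e.g.: eer = compute_eer(np.array(scores_cos.scoremat), target_mask)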
Example #2
def adapt_plda(input_diar, model, features_server):
    # TODO: speakers that do not have enough sessions should be removed
    idmap_in = input_diar.id_map()
    ivectors = model.train(features_server, idmap_in, normalization=False) # extract i-vectors on the current document

    # Estimate spherical-normalization statistics for the i-vectors
    norm_mean, norm_cov = ivectors.estimate_spectral_norm_stat1(1, 'sphNorm')

    # Train PLDA
    plda_fa = sidekit.FactorAnalyser()

    plda_fa.plda(ivectors,
                 rank_f=20,
                 nb_iter=10,
                 scaling_factor=1.,
                 output_file_name=None,
                 save_partial=False)

    model.sn_mean = norm_mean
    model.sn_cov = norm_cov
    model.plda_mean = plda_fa.mean
    model.plda_f = plda_fa.F
    model.plda_g = plda_fa.G
    model.plda_sigma = plda_fa.Sigma

    return model
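The norm_mean/norm_cov statistics estimated above are intended for spherical normalization of i-vectors before PLDA scoring. As an illustration only (a NumPy sketch, not sidekit's own routine), spherical normalization amounts to whitening with the estimated mean and covariance followed by length normalization:

import numpy as np

def spherical_normalize(ivectors, mean, cov):
    """Whiten i-vectors with (mean, cov), then project each one onto the unit sphere."""
    eigvals, eigvecs = np.linalg.eigh(cov)              # cov is symmetric PSD
    whitener = eigvecs @ np.diag(eigvals ** -0.5) @ eigvecs.T
    whitened = (ivectors - mean) @ whitener.T
    return whitened / np.linalg.norm(whitened, axis=1, keepdims=True)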
Example #3
    def train_tv(self):
        """
        Train the Total Variability (TV) matrix and save it into the
        'ivector' directory.
        """
        # Create the statistic servers
        self.__create_stats()

        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "stat",
                                                 tv_filename),
                                    ubm,
                                    tv_rank=self.TV_RANK,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)

        filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
        lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
        for f in lst:
            os.remove(f)
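Once the TV matrix is saved, it can be loaded back into a FactorAnalyser and used to extract i-vectors from previously accumulated statistics, exactly as the evaluation code in the first example does. A condensed, standalone sketch; BASE_DIR, NUM_GAUSSIANS and the stats file name are placeholders for the same layout used above:

import os
import sidekit

# Placeholders mirroring the directory layout used in these examples
BASE_DIR = "/path/to/experiment"
NUM_GAUSSIANS = 32

ubm = sidekit.Mixture()
ubm.read(os.path.join(BASE_DIR, "ubm", "ubm_{}.h5".format(NUM_GAUSSIANS)))
fa = sidekit.FactorAnalyser(
    os.path.join(BASE_DIR, "ivector", "tv_matrix_{}.h5".format(NUM_GAUSSIANS)))
stat = sidekit.StatServer.read(
    os.path.join(BASE_DIR, "stat", "enroll_stat_{}.h5".format(NUM_GAUSSIANS)))
ivectors = fa.extract_ivectors_single(ubm=ubm,
                                      stat_server=stat,
                                      uncertainty=False)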
Example #4
    def data_init(self):
        # Read tv_idmap, and plda_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "idmap_tv.h5"))
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "idmap_plda.h5"))
        # Load UBM
        ubm = sidekit.Mixture()
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        # Create Feature Server
        fs = self.__createFeatureServer()

        # Create a joint StatServer for TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            logging.warning("Error merging tv_idmap & plda_idmap")
            return
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Jointly compute the sufficient statistics of TV and PLDA data
        # BUG: don't use self.NUM_THREADS when assigning num_thread, as it's prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "task", 'stat_back.h5'))
        # Load the sufficient statistics from TV training data
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "task",
                                                 'tv_stat.h5'),
                                    ubm,
                                    tv_rank=self.RANK_TV,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)
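data_init() stops after training the TV matrix; the PLDA portion of the joint statistics written to 'stat_back.h5' still has to be turned into i-vectors and a PLDA model. A possible continuation that could be appended to the end of data_init, using only sidekit calls that already appear in these examples (the PLDA rank and iteration count are illustrative):

        # Load the PLDA subset of the joint statistics, extract i-vectors,
        # and train a PLDA model on them (rank/iterations are illustrative).
        plda_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), plda_idmap)
        fa = sidekit.FactorAnalyser(outputPath + ".h5")  # reload the trained TV matrix
        plda_iv = fa.extract_ivectors_single(ubm=ubm,
                                             stat_server=plda_stat,
                                             uncertainty=False)
        plda_fa = sidekit.FactorAnalyser()
        plda_fa.plda(plda_iv, rank_f=20, nb_iter=10)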
Example #5
    def evaluate(self):
        """
        This method is used to score our trained model. 
        """
        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Load TV matrix
        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser(outputPath + ".h5")

        # Extract i-vectors from enrollment data
        logging.info("Extracting i-vectors from enrollment data")
        filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        enroll_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', filename))
        enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                               stat_server=enroll_stat,
                                               uncertainty=False)

        # Extract i-vectors from test data
        logging.info("Extracting i-vectors from test data")
        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        test_stat = sidekit.StatServer.read(
            os.path.join(self.BASE_DIR, 'stat', filename))
        test_iv = fa.extract_ivectors_single(ubm=ubm,
                                             stat_server=test_stat,
                                             uncertainty=False)

        # Do cosine distance scoring and write results
        logging.info("Calculating cosine score")
        test_ndx = sidekit.Ndx.read(
            os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                       test_iv,
                                                       test_ndx,
                                                       wccn=None)
        # Write scores
        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GUASSIANS)
        scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))

    def train_tv(self):
        """
        Train the Total Variability (TV) matrix and save it into the
        'ivector' directory.
        """
        # Create the statistic servers
        self.__create_stats()

        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "stat",
                                                 tv_filename),
                                    ubm,
                                    tv_rank=self.TV_RANK,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)
        # tv = fa.F # TV matrix
        # tv_mean = fa.mean # Mean vector
        # tv_sigma = fa.Sigma # Residual covariance matrix

        # Clear files produced at each iteration
        filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
        lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
        for f in lst:
            os.remove(f)
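The methods in this example rely on a handful of configuration attributes (directory layout, UBM size, TV rank, and so on) defined elsewhere in the class. A minimal, hypothetical scaffold, just to make the assumed attribute names concrete:

class IVectorConfigSketch:
    # Hypothetical values; the real class defines these to match its data and setup.
    BASE_DIR = "/path/to/experiment"  # root holding ubm/, stat/, ivector/, task/, result/
    NUM_GAUSSIANS = 32                # UBM size, matches the ubm_<N>.h5 file name
    TV_RANK = 400                     # i-vector dimensionality
    TV_ITERATIONS = 10                # EM iterations for the TV matrix
    BATCH_SIZE = 30                   # batch size when processing statistics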
Example #7
def train_net(net, train_dataloader, test_dataloader):
    if not os.path.exists(ConfigNetwork.modelname):
        last_model_loaded = False
        # inits
        iteration_number = 0
        for epoch in range(0, ConfigNetwork.train_number_epochs):
            """
            if ConfigNetwork.learning_rate_scheduler:
                optimizer = optim.Adam(net.parameters(),
                                       lr = ConfigNetwork.learning_rate)
                scheduler = ReduceLROnPlateau(optimizer, 'min')
            else:
            """
            epoch_learning_rate_exponent = max(
                0,
                epoch - (ConfigNetwork.learning_rate_defactor_after_epoch - 1))
            lr = ConfigNetwork.learning_rate * ConfigNetwork.learning_rate_defactor**epoch_learning_rate_exponent
            parameters = filter(lambda p: p.requires_grad, net.parameters())
            if ConfigNetwork.train_vae:
                optimizer = pyro_optim.Adam({'lr': lr})
            else:
                optimizer = optim.Adam(parameters, lr=lr)
            base_file_pattern = os.path.join(
                ConfigNetwork.storage_dir,
                '{}_epoch_{}'.format(ConfigNetwork.modelname, epoch))
            epoch_net_file = '{}_model'.format(base_file_pattern)
            if epoch < ConfigNetwork.freeze_ResNet_epochs:
                net.set_ResNet_requires_grad(requires_grad=False)
            else:
                net.set_ResNet_requires_grad(requires_grad=True)
            if not os.path.exists(epoch_net_file) and epoch == 0:
                # init meta embeddings network
                if ConfigNetwork.train_with_meta_embeddings:
                    logging.debug('init B with plda expectation')
                    if not os.path.exists(ConfigNetwork.embeddings_file):
                        dataset = SoftMaxDatabase(
                            imageFolderDataset=ConfigFaceDatasets.
                            dataset_class(
                                root=ConfigFaceDatasets.training_dir),
                            transform=train_dataloader.dataset.transform,
                            should_invert=False)
                        embeddings_loader = DataLoader(
                            dataset,
                            shuffle=False,
                            num_workers=ConfigNetwork.num_workers,
                            batch_size=ConfigNetwork.batch_size_train)
                        softmax_net = net.to_softmaxNetwork()
                        softmax_net.normalize = False
                        with h5py.File(ConfigNetwork.embeddings_file,
                                       "a") as embd_file:
                            for i, data in enumerate(embeddings_loader, 0):
                                img0, label = data
                                img0, label = Variable(img0).cuda(), Variable(
                                    label).cuda()
                                output0 = super(SoftMaxNetwork,
                                                softmax_net).forward_once(img0)
                                embd_file.create_dataset(
                                    "{}".format(i),
                                    data=numpy.column_stack(
                                        (output0.data.cpu().numpy(),
                                         label.data.cpu().numpy())),
                                    compression="gzip",
                                    fletcher32=True)
                            logging.critical('extracted embeddings')
                    if not os.path.exists(
                            ConfigNetwork.embeddings_file_plda
                    ) or not os.path.exists(
                            ConfigNetwork.embeddings_mean_file):
                        data = []
                        with h5py.File(ConfigNetwork.embeddings_file,
                                       "r") as h5f:
                            for key, value in h5f.items():
                                data.append(value.value)
                        data = numpy.concatenate(data)
                        embeddings = data[:, :ConfigNetwork.embedding_size]
                        embeddings_mean = embeddings.mean(0)
                        numpy.save(ConfigNetwork.embeddings_mean_file,
                                   embeddings_mean)
                        logging.debug('embeddings mean: {}'.format(
                            embeddings.mean(0)))
                        embeddings -= embeddings.mean(0)
                        embeddings = (embeddings.T / numpy.linalg.norm(
                            embeddings, axis=1)).T  # prepare cosine distance
                        embedding_labels = data[:, ConfigNetwork.
                                                embedding_size:].squeeze()

                        s = sidekit.StatServer()
                        s.modelset = embedding_labels
                        s.segset = numpy.arange(
                            embedding_labels.shape[0]).astype(str)
                        s.stat0 = numpy.ones((embedding_labels.shape[0], 1))
                        s.stat1 = copy.deepcopy(embeddings)
                        s.start = numpy.empty(embedding_labels.shape[0],
                                              dtype='|O')
                        s.stop = numpy.empty(embedding_labels.shape[0],
                                             dtype='|O')
                        s.validate()
                        ids = numpy.unique(s.modelset)
                        class_nb = ids.shape[0]

                        f = sidekit.FactorAnalyser()
                        rank_f = ConfigNetwork.embedding_size
                        f.plda(s, rank_f=rank_f)
                        f.write(ConfigNetwork.embeddings_file_plda)
                    else:
                        f = sidekit.FactorAnalyser(
                            ConfigNetwork.embeddings_file_plda)

                    e_mu = torch.from_numpy(f.mean).type(torch.FloatTensor)
                    e_B = torch.from_numpy(
                        numpy.linalg.inv(f.Sigma).diagonal()).type(
                            torch.FloatTensor)
                    # e_B = torch.from_numpy(numpy.linalg.inv(f.Sigma)).type(torch.FloatTensor)
                    assert (isinstance(net, GME_SoftmaxNetwork))
                    net = GME_SoftmaxNetwork(
                        num_train_classes=net.num_train_classes,
                        pretrained_siamese_net=net.pretrained_net,
                        expected_mu=e_mu,
                        expected_B=e_B).cuda()
                    logging.debug('init B with plda done')

            if not os.path.exists(epoch_net_file):
                if last_model_loaded:
                    logging.critical(
                        'run validation on epoch {}'.format(epoch - 1))
                    test_model(database_dir=test_dataloader,
                               net=net,
                               net_distance=net_distance,
                               epoch=None)
                    last_model_loaded = False

                if ConfigNetwork.select_difficult_pairs_epoch is not None:
                    if epoch == ConfigNetwork.select_difficult_pairs_epoch:
                        train_dataloader = select_difficult_pairs(
                            net, train_dataloader)

                # train an epoch
                net.train()
                train_epoch(train_dataloader=train_dataloader,
                            net=net,
                            optimizer=optimizer,
                            epoch=epoch,
                            iteration_number=iteration_number)
                torch.save(obj=net.state_dict(), f=epoch_net_file)
            else:
                net.load_state_dict(torch.load(epoch_net_file))
                logging.info('loaded model for epoch: {}'.format(epoch))
                last_model_loaded = True
                continue

            logging.critical('run validation on epoch {}'.format(epoch))
            test_model(database_dir=test_dataloader,
                       net=net,
                       net_distance=net_distance,
                       epoch=None)
        torch.save(obj=net.state_dict(),
                   f='{}'.format(ConfigNetwork.modelname))
        logging.info('training completed, model stored.')
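The per-epoch learning rate in the loop above follows a simple exponential decay that only starts after a configured epoch. Pulled out of the training loop for clarity (the default values here are illustrative, not taken from ConfigNetwork):

def epoch_learning_rate(epoch, base_lr=1e-3, defactor=0.5, defactor_after_epoch=5):
    """Exponentially decay base_lr, starting after `defactor_after_epoch` epochs."""
    exponent = max(0, epoch - (defactor_after_epoch - 1))
    return base_lr * defactor ** exponent

# With these defaults: epochs 0-4 keep 1e-3, epoch 5 gives 5e-4, epoch 6 gives 2.5e-4, ...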
Example #8
enroll_stats_path = './task/enroll_stat.h5'
test_stats_path = './task/test_stat.h5'

ubm_path = 'task/ubm512.h5'
ubm = sidekit.Mixture(ubm_path)

print("Acc the train stats")
train_idmap = get_idmap(train_wavscp_path)
train_feature_server = basic_ops.get_feature_server(
    train_feature_filename_structure)
train_stat_server = get_stat_server(ubm, train_idmap, train_feature_server,
                                    train_stats_path)

print("Train the T")
# multiprocess on one node for train T space
fa = sidekit.FactorAnalyser()
fa.total_variability(train_stats_path,
                     ubm,
                     tv_rank,
                     nb_iter=10,
                     min_div=True,
                     tv_init=None,
                     batch_size=2000,
                     save_init=True,
                     output_file_name=TV_matrix_path,
                     num_thread=nj)

print("Extract train ivectors")
train_ivecs_stat = fa.extract_ivectors(ubm,
                                       train_stats_path,
                                       uncertainty=False)
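The fragment stops after extracting training i-vectors; enrollment and test i-vectors would be extracted the same way and then scored. A hedged continuation using only calls that already appear in these examples (the trial-list path 'test_ndx.h5', the output path, and the existence of the stats files declared at the top of the script are assumptions):

print("Extract enroll/test ivectors and score them")
enroll_ivecs = fa.extract_ivectors(ubm, enroll_stats_path, uncertainty=False)
test_ivecs = fa.extract_ivectors(ubm, test_stats_path, uncertainty=False)

# Trial list and output location are assumed paths
test_ndx = sidekit.Ndx.read('./task/test_ndx.h5')
scores = sidekit.iv_scoring.cosine_scoring(enroll_ivecs,
                                           test_ivecs,
                                           test_ndx,
                                           wccn=None)
scores.write('./task/scores_cos.h5')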