Example 1
def make_feature_server(dirname, frame_size):
    """Return a Sidekit FeatureServer instance for this
    experiement
    """

    sampling_frequency = 16000
    # The window must span 2*frame_size + 1 samples to give the right number of
    # FFT points; since we can't zero-pad, each frame takes in more of the signal.
    window_size = (2 * frame_size + 1) / sampling_frequency
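    # Worked example (illustrative): frame_size=256 gives
    # window_size = 513 / 16000 ~ 0.032 s, i.e. ~32 ms windows with an 8 ms shift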
    shift = 0.008

    # make a feature server to compute features over our audio files
    extractor = sidekit.FeaturesExtractor(audio_filename_structure=dirname + "/{}.wav",
                                          sampling_frequency=sampling_frequency,
                                          lower_frequency=0,
                                          higher_frequency=sampling_frequency/2,
                                          filter_bank="lin",
                                          filter_bank_size=frame_size,
                                          window_size=window_size,
                                          shift=shift,
                                          ceps_number=20, 
                                          pre_emphasis=0.97,
                                          save_param=["fb"],
                                          keep_all_features=False)

    server = sidekit.FeaturesServer(features_extractor=extractor,
                                    sources=None,
                                    dataset_list=["fb"],
                                    keep_all_features=True)

    return server
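
A minimal usage sketch (the directory name, frame size, and show id are illustrative assumptions):

    # hypothetical: 16 kHz WAVs live in wav/, e.g. wav/spk1-utt1.wav
    server = make_feature_server("wav", frame_size=256)
    fb, _ = server.load("spk1-utt1", channel=0)  # (n_frames, 256) filter-bank matrix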
Example 2
def calculateOneEmbedding(model, eval_feature_path, path):
    server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=eval_feature_path + "/{}.h5",
        sources=None,
        dataset_list=["energy", "cep", "vad"],
        mask="[0-19]",
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=True,
        double_delta=False,
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)
    feature, _ = server.load(path, channel=0)
    feature = feature.astype(np.float32)
    # add batch and channel dimensions: (1, 1, n_frames, n_dims)
    feature = torch.tensor(feature).unsqueeze(0).unsqueeze(0)
    embedding, _ = model(feature)
    # L2-normalise the embedding before returning it
    embedding = embedding / embedding.pow(2).sum(dim=1).sqrt()
    return embedding
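
A hedged usage sketch (the model, feature directory, and show id are illustrative; `model` is any torch module returning an (embedding, aux) pair):

    # hypothetical: features were extracted beforehand into eval_feature/<show>.h5
    emb = calculateOneEmbedding(model, "eval_feature", "spk1/utt1")
    print(emb.shape)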
Example 3
    def train_ubm(self,
                  feature_dir,
                  speaker_list,
                  ubm_list,
                  distrib_nb,
                  feature_extension='h5',
                  num_threads=10):
        '''
        Train the UBM (a GMM) with the EM algorithm.
        '''

        self.logger.info('training UBM')

        fs = sidekit.FeaturesServer(feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=feature_dir,
                speaker_list=speaker_list,
                ext=feature_extension)),
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    keep_all_features=True,
                                    delta=True,
                                    double_delta=True,
                                    rasta=True,
                                    context=None)

        ubm = sidekit.Mixture()
        llk = ubm.EM_split(fs, ubm_list, distrib_nb, num_thread=num_threads)
        ubm.write(get_experiment_nets() +
                  '/ubm_{}.h5'.format(self.network_file))

        return ubm, fs
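
A hedged invocation sketch (the instance name and argument values are illustrative; `ubm_list` is the list of show ids to train on):

    ubm, fs = trainer.train_ubm(feature_dir='features',
                                speaker_list='train',
                                ubm_list=ubm_list,
                                distrib_nb=512)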
Example 4
 def __getitem__(self, index):
     features_server = sidekit.FeaturesServer(
         features_extractor=None,
         feature_filename_structure='../all_feature/{}.h5',
         sources=None,
         dataset_list=['fb'],
         mask=None,
         feat_norm='cms',
         global_cmvn=None,
         dct_pca=False,
         dct_pca_config=None,
         sdc=False,
         sdc_config=None,
         delta=False,
         double_delta=False,
         delta_filter=None,
         context=None,
         traps_dct_nb=None,
         rasta=True,
         keep_all_features=False)
     show_list = self.speech[index]
     speaker = show_list.split('/')[0]
     features, _ = features_server.load(show_list, channel=0)
     features = features.astype(np.float32)
     ind = np.argwhere(self.speakers_dir == speaker)[0]
     # the label is just the index of the speaker: e.g. utterances of the
     # 20th speaker get label 20
     label = ind.astype(np.int64)[0]
     features = features.reshape(1, features.shape[1], features.shape[0])
     features = t.tensor(features)
     img = transforms.ToPILImage()(features)
     features = transforms.Resize((24, 400))(img)
     features = transforms.ToTensor()(features)
     return features.view(features.size()[1], features.size()[2]), label
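
A hedged alternative sketch for the PIL round-trip above (assuming the goal is just a fixed 24x400 feature map; torchvision's Resize on a PIL image defaults to bilinear interpolation, and `features_np` stands in for the float32 numpy matrix):

    # resize the feature matrix directly as a tensor, without PIL
    feat = t.tensor(features_np.reshape(1, features_np.shape[1], features_np.shape[0]))
    feat = t.nn.functional.interpolate(feat.unsqueeze(0), size=(24, 400),
                                       mode='bilinear', align_corners=False).squeeze(0)
    # feat now has shape (1, 24, 400), the same shape ToTensor would give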
Example 5
def remove(path):
    '''
    Remove any h5 feature file that cannot be read.
    param:
        path : training-set feature path
    '''
    server = sidekit.FeaturesServer(features_extractor=None,
                                    feature_filename_structure=path + "/{}.h5",
                                    sources=None,
                                    dataset_list=["fb", "vad"],
                                    mask=None,
                                    feat_norm="cmvn",
                                    global_cmvn=None,
                                    dct_pca=False,
                                    dct_pca_config=None,
                                    sdc=False,
                                    sdc_config=None,
                                    delta=False,
                                    double_delta=False,
                                    delta_filter=None,
                                    context=None,
                                    traps_dct_nb=None,
                                    rasta=True,
                                    keep_all_features=False)
    speakers = os.listdir(path)
    for s in speakers:
        speaker_path = os.path.join(path, s)
        speech = os.listdir(speaker_path)
        for sph in speech:
            speech_path = s + '/' + sph.split('.')[0]
            try:
                feature, _ = server.load(speech_path, 0)
            except Exception:
                # the feature file is unreadable: delete it
                os.remove(os.path.join(speaker_path, sph))
    print('done')
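
A minimal usage sketch (the directory name and layout are assumptions):

    # hypothetical layout: fb_train_feature/<speaker>/<utterance>.h5
    remove('fb_train_feature')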
Example 6
def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    elif args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    # create feature server for loading feature from disk
    feature_server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=bool(args.delta),
        double_delta=bool(args.delta),
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)

    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')

    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')

    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
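
Once dev_score.h5 is written, an EER can be estimated from the scores. A hedged follow-up sketch (the key filename is an assumption; Key and Scores are the bosaris classes that sidekit re-exports):

    import numpy as np

    key = sidekit.Key(os.getcwd() + '/task/dev_key.h5')  # hypothetical trial key
    scores = sidekit.Scores(os.getcwd() + '/task/dev_score.h5')
    tar, non = scores.get_tar_non(key)  # target / non-target score arrays

    # crude EER estimate: sweep every observed score as a threshold
    thr = np.sort(np.concatenate([tar, non]))
    p_miss = np.array([(tar < t).mean() for t in thr])
    p_fa = np.array([(non >= t).mean() for t in thr])
    i = np.argmin(np.abs(p_miss - p_fa))
    print('EER ~ {:.2%}'.format((p_miss[i] + p_fa[i]) / 2))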
Example 7
def train_ubm(**args):
    if args['feat_type'] == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'
    elif args['feat_type'] == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'

    utils.remove(features_folder)

    ubmlist = []
    if os.path.exists(os.getcwd() + '/log/aishell2_wavlist.log'):
        with open(os.getcwd() + '/log/aishell2_wavlist.log', 'r') as fobj:
            for i in fobj:
                ubmlist.append(i[0:-1])
    else:
        ubmlist = preprocess()

    # create feature server for loading feature from disk
    server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=args['delta'],
        double_delta=args['delta'],
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)
    # create Mixture object for training
    ubm = sidekit.Mixture()
    ubm.EM_split(server,
                 ubmlist,
                 args['distribNum'],
                 iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),
                 num_thread=args['num_thread'],
                 save_partial=True)
    # write trained ubm to disk
    ubm.write(os.getcwd() + '/model/ubm_512.h5')
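
A hedged invocation sketch (argument values are illustrative):

    train_ubm(feat_type='mfcc', delta=True, distribNum=512, num_thread=10)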
Example 8
    def get_feature_server(self, feature_filename_structure, delta=True, double_delta=True,
                           dataset_list=["energy", "cep", "vad"], feat_norm="cmvn",
                           keep_all_features=False):

        server = sidekit.FeaturesServer(feature_filename_structure=feature_filename_structure,
                                        dataset_list=dataset_list,
                                        mask=None,
                                        feat_norm=feat_norm,
                                        delta=delta,
                                        double_delta=double_delta,
                                        rasta=True,
                                        context=None,
                                        keep_all_features=keep_all_features)
        return server
Example 9
def make_feature_server():
    """Return a Sidekit FeatureServer instance for this
    experiement

    config:  DATA_DIR, FEAT_DIR

    """
    dd = config('DATA_DIR')
    fd = config('FEAT_DIR')

    # TODO: more of these settings should be derived from the config file

    # make a feature server to compute features over our audio files
    extractor = sidekit.FeaturesExtractor(audio_filename_structure=dd+'{}.wav',
                                          feature_filename_structure=fd+"{}.h5",
                                          sampling_frequency=None,
                                          lower_frequency=200,
                                          higher_frequency=3800,
                                          filter_bank="log",
                                          filter_bank_size=24,
                                          window_size=0.025,
                                          shift=0.01,
                                          ceps_number=20,
                                          vad="snr",
                                          snr=40,
                                          pre_emphasis=0.97,
                                          save_param=["vad", "energy", "cep", "fb"],
                                          keep_all_features=True)

    server = sidekit.FeaturesServer(features_extractor=extractor,
                                    feature_filename_structure=fd+"{}.h5",
                                    sources=None,
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    global_cmvn=None,
                                    dct_pca=False,
                                    dct_pca_config=None,
                                    sdc=False,
                                    sdc_config=None,
                                    delta=True,
                                    double_delta=True,
                                    delta_filter=None,
                                    context=None,
                                    traps_dct_nb=None,
                                    rasta=True,
                                    keep_all_features=True)

    return server
Example 10
def train_ubm(**args):
    if (args['feat_type'] == 'mfcc') or (args['feat_type'] == 'plp'):
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-19]"
    elif args['feat_type'] == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
    features_folder = os.getcwd() + '/{}_train_feature'.format(
        args['feat_type'])

    ubmlist = []
    try:
        with open(os.getcwd() + '/log/aishell2.log', 'r') as fobj:
            for i in fobj:
                ubmlist.append(i[0:-1])
    except FileNotFoundError:
        print('please generate the UBM wav list first')
        return

    # create feature server for loading feature from disk
    server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=args['delta'],
        double_delta=args['delta'],
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)
    # create Mixture object for training
    ubm = sidekit.Mixture()
    ubm.EM_split(server,
                 ubmlist,
                 args['distribNum'],
                 iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),
                 num_thread=args['num_thread'],
                 save_partial=True)
    # write trained ubm to disk
    ubm.write(os.getcwd() + '/model/ubm_512.h5')
Example 11
 def __init__(self, path, maxlen):
     super(DeepSpkDataset, self).__init__()
     pattern = re.compile(r'^S\d{4}')
     server = sidekit.FeaturesServer(features_extractor=None,
                                     feature_filename_structure=path+"/{}.h5",
                                     sources=None,
                                     dataset_list=["energy","cep","vad"],
                                     mask="[0-19]",
                                     feat_norm="cmvn",
                                     global_cmvn=None,
                                     dct_pca=False,
                                     dct_pca_config=None,
                                     sdc=False,
                                     sdc_config=None,
                                     delta=True,
                                     double_delta=False,
                                     delta_filter=None,
                                     context=None,
                                     traps_dct_nb=None,
                                     rasta=True,
                                     keep_all_features=False)
     self.server = server
     self.maxlen = maxlen
     speakers_list = os.listdir(path)
     speakers_dir = []
     for i in range(len(speakers_list)):
         if re.match(pattern, speakers_list[i]):
             speakers_dir.append(path + '/' + speakers_list[i])
     speech = []
     num_speakers = 0
     real_speakers_list = []
     for i in speakers_dir:
         speech_list = os.listdir(i)
         if len(speech_list) > 20:
             for j in speech_list:
                 show = i.split('/')[-1] + '/' + j.split('.')[0]
                 speech.append(show)
             num_speakers += 1
             real_speakers_list.append(i.split('/')[-1])
     self.speech = np.asarray(speech)
     self.num_speakers = num_speakers
     self.speakers_list = np.asarray(real_speakers_list)
     a = [np.argwhere(self.speakers_list == i.split('/')[0])[0] for i in self.speech]  # generate a label for each utterance
     self.train_labels = torch.tensor(a, dtype=torch.int64)
     self.train_labels.squeeze_()
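
A hedged usage sketch (path and batch size are illustrative; this assumes the class also defines __getitem__ and __len__):

    dataset = DeepSpkDataset('./train_feature', maxlen=400)
    loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)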
Example 12
    def __init__(self):
        self.extra = None
        super(Fea, self).__init__(
            audio_filename_structure=None,
            feature_filename_structure=None,  # the second placeholder in the structure is the channel (author's addition)
            sampling_frequency=8000,
            lower_frequency=200,
            higher_frequency=3800,
            filter_bank="log",
            filter_bank_size=40,
            window_size=0.025,
            shift=0.01,
            ceps_number=12,
            vad="energy",
            pre_emphasis=0.97,
            save_param=["vad", "energy", "cep", "fb"],
            keep_all_features=True)

        self.feaServer = sidekit.FeaturesServer(
            features_extractor=None,
            feature_filename_structure=None,
            sources=None,
            dataset_list=None,  # e.g. ["energy", "cep", "fb"]
            feat_norm=None,
            # options: cms, cmvn, stg, cmvn_sliding, cms_sliding; no normalisation
            # is done here, it can be applied in a later step
            delta=True,
            double_delta=True,
            rasta=True,
            keep_all_features=False,
            # mask="[0-12]",
            # global_cmvn=None,
            # dct_pca=False,
            # dct_pca_config=None,
            # sdc=False,
            # sdc_config=None,
            # delta_filter=None,
            # context=None,
            # traps_dct_nb=None,
        )
Example 13
 def createFeatureServer(self, group=None):
     if group:
         feat_dir = os.path.join(self.BASE_DIR, "feat", group)
     else:
         feat_dir = os.path.join(self.BASE_DIR, "feat")
     # feature_filename_structure: structure of the filename to use to load HDF5 files
     # dataset_list: list of datasets to load, e.g. ["cep", "fb", "vad", "energy", "bnf"]
     # feat_norm: type of normalization to apply as post-processing
     # delta: if True, append the first order derivative
     # double_delta: if True, append the second order derivative
     # rasta: if True, perform RASTA filtering
     # keep_all_features: boolean, if True, keep all features, if False, keep frames according to the vad labels
     server = sidekit.FeaturesServer(
         feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
         dataset_list=["vad", "energy", "cep", "fb"],
         feat_norm="cmvn",
         delta=True,
         double_delta=True,
         rasta=True,
         keep_all_features=True)
     logging.info("Feature-Server is created")
     logging.debug(server)
     return server
Example 14
def featureServer(ext):
    fs = sidekit.FeaturesServer(
        features_extractor=ext,
        feature_filename_structure=None,
        sources=None,
        dataset_list=['cep', 'fb', 'vad', 'energy'],
        mask="[0-12]",
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=True,
        double_delta=True,
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)

    return fs
Example 15
    def get_embeddings(self):
        '''
        Extract i-vectors for the validation data and turn them into embeddings.
        '''
        speaker_list = self.get_validation_data_name()
        distrib_nb = self.config.getint('i_vector', 'distrib_nb')
        nbThread = self.config.getint('i_vector', 'nbThread')
        vector_size = self.config.getint('i_vector', 'vector_size')
        feature_extension = 'h5'

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_times = []
        checkpoints = ["/TV_{}".format(self.network_file)]

        #load data:
        ubm = sidekit.Mixture()
        ubm.read(get_experiment_nets() +
                 '/ubm_{}.h5'.format(self.network_file))
        ubm_list, test_list_long = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_train_data())[1])[0])
        ubm_list, test_list_short = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_test_data())[1])[0])
        tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
            get_experiment_nets() + "/TV_{}".format(self.network_file))

        fs = sidekit.FeaturesServer(feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=get_training('i_vector'),
                speaker_list=speaker_list,
                ext=feature_extension)),
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    keep_all_features=True,
                                    delta=True,
                                    double_delta=True,
                                    rasta=True,
                                    context=None)

        # extract i-vectors
        test_stat_long = sidekit.StatServer(test_list_long,
                                            ubm=ubm,
                                            distrib_nb=distrib_nb,
                                            feature_size=0,
                                            index=None)
        test_stat_long.accumulate_stat(ubm=ubm,
                                       feature_server=fs,
                                       seg_indices=range(
                                           test_stat_long.segset.shape[0]),
                                       num_thread=nbThread)

        test_stat_short = sidekit.StatServer(test_list_short,
                                             ubm=ubm,
                                             distrib_nb=distrib_nb,
                                             feature_size=0,
                                             index=None)
        test_stat_short.accumulate_stat(ubm=ubm,
                                        feature_server=fs,
                                        seg_indices=range(
                                            test_stat_short.segset.shape[0]),
                                        num_thread=nbThread)

        test_iv_long = test_stat_long.estimate_hidden(tv_mean,
                                                      tv_sigma,
                                                      V=tv,
                                                      batch_size=100,
                                                      num_thread=nbThread)[0]
        test_iv_short = test_stat_short.estimate_hidden(tv_mean,
                                                        tv_sigma,
                                                        V=tv,
                                                        batch_size=100,
                                                        num_thread=nbThread)[0]

        iv_lis, y_list, s_list = create_data_lists(
            False, test_iv_long.stat1, test_iv_short.stat1,
            test_list_long.leftids.astype(int),
            test_list_short.leftids.astype(int))

        #generate embeddings
        embeddings, speakers, num_embeddings = generate_embeddings(
            iv_lis, y_list, vector_size)

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)
        set_of_times = [
            np.zeros(
                (len(test_list_long.leftids) + len(test_list_short.leftids), ),
                dtype=int)
        ]

        return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
Example 16
for nk in keep_sessions.nistkey:
    keep_sessions.filename[keep_sessions.nistkey == nk] = file_dict[nk]

audio_file_list = keep_sessions.filename.to_numpy()
unique_idx = np.unique(audio_file_list, return_index=True)
audio_file_list = audio_file_list[unique_idx[1]]
feature_file_list = keep_sessions.nistkey.to_numpy()[unique_idx[1]]

with open('sph_files_to_process.p', 'wb') as f:
    pickle.dump((audio_file_list, feature_file_list), f)

print("Found {} sphere files to process\n".format(feature_file_list.shape[0]))

fs = sidekit.FeaturesServer(input_dir='',
                 input_file_extension='.sph',
                 label_dir='./',
                 label_file_extension='.lbl',
                 from_file='audio',
                 config='sid_8k')

idx = np.arange(len(audio_file_list))
random.shuffle(idx)
audio_file_list = audio_file_list[idx]
feature_file_list = feature_file_list[idx]

fs.save_parallel(audio_file_list, feature_file_list, 'spro4', feature_root_dir,
                 '.mfcc', and_label=False, numThread=nbThread)

Example 17
    keysX.append(
        sidekit.Key('task/sre10_coreX-coreX_det{}_key.h5'.format(cond + 1)))

with open('task/ubm_list.txt', 'r') as inputFile:
    ubmList = inputFile.read().split('\n')

if train:
    # %%
    #################################################################
    # Process the audio to generate MFCC
    #################################################################
    print('Create the feature server to extract MFCC features')
    fs = sidekit.FeaturesServer(input_dir=audioDir,
                                input_file_extension='.mfcc',
                                label_dir='./',
                                label_file_extension='.lbl',
                                from_file='spro4',
                                config='sid_8k',
                                keep_all_features=False)

    # %%
    #################################################################
    # Train the Universal background Model (UBM)
    #################################################################
    print('Train the UBM by EM')
    ubm = sidekit.Mixture()
    llk = ubm.EM_split(fs, ubmList, distribNb, numThread=nbThread)
    ubm.save_pickle('gmm/ubm_bnf.p')

    # %%
    #################################################################
Esempio n. 18
0
    def process(self, data_loaders, outputs):
        # Groups available:
        # Group 0:
        #   Input "model" with type  "system/text/1"
        #   Input "file_id" with type  "system/text/1"
        #   Input "speech" with type  "system/array_1d_floats/1"
        #   Input "speakers" with type  "allies/speakers/1"
        #   Input "uem" with type  "allies/uemranges/1"
        #   Output "model" with type  "system/text/1"
        #   Output "file_id" with type  "system/array_1d_text/1"
        #   Output "speakers" with type  "allies/speakers/1"

        # Create a Loader object to access all "inputs" from the previous blocks.
        # Although this loader seems to focus on "features", it allows access to all "inputs".
        loader = data_loaders.loaderOf("features")

        # Get the model
        model_loader = data_loaders.loaderOf("model")
        model = pickle.loads(bytes(model_loader[0][0]['model'].text, "latin-1"))

        # Fill a dictionary to access features from the files
        name_dict = {}
        for i in range(loader.count()):
            file_id = loader[i][0]['file_info'].file_id
            name_dict[file_id] = int(i)

        # Create a sidekit.FeaturesServer object to load features from the platform
        fe = AlliesExtractor(loader, name_dict)
        fs = sidekit.FeaturesServer(features_extractor=fe,
                                    dataset_list=['cep'],
                                    keep_all_features=True,
                                    delta=False,
                                    double_delta=False)

        # Here is the loop on files to process
        #   get the features
        #   get the UEM
        #   get the file_info
        for i in range(loader.count()):

            end = i
            (data, _, end) = loader[i]
            #uem = uem_loader[i][0]
            file_id = data['file_info'].file_id
            supervision = data['file_info'].supervision
            time_stamp = data['file_info'].time_stamp

            """
            Main diarization adaptation
            """

            # Compute the result to return (without system adaptation)
            spk = []
            st = []
            en = []
            for seg in model['global_diar']:
                spk.append(seg[1])
                # segment boundaries are stored in centiseconds; convert to seconds
                st.append(numpy.float64(seg[3]) / 100.)
                en.append(numpy.float64(seg[4]) / 100.)
            outputs['speakers'].write({'speaker': spk, 'start_time': st, 'end_time': en}, i)


            """
            cep = inputs['features'].data
            show = inputs['file_id'].data.text

            local_diar = first_pass_segmentation(cep, show)

            # Extract i-vectors within-show and perform within-show clustering
            iv_diar = iv_clustering(local_diar, self.model, self.fs)

            # Modify the cluster ID to add the show id as a prefix
            for seg in iv_diar.segments:
                seg['show'] = show + '_' + seg['show']

            self.global_diar.segments += iv_diar.segments

            # Perform cross show iv-clustering
            cross_diar = iv_clustering(global_diar, self.model, self.fs)

            # If we adapt the PLDA model
            self.model = adapt_plda(input_diar, model, features_server)

            outputs.write({'model': self.model})   # a serialisation step is missing here
            outputs.write({'diarization': cross_diar})  # to be changed to use the formats created by Olivier

        # always return True, it signals BEAT to continue processing
        """
        model = pickle.dumps(model).decode('latin-1')
        outputs['model'].write({'text': model}, end)

        # always return True, it signals BEAT to continue processing
        return True
Example 19
    net = Net()
    data = np.load("mean_std.npz")
    input_mean = data["mean"]
    input_std = data["std"]

    # split the list of files to process
    training_segment_sets = [seg_list[i:i + segment_buffer_size]
                             for i in range(0, len(seg_list), segment_buffer_size)]

    # Initialize the cross-validation error
    last_cv_error = -numpy.inf

    for ep in range(nb_epoch):

        print("Start epoch {} / {}".format(ep + 1, nb_epoch))
        features_server = sidekit.FeaturesServer(**fs_params)
        running_loss = accuracy = n = nbatch = 0.0

        # Move model to requested device (GPU)
        net.to(device)

        # Set training parameters
        criterion = torch.nn.CrossEntropyLoss(reduction='sum')
        optimizer = torch.optim.Adam(net.parameters())

        for idx_mb, file_list in enumerate(training_segment_sets):
            l = []
            f = []
            for idx, val in enumerate(file_list):
                show, s, _, label = val
                #show = show.replace("/",os.path.sep)
Example 20
extractor.save_list(show_list=show_list,
                    channel_list=channel_list,
                    num_thread=nbThread)

# Create a FeaturesServer

features_server = sidekit.FeaturesServer(
    features_extractor=None,
    feature_filename_structure="./features/{}.h5",
    sources=None,
    dataset_list=["energy", "cep", "vad"],
    mask=None,
    feat_norm="cmvn",
    global_cmvn=None,
    dct_pca=False,
    dct_pca_config=None,
    sdc=False,
    sdc_config=None,
    delta=True,
    double_delta=True,
    delta_filter=None,
    context=None,
    traps_dct_nb=None,
    rasta=True,
    keep_all_features=False)

# Train the Universal background Model (UBM)

print('Train the UBM by EM')
# Extract all features and train a GMM without writing to disk
ubm = sidekit.Mixture()
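
The snippet stops at the Mixture creation; following the pattern of the other examples, the training call would presumably look like this (the distribution count of 512 is an assumption; show_list and nbThread are reused from above):

    llk = ubm.EM_split(features_server, show_list, 512, num_thread=nbThread)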
Example 21
									ceps_number=13,
									# vad="snr",
									# snr=10,
									pre_emphasis=0.97,
									save_param=["energy", "cep", "fb"],
									keep_all_features=True)

server = sidekit.FeaturesServer(features_extractor=extractor,
								feature_filename_structure="chunks_features/all_train/{}.h5",
								# sources=None,
								dataset_list=["energy", "cep", "fb"],
								mask="[0-12]",
								feat_norm="cmvn",
								global_cmvn=None,
								dct_pca=False,
								dct_pca_config=None,
								sdc=False,
								# sdc_config=(1,3,7),
								delta=True,
								double_delta=True,
								delta_filter=None,
								context=None,
								traps_dct_nb=None,
								rasta=False,
								keep_all_features=True)


print('Train the UBM by EM')
# Extract all features and train a GMM without writing to disk
ubm = sidekit.Mixture()
llk = ubm.EM_split(server, utter_list, distribNb)  # , num_thread=nbThread
with open("ubm_64.pkl", "wb") as f:
    pickle.dump(ubm, f)
Example 22
with open(
        '/home/adit/Desktop/DCASE2017-baseline-system-master/Text_DCASE/fold1_train_names.txt'
) as inputFile:
    ubmList = inputFile.read().split('\n')
nameList = []
for a in ubmList:
    nameList.append(a.rsplit(".")[0])
features_server = sidekit.FeaturesServer(
    features_extractor=None,
    feature_filename_structure="../HDF5_DCASE/Features/{}.h5",
    sources=None,
    dataset_list=["fb"],
    mask=None,
    feat_norm='cms',  # alternative: 'cmvn'
    global_cmvn=None,
    dct_pca=False,
    dct_pca_config=None,
    sdc=False,
    sdc_config=None,
    delta=False,
    double_delta=False,
    delta_filter=None,
    context=None,
    traps_dct_nb=None,
    rasta=False,
    keep_all_features=False)
print('Train the UBM by EM')
# Extract all features and train a GMM without writing to disk
distribNb = 512  #np.power(2,i)
ubm = sidekit.Mixture()
p = "/home/adit/Desktop/"
print('__1__llk for ' + str(distribNb))