# Example #1
 def extract(self, url_list):
     """Extract audio features for every (url, label) pair in *url_list*.

     Parameters
     ----------
     url_list : ``list``
         Sequence of ``(url, label)`` pairs.

     Returns
     -------
     ys : ``list``
         Extracted features, one entry per url.
     labels : ``list``
         The label of each feature.
     """
     urls = [pair[0] for pair in url_list]
     labels = [pair[1] for pair in url_list]
     self.logger.info("Start extract %d audio." % len(urls))
     n = len(urls)
     args = zip(urls,
                [self.sample_rate] * n,
                [self.hop_length] * n,
                [self.fix_len] * n)
     ys = ops.multi_processing(self._extract_one, args, self.process_num)
     # Only apply the sliding window when the config enables it.
     if self.slides not in ([None, None], [0, 0]):
         ys = ops.multi_processing(slide_windows,
                                   zip(ys, [self.slides] * len(ys)),
                                   self.process_num)
     return ys, labels
# Example #2
def run(config):
    """Build the class-balanced training record from the temporary url list."""
    writer = TFrecordClassBalanceGen(config, 'train')
    for url in [os.path.join(config.save_path, 'url/train_tmp_0')]:
        feats, targets = ext_fbank_feature(url, config)
        feats = ops.multi_processing(limit_len, feats, config.n_threads, True)
        writer.write(feats, targets)
        # Drop the large feature arrays before the next iteration's allocation.
        del feats, targets
def ext_mfcc_feature(url_path, config):
    """This function is used for extract MFCC feature of a dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file. Each line is "<audio path> <label>".
    config : ``config``
        config of feature. (To decide if we need slide_window, and params of slide_window)

    Returns
    -------
    mfccs : ``list``
        The feature array. each frame concat with the frame before and after it.
    labels : ``list``
        The label of the MFCC feature, one single-element list per utterance.

    """
    # Safe replacement for eval(): only parses Python literals, never runs code.
    from ast import literal_eval

    logger = logging.getLogger('data')

    labels = []
    url_list = []
    with open(url_path, 'r') as urls:
        for line in urls:
            # rsplit tolerates spaces inside the audio path; the label is the last field.
            url, label = line.rsplit(" ", 1)
            labels.append([literal_eval(label.strip())])
            url_list.append(url)

    logger.info("Extracting MFCC feature, utt_nums is %d" % len(url_list))

    n = len(url_list)
    n_filt = [config.feature_dims] * n
    slides = [config.slides] * n
    if hasattr(config, 'min_db'):
        dbs = [config.min_db] * n
        mfccs = ops.multi_processing(calc_mfcc,
                                     zip(url_list, n_filt, slides, dbs),
                                     config.n_threads)
    else:
        mfccs = ops.multi_processing(calc_mfcc, zip(url_list, n_filt, slides),
                                     config.n_threads)
    logger.info("Extracting MFCC feature succeed")
    return mfccs, labels
# Example #4
 def extract(self, url_list):
     """Extract spectrogram features for source-paired audio in *url_list*.

     Parameters
     ----------
     url_list : ``list``
         The urls to pair (via ``self.url_pair``) and extract.

     Returns
     -------
     data, label
         The 'data' and 'label' entries of the extraction result.
         NOTE(review): assumes ops.multi_processing returns a dict here — confirm.
     """
     paired_data = self.url_pair(url_list, self.source_num)
     self.logger.info("Start extract %d audio." % len(url_list))
     n = len(url_list)
     param = zip(paired_data,
                 [self.sample_rate] * n,
                 [self.n_fft] * n,
                 [self.dims] * n,
                 # BUG FIX: was `range(url_list)` (TypeError: range() needs an
                 # int) — must repeat hop_length once per url.
                 [self.config.hop_length] * n,
                 [self.fix_len] * n)
     spec_dic = ops.multi_processing(self._extract_one, param, self.process_num)
     return spec_dic['data'], spec_dic['label']
def ext_fbank_feature(url_path, config):
    """This function is used for extract features of one dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file. Each line is "<audio path> <label>".
    config : ``config``
        config of feature. (Contain the parameters of slide_window and feature_dims)

    Returns
    -------
    fbank : ``list``
        The feature array. each frame concat with the frame before and after it.
    label : ``list``
        The label of fbank feature.

    Notes
    -----
    Changeable concat size is in the todolist

    """
    # Safe replacement for eval(): only parses Python literals, never runs code.
    from ast import literal_eval

    logger = logging.getLogger('data')

    url_list = []
    labels = []
    with open(url_path, 'r') as urls:
        for line in urls:
            # rsplit tolerates spaces inside the audio path; the label is the last field.
            url, label = line.rsplit(" ", 1)
            url_list.append(url)
            labels.append([literal_eval(label.strip())])

    logger.info("Extracting fbank feature, utt_nums is %d" % len(url_list))
    n = len(url_list)
    if config.fix_len is not None:
        # Pad/crop every utterance to the longest one in the dataset.
        max_len = get_max_audio_time(url_list)
        max_lens = [max_len] * n
    else:
        max_lens = [None] * n
    n_filt = [config.feature_dims] * n
    if config.slides is not None:
        slide_l = [config.slides[0]] * n
        slide_r = [config.slides[1]] * n
    else:
        slide_l = [None] * n
        slide_r = [None] * n

    fbanks = ops.multi_processing(
        calc_fbank, zip(url_list, n_filt, slide_l, slide_r, max_lens),
        config.n_threads)

    logger.info("Extracting fbank feature succeed")
    return fbanks, labels
def ext_fbank_feature(url_path, config):
    """This function is used for extract features of one dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file. Each line is "<audio path> <label>".
    config : ``config``
        config of feature. (To decide if we need slide_window, and params of slide_window)

    Returns
    -------
    fbank : ``list``
        The feature array. each frame concat with the frame before and after it.
    label : ``list``
        The label of fbank feature.

    Notes
    -----
    Changeable concat size is in the todolist

    """
    # Safe replacement for eval(): only parses Python literals, never runs code.
    from ast import literal_eval

    logger = logging.getLogger(config.model_name)

    url_list = []
    labels = []
    with open(url_path, 'r') as urls:
        for line in urls:
            # rsplit tolerates spaces inside the audio path; the label is the last field.
            url, label = line.rsplit(" ", 1)
            url_list.append(url)
            labels.append([literal_eval(label.strip())])

    logger.info("Extracting fbank feature, utt_nums is %d" % len(url_list))

    n = len(url_list)
    n_filt = [config.feature_dims] * n
    slides = [config.slides] * n
    fbanks = ops.multi_processing(calc_fbank, zip(url_list, n_filt, slides),
                                  config.n_threads)

    logger.info("Extracting fbank feature succeed")
    return fbanks, labels
def ext_spec_feature(url_path, config):
    """Extract STFT magnitude/phase spectra for every utterance in a dataset.

    Parameters
    ----------
    url_path : ``str``
        The path of the 'PATH' file. Each line is "<audio path> <label>".
    config : ``config``
        config of feature (uses ``NFFT``, ``frame_size`` and ``n_threads``).

    Returns
    -------
    mag_spec : ``list``
        Magnitude spectrogram per utterance.
    phase_spec : ``list``
        Phase spectrogram per utterance.

    """
    # Safe replacement for eval(): only parses Python literals, never runs code.
    from ast import literal_eval

    logger = logging.getLogger('data')

    labels = []
    url_list = []
    with open(url_path, 'r') as urls:
        for line in urls:
            # Labels are parsed for consistency with the other extractors,
            # even though this function only returns the spectra.
            url, label = line.rsplit(" ", 1)
            labels.append([literal_eval(label.strip())])
            url_list.append(url)

    logger.info("Extracting Spec feature, utt_nums is %d" % len(url_list))
    n = len(url_list)
    nffts = [config.NFFT] * n
    frame_sizes = [config.frame_size] * n
    # NOTE(review): assumes ops.multi_processing returns a (mag, phase) pair
    # for get_stft — confirm against its implementation.
    (mag_spec,
     phase_spec) = ops.multi_processing(get_stft,
                                        zip(url_list, nffts, frame_sizes),
                                        config.n_threads)

    logger.info("Extracting Spec feature succeed")

    return mag_spec, phase_spec
if __name__ == '__main__':
    config = TrainConfig('../config.json')
    config.save_path = '.'
    train_urls = [
        '/home/data/speaker-recognition/url/train_1',
        '/home/data/speaker-recognition/url/train_2',
        '/home/data/speaker-recognition/url/train_3'
    ]
    enroll_url = '/home/data/speaker-recognition/url/enroll'
    test_url = '/home/data/speaker-recognition/url/test'

    def _extract_and_write(gen, url):
        """Extract fbank features from *url*, length-limit them, write to *gen*."""
        x, y = ext_fbank_feature(url, config)
        x = ops.multi_processing(limit_len, x, config.n_threads, True)
        gen.write(x, y)
        # x and y are freed when this frame returns, keeping peak memory down.

    gen_train = TFrecordGen(config, 'Train.record')
    for train_url in train_urls:
        _extract_and_write(gen_train, train_url)

    gen_enroll = TFrecordGen(config, 'Enroll.record')
    _extract_and_write(gen_enroll, enroll_url)

    gen_test = TFrecordGen(config, 'Test.record')
    _extract_and_write(gen_test, test_url)

    logger = logging.getLogger(config.model_name + '_train')
    logger.info("Feature proccessing done.")