Example #1
0
    def extract_features(self, group):
        """Extract acoustic features for every file of one audio group.

        Every entry listed in ``<self.audio_dir>/<group>`` is passed to a
        ``sidekit.FeaturesExtractor`` configured from this object's
        upper-case settings; the extractor's output template is
        ``<self.feat_dir>/<group>/<entry>.h5``.  Entries for which the
        extractor raises ``RuntimeError`` are skipped and reported through
        ``logging`` together with the reason.

        Args:
            group: sub-directory to process, either ``"enroll"`` or
                ``"test"``.

        Raises:
            AssertionError: if ``group`` is not one of the accepted names.
        """
        assert group in ["enroll", "test"],\
            "Invalid group name!! Choose either 'enroll', 'test'"
        audio_dir = os.path.join(self.audio_dir, group)
        in_files = os.listdir(audio_dir)
        feat_dir = os.path.join(self.feat_dir, group)

        extractor = sidekit.FeaturesExtractor(
            audio_filename_structure=os.path.join(audio_dir, "{}"),
            feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
            lower_frequency=self.LOWER_FREQUENCY,
            higher_frequency=self.HIGHER_FREQUENCY,
            filter_bank=self.FILTER_BANK,
            filter_bank_size=self.FILTER_BANK_SIZE,
            window_size=self.WINDOW_SIZE,
            shift=self.WINDOW_SHIFT,
            ceps_number=self.CEPS_NUMBER,
            vad=self.VAD,
            snr=self.SNR_RATIO,
            # NOTE: "FEAUTRES" is the attribute name used by the owning class.
            save_param=self.FEAUTRES,
            keep_all_features=True)

        # Sorted, de-duplicated show names; every show is read on channel 0.
        show_list = np.unique(in_files)
        channel_list = np.zeros_like(show_list, dtype=int)

        skipped = []
        for show, channel in zip(show_list, channel_list):
            try:
                extractor.save(show, channel)
            except RuntimeError as err:
                # Name the file and the cause so the skip can be diagnosed.
                logging.info("SKIPPED %s (%s)", show, err)
                skipped.append(show)
        logging.info("Number of skipped files: %d", len(skipped))
        for show in skipped:
            logging.debug(show)
Example #2
0
def make_feature_server(dirname, frame_size):
    """Build the Sidekit FeaturesServer used by this experiment.

    The server wraps a FeaturesExtractor that reads ``<dirname>/<show>.wav``
    and is configured for linear filter-bank ("fb") parameters with
    ``frame_size`` filters.
    """
    sample_rate = 16000

    # The window covers 2 * frame_size + 1 samples (expressed in seconds):
    # it must be twice the frame size to give the right number of FFT points,
    # and since zero padding is not possible each frame takes in more signal.
    extractor_settings = dict(
        audio_filename_structure=dirname + "/{}.wav",
        sampling_frequency=sample_rate,
        lower_frequency=0,
        higher_frequency=sample_rate/2,
        filter_bank="lin",
        filter_bank_size=frame_size,
        window_size=(2 * frame_size + 1) / sample_rate,
        shift=0.008,
        ceps_number=20,
        pre_emphasis=0.97,
        save_param=["fb"],
        keep_all_features=False,
    )
    fb_extractor = sidekit.FeaturesExtractor(**extractor_settings)

    # The server computes features over the audio files on demand.
    return sidekit.FeaturesServer(features_extractor=fb_extractor,
                                  sources=None,
                                  dataset_list=["fb"],
                                  keep_all_features=True)
Example #3
0
    def extractFeatures(self, group):
        """
        Compute the acoustic parameters of the audio files inside
        "self.BASE_DIR/audio/group" and hand them to a sidekit
        FeaturesExtractor whose output template is
        "self.BASE_DIR/feat/group/<file>.h5" (HDF5 format).

        Files for which the extractor raises RuntimeError are skipped; their
        names and the error are logged.
        """
        audio_dir = os.path.join(self.BASE_DIR, "audio", group)
        in_files = os.listdir(audio_dir)
        feat_dir = os.path.join(self.BASE_DIR, "feat", group)
        # Feature extraction
        # lower_frequency: lower frequency (in Hertz) of the filter bank
        # higher_frequency: higher frequency of the filter bank
        # filter_bank: type of filter scale to use, can be "lin" or "log"
        #      (for linear or log-scale)
        # filter_bank_size: number of filters in the bank
        # window_size: size of the sliding window to process (in seconds)
        # shift: time shift of the sliding window (in seconds)
        # ceps_number: number of cepstral coefficients to extract
        # snr: signal to noise ratio used for "snr" vad algorithm
        # vad: type of voice activity detection algorithm to use.
        #      It can be "energy", "snr", "percentil" or "lbl".
        # save_param: list of strings that indicate which parameters to save.
        #      The strings can be "cep" for cepstral coefficients, "fb" for
        #      filter-banks, "energy" for the log-energy, "bnf" for
        #      bottle-neck features and "vad" for the frame selection labels.
        # keep_all_features: boolean, if True, all frames are written;
        #      if False, keep only frames according to the vad label
        extractor = sidekit.FeaturesExtractor(
            audio_filename_structure=os.path.join(audio_dir, "{}"),
            feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
            lower_frequency=self.LOWER_FREQUENCY,
            higher_frequency=self.HIGHER_FREQUENCY,
            filter_bank=self.FILTER_BANK,
            filter_bank_size=self.FILTER_BANK_SIZE,
            window_size=self.WINDOW_SIZE,
            shift=self.WINDOW_SHIFT,
            ceps_number=self.CEPS_NUMBER,
            vad=self.VAD,
            snr=self.SNR_RATIO,
            # NOTE: "FEAUTRES" is the attribute name used by the owning class.
            save_param=self.FEAUTRES,
            keep_all_features=True)

        # Prepare file lists
        # show_list: sorted, de-duplicated IDs of the shows to process
        show_list = np.unique(in_files)
        # channel_list: channel index for each show (always channel 0)
        channel_list = np.zeros_like(show_list, dtype=int)

        # Save the features in feat_dir, where the parameters of the
        # resulting files are always concatenated in the following order:
        # (energy, fb, cep, bnf, vad_label).
        # skipped: list to track faulty files
        skipped = []
        for show, channel in zip(show_list, channel_list):
            try:
                extractor.save(show, channel)
            except RuntimeError as err:
                # Name the file and the cause so the skip can be diagnosed.
                logging.info("SKIPPED %s (%s)", show, err)
                skipped.append(show)
        logging.info("Number of skipped files: %d", len(skipped))
        for show in skipped:
            logging.debug(show)
Example #4
0
def extractFeature(audio_dir, feature_dir, cep='mfcc', train=True):
    '''
    Extract features for a list of wav files and save them as HDF5 files.

    The list of shows is read from ./log/aishell2_train.log (or
    ./log/aishell2_test.log when train is False), one name per line; when
    that file does not exist the list comes from
    readDirStruct(audio_dir, train).

    param:
        audio_dir   : path to audio dir
        feature_dir : path to feature dir (created if missing)
        cep         : cepstrum type 'mfcc' or 'plp', forwarded as feature_type
        train       : if True, extract feature from train set, else, from test set
    return:
        None
    '''
    # if feature dir not exists, create it together with any missing parents
    if not os.path.exists(feature_dir):
        os.makedirs(feature_dir)

    log_name = 'aishell2_train.log' if train else 'aishell2_test.log'
    wav_list_file = os.getcwd() + '/log/' + log_name

    if os.path.exists(wav_list_file):
        with open(wav_list_file, 'r') as fobj:
            # Strip only the line terminator: blindly dropping the last
            # character would truncate a final line that has no newline.
            names = (line.rstrip('\n') for line in fobj)
            # Blank lines would otherwise become empty show names.
            wavlist = [name for name in names if name]
    else:
        wavlist = readDirStruct(audio_dir, train)

    # prepare the necessary variables (every show uses channel 0)
    showlist = np.asarray(wavlist)
    channellist = np.zeros_like(showlist, dtype=int)
    # create feature extractor
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=audio_dir + '/{}.wav',
        feature_filename_structure=feature_dir + "/{}.h5",
        sampling_frequency=16000,
        lower_frequency=133.3333,
        higher_frequency=6955.4976,
        filter_bank="log",
        filter_bank_size=64,
        window_size=0.025,
        shift=0.01,
        ceps_number=20,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["vad", "energy", "cep", "fb"],
        feature_type=cep,
        keep_all_features=True)
    # save the feature
    print('start extracting feature')
    # Half of the cores, but never zero threads on a single-core machine.
    extractor.save_list(show_list=showlist,
                        channel_list=channellist,
                        num_thread=max(1, cpu_count() // 2))
    print('extract feature done')
Example #5
0
def make_feature_server():
    """Build the Sidekit FeaturesServer for this experiment.

    Reads two configuration values:
        DATA_DIR: prefix prepended to "<show>.wav" for the audio files
        FEAT_DIR: prefix prepended to "<show>.h5" for the feature files

    Both prefixes are concatenated as-is, so they are presumably expected to
    end with a path separator (verify against the config file).
    """
    audio_template = config('DATA_DIR') + '{}.wav'
    feature_template = config('FEAT_DIR') + "{}.h5"

    # TODO: more of these settings should be derived from the config file

    # Extractor that computes features over our audio files.
    extractor_settings = dict(
        audio_filename_structure=audio_template,
        feature_filename_structure=feature_template,
        sampling_frequency=None,
        lower_frequency=200,
        higher_frequency=3800,
        filter_bank="log",
        filter_bank_size=24,
        window_size=0.025,
        shift=0.01,
        ceps_number=20,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["vad", "energy", "cep", "fb"],
        keep_all_features=True,
    )
    extractor = sidekit.FeaturesExtractor(**extractor_settings)

    # Server that post-processes what the extractor produces.
    server_settings = dict(
        features_extractor=extractor,
        feature_filename_structure=feature_template,
        sources=None,
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=True,
        double_delta=True,
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=True,
    )
    return sidekit.FeaturesServer(**server_settings)
Example #6
0
    def extract_speaker_data(self, speaker_files):
        """Extract features for every audio file of every speaker.

        Args:
            speaker_files: mapping from speaker name to an iterable of audio
                file paths for that speaker.

        Returns:
            A pair ``(file_names, speaker_ids)`` of parallel lists.
            ``file_names`` holds ``"<speaker>_<file stem>"`` for each
            processed file and ``speaker_ids`` the 1-based position of the
            speaker in ``speaker_files``, as a string.
        """
        max_speakers = len(speaker_files)
        file_names, speaker_ids = [], []

        feature_template = ("common/data/training/i_vector/" +
                            self.speaker_list + "/feat/{}.h5")
        fe = sidekit.FeaturesExtractor(
            audio_filename_structure="{}.wav",
            feature_filename_structure=feature_template,
            sampling_frequency=None,
            lower_frequency=200,
            higher_frequency=3800,
            filter_bank="log",
            filter_bank_size=24,
            window_size=0.025,
            shift=0.01,
            ceps_number=20,
            vad="snr",
            snr=40,
            pre_emphasis=0.97,
            save_param=["energy", "cep", "vad"],
            keep_all_features=True)

        # Walk the speakers in mapping order, numbering them from 1.
        for speaker_uid, speaker in enumerate(speaker_files, start=1):
            print('Extraction progress: %d/%d' %
                  (speaker_uid, max_speakers))

            for full_path in speaker_files[speaker]:
                stem, _ = os.path.splitext(os.path.basename(full_path))
                show = speaker + '_' + stem
                # The audio path is given explicitly; the output location is
                # left to the extractor's feature filename template.
                fe.save(show=show,
                        channel=0,
                        input_audio_filename=full_path,
                        output_feature_filename=None,
                        noise_file_name=None,
                        snr=10,
                        reverb_file_name=None,
                        reverb_level=-26.0)
                file_names.append(show)
                speaker_ids.append(str(speaker_uid))

        return file_names, speaker_ids
Example #7
0
def featureExtractor(subFolder_path):
    """Return a sidekit FeaturesExtractor for audio under ``subFolder_path``.

    Audio file names are built as ``subFolder_path + <show>`` (no separator
    or extension is added).  No feature filename template is configured.
    """
    settings = {
        "audio_filename_structure": subFolder_path + '{}',
        "feature_filename_structure": None,
        "sampling_frequency": 8000,
        "lower_frequency": 0,
        "higher_frequency": 4000,
        "filter_bank": "log",
        "filter_bank_size": 24,
        "window_size": 0.025,
        "shift": 0.01,
        "ceps_number": 20,
        "vad": "snr",
        "snr": 40,
        "pre_emphasis": 0.97,
        "save_param": ["vad", "energy", "cep", "fb"],
        "keep_all_features": False,
    }
    return sidekit.FeaturesExtractor(**settings)
Example #8
0
def extract_feat(args):
    """Extract features for a list of wav files and save them as HDF5 files.

    The show list is read from ./log/aishell2_wavlist.log (one name per
    line) when that file exists; otherwise it is produced by preprocess().

    Args:
        args: object exposing ``num_thread``, forwarded to
            ``FeaturesExtractor.save_list``.
    """
    # wav directory and feature directory
    audio_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/wav'
    features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'

    wavlist_path = os.getcwd() + '/log/aishell2_wavlist.log'
    if os.path.exists(wavlist_path):
        with open(wavlist_path, 'r') as fobj:
            # Strip only the line terminator: blindly dropping the last
            # character would truncate a final line that has no newline.
            names = (line.rstrip('\n') for line in fobj)
            # Blank lines would otherwise become empty show names.
            wavlist = [name for name in names if name]
    else:
        wavlist = preprocess()

    # prepare the necessary variables (every show uses channel 0)
    showlist = np.asarray(wavlist)
    channellist = np.zeros_like(showlist, dtype=int)

    # create feature extractor
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=audio_folder + '/{}.wav',
        feature_filename_structure=features_folder + "/{}.h5",
        sampling_frequency=16000,
        lower_frequency=100.0,
        higher_frequency=7000.0,
        filter_bank="log",
        filter_bank_size=64,
        window_size=0.025,
        shift=0.01,
        ceps_number=24,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["vad", "energy", "cep", "fb"],
        keep_all_features=True)

    # save the feature
    extractor.save_list(show_list=showlist,
                        channel_list=channellist,
                        num_thread=args.num_thread)
    def make_mfcc_feats(self, wavList, input_file_list, output_feat_list, nj):
        """Extract features for ``wavList`` with explicit input/output paths.

        Args:
            wavList: show names to process.
            input_file_list: audio file paths, passed as ``audio_file_list``.
            output_feat_list: feature file paths, passed as
                ``feature_file_list``.
            nj: number of threads handed to ``save_list``.
        """
        # No filename templates: the file lists given to save_list are used.
        mfcc_settings = dict(
            audio_filename_structure=None,
            feature_filename_structure=None,
            sampling_frequency=8000,
            lower_frequency=20,
            higher_frequency=3700,
            filter_bank='log',
            filter_bank_size=24,
            window_size=0.025,
            shift=0.01,
            ceps_number=20,
            vad='percentil',
            snr=40,
            pre_emphasis=0.97,
            save_param=["energy", "cep", "vad"],
            keep_all_features=False,
        )
        extractor = sidekit.FeaturesExtractor(**mfcc_settings)

        print("Start extracting the features")
        # Every show is read on channel 0.
        extractor.save_list(
            show_list=wavList,
            channel_list=numpy.zeros(len(wavList), dtype='int'),
            audio_file_list=input_file_list,
            feature_file_list=output_feat_list,
            num_thread=nj)
        print("extracting the features success")
Example #10
0
) as inputFile:
    ubmList = inputFile.read().split('\n')
# Show names are the list entries cut at their first '.'.  Blank entries
# (e.g. the one a trailing newline leaves behind after split('\n')) are
# dropped so that no empty show name reaches the extractor.
nameList = []
for a in ubmList:
    name = a.strip().rsplit('.')[0]
    if name:
        nameList.append(name)

directory = '/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio/'
# Filter-bank ("fb") parameters only; no voice activity detection is used.
extractor = sidekit.FeaturesExtractor(
    audio_filename_structure=directory + "{}.wav",
    feature_filename_structure="/home/adit/Desktop/shikha_maam/Features/{}.h5",
    sampling_frequency=16000,
    lower_frequency=10,
    higher_frequency=8000,
    filter_bank="log",
    filter_bank_size=40,
    window_size=0.040,
    ceps_number=12,
    shift=0.020,
    vad=None,
    snr=None,
    pre_emphasis=0.97,
    save_param=["fb"],
    keep_all_features=None)

# Sorted, de-duplicated show names; every show is read on channel 0.
show_list = np.unique(nameList)
channel_list = np.zeros_like(show_list, dtype=int)

logging.info("Extract features and save to disk")
extractor.save_list(show_list=show_list, channel_list=channel_list)
Example #11
0
distribNb = 64  # number of Gaussian distributions for each GMM
audioDir = 'chunks_data/all_train/'

# Number of parallel processes: one fewer than the number of available
# cores, but never less than 1 (single-core machine).
nbThread = max(1, multiprocessing.cpu_count() - 1)

# Extractor for the training chunks.  Neither vad nor snr is passed here
# (a previous configuration used vad="snr", snr=10).
extractor = sidekit.FeaturesExtractor(
    audio_filename_structure=audioDir + "{}",
    feature_filename_structure="chunks_features/all_train/{}.h5",
    sampling_frequency=None,
    lower_frequency=200,
    higher_frequency=4000,
    filter_bank="log",
    filter_bank_size=26,
    window_size=0.025,
    shift=0.01,
    ceps_number=13,
    pre_emphasis=0.97,
    save_param=["energy", "cep", "fb"],
    keep_all_features=True,
)

server = sidekit.FeaturesServer(features_extractor=extractor,
								feature_filename_structure="chunks_features/all_train/{}.h5",
								# sources=None,
								dataset_list=["energy", "cep", "fb"],
								mask="[0-12]",
								feat_norm="cmvn",
								global_cmvn=None,
Example #12
0
    input_file_list.append(filepath)
    output_feat_list.append(os.path.join(feat_fir, 'train', uttId + '.h5'))
    output_feat_lines.append(
        os.path.join(feat_fir, 'train', uttId + '.h5') + '\n')

# Persist the list of feature file paths (one per line).
with open(test_feats_scp_paath, 'w') as f:
    f.writelines(output_feat_lines)

# No filename templates: explicit audio/feature file lists are passed to
# save_list below.
eval_settings = dict(
    audio_filename_structure=None,
    feature_filename_structure=None,
    sampling_frequency=44100,
    lower_frequency=20,
    higher_frequency=3700,
    filter_bank='log',
    filter_bank_size=24,
    window_size=0.025,
    shift=0.01,
    ceps_number=20,
    vad='energy',
    snr=40,
    pre_emphasis=0.97,
    save_param=["energy", "cep", "vad"],
    keep_all_features=False,
)
extractor_eval = sidekit.FeaturesExtractor(**eval_settings)

print("Start extracting enroll features")
# Every show is read on channel 0.
channel_list = np.zeros(len(enroll_list), dtype='int')
extractor_eval.save_list(
    show_list=enroll_list,
    channel_list=channel_list,
    audio_file_list=input_file_list,
    feature_file_list=output_feat_list,
    num_thread=4,
)
Example #13
0
test_ndx = sidekit.Ndx('/info/home/larcher/task/3sess-pwd_eval_m_ndx.h5')
key = sidekit.Key('/info/home/larcher/task/3sess-pwd_eval_m_key.h5')
with open('/info/home/larcher/task/ubm_list.txt') as inputFile:
    # One show per line.  Blank lines (including the empty entry a trailing
    # newline would leave behind) are dropped so that no empty show name
    # ends up in the extraction list.
    ubmList = [line.strip() for line in inputFile if line.strip()]

# Process the audio to save MFCC on disk

logging.info("Initialize FeaturesExtractor")
extractor = sidekit.FeaturesExtractor(
    audio_filename_structure=audioDir + "/{}.wav",
    feature_filename_structure="./features/{}.h5",
    sampling_frequency=16000,
    lower_frequency=133.3333,
    higher_frequency=6955.4976,
    filter_bank="log",
    filter_bank_size=40,
    window_size=0.025,
    shift=0.01,
    ceps_number=19,
    vad="snr",
    snr=40,
    pre_emphasis=0.97,
    save_param=["vad", "energy", "cep"],
    keep_all_features=False)

# Get the complete list of features to extract: UBM shows, enrollment
# segments and test segments, de-duplicated; every show uses channel 0.
show_list = np.unique(
    np.hstack([ubmList, enroll_idmap.rightids,
               np.unique(test_ndx.segset)]))
channel_list = np.zeros_like(show_list, dtype=int)

logging.info("Extract features and save to disk")
    def __init__(self,
                 input_filepath_structure=None,
                 feature_filename_structure=None,
                 mel_feature_filename_structure=None,
                 sampling_frequency=16000,
                 lower_frequency=200,
                 higher_frequency_mel=3800,
                 filter_bank="log",
                 filter_bank_size=24,
                 window_size=0.025,
                 shift=0.01,
                 ceps_number=20,
                 snr=40,
                 pre_emphasis=0.97):
        """Create a linear-scale and a log-scale Sidekit extractor.

        Both extractors read audio through ``input_filepath_structure`` and
        share the framing, cepstral, VAD ("snr") and pre-emphasis settings.
        They differ in:

        * linear extractor: ``filter_bank="lin"``, upper frequency
          ``sampling_frequency / 2``, output template
          ``feature_filename_structure``, saves ``["cep", "fb"]``;
        * mel extractor: ``filter_bank="log"``, upper frequency
          ``higher_frequency_mel``, output template
          ``mel_feature_filename_structure``, saves
          ``["vad", "energy", "cep", "fb"]``.

        The ``filter_bank`` argument is accepted but not used: the scale of
        each extractor is fixed as described above.  The remaining arguments
        are also stored on the instance under their own names.
        """
        # Keyword arguments that are identical for the two extractors.
        shared = dict(
            audio_filename_structure=input_filepath_structure,
            lower_frequency=lower_frequency,
            filter_bank_size=filter_bank_size,
            window_size=window_size,
            shift=shift,
            ceps_number=ceps_number,
            vad="snr",
            snr=snr,
            sampling_frequency=sampling_frequency,
            keep_all_features=True,
            pre_emphasis=pre_emphasis,
        )

        self.__sk_lin_feature_extractor = sk.FeaturesExtractor(
            feature_filename_structure=feature_filename_structure,
            higher_frequency=sampling_frequency / 2,
            filter_bank="lin",
            save_param=["cep", "fb"],
            **shared)
        self.__sk_mel_feature_extractor = sk.FeaturesExtractor(
            feature_filename_structure=mel_feature_filename_structure,
            higher_frequency=higher_frequency_mel,
            filter_bank="log",
            save_param=["vad", "energy", "cep", "fb"],
            **shared)

        # Keep the raw settings available on the instance.
        self.input_filepath_structure = input_filepath_structure
        self.feature_filename_structure = feature_filename_structure
        self.mel_feature_filename_structure = mel_feature_filename_structure
        self.sampling_frequency = sampling_frequency
        self.lower_frequency = lower_frequency
        self.higher_frequency_mel = higher_frequency_mel
        self.filter_bank_size = filter_bank_size
        self.window_size = window_size
        self.shift = shift
        self.ceps_number = ceps_number
        self.snr = snr
        self.pre_emphasis = pre_emphasis
Example #15
0
# Source audio directory and the directory receiving the HDF5 feature files.
_AUDIO_ROOT = "/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio"
_FEATURE_ROOT = "/home/adit/Desktop/DCASE2017-baseline-system-master/applications/data/TUT-acoustic-scenes-2017-development/audio_mfec_hdf5"

directory = os.fsencode(_AUDIO_ROOT)

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if not filename.endswith(".wav"):
        continue

    feature_path = _FEATURE_ROOT + "/" + filename + ".h5"

    # One extractor per file: both filename structures are complete paths
    # without a "{}" placeholder, hence the empty show name in save("").
    extractor = sidekit.FeaturesExtractor(
        audio_filename_structure=_AUDIO_ROOT + "/" + filename,
        feature_filename_structure=feature_path,
        sampling_frequency=44100,
        lower_frequency=20,
        higher_frequency=22000,
        filter_bank="log",
        filter_bank_size=40,
        window_size=0.040,
        ceps_number=12,
        shift=0.020,
        vad="snr",
        snr=40,
        pre_emphasis=0.97,
        save_param=["fb"],
        keep_all_features=True,
    )
    extractor.save("")

    # Read back the filter-bank data and its mean from the file just written.
    # NOTE(review): the handle is never closed explicitly and rebinds the
    # loop variable `file`; confirm whether later code relies on it staying
    # open before switching to a `with` block.
    file = h5py.File(feature_path, 'r+')
    data = file['/fb']
    tocsv = data[:, :]
    data1 = file['/fb_mean']
    tomean = data1[:, ]
inputpath = "/home/aditya/Desktop/MFCCs"
_audio_dir = inputpath + "/accfiles"
_feature_dir = inputpath + "/features"

# One show per directory entry (extension removed), all on channel 0.
show_list = []
for filename in os.listdir(_audio_dir):
    stem, _ext = os.path.splitext(filename)
    show_list.append(stem)
channel_list = [0] * len(show_list)

extractor_settings = dict(
    audio_filename_structure=_audio_dir + "/{}.wav",
    feature_filename_structure=_feature_dir + "/{}.hd5",
    sampling_frequency=8000,
    lower_frequency=200,
    higher_frequency=3800,
    filter_bank="log",
    filter_bank_size=24,
    window_size=0.02,
    shift=0.01,
    ceps_number=20,
    vad="snr",
    snr=40,
    pre_emphasis=0.97,
    save_param=["energy", "cep"],
    keep_all_features=True,
)
extractor = sidekit.FeaturesExtractor(**extractor_settings)

extractor.save_list(show_list=show_list,
                    channel_list=channel_list,
                    num_thread=6)