예제 #1
0
    def feature_normalisation(self, in_file_list, out_file_list):
        """Mean/variance-normalise a list of HTK feature files.

        Each file in ``in_file_list`` is read, normalised as
        ``(features - mean) / std`` and written to the path at the same
        position in ``out_file_list``.

        If ``self.mean_vector`` or ``self.std_vector`` is still ``None`` it is
        computed from ``in_file_list`` over dimensions
        ``0 .. self.feature_dimension`` first; otherwise the stored vectors
        are reused as they are.

        Args:
            in_file_list: paths of the feature files to read.
            out_file_list: paths to write to; must have the same length as
                ``in_file_list``.

        Returns:
            The tuple ``(self.mean_vector, self.std_vector)``.

        Raises:
            AssertionError: if the two lists differ in length.
        """
        logger = logging.getLogger('feature_normalisation')

        # Raised explicitly (rather than via ``assert``) so that the check is
        # not stripped when Python runs with -O.
        if len(in_file_list) != len(out_file_list):
            message = ('The input and output file numbers are not the same! %d vs %d'
                       % (len(in_file_list), len(out_file_list)))
            logger.critical(message)
            raise AssertionError(message)

        # ``is None`` rather than ``== None``: once these attributes hold numpy
        # arrays, ``== None`` compares elementwise and the ``if`` raises
        # "truth value of an array is ambiguous".
        if self.mean_vector is None:
            self.mean_vector = self.compute_mean(in_file_list, 0, self.feature_dimension)
        if self.std_vector is None:
            self.std_vector = self.compute_std(in_file_list, self.mean_vector, 0, self.feature_dimension)

        io_funcs = HTKFeat_read()
        for in_file_name, out_file_name in zip(in_file_list, out_file_list):
            features, _ = io_funcs.getall(in_file_name)

            # The statistics broadcast over the frame axis, so no per-file
            # tiled copies of them are needed.
            norm_features = (features - self.mean_vector) / self.std_vector

            # The writer is rebuilt per file from the reader's veclen and
            # sampPeriod attributes as they stand after this read.
            # NOTE(review): paramKind=9 is presumably HTK's USER kind - confirm.
            htk_writer = HTKFeat_write(veclen=io_funcs.veclen, sampPeriod=io_funcs.sampPeriod, paramKind=9)
            htk_writer.writeall(norm_features, out_file_name)

        return self.mean_vector, self.std_vector
예제 #2
0
    def compute_mean(self, file_list, start_index, end_index):
        """Compute the per-dimension mean of the features in ``file_list``.

        Only feature columns ``start_index:end_index`` are considered. The
        column sums of every file are accumulated and divided by the total
        number of frames reported by the reader.

        The result has shape ``(1, end_index - start_index)``; it is stored
        in ``self.mean_vector`` and also returned.
        """
        logger = logging.getLogger('feature_normalisation')

        width = end_index - start_index
        target_shape = (1, width)

        running_sum = numpy.zeros(target_shape)
        total_frames = 0

        reader = HTKFeat_read()
        for path in file_list:
            features, frame_count = reader.getall(path)

            column_sums = numpy.sum(features[:, start_index:end_index], axis=0)
            running_sum += numpy.reshape(column_sums, target_shape)
            total_frames += frame_count

        mean_vector = running_sum / float(total_frames)

        # numpy print options are deliberately left alone here: changing them
        # was seen to break later printing of numpy float32 values.
        length_message = 'computed mean vector of length %d :' % mean_vector.shape[1]
        logger.info(length_message)
        value_message = ' mean: %s' % mean_vector
        logger.info(value_message)

        self.mean_vector = mean_vector
        return mean_vector
예제 #3
0
    def compute_std(self, file_list, mean_vector, start_index, end_index):
        """Compute the per-dimension standard deviation over ``file_list``.

        Only feature columns ``start_index:end_index`` are considered. The
        squared deviations from ``mean_vector`` are summed over all frames of
        all files, divided by the total frame count (a population estimate,
        not N-1) and square-rooted.

        Args:
            file_list: paths of the feature files to read.
            mean_vector: mean to subtract; must broadcast against the
                selected columns.
            start_index: first feature column (inclusive).
            end_index: last feature column (exclusive).

        Returns:
            Array of shape ``(1, end_index - start_index)``; it is also stored
            in ``self.std_vector``.
        """
        logger = logging.getLogger('feature_normalisation')

        local_feature_dimension = end_index - start_index

        # Sized by the requested slice, not by self.feature_dimension: every
        # increment below has shape (1, local_feature_dimension), so a larger
        # accumulator either fails to add or, for a one-column slice, silently
        # broadcasts a single variance across all dimensions.
        std_vector = numpy.zeros((1, local_feature_dimension))
        all_frame_number = 0

        io_funcs = HTKFeat_read()
        for file_name in file_list:
            features, current_frame_number = io_funcs.getall(file_name)

            # mean_vector broadcasts over the frame axis; no tiled copy needed.
            deviations = features[:, start_index:end_index] - mean_vector

            std_vector += numpy.reshape(numpy.sum(deviations ** 2, axis=0), (1, local_feature_dimension))
            all_frame_number += current_frame_number

        std_vector /= float(all_frame_number)

        std_vector = std_vector ** 0.5

        # numpy print options are deliberately left untouched: changing them
        # was observed to break later printing of numpy float32 values.
        logger.info('computed  std vector of length %d' % std_vector.shape[1] )
        logger.info('  std: %s' % std_vector)

        self.std_vector = std_vector

        return  std_vector
예제 #4
0
    def compute_std(self, file_list, mean_vector, start_index, end_index):
        """Compute the per-dimension standard deviation over ``file_list``.

        Only feature columns ``start_index:end_index`` are used. Squared
        deviations from ``mean_vector`` are accumulated across every frame of
        every file, divided by the total frame count (population estimate)
        and square-rooted.

        Args:
            file_list: paths of the feature files to read.
            mean_vector: mean to subtract; relies on numpy broadcasting
                against the selected columns.
            start_index: first feature column (inclusive).
            end_index: last feature column (exclusive).

        Returns:
            Array of shape ``(1, end_index - start_index)``, which is also
            stored in ``self.std_vector``.
        """
        logger = logging.getLogger('feature_normalisation')

        local_feature_dimension = end_index - start_index
        local_shape = (1, local_feature_dimension)

        # The accumulator must match the slice width. Allocating it with
        # self.feature_dimension breaks (or mis-broadcasts, for a one-column
        # slice) whenever the requested range is narrower than the full
        # feature vector.
        std_vector = numpy.zeros(local_shape)
        all_frame_number = 0

        io_funcs = HTKFeat_read()
        for file_name in file_list:
            features, current_frame_number = io_funcs.getall(file_name)

            squared_deviation = (features[:, start_index:end_index] - mean_vector)**2
            std_vector += numpy.reshape(
                numpy.sum(squared_deviation, axis=0), local_shape)
            all_frame_number += current_frame_number

        std_vector /= float(all_frame_number)

        std_vector = std_vector**0.5

        # numpy print options are deliberately left untouched: changing them
        # was observed to break later printing of numpy float32 values.
        logger.info('computed  std vector of length %d' % std_vector.shape[1])
        logger.info('  std: %s' % std_vector)

        self.std_vector = std_vector

        return std_vector
예제 #5
0
    def prepare_training(self, file_id_list_name, wav_dir, lab_dir, work_dir,
                         multiple_speaker):
        """Set up the working directory and features before training.

        Steps, in order:
          1. record the config/model/feature/label paths under ``work_dir``
             on ``self`` and create the directories that do not exist yet;
          2. read the file id list and check the data, which yields a dict
             of file lists (named per speaker in the code);
          3. extract features via ``self._HCopy()``;
          4. normalise every file list in place (input list == output list);
          5. build the prototype via ``self._make_proto()``.
        """

        def _ensure_dir(path):
            # Create the directory (and parents) only when it is missing.
            if not os.path.exists(path):
                os.makedirs(path)

        print('---preparing enverionment')
        cfg_dir = os.path.join(work_dir, 'config')
        model_dir = os.path.join(work_dir, 'model')
        self.cfg_dir = cfg_dir
        self.model_dir = model_dir
        self.cur_dir = os.path.join(model_dir, 'hmm0')
        _ensure_dir(self.cfg_dir)
        _ensure_dir(self.cur_dir)

        self.phonemes = os.path.join(work_dir, 'mono_phone.list')
        self.phoneme_map = os.path.join(work_dir, 'phoneme_map.dict')
        # HMM prototype
        self.proto = os.path.join(cfg_dir, 'proto')
        # script (SCP) files
        self.copy_scp = os.path.join(cfg_dir, 'copy.scp')
        self.test_scp = os.path.join(cfg_dir, 'test.scp')
        self.train_scp = os.path.join(cfg_dir, 'train.scp')
        # configuration file
        self.cfg = os.path.join(cfg_dir, 'cfg')

        self.wav_dir = wav_dir
        self.lab_dir = lab_dir
        self.mfc_dir = os.path.join(work_dir, 'mfc')
        _ensure_dir(self.mfc_dir)

        self.mono_lab_dir = os.path.join(work_dir, 'mono_no_align')
        _ensure_dir(self.mono_lab_dir)

        file_id_list = self._read_file_list(file_id_list_name)
        print('---checking data')
        speaker_utt_dict = self._check_data(file_id_list, multiple_speaker)

        print('---extracting features')
        self._HCopy()
        print(time.strftime("%c"))
        print('---feature_normalisation')
        io_funcs = HTKFeat_read()
        # NOTE(review): veclen/sampPeriod are taken from a reader that has not
        # read any file in this method - confirm they hold usable values.
        htk_writer = HTKFeat_write(veclen=io_funcs.veclen,
                                   sampPeriod=io_funcs.sampPeriod,
                                   paramKind=9)
        normaliser = Statis(feature_dimension=39,
                            read_func=io_funcs.getall,
                            writer_func=htk_writer.writeall)

        # NOTE(review): a single normaliser is shared by all dict entries; if
        # it keeps statistics between calls, later entries would reuse the
        # first entry's statistics - confirm this is intended.
        for utterance_files in list(speaker_utt_dict.values()):
            # Same list as input and output: the files are overwritten.
            normaliser.feature_normalisation(utterance_files, utterance_files)
        print(time.strftime("%c"))

        print('---making proto')
        self._make_proto()
예제 #6
0
    def feature_normalisation(self, in_file_list, out_file_list):
        """Normalise feature files to zero mean and unit variance.

        Every file in ``in_file_list`` is read, transformed as
        ``(features - self.mean_vector) / self.std_vector`` and written to
        the path at the same index of ``out_file_list``.

        Statistics are computed from ``in_file_list`` (dimensions
        ``0 .. self.feature_dimension``) only when the corresponding
        attribute is still ``None``; already stored vectors are reused.

        Args:
            in_file_list: paths of the feature files to read.
            out_file_list: destination paths, one per input file.

        Returns:
            The tuple ``(self.mean_vector, self.std_vector)``.

        Raises:
            AssertionError: if the two lists have different lengths.
        """
        logger = logging.getLogger('feature_normalisation')

        # Explicit raise instead of ``assert`` so the validation still runs
        # under ``python -O``; the exception type is unchanged for callers.
        if len(in_file_list) != len(out_file_list):
            message = (
                'The input and output file numbers are not the same! %d vs %d'
                % (len(in_file_list), len(out_file_list)))
            logger.critical(message)
            raise AssertionError(message)

        # TODO: Compute them in one pass.
        # Identity test on purpose: ``array == None`` is an elementwise
        # comparison, and using its result in ``if`` raises ValueError for
        # arrays with more than one element.
        if self.mean_vector is None:
            self.mean_vector = self.compute_mean(in_file_list, 0,
                                                 self.feature_dimension)
        if self.std_vector is None:
            self.std_vector = self.compute_std(in_file_list, self.mean_vector,
                                               0, self.feature_dimension)

        io_funcs = HTKFeat_read()
        for in_file_name, out_file_name in zip(in_file_list, out_file_list):
            features, _ = io_funcs.getall(in_file_name)

            # Mean and std broadcast over the frame axis.
            norm_features = (features - self.mean_vector) / self.std_vector

            # The writer is rebuilt per file from the reader's veclen and
            # sampPeriod attributes as they stand after this read.
            # NOTE(review): paramKind=9 is presumably HTK's USER kind - confirm.
            htk_writer = HTKFeat_write(veclen=io_funcs.veclen,
                                       sampPeriod=io_funcs.sampPeriod,
                                       paramKind=9)
            htk_writer.writeall(norm_features, out_file_name)

        return self.mean_vector, self.std_vector
예제 #7
0
def get_data_4_predict(x_dir, y_dir, is_y=True):
    """Load every ``.htk`` feature file in ``x_dir``, optionally with labels.

    Args:
        x_dir: directory scanned with ``os.listdir`` for ``*.htk`` files.
        y_dir: directory holding the label files; only used when ``is_y`` is
            true. The label file name is the feature file name with
            ``"_16.htk"`` replaced by ``".txt"`` and is read with
            ``np.loadtxt``.
        is_y: whether labels are loaded and returned.

    Returns:
        ``(x, y, f_names)`` when ``is_y`` is true, otherwise ``(x, f_names)``.
        ``x`` and ``y`` are ``np.array`` conversions of the per-file feature
        matrices and label arrays. ``f_names`` holds one
        ``[file name, length]`` row per file, where length is ``len(labels)``
        with labels and ``len(matrix)`` without.
    """
    x_tmp = []
    y_tmp = []
    f_tmp = []
    for item in os.listdir(x_dir):
        if not item.endswith(".htk"):
            continue

        # os.path.join instead of string concatenation so the directory
        # arguments work with or without a trailing path separator.
        reader = HTKFeat_read(os.path.join(x_dir, item))
        matrix = reader.getall()
        x_tmp.append(matrix)

        if is_y:
            label_name = item.replace("_16.htk", '.txt')
            labels = np.loadtxt(os.path.join(y_dir, label_name))
            y_tmp.append(labels)
            f_tmp.append([item, len(labels)])
        else:
            f_tmp.append([item, len(matrix)])

    # NOTE(review): if the files have different frame counts these lists are
    # ragged, which recent numpy versions reject in np.array - confirm that
    # the inputs are expected to be uniform.
    x = np.array(x_tmp)
    # Rows mix a str and an int, so numpy stores both as strings.
    f_names = np.array(f_tmp)

    if is_y:
        return x, np.array(y_tmp), f_names
    return x, f_names
예제 #8
0
    def compute_mean(self, file_list, start_index, end_index):
        """Return the mean of feature columns ``start_index:end_index``.

        The column sums of each file in ``file_list`` are accumulated into a
        ``(1, end_index - start_index)`` array, which is then divided by the
        sum of the frame counts reported by the reader. The result is also
        saved as ``self.mean_vector``.
        """
        logger = logging.getLogger('feature_normalisation')

        n_columns = end_index - start_index
        columns = slice(start_index, end_index)

        frame_total = 0
        mean_vector = numpy.zeros((1, n_columns))

        htk_reader = HTKFeat_read()
        for name in file_list:
            data, n_frames = htk_reader.getall(name)

            per_column = numpy.sum(data[:, columns], axis=0)
            mean_vector += numpy.reshape(per_column, (1, n_columns))
            frame_total += n_frames

        mean_vector /= float(frame_total)

        # numpy print options are intentionally not modified here; doing so
        # was seen to break subsequent printing of numpy float32 values.
        vector_length = mean_vector.shape[1]
        logger.info('computed mean vector of length %d :' % vector_length)
        logger.info(' mean: %s' % mean_vector)

        self.mean_vector = mean_vector

        return mean_vector