コード例 #1
0
    def get_frame(self, file_name, frame_id):
        """Return the front-end parameters for one frame of a wave file.

        The wave file belonging to ``file_name`` is opened on first use and
        kept, together with the derived frame size, frame shift and MFCC
        front end, until a different ``file_name`` is requested.  The cached
        state is only replaced once the new file has been opened and
        validated, so a failed request never leaves the object half
        initialised for that file name.

        Args:
            file_name: name used to look up the wave file through
                ``self.get_param_file_name``.
            frame_id: index of the requested frame; it is multiplied by the
                frame shift to obtain the read position in samples.

        Returns:
            The value returned by ``self.mfcc_front_end.param`` for the
            samples read at that position.

        Raises:
            Exception: if no param file is found for ``file_name`` or the
                wave file is not mono / 16 bit.
            AttributeError: re-raised from ``wave.open`` after the file name
                has been printed.
            ValueError: re-raised from the front end after the file name,
                frame id and buffer length have been printed.
        """
        if self.last_file_name != file_name:
            # find matching param file
            param_file_name = self.get_param_file_name(file_name)
            if param_file_name is None:
                raise Exception("MLFMFCCOnlineAlignedArray: param_file_name cannot be None, file_name: " + file_name)

            # open the param file
            try:
                wave_reader = wave.open(param_file_name, 'r')
            except AttributeError:
                # Report the offending file, then propagate: continuing here
                # would silently read frames from the previously opened file.
                print("Error opening file: %s" % (param_file_name,))
                raise

            try:
                if wave_reader.getnchannels() != 1:
                    raise Exception('Input wave is not in mono')

                if wave_reader.getsampwidth() != 2:
                    raise Exception('Input wave is not in 16bit')

                sample_rate = wave_reader.getframerate()
                # NOTE(review): the 10000000 divisor suggests windowsize and
                # targetrate are given in 100 ns units -- confirm.
                frame_size = int(sample_rate * self.windowsize / 10000000)
                # Round sizes above 64 samples up to a power of two, capped
                # at 2048; smaller sizes are left unchanged.
                for threshold in (1024, 512, 256, 128, 64):
                    if frame_size > threshold:
                        frame_size = 2 * threshold
                        break

                frame_shift = int(sample_rate * self.targetrate / 10000000)
                front_end = MFCCFrontEnd(sample_rate, frame_size, usec0=self.usec0,
                                         usedelta=self.usedelta, useacc=self.useacc,
                                         n_last_frames=self.n_last_frames,
                                         mel_banks_only=self.mel_banks_only)
            except Exception:
                # The reader was rejected; do not leave its file handle open.
                wave_reader.close()
                raise

            # Commit the new state only after everything above succeeded.
            self.last_param_file_features = wave_reader
            self.frame_size = frame_size
            self.frame_shift = frame_shift
            self.mfcc_front_end = front_end
            self.last_file_name = file_name

        # Start half a window before the frame position, clamped to the
        # beginning of the file.
        self.last_param_file_features.setpos(max(frame_id * self.frame_shift - int(self.frame_size / 2), 0))
        frame = self.last_param_file_features.readframes(self.frame_size)

        frame = numpy.frombuffer(frame, dtype=numpy.int16)

        try:
            mfcc_params = self.mfcc_front_end.param(frame)
        except ValueError:
            print("%s %s %s" % (file_name, frame_id, len(frame)))
            raise

        return mfcc_params
コード例 #2
0
ファイル: htk.py プロジェクト: tkraut/alex
    def get_frame(self, file_name, frame_id):
        """Return the front-end parameters for one frame of a wave file.

        On the first request for a given ``file_name`` the matching wave file
        is opened and validated, and the frame size, frame shift and MFCC
        front end are derived from its sample rate.  That state is reused
        for later requests with the same ``file_name``.  It is committed only
        after the whole setup succeeded, so a failed request cannot leave the
        object marked as initialised for a file it never loaded.

        Args:
            file_name: name used to look up the wave file through
                ``self.get_param_file_name``.
            frame_id: index of the requested frame; it is multiplied by the
                frame shift to obtain the read position in samples.

        Returns:
            The value returned by ``self.mfcc_front_end.param`` for the
            samples read at that position.

        Raises:
            Exception: if the wave file is not mono or not 16 bit.  Errors
                raised while opening the file propagate unchanged.
        """
        if self.last_file_name != file_name:
            # find matching param file
            param_file_name = self.get_param_file_name(file_name)

            # open the param file
            wave_reader = wave.open(param_file_name, 'r')

            try:
                if wave_reader.getnchannels() != 1:
                    raise Exception('Input wave is not in mono')

                if wave_reader.getsampwidth() != 2:
                    raise Exception('Input wave is not in 16bit')

                sample_rate = wave_reader.getframerate()
                # NOTE(review): the 10000000 divisor suggests windowsize and
                # targetrate are given in 100 ns units -- confirm.
                frame_size = int(sample_rate * self.windowsize / 10000000)
                # Round sizes above 64 samples up to a power of two, capped
                # at 2048; smaller sizes are left unchanged.
                if frame_size > 1024:
                    frame_size = 2048
                elif frame_size > 512:
                    frame_size = 1024
                elif frame_size > 256:
                    frame_size = 512
                elif frame_size > 128:
                    frame_size = 256
                elif frame_size > 64:
                    frame_size = 128

                frame_shift = int(sample_rate * self.targetrate / 10000000)
                front_end = MFCCFrontEnd(
                    sample_rate,
                    frame_size,
                    usec0=self.usec0,
                    usedelta=self.usedelta,
                    useacc=self.useacc,
                    n_last_frames=self.n_last_frames)
            except Exception:
                # The reader was rejected; do not leave its file handle open.
                wave_reader.close()
                raise

            # Commit the new state only after everything above succeeded.
            self.last_param_file_features = wave_reader
            self.frame_size = frame_size
            self.frame_shift = frame_shift
            self.mfcc_front_end = front_end
            self.last_file_name = file_name

        # Start half a window before the frame position, clamped to the
        # beginning of the file.
        start = max(frame_id * self.frame_shift - int(self.frame_size / 2), 0)
        self.last_param_file_features.setpos(start)
        frame = self.last_param_file_features.readframes(self.frame_size)

        frame = numpy.frombuffer(frame, dtype=numpy.int16)

        return self.mfcc_front_end.param(frame)
コード例 #3
0
    def __init__(self, cfg):
        """Build the detector from the ``cfg['VAD']['ffnn']`` settings.

        Loads the network from the configured ``model``, creates two
        bounded queues of ``filter_length`` entries for the speech and
        silence log probabilities, and constructs the front end.

        Args:
            cfg: configuration indexed as ``cfg['VAD']['ffnn'][...]`` and
                ``cfg['Audio']['sample_rate']``.

        Raises:
            ASRException: if the configured ``frontend`` is not ``'MFCC'``.
        """
        self.cfg = cfg

        self.audio_recorded_in = []

        self.ffnn = TheanoFFNN()
        ffnn_cfg = self.cfg['VAD']['ffnn']
        self.ffnn.load(ffnn_cfg['model'])

        history = ffnn_cfg['filter_length']
        self.log_probs_speech = deque(maxlen=history)
        self.log_probs_sil = deque(maxlen=history)

        self.last_decision = 0.0

        frontend = ffnn_cfg['frontend']
        if frontend != 'MFCC':
            raise ASRException('Unsupported frontend: %s' % (frontend, ))

        # Positional arguments of MFCCFrontEnd, in call order.  The summed
        # frame context sits between the two groups of plain settings.
        before_context = ('framesize', 'usehamming', 'preemcoef', 'numchans',
                          'ceplifter', 'numceps', 'enormalise', 'zmeansource',
                          'usepower', 'usec0', 'usecmn', 'usedelta', 'useacc')
        after_context = ('lofreq', 'hifreq', 'mel_banks_only')

        front_end_args = [self.cfg['Audio']['sample_rate']]
        front_end_args.extend(ffnn_cfg[key] for key in before_context)
        front_end_args.append(
            ffnn_cfg['n_last_frames'] + ffnn_cfg['n_prev_frames'])
        front_end_args.extend(ffnn_cfg[key] for key in after_context)

        self.front_end = MFCCFrontEnd(*front_end_args)
コード例 #4
0
ファイル: ffnn.py プロジェクト: henrypig/alex-1
    def __init__(self, model, filter_length, sample_rate, framesize,
                 frameshift, usehamming, preemcoef, numchans, ceplifter,
                 numceps, enormalise, zmeansource, usepower, usec0, usecmn,
                 usedelta, useacc, n_last_frames, n_prev_frames, lofreq,
                 hifreq, mel_banks_only):
        """Build the detector from explicit settings.

        Loads the network from ``model``, creates two bounded queues of
        ``filter_length`` entries for the speech and silence log
        probabilities, and constructs the MFCC front end.

        Args:
            model: passed to ``TheanoFFNN.load``.
            filter_length: maximum length of both log-probability queues.
            framesize: passed to the front end and stored on the instance.
            frameshift: stored on the instance; not used by the front end
                here.
            n_last_frames, n_prev_frames: their sum is passed to the front
                end as a single value.
            All remaining arguments are forwarded positionally to
            ``MFCCFrontEnd`` in the order shown in the body.
        """
        self.audio_recorded_in = []

        self.ffnn = TheanoFFNN()
        self.ffnn.load(model)

        self.log_probs_speech = deque(maxlen=filter_length)
        self.log_probs_sil = deque(maxlen=filter_length)

        self.last_decision = 0.0

        # The front end takes the two context sizes as one combined value.
        context_frames = n_last_frames + n_prev_frames
        front_end_args = (
            sample_rate,
            framesize,
            usehamming,
            preemcoef,
            numchans,
            ceplifter,
            numceps,
            enormalise,
            zmeansource,
            usepower,
            usec0,
            usecmn,
            usedelta,
            useacc,
            context_frames,
            lofreq,
            hifreq,
            mel_banks_only,
        )
        self.front_end = MFCCFrontEnd(*front_end_args)

        self.framesize = framesize
        self.frameshift = frameshift