Beispiel #1
0
    def comp_feat(self, sig, rate):
        """
        compute log filterbank features for one signal

        The signal is first passed through ``snip`` together with the
        configured window length and window step. The filterbank function
        is ``base.logfbank_scipy`` when the configuration has
        ``scipy == 'True'`` and ``base.logfbank`` otherwise.

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array

        Raises:
            Exception: if conf['dynamic'] is not one of 'nodelta', 'delta'
                or 'ddelta'
        """

        conf = self.conf

        # snip the edges of the signal before any feature computation
        window_length = float(conf['winlen'])
        window_step = float(conf['winstep'])
        snipped = snip(sig, rate, window_length, window_step)

        # pick the filterbank implementation requested by the configuration
        use_scipy = 'scipy' in conf and conf['scipy'] == 'True'
        filterbank = base.logfbank_scipy if use_scipy else base.logfbank
        features, energy = filterbank(snipped, rate, conf)

        # optionally append the energy as one extra feature column
        if conf['include_energy'] == 'True':
            features = np.append(features, energy[:, np.newaxis], 1)

        # optionally post-process with base.delta / base.ddelta
        dynamic = conf['dynamic']
        if dynamic == 'nodelta':
            return features
        if dynamic == 'delta':
            return base.delta(features)
        if dynamic == 'ddelta':
            return base.ddelta(features)

        raise Exception('unknown dynamic type')
Beispiel #2
0
def testit(X, Y, batch_size, MODEL):
    """
    evaluate MODEL on (X, Y) in consecutive batches

    Only whole batches are evaluated: examples beyond
    ``(X.shape[0] // batch_size) * batch_size`` are ignored.

    Args:
        X: the inputs, indexed along the first axis
        Y: the labels aligned with X
        batch_size: the number of examples per batch
        MODEL: an object providing ``test(x_batch, y_batch)`` and
            ``predict(x_batch)``; BULBUL instances are fed log filterbank
            features instead of the raw inputs

    Returns:
        a pair (score, accuracy) where score is the value returned by
        roc_auc_score for the evaluated labels and the thresholded
        predictions, and accuracy is the mean of the per-batch
        ``MODEL.test`` results
    """
    # floor division keeps the batch count an integer on Python 2 and 3
    n_batch = X.shape[0] // batch_size

    # the model type cannot change between batches, so check it only once
    needs_fbank = isinstance(MODEL, BULBUL)

    y_hat = []
    accu = []
    for batch in range(n_batch):
        batch_slice = slice(batch * batch_size, (batch + 1) * batch_size)

        x_batch = X[batch_slice].astype('float32')
        if needs_fbank:
            # replace every raw input by its transposed log filterbank
            x_batch = asarray([
                base.logfbank(x,
                              samplerate=22050,
                              winlen=0.046,
                              winstep=0.010,
                              nfilt=80,
                              nfft=1024,
                              lowfreq=10,
                              highfreq=11000).T
                for x in x_batch
            ]).astype('float32')
        y_batch = Y[batch_slice].astype('int32')

        accu.append(MODEL.test(x_batch, y_batch))
        y_hat.append(MODEL.predict(x_batch))

    accuracy = array(accu).mean()
    # a single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function
    print('ACCURACY  %s' % (accuracy,))

    y_hat = concatenate(y_hat, axis=0)
    # put each prediction next to its complement; argmin over the two columns
    # is 1 where the prediction exceeds its complement and 0 otherwise
    y_hat = vstack([y_hat.reshape((1, -1)), 1 - y_hat.reshape((1, -1))]).T
    return roc_auc_score(Y[:shape(y_hat)[0]], y_hat.argmin(axis=1)), accuracy
Beispiel #3
0
    def comp_feat(self, sig, rate):
        '''
        compute log filterbank features for one signal

        The features come from ``base.logfbank``. Depending on the
        configuration the energy is appended as an extra column, the result
        is passed through ``base.delta`` or ``base.ddelta``, and it is
        mean and variance normalized.

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array

        Raises:
            Exception: if conf['dynamic'] is not one of 'nodelta', 'delta'
                or 'ddelta'
        '''

        feat, energy = base.logfbank(sig, rate, self.conf)

        # optionally append the energy as one extra feature column
        if self.conf['include_energy'] == 'True':
            feat = np.append(feat, energy[:, np.newaxis], 1)

        dynamic = self.conf['dynamic']
        if dynamic == 'delta':
            feat = base.delta(feat)
        elif dynamic == 'ddelta':
            feat = base.ddelta(feat)
        elif dynamic != 'nodelta':
            raise Exception('unknown dynamic type')

        # mean and variance normalize the features
        if self.conf['mvn'] == 'True':
            std = feat.std(0)
            # a dimension that is constant over the sequence has zero
            # deviation; dividing by it would give 0/0 = NaN, so divide by 1
            # instead and let that dimension come out as all zeros
            std = np.where(std == 0, 1.0, std)
            feat = (feat - feat.mean(0)) / std

        return feat
Beispiel #4
0
def trainit(X_train, Y_train, X_test, Y_test, batch_size, n_epochs, MODEL,
            index_0, index_1, l_r):
    """
    train MODEL on randomly sampled batches and pickle the results

    Every batch is built from ``batch_size // 2`` indices sampled from
    index_0 and ``batch_size // 2`` sampled from index_1. After each epoch
    the model is evaluated with ``testit`` on the test data. When training
    is done, the model representation of five further batches is collected
    and everything is pickled to the file ``name``.

    Args:
        X_train: the training inputs
        Y_train: the training labels
        X_test: the test inputs, passed to testit
        Y_test: the test labels, passed to testit
        batch_size: the number of examples per batch
        n_epochs: the number of passes over the training data
        MODEL: an object providing ``get_filters()``,
            ``train(x_batch, y_batch, learning_rate)`` and
            ``get_repr(x_batch)``, plus whatever testit requires
        index_0: the training indices the first half of each batch is
            sampled from (presumably the class-0 examples; confirm with
            the caller)
        index_1: the training indices the second half of each batch is
            sampled from (presumably the class-1 examples; confirm with
            the caller)
        l_r: the learning rate; must provide ``astype``

    Returns:
        None. The list [train_error, test_error, accuracy, filters,
        representation] is written with cPickle to the file ``name``.
    """
    # floor division keeps the counts integers on Python 2 and 3
    n_batch = X_train.shape[0] // batch_size
    half_batch = batch_size // 2

    # the model type cannot change during training, so check it only once
    is_bulbul = isinstance(MODEL, BULBUL)

    def model_input(x_batch):
        # BULBUL models get the transposed log filterbank of every input,
        # all other models get the batch unchanged
        if not is_bulbul:
            return x_batch
        return asarray([
            base.logfbank(x,
                          samplerate=22050,
                          winlen=0.046,
                          winstep=0.010,
                          nfilt=80,
                          nfft=1024,
                          lowfreq=10,
                          highfreq=11000).T for x in x_batch
        ]).astype('float32')

    train_error = []
    test_error = []
    filters = []
    representation = []
    accuracy = []
    for n_epoch in range(n_epochs):
        # keep the filters of the first ten epochs and of the last one
        if n_epoch < 10 or n_epoch == n_epochs - 1:
            filters.append(MODEL.get_filters())

        for batch in range(n_batch):
            index_0_batch = random.sample(index_0, half_batch)
            index_1_batch = random.sample(index_1, half_batch)
            index_batch = concatenate([index_0_batch, index_1_batch])
            x_batch = model_input(X_train[index_batch])
            y_batch = Y_train[index_batch]
            train_error.append(
                MODEL.train(x_batch, y_batch, l_r.astype('float32')))
            if batch % 40 == 0:
                # a single pre-formatted argument prints the same text under
                # the Python 2 print statement and the Python 3 print function
                print('batch n_ %s out of %s :  training error= %s' %
                      (batch, n_batch, train_error[-1]))

        auc, accu = testit(X_test, Y_test, batch_size, MODEL)
        test_error.append(auc)
        accuracy.append(accu)
        print('epoch n_ %s  AUC=  %s' % (n_epoch, test_error[-1]))

    # NOTE(review): these indices are computed from Y_test but used below to
    # index X_train / Y_train; confirm which data set is intended
    repr_index_0 = find(Y_test == 0)
    repr_index_1 = find(Y_test == 1)
    for i in range(5):
        start = i * batch_size // 2
        stop = (i + 1) * batch_size // 2
        index_batch = concatenate([
            repr_index_0[start:stop],
            repr_index_1[start:stop]
        ])
        x_batch = model_input(X_train[index_batch])
        y_batch = Y_train[index_batch]
        representation.append([MODEL.get_repr(x_batch), y_batch])

    # NOTE(review): `name` is neither a parameter nor a local of this
    # function, so it has to be defined at module level
    # the with-block closes the file even when pickling fails
    with open(name, 'wb') as f:
        cPickle.dump(
            [train_error, test_error, accuracy, filters, representation], f)
Beispiel #5
0
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

#!/usr/bin/env python
''' Example for sigproc.py '''

# pylint: skip-file

import scipy.io.wavfile as wav

from base import mfcc
from base import delta
from base import logfbank

if __name__ == '__main__':
    # read the demo recording: the sampling rate comes first, the samples
    # second
    sample_rate, samples = wav.read("english.wav")

    # MFCC features and their deltas (computed with 2 as second argument);
    # both are only computed to demonstrate the calls and are not printed
    cepstra = mfcc(samples, sample_rate)
    cepstra_delta = delta(cepstra, 2)

    # log filterbank output of the same recording
    log_fbank = logfbank(samples, sample_rate)

    # show rows 1 and 2 of the log filterbank output
    print(log_fbank[1:3, :])