Example no. 1
##  You should have received a copy of the GNU General Public License
##  along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import numpy
import keras
import kaldiIO
from signal import signal, SIGPIPE, SIG_DFL

if __name__ == '__main__':
    model = sys.argv[1]

    if not model.endswith('.h5'):
        raise TypeError(
            'Unsupported model type. Please use h5 format. Update Keras if needed'
        )

    ## Load model
    m = keras.models.load_model(model)

    ## Kaldi ark streams: read features from stdin, write log-probabilities to stdout
    arkIn = sys.stdin.buffer
    arkOut = sys.stdout.buffer
    encoding = sys.stdout.encoding
    signal(SIGPIPE, SIG_DFL)  ## exit quietly if the downstream consumer closes the pipe

    ## Forward each utterance through the network and write the log of its output
    uttId, featMat = kaldiIO.readUtterance(arkIn)
    while uttId:
        logProbMat = numpy.log(m.predict(featMat))
        kaldiIO.writeUtterance(uttId, logProbMat, arkOut, encoding)
        uttId, featMat = kaldiIO.readUtterance(arkIn)
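
Both examples depend on the helper module kaldiIO, which is not shown here. As a rough orientation only, a minimal sketch of what readUtterance and writeUtterance might look like for Kaldi's binary ark format (assuming single-precision 'FM' matrices; the real module's implementation may differ) is:

import struct
import numpy

def readUtterance(ark):
    ## Read one utterance (key plus binary float matrix); return (None, None) at end of stream
    uttId = b''
    c = ark.read(1)
    if not c:
        return None, None
    while c != b' ':
        uttId += c
        c = ark.read(1)
    ark.read(2)                                 ## binary marker '\0B'
    ark.read(3)                                 ## matrix type token, e.g. 'FM '
    ark.read(1)                                 ## int size marker '\x04'
    rows = struct.unpack('<i', ark.read(4))[0]
    ark.read(1)                                 ## int size marker '\x04'
    cols = struct.unpack('<i', ark.read(4))[0]
    data = ark.read(rows * cols * 4)
    return uttId.decode(), numpy.frombuffer(data, dtype=numpy.float32).reshape(rows, cols)

def writeUtterance(uttId, mat, ark, encoding):
    ## Write one utterance as a Kaldi binary single-precision matrix
    mat = numpy.asarray(mat, dtype=numpy.float32)
    rows, cols = mat.shape
    ark.write(uttId.encode(encoding or 'utf-8') + b' ' + b'\0BFM ')
    ark.write(b'\x04' + struct.pack('<i', rows))
    ark.write(b'\x04' + struct.pack('<i', cols))
    ark.write(mat.tobytes())

In Example no. 1 the script is meant to sit in a pipeline: it reads feature matrices from stdin, writes log-probabilities to stdout, and restores the default SIGPIPE handler so it exits quietly when the downstream consumer closes the pipe.
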
Example no. 2
##  You should have received a copy of the GNU General Public License
##  along with this program.  If not, see <http://www.gnu.org/licenses/>.


import sys
import os
import time
import numpy
import keras
import kaldiIO
from subprocess import Popen, PIPE
from signal import signal, SIGPIPE, SIG_DFL

if __name__ == '__main__':
    model = sys.argv[1]

    if not model.endswith('.h5'):
        raise TypeError('Unsupported model type. Please use h5 format. Update Keras if needed')

    ## Load model
    m = keras.models.load_model(model)

    arkIn = sys.stdin.buffer
    arkOut = sys.stdout.buffer
    encoding = sys.stdout.encoding
    signal(SIGPIPE, SIG_DFL)

    uttId, featMat = kaldiIO.readUtterance(arkIn)
    while uttId:
        logProbMat = numpy.log(m.predict(featMat))
        kaldiIO.writeUtterance(uttId, logProbMat, arkOut, encoding)
        uttId, featMat = kaldiIO.readUtterance(arkIn)
    
    ## Load the ensemble of teacher networks (model_list holds their file names)
    print('Loading models ....')
    model_list = [
        keras.models.load_model('pavans_1024x4/' + m) for m in model_list
    ]
    print('Loading models, DONE')
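
    ## NOTE: geometric_mean/arithmetic_mean are used below but not shown in this
    ## example; the definitions here are only a plausible sketch (an assumption,
    ## not the original code): a weighted average of the ensemble's per-frame
    ## posteriors, computed in the log domain for the geometric variant.
    def arithmetic_mean(featMat, models, weights):
        preds = numpy.stack([mdl.predict(featMat) for mdl in models])
        return numpy.average(preds, axis=0, weights=weights)

    def geometric_mean(featMat, models, weights):
        preds = numpy.stack([mdl.predict(featMat) for mdl in models])
        logAvg = numpy.average(numpy.log(preds), axis=0, weights=weights)
        post = numpy.exp(logAvg)
        return post / post.sum(axis=1, keepdims=True)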

    # Splice features
    p1 = Popen(
        ['splice-feats', '--print-args=false', '--left-context=5',
         '--right-context=5', 'scp:' + data + '/feats.scp', 'ark:-'],
        stdout=PIPE)

    f = open(location + '/temp.ark', 'wb')
    st = time.time()
    while True:
        uid, featMat = kaldiIO.readUtterance(p1.stdout)
        if uid is None:
            print('Reached the end of feats.scp')
            break
        print('Processing utt id: ' + uid)
        #log_avg_prediction = numpy.log(arithmetic_mean(featMat, model_list, weights))
        log_avg_prediction = numpy.log(
            geometric_mean(featMat, model_list, weights))
        #writeUtteranceText(uid, log_avg_prediction, f)
        kaldiIO.writeUtterance(uid, log_avg_prediction, f, encoding)
    et = time.time()
    print('Time taken: ', et - st)
    f.close()
    ## Save teacher predictions on disk
    st = time.time()
    outfile = data_tr + '/teacher_predictions.ark'
    print('Writing teacher predictions...', outfile)
    p1 = Popen(
        ['splice-feats', '--print-args=false', '--left-context=5',
         '--right-context=5', 'scp:' + data_tr + '/feats.scp', 'ark:-'],
        stdout=PIPE)
    with open('temp_teacher.ark', 'wb') as f:
        while True:
            uid, featMat = kaldiIO.readUtterance(p1.stdout)
            if uid is None:
                break
            avg_prediction = geometric_mean(featMat, model_list, weights)
            kaldiIO.writeUtterance(uid, avg_prediction, f, sys.stdout.encoding)
    p1.stdout.close()
    et = time.time()
    os.rename('temp_teacher.ark', outfile)
    print('Done writing teacher predictions for ', data_tr, '. Time taken: ',
          et - st)

    ## Initialize training and cross-validation data generators
    trGen = dataGenerator_student_tr(data_tr, ali_tr, sgmm,
                                     learning['batchSize'])
    cvGen = dataGenerator_student_cv(data_cv, ali_cv, sgmm,
                                     learning['batchSize'])

    ## Define DNN architecture and initialize weights
    m = keras.models.Sequential([
        keras.layers.Dense(1024, activation='relu', input_dim=429),