Example no. 1
0
                          num_hidden_proj=num_hidden_proj)
        # NOTE(review): fragment starts mid-call; `sym` is presumably the
        # unrolled LSTM symbol built by the (unseen) lines above -- confirm
        # against the full file.
        data_names = [x[0] for x in data_test.provide_data]
        label_names = ['softmax_label']
        # Wrap the symbol in an MXNet Module on the requested device(s).
        module = mx.mod.Module(sym,
                               context=contexts,
                               data_names=data_names,
                               label_names=label_names)
    # set the parameters
    # Bind for inference only (no label shapes, no gradients) and load the
    # pre-trained arg/aux parameters.
    module.bind(data_shapes=data_test.provide_data,
                label_shapes=None,
                for_training=False)
    module.set_params(arg_params=arg_params, aux_params=aux_params)

    #kaldiWriter = KaldiWriteOut(None, out_file)
    #kaldiWriter.open_or_fd()
    # Emit a Kaldi script/archive pair (feats.scp + feats.ark) under out_dir.
    kaldiWriter = KaldiWriteOut(out_dir + "/feats.scp", out_dir + "/feats.ark")
    kaldiWriter.open()
    for preds, i_batch, batch in module.iter_predict(data_test):
        label = batch.label[0].asnumpy().astype('int32')
        posteriors = preds[0].asnumpy().astype('float32')
        # copy over states
        if decoding_method == METHOD_BUCKETING:
            for (ind, utt) in enumerate(batch.utt_id):
                # "GAP_UTT" appears to mark padding utterances inserted by
                # the data iterator; they are skipped.
                if utt != "GAP_UTT":
                    # Truncate rows to label[0][0] (presumably the utterance
                    # length -- TODO confirm), drop column 0, and subtract
                    # log(data_train.label_mean) to turn posteriors into
                    # scaled log-likelihoods; 1e-20 guards log(0).
                    posteriors = np.log(posteriors[:label[0][0], 1:] +
                                        1e-20) - np.log(
                                            data_train.label_mean).T
                    kaldiWriter.write(utt, posteriors)
        elif decoding_method == METHOD_SIMPLE:
            for (ind, utt) in enumerate(batch.utt_id):
                if utt != "GAP_UTT":
                    # NOTE(review): fragment truncated here -- the
                    # METHOD_SIMPLE body is not visible in this chunk.
Example no. 2
0
                          num_label=label_dim,
                          output_states=True,
                          num_hidden_proj=num_hidden_proj)
        # NOTE(review): fragment starts mid-call to an (unseen) symbol
        # builder; output_states=True suggests the network also emits LSTM
        # states alongside the softmax output -- confirm against full file.
        data_names = [x[0] for x in data_test.provide_data]
        label_names = ['softmax_label']
        # Wrap the symbol in an MXNet Module on the requested device(s).
        module = mx.mod.Module(sym,
                               context=contexts,
                               data_names=data_names,
                               label_names=label_names)
    # set the parameters
    # Bind for inference only and load the pre-trained parameters.
    module.bind(data_shapes=data_test.provide_data,
                label_shapes=None,
                for_training=False)
    module.set_params(arg_params=arg_params, aux_params=aux_params)

    # Write to a single Kaldi archive/stream named by out_file.
    kaldiWriter = KaldiWriteOut(None, out_file)
    kaldiWriter.open_or_fd()

    for preds, i_batch, batch in module.iter_predict(data_test):
        #pred_label = np.array(preds[0].asnumpy().argmax(axis=1))
        label = batch.label[0].asnumpy().astype('int32')
        # Trailing [0]: keep the first item of the batch only -- presumably
        # batch_size is 1 here, or extra outputs are states; TODO confirm.
        posteriors = preds[0].asnumpy().astype('float32')[0]
        #print np.sum(posteriors[1][:])
        # copy over states
        if decoding_method == METHOD_BUCKETING:
            for (ind, utt) in enumerate(batch.utt_id):
                # Skip the iterator's "GAP_UTT" padding entries.
                if utt != "GAP_UTT":
                    #print sum(posteriors[0,:])
                    # Truncate to label[0][0] rows, drop column 0, subtract
                    # log(data_train.label_mean) (1e-20 guards log(0)).
                    # NOTE(review): fragment truncated here -- the write call
                    # and remaining branches are not visible in this chunk.
                    posteriors = np.log(posteriors[:label[0][0], 1:] +
                                        1e-20) - np.log(
                                            data_train.label_mean).T
Example no. 3
0
        # Fixed truncation length for the BPTT-style test iterator.
        truncate_len=20
        # Padded, unshuffled iterator so utterance order is preserved for
        # writing; has_label=True so lengths/labels are available below.
        data_test = TruncatedSentenceIter(test_sets, batch_size, init_states,
                                         truncate_len, feat_dim=feat_dim,
                                         do_shuffling=False, pad_zeros=True, has_label=True)

        sym = lstm_unroll(num_lstm_layer, truncate_len, feat_dim, num_hidden=num_hidden,
                          num_label=label_dim, output_states=True, num_hidden_proj=num_hidden_proj)
        data_names = [x[0] for x in data_test.provide_data]
        label_names = ['softmax_label']
        module = mx.mod.Module(sym, context=contexts, data_names=data_names,
                               label_names=label_names)
    # set the parameters
    # Bind for inference only and load the pre-trained parameters.
    module.bind(data_shapes=data_test.provide_data, label_shapes=None, for_training=False)
    module.set_params(arg_params=arg_params, aux_params=aux_params)
    
    # Write to a single Kaldi archive/stream named by out_file.
    kaldiWriter = KaldiWriteOut(None, out_file)
    kaldiWriter.open_or_fd()

    for preds, i_batch, batch in module.iter_predict(data_test):
        #pred_label = np.array(preds[0].asnumpy().argmax(axis=1))
        label = batch.label[0].asnumpy().astype('int32')
        # Trailing [0]: first element of the prediction batch -- TODO confirm
        # whether this assumes batch_size == 1.
        posteriors = preds[0].asnumpy().astype('float32')[0]
        #print np.sum(posteriors[1][:])
        # copy over states
        if decoding_method == METHOD_BUCKETING:
            for (ind, utt) in enumerate(batch.utt_id):
                # Skip the iterator's "GAP_UTT" padding entries.
                if utt != "GAP_UTT":
                    #print sum(posteriors[0,:])
                    # Truncate to label[0][0] rows, drop column 0, and
                    # subtract log(data_train.label_mean); 1e-20 guards log(0).
                    posteriors = np.log(posteriors[:label[0][0],1:] + 1e-20) - np.log(data_train.label_mean).T
                    kaldiWriter.write(utt, posteriors)
        elif decoding_method == METHOD_SIMPLE:
            # NOTE(review): fragment truncated here -- the METHOD_SIMPLE
            # body is not visible in this chunk.
Example no. 4
0
    # NOTE(review): fragment is the interior of a function taking `args`
    # (a parsed config wrapper); the enclosing def is not visible here.
    init_states, test_sets = prepare_data(args)
    state_names = [x[0] for x in init_states]

    # Pull model/data hyper-parameters from the config file sections.
    batch_size = args.config.getint('train', 'batch_size')
    num_hidden = args.config.getint('arch', 'num_hidden')
    num_lstm_layer = args.config.getint('arch', 'num_lstm_layer')
    feat_dim = args.config.getint('data', 'xdim')
    label_dim = args.config.getint('data', 'ydim')
    out_file = args.config.get('data', 'out_file')
    num_epoch = args.config.getint('train', 'num_epoch')
    model_name = get_checkpoint_path(args)
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s')

    # load the model
    # Accumulator for per-label occurrence counts, one row per label.
    label_mean = np.zeros((label_dim,1), dtype='float32')
    data_test = TruncatedSentenceIter(test_sets, batch_size, init_states,
                                         20, feat_dim=feat_dim,
                                         do_shuffling=False, pad_zeros=True, has_label=True)

    # Histogram each label batch into label_dim bins and accumulate.
    # NOTE(review): despite the name, label_mean holds raw counts here --
    # no normalization is visible in this fragment; presumably consumers
    # normalize (or only need relative frequencies). TODO confirm.
    for i, batch in enumerate(data_test.labels):
        hist, edges = np.histogram(batch.flat, bins=range(0,label_dim+1))
        label_mean += hist.reshape(label_dim,1)

    # Write the accumulated counts under the key "label_mean".
    kaldiWriter = KaldiWriteOut(None, out_file)
    kaldiWriter.open_or_fd()
    kaldiWriter.write("label_mean", label_mean)


    # Echo the effective configuration to stderr for the log.
    args.config.write(sys.stderr)
Example no. 5
0
    # NOTE(review): fragment starts mid-function; `args`, `test_sets`,
    # `init_states` and `batch_size` are defined in unseen lines above.
    # Pull model/data hyper-parameters from the config file sections.
    num_hidden = args.config.getint('arch', 'num_hidden')
    num_lstm_layer = args.config.getint('arch', 'num_lstm_layer')
    feat_dim = args.config.getint('data', 'xdim')
    label_dim = args.config.getint('data', 'ydim')
    out_file = args.config.get('data', 'out_file')
    num_epoch = args.config.getint('train', 'num_epoch')
    model_name = get_checkpoint_path(args)
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)-15s %(message)s')

    # load the model
    # Accumulator for per-label occurrence counts, one row per label.
    label_mean = np.zeros((label_dim, 1), dtype='float32')
    data_test = TruncatedSentenceIter(test_sets,
                                      batch_size,
                                      init_states,
                                      20,
                                      feat_dim=feat_dim,
                                      do_shuffling=False,
                                      pad_zeros=True,
                                      has_label=True)

    # Histogram each label batch into label_dim bins and accumulate.
    # NOTE(review): despite the name, label_mean holds raw counts here --
    # no normalization is visible in this fragment. TODO confirm consumers
    # normalize or only need relative frequencies.
    for i, batch in enumerate(data_test.labels):
        hist, edges = np.histogram(batch.flat, bins=range(0, label_dim + 1))
        label_mean += hist.reshape(label_dim, 1)

    # Write the accumulated counts under the key "label_mean".
    kaldiWriter = KaldiWriteOut(None, out_file)
    kaldiWriter.open_or_fd()
    kaldiWriter.write("label_mean", label_mean)

    # Echo the effective configuration to stderr for the log.
    args.config.write(sys.stderr)