Example #1
def main(args):
    if args.use_ivectors:
        args.input_dim = args.input_dim + args.ivector_dim
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')

    network = models.deep_bidir_lstm_alex(input_var=input_data,
                                          mask_var=input_mask,
                                          input_dim=args.input_dim,
                                          num_units_list=[args.num_nodes] *
                                          args.num_layers,
                                          output_dim=args.output_dim)

    network_params = get_all_params(network, trainable=True)

    print('Loading Parameters...', file=sys.stderr)
    if args.model:
        with open(args.model, 'rb') as f:
            pretrain_network_params_val,  pretrain_update_params_val, \
                    pretrain_total_epoch_cnt = pickle.load(f)

            set_model_param_value(network_params, pretrain_network_params_val)
    else:
        print('Must specify network to load', file=sys.stderr)
        sys.exit(1)

    ff_fn = ff(input_data=input_data, input_mask=input_mask, network=network)
    feat_stream = fuel_utils.get_feat_stream(args.data_path,
                                             args.dataset,
                                             args.batch_size,
                                             use_ivectors=args.use_ivectors)
    uttid_stream = fuel_utils.get_uttid_stream(args.data_path, args.dataset,
                                               args.batch_size)

    writer = kaldi_io.BaseFloatMatrixWriter(args.wxfilename)

    for batch_idx, (feat_batch, uttid_batch) in enumerate(
            zip(feat_stream.get_epoch_iterator(),
                uttid_stream.get_epoch_iterator())):
        input_data, input_mask = feat_batch
        feat_lens = input_mask.sum(axis=1)

        print('Feed-forwarding...', file=sys.stderr)
        net_output = ff_fn(input_data, input_mask)

        print('Writing outputs...', file=sys.stderr)
        for out_idx, (output,
                      uttid) in enumerate(zip(net_output[0], uttid_batch[0])):
            valid_len = feat_lens[out_idx]
            writer.write(uttid.encode('ascii'), numpy.log(output[:valid_len]))

    writer.close()
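
Both this example and the next call an `ff` helper that the snippets do not show. Below is a minimal sketch of what it might look like with Theano/Lasagne; the body is an assumption, only the keyword signature and the list-style return (indexed as `net_output[0]`) are taken from the examples:

import theano
from lasagne.layers import get_output

def ff(input_data, input_mask, network):
    # Compile the network's forward pass into a callable function.
    # deterministic=True disables dropout at inference time; returning a
    # list of outputs matches the net_output[0] indexing used above.
    output = get_output(network, deterministic=True)
    return theano.function(inputs=[input_data, input_mask],
                           outputs=[output])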
Example #2
def main(options):
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')
    target_mask = T.fmatrix('target_mask')

    network = build_network(input_data=input_data,
                            input_mask=input_mask,
                            num_inputs=options['num_inputs'],
                            num_units_list=options['num_units_list'],
                            num_outputs=options['num_outputs'],
                            dropout_ratio=options['dropout_ratio'],
                            use_layer_norm=options['use_layer_norm'],
                            learn_init=True,
                            grad_clipping=1.0)
    network_params = get_all_params(network, trainable=True)

    if options['reload_model']:
        print('Loading model...')
        with open(options['reload_model'], 'rb') as f:
            pretrain_network_params_val, pretrain_update_params_val, \
                pretrain_total_batch_cnt = pickle.load(f)
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
        print('Must specify network to load')
        sys.exit(1)

    ff_fn = ff(input_data=input_data, input_mask=input_mask, network=network)
    feat_stream = get_feat_stream(options['data_path'], options['dataset'], options['batch_size']) 
    uttid_stream = get_uttid_stream(options['data_path'], options['dataset'], options['batch_size']) 
    
    writer = kaldi_io.BaseFloatMatrixWriter(options['save_path'])

    for batch_idx, (feat_batch, uttid_batch) in enumerate(zip(feat_stream.get_epoch_iterator(), uttid_stream.get_epoch_iterator())):
        print('Processing batch {}'.format(batch_idx))
        input_data, input_mask = feat_batch 

        net_output = ff_fn(input_data, input_mask)

        for output, uttid in zip(net_output[0], uttid_batch[0]):
            writer.write(uttid.encode('ascii'), output)

    writer.close()
Example #3
    padding_left = padding
    if args.padding_left is not None: padding_left = int(args.padding_left)

    padding_right = padding
    if args.padding_right is not None: padding_right = int(args.padding_right)

    if padding_left < 0 or padding_right < 0:
        logging.error("Padding can't be negative!")
        sys.exit(1)

    count = 0
    logging.info("Padding with %d in the left and %d on the right",
                 padding_left, padding_right)

    # Should use `with`, but if something goes wrong the files will get closed anyway
    reader = kaldi_io.SequentialBaseFloatMatrixReader(args.in_rxfilename)
    writer = kaldi_io.BaseFloatMatrixWriter(args.out_wxfilename)

    size_writer = None
    if args.orig_size_wxfilename is not None:
        size_writer = kaldi_io.PythonWriter(args.orig_size_wxfilename)

    for name, value in reader:
        count += 1
        if padding_left + padding_right == 0:
            padded = value
        else:
            num_frames, frame_dim = value.shape
            padded = np.empty(shape=(num_frames + padding_left + padding_right,
                                     frame_dim),
                              dtype=value.dtype)
            # The snippet is cut off here; the remainder of the loop is an
            # assumption: replicate edge frames as in Example #4, then write
            # the padded matrix and, optionally, the original frame count.
            padded[:padding_left] = value[0]
            padded[padding_left:padding_left + num_frames] = value
            padded[padding_left + num_frames:] = value[-1]
        writer.write(name, padded)
        if size_writer is not None:
            size_writer.write(name, value.shape[0])
Example #4
# Assumed setup (the snippet begins mid-script): the imports and the
# tf.ConfigProto construction that the lines below rely on (TF1-era Keras).
import sys
from signal import signal, SIGPIPE, SIG_DFL

import keras
import numpy as np
import tensorflow as tf

import kaldi_io

config = tf.ConfigProto()
config.inter_op_parallelism_threads = 1
keras.backend.tensorflow_backend.set_session(tf.Session(config=config))

if __name__ == '__main__':
    model = sys.argv[1]
    left_context = int(sys.argv[2])
    right_context = int(sys.argv[3])

    if not model.endswith('.h5'):
        raise TypeError(
            'Unsupported model type. Please use h5 format. Update Keras if needed'
        )

    m = keras.models.load_model(model)
    with kaldi_io.SequentialBaseFloatMatrixReader("ark:-") as arkIn, \
            kaldi_io.BaseFloatMatrixWriter("ark,t:-") as arkOut:
        signal(SIGPIPE, SIG_DFL)

        for utt, utt_feats in arkIn:
            feats = np.zeros(
                (utt_feats.shape[0] + left_context + right_context,
                 utt_feats.shape[1]))
            # Replicate edge frames into the context padding; guard the
            # right edge, since feats[-0:] would overwrite the whole array.
            feats[:left_context, :] = utt_feats[0]
            if right_context:
                feats[-right_context:, :] = utt_feats[-1]
            feats[left_context:left_context + utt_feats.shape[0], :] = utt_feats
            feats = np.expand_dims(feats, 0)

            logProbMat = np.log(m.predict(feats)[0])
            logProbMat[logProbMat == -np.inf] = -100
            arkOut.write(utt, logProbMat)
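
One detail worth a note: `np.log` maps zero posteriors to `-inf`, which downstream Kaldi decoding cannot digest, so the example floors those entries at -100. A toy illustration with made-up values:

import numpy as np

probs = np.array([[0.7, 0.3, 0.0]])
logp = np.log(probs)            # the zero entry becomes -inf
logp[logp == -np.inf] = -100    # floor it so the decoder sees finite values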
Example #5
def main():
    parser = get_parser()
    args = parser.parse_args()
    config, model = get_config_and_model(args)

    dataset = config['Datasets'][args.subset]
    # Remove the training graph generator from the dataset. The testing
    # data may have characters for which we don't have CD symbols
    # and the graphs are not needed to compute the logits
    dataset.dataset.graph_gen = None
    owriter = kaldi_io.BaseFloatMatrixWriter(args.out_wspec)

    for j, batch in enumerate(dataset):
        sys.stderr.write("Processing batch %d/%d\n" % (j + 1, len(dataset)))
        feature_lens = Variable(batch['features'][1])
        features = Variable(batch['features'][0])
        speakers = batch['spkids']

        if Globals.cuda:
            features = features.cuda()

        with torch.no_grad():
            encoded, encoded_lens = model.encoder(features, feature_lens,
                                                  speakers)
            # t x bsz x num_classes
            logprobs = model.decoder.logits(encoded, encoded_lens)
            logprobs = logprobs.data.cpu().numpy()

        # transfer probability mass from hash `#` to blank `<pad>`
        if args.transfer_hash_prob:
            blank_probs = np.exp(logprobs[:, :, 0])
            hash_probs = np.exp(logprobs[:, :, 3])
            blank_probs += hash_probs - EPSILON
            hash_probs = EPSILON
            logprobs[:, :, 0] = np.log(blank_probs)
            logprobs[:, :, 3] = np.log(hash_probs)

        t, bsz, num_classes = logprobs.shape

        if args.imitate_biphones:
            logprobs = np.tile(logprobs, (1, 1, num_classes))
            num_classes = num_classes**2
            if not args.block_normalize:
                num_mono = int(np.round(num_classes**0.5))
                z = np.exp(logprobs).sum(axis=2, keepdims=True)
                # This epsilon has to be really tiny, otherwise the
                # distribution does not normalize properly
                logprobs -= np.log(z + EPSILON)
        elif args.block_normalize:
            num_mono = int(np.round(num_classes**0.5))
            z = np.exp(logprobs).reshape(t, bsz, num_mono, num_mono)
            z = z.sum(axis=3).repeat(num_mono, axis=2)
            logprobs -= np.log(z + EPSILON)
        elif args.block_marginalize:
            print("Block-marginalizing probabilities.")
            num_symbols = int(np.round(num_classes**0.5))
            probs = np.exp(logprobs)
            probs = (
                probs.reshape(t, bsz, num_symbols, num_symbols).sum(axis=2) /
                num_symbols)
            logprobs = np.log(probs)
            assert not np.any(np.isnan(logprobs))
        for i in np.argsort(batch['uttids']):
            example_len = encoded_lens[i]
            owriter[batch['uttids'][i]] = logprobs[:example_len, i, :]
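
The `block_normalize` branch is easiest to see on a toy array: the class axis is treated as a num_mono x num_mono biphone grid and each row of that grid is renormalized to sum to one. The shapes and values below are illustrative only:

import numpy as np

EPSILON = 1e-30
t, bsz, num_mono = 1, 1, 2
logprobs = np.log(np.array([0.1, 0.3, 0.2, 0.4]).reshape(t, bsz, 4))
z = np.exp(logprobs).reshape(t, bsz, num_mono, num_mono)
z = z.sum(axis=3).repeat(num_mono, axis=2)  # per-row sums, broadcast back
logprobs -= np.log(z + EPSILON)
# exp(logprobs) is now [0.25, 0.75, 1/3, 2/3]: each block sums to one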
Example #6
with open(args.model_file, 'r') as f:
    model_name = f.read()
nnet.read(model_name)

if args.prior_counts is not None:
    prior_counts = np.genfromtxt(args.prior_counts)
    priors = prior_counts / prior_counts.sum()
    log_priors = np.log(priors)

# here we are doing context window and feature normalization
feats += ' splice-feats --print-args=false --left-context='+str(splice) + \
        ' --right-context='+str(splice) + ' ark:- ark:-|'
feats += ' apply-cmvn --print-args=false --norm-vars=true ' + srcdir + '/cmvn.mat ark:- ark:- |'

count = 0
reader = kaldi_io.SequentialBaseFloatMatrixReader(feats)
writer = kaldi_io.BaseFloatMatrixWriter('ark:-')

for uid, feats in reader:
    nnet_out = nnet.predict(feats, no_softmax=args.no_softmax)
    if args.apply_log:
        nnet_out = np.log(nnet_out)

    if args.prior_counts is not None:
        log_likes = nnet_out - log_priors
        nnet_out = log_likes

    writer.write(uid, nnet_out)

    count += 1
    if args.verbose and count % 10 == 0:
        logger.info("LOG (nnet_forward.py) %d utterances processed" % count)
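
The prior subtraction above is the standard hybrid-decoding step: dividing posteriors p(s|x) by state priors p(s) yields likelihoods up to a constant, i.e. log p(x|s) = log p(s|x) - log p(s) + const. With made-up counts:

import numpy as np

prior_counts = np.array([300., 100.])    # state occupancy counts
log_priors = np.log(prior_counts / prior_counts.sum())
log_posteriors = np.log(np.array([[0.9, 0.1]]))
log_likes = log_posteriors - log_priors  # per-frame scaled log-likelihoods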
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config',
                        type=str,
                        default=None,
                        help='config file (Kaldi format)')
    parser.add_argument(
        '--frame-length',
        type=float,
        default=25,
        help='Frame length in milliseconds (float, default = 25)')
    parser.add_argument(
        '--frame-shift',
        type=float,
        default=10,
        help='Frame shift in milliseconds (float, default = 10)')
    parser.add_argument(
        '--window-type',
        type=str,
        default='hamming',
        help='Type of window ("hamming"|"hanning") '
        '(string, default = "hamming")')
    parser.add_argument(
        '--complex-format',
        type=str,
        default='real-imaginary',
        help='Format of complex numbers ("real-imaginary"|"magnitude-phase") '
        + '(string, default = "real-imaginary")')
    parser.add_argument('wav_scp',
                        metavar='IN',
                        type=str,
                        help='WAV scp files (do not accept command line)')
    parser.add_argument('feats_wspecifier',
                        metavar='OUT',
                        type=str,
                        help='<feats-wspecifier>')
    args = parser.parse_args()

    # config parser without section
    if args.config is not None:
        with open(args.config, 'r') as f:
            ini_str = '[root]\n' + f.read()
        # strip the '--' option prefixes and replace '-' with '_' so the
        # Kaldi-style option names parse as config keys
        ini_str = ini_str.replace('--', '').replace('-', '_')
        ini_fp = io.StringIO(ini_str)
        config = configparser.RawConfigParser()
        config.read_file(ini_fp)

        # set config file values as defaults
        parser.set_defaults(**dict(config.items('root')))
        args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    )
    for arg in vars(args):
        logging.info(arg + ": " + str(getattr(args, arg)))

    with open(args.wav_scp, 'r') as f:
        scp = [x.split() for x in f.readlines()]  # list of [utt_id, wav_name]

    writer = kaldi_io.BaseFloatMatrixWriter(args.feats_wspecifier)

    for x in scp:
        if len(x) != 2:
            sys.exit("wav.scp must be (utt_id, WAV)")
        (rate, sig) = wav.read(x[1])
        feat = cspec(sig, samplerate=rate)
        if args.complex_format == 'real-imaginary':
            feat = np.hstack((feat.real, feat.imag))
        elif args.complex_format == 'magnitude-phase':
            feat = np.hstack((np.absolute(feat), np.angle(feat)))
        else:
            sys.exit("unsupported complex number format: " +
                     args.complex_format)
        writer.write(x[0], feat)
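
Both complex-number formats reduce to a single `np.hstack`: the complex spectrogram is flattened into a real matrix, either as (real | imaginary) or as (magnitude | phase) halves. A quick check on a toy matrix:

import numpy as np

feat = np.array([[1 + 1j, 2 - 2j]])
real_imag = np.hstack((feat.real, feat.imag))               # [[1., 2., 1., -2.]]
mag_phase = np.hstack((np.absolute(feat), np.angle(feat)))  # magnitudes, radians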
Example #8
    def start(self):
        self.owriter = kaldi_io.BaseFloatMatrixWriter('ark:' + self.filename)
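
For reference, a minimal write/read round trip for the writer pattern shared by all of the examples above; the file name and matrix are illustrative (the reader class appears in Examples #3, #4 and #6):

import numpy as np
import kaldi_io

writer = kaldi_io.BaseFloatMatrixWriter('ark:out.ark')
writer.write('utt1', np.zeros((10, 40), dtype=np.float32))
writer.close()

for uttid, mat in kaldi_io.SequentialBaseFloatMatrixReader('ark:out.ark'):
    print(uttid, mat.shape)  # -> utt1 (10, 40)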