Esempio n. 1
0
def main():
    """Extract DNN features for a set of utterances and write them out.

    Parses the command line, builds the dataset from ``feats_scp`` (optionally
    with utterance i-vectors), loads the model from ``model_in`` and obtains
    an extraction function for ``--output-layer``.  Utterances are then
    grouped into batches whose total frame count fits in a buffer sized from
    ``--chunk-size``; each batch is handed to ``__extract`` together with the
    output writer.

    Raises:
        ValueError: if the dataset contains no utterances (``max`` of an
            empty sequence).
    """
    desc = 'Extract features with DNN. Output to Kaldi ark.'
    parser = common.init_argparse(desc)
    parser.add_argument('model_in', help='Model that can be read by load_dnn')
    parser.add_argument('feats_scp', help='scp of input features')
    parser.add_argument('ark_out', help='Output ark file')
    parser.add_argument('--output-layer', type=int, default=-2,
                        help='Layer to use for extracting features. '
                             'Negative index can be used. For example, '
                             '-1 means the last layer, and so on.')
    parser.add_argument('--context', type=int, default=8,
                        help='Number of context frames for splicing')
    parser.add_argument('--padding', default='replicate',
                        help='What to do with out-of-bound frames. Valid '
                             'values: [replicate|zero]')
    parser.add_argument('--ivectors', help='Utterance i-vectors to append')
    parser.add_argument('--chunk-size', default='300m',
                        help='Chunk size for data buffering')
    args = parser.parse_args()

    io.log('Initializing dataset')
    ivectors = None
    if args.ivectors is not None:
        ivectors = io.ivector_ark_read(args.ivectors,
                                       dtype=theano.config.floatX)
    dataset = init_dataset(args.feats_scp, args.context, args.padding,
                           ivectors)
    io.log('Initializing model')
    dnn = load_dnn(args.model_in)

    # Size the frame buffer according to chunk_size.
    num_items = get_num_items(args.chunk_size, theano.config.floatX)
    # Floor division keeps the row count integral on Python 3 as well; a
    # float here would be rejected when used as an array shape below.
    max_frames = int(num_items // dataset.get_dim())
    # Built-in max over a generator works on both Python 2 and 3, whereas
    # np.max(map(...)) receives a lazy iterator on Python 3.
    max_utt_frames = max(dataset.get_num_frames_by_utt_name(utt)
                         for utt in dataset.get_utt_names())
    # Every single utterance must fit in the buffer, otherwise the batching
    # loop below could never place it.
    common.CHK_GE(max_frames, max_utt_frames)
    x = np.zeros((max_frames, dataset.get_dim()), dtype=theano.config.floatX)
    io.log('...getting extraction function')
    extract_fn = dnn.build_extract_feat_function(args.output_layer)
    io.log('Got it!')

    io.log('** Begin outputting to {} **'.format(args.ark_out))
    ark_out = KaldiWriteOut(args.ark_out)
    try:
        utt_names, utt_frames, total_frames = [], [], 0
        for utt in dataset.get_utt_names():
            frames = dataset.get_num_frames_by_utt_name(utt)
            # Flush the pending batch before it would overflow the buffer.
            if total_frames + frames > max_frames:
                __extract(extract_fn, ark_out, dataset, x,
                          utt_names, utt_frames)
                utt_names, utt_frames, total_frames = [], [], 0
            utt_names.append(utt)
            utt_frames.append(frames)
            total_frames += frames
        # Flush whatever is left after the last utterance.
        __extract(extract_fn, ark_out, dataset, x, utt_names, utt_frames)
    finally:
        # Release the writer even if extraction fails part-way through.
        ark_out.close()
Esempio n. 2
0
    # Per-utterance input shape: entries 1..3 of the first conv layer's
    # 'input_shape'. Entry 0 is dropped -- presumably the batch size,
    # TODO confirm against the config producer.
    input_shape_train = conv_configs[0]['input_shape']
    input_shape_1 = (input_shape_train[1], input_shape_train[2],
                     input_shape_train[3])

    # Fixed seed for the numpy RNG; the Theano random stream is seeded from it.
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    # Build the CNN_Forward network from the conv layer configs.
    cnn = CNN_Forward(numpy_rng=rng,
                      theano_rng=theano_rng,
                      conv_layer_configs=conv_configs,
                      use_fast=use_fast)
    # Presumably loads the conv layer parameters from cnn_param_file --
    # verify against _file2nnet.
    _file2nnet(cnn.conv_layers,
               set_layer_num=len(conv_configs),
               filename=cnn_param_file)
    out_function = cnn.build_out_function()

    log('> ... processing the data')

    # Process utterances one at a time; read_next_utt() signalling an empty
    # utterance id ends the loop.
    while True:
        uttid, in_matrix = kaldiread.read_next_utt()
        if uttid == '':
            break
        # Keep the first dimension (one row per input row) and reshape the
        # rest to the per-utterance input shape computed above.
        in_matrix = numpy.reshape(in_matrix,
                                  (in_matrix.shape[0], ) + input_shape_1)
        out_matrix = out_function(in_matrix)
        kaldiwrite.write_kaldi_mat(uttid, out_matrix)

    kaldiwrite.close()

    log('> ... the saved features are %s' % (out_ark_file))
Esempio n. 3
0
    # Fixed seed for the numpy RNG; the Theano random stream is seeded from it.
    rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    cfg.init_activation()

    # Convolutional front end, with parameters taken from cnn_param_file.
    cnn = CNN_Forward(numpy_rng=rng, theano_rng=theano_rng,
                      conv_layer_configs=conv_configs, use_fast=use_fast)
    _file2nnet(cnn.conv_layers, set_layer_num=len(conv_configs),
               filename=cnn_param_file)
    out_function = cnn.build_out_function()

    # Fully-connected part stacked on the output of conv layer 1.
    # NOTE(review): the index 1 is hard-coded while the layer count comes
    # from len(conv_configs) -- confirm this is intended for configs that do
    # not have exactly two conv layers.
    model = DNNV(numpy_rng=rng, theano_rng=theano_rng, cfg=cfg,
                 input=cnn.conv_layers[1].output)
    # The same parameter file holds both parts; the DNN layers start after
    # the conv layers.
    _file2nnet(model.layers,
               set_layer_num=len(model.layers) + len(conv_configs),
               start_layer=len(conv_configs), filename=cnn_param_file)

    # Build the extraction function once: neither the model nor layer_index
    # changes between utterances, so rebuilding it per utterance is wasted
    # work.
    extract_function = model.build_extract_feat_function(layer_index)

    log('> ... processing the data')

    # Process utterances one at a time; an empty utterance id ends the loop.
    while True:
        uttid, in_matrix = kaldiread.read_next_utt()
        if uttid == '':
            break
        in_matrix = numpy.reshape(in_matrix,
                                  (in_matrix.shape[0],) + input_shape_1)
        # Two stages: conv front end first, then the selected DNN layer.
        mid_matrix = out_function(in_matrix)
        out_matrix = extract_function(mid_matrix)
        kaldiwrite.write_kaldi_mat(uttid, out_matrix)

    kaldiwrite.close()

    log('> ... the saved features are %s' % (out_ark_file))