Example #1
def load_river_network(nnet_param='neural_network/river_network_params',
                       nnet_cfg='neural_network/river_network_cfg'):
    # load the pickled network configuration and set up the activations
    cfg = cPickle.load(smart_open(nnet_cfg, 'r'))
    cfg.init_activation()
    # build the model; the seed matches the other examples on this page
    numpy_rng = numpy.random.RandomState(89677)
    model = DNN(numpy_rng=numpy_rng, cfg=cfg)
    # load the trained parameters into the model layers
    _file2nnet(model.layers, filename=nnet_param)
    # compile a Theano function returning the final-layer outputs
    get_river_probs = model.build_extract_feat_function(-1)
    return get_river_probs
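A hedged usage sketch of the returned extractor: it is a compiled Theano function mapping a 2-D float32 feature matrix (one row per example) to the final-layer outputs. The input dimension here is a placeholder, not taken from the original.

get_river_probs = load_river_network()
feats = numpy.zeros((10, 100), dtype=numpy.float32)  # hypothetical 100-dim inputs
probs = get_river_probs(feats)  # one row of output activations per input row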
Example #2
log('> ... setting up the model and loading parameters')
numpy_rng = np.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2**30))
cfg_dnn = cPickle.load(open(filename, 'r'))
cfg_dnn.init_activation()
model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn)

# load model parameters
_file2nnet(model.layers, filename=wdir + '/rbm.param')

# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while not cfg_dnn.test_sets.is_finish():  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
    batch_num = int(math.ceil(1.0 * cfg_dnn.test_sets.cur_frame_num / batch_size))

    for batch_index in xrange(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size,
                        cfg_dnn.test_sets.cur_frame_num)  # the residue may be smaller than a mini-batch
        output = extract_func(cfg_dnn.test_x.get_value()[start_index:end_index])
        if output_mat is None:
            output_mat = output
        else:
            output_mat = np.concatenate((output_mat, output))  # this is not efficient
Example #3
    log('> ... model type: %s' % cfg.model_type)
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'DNNV':
        model = DNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNNV':
        model = CNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

    # load model parameters
    _file2nnet(model.layers, filename=nnet_param)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    kaldiread = KaldiReadIn(in_scp_file)
    kaldiwrite = KaldiWriteOut(out_ark_file)
    log('> ... processing the data')
    utt_number = 0
    while True:
        uttid, in_matrix = kaldiread.read_next_utt()
        if uttid == '':
            break
        # in_matrix = numpy.reshape(in_matrix, (in_matrix.shape[0],) + input_shape_1)
        out_matrix = extract_func(in_matrix)
        kaldiwrite.write_kaldi_mat(uttid, out_matrix)
        utt_number += 1
        if utt_number % 100 == 0:
            log('> ... processed %d utterances' % (utt_number))
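This snippet assumes several names set up earlier in the script. A minimal sketch of that setup; every path and value below is an illustrative placeholder, and layer_index = -1 mirrors Example #1.

nnet_cfg = 'exp/dnn/nnet.cfg'        # pickled network configuration (placeholder)
nnet_param = 'exp/dnn/nnet.param'    # trained network parameters (placeholder)
in_scp_file = 'data/test/feats.scp'  # Kaldi script file listing the input features
out_ark_file = 'exp/bnf/test.ark'    # Kaldi archive for the extracted features
layer_index = -1                     # -1 extracts from the final layer
numpy_rng = numpy.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
cfg = cPickle.load(smart_open(nnet_cfg, 'r'))
cfg.init_activation()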
Example #4
    cfg.init_activation()
    model = None
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg, testing=True)

    # load model parameters
    _file2nnet(model.layers, filename=nnet_param)

    # initialize data reading
    cfg.init_data_reading_test(data_spec)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    output_mats = []    # store the features for all the data in memory. TODO: output the features in a streaming mode
    log('> ... generating features from the specified layer')
    while not cfg.test_sets.is_finish():  # loop over the data
        cfg.test_sets.load_next_partition(cfg.test_xy)
        batch_num = int(math.ceil(1.0 * cfg.test_sets.cur_frame_num / batch_size))

        for batch_index in xrange(batch_num):  # loop over mini-batches
            start_index = batch_index * batch_size
            end_index = min((batch_index+1) * batch_size, cfg.test_sets.cur_frame_num)  # the residue may be smaller than a mini-batch
            output = extract_func(cfg.test_x.get_value()[start_index:end_index])
            output_mats.append(output)

    output_mat = numpy.concatenate(output_mats)
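The TODO above asks for a streaming mode. One possible variant, sketched with only the calls already used in these examples: pickle each mini-batch as it is produced, so the full feature matrix never sits in memory (output_file is a placeholder; the reader would call cPickle.load repeatedly until EOFError).

f = smart_open(output_file, 'wb')
while not cfg.test_sets.is_finish():
    cfg.test_sets.load_next_partition(cfg.test_xy)
    batch_num = int(math.ceil(1.0 * cfg.test_sets.cur_frame_num / batch_size))
    for batch_index in xrange(batch_num):
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size, cfg.test_sets.cur_frame_num)
        output = extract_func(cfg.test_x.get_value()[start_index:end_index])
        cPickle.dump(output, f, cPickle.HIGHEST_PROTOCOL)  # one batch per record
f.close()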
Example #5
log('> ... setting up the model and loading parameters')
numpy_rng = np.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
cfg_dnn = cPickle.load(open(filename, 'r'))
cfg_dnn.init_activation()
model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn)

# load model parameters
_file2nnet(model.layers, filename=wdir + '/rbm.param')

# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while not cfg_dnn.test_sets.is_finish():  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
    batch_num = int(math.ceil(1.0 * cfg_dnn.test_sets.cur_frame_num / batch_size))

    for batch_index in xrange(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size, cfg_dnn.test_sets.cur_frame_num)  # the residue may be smaller than a mini-batch
        output = extract_func(cfg_dnn.test_x.get_value()[start_index:end_index])
        if output_mat is None:
            output_mat = output
        else:
            output_mat = np.concatenate((output_mat, output))  # this is not efficient
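As the comment notes, growing output_mat with np.concatenate on every batch re-copies all accumulated features each time, which is quadratic overall. Examples #4 and #6 use the linear alternative: collect the batches in a list and concatenate once.

output_mats = []                       # collect per-batch outputs
# ... inside the batch loop, instead of concatenating:
output_mats.append(output)
# after both loops, a single concatenation:
output_mat = np.concatenate(output_mats)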
Example #6
def main(arg_elements):

    # check the arguments
    arguments = parse_arguments(arg_elements)
    required_arguments = [
        'data', 'nnet_param', 'nnet_cfg', 'output_file', 'layer_index',
        'batch_size'
    ]
    for arg in required_arguments:
        if arg not in arguments:
            print "Error: the argument %s has to be specified" % arg
            exit(1)

    # mandatory arguments
    data_spec = arguments['data']
    nnet_param = arguments['nnet_param']
    nnet_cfg = arguments['nnet_cfg']
    output_file = arguments['output_file']
    layer_index = int(arguments['layer_index'])
    batch_size = int(arguments['batch_size'])
    argmax = 'argmax' in arguments and string2bool(arguments['argmax'])

    # load network configuration and set up the model
    log('> ... setting up the model and loading parameters')
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    cfg = cPickle.load(smart_open(nnet_cfg, 'r'))
    cfg.init_activation()
    model = None
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng,
                    theano_rng=theano_rng,
                    cfg=cfg,
                    testing=True)

    # load model parameters
    _file2nnet(model.layers, filename=nnet_param)

    # initialize data reading
    cfg.init_data_reading_test(data_spec)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    output_mats = []  # store the features for all the data in memory. TODO: output the features in a streaming mode
    log('> ... generating features from the specified layer')
    while not cfg.test_sets.is_finish():  # loop over the data
        cfg.test_sets.load_next_partition(cfg.test_xy)
        batch_num = int(
            math.ceil(1.0 * cfg.test_sets.cur_frame_num / batch_size))

        for batch_index in xrange(batch_num):  # loop over mini-batches
            start_index = batch_index * batch_size
            end_index = min((batch_index + 1) * batch_size,
                            cfg.test_sets.cur_frame_num)  # the residue may be smaller than a mini-batch
            output = extract_func(
                cfg.test_x.get_value()[start_index:end_index])
            output_mats.append(output)

    output_mat = numpy.concatenate(output_mats)
    if argmax:
        output_mat = output_mat.argmax(axis=1)

    # output the feature representations using pickle
    f = smart_open(output_file, 'wb')
    cPickle.dump(output_mat, f, cPickle.HIGHEST_PROTOCOL)

    log('> ... the features are stored in ' + output_file)
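Reading the dumped features back is the mirror of the dump above; a sketch using the same smart_open and cPickle helpers:

f = smart_open(output_file, 'rb')
feats = cPickle.load(f)
f.close()
print feats.shape  # (num_frames, feat_dim), or (num_frames,) when argmax is true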