Example #1
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    dataset_args['partition'] = 1024 * 1024 * 600  # by default the partition size is 600m if stream is True
    dataset_args['lcxt'] = 0
    dataset_args['rcxt'] = 0

    # the type of the data: pickle, pfile   TO-DO: HDF5
    if '.pickle' in data_spec or '.pkl' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        value = arg_value[1]
        key = arg_value[0]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(
                value.replace('m', ''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(
                value)  # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        elif key == 'lcxt':
            dataset_args['lcxt'] = int(value)
        elif key == 'rcxt':
            dataset_args['rcxt'] = int(value)
        elif key == 'context':
            value = tuple(int(x) for x in value.split(':'))
            if len(value) == 1: value += value
            dataset_args['lcxt'], dataset_args['rcxt'] = value
        elif key == 'ignore-label':
            dataset_args['ignore-label'] = parse_ignore_label(value)
        elif key == 'map-label':
            dataset_args['map-label'] = parse_map_label(value)
        else:
            dataset_args[key] = value
    return pfile_path_list, dataset_args
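
For reference, a quick call (with made-up paths and values) shows how the comma-separated spec is split into the glob pattern and keyword overrides by the function above:

# hypothetical spec; assumes the usual imports (glob, etc.) from the original module
spec = "data/train.*.pickle,partition=600m,context=5:5,label=data/train.labels"
pfile_paths, args = read_data_args(spec)
print(pfile_paths)                  # whatever data/train.*.pickle matches on disk (possibly [])
print(args['partition'])            # 629145600, i.e. 600 * 1024 * 1024
print(args['lcxt'], args['rcxt'])   # 5 5, expanded from context=5:5
print(args['label'])                # 'data/train.labels'
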
Example #2
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    dataset_args['partition'] = 1024 * 1024 * 600  # by default the partition size is 600m if stream is True
    dataset_args['lcxt'] = 0
    dataset_args['rcxt'] = 0

    # the type of the data: pickle, pfile   TO-DO: HDF5
    if '.pickle' in data_spec or '.pkl' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    elif '.ark' in data_spec:
        dataset_args['type'] = 'ark'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        value = arg_value[1]
        key = arg_value[0]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m',''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value) # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        elif key == 'lcxt':
            dataset_args['lcxt'] = int(value)
        elif key == 'rcxt':
            dataset_args['rcxt'] = int(value)
        elif key == 'context':
            value = tuple(int(x) for x in value.split(':'))
            if len(value) == 1: value += value
            dataset_args['lcxt'], dataset_args['rcxt'] = value
        elif key == 'ignore-label':
            dataset_args['ignore-label'] = parse_ignore_label(value)
        elif key == 'map-label':
            dataset_args['map-label'] = parse_map_label(value)
        else:
            dataset_args[key] = value
    return pfile_path_list, dataset_args
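
The examples rely on a string_2_bool helper that is defined elsewhere in the codebase. A minimal sketch of the assumed behaviour (the literal spec text "true" mapped to True, anything else to False) is given below; the actual helper in the original utilities may differ.

def string_2_bool(string):
    # assumed behaviour only -- not the original implementation
    return string.strip().lower() == 'true'
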
Example #3
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path = elements[0]
    dataset_args = {}
    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        value = arg_value[1]
        key = arg_value[0]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m',''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value) # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        else:
            dataset_args[key] = int(value)  # left context & right context; maybe different
    return pfile_path, dataset_args
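
Unlike the other variants, this version returns the raw first element without globbing and casts every unrecognised value to int, so the extra arguments are expected to be numeric (e.g. context sizes). A made-up call illustrates the difference:

spec = "data/train.pfile,partition=600m,lcxt=5,rcxt=5"   # hypothetical spec
pfile_path, args = read_data_args(spec)
print(pfile_path)      # 'data/train.pfile' -- the string itself, not a glob result
print(args['lcxt'])    # 5, an int via the catch-all int(value) branch
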
Example #4
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    dataset_args['partition'] = 1024 * 1024 * 600  # by default the partition size is 600m if stream is True

    # the type of the data: pickle, pfile   TO-DO: HDF5
    if '.pickle' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        value = arg_value[1]
        key = arg_value[0]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(
                value.replace('m', ''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(
                value)  # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        else:
            dataset_args[key] = value  # left context & right context; maybe different
    return pfile_path_list, dataset_args
Example #5
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    dataset_args['partition'] = 1024 * 1024 * 600  # by default the partition size is 600m if stream is True

    # the type of the data: pickle, pfile   TO-DO: HDF5
    if '.pickle' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        value = arg_value[1]
        key = arg_value[0]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m',''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value) # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        else:
            dataset_args[key] = value  # left context & right context; maybe different
    return pfile_path_list, dataset_args
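
Note that this variant (like Example #4) has no dedicated lcxt/rcxt handling, so those keys fall through the final else branch and stay strings. A caller that needs integers has to cast them; for example (hypothetical spec):

_, args = read_data_args("data/train.*.pickle,lcxt=5,rcxt=5")
lcxt, rcxt = int(args['lcxt']), int(args['rcxt'])   # '5' -> 5
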
Example #6
    in_scp_file = arguments['in_scp_file']
    out_ark_file = arguments['out_ark_file']
    cnn_param_file = arguments['cnn_param_file']
    cnn_cfg_file = arguments['cnn_cfg_file']
    # network structure
    cfg = cPickle.load(open(cnn_cfg_file, 'r'))

    conv_configs = cfg.conv_layer_configs
    conv_layer_number = len(conv_configs)
    for i in xrange(conv_layer_number):
        conv_configs[i]['activation'] = cfg.conv_activation

    # whether to use the fast mode
    use_fast = cfg.use_fast
    if arguments.has_key('use_fast'):
        use_fast = string_2_bool(arguments['use_fast'])

    kaldiread = KaldiReadIn(in_scp_file)
    kaldiwrite = KaldiWriteOut(out_ark_file)

    log('> ... setting up the CNN convolution layers')
    input_shape_train = conv_configs[0]['input_shape']
    input_shape_1 = (input_shape_train[1], input_shape_train[2],
                     input_shape_train[3])

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    cnn = CNN_Forward(numpy_rng=rng,
                      theano_rng=theano_rng,
                      conv_layer_configs=conv_configs,
                      use_fast=use_fast)
Example #7
    cnn_param_file = arguments['cnn_param_file']
    cnn_cfg_file = arguments['cnn_cfg_file']
    layer_index = int(arguments['layer_index'])

    # network structure
    cfg = cPickle.load(open(cnn_cfg_file,'r'))

    conv_configs = cfg.conv_layer_configs
    conv_layer_number = len(conv_configs)
    for i in xrange(conv_layer_number):
        conv_configs[i]['activation'] = cfg.conv_activation

    # whether to use the fast mode
    use_fast = cfg.use_fast
    if arguments.has_key('use_fast'):
        use_fast = string_2_bool(arguments['use_fast'])

    kaldiread = KaldiReadIn(in_scp_file)
    kaldiwrite = KaldiWriteOut(out_ark_file)


    log('> ... setting up the CNN convolution layers')
    input_shape_train = conv_configs[0]['input_shape']
    input_shape_1 = (input_shape_train[1], input_shape_train[2], input_shape_train[3])

    rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    cfg.init_activation() 

    cnn = CNN_Forward(numpy_rng=rng, theano_rng=theano_rng, conv_layer_configs=conv_configs, use_fast=use_fast)
    #cnn = CNNV(numpy_rng = rng, theano_rng=theano_rng, cfg=cfg)
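
Both CNN snippets stop right after the network object is constructed. In the surrounding code the compiled forward pass is typically applied utterance by utterance to the features read from the scp file, and the outputs are written to the ark file. The sketch below only illustrates that flow; the method names (build_out_function, read_next_utt, write_kaldi_mat, close) are assumptions about the reader/writer helpers, not verified against the real classes.

    # Sketch only -- all method names below are assumed, not verified.
    out_function = cnn.build_out_function()            # compiled Theano forward pass (assumed)
    while True:
        uttid, in_matrix = kaldiread.read_next_utt()   # assumed to return '' when the scp is exhausted
        if uttid == '':
            break
        # reshape the flat feature matrix to (num_frames, channels, height, width)
        in_matrix = numpy.reshape(in_matrix, (in_matrix.shape[0],) + input_shape_1)
        kaldiwrite.write_kaldi_mat(uttid, out_function(in_matrix))
    kaldiwrite.close()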