import glob


# Parse a data specification of the form "file_pattern,key1=value1,key2=value2,..."
# into a glob'd list of data files and a dict of dataset options.
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    # by default the partition size is 600MB if stream is True
    dataset_args['partition'] = 1024 * 1024 * 600
    dataset_args['lcxt'] = 0
    dataset_args['rcxt'] = 0

    # the type of the data: pickle, pfile, kaldi; TODO: HDF5
    if '.pickle' in data_spec or '.pkl' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        key = arg_value[0]
        value = arg_value[1]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m', ''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value)  # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        elif key == 'lcxt':
            dataset_args['lcxt'] = int(value)
        elif key == 'rcxt':
            dataset_args['rcxt'] = int(value)
        elif key == 'context':
            # "context=l:r" sets both window sizes; "context=n" sets both to n
            value = tuple(int(x) for x in value.split(':'))
            if len(value) == 1:
                value += value
            dataset_args['lcxt'], dataset_args['rcxt'] = value
        elif key == 'ignore-label':
            dataset_args['ignore-label'] = parse_ignore_label(value)
        elif key == 'map-label':
            dataset_args['map-label'] = parse_map_label(value)
        else:
            dataset_args[key] = value
    return pfile_path_list, dataset_args
# Variant of the parser above that additionally recognizes Kaldi .ark inputs.
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    # by default the partition size is 600MB if stream is True
    dataset_args['partition'] = 1024 * 1024 * 600
    dataset_args['lcxt'] = 0
    dataset_args['rcxt'] = 0

    # the type of the data: pickle, pfile, kaldi; TODO: HDF5
    if '.pickle' in data_spec or '.pkl' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    elif '.ark' in data_spec:
        dataset_args['type'] = 'ark'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        key = arg_value[0]
        value = arg_value[1]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m', ''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value)  # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        elif key == 'lcxt':
            dataset_args['lcxt'] = int(value)
        elif key == 'rcxt':
            dataset_args['rcxt'] = int(value)
        elif key == 'context':
            value = tuple(int(x) for x in value.split(':'))
            if len(value) == 1:
                value += value
            dataset_args['lcxt'], dataset_args['rcxt'] = value
        elif key == 'ignore-label':
            dataset_args['ignore-label'] = parse_ignore_label(value)
        elif key == 'map-label':
            dataset_args['map-label'] = parse_map_label(value)
        else:
            dataset_args[key] = value
    return pfile_path_list, dataset_args
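# The parsers above rely on a few helpers defined elsewhere in the codebase.
# Below are minimal sketches of plausible implementations, plus an
# illustrative call; the real helpers and the exact option formats
# (':' between ignored labels, '/' between map-label pairs) are assumptions,
# not the project's confirmed behavior.

def string_2_bool(string):
    # 'true'-style strings -> True (assumed convention)
    return string.lower() in ('true', '1', 'yes')


def parse_ignore_label(value):
    # e.g. '0:2:9' -> set([0, 2, 9]): label indices to drop (assumed format)
    return set(int(x) for x in value.split(':'))


def parse_map_label(value):
    # e.g. '3:0/4:1' -> {3: 0, 4: 1}: remap old labels to new ones (assumed format)
    mapping = {}
    for pair in value.split('/'):
        old, new = pair.split(':')
        mapping[int(old)] = int(new)
    return mapping


# Illustrative call (file pattern made up for the example):
#   files, args = read_data_args('train.*.pickle,random=true,context=5:5')
# globs for train.*.pickle and yields
#   args == {'type': 'pickle', 'random': True, 'stream': False,
#            'partition': 629145600, 'lcxt': 5, 'rcxt': 5}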
# Simpler variant: returns a single path (no globbing) and applies no defaults.
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path = elements[0]
    dataset_args = {}
    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        key = arg_value[0]
        value = arg_value[1]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m', ''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value)  # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        else:
            dataset_args[key] = int(value)  # left context & right context; maybe different
    return pfile_path, dataset_args
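# Illustrative call for this simpler variant (file name made up). Note that,
# unlike the variants above, unrecognized keys are coerced to int here:
#   path, args = read_data_args('train.pfile,partition=600m,lcxt=4')
#   -> path == 'train.pfile', args == {'partition': 629145600, 'lcxt': 4}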
# Variant without context-window or label-mapping options; unrecognized keys
# are kept as raw strings.
def read_data_args(data_spec):
    elements = data_spec.split(",")
    pfile_path_list = glob.glob(elements[0])
    dataset_args = {}
    # default settings
    dataset_args['type'] = 'pickle'
    dataset_args['random'] = False
    dataset_args['stream'] = False
    # by default the partition size is 600MB if stream is True
    dataset_args['partition'] = 1024 * 1024 * 600

    # the type of the data: pickle, pfile, kaldi; TODO: HDF5
    if '.pickle' in data_spec:
        dataset_args['type'] = 'pickle'
    elif '.pfile' in data_spec:
        dataset_args['type'] = 'pfile'
    elif '.scp' in data_spec:
        dataset_args['type'] = 'kaldi'
    else:
        dataset_args['type'] = ''

    for i in range(1, len(elements)):
        element = elements[i]
        arg_value = element.split("=")
        key = arg_value[0]
        value = arg_value[1]
        if key == 'partition':
            dataset_args['partition'] = 1024 * 1024 * int(value.replace('m', ''))
        elif key == 'stream':
            dataset_args['stream'] = string_2_bool(value)  # not supported for now
        elif key == 'random':
            dataset_args['random'] = string_2_bool(value)
        elif key == 'label':
            dataset_args['label'] = value
        else:
            dataset_args[key] = value  # left context & right context; maybe different
    return pfile_path_list, dataset_args
import numpy
import cPickle
from theano.tensor.shared_randomstreams import RandomStreams

# KaldiReadIn, KaldiWriteOut, CNN_Forward, log and string_2_bool are project
# helpers imported from elsewhere in the codebase; "arguments" is the parsed
# command-line dict.

in_scp_file = arguments['in_scp_file']
out_ark_file = arguments['out_ark_file']
cnn_param_file = arguments['cnn_param_file']
cnn_cfg_file = arguments['cnn_cfg_file']

# network structure
cfg = cPickle.load(open(cnn_cfg_file, 'rb'))
conv_configs = cfg.conv_layer_configs
conv_layer_number = len(conv_configs)
for i in xrange(conv_layer_number):
    conv_configs[i]['activation'] = cfg.conv_activation

# whether to use the fast mode
use_fast = cfg.use_fast
if 'use_fast' in arguments:
    use_fast = string_2_bool(arguments['use_fast'])

kaldiread = KaldiReadIn(in_scp_file)
kaldiwrite = KaldiWriteOut(out_ark_file)
log('> ... setting up the CNN convolution layers')

input_shape_train = conv_configs[0]['input_shape']
input_shape_1 = (input_shape_train[1], input_shape_train[2],
                 input_shape_train[3])

rng = numpy.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))
cnn = CNN_Forward(numpy_rng=rng, theano_rng=theano_rng,
                  conv_layer_configs=conv_configs, use_fast=use_fast)
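# A sketch of the per-utterance extraction loop that typically follows this
# setup. build_out_function, read_next_utt and write_kaldi_mat are assumed
# method names, not the project's confirmed API.

out_fn = cnn.build_out_function()             # compile the Theano forward pass (assumed)
uttid, in_matrix = kaldiread.read_next_utt()  # assumed reader API
while uttid != '':
    out_matrix = out_fn(in_matrix)                 # run one utterance through the conv layers
    kaldiwrite.write_kaldi_mat(uttid, out_matrix)  # assumed writer API
    uttid, in_matrix = kaldiread.read_next_utt()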
# Variant for dumping activations of a specific convolution layer
# (layer_index); in_scp_file and out_ark_file are assigned earlier in the
# original script, outside this excerpt.
cnn_param_file = arguments['cnn_param_file']
cnn_cfg_file = arguments['cnn_cfg_file']
layer_index = int(arguments['layer_index'])

# network structure
cfg = cPickle.load(open(cnn_cfg_file, 'rb'))
conv_configs = cfg.conv_layer_configs
conv_layer_number = len(conv_configs)
for i in xrange(conv_layer_number):
    conv_configs[i]['activation'] = cfg.conv_activation

# whether to use the fast mode
use_fast = cfg.use_fast
if 'use_fast' in arguments:
    use_fast = string_2_bool(arguments['use_fast'])

kaldiread = KaldiReadIn(in_scp_file)
kaldiwrite = KaldiWriteOut(out_ark_file)
log('> ... setting up the CNN convolution layers')

input_shape_train = conv_configs[0]['input_shape']
input_shape_1 = (input_shape_train[1], input_shape_train[2],
                 input_shape_train[3])

rng = numpy.random.RandomState(89677)
theano_rng = RandomStreams(rng.randint(2 ** 30))
cfg.init_activation()
cnn = CNN_Forward(numpy_rng=rng, theano_rng=theano_rng,
                  conv_layer_configs=conv_configs, use_fast=use_fast)
# cnn = CNNV(numpy_rng=rng, theano_rng=theano_rng, cfg=cfg)
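# Neither snippet above has loaded the trained weights from cnn_param_file
# yet. A minimal sketch of that step, assuming the file holds a cPickle'd
# list of (W, b) numpy arrays, one pair per convolution layer, and that the
# forward network exposes its layers as cnn.conv_layers (both assumptions):

params = cPickle.load(open(cnn_param_file, 'rb'))
for layer, (W, b) in zip(cnn.conv_layers, params):
    layer.W.set_value(W)  # copy trained weights into the shared variables
    layer.b.set_value(b)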