def Dense(shape, init=init_default_or_glorot_uniform, activation=activation_default_or_None,
          input_rank=None, map_rank=None,
          bias=bias_default_or_True, init_bias=init_bias_default_or_0):
    activation = _resolve_activation(activation)
    bias       = bias if _is_given(bias) else _current_default_options.bias
    output_shape = _as_tuple(shape)

    if input_rank is not None and map_rank is not None:
        raise ValueError("Dense: input_rank and map_rank cannot be specified at the same time.")

    # determine meaning of axes
    # W gets dimension (input_shape + shape)
    # where input_shape is determined as:
    #  - by default, equal to the dimensions of the input passed to Dense()
    #  - if input_rank is given, then the last 'input_rank' dimensions of the input (all others are not reduced over)
    #  - if map_rank is given, then all but the first 'map_rank' dimensions of the input (those are not reduced over)
    # where input_rank and map_rank are mutually exclusive.

    #output_rank = -len(output_shape)   # support outputs with tensor layouts
    # BUGBUG: Should this be a negative number now, since output is the last axis in Python?
    output_rank = len(output_shape)     # support outputs with tensor layouts

    # If input_rank is not given, then pass a single _INFERRED; map_rank, if given, will determine the input_rank.
    # The dimension inference may still create multiple axes.
    input_shape = _INFERRED * (input_rank if input_rank is not None else 1)

    if input_rank is not None:
        UntestedBranchError("Dense, input_rank option not implemented")
        infer_input_rank_to_map = -1        # means map_rank is not specified; input_rank rules
    elif map_rank is None:
        infer_input_rank_to_map = 0         # neither given: default to 'infer W to use all input dims'
    else:
        UntestedBranchError("Dense, map_rank option not implemented")
        infer_input_rank_to_map = map_rank  # infer W to use all input dims except the first static 'map_rank' ones

    # parameters bound to this Function
    init_weights = _initializer_for(init, Record(output_rank=output_rank))
    W = Parameter(input_shape + output_shape, init=init_weights, name='W')
    b = Parameter(              output_shape, init=init_bias,    name='b') if bias else None

    # expression of this function
    x = Placeholder(name='dense_arg')
    apply_x = times(x, W, output_rank=output_rank, infer_input_rank_to_map=infer_input_rank_to_map)
    if b:
        apply_x = apply_x + b
    apply_x = apply_x >> activation
    return Block(apply_x, 'Dense', Record(W=W, b=b))
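# Illustrative usage sketch (not part of the library source; assumes the layers API above
# is importable and that the input dimension is inferred when the layer is first applied):
#
#   d = Dense(64)                      # W: (inferred_input_dim, 64), b: (64,)
#   # With a (C, H, W)-shaped input, Dense(64, map_rank=1) would keep the first static axis
#   # and reduce over the rest, yielding a (C, 64)-shaped output; note that the map_rank
#   # branch above is still marked as untested.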
def Convolution(filter_shape,        # e.g. (3,3)
                num_filters=None,    # e.g. 64 or None (which means 1 channel and don't add a dimension)
                activation=activation_default_or_None,
                init=init_default_or_glorot_uniform,
                pad=pad_default_or_False,
                strides=1,
                sharing=True,        # (must be True currently)
                bias=bias_default_or_True,
                init_bias=init_bias_default_or_0,
                reduction_rank=1,    # (must be 1 currently)
                transpose=False,     # (must be False currently)
                max_temp_mem_size_in_samples=0):
    #UntestedBranchError("Convolution")
    activation = _resolve_activation(activation)
    pad  = pad  if _is_given(pad)  else _current_default_options.pad
    bias = bias if _is_given(bias) else _current_default_options.bias
    # TODO: there must be a Python trick to do this as a function call on locals or so
    if reduction_rank != 1:
        raise NotImplementedError("Convolution: reduction_rank other than 1 currently not supported")
    if transpose:
        raise NotImplementedError("Convolution: transpose option currently not supported")
    if not sharing:
        raise NotImplementedError("Convolution: sharing option currently must be True")
    output_channels_shape = _as_tuple(num_filters)
    output_rank = len(output_channels_shape)
    filter_rank = len(filter_shape)
    kernel_shape = _INFERRED * reduction_rank + filter_shape  # kernel := filter plus reductionDims

    # parameters bound to this Function
    #init_kernel = glorot_uniform(filter_rank=-filter_rank, output_rank=1)
    init_kernel = _initializer_for(init, Record(filter_rank=filter_rank, output_rank=-1))
    # BUGBUG: It is very confusing that output_rank is negative, esp. since that means count from the start. Solution: add a flag
    W = Parameter(output_channels_shape + kernel_shape,             init=init_kernel, name='W')                 # (K, C, H, W) aka [ W x H x C x K ]
    b = Parameter(output_channels_shape + (1,) * len(filter_shape), init=init_bias,   name='b') if bias else None  # (K, 1, 1) aka [ 1 x 1 x K ]

    # expression
    x = Placeholder(name='convolution_arg')
    # TODO: update the parameter order of convolution() to match the optional ones as in here? (options order matches Keras)
    apply_x = convolution(W, x,
                          strides=_as_tuple(strides),
                          sharing=_as_tuple(sharing),
                          auto_padding=_as_tuple(pad),  # TODO: can we rename auto_padding to pad?
                          transpose=transpose,
                          max_temp_mem_size_in_samples=max_temp_mem_size_in_samples)
    if bias:
        apply_x = apply_x + b
    apply_x = apply_x >> activation
    return Block(apply_x, 'Convolution', Record(W=W, b=b))
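# Illustrative usage sketch (not part of the library source): a 3x3 convolution with 64
# output channels; the reduction (channel) axis of W stays _INFERRED until the layer is
# first applied to an input:
#
#   c = Convolution((3,3), 64, pad=True, strides=(1,1))
#   # W: (64, C_inferred, 3, 3), b: (64, 1, 1)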
def Recurrence(over, go_backwards=False, initial_state=initial_state_default_or_None):
    # helper to compute the previous value; can take a single Variable/Function or a tuple
    initial_state = initial_state if _is_given(initial_state) else _current_default_options.initial_state
    # if initial state is given and a numeric constant, then turn it into a Constant() object
    if np.isscalar(initial_state):
        initial_state = Constant(initial_state, shape=(1))  # TODO: This should be automatically done inside the API.
    def previous_hook(state):
        if isinstance(state, tuple):  # if multiple then apply to each element
            return tuple([previous_hook(s) for s in state])
        # not a tuple: must be a 'scalar', i.e. a single element
        return past_value(state, initial_state) if not go_backwards else \
               future_value(state, initial_state)
    x = Placeholder(name='recurrence_arg')
    state_forward = over.create_placeholder()  # create a placeholder or a tuple of placeholders
    prev_state = previous_hook(state_forward)  # delay (h, c)
    f_x_h_c = over(x, prev_state)              # apply the recurrent function 'over'
    # this returns a Function (x, (h_prev, c_prev)) -> (h, c)
    h_c = f_x_h_c.outputs
    replacements = { value_forward: value for (value_forward, value) in zip(list(_as_tuple(state_forward)), h_c) }
    f_x_h_c.replace_placeholders(replacements)  # resolves state_forward := h_c
    h = f_x_h_c.outputs[0]  # 'h' is a Variable (the output of a Function that computed it)
    if _trace_layers:
        _log_node(h)
        _log_node(combine([h.owner]))
    apply_x = combine([h])  # the Function that yielded 'h', so we get to know its inputs
    # apply_x is a Function x -> h
    return Block(apply_x, 'Recurrence', Record(over=over))
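# Illustrative usage sketch (not part of the library source; assumes an LSTM() block with
# the same (x, (h_prev, c_prev)) -> (h, c) signature is available in this layers module):
#
#   r      = Recurrence(LSTM(500))                                        # forward recurrence over the sequence axis
#   r_back = Recurrence(LSTM(500), go_backwards=True, initial_state=0.1)  # scalar initial_state is wrapped into a Constant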
def Stabilizer(steepness=4, enable_self_stabilization=enable_self_stabilization_default_or_False):
    if _is_given(enable_self_stabilization):
        raise NotImplementedError('Stabilizer: enable_self_stabilization flag not implemented yet')
    #enable_self_stabilization = enable_self_stabilization if _is_given(enable_self_stabilization) else _current_default_options.enable_self_stabilization
    #if not enable_self_stabilization:  # disabled (typically through global option)
    #    return identity

    # parameters bound to this Function
    param = Parameter((1), init=0.99537863, name='stabilizer_param')  # 1/steepness*ln(e^steepness-1) for steepness==4
    #param = Parameter((1), init=1, name='stabilizer_param')
    # TODO: compute this strange value directly in Python

    # expression
    x = Placeholder(name='stabilizer_arg')
    # sharpened Softplus: 1/steepness * ln(1+e^{steepness*param})
    # this behaves linearly for weights around 1, yet guarantees positiveness
    # TODO: risk of confusion; can these functions be namespaced?
    beta = log(1 + exp(steepness * param)) * (1 / steepness)  # perf BUGBUG: "log() / steepness" should optimize to the same thing
    apply_x = beta * x
    return Block(apply_x, 'Stabilizer', Record(beta=beta))
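# The constant 0.99537863 above is 1/steepness * ln(e^steepness - 1) for steepness == 4,
# i.e. the value of 'param' at which the sharpened Softplus beta equals exactly 1.
# A minimal sketch of computing it directly in Python (addresses the TODO above; uses the
# standard 'math' module so as not to shadow the cntk log/exp ops used in this module):
#
#   import math
#   steepness = 4
#   init_param = math.log(math.exp(steepness) - 1) / steepness   # ~= 0.99537863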
def __init__(self, deserializers=None, randomize=True,
             randomization_window=DEFAULT_RANDOMIZATION_WINDOW_IN_CHUNKS,
             sample_based_randomization_window=False,
             epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES,
             multithreaded_deserializer=None):
    if not isinstance(deserializers, (list, tuple)):
        deserializers = [deserializers]  # allow passing a single item or a list
    reader_config = _ReaderConfig(
        deserializers=deserializers,
        randomize=randomize,
        randomization_window=randomization_window,
        sample_based_randomization_window=sample_based_randomization_window,
        epoch_size=epoch_size,
        distributed_after=distributed_after,
        multithreaded_deserializer=multithreaded_deserializer)
    source = reader_config.minibatch_source()
    # transplant into this class instance
    self.__dict__ = source.__dict__
    # transplant all members of deserializers into a record called streams
    streams = {}
    for si in self.stream_infos():
        streams[si.m_name] = si
    from ..utils import Record
    self.streams = Record(**streams)
def Embedding(shape=None, init=None, weights=None):
    if init is not None and weights is not None:
        raise ValueError('Embedding: init and weights options are mutually exclusive')

    # parameters bound to this Function:
    # no weights given: learn the embedding
    if weights is None:
        if shape is None:
            raise ValueError('Embedding: output shape must be specified')
        if init is None:
            init = init_default_or_glorot_uniform
        shape = _as_tuple(shape)
        weight_shape = _INFERRED + shape
        E = Parameter(weight_shape, init=init, name='E')
    # weights given: use them as constant
    else:
        UntestedBranchError("Embedding, from constant")
        import numpy as np
        if not isinstance(weights, np.ndarray):  # TODO: is this the correct test for a numpy array?
            UntestedBranchError("Embedding, from constant that is not an array")
            # TODO: can 'weights' be a CNTK object? Then how to do this?
            raise ValueError('Embedding: weights must be a numpy array')
        weight_shape = np.shape(weights)
        if shape is not None:  # user may give shape, then it must match
            if len(shape) >= len(weight_shape) or weight_shape[-len(shape):] != shape:
                raise ValueError('Embedding: shape parameter must match weights')
        E = Constant(weights, name='E')

    # expression
    x = Placeholder(name='embedding_arg')
    apply_x = times(x, E)
    return Block(apply_x, 'Embedding', Record(E=E))
def StreamDef(field, shape=None, is_sparse=False, transforms=None):
    # note: the names used inside here are required by the C++ code which looks them up in a dictionary
    config = dict(stream_alias=field, is_sparse=is_sparse)
    if shape is not None:
        config['dim'] = shape
    if transforms is not None:
        config['transforms'] = transforms
    return Record(**config)
def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT):
    if not isinstance(deserializers, (list, tuple)):
        deserializers = [deserializers]  # allow passing a single item or a list
    reader_config = ReaderConfig(deserializers=deserializers, randomize=randomize, epoch_size=epoch_size)
    source = minibatch_source(reader_config)
    # transplant into this class instance
    self.__dict__ = source.__dict__
    # transplant all members of deserializers into a record called streams
    streams = {}
    for si in self.stream_infos():
        streams[si.m_name] = si
    from ..utils import Record
    self.streams = Record(**streams)
def StreamDef(field=None, shape=None, is_sparse=False, transforms=None, context=None, scp=None, mlf=None, broadcast=None):
    '''
    Configuration of a stream for use with the builtin Deserializers.
    The meanings of some configuration keys have a mild dependency on the
    exact deserializer, and certain keys are meaningless for certain deserializers.

    Args:
        field (str): this is the name of the stream:

         * for CTFDeserializer the name is inside the CTF file
         * for ImageDeserializer the acceptable names are `image` or `label`
         * for HTKFeatureDeserializer and HTKMLFDeserializer only the default
           value of None is acceptable

        shape (int, tuple): dimensions of this stream. HTKFeatureDeserializer,
         HTKMLFDeserializer, and CTFDeserializer read data as flat arrays.
         If you need different shapes you can :func:`~cntk.ops.reshape` it later.
        is_sparse (bool): whether the provided data is sparse.
         `False` by default, unless mlf is provided.
        transforms (list): list of transforms to be applied by the Deserializer.
         Currently only ImageDeserializer supports transforms.
        context (tuple): left and right context to consider when reading in HTK
         data. Only supported by HTKFeatureDeserializer.
        scp (str, list): scp files for HTK data
        mlf (str, list): mlf files for HTK data
        broadcast (bool): whether the features in this stream should be
         broadcast to the whole sequence (useful in e.g. ivectors with HTK)
    '''
    config = dict(stream_alias=field, is_sparse=is_sparse)
    if shape is not None:
        config['dim'] = shape
    if transforms is not None:
        config['transforms'] = transforms
    if context is not None:
        config['context'] = context
    if scp is not None:
        config['scp'] = scp
    if mlf is not None:
        config['mlf'] = mlf
        config['is_sparse'] = True
    if broadcast is not None:
        config['broadcast'] = broadcast
    return Record(**config)
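# Illustrative usage sketch (not part of the library source): stream configurations for a
# CTF text source and for HTK features/labels; the file names below are placeholders:
#
#   query    = StreamDef(field='S0', shape=1000, is_sparse=True)              # CTFDeserializer
#   labels   = StreamDef(field='S2', shape=10,   is_sparse=True)
#   features = StreamDef(shape=33, scp='features.scp', context=(3,3))         # HTKFeatureDeserializer
#   phones   = StreamDef(shape=132, mlf='labels.mlf')                         # HTKMLFDeserializer; is_sparse becomes True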
def LayerStack(N, constructor):
    from inspect import signature
    takes_arg = len(signature(constructor).parameters) > 0
    # helper to call the layer constructor
    def call(i):
        if takes_arg:
            return constructor(i)  # takes an arg: pass it
        else:
            return constructor()   # takes no arg: call without, that's fine too
    layers = [call(i) for i in range(N)]
    apply_x = Sequential(layers)
    return Block(apply_x, 'LayerStack', Record(layers=layers))
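# Illustrative usage sketch (not part of the library source): the constructor may either
# ignore the layer index or accept it as an argument:
#
#   stack  = LayerStack(3, lambda:   Dense(2048))             # three identical (but separately parameterized) Dense layers
#   stack2 = LayerStack(3, lambda i: Dense(2048 // (2**i)))   # layer width depends on the index i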
def LayerNormalization(initial_scale=1, initial_bias=0):
    UntestedBranchError("LayerNormalization")

    # parameters bound to this Function
    scale = Parameter((1), init=initial_scale)  # TODO: offer Softplus version for protection, as for Stabilizer
    bias  = Parameter((1), init=initial_bias)

    # expression
    x = Placeholder(name='layer_normalization_arg')
    mean = reduce_mean(x)  # normalize w.r.t. actual sample statistics
    x0 = x - mean
    std = sqrt(reduce_mean(x0 * x0))
    #x_hat = element_divide(x0, std)
    x_hat = x0 / std
    apply_x = x_hat * scale + bias  # denormalize with learned parameters
    return Block(apply_x, 'LayerNormalization', Record(scale=scale, bias=bias))
def __init__(self, layer_subset=None, **kwargs):
    if layer_subset:
        raise NotImplementedError('default_options not yet implemented per layer')
    new_options = kwargs
    # TODO: layer subset
    # dict() makes a (shallow) copy of _current_default_options.__dict__ into merged_options.
    merged_options = dict(_current_default_options.__dict__)
    # We merge the newly provided options with the current defaults.
    # Only names that already exist may be used (cannot create new default variables).
    # TODO: we may consider a more generic mechanism where one can overwrite all, if Python allows that.
    for key in new_options:
        if key not in merged_options:
            raise TypeError("default_options() got an unexpected keyword argument '{}'".format(key))
        merged_options[key] = new_options[key]
    self.new_default_options = Record(**merged_options)  # this is the new options record that entering the 'with' section will activate
def Sequential(layers):
    if not isinstance(layers, (list, tuple)):  # to support nested lists, run every item recursively through Sequential()
        return layers
    #apply_x = identity
    #for layer in layers:
    #    def _is_string(obj):
    #        return isinstance(obj, str)  # TODO: different in Python 2
    #    if _is_string(layer):
    #        UntestedBranchError("Sequential variable names")
    #        # BUGBUG: name gets lost in both Variable and resulting function once applied, so dict not usable for now for data, only for parameters
    #        apply_x = combine([apply_x.output], name=layer)
    #        attrs[layer] = apply_x
    #    else:
    #        apply_x = apply_x >> Sequential(layer)
    #attrs['layers'] = [layer for layer in layers if not _is_string(layer)]
    from functools import reduce
    apply_x = reduce(lambda f, g: f >> Sequential(g), layers, identity)
    attrs = Record(layers=layers)
    return Block(apply_x, 'Sequential', attrs)
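# Illustrative usage sketch (not part of the library source): Sequential() composes layers
# left to right; nested lists are flattened by the recursive call above:
#
#   model = Sequential([Embedding(300),
#                       [Dense(512), Dense(512)],   # a nested list is itself run through Sequential()
#                       Dense(10)])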
def BatchNormalization(map_rank=None,  # if given then normalize only over this many dimensions. E.g. 1 to tie all (h,w) in a (C, H, W)-shaped input
                       init_scale=1,
                       normalization_time_constant=5000, blend_time_constant=0,
                       epsilon=0.00001, use_cntk_engine=True):
    # TODO: make map_rank a default option, once per-layer type defaults are implemented

    # parameters bound to this Function
    norm_shape = _INFERRED
    if map_rank is not None and map_rank != 1:
        UntestedBranchError("BatchNormalization map_rank can only be 1 or None for now")
    scale        = Parameter(norm_shape, init=init_scale)
    bias         = Parameter(norm_shape, init=0)
    run_mean     = Constant(0, shape=norm_shape)  # note: these are not really constants; they are updated differently
    run_variance = Constant(0, shape=norm_shape)

    # expression
    x = Placeholder(name='batch_normalization_arg')
    apply_x = batch_normalization(x, scale, bias, run_mean, run_variance, map_rank == 1,
                                  normalization_time_constant=normalization_time_constant,
                                  blend_time_constant=blend_time_constant,
                                  epsilon=epsilon,
                                  #use_cntk_engine=use_cntk_engine)
                                  use_cudnn_engine=not use_cntk_engine)
    return Block(apply_x, 'BatchNormalization', Record(scale=scale, bias=bias, mean=run_mean, variance=run_variance))
def StreamDef(shape, is_sparse, alias):
    from cntk.utils import Record
    return Record(dim=shape, is_sparse=is_sparse, stream_alias=alias)
def _Infer(shape, axis):
    return Record(shape=_as_tuple(shape), axis=axis, with_shape=lambda new_shape: _Infer(new_shape, axis))
_INFERRED = (InferredDimension,)  # as a tuple, makes life easier

# call this for all untested branches
def UntestedBranchError(name):
    raise NotImplementedError("Untested code branch: " + name)

# This record contains the defaults for a number of optional parameters to layers.
# These can be overwritten temporarily by saying
#    with default_options(init=..., ...):
#        # code block within which the changed defaults are active
_current_default_options = Record(
    init=glorot_uniform(),
    activation=None,                  # Dense() and Convolution() have no activation by default
    pad=False,                        # BUGBUG: not done for pooling at present. Need a special default? How to name?
    # ^^ This should be addressed by allowing configs per layer type.
    #    To be addressed as a per-layer default. See default_options below.
    bias=True,
    init_bias=0,
    enable_self_stabilization=False,  # Stabilizer() and LSTM()
    initial_state=None,               # Recurrence()
    use_peepholes=False               # LSTM()
)

_default_sentinel           = '(default)'            # This is a singleton sentinel value we recognize and replace in _initializer_for()
_default_sentinel_init      = '(init default)'       # use different ones for init and init_bias so we can distinguish them in _initializer_for()
_default_sentinel_init_bias = '(init_bias default)'
# in function signatures we use symbols that indicate the default default in their name
init_default_or_glorot_uniform = _default_sentinel_init
activation_default_or_None     = _default_sentinel
init_bias_default_or_0         = _default_sentinel_init_bias
bias_default_or_True           = _default_sentinel
pad_default_or_False           = _default_sentinel
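# Illustrative usage sketch (not part of the library source): temporarily overriding the
# defaults above inside a 'with' block; only names that already exist in
# _current_default_options may be overridden:
#
#   with default_options(init=glorot_uniform(), pad=True, bias=False):
#       model = Sequential([Convolution((3,3), 64),
#                           Convolution((3,3), 64),
#                           Dense(10)])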
def create_reader(path):
    return CNTKTextFormatMinibatchSource(path, streams=Record(
        query         = StreamDef(shape=input_dim,   is_sparse=True, alias='S0'),
        intent_unused = StreamDef(shape=num_intents, is_sparse=True, alias='S1'),  # BUGBUG: unused, and should infer dim
        slot_labels   = StreamDef(shape=label_dim,   is_sparse=True, alias='S2')
    ))
def _Infer(shape, axis):
    from cntk.utils import Record, _as_tuple
    return Record(shape=_as_tuple(shape), axis=axis, with_shape=lambda new_shape: _Infer(new_shape, axis))
def __enter__(self):
    _OptionsContextManager._current_default_overrides = Record(
        _scope=self.scope,
        _outer=_OptionsContextManager._current_default_overrides,
        **self.kwargs)  # insert new scope at head of link
    return self