def Placeholder(_inf, name='placeholder'):
    """Create a placeholder variable described by ``_inf`` and name it.

    Args:
        _inf: object exposing ``shape`` and ``axis`` attributes; the shape is
            converted with ``_as_tuple`` and the axis is passed on as the
            placeholder's ``dynamic_axes``.
        name: name given to the placeholder (also passed to ``_name_node``).

    Returns:
        The object returned by ``placeholder_variable``.
    """
    static_shape = _as_tuple(_inf.shape)
    axes = _inf.axis
    node = placeholder_variable(shape=static_shape, dynamic_axes=axes, name=name)
    _name_node(node, name)
    if _trace_layers:
        # tracing aid: report every placeholder that gets created
        print("new " + _node_description(node))
    return node
def Placeholder(shape=None, name='placeholder'):
    """Create a placeholder variable, optionally with a known shape, and name it.

    Args:
        shape: shape to hand to ``placeholder_variable``; when ``None`` the
            ``shape`` argument is not passed at all.
        name: name given to the placeholder (also passed to ``_name_node``).

    Returns:
        The object returned by ``placeholder_variable``.
    """
    # TODO: use (*args, **kwargs)?
    call_args = {'name': name}
    if shape is not None:
        call_args['shape'] = shape
    node = placeholder_variable(**call_args)
    _name_node(node, name)
    if _trace_layers:
        # tracing aid: report every placeholder that gets created
        print("new " + _node_description(node))
    return node
def LSTM(shape, cell_shape=None, use_peepholes=use_peepholes_default_or_False,
         init=init_default_or_glorot_uniform, init_bias=init_bias_default_or_0,
         enable_self_stabilization=enable_self_stabilization_default_or_False):
    """Build the LSTM step expression, a function of (x, (h, c)).

    Args:
        shape: shape of the output ``h``; must be rank 1 after ``_as_tuple``.
        cell_shape: shape of the cell ``c``. When given, a final projection
            ``Wmr`` maps the cell-sized output back to ``shape``; when ``None``
            the cell uses ``shape`` and there is no projection.
        use_peepholes: adds elementwise cell-to-gate weights ``Ci``/``Cf``/``Co``.
            Taken from ``_current_default_options`` when not given.
        init: initializer passed to the weight parameters.
        init_bias: initializer passed to the bias parameter ``b``.
        enable_self_stabilization: wraps the previous state, the new cell and
            the output in ``Stabilizer()`` instances instead of ``identity``.
            Taken from ``_current_default_options`` when not given.

    Returns:
        ``combine([h, c])``, with an extra attribute ``create_placeholder``
        that creates a fresh ``(h, c)`` placeholder pair for recurrence.

    Raises:
        ValueError: if ``shape`` or ``cell_shape`` is not rank 1.
    """
    # options the caller did not give fall back to the current defaults
    if not _is_given(use_peepholes):
        use_peepholes = _current_default_options.use_peepholes
    if not _is_given(enable_self_stabilization):
        enable_self_stabilization = _current_default_options.enable_self_stabilization

    has_projection = cell_shape is not None  # must be read before cell_shape is defaulted
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")

    shape = _as_tuple(shape)
    if cell_shape is None:
        cell_shape = shape
    else:
        cell_shape = _as_tuple(cell_shape)
    if not (len(shape) == 1 and len(cell_shape) == 1):
        # otherwise we'd need to fix slicing and Param initializers
        raise ValueError("LSTM: shape and cell_shape must be vectors (rank-1 tensors)")

    stack_axis = -1  # stacking along the fastest-changing one, to match BS

    # determine stacking dimensions: the four gate projections share one tensor
    stacked_dim = cell_shape[0]
    stacked_dims = list(cell_shape)
    stacked_dims[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(stacked_dims)  # patched dims with stack_axis duplicated 4 times

    # parameters (creation order is kept: b, W, A, H, Ci, Cf, Co, Wmr)
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')              # a bias
    W = Parameter(_INFERRED + cell_shape_stacked, init=init, name='W')       # input
    A = None                                                                 # aux input (optional)
    if has_aux:
        A = Parameter(_INFERRED + cell_shape_stacked, init=init, name='A')
    H = Parameter(shape + cell_shape_stacked, init=init, name='H')           # hidden-to-hidden
    Ci = Cf = Co = None                                                      # cell-to-hidden {note: applied elementwise}
    if use_peepholes:
        Ci = Parameter(cell_shape, init=init, name='Ci')
        Cf = Parameter(cell_shape, init=init, name='Cf')
        Co = Parameter(cell_shape, init=init, name='Co')
    Wmr = None                                                               # final projection
    if has_projection:
        Wmr = Parameter(cell_shape + shape, init=init)

    def make_stabilizer():
        # one independent Stabilizer per use site, or a pass-through
        return Stabilizer() if enable_self_stabilization else identity

    Sdh = make_stabilizer()
    Sdc = make_stabilizer()
    Sct = make_stabilizer()
    Sht = make_stabilizer()

    def create_hc_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        h_placeholder = Placeholder(shape=shape, name='hPh')
        c_placeholder = Placeholder(shape=cell_shape, name='cPh')
        return (h_placeholder, c_placeholder)  # (h, c)

    # parameters to model function
    x = Placeholder(name='lstm_block_arg')
    dh, dc = create_hc_placeholder()

    # formula of model function
    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H)
    if has_aux:
        # never taken while has_aux is False; `aux` is not defined in this function
        proj4 = proj4 + times(aux, A)

    # split along stack_axis into the four equally sized gate projections
    it_proj, bit_proj, ft_proj, ot_proj = [
        slice(proj4, stack_axis, k * stacked_dim, (k + 1) * stacked_dim)
        for k in range(4)
    ]

    # add peephole connection if requested
    def peep(gate_input, cell, weight):
        if use_peepholes:
            return gate_input + weight * cell
        return gate_input

    it = sigmoid(peep(it_proj, dcs, Ci))        # input gate(t)
    bit = it * tanh(bit_proj)                   # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))        # forget-me-not gate(t)
    bft = ft * dc                               # applied to cell(t-1)

    ct = bft + bit                              # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))    # output gate(t)
    ht = ot * tanh(ct)                          # applied to tanh(cell(t))

    c = ct                                      # cell value
    if has_projection:
        h = times(Sht(ht), Wmr)
    else:
        h = ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    # Note that this function will only exist in the object returned here, but not any cloned version of it.
    apply_x_h_c.create_placeholder = create_hc_placeholder
    # BUGBUG: `return Block(apply_x_h_c, 'LSTM')` fails with "RuntimeError: A Function instance
    # with more than one output cannot be implicitly converted to a Variable"
    return apply_x_h_c
def Input(*args, **kwargs):
    """Create an input variable and label it 'input'.

    All positional and keyword arguments are forwarded unchanged to
    ``input_variable``.

    Returns:
        Whatever ``_name_node`` returns for the new variable.
    """
    variable = input_variable(*args, **kwargs)
    return _name_node(variable, 'input')
def Constant(init, shape=None, name=''):
    """Factory method for a constant node (one of the "things with state").

    Args:
        init: value or initializer forwarded to ``constant``.
        shape: forwarded to ``constant`` as the ``shape`` keyword.
        name: forwarded to ``constant``; the node itself is then labelled
            with the fixed string 'constant' via ``_name_node``.

    Returns:
        Whatever ``_name_node`` returns for the new constant.
    """
    # TODO: use (*args, **kwargs)
    node = constant(init, shape=shape, name=name)
    return _name_node(node, 'constant')
def Parameter(shape, init, name=''):
    """Factory method for a parameter node (one of the "things with state").

    Args:
        shape: shape forwarded to ``parameter``.
        init: initializer spec; passed through ``_initializer_for`` before
            being handed to ``parameter``.
        name: forwarded to ``parameter``; the node itself is then labelled
            with the fixed string 'parameter' via ``_name_node``.

    Returns:
        Whatever ``_name_node`` returns for the new parameter.
    """
    # TODO: use (*args, **kwargs)
    initializer = _initializer_for(init)
    node = parameter(shape, init=initializer, name=name)
    return _name_node(node, 'parameter')
def _name_and_extend_Function(f, name=None):
    """Optionally name ``f``, then extend it via ``_extend_Function``.

    Args:
        f: the function object to process.
        name: when not ``None``, ``f`` is named with ``_name_node(f, name)``.

    Returns:
        None; ``f`` is processed in place by the helpers.
    """
    should_name = name is not None
    if should_name:
        _name_node(f, name)
    # the extension step runs whether or not a name was supplied
    _extend_Function(f)
def LSTM(shape, _inf, cell_shape=None, use_peepholes=False,
         init=_default_initializer, init_bias=0,
         enable_self_stabilization=False):
    """Build the LSTM step expression, a function of (x, (h, c)).

    Args:
        shape: shape of the output ``h`` (converted with ``_as_tuple``).
        _inf: input descriptor; its ``shape`` sizes the input weights and
            ``_inf.with_shape(...)`` describes the placeholders and the
            stabilizer/identity layers.
        cell_shape: shape of the cell ``c``. When given, a final projection
            ``Wmr`` maps the output back to ``shape``; when ``None`` the cell
            uses ``shape`` and there is no projection.
        use_peepholes: adds elementwise cell-to-gate weights ``Ci``/``Cf``/``Co``.
        init: initializer passed to the weight parameters.
        init_bias: initializer passed to the bias parameter ``b``.
        enable_self_stabilization: uses ``Stabilizer`` layers instead of
            ``Identity`` on the previous state, the new cell and the output.
            Flagged through ``UntestedBranchError`` when set.

    Returns:
        ``combine([h, c])``, named 'LSTM' and extended through
        ``_name_and_extend_Function``, with an extra attribute
        ``create_placeholder`` that creates a fresh ``(h, c)`` placeholder pair.
    """
    has_projection = cell_shape is not None  # must be read before cell_shape is defaulted
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")
    if enable_self_stabilization:
        UntestedBranchError("LSTM, enable_self_stabilization option")

    shape = _as_tuple(shape)
    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape

    #stack_axis = -1
    stack_axis = 0  # BUGBUG: should be -1, i.e. the fastest-changing one, to match BS

    # determine stacking dimensions: the four gate projections share one tensor
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(cell_shape_list)  # patched dims with stack_axis duplicated 4 times

    # parameters
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')              # a bias
    W = Parameter(_inf.shape + cell_shape_stacked, init=init, name='W')      # input
    A = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='A') if has_aux else None                             # aux input (optional)
    H = Parameter(shape + cell_shape_stacked, init=init, name='H')           # hidden-to-hidden
    # cell-to-hidden {note: applied elementwise}
    Ci = Parameter(cell_shape, init=init, name='Ci') if use_peepholes else None
    Cf = Parameter(cell_shape, init=init, name='Cf') if use_peepholes else None
    Co = Parameter(cell_shape, init=init, name='Co') if use_peepholes else None

    # final projection
    # Fix: this used ParameterTensor(..., init_value_scale=init_value_scale), but
    # init_value_scale is not defined in this function, so requesting a projection
    # raised NameError. Create it like the other weights instead.
    Wmr = Parameter(cell_shape + shape, init=init) if has_projection else None

    h_inf = _inf.with_shape(shape)
    Sdh = Stabilizer(_inf=h_inf) if enable_self_stabilization \
        else Identity(_inf=_inf.with_shape(shape))
    Sdc = Stabilizer(_inf=_inf.with_shape(cell_shape)) if enable_self_stabilization \
        else Identity(_inf=_inf.with_shape(cell_shape))
    Sct = Stabilizer(_inf=_inf.with_shape(cell_shape)) if enable_self_stabilization \
        else Identity(_inf=_inf.with_shape(cell_shape))
    Sht = Stabilizer(_inf=_inf.with_shape(shape)) if enable_self_stabilization \
        else Identity(_inf=_inf.with_shape(shape))

    def create_hc_placeholder():
        return (Placeholder(_inf=_inf.with_shape(shape), name='hPh'),
                Placeholder(_inf=_inf.with_shape(cell_shape), name='cPh'))  # (h, c)

    # parameters to model function
    x = Placeholder(_inf=_inf, name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    # (the has_aux branch is never taken while has_aux is False; `aux` is not
    # defined in this function)
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    # split along stack_axis into the four equally sized gate projections
    it_proj = slice(proj4, stack_axis, 0 * stacked_dim, 1 * stacked_dim)
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))        # input gate(t)
    bit = it * tanh(bit_proj)                   # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))        # forget-me-not gate(t)
    bft = ft * dc                               # applied to cell(t-1)

    ct = bft + bit                              # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))    # output gate(t)
    ht = ot * tanh(ct)                          # applied to tanh(cell(t))

    c = ct                                      # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    apply_x_h_c.create_placeholder = create_hc_placeholder
    _name_and_extend_Function(apply_x_h_c, 'LSTM')
    return apply_x_h_c
def Constant(init, shape=None, name=''):
    """Factory method for a constant node (one of the "things with state").

    Args:
        init: value or initializer forwarded to ``constant``.
        shape: forwarded to ``constant`` as its second positional argument.
        name: forwarded to ``constant``; the node itself is then labelled
            with the fixed string 'constant' via ``_name_node``.

    Returns:
        Whatever ``_name_node`` returns for the new constant.
    """
    node = constant(init, shape, name=name)
    return _name_node(node, 'constant')
def Parameter(shape, init, name=''):
    """Factory method for a parameter node (one of the "things with state").

    Args:
        shape: shape forwarded to ``parameter``.
        init: initializer forwarded to ``parameter``; must not be ``None``.
        name: forwarded to ``parameter``; the node itself is then labelled
            with the fixed string 'parameter' via ``_name_node``.

    Returns:
        Whatever ``_name_node`` returns for the new parameter.

    Raises:
        ValueError: if ``init`` is ``None``.
    """
    if init is None:
        # Raising a bare string is itself an error in Python 3 (exceptions must
        # derive from BaseException), which hid the intended message.
        raise ValueError("Parameter: init cannot be None")
    p = parameter(shape, init=init, name=name)
    return _name_node(p, 'parameter')