Example #1
0
def Placeholder(_inf, name='placeholder'):
    p = placeholder_variable(shape=_as_tuple(_inf.shape),
                             dynamic_axes=_inf.axis,
                             name=name)
    _name_node(p, name)
    if _trace_layers:
        print("new " + _node_description(p))
    return p
Example #2
0
def Placeholder(shape=None, name='placeholder'):
    if shape is not None:
        p = placeholder_variable(shape=shape, name=name) # TODO: use (*args, **kwargs)?
    else:
        p = placeholder_variable(name=name) # TODO: use (*args, **kwargs)?
    _name_node(p, name)
    if _trace_layers:
        print("new " + _node_description(p))
    return p
Example #3
0
def LSTM(shape, cell_shape=None, use_peepholes=use_peepholes_default_or_False,
         init=init_default_or_glorot_uniform, init_bias=init_bias_default_or_0,
         enable_self_stabilization=enable_self_stabilization_default_or_False): # (x, (h, c))

    use_peepholes             = use_peepholes             if _is_given(use_peepholes)             else _current_default_options.use_peepholes
    enable_self_stabilization = enable_self_stabilization if _is_given(enable_self_stabilization) else _current_default_options.enable_self_stabilization
    has_projection = cell_shape is not None
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")

    shape = _as_tuple(shape)

    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape
    if len(shape) != 1 or len(cell_shape) != 1:
        raise ValueError("LSTM: shape and cell_shape must be vectors (rank-1 tensors)")
        # otherwise we'd need to fix slicing and Param initializers

    stack_axis = -1  # stacking along the fastest-changing one, to match BS
    # determine stacking dimensions
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim*4
    cell_shape_stacked = tuple(cell_shape_list)  # patched dims with stack_axis duplicated 4 times

    # parameters
    b  = Parameter(            cell_shape_stacked, init=init_bias, name='b')                              # a bias
    W  = Parameter(_INFERRED + cell_shape_stacked, init=init,      name='W')                              # input
    A  = Parameter(_INFERRED + cell_shape_stacked, init=init,      name='A') if has_aux else None         # aux input (optional)
    H  = Parameter(shape     + cell_shape_stacked, init=init,      name='H')                              # hidden-to-hidden
    Ci = Parameter(            cell_shape,         init=init,      name='Ci') if use_peepholes else None  # cell-to-hiddden {note: applied elementwise}
    Cf = Parameter(            cell_shape,         init=init,      name='Cf') if use_peepholes else None  # cell-to-hiddden {note: applied elementwise}
    Co = Parameter(            cell_shape,         init=init,      name='Co') if use_peepholes else None  # cell-to-hiddden {note: applied elementwise}

    Wmr = Parameter(cell_shape + shape, init=init) if has_projection else None  # final projection

    Sdh = Stabilizer() if enable_self_stabilization else identity
    Sdc = Stabilizer() if enable_self_stabilization else identity
    Sct = Stabilizer() if enable_self_stabilization else identity
    Sht = Stabilizer() if enable_self_stabilization else identity

    def create_hc_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        return (Placeholder(shape=shape, name='hPh'), Placeholder(shape=cell_shape, name='cPh')) # (h, c)

    # parameters to model function
    x = Placeholder(name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    it_proj  = slice (proj4, stack_axis, 0*stacked_dim, 1*stacked_dim)  # split along stack_axis
    bit_proj = slice (proj4, stack_axis, 1*stacked_dim, 2*stacked_dim)
    ft_proj  = slice (proj4, stack_axis, 2*stacked_dim, 3*stacked_dim)
    ot_proj  = slice (proj4, stack_axis, 3*stacked_dim, 4*stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid (peep (it_proj, dcs, Ci))        # input gate(t)
    bit = it * tanh (bit_proj)                    # applied to tanh of input network

    ft = sigmoid (peep (ft_proj, dcs, Cf))        # forget-me-not gate(t)
    bft = ft * dc                                 # applied to cell(t-1)

    ct = bft + bit                                # c(t) is sum of both

    ot = sigmoid (peep (ot_proj, Sct(ct), Co))    # output gate(t)
    ht = ot * tanh (ct)                           # applied to tanh(cell(t))

    c = ct                                        # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine ([h, c])
    # return to caller a helper function to create placeholders for recurrence
    # Note that this function will only exist in the object returned here, but not any cloned version of it.
    apply_x_h_c.create_placeholder = create_hc_placeholder
    #return Block(apply_x_h_c, 'LSTM') # BUGBUG: fails with "RuntimeError: A Function instance with more than one output cannot be implicitly converted to a Variable"
    return apply_x_h_c
Example #4
0
def Input(*args, **kwargs):
    return _name_node(input_variable(*args, **kwargs), 'input')
Example #5
0
def Constant(init, shape=None, name=''):
    p = constant (init, shape=shape, name=name) # TODO: use (*args, **kwargs)
    return _name_node(p, 'constant')   # these are factory methods for things with state
Example #6
0
def Parameter(shape, init, name=''):
    init = _initializer_for(init)
    p = parameter(shape, init=init, name=name) # TODO: use (*args, **kwargs)
    return _name_node(p, 'parameter')   # these are factory methods for things with state
Example #7
0
def _name_and_extend_Function(f, name=None):
    if name is not None:
        _name_node(f, name)
    _extend_Function(f)
Example #8
0
def LSTM(shape,
         _inf,
         cell_shape=None,
         use_peepholes=False,
         init=_default_initializer,
         init_bias=0,
         enable_self_stabilization=False):  # (x, (h, c))
    has_projection = cell_shape is not None
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")
    if enable_self_stabilization:
        UntestedBranchError("LSTM, enable_self_stabilization option")

    shape = _as_tuple(shape)

    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape

    #stack_axis = -1  #
    stack_axis = 0  # BUGBUG: should be -1, i.e. the fastest-changing one, to match BS
    # determine stacking dimensions
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(
        cell_shape_list)  # patched dims with stack_axis duplicated 4 times

    # parameters
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')  # a bias
    W = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='W')  # input
    A = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='A') if has_aux else None  # aux input (optional)
    H = Parameter(shape + cell_shape_stacked, init=init,
                  name='H')  # hidden-to-hidden
    Ci = Parameter(
        cell_shape, init=init, name='Ci'
    ) if use_peepholes else None  # cell-to-hiddden {note: applied elementwise}
    Cf = Parameter(
        cell_shape, init=init, name='Cf'
    ) if use_peepholes else None  # cell-to-hiddden {note: applied elementwise}
    Co = Parameter(
        cell_shape, init=init, name='Co'
    ) if use_peepholes else None  # cell-to-hiddden {note: applied elementwise}

    Wmr = ParameterTensor(
        cell_shape + shape, init=init, init_value_scale=init_value_scale
    ) if has_projection else None  # final projection

    Sdh = Stabilizer(_inf=_inf.with_shape(
        shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(shape))
    Sdc = Stabilizer(_inf=_inf.with_shape(
        cell_shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(cell_shape))
    Sct = Stabilizer(_inf=_inf.with_shape(
        cell_shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(cell_shape))
    Sht = Stabilizer(_inf=_inf.with_shape(
        shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(shape))

    def create_hc_placeholder():
        return (Placeholder(_inf=_inf.with_shape(shape), name='hPh'),
                Placeholder(_inf=_inf.with_shape(cell_shape),
                            name='cPh'))  # (h, c)

    # parameters to model function
    x = Placeholder(_inf=_inf, name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    it_proj = slice(proj4, stack_axis, 0 * stacked_dim,
                    1 * stacked_dim)  # split along stack_axis
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))  # input gate(t)
    bit = it * tanh(bit_proj)  # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))  # forget-me-not gate(t)
    bft = ft * dc  # applied to cell(t-1)

    ct = bft + bit  # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
    ht = ot * tanh(ct)  # applied to tanh(cell(t))

    c = ct  # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    apply_x_h_c.create_placeholder = create_hc_placeholder
    _name_and_extend_Function(apply_x_h_c, 'LSTM')
    return apply_x_h_c
Example #9
0
def Constant(init, shape=None, name=''):
    p = constant(init, shape, name=name)
    return _name_node(
        p, 'constant')  # these are factory methods for things with state
Example #10
0
def Parameter(shape, init, name=''):
    if init is None:
        raise "Parameter: init cannot be None"
    p = parameter(shape, init=init, name=name)
    return _name_node(
        p, 'parameter')  # these are factory methods for things with state