def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """

        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W', borrow=True)
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)

        # compute vector of class-membership probabilities in symbolic form
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # compute prediction as class whose probability is maximal in
        # symbolic form
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # parameters of the model
        self.params = [self.W, self.b]
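
A minimal usage sketch for the layer above, assuming this __init__ belongs to a class named LogisticRegression (the class statement is not shown) and that numpy, theano and T are imported as in the snippet; the input sizes are illustrative only.

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')  # one minibatch, one example per row
clf = LogisticRegression(input=x, n_in=784, n_out=10)
predict = theano.function([x], clf.y_pred)
# with all-zero weights every class is tied, so argmax returns class 0
print(predict(numpy.zeros((5, 784), dtype=theano.config.floatX)))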
def test_pickle_unpickle_without_reoptimization():
    mode = theano.config.mode
    if mode in ["DEBUG_MODE", "DebugMode"]:
        mode = "FAST_RUN"
    x1 = T.fmatrix("x1")
    x2 = T.fmatrix("x2")
    x3 = theano.shared(numpy.ones((10, 10), dtype=floatX))
    x4 = theano.shared(numpy.ones((10, 10), dtype=floatX))
    y = T.sum(T.sum(T.sum(x1 ** 2 + x2) + x3) + x4)

    updates = OrderedDict()
    updates[x3] = x3 + 1
    updates[x4] = x4 + 1
    f = theano.function([x1, x2], y, updates=updates, mode=mode)

    # now pickle the compiled theano fn
    string_pkl = pickle.dumps(f, -1)

    # compute f value
    in1 = numpy.ones((10, 10), dtype=floatX)
    in2 = numpy.ones((10, 10), dtype=floatX)

    # test unpickle without optimization
    default = theano.config.reoptimize_unpickled_function
    try:
        # the default is True
        theano.config.reoptimize_unpickled_function = False
        f_ = pickle.loads(string_pkl)
        assert f(in1, in2) == f_(in1, in2)
    finally:
        theano.config.reoptimize_unpickled_function = default
Example #3
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, activation=T.tanh):
        self.input = input

        # initialize weights into this layer
        if W is None:
            W_values = np.asarray(
                rng.uniform(size=(n_in, n_out), low=-np.sqrt(6.0 / (n_in + n_out)), high=np.sqrt(6.0 / (n_in + n_out))),
                dtype=theano.config.floatX,
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name="W", borrow=True)

        # initialize bias term weights into this layer
        if b is None:
            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name="b", borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = lin_output if activation is None else activation(lin_output)

        self.params = [self.W, self.b]
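
A hedged usage sketch, assuming the __init__ above sits in a class named HiddenLayer (the class statement is not shown) and that np, theano and T follow the snippet's imports; the layer sizes are illustrative.

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
x = T.matrix('x')
hidden = HiddenLayer(rng, input=x, n_in=784, n_out=500, activation=T.tanh)
f = theano.function([x], hidden.output)
out = f(np.ones((2, 784), dtype=theano.config.floatX))  # shape (2, 500)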
Example #4
def check_parameter(name, value):
    """Check, convert and extract inputs of a parameter value.

    This function wraps scalar or lists into a Theano shared variable, then
    acting as a parameter. Theano expressions are left unchanged.

    Parameters
    ----------
    * `name` [string]:
        The parameter name.

    * `value` [theano expression, list or scalar]:
        The parameter value.

    Returns
    -------
    * `value` [theano expression]:
        The parameter expression.

    * `parameters` [set of theano shared variables]:
        Set of base shared variables on which `value` depends.

    * `constants` [set of theano constants]:
        Set of base constants on which `value` depends.

    * `observeds` [set of theano tensor variables]:
        Set of base unset variables on which `value` depends.
    """
    parameters = set()
    constants = set()
    observeds = set()

    if isinstance(value, SharedVariable):
        parameters.add(value)
    elif isinstance(value, T.TensorConstant):
        constants.add(value)
    elif isinstance(value, T.TensorVariable):
        inputs = graph.inputs([value])

        for var in inputs:
            if isinstance(var, SharedVariable):
                parameters.add(var)
            elif isinstance(var, T.TensorConstant):
                constants.add(var)
            elif isinstance(var, T.TensorVariable):
                if not var.name:
                    raise ValueError("Observed variables must be named.")
                observeds.add(var)
    else:
        if isinstance(value, list):
            value = np.asarray(value)

        if isinstance(value, np.ndarray):
            value = theano.shared(value, name=name)
        else:
            value = theano.shared(float(value), name=name)

        parameters.add(value)

    return value, parameters, constants, observeds
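
A short usage sketch for check_parameter, showing the scalar and expression cases (the variable names here are illustrative):

import theano
import theano.tensor as T

# a scalar is wrapped into a fresh shared variable named "mu"
mu, params, consts, obs = check_parameter("mu", 1.0)

# an expression over a shared variable and a named tensor: both are extracted
sigma = theano.shared(2.0, name="sigma")
X = T.dmatrix(name="X")
expr, params, consts, obs = check_parameter("scaled", sigma * X)
# params == {sigma}, obs == {X}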
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        # the filters must cover every input channel: the number of input feature maps must match
        assert image_shape[1] == filter_shape[1]
        self.input = input

        fan_in = numpy.prod(filter_shape[1:])

        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)

        W_bound = numpy.sqrt(6.0 / (fan_in + fan_out))
        # filter_shape is a 4-dimensional tensor
        self.W = theano.shared(
            numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX),
            borrow=True,
        )
        # one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolution output
        conv_out = conv.conv2d(input=input, filters=self.W, filter_shape=filter_shape, image_shape=image_shape)

        # max-pooling output
        pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize, ignore_border=True)  # ignore_border: output size is floored to input size // pool size
        # adding the bias before or after pooling gives the same result
        self.output = T.tanh(pooled_out + self.b.dimshuffle("x", 0, "x", "x"))
        # store the parameters of this layer
        self.params = [self.W, self.b]
        # keep track of the model input
        self.input = input
Example #6
    def __init__(self, x, n_in, n_out, activation, W=None, b=None):
        self.n_in = n_in
        self.n_out = n_out
        self.input = x

        W_values = numpy.asarray(
            numpy.random.uniform(
                low=-numpy.sqrt(6.0 / (n_in + n_out)), high=numpy.sqrt(6.0 / (n_in + n_out)), size=(n_in, n_out)
            ),
            dtype=theano.config.floatX,
        )

        if W is None:
            self.W = theano.shared(W_values, name="W%dx%d" % (n_in, n_out))
        else:
            self.W = W

        if b is None:
            self.b = theano.shared(numpy.zeros((n_out,), dtype=theano.config.floatX), name="b")
        else:
            self.b = b

        self.linear_output = T.dot(x, self.W)

        self.output = activation(self.linear_output + self.b)
Example #7
    def __init__(self, image_shape, filter_shape, poolsize, W=None, b=None, activation=linear):
        """
        params :image_shape: Input size ( batch_size, channel, weight, height )
        type :image_shape: tuple with length of 4

        params :filter_shape: filter size ( channel_out, channel_in, weight, height )
        type :filter_shape: tuple with length of 4

        params :poolsize : poolsize 
        type :poolsize:  int 

        params :W: filter in 4 dimension array ( channel_out, channel_in, weight, height )
        type :W: numpy.ndarray

        params :b: bias vector ( channel_out )
        type :b: numpy.ndarray
        """
        assert W is not None
        assert b is not None
        assert image_shape[1] == filter_shape[1]

        self.image_shape = image_shape
        self.filter_shape = filter_shape
        self.poolsize = poolsize
        self.activation = activation

        self.W = theano.shared(np.asarray(W, dtype=theano.config.floatX))
        self.b = theano.shared(np.asarray(b, dtype=theano.config.floatX))

        x = T.tensor4("x")
        conv_out = conv.conv2d(
            input=x, filters=self.W, filter_shape=filter_shape, image_shape=image_shape, border_mode="valid"
        )
        output = conv_out + self.b.dimshuffle("x", 0, "x", "x")
        self.conv = theano.function([x], output, allow_input_downcast=True)
Example #8
    def shared_dataset(data_xy, borrow=True):

        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)

        return shared_x, T.cast(shared_y, "int32")
Example #9
def rmsprop(lr, tparams, grads, x, mask, y, cost):
    zipped_grads = [
        theano.shared(p.get_value() * numpy_floatX(0.0), name="%s_grad" % k) for k, p in tparams.iteritems()
    ]
    running_grads = [
        theano.shared(p.get_value() * numpy_floatX(0.0), name="%s_rgrad" % k) for k, p in tparams.iteritems()
    ]
    running_grads2 = [
        theano.shared(p.get_value() * numpy_floatX(0.0), name="%s_rgrad2" % k) for k, p in tparams.iteritems()
    ]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([x, mask, y], cost, updates=zgup + rgup + rg2up, name="rmsprop_f_grad_shared")

    updir = [theano.shared(p.get_value() * numpy_floatX(0.0), name="%s_updir" % k) for k, p in tparams.iteritems()]
    updir_new = [
        (ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
        for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)
    ]
    param_up = [(p, p + udn[1]) for p, udn in zip(tparams.values(), updir_new)]
    f_update = theano.function(
        [lr], [], updates=updir_new + param_up, on_unused_input="ignore", name="rmsprop_f_update"
    )

    return f_grad_shared, f_update
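
A toy end-to-end sketch of the f_grad_shared / f_update split, assuming numpy_floatX and the `tensor` alias are defined at module level as the function body requires; the one-parameter model below is purely illustrative.

from collections import OrderedDict
import numpy
import theano
import theano.tensor as tensor

def numpy_floatX(data):
    return numpy.asarray(data, dtype=theano.config.floatX)

lr = tensor.scalar('lr')
x = tensor.matrix('x')
mask = tensor.matrix('mask')
y = tensor.vector('y')

W = theano.shared(numpy_floatX(numpy.zeros(3)), name='W')
tparams = OrderedDict([('W', W)])
cost = ((tensor.dot(x, W) * mask[:, 0] - y) ** 2).mean()
grads = tensor.grad(cost, wrt=list(tparams.values()))

f_grad_shared, f_update = rmsprop(lr, tparams, grads, x, mask, y, cost)
c = f_grad_shared(numpy_floatX(numpy.ones((4, 3))),
                  numpy_floatX(numpy.ones((4, 1))),
                  numpy_floatX(numpy.ones(4)))   # stores the gradients
f_update(0.01)   # applies the update; lr is unused by this variant but still required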
Example #10
def adadelta(lr, tparams, grads, inp, cost):
    zipped_grads = [
        theano.shared(p.get_value() * numpy.float32(0.0), name="%s_grad" % k) for k, p in tparams.iteritems()
    ]
    running_up2 = [
        theano.shared(p.get_value() * numpy.float32(0.0), name="%s_rup2" % k) for k, p in tparams.iteritems()
    ]
    running_grads2 = [
        theano.shared(p.get_value() * numpy.float32(0.0), name="%s_rgrad2" % k) for k, p in tparams.iteritems()
    ]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup + rg2up, profile=profile)

    updir = [
        -tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
        for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)
    ]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    f_update = theano.function([lr], [], updates=ru2up + param_up, on_unused_input="ignore", profile=profile)

    return f_grad_shared, f_update
Example #11
def adam(lr, tparams, grads, inp, cost):
    gshared = [theano.shared(p.get_value() * 0.0, name="%s_grad" % k) for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(inp, cost, updates=gsup, profile=profile)

    lr0 = 0.0002
    b1 = 0.1
    b2 = 0.001
    e = 1e-8

    updates = []

    i = theano.shared(numpy.float32(0.0))
    i_t = i + 1.0
    fix1 = 1.0 - b1 ** (i_t)
    fix2 = 1.0 - b2 ** (i_t)
    lr_t = lr0 * (tensor.sqrt(fix2) / fix1)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0.0)
        v = theano.shared(p.get_value() * 0.0)
        m_t = (b1 * g) + ((1.0 - b1) * m)
        v_t = (b2 * tensor.sqr(g)) + ((1.0 - b2) * v)
        g_t = m_t / (tensor.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    f_update = theano.function([lr], [], updates=updates, on_unused_input="ignore", profile=profile)

    return f_grad_shared, f_update
def get_rnn_kwargs(pre_rnn, args):
    kwargs = OrderedDict()
    init_states = {}
    if args.rnn_type == "lstm":
        init_cells = {}
    for d in range(args.layers):
        if d > 0:
            suffix = RECURRENTSTACK_SEPARATOR + str(d)
        else:
            suffix = ""
        if args.skip_connections:
            kwargs["inputs" + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs["inputs"] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, args.state_dim)).astype(floatX), name="state0_%d" % d
        )
        if args.rnn_type == "lstm":
            init_cells[d] = theano.shared(
                numpy.zeros((args.mini_batch_size, args.state_dim)).astype(floatX), name="cell0_%d" % d
            )
        kwargs["states" + suffix] = init_states[d]
        if args.rnn_type == "lstm":
            kwargs["cells" + suffix] = init_cells[d]
    inits = [init_states]
    if args.rnn_type == "lstm":
        inits.append(init_cells)
    return kwargs, inits
Example #13
    def __init__(
        self,
        input,
        inputLabels,
        nrLayers,
        initialWeights,
        initialBiases,
        activationFunction,
        classificationActivationFunction,
        visibleDropout,
        hiddenDropout,
        adversarial_training,
        adversarial_epsilon,
        adversarial_coefficient,
    ):
        self.input = input
        self.inputLabels = inputLabels
        # If we should use adversarial training or not
        self.adversarial_training = adversarial_training
        self.adversarial_coefficient = adversarial_coefficient
        self.adversarial_epsilon = adversarial_epsilon

        self.visibleDropout = visibleDropout
        self.hiddenDropout = hiddenDropout
        self.activationFunction = activationFunction
        self.classificationActivationFunction = classificationActivationFunction

        # Let's initialize the fields
        # The weights and biases, make them shared variables
        nrWeights = nrLayers - 1
        self.nrWeights = nrWeights
        biases = []
        weights = []
        for i in xrange(nrWeights):
            w = theano.shared(value=np.asarray(initialWeights[i], dtype=theanoFloat), name="W")
            weights.append(w)

            b = theano.shared(value=np.asarray(initialBiases[i], dtype=theanoFloat), name="b")
            biases.append(b)

        # Set the parameters of the object
        # Do not set more than this, these will be used for differentiation in the
        # gradient
        params = weights + biases
        self.biases = biases

        # Initialize the super class
        super(MiniBatchTrainer, self).__init__(params, weights)

        # Create a theano random number generator required to sample units for dropout
        self.theanoRng = RandomStreams(seed=np.random.randint(1, 1000))
        self.output = self.forwardPass(self.input)

        if self.adversarial_training:
            # TODO(mihaela): move this to the BatchTrainer superclass?
            # This would require moving the forward functionality there
            error = T.sum(self.costFun(self.output, self.inputLabels))
            grad_error = T.grad(error, self.input)
            adversarial_input = self.input + self.adversarial_epsilon * T.sgn(grad_error)
            self.adversarial_output = self.forwardPass(adversarial_input)
    def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=500, W=None, bhid=None, bvis=None):

        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if not W:
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6.0 / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6.0 / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden),
                ),
                dtype=theano.config.floatX,
            )
            W = theano.shared(value=initial_W, name="W")

        if not bvis:
            bvis = theano.shared(value=numpy.zeros(n_visible, dtype=theano.config.floatX), name="bvis")

        if not bhid:
            bhid = theano.shared(value=numpy.zeros(n_hidden, dtype=theano.config.floatX), name="bhid")

        self.W = W
        self.b = bhid
        self.b_prime = bvis
        # using tied weights hence W_prime is W transpose
        self.W_prime = self.W.T

        if input is None:
            self.x = T.dmatrix(name="input")
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]
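
The tied-weights convention above (W_prime = W.T) is what the encoder/decoder pair relies on; a hedged sketch of the two methods the rest of the standard dA class typically builds on these attributes:

    def get_hidden_values(self, input):
        # encode: project into the hidden space with W and the hidden bias
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        # decode: map back with the tied (transposed) weights and the visible bias
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)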
Example #15
def adam(loss, all_params, learn_rate=0.001, b1=0.9, b2=0.999, e=1e-8, gamma=1 - 1e-8):
    """ADAM update rules

    Kingma, Diederik, and Jimmy Ba. "Adam: A Method for Stochastic Optimization." arXiv preprint arXiv:1412.6980 (2014). http://arxiv.org/pdf/1412.6980v4.pdf
    """
    updates = []
    all_grads = theano.grad(loss, all_params)
    alpha = learn_rate
    t = theano.shared(np.float32(1.0))
    b1_t = b1 * gamma ** (t - 1.0)  # decay the first moment running average coefficient

    for theta_prev, g in zip(all_params, all_grads):
        m_prev = theano.shared(np.zeros(theta_prev.get_value().shape, dtype=theano.config.floatX))
        v_prev = theano.shared(np.zeros(theta_prev.get_value().shape, dtype=theano.config.floatX))

        m = b1_t * m_prev + (1.0 - b1_t) * g  # update biased first moment estimate
        v = b2 * v_prev + (1.0 - b2) * g ** 2  # update biased second raw moment estimate
        m_hat = m / (1.0 - b1 ** t)  # compute bias-corrected first moment estimate
        v_hat = v / (1.0 - b2 ** t)  # compute bias-corrected second raw moment estimate
        theta = theta_prev - (alpha * m_hat) / (T.sqrt(v_hat) + e)  # update parameters

        updates.append((m_prev, m))
        updates.append((v_prev, v))
        updates.append((theta_prev, theta))
    updates.append((t, t + 1.0))
    return updates
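
A hedged usage sketch of this adam() on a toy least-squares problem; the data and variable names are illustrative, and np, theano and T are assumed to be the module's imports.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')

loss = T.mean((T.dot(x, w) - y) ** 2)
train = theano.function([x, y], loss, updates=adam(loss, [w]))

data_x = np.random.rand(16, 3).astype(theano.config.floatX)
data_y = np.random.rand(16).astype(theano.config.floatX)
for _ in range(10):
    print(train(data_x, data_y))  # loss after each adam step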
Example #16
    def test_dimshuffle_false_get_output_for(self, DummyInputLayer):
        try:
            from lasagne.layers.cuda_convnet import Conv2DCCLayer
        except ImportError:
            pytest.skip("cuda_convnet not available")

        # this implementation is tested against FilterActs instead of
        # theano.tensor.nnet.conv.conv2d because using the latter leads to
        # numerical precision errors.
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs

        filter_acts = FilterActs(stride=1, pad=0, partial_sum=1)

        input = theano.shared(floatX(np.random.random((4, 5, 5, 8))))
        kernel = theano.shared(floatX(np.random.random((4, 3, 3, 16))))

        input_layer = DummyInputLayer((4, 5, 5, 8))  # c01b instead of bc01
        layer = Conv2DCCLayer(
            input_layer, num_filters=16, filter_size=(3, 3), dimshuffle=False, W=kernel, b=None, nonlinearity=None
        )

        output = np.array(filter_acts(input, kernel).eval())

        actual = layer.get_output_for(input).eval()
        actual = np.array(actual)
        assert actual.shape == output.shape
        assert actual.shape == layer.output_shape
        assert np.allclose(actual, output)
Example #17
def sigmoid_layer(input, n_in, n_out, rng):
    w_init = rng.uniform(
        low=-4 * np.sqrt(6.0 / (n_in + n_out)), high=4 * np.sqrt(6.0 / (n_in + n_out)), size=(n_in, n_out)
    )
    W = theano.shared(np.asarray(w_init, dtype=theano.config.floatX), name="W", borrow=True)
    b = theano.shared(np.zeros((n_out,), dtype=theano.config.floatX), name="b", borrow=True)
    return T.nnet.sigmoid(T.dot(input, W) + b), [W, b]
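
A minimal sketch of chaining this helper into a two-layer network, assuming np, theano and T are imported as in the snippet; the layer sizes are illustrative.

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(0)
x = T.matrix('x')
h1, params1 = sigmoid_layer(x, 784, 256, rng)
h2, params2 = sigmoid_layer(h1, 256, 64, rng)
f = theano.function([x], h2)
out = f(np.ones((2, 784), dtype=theano.config.floatX))  # shape (2, 64)
params = params1 + params2  # all trainable parameters for a later T.grad call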
Example #18
def sgd_updates_adadelta(params, cost, rho=0.95, epsilon=1e-6, norm_lim=9, word_vec_name='Words'):
    """
    adadelta update rule, mostly from
    https://groups.google.com/forum/#!topic/pylearn-dev/3QbKtCumAW4 (for Adadelta)
    """
    updates = OrderedDict({})
    exp_sqr_grads = OrderedDict({})
    exp_sqr_ups = OrderedDict({})
    gparams = []
    for param in params:
        empty = np.zeros_like(param.get_value())
        exp_sqr_grads[param] = theano.shared(value=as_floatX(empty), name="exp_grad_%s" % param.name)
        gp = T.grad(cost, param)
        exp_sqr_ups[param] = theano.shared(value=as_floatX(empty), name="exp_ups_%s" % param.name)
        gparams.append(gp)
    for param, gp in zip(params, gparams):
        exp_sg = exp_sqr_grads[param]
        exp_su = exp_sqr_ups[param]
        up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gp)
        updates[exp_sg] = up_exp_sg
        step = -(T.sqrt(exp_su + epsilon) / T.sqrt(up_exp_sg + epsilon)) * gp
        updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step)
        stepped_param = param + step
        if (param.get_value(borrow=True).ndim == 2) and (param.name != word_vec_name):
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(norm_lim))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param
    return updates
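
A hedged usage sketch for sgd_updates_adadelta; as_floatX is assumed to be the module's float-cast helper, and each parameter needs a name for the rule's bookkeeping (the model below is a toy linear regression).

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
W = theano.shared(np.zeros((5, 1), dtype=theano.config.floatX), name='W')
b = theano.shared(np.zeros((1,), dtype=theano.config.floatX), name='b')

cost = T.mean((T.dot(x, W) + b - y.dimshuffle(0, 'x')) ** 2)
updates = sgd_updates_adadelta([W, b], cost, rho=0.95, epsilon=1e-6)
train = theano.function([x, y], cost, updates=updates)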
Example #19
    def __init__(self, rng, input, n_in, n_out, activation, W=None, b=None, use_bias=False):

        self.input = input
        self.activation = activation

        if W is None:
            if activation.func_name == "ReLU":
                W_values = numpy.asarray(0.01 * rng.standard_normal(size=(n_in, n_out)), dtype=theano.config.floatX)
            else:
                W_values = numpy.asarray(
                    rng.uniform(
                        low=-numpy.sqrt(6.0 / (n_in + n_out)), high=numpy.sqrt(6.0 / (n_in + n_out)), size=(n_in, n_out)
                    ),
                    dtype=theano.config.floatX,
                )
            W = theano.shared(value=W_values, name="W")
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name="b")

        self.W = W
        self.b = b

        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)

        self.output = lin_output if activation is None else activation(lin_output)

        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]
    def __init__(self, rng, input, filter_shape, image_shape):

        """
        The conv2d op takes two inputs: the input images, with shape
        (batch size, #feature maps, #rows of image, #cols of image), and the
        weights, with shape (#filters, stack size, #rows of filter, #cols of filter).
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input
        fan_in = np.prod(filter_shape[1:])
        fan_out = (
            filter_shape[0] * filter_shape[2] * filter_shape[3]
        )  # number of filtered feature maps, and the filter size

        W_bound = np.sqrt(6.0 / (fan_in + fan_out))

        self.W = theano.shared(
            np.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX),
            name="W",
            borrow=True,
        )

        b_value = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(b_value, name="b", borrow=True)

        conv_out = T.nnet.conv.conv2d(input=input, filters=self.W, filter_shape=filter_shape, image_shape=image_shape)
        self.output = self.relu(conv_out + self.b.dimshuffle("x", 0, "x", "x"))

        self.params = [self.W, self.b]
Example #21
 def __init__(self, n_hiddens=1024, W=None, c=None, b=None, K=1, epsilon=0.1, n_samples=10, epochs=20):
     """
     Initialize an RBM.
     
     Parameters
     ----------
     n_hiddens : int, optional
         Number of binary hidden units
     W : array-like, shape (n_visibles, n_hiddens), optional
         Weight matrix, where n_visibles is the number of visible
         units and n_hiddens is the number of hidden units.
     c : array-like, shape (n_hiddens,), optional
         Biases of the hidden units
     b : array-like, shape (n_visibles,), optional
         Biases of the visible units
     K : int, optional
         Number of MCMC steps to perform on the negative chain
         after each gradient step.
     epsilon : float, optional
         Learning rate to use during learning
     n_samples : int, optional
         Number of fantasy particles to use during learning
     epochs : int, optional
         Number of epochs to perform during learning
     """
     self.n_hiddens = n_hiddens
     self._W = theano.shared(numpy.array([[]], dtype=theano.config.floatX) if W is None else W)
     self._c = theano.shared(numpy.array([], dtype=theano.config.floatX) if c is None else c)
     self._b = theano.shared(numpy.array([], dtype=theano.config.floatX) if b is None else b)
     self.K = K
     self.epsilon = epsilon
     self.n_samples = n_samples
     self.epochs = epochs
     self.h_samples = theano.shared(numpy.array([[]], dtype=theano.config.floatX))
     self.rng = RandomStreams(numpy.random.randint(2 ** 30))
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, activation=T.tanh):

        # n_out defines the number of hidden units
        # each column of W defines the weights from input to each hidden node
        self.input = input
        if W is None:
            W_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt((6.0 / (n_in + n_out))), high=np.sqrt(6.0 / (n_in + n_out)), size=(n_in, n_out)
                ),
                dtype=theano.config.floatX,
            )

            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name="W", borrow=True)
        if b is None:
            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(b_values, name="b", borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = self.relu(lin_output) if activation is None else activation(lin_output)

        self.params = [self.W, self.b]
Example #23
    def build_2048_ann(self, nb, nh, nh2):
        """Build the 2048 network.

        nb  -- number of input nodes
        nh  -- size of the first hidden layer
        nh2 -- size of the second hidden layer
        """
        print("building")
        w1 = theano.shared(np.random.uniform(low=-0.1, high=0.1, size=(nb, nh)))
        w2 = theano.shared(np.random.uniform(low=-0.1, high=0.1, size=(nh, nh2)))
        w3 = theano.shared(np.random.uniform(low=-0.1, high=0.1, size=(nh2, 4)))
        input = T.dvector("input")
        target = T.wvector("target")
        x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
        x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
        x3 = Tann.softmax(T.dot(x2, w3))
        error = T.sum(pow((target - x3), 2))
        params = [w1, w2, w3]
        gradients = T.grad(error, params)
        backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]

        self.trainer = theano.function(
            inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True
        )
        self.predictor = theano.function(inputs=[input], outputs=x3, allow_input_downcast=True)
        print("Built")
Example #24
    def __init__(self, rng, input, n_in, n_out, activation):
        """
        :type rng: numpy.random.RandomState
        :param rng: used to initialize the weights randomly

        :type input: theano.tensor.TensorType
        :param input: input data

        :type n_in: int
        :param n_in: the number of input neurons

        :type n_out: int
        :param n_out: the number of output neurons

        :type activation: function
        :param activation: the activation function
        """
        self.input = input

        w_bound = np.sqrt(6.0 / (n_in + n_out))

        w_value = np.asarray(rng.uniform(low=-w_bound, high=w_bound, size=(n_in, n_out)), dtype=theano.config.floatX)

        if activation == T.nnet.sigmoid:
            w_value *= 4
        self.w = theano.shared(value=w_value, name="w", borrow=True)

        b_value = np.zeros((n_out), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_value, name="b", borrow=True)

        raw_output = T.dot(input, self.w) + self.b

        self.output = raw_output if activation is None else activation(raw_output)

        self.param = [self.w, self.b]
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
        # initialize weights with random weights
        W_bound = numpy.sqrt(6.0 / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX),
            borrow=True,
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W, filter_shape=filter_shape, image_shape=image_shape)

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize, ignore_border=True)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle("x", 0, "x", "x"))

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input
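
A hedged instantiation sketch, assuming the __init__ above belongs to the tutorial's LeNetConvPoolLayer class; the MNIST-like shapes are illustrative.

import numpy
import theano
import theano.tensor as T

rng = numpy.random.RandomState(23455)
x = T.tensor4('x')  # (batch, channels, height, width)
layer0 = LeNetConvPoolLayer(
    rng,
    input=x,
    image_shape=(500, 1, 28, 28),   # 500 single-channel 28x28 images
    filter_shape=(20, 1, 5, 5),     # 20 filters of size 5x5
    poolsize=(2, 2),
)
# valid convolution gives 24x24 maps; 2x2 max-pooling halves that to 12x12,
# so layer0.output has shape (500, 20, 12, 12)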
Example #26
    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX), name="W", borrow=True)
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(value=numpy.zeros((n_out,), dtype=theano.config.floatX), name="b", borrow=True)

        # compute prediction as the linear output (no softmax is applied here)
        self.y_pred = T.dot(input, self.W) + self.b

        # parameters of the model
        self.params = [self.W, self.b]
Example #27
    def set_weights(self, parameters, layer_number):
        """
        Set the layer's parameters when loaded from a saved model

        :param parameters: list
            A list containing the numpy.ndarrays representing the actual weights. For this
                particular layer, the size of the list is 5.

        :param layer_number: integer
            The position of the layer in the computational path. It is used to name the
                theano.shared variable.

        :return:

        """
        assert len(parameters) == 5, "Wrong number of parameters to be set to GRU layer!"

        self.layer_number = layer_number
        weights = parameters[0].value
        recs_i = parameters[1].value
        recs_z = parameters[2].value
        recs_r = parameters[3].value
        bias = parameters[4].value

        self.W = theano.shared(value=weights, name="W_%s" % self.layer_number, borrow=True, allow_downcast=True)
        self.U_i = theano.shared(value=recs_i, name="U_i_%s" % self.layer_number, borrow=True, allow_downcast=True)
        self.U_z = theano.shared(value=recs_z, name="U_z_%s" % self.layer_number, borrow=True, allow_downcast=True)
        self.U_r = theano.shared(value=recs_r, name="U_r_%s" % self.layer_number, borrow=True, allow_downcast=True)
        self.b = theano.shared(value=bias, name="b_%s" % self.layer_number, borrow=True, allow_downcast=True)
Example #28
    def speed(self):
        n_calls = 20000
        print "n_calls", n_calls
        for border_mode in ["valid", "full"]:
            print
            print border_mode
            for openmp in [False, True]:
                print "OpenMP", openmp
                image_shapes = [
                    (1, 5, 6, 6),
                    (10, 5, 6, 6),
                    # (10, 10, 16, 16),
                    # (10, 10, 32, 32)
                ]
                print "image_shape", image_shapes
                for image_shape in image_shapes:
                    filter_shapes = [(1, 5, 4, 4), (2, 5, 4, 4), (5, 5, 4, 4)]
                    print "filter_shapes", filter_shapes
                    for filter_shape in filter_shapes:

                        input = theano.shared(numpy.random.random(image_shape))
                        filters = theano.shared(numpy.random.random(filter_shape))

                        output = conv.conv2d(
                            input, filters, image_shape, filter_shape, border_mode, unroll_patch=True, openmp=openmp
                        )
                        mode = theano.Mode(linker=theano.gof.vm.VM_Linker(allow_gc=False, use_cloop=True))
                        theano_conv = theano.function([], output, mode=mode)
                        t1 = time.time()
                        theano_conv.fn(n_calls=n_calls)
                        t2 = time.time()
                        print t2 - t1,
                    print
Example #29
def load_data(dataset_file):
    """ Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    """

    #############
    # LOAD DATA #
    #############
    print "... loading data from file " + dataset_file

    # Load the dataset  - expecting both supervised and unsupervised data to be supplied (in pairs)
    f = gzip.open(dataset_file, "rb")
    x, x_sums, y = cPickle.load(f)
    f.close()

    # shared_dataset
    shared_x = theano.shared(numpy.asarray(x, dtype=theano.config.floatX))

    # build a replicated 2d array of sums so operations can be performed efficiently
    shared_x_sums = theano.shared(
        numpy.asarray(numpy.array([x_sums] * (x.shape[1])).transpose(), dtype=theano.config.floatX)
    )

    rval = [shared_x, shared_x_sums]
    return rval
Example #30
    def __init__(self, rng, input, n_in, n_out):
        # rng: RandomState used to randomly initialize the weights
        # input: data fed into the hidden layer,
        #        usually the raw input or the output of the previous hidden layer
        # n_in: dimensionality of the input data
        # n_out: dimensionality of the output, i.e. the number of hidden units

        self.input = input

        # define initial values for the weights and biases
        W_values = numpy.asarray(
            rng.uniform(
                low=-numpy.sqrt(6.0 / (n_in + n_out)), high=numpy.sqrt(6.0 / (n_in + n_out)), size=(n_in, n_out)
            ),
            dtype=theano.config.floatX,
        )
        self.W = theano.shared(value=W_values, name="W", borrow=True)

        b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name="b", borrow=True)

        lin_output = T.dot(input, self.W) + self.b
        self.output = T.nnet.sigmoid(lin_output)

        self.params = [self.W, self.b]