Code Example #1
File: parameters.py  Project: everwind/DNNNLP
    def __init__(self, window_size=5, vocab_size=vocabulary.wordmap.len, embedding_size=100, hidden_size=10, seed=1):
        """
        Initialize L{Model} parameters.
        """

        self.vocab_size     = vocab_size
        self.window_size    = window_size
        self.embedding_size = embedding_size
        if 1==1:
            self.hidden_size    = hidden_size
            self.output_size    = 1

        import numpy
        import hyperparameters

        from pylearn.algorithms.weights import random_weights
        #numpy.random.seed(seed)
        self.embeddings = numpy.asarray((numpy.random.rand(self.vocab_size, embedding_size) - 0.5)*2 * 1.0, dtype=floatX)
        isnormalize=1
        if isnormalize==1: self.normalize(range(self.vocab_size))
        if 1==1:
            self.hidden_weights = shared(numpy.asarray(random_weights(self.input_size, self.hidden_size, scale_by=1.0), dtype=floatX))
            self.output_weights = shared(numpy.asarray(random_weights(self.hidden_size, self.output_size, scale_by=1.0), dtype=floatX))
            self.hidden_biases = shared(numpy.asarray(numpy.zeros((self.hidden_size,)), dtype=floatX))
            self.output_biases = shared(numpy.asarray(numpy.zeros((self.output_size,)), dtype=floatX))
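
In the snippet above, `(numpy.random.rand(...) - 0.5) * 2 * 1.0` rescales uniform [0, 1) samples into [-1, 1); `floatX` is assumed to be Theano's configured float type. A minimal standalone numpy check of that initialization (sizes and the floatX value are illustrative assumptions, not taken from the project):

import numpy

floatX = 'float32'                      # assumption: theano.config.floatX in the project
vocab_size, embedding_size = 1000, 100  # illustrative sizes
embeddings = numpy.asarray(
    (numpy.random.rand(vocab_size, embedding_size) - 0.5) * 2 * 1.0,
    dtype=floatX)
# every embedding entry now lies within [-1, 1]
assert embeddings.min() >= -1.0 and embeddings.max() <= 1.0
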
Code Example #2
 def __init__(self, input, n_in, n_out):
     self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
     self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
     self.l1=abs(self.w).sum()
     self.l2_sqr = (self.w**2).sum()
     self.output=nnet.softmax(theano.dot(input, self.w)+self.b)
     self.argmax=theano.tensor.argmax(self.output, axis=1)
     self.params = [self.w, self.b]
Code Example #3
 def __init__(self, input, n_in, n_out):
     self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
     self.b = shared(numpy.zeros((n_out, ), dtype=input.dtype))
     self.l1 = abs(self.w).sum()
     self.l2_sqr = (self.w**2).sum()
     self.output = nnet.softmax(theano.dot(input, self.w) + self.b)
     self.argmax = theano.tensor.argmax(self.output, axis=1)
     self.params = [self.w, self.b]
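
Examples #2 and #3 show the same softmax output layer; only the construction is given, so the cost and update step are left to the caller. A minimal, hedged sketch of how such a layer is typically trained in Theano (sizes, learning rate, and variable names below are illustrative, not taken from the project):

import numpy
import theano
import theano.tensor as T
from theano import shared
from theano.tensor import nnet

x = T.matrix('x')          # minibatch, one example per row
y = T.ivector('y')         # integer class labels
n_in, n_out = 20, 5        # illustrative sizes

# the same layer construction as in Examples #2/#3
w = shared(numpy.zeros((n_in, n_out), dtype=theano.config.floatX))
b = shared(numpy.zeros((n_out,), dtype=theano.config.floatX))
output = nnet.softmax(theano.dot(x, w) + b)
argmax = T.argmax(output, axis=1)

# negative log-likelihood of the true class, and the misclassification rate
nll = -T.mean(T.log(output)[T.arange(y.shape[0]), y])
errors = T.mean(T.neq(argmax, y))

# one SGD step compiled into a callable function
gw, gb = T.grad(nll, [w, b])
lr = 0.1
train = theano.function([x, y], nll, updates=[(w, w - lr * gw), (b, b - lr * gb)])
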
Code Example #4
    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5,5),
            poolsize=(2,2)):
        """
        Allocate a LeNetConvPool layer with shared variable internal parameters.

        :param rng: a random number generator used to initialize weights
        
        :param input: symbolic images.  Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])

        :param n_examples: input's shape[0] at runtime

        :param n_imgs: input's shape[1] at runtime

        :param img_shape: input's shape[2:4] at runtime

        :param n_filters: the number of filters to apply to the image.

        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)

        :param poolsize: the downsampling (pooling) factor
        :type poolsize: pair (rows, cols)
        """

        #TODO: make a simpler convolution constructor!!
        #    - make dx and dy optional
        #    - why do we have to pass shapes? (Can we make them optional at least?)
        conv_op = ConvOp((n_imgs,)+img_shape, filter_shape, n_filters, n_examples,
                dx=1, dy=1, output_mode='valid')

        # - why is poolsize an op parameter here?
        # - can we just have a maxpool function that creates this Op internally?
        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)

        # the filter tensor that we will apply is a 4D tensor
        w_shp = (n_filters, n_imgs) + filter_shape

        # the bias we add is a 1D tensor
        b_shp = (n_filters,)

        self.w = shared(
                numpy.asarray(
                    rng.uniform(
                        low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs), 
                        high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        size=w_shp), 
                    dtype=input.dtype))
        self.b = shared(
                numpy.asarray(
                    rng.uniform(low=-.0, high=0., size=(n_filters,)),
                    dtype=input.dtype))

        self.input = input
        conv_out = conv_op(input, self.w)
        self.output = tensor.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.w, self.b]
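
The filter weights above are drawn uniformly from ±1/sqrt(fan_in), where fan_in = filter_shape[0] * filter_shape[1] * n_imgs is the number of inputs feeding each output unit. A small standalone numpy sketch of just that initializer, independent of the Theano Ops used above (the concrete sizes are illustrative):

import numpy

rng = numpy.random.RandomState(1234)
n_filters, n_imgs = 6, 1            # illustrative sizes
filter_shape = (5, 5)

fan_in = filter_shape[0] * filter_shape[1] * n_imgs
bound = 1.0 / numpy.sqrt(fan_in)
w_shp = (n_filters, n_imgs) + filter_shape

# 4D filter bank with entries drawn uniformly from [-bound, bound)
w = rng.uniform(low=-bound, high=bound, size=w_shp).astype('float32')
assert abs(w).max() <= bound
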
Code Example #5
 def __init__(self, input, n_in, n_out):
     """
     :param input: a symbolic tensor of shape (n_examples, n_in)
     :param w: a symbolic weight matrix of shape (n_in, n_out)
     :param b: symbolic bias terms of shape (n_out,)
     :param squash: a squashing function
     """
     self.input = input
     self.w = shared(
             numpy.asarray(
                 rng.uniform(low=-2/numpy.sqrt(n_in), high=2/numpy.sqrt(n_in),
                 size=(n_in, n_out)), dtype=input.dtype))
     self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
     self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
     self.params = [self.w, self.b]
Code Example #6
 def __init__(self, input, n_in, n_out):
     """
     :param input: a symbolic tensor of shape (n_examples, n_in)
     :param w: a symbolic weight matrix of shape (n_in, n_out)
     :param b: symbolic bias terms of shape (n_out,)
     :param squash: a squashing function
     """
     self.input = input
     self.w = shared(
         numpy.asarray(rng.uniform(low=-2 / numpy.sqrt(n_in),
                                   high=2 / numpy.sqrt(n_in),
                                   size=(n_in, n_out)),
                       dtype=input.dtype))
     self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
     self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
     self.params = [self.w, self.b]
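
Examples #5 and #6 build a single tanh hidden layer; combined with the softmax layer from Examples #2/#3 this yields a one-hidden-layer MLP. A hedged sketch of that composition (sizes, the rng seed, and variable names are illustrative; the snippets assume a module-level `rng` and the usual Theano imports, made explicit here):

import numpy
import theano
from theano import shared, tensor
from theano.tensor import nnet

rng = numpy.random.RandomState(0)    # the snippets assume a module-level rng like this
x = tensor.matrix('x')
n_in, n_hidden, n_out = 30, 10, 5    # illustrative sizes

# hidden layer, as in Examples #5/#6
w_h = shared(numpy.asarray(
    rng.uniform(low=-2 / numpy.sqrt(n_in), high=2 / numpy.sqrt(n_in),
                size=(n_in, n_hidden)), dtype=theano.config.floatX))
b_h = shared(numpy.zeros(n_hidden, dtype=theano.config.floatX))
hidden = tensor.tanh(tensor.dot(x, w_h) + b_h)

# softmax output layer on top, as in Examples #2/#3
w_o = shared(numpy.zeros((n_hidden, n_out), dtype=theano.config.floatX))
b_o = shared(numpy.zeros(n_out, dtype=theano.config.floatX))
p_y = nnet.softmax(tensor.dot(hidden, w_o) + b_o)
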
Code Example #7
File: parameters.py  Project: everwind/DNNNLP
    def __init__(self,
                 window_size=5,
                 vocab_size=vocabulary.wordmap.len,
                 embedding_size=100,
                 hidden_size=10,
                 seed=1):
        """
        Initialize L{Model} parameters.
        """

        self.vocab_size = vocab_size
        self.window_size = window_size
        self.embedding_size = embedding_size
        if 1 == 1:
            self.hidden_size = hidden_size
            self.output_size = 1

        import numpy
        import hyperparameters

        from pylearn.algorithms.weights import random_weights
        #numpy.random.seed(seed)
        self.embeddings = numpy.asarray(
            (numpy.random.rand(self.vocab_size, embedding_size) - 0.5) * 2 *
            1.0,
            dtype=floatX)
        isnormalize = 1
        if isnormalize == 1: self.normalize(range(self.vocab_size))
        if 1 == 1:
            self.hidden_weights = shared(
                numpy.asarray(random_weights(self.input_size,
                                             self.hidden_size,
                                             scale_by=1.0),
                              dtype=floatX))
            self.output_weights = shared(
                numpy.asarray(random_weights(self.hidden_size,
                                             self.output_size,
                                             scale_by=1.0),
                              dtype=floatX))
            self.hidden_biases = shared(
                numpy.asarray(numpy.zeros((self.hidden_size, )), dtype=floatX))
            self.output_biases = shared(
                numpy.asarray(numpy.zeros((self.output_size, )), dtype=floatX))
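
The `self.normalize(range(self.vocab_size))` call used in Examples #1 and #7 is not shown in these snippets. A purely hypothetical sketch of such a method, assuming it rescales each selected embedding row to unit L2 norm (a common choice, but not confirmed by the excerpt):

import numpy

def normalize(self, indices):
    # hypothetical: project the selected embedding rows onto the unit sphere
    rows = self.embeddings[indices]
    norms = numpy.sqrt(numpy.sum(rows * rows, axis=1, keepdims=True))
    norms[norms == 0] = 1.0          # avoid dividing all-zero rows by zero
    self.embeddings[indices] = rows / norms
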
Code Example #8
    def __init__(self, window_size, vocab_size, embedding_size, hidden_size, seed, initial_embeddings, two_hidden_layers):
        """
        Initialize L{Model} parameters.
        """

        self.vocab_size     = vocab_size
        self.window_size    = window_size
        self.embedding_size = embedding_size
        self.two_hidden_layers = two_hidden_layers
        if LBL:
            self.hidden_size    = hidden_size
            self.output_size    = self.embedding_size
        else:
            self.hidden_size    = hidden_size
            self.output_size    = 1

        import numpy
        import hyperparameters

        from pylearn.algorithms.weights import random_weights
        numpy.random.seed(seed)
        if initial_embeddings is None:
            self.embeddings = numpy.asarray((numpy.random.rand(self.vocab_size, HYPERPARAMETERS["EMBEDDING_SIZE"]) - 0.5)*2 * HYPERPARAMETERS["INITIAL_EMBEDDING_RANGE"], dtype=floatX)
        else:
            assert initial_embeddings.shape == (self.vocab_size, HYPERPARAMETERS["EMBEDDING_SIZE"])
            self.embeddings = copy.copy(initial_embeddings)
        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]: self.normalize(range(self.vocab_size))
        if LBL:
            self.output_weights = shared(numpy.asarray(random_weights(self.input_size, self.output_size, scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]), dtype=floatX))
            self.output_biases = shared(numpy.asarray(numpy.zeros((1, self.output_size)), dtype=floatX))
            self.score_biases = shared(numpy.asarray(numpy.zeros(self.vocab_size), dtype=floatX))
            assert not self.two_hidden_layers
        else:
            self.hidden_weights = shared(numpy.asarray(random_weights(self.input_size, self.hidden_size, scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]), dtype=floatX))
            self.hidden_biases = shared(numpy.asarray(numpy.zeros((self.hidden_size,)), dtype=floatX))
            if self.two_hidden_layers:
                self.hidden2_weights = shared(numpy.asarray(random_weights(self.hidden_size, self.hidden_size, scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]), dtype=floatX))
                self.hidden2_biases = shared(numpy.asarray(numpy.zeros((self.hidden_size,)), dtype=floatX))
            self.output_weights = shared(numpy.asarray(random_weights(self.hidden_size, self.output_size, scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]), dtype=floatX))
            self.output_biases = shared(numpy.asarray(numpy.zeros((self.output_size,)), dtype=floatX))
Code Example #9
    def __init__(self, window_size, vocab_size, embedding_size, hidden_size,
                 seed, initial_embeddings, two_hidden_layers):
        """
        Initialize L{Model} parameters.
        """

        self.vocab_size = vocab_size
        self.window_size = window_size
        self.embedding_size = embedding_size
        self.two_hidden_layers = two_hidden_layers
        if LBL:
            self.hidden_size = hidden_size
            self.output_size = self.embedding_size
        else:
            self.hidden_size = hidden_size
            self.output_size = 1

        import numpy
        import hyperparameters

        from pylearn.algorithms.weights import random_weights
        numpy.random.seed(seed)
        if initial_embeddings is None:
            self.embeddings = numpy.asarray(
                (numpy.random.rand(self.vocab_size,
                                   HYPERPARAMETERS["EMBEDDING_SIZE"]) - 0.5) *
                2 * HYPERPARAMETERS["INITIAL_EMBEDDING_RANGE"],
                dtype=floatX)
        else:
            assert initial_embeddings.shape == (
                self.vocab_size, HYPERPARAMETERS["EMBEDDING_SIZE"])
            self.embeddings = copy.copy(initial_embeddings)
        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
            self.normalize(range(self.vocab_size))
        if LBL:
            self.output_weights = shared(
                numpy.asarray(random_weights(
                    self.input_size,
                    self.output_size,
                    scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]),
                              dtype=floatX))
            self.output_biases = shared(
                numpy.asarray(numpy.zeros((1, self.output_size)),
                              dtype=floatX))
            self.score_biases = shared(
                numpy.asarray(numpy.zeros(self.vocab_size), dtype=floatX))
            assert not self.two_hidden_layers
        else:
            self.hidden_weights = shared(
                numpy.asarray(random_weights(
                    self.input_size,
                    self.hidden_size,
                    scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]),
                              dtype=floatX))
            self.hidden_biases = shared(
                numpy.asarray(numpy.zeros((self.hidden_size, )), dtype=floatX))
            if self.two_hidden_layers:
                self.hidden2_weights = shared(
                    numpy.asarray(random_weights(
                        self.hidden_size,
                        self.hidden_size,
                        scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]),
                                  dtype=floatX))
                self.hidden2_biases = shared(
                    numpy.asarray(numpy.zeros((self.hidden_size, )),
                                  dtype=floatX))
            self.output_weights = shared(
                numpy.asarray(random_weights(
                    self.hidden_size,
                    self.output_size,
                    scale_by=HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]),
                              dtype=floatX))
            self.output_biases = shared(
                numpy.asarray(numpy.zeros((self.output_size, )), dtype=floatX))
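
Both variants of this constructor consume `self.input_size`, which is not set here; in a window-based language model it is typically `window_size * embedding_size` (the embeddings of the words in the context window, concatenated). A small numpy sketch of building such an input vector under that assumption (word indices and sizes are illustrative):

import numpy

window_size, embedding_size, vocab_size = 5, 100, 1000
embeddings = numpy.random.rand(vocab_size, embedding_size).astype('float32')

window = [12, 7, 385, 42, 9]                 # illustrative word indices for one window
input_vector = embeddings[window].reshape(window_size * embedding_size)
# input_size, as consumed by hidden_weights above, would then be:
input_size = window_size * embedding_size
assert input_vector.shape == (input_size,)
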
Code Example #10
    def __init__(self, n_visible=784, n_hidden=500, lr=1e-1, input=None):
        """
    Initialize the DAE class by specifying the number of visible units (the 
    dimension d of the input), the number of hidden units (the dimension
    d' of the latent or hidden space), an initial value for the learning rate
    and by giving a symbolic description of the input. Such a symbolic 
    description is of no importance for the simple DAE and therefore can be 
    ignored. This feature is useful when stacking DAEs, since the input of 
    intermediate layers can be symbolically described in terms of the hidden
    units of the previous layer. See the tutorial on SDAE for more details.
    
    :param n_visible: number of visible units
    :param n_hidden:  number of hidden units
    :param lr:        an initial value for the learning rate
    :param input:     a symbolic description of the input or None 
    """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        theano_rng = RandomStreams(seed=1234)
        # create a numpy random generator
        numpy_rng = numpy.random.RandomState(seed=52432)

        # initial values for weights and biases
        # note : W' was written as W_prime and b' as b_prime
        initial_W = numpy_rng.uniform(size=(n_visible, n_hidden))
        # transform W such that all values are between -.01 and .01
        initial_W = (initial_W * 2.0 - 1.0) * .01
        initial_b = numpy.zeros(n_hidden)
        initial_W_prime = numpy_rng.uniform(size=(n_hidden, n_visible))
        # transform W_prime such that all values are between -.01 and .01
        initial_W_prime = (initial_W_prime * 2.0 - 1.0) * .01
        initial_b_prime = numpy.zeros(n_visible)

        # theano shared variables for weights and biases
        self.W = shared(value=initial_W, name="W")
        self.b = shared(value=initial_b, name="b")
        self.W_prime = shared(value=initial_W_prime, name="W'")
        self.b_prime = shared(value=initial_b_prime, name="b'")

        # theano shared variable for the learning rate
        self.lr = shared(value=lr, name="learning_rate")

        # if no input is given generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several examples,
            # each example being a row
            x = tensor.dmatrix(name='input')
        else:
            x = input
        # Equation (1)
        # note : first argument of theano.rng.binomial is the shape(size) of
        #        random numbers that it should produce
        #        second argument is the number of trials
        #        third argument is the probability of success of any trial
        #
        #        this will produce an array of 0s and 1s where 1 has a
        #        probability of 0.9 and 0 a probability of 0.1
        tilde_x = theano_rng.binomial(x.shape, 1, 0.9) * x
        # Equation (2)
        # note  : y is stored as an attribute of the class so that it can be
        #         used later when stacking DAEs.
        self.y = nnet.sigmoid(tensor.dot(tilde_x, self.W) + self.b)
        # Equation (3)
        z = nnet.sigmoid(tensor.dot(self.y, self.W_prime) + self.b_prime)
        # Equation (4)
        L = -tensor.sum(x * tensor.log(z) + (1 - x) * tensor.log(1 - z),
                        axis=1)
        # note : L is now a vector, where each element is the cross-entropy cost
        #        of the reconstruction of the corresponding example of the
        #        minibatch. We need to sum all these to get the cost of the
        #        minibatch
        cost = tensor.sum(L)
        # parameters with respect to whom we need to compute the gradient
        self.params = [self.W, self.b, self.W_prime, self.b_prime]
        # use theano automatic differentiation to get the gradients
        gW, gb, gW_prime, gb_prime = tensor.grad(cost, self.params)
        # update the parameters in the direction of the gradient using the
        # learning rate
        updated_W = self.W - gW * self.lr
        updated_b = self.b - gb * self.lr
        updated_W_prime = self.W_prime - gW_prime * self.lr
        updated_b_prime = self.b_prime - gb_prime * self.lr

        # defining the function that evaluates the symbolic description of
        # one update step
        self.update = pfunc(params=[x],
                            outputs=cost,
                            updates={
                                self.W: updated_W,
                                self.b: updated_b,
                                self.W_prime: updated_W_prime,
                                self.b_prime: updated_b_prime
                            })
        self.get_cost = pfunc(params=[x], outputs=cost)
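
`self.update`, compiled at the end of the constructor, performs one gradient step on a minibatch and returns its cost, so training reduces to calling it in a loop. A hedged sketch of such a loop (the class name is not shown in the excerpt, so the helper below simply takes an already-constructed instance; batch size and epoch count are illustrative):

import numpy

def train_dae(dae, data, batch_size=20, n_epochs=5):
    # dae:  an instance of the class defined above (anything exposing the
    #       compiled `update(x)` function)
    # data: 2-D array, one example per row, values in [0, 1]
    for epoch in range(n_epochs):
        costs = []
        for start in range(0, data.shape[0], batch_size):
            batch = data[start:start + batch_size]
            costs.append(dae.update(batch))   # one SGD step, returns the batch cost
        print("epoch %d, mean cost %.4f" % (epoch, numpy.mean(costs)))
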
Code Example #11
File: dae.py  Project: azizur77/DeepLearningTutorials
  def __init__(self, n_visible= 784, n_hidden= 500, lr= 1e-1, input= None):
    """
    Initialize the DAE class by specifying the number of visible units (the 
    dimension d of the input), the number of hidden units (the dimension
    d' of the latent or hidden space), an initial value for the learning rate
    and by giving a symbolic description of the input. Such a symbolic 
    description is of no importance for the simple DAE and therefore can be 
    ignored. This feature is useful when stacking DAEs, since the input of 
    intermediate layers can be symbolically described in terms of the hidden
    units of the previous layer. See the tutorial on SDAE for more details.
    
    :param n_visible: number of visible units
    :param n_hidden:  number of hidden units
    :param lr:        an initial value for the learning rate
    :param input:     a symbolic description of the input or None 
    """
    self.n_visible = n_visible
    self.n_hidden  = n_hidden
    
    # create a Theano random generator that gives symbolic random values
    theano_rng = RandomStreams( seed = 1234 )
    # create a numpy random generator
    numpy_rng = numpy.random.RandomState( seed = 52432 )
    
     
    # initial values for weights and biases
    # note : W' was written as W_prime and b' as b_prime
    initial_W       = numpy_rng.uniform(size = (n_visible, n_hidden))
    # transform W such that all values are between -.01 and .01
    initial_W       = (initial_W*2.0       - 1.0)*.01 
    initial_b       = numpy.zeros(n_hidden)
    initial_W_prime = numpy_rng.uniform(size = (n_hidden, n_visible))
    # transform W_prime such that all values are between -.01 and .01
    initial_W_prime = (initial_W_prime*2.0 - 1.0)*.01 
    initial_b_prime= numpy.zeros(n_visible)
     
    
    # theano shared variables for weights and biases
    self.W       = shared(value = initial_W      , name = "W")
    self.b       = shared(value = initial_b      , name = "b")
    self.W_prime = shared(value = initial_W_prime, name = "W'") 
    self.b_prime = shared(value = initial_b_prime, name = "b'")

    # theano shared variable for the learning rate 
    self.lr      = shared(value = lr             , name = "learning_rate")
      
    # if no input is given generate a variable representing the input
    if input is None:
        # we use a matrix because we expect a minibatch of several examples,
        # each example being a row
        x = tensor.dmatrix(name = 'input') 
    else:
        x = input
    # Equation (1)
    # note : first argument of theano.rng.binomial is the shape(size) of 
    #        random numbers that it should produce
    #        second argument is the number of trials 
    #        third argument is the probability of success of any trial
    #
    #        this will produce an array of 0s and 1s where 1 has a 
    #        probability of 0.9 and 0 a probability of 0.1
    tilde_x  = theano_rng.binomial( x.shape,  1,  0.9) * x
    # Equation (2)
    # note  : y is stored as an attribute of the class so that it can be 
    #         used later when stacking DAEs. 
    self.y   = nnet.sigmoid(tensor.dot(tilde_x, self.W      ) + self.b)
    # Equation (3)
    z        = nnet.sigmoid(tensor.dot(self.y,  self.W_prime) + self.b_prime)
    # Equation (4)
    L = - tensor.sum( x*tensor.log(z) + (1-x)*tensor.log(1-z), axis=1 ) 
    # note : L is now a vector, where each element is the cross-entropy cost 
    #        of the reconstruction of the corresponding example of the 
    #        minibatch. We need to sum all these to get the cost of the
    #        minibatch
    cost = tensor.sum(L)
    # parameters with respect to whom we need to compute the gradient
    self.params = [ self.W, self.b, self.W_prime, self.b_prime]
    # use theano automatic differentiation to get the gradients
    gW, gb, gW_prime, gb_prime = tensor.grad(cost, self.params)
    # update the parameters in the direction of the gradient using the 
    # learning rate
    updated_W       = self.W       - gW       * self.lr
    updated_b       = self.b       - gb       * self.lr
    updated_W_prime = self.W_prime - gW_prime * self.lr
    updated_b_prime = self.b_prime - gb_prime * self.lr

    # defining the function that evaluates the symbolic description of
    # one update step 
    self.update  = pfunc(params = [x], outputs = cost, updates = 
                                { self.W       : updated_W, 
                                  self.b       : updated_b,
                                  self.W_prime : updated_W_prime,
                                  self.b_prime : updated_b_prime } )
    self.get_cost = pfunc(params = [x], outputs = cost)
Code Example #12
    def __init__(self,
                 rng,
                 input,
                 n_examples,
                 n_imgs,
                 img_shape,
                 n_filters,
                 filter_shape=(5, 5),
                 poolsize=(2, 2)):
        """
        Allocate a LeNetConvPool layer with shared variable internal parameters.

        :param rng: a random number generator used to initialize weights
        
        :param input: symbolic images.  Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])

        :param n_examples: input's shape[0] at runtime

        :param n_imgs: input's shape[1] at runtime

        :param img_shape: input's shape[2:4] at runtime

        :param n_filters: the number of filters to apply to the image.

        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)

        :param poolsize: the downsampling (pooling) factor
        :type poolsize: pair (rows, cols)
        """

        #TODO: make a simpler convolution constructor!!
        #    - make dx and dy optional
        #    - why do we have to pass shapes? (Can we make them optional at least?)
        conv_op = ConvOp((n_imgs, ) + img_shape,
                         filter_shape,
                         n_filters,
                         n_examples,
                         dx=1,
                         dy=1,
                         output_mode='valid')

        # - why is poolsize an op parameter here?
        # - can we just have a maxpool function that creates this Op internally?
        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)

        # the filter tensor that we will apply is a 4D tensor
        w_shp = (n_filters, n_imgs) + filter_shape

        # the bias we add is a 1D tensor
        b_shp = (n_filters, )

        self.w = shared(
            numpy.asarray(rng.uniform(
                low=-1.0 /
                numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                high=1.0 /
                numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                size=w_shp),
                          dtype=input.dtype))
        self.b = shared(
            numpy.asarray(rng.uniform(low=-.0, high=0., size=(n_filters, )),
                          dtype=input.dtype))

        self.input = input
        conv_out = conv_op(input, self.w)
        self.output = tensor.tanh(
            ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.w, self.b]