def __init__(self, rng, input, dropout_rate, n_in, n_out, W=None, b=None):
    super(DropoutHiddenLayer, self).__init__(
        rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b)
    # Apply dropout to the parent HiddenLayer's activations.
    self.output = dropout_from_layer(rng, self.output, p=dropout_rate)
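# `dropout_from_layer` is used throughout this file but not defined in this
# excerpt. The sketch below shows the usual Theano implementation, assuming
# the convention that `p` is the probability of *dropping* a unit; the
# project's actual helper may differ.
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


def dropout_from_layer(rng, layer, p):
    """Zero out entries of `layer` with probability p (training-time dropout)."""
    srng = RandomStreams(rng.randint(999999))
    # Each mask entry is 1 with probability 1 - p (the keep probability).
    mask = srng.binomial(n=1, p=1 - p, size=layer.shape)
    # Cast the integer mask so the product stays in floatX.
    return layer * T.cast(mask, theano.config.floatX)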
def __init__(self, rng, input, refInput, E, W1, B1, W2,
             refE, refW1, refB1, refW2, mapping,
             drop_out_rate=0.5, drop_out_embedding_rate=0.2):
    """Initialize the parameters for the multilayer perceptron.

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type E: theano.tensor.TensorType
    :param E: the embedding matrix
    """
    print(" Configuration Source ")
    n_in1 = W1.get_value().shape[1]
    n_out1 = W1.get_value().shape[0]
    n_in2 = W2.get_value().shape[1]
    n_out2 = W2.get_value().shape[0]
    print('W1 size ' + str(n_in1) + ' * ' + str(n_out1))
    print('W2 size ' + str(n_in2) + ' * ' + str(n_out2))
    print(" ")
    print(" Configuration Reference ")
    ref_n_in1 = refW1.get_value().shape[1]
    ref_n_out1 = refW1.get_value().shape[0]
    ref_n_in2 = refW2.get_value().shape[1]
    ref_n_out2 = refW2.get_value().shape[0]
    print('W1 size ' + str(ref_n_in1) + ' * ' + str(ref_n_out1))
    print('W2 size ' + str(ref_n_in2) + ' * ' + str(ref_n_out2))
    print(" Size of mapping matrix : ")
    print(mapping.get_value().shape)

    ##########################
    #     ORIGINAL MODEL     #
    ##########################
    # Create the W1 matrix of size n_in1 * n_out1 (e.g. 2400 * 200).
    self.embeddingLayer = EmbedLayer(input, E)
    dropout_embedding = dropout_from_layer(rng,
                                           self.embeddingLayer.output,
                                           p=drop_out_embedding_rate)
    self.dropout_HiddenLayer = DropoutHiddenLayer(
        rng,
        input=dropout_embedding,
        dropout_rate=drop_out_rate,
        n_in=n_in1,
        n_out=n_out1)
    # Test-time hidden layer: shares the dropout layer's weights, rescaled
    # by the keep probability of its (dropped-out) embedding input.
    self.hiddenLayer = HiddenLayer(
        rng,
        input=self.embeddingLayer.output,
        n_in=n_in1,
        n_out=n_out1,
        W=self.dropout_HiddenLayer.W * (1 - drop_out_embedding_rate),
        b=self.dropout_HiddenLayer.b)
    self.dropout_LogRegressionLayer = SoftMaxLayer(
        rng,
        input=self.dropout_HiddenLayer.output,
        n_in=n_in2,
        n_out=n_out2,
        W=None)
    self.logRegressionLayer = SoftMaxLayer(
        rng,
        input=self.hiddenLayer.output,
        n_in=n_in2,
        n_out=n_out2,
        W=self.dropout_LogRegressionLayer.W * (1 - drop_out_rate))

    ##########################
    #    REFERENCE MODEL     #
    ##########################
    # Only the dropout versions are needed for the target language: the
    # reference model is used during training only, so no test-time
    # (rescaled, dropout-free) copies are built.
    self.refEmbeddingLayer = EmbedLayer(refInput, refE)
    ref_dropout_embedding = dropout_from_layer(rng,
                                               self.refEmbeddingLayer.output,
                                               p=drop_out_embedding_rate)
    self.ref_dropout_HiddenLayer = DropoutHiddenLayer(
        rng,
        input=ref_dropout_embedding,
        dropout_rate=drop_out_rate,
        n_in=ref_n_in1,
        n_out=ref_n_out1)
    self.ref_dropout_LogRegressionLayer = SoftMaxLayer(
        rng,
        input=self.ref_dropout_HiddenLayer.output,
        n_in=ref_n_in2,
        n_out=ref_n_out2)

    # Standard L2 regularization on the two embedding matrices.
    self.L2_sqr = ((self.embeddingLayer.E ** 2).sum() +
                   (self.refEmbeddingLayer.E ** 2).sum())

    # Tying penalty: pull the source and reference parameters together
    # (remember to transform the reference softmax weights with `mapping`).
    self.reg_L2_sqr = (
        ((self.dropout_HiddenLayer.W - self.ref_dropout_HiddenLayer.W) ** 2).sum() +
        ((self.dropout_HiddenLayer.b - self.ref_dropout_HiddenLayer.b) ** 2).sum() +
        ((self.dropout_LogRegressionLayer.W -
          T.dot(self.ref_dropout_LogRegressionLayer.W, mapping)) ** 2).sum())

    # The negative log likelihood of the MLP is given by the negative log
    # likelihood of the output of the model, computed in the softmax layer.
    self.negative_log_likelihood = (
        self.dropout_LogRegressionLayer.negative_log_likelihood)
    self.refNegative_log_likelihood = (
        self.ref_dropout_LogRegressionLayer.negative_log_likelihood)
    # The same holds for the function computing the number of errors.
    self.errors = self.logRegressionLayer.errors

    self.params = (self.embeddingLayer.params +
                   self.dropout_HiddenLayer.params +
                   self.dropout_LogRegressionLayer.params +
                   self.refEmbeddingLayer.params +
                   self.ref_dropout_HiddenLayer.params +
                   self.ref_dropout_LogRegressionLayer.params)
    # Shared variables holding the accumulated gradient of each parameter
    # (for Adagrad/Adadelta-style updates).
    self._accugrads = [build_shared_zeros(t.shape.eval(), 'accugrad')
                       for t in self.params]
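# `build_shared_zeros` is likewise assumed from a utilities module. A minimal
# sketch consistent with its call above (shape first, then a name) follows;
# the `borrow` flag is an assumption.
import numpy


def build_shared_zeros(shape, name):
    """Build a zero-initialized Theano shared variable of the given shape."""
    return theano.shared(value=numpy.zeros(shape, dtype=theano.config.floatX),
                         name=name, borrow=True)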
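# A sketch of how the terms defined above would typically be combined into a
# single training objective. The helper name, the regularization weights, and
# the symbolic targets `y` / `refY` are illustrative assumptions, not names
# taken from this file.
def training_cost(model, y, refY, l2_reg=1e-4, tie_reg=1e-3):
    """Hypothetical: joint dropout NLL for both languages, plus the embedding
    L2 penalty and the parameter-tying penalty."""
    return (model.negative_log_likelihood(y)
            + model.refNegative_log_likelihood(refY)
            + l2_reg * model.L2_sqr
            + tie_reg * model.reg_L2_sqr)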
def dropout(self):
    # Replace the input vectors with a dropped-out copy of the input.
    self.input_vectors = utils.dropout_from_layer(self.rng, self.input,
                                                  self.dropout_rate)