Code example #1
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 activation,
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = hidden_dim
        self.act = Activation(activation)
        self.dropout = dropout
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W', initializer)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b')
        self.params = [self.W, self.b]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
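The constructor above only allocates the parameters; the forward computation is not part of the snippet. A minimal sketch of how such a layer is typically applied, assuming a Theano symbolic input x of shape (batch, in_dim) and the Activation.activate interface used in the RNN example later in this list (the forward method itself is hypothetical):

    def forward(self, x):
        # Affine map with the (hidden_dim, in_dim) weight, then the configured
        # activation; the result has shape (batch, hidden_dim).
        return self.act.activate(T.dot(x, self.W.T) + self.b)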
Code example #2
File: convnet.py Project: Sameer-Arora/ML-IS-FUN
    def __init__(self,
                 filters,
                 kernel_size,
                 stride=1,
                 padding='same',
                 activation=None):
        """
		Params:
		filters: Number of Filters
		kernel_size: shape of the kernel
		stride: the stride
		padding: valid or same
		activation: activation function
		"""
        self.filters = filters

        num_weights = kernel_size[0] * kernel_size[1]
        self.kernel_size = kernel_size
        self.weights = None
        self.bias = None

        self.padding = (kernel_size[0] - 1) // 2 if padding == 'same' else 0
        self.stride = stride
        self.output_units = []

        self.activation = Activation(activation)
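One detail worth calling out is the 'same' padding arithmetic: self.padding = (kernel_size[0] - 1) // 2 pads symmetrically so that, at stride 1, the output keeps the input's spatial size. A small illustration with hypothetical kernel sizes:

        # kernel_size = (3, 3), padding = 'same'  ->  self.padding = (3 - 1) // 2 = 1
        # kernel_size = (5, 5), padding = 'same'  ->  self.padding = (5 - 1) // 2 = 2
        # padding = 'valid'                       ->  self.padding = 0 (output shrinks by kernel_size - 1)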
Code example #3
    def __init__(self, in_dim, activation, hidden_dim=None, transform_gate="sigmoid", prefix="",
                 initializer=default_initializer, dropout=0, verbose=True):
        # By construction the dimensions of in_dim and out_dim have to match, and hence W_T and W_H are square matrices.
        if hidden_dim is not None:
            assert in_dim == hidden_dim
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(HighwayLayer, self).__init__(in_dim, in_dim, activation, prefix, initializer, dropout, verbose)
        self.transform_gate = Activation(transform_gate)
        self.W_H, self.W_H.name = self.W, prefix + "W_H"
        self.b_H, self.b_H.name = self.b, prefix + "b_H"
        self.W_T = shared_rand_matrix((self.hidden_dim, self.in_dim), prefix + 'W_T', initializer)
        self.b_T = shared_zero_matrix((self.hidden_dim,), prefix + 'b_T')
        self.params = [self.W_H, self.W_T, self.b_H, self.b_T]
        self.norm_params = [self.W_H, self.W_T]
        self.l1_norm = T.sum([T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param ** 2) for param in self.norm_params])

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Transform Gate:   %s' % self.transform_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
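The constructor only builds W_H, b_H, W_T and b_T; the composition is not shown. A hedged sketch of the standard highway computation these parameters support, assuming a symbolic input x of shape (batch, in_dim) (the forward method is hypothetical):

    def forward(self, x):
        # Candidate transform H(x) and transform gate T(x)
        h = self.act.activate(T.dot(x, self.W_H.T) + self.b_H)
        t = self.transform_gate.activate(T.dot(x, self.W_T.T) + self.b_T)
        # Gate mixes the transform with the untouched input (carry behaviour)
        return t * h + (1 - t) * x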
Code example #4
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='iden',
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(TransEModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, entity_dim)
        self.W = shared_rand_matrix((relation_num, self.entity_dim),
                                    prefix + 'TransE_R', initializer)
        self.act = Activation(activation)
        self.params = [self.W]
        self.norm_params = [self.W]
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W**2)

        if verbose:
            logger.debug(
                'Architecture of TransE Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
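The scoring function is not included in the snippet. As a hedged sketch (an assumption, not the original author's code), TransE treats each row of self.W as a relation translation vector and scores a triple by the distance between head + relation and tail:

    def score(self, head, tail, relation_index):
        # head, tail: (entity_dim,) entity embeddings; relation_index: integer index
        r = self.W[relation_index]                 # relation translation vector
        # Negated L1 distance: higher means a more plausible triple
        return -T.sum(T.abs_(head + r - tail))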
Code example #5
    def __init__(self, configs=None, verbose=True):
        '''
        Basic RNN is an unsupervised component: the input is a sequence and the
        output is a fixed-length vector.
        '''
        if verbose: pprint('Build Recurrent Neural Network...')
        self.input = T.matrix(name='input', dtype=floatX)
        self.learn_rate = T.scalar(name='learn rate')
        # Configure activation function
        self.act = Activation(configs.activation)
        fan_in = configs.num_input
        fan_out = configs.num_hidden
        # Initialize all the variables in RNN, including:
        # 1, Feed-forward matrix, feed-forward bias, W, W_b
        # 2, Recurrent matrix, recurrent bias, U, U_b
        self.W = theano.shared(value=np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (fan_in + fan_out)),
            high=np.sqrt(6.0 / (fan_in + fan_out)),
            size=(fan_in, fan_out)),
                                                dtype=floatX),
                               name='W',
                               borrow=True)
        self.U = theano.shared(value=np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (fan_out + fan_out)),
            high=np.sqrt(6.0 / (fan_out + fan_out)),
            size=(fan_out, fan_out)),
                                                dtype=floatX),
                               name='U',
                               borrow=True)
        # Bias parameter for the hidden-layer encoder of RNN
        self.b = theano.shared(value=np.zeros(fan_out, dtype=floatX),
                               name='b',
                               borrow=True)
        # h[0], zero vector
        self.h0 = theano.shared(value=np.zeros(fan_out, dtype=floatX),
                                name='h0',
                                borrow=True)
        # Save all the parameters
        self.params = [self.W, self.U, self.b, self.h0]

        # recurrent function used to compress a sequence of input vectors
        # the first dimension should correspond to time
        def step(x_t, h_tm1):
            h_t = self.act.activate(T.dot(x_t, self.W) + \
                  T.dot(h_tm1, self.U) + self.b)
            return h_t

        # h is the hidden representation over a time sequence
        self.hs, _ = theano.scan(fn=step,
                                 sequences=self.input,
                                 outputs_info=[self.h0],
                                 truncate_gradient=configs.bptt)
        self.h = self.hs[-1]
        # L1, L2 regularization
        self.L1_norm = T.sum(T.abs_(self.W) + T.abs_(self.U))
        self.L2_norm = T.sum(self.W**2) + T.sum(self.U**2)
        # Compress function
        self.compress = theano.function(inputs=[self.input], outputs=self.h)
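A minimal usage sketch, assuming the class is named RNN and a configuration object with num_input=10, num_hidden=20, activation='tanh' and bptt=-1 (all hypothetical names and values; the real configuration class is not shown here):

# rnn = RNN(configs=config)
# sequence = np.random.randn(5, 10).astype(floatX)   # 5 time steps, 10 features each
# final_state = rnn.compress(sequence)               # vector of length num_hidden (20)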
Code example #6
File: main.py Project: xiaozhuyfk/NeuralNet
def train(dataset):
    config_options = globals.config
    task_path = config_options.get("Data", dataset)
    loss = config_options.get('Train', 'loss')
    activation = config_options.get('Train', 'activation')

    if dataset == "classify":
        Xtrain = z_norm(load_mnist_X(task_path + "classf_Xtrain.txt"))
        Xtest = z_norm(load_mnist_X(task_path + "classf_Xtest.txt"))
        Xval = z_norm(load_mnist_X(task_path + "classf_XVal.txt"))
        ytrain = load_mnist_Y(task_path + "classf_ytrain.txt")
        ytest = load_mnist_Y(task_path + "classf_ytest.txt")
        yval = load_mnist_Y(task_path + "classf_yVal.txt")
    elif dataset == "regression":
        Xtrain = z_norm(load_regression_X(task_path + "regr_Xtrain.txt"))
        Xtest = z_norm(load_regression_X(task_path + "regr_Xtest.txt"))
        Xval = z_norm(load_regression_X(task_path + "regr_Xval.txt"))
        ytrain = load_regression_Y(task_path + "regr_ytrain.txt")
        ytest = load_regression_Y(task_path + "regr_ytest.txt")
        yval = load_regression_Y(task_path + "regr_yval.txt")
    else:
        logger.warning("Invalid task.")
        return
    logger.info("Load data complete.")

    # build model
    N, input_dim = Xtrain.shape

    model = Model()
    model.add(Layer(output_dim=globals.layer_dim, input_dim=input_dim))
    model.add(Activation(activation=activation))
    model.add(Layer(output_dim=globals.output_dim))

    model.compile(loss=loss)
    history = model.fit(Xtrain,
                        ytrain,
                        batch_size=N,
                        iterations=globals.iterations,
                        validation_data=(Xval, yval))

    # save result
    result_dir = config_options.get('Result', 'result-dir')
    file_name = "_".join([
        dataset, activation,
        str(globals.alpha),
        str(globals.lam),
        str(globals.layer_dim),
        str(globals.iterations)
    ]) + ".txt"
    file_path = result_dir + file_name
    writeFile(file_path, "")
    for datum in history:
        datum = [str(x) for x in datum]
        line = "\t".join(datum) + "\n"
        writeFile(file_path, line, 'a')

    print model.loss.mse(Xval, yval)
    print model.loss.mse(Xtest, ytest)
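For reference, a hedged illustration of the configuration layout this function appears to expect, reconstructed from the config_options.get(...) calls above (section and option names only; the paths and values are made up):

# [Data]
# classify = ./data/classify/
# regression = ./data/regression/
#
# [Train]
# loss = mse
# activation = sigmoid
#
# [Result]
# result-dir = ./results/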
Code example #7
    def __init__(self, in_dim, hidden_dim, kernel_sizes=[3, 4, 5], padding='same', pooling='max', dilation_rate=1.0,
                 activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
        """
        Init Function for ConvolutionLayer
        :param in_dim:
        :param hidden_dim:
        :param kernel_sizes:
        :param padding: 'same', 'valid'
        :param pooling: 'max', 'mean', 'min'
        :param dilation_rate:
        :param activation:
        :param prefix:
        :param initializer:
        :param dropout:
        :param verbose:
        """
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        self.conv_layers = list()
        self.in_dim = in_dim
        self.out_dim = hidden_dim * len(kernel_sizes)
        self.hidden_dim = hidden_dim
        self.kernel_sizes = kernel_sizes
        self.padding = padding
        self.dilation_rate = dilation_rate
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)

        self.params = list()
        self.norm_params = list()

        # L1, L2 Norm
        self.l1_norm = 0
        self.l2_norm = 0

        for filter_hs in kernel_sizes:
            self.conv_layers.append(ConvolutionLayer(in_dim=self.in_dim, hidden_dim=hidden_dim, kernel_size=filter_hs,
                                                     padding=self.padding, pooling=self.pooling,
                                                     dilation_rate=self.dilation_rate, activation=activation,
                                                     prefix=prefix+"filter%s_" % filter_hs, initializer=initializer,
                                                     dropout=dropout, verbose=verbose))
            self.params += self.conv_layers[-1].params
            self.norm_params += self.conv_layers[-1].norm_params
            self.l1_norm += self.conv_layers[-1].l1_norm
            self.l2_norm += self.conv_layers[-1].l2_norm

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Filter Num  (Hidden): %d' % self.hidden_dim)
            logger.debug('Kernel Size (Windows): %s' % self.kernel_sizes)
            logger.debug('Padding method :  %s' % self.padding)
            logger.debug('Dilation Rate  :  %s' % self.dilation_rate)
            logger.debug('Pooling method :  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
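How the child layers are combined at run time is not part of the snippet. A hedged sketch, assuming each ConvolutionLayer exposes a forward method that returns one pooled (batch, hidden_dim) feature matrix per kernel size (both the method name and this wrapper's forward are assumptions):

    def forward(self, x):
        # Concatenating along the feature axis gives
        # (batch, hidden_dim * len(kernel_sizes)), matching self.out_dim above.
        return T.concatenate([conv.forward(x) for conv in self.conv_layers], axis=1)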
Code example #8
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 initializer=default_initializer,
                 normalize=True,
                 dropout=0,
                 reconstructe=True,
                 activation="tanh",
                 verbose=True):
        """
        :param in_dim:          input dimension
        :param hidden_dim:      hidden dimension
        :param initializer:     random initializer
        :param normalize:       whether to normalize
        :param dropout:         dropout rate
        :param activation:      activation function
        :param verbose:         whether to emit debug log output
        :return:
        """
        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        assert self.in_dim == self.hidden_dim

        self.initializer = initializer
        self.normalize = normalize
        self.dropout = dropout
        self.verbose = verbose
        self.act = Activation(activation)
        # Composition Function Weight
        # (dim, 2 * dim)
        self.W = shared_rand_matrix((self.hidden_dim, 2 * self.in_dim),
                                    'W',
                                    initializer=initializer)
        # (dim, )
        self.b = shared_zero_matrix((self.hidden_dim, ), 'b')
        # Reconstruction Function Weight
        # (2 * dim, dim)
        self.Wr = shared_rand_matrix((2 * self.in_dim, self.hidden_dim),
                                     'Wr',
                                     initializer=initializer)
        # (2 * dim, )
        self.br = shared_zero_matrix((self.in_dim * 2, ), 'br')
        self.params = [self.W, self.b, self.Wr, self.br]
        self.norm_params = [self.W, self.Wr]

        self.l1_norm = sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of RAE built finished, summarized as below: ')
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Normalize:        %s' % self.normalize)
            logger.debug('Activation:       %s' % self.act)
            logger.debug('Dropout Rate:     %s' % self.dropout)
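The composition and reconstruction steps are not included above. A hedged sketch of a single RAE step, consistent with the shapes of W (dim, 2*dim), b, Wr (2*dim, dim) and br allocated in the constructor (the method itself is hypothetical):

    def compose_step(self, left, right):
        # left, right: (dim,) child vectors
        children = T.concatenate([left, right])                         # (2 * dim,)
        parent = self.act.activate(T.dot(self.W, children) + self.b)    # (dim,)
        recon = self.act.activate(T.dot(self.Wr, parent) + self.br)     # (2 * dim,)
        recon_loss = T.sum((recon - children) ** 2)
        return parent, recon_loss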
Code example #9
 def __init__(self, verbose=True):
     if verbose: logger.debug('Build Multilayer Perceptron Ranking model...')
     # Positive input setting
     self.inputPL = T.matrix(name='inputPL', dtype=floatX)
     self.inputPR = T.matrix(name='inputPR', dtype=floatX)
     # Negative input setting
     self.inputNL = T.matrix(name='inputNL', dtype=floatX)
     self.inputNR = T.matrix(name='inputNR', dtype=floatX)
     # Standard input setting
     self.inputL = T.matrix(name='inputL', dtype=floatX)
     self.inputR = T.matrix(name='inputR', dtype=floatX)
     # Build activation function
     self.act = Activation('tanh')
     # Connect input matrices
     self.inputP = T.concatenate([self.inputPL, self.inputPR], axis=1)
     self.inputN = T.concatenate([self.inputNL, self.inputNR], axis=1)
     self.input = T.concatenate([self.inputL, self.inputR], axis=1)
     # Build hidden layer
     self.hidden_layer = HiddenLayer(self.input, (2*edim, args.hidden), act=self.act)
     self.hidden = self.hidden_layer.output
     self.hiddenP = self.hidden_layer.encode(self.inputP)
     self.hiddenN = self.hidden_layer.encode(self.inputN)
     # Dropout parameter
     #srng = T.shared_randomstreams.RandomStreams(args.seed)
     #mask = srng.binomial(n=1, p=1-args.dropout, size=self.hidden.shape)
     #maskP = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenP.shape)
     #maskN = srng.binomial(n=1, p=1-args.dropout, size=self.hiddenN.shape)
     #self.hidden *= T.cast(mask, floatX)
     #self.hiddenP *= T.cast(maskP, floatX)
     #self.hiddenN *= T.cast(maskN, floatX)
     # Build linear output layer
     self.score_layer = ScoreLayer(self.hidden, args.hidden)
     self.output = self.score_layer.output
     self.scoreP = self.score_layer.encode(self.hiddenP)
     self.scoreN = self.score_layer.encode(self.hiddenN)
     # Stack all the parameters
     self.params = []
     self.params += self.hidden_layer.params
     self.params += self.score_layer.params
     # Build cost function
     self.cost = T.mean(T.maximum(T.zeros_like(self.scoreP), 1.0-self.scoreP+self.scoreN))
     # Construct the gradient of the cost function with respect to the model parameters
     self.gradparams = T.grad(self.cost, self.params)
     # Count the total number of parameters in this model
     # The hidden layer consumes the concatenated input of width 2 * edim
     self.num_params = 2 * edim * args.hidden + args.hidden + args.hidden + 1
     # Build class method
     self.score = theano.function(inputs=[self.inputL, self.inputR], outputs=self.output)
     self.compute_cost_and_gradient = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
                                                      outputs=self.gradparams+[self.cost, self.scoreP, self.scoreN])
     self.show_scores = theano.function(inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR], 
                                        outputs=[self.scoreP, self.scoreN])
     if verbose:
         logger.debug('Architecture of MLP Ranker built finished, summarized below: ')
         logger.debug('Input dimension: %d' % edim)
         logger.debug('Hidden dimension: %d' % args.hidden)
         logger.debug('Total number of parameters used in the model: %d' % self.num_params)
Code example #10
File: network.py Project: sky-lzy/DailyCode
    def __init__(self, input_size, output_size, hidden_size, n_layers,
                 act_type):
        '''
        Multilayer Perceptron
        ----------------------
        :param input_size: dimension of input features
        :param output_size: dimension of output features
        :param hidden_size: a list containing hidden size for each hidden layer
        :param n_layers: number of layers
        :param act_type: type of activation function for each hidden layer, can be none, sigmoid, tanh, or relu
        '''
        super(MLP, self).__init__()

        # total layer number should be hidden layer number + 1 (output layer)
        assert len(hidden_size) + 1 == n_layers, \
            'total layer number should be hidden layer number + 1'

        # define the activation function by activation function in activations.py
        self.act = Activation(act_type)

        # initialize a list to save layers
        layers = nn.ModuleList()

        if n_layers == 1:
            # if n_layers == 1, MLP degenerates to a Linear layer
            layer = Linear(input_size, output_size)
            # append the layer into layers
            layers.append(layer)
            layers.append(self.act)

        # TODO 4: Finish MLP with at least 2 layers
        else:
            # step 1: initialize the input layer
            layer = Linear(input_size, hidden_size[0])
            # step 2: append the input layer and the activation layer into layers
            layers.append(layer)
            layers.append(self.act)
            # step 3: construct the hidden layers and add it to layers
            for i in range(1, n_layers - 1):
                #initialize a hidden layer and activation layer
                # hint: Noting that the output size of a hidden layer is hidden_size[i], so what is its input size?
                layer = Linear(hidden_size[i - 1], hidden_size[i])
                layers.append(layer)
                layers.append(self.act)

            # step 4: initialize the output layer and append the layer into layers
            # hint: what is the output size of the output layer?
            # hint: here we do not need activation layer
            layer = Linear(hidden_size[-1], output_size)
            layers.append(layer)
            # End TODO 4

        #Use nn.Sequential to get the neural network
        self.net = nn.Sequential(*layers)
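A usage sketch with hypothetical sizes (the surrounding training code is not shown): a three-layer MLP mapping 784 -> 256 -> 64 -> 10, applied to a random batch through the nn.Sequential container built above.

        # mlp = MLP(input_size=784, output_size=10, hidden_size=[256, 64],
        #           n_layers=3, act_type='relu')
        # logits = mlp.net(torch.randn(32, 784))   # -> shape (32, 10)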
Code example #11
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 gates=("sigmoid", "sigmoid", "sigmoid"),
                 prefix="",
                 initializer=OrthogonalInitializer(),
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(LSTMEncoder, self).__init__(in_dim, hidden_dim, pooling,
                                          activation, dropout)
        self.in_gate, self.forget_gate, self.out_gate = Activation(
            gates[0]), Activation(gates[1]), Activation(gates[2])

        # W [in, forget, output, recurrent] (4 * hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim * 4, self.in_dim),
                                    prefix + 'W', initializer)
        # U [in, forget, output, recurrent] (4 * hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim * 4, self.hidden_dim),
                                    prefix + 'U', initializer)
        # b [in, forget, output, recurrent] (4 * hidden,)
        self.b = shared_zero_matrix((self.hidden_dim * 4, ), prefix + 'b')

        self.params = [self.W, self.U, self.b]
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Input Gate:       %s' % self.in_gate.method)
            logger.debug('Forget Gate:      %s' % self.forget_gate.method)
            logger.debug('Output Gate:      %s' % self.out_gate.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Code example #12
 def __init__(self,
              in_dim,
              hidden_dim,
              pooling,
              activation='tanh',
              dropout=0):
     self.in_dim = in_dim
     self.out_dim = hidden_dim
     self.hidden_dim = hidden_dim
     self.pooling = pooling
     self.dropout = dropout
     self.act = Activation(activation)
Code example #13
    def __init__(self, in_dim, hidden_dim, kernel_size=3, padding='same', pooling='max', dilation_rate=1.0,
                 activation='relu', prefix="", initializer=GlorotUniformInitializer(), dropout=0.0, verbose=True):
        """
        Init Function for ConvolutionLayer
        :param in_dim:
        :param hidden_dim:
        :param kernel_size:
        :param padding: 'same', 'valid'
        :param pooling: 'max', 'mean', 'min'
        :param dilation_rate:
        :param activation:
        :param prefix:
        :param initializer:
        :param dropout:
        :param verbose:
        """
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))

        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = padding
        self.dilation_rate = dilation_rate
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        self.padding_size = int(self.dilation_rate * (self.kernel_size - 1))
        # Composition Function Weight
        # Kernel Matrix (kernel_size, hidden, in)
        self.W = shared_rand_matrix((self.kernel_size, self.hidden_dim, self.in_dim), prefix + 'W', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim,), prefix + 'b')

        self.params = [self.W, self.b]
        self.norm_params = [self.W]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W))
        self.l2_norm = T.sum(self.W ** 2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Filter Num  (Hidden): %d' % self.hidden_dim)
            logger.debug('Kernel Size (Windows): %d' % self.kernel_size)
            logger.debug('Padding method :  %s' % self.padding)
            logger.debug('Dilation Rate  :  %s' % self.dilation_rate)
            logger.debug('Padding Size   :  %s' % self.padding_size)
            logger.debug('Pooling method :  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Code example #14
File: layers.py Project: foshyjoshy/nake
    def __init__(self, name, n_inputs, n_outputs, activation=None, use_bias=True, weights=None, biases=None):
        super().__init__(name)

        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.use_bias = use_bias

        if activation is None:
            activation = Activation.getInitialized("tanh")
        else:
            if not Activation.isObjectRegistered(activation):
                if isinstance(activation, dict):
                    activation = Activation(**activation)
                elif isinstance(activation, str):
                    activation = Activation(class_name=activation)
                else:
                    raise Exception("{} is not a "\
                    "registered activation. Use {}".format(activation, Activation.registeredClasses()))

        self.activation = activation


        if weights is None:
            # Between -1 and 1
            self.weights = (np.random.random((n_outputs, n_inputs)) * 2 - 1)
        else:
            assert isinstance(weights, np.ndarray)
            assert weights.shape == (n_outputs, n_inputs)
            self.weights = weights

        if biases is None:
            # Between -1 and 1
            self.biases = (np.random.random((n_outputs, 1)) * 2 - 1) * 0.001
        else:
            assert isinstance(biases, np.ndarray)
            assert biases.shape == (n_outputs, 1)
            self.biases = biases

        # Mutation mask ... create only once.
        self.mutation_mask = np.zeros_like(self.weights)
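A hedged forward-pass sketch, consistent with the (n_outputs, n_inputs) weight and (n_outputs, 1) bias shapes above; it is not part of the original class, and treating the Activation object as callable is an assumption:

    def forward(self, x):
        # x: (n_inputs, batch) column vectors
        z = self.weights @ x                       # (n_outputs, batch)
        if self.use_bias:
            z = z + self.biases                    # broadcast over the batch axis
        return self.activation(z)                  # callable Activation is an assumption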
Code example #15
    def __init__(self,
                 entity_dim,
                 relation_num,
                 activation='tanh',
                 hidden=5,
                 keep_normal=False,
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(NeuralTensorModel, self).__init__()
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        self.hidden = hidden
        self.slice_seq = T.arange(hidden)
        self.keep_normal = keep_normal
        # (relation_num, entity_dim, entity_dim, hidden)
        self.W = shared_rand_matrix(
            (relation_num, self.entity_dim, self.entity_dim, self.hidden),
            prefix + 'NTN_W', initializer)
        # (relation_num, hidden)
        self.U = shared_ones_matrix((relation_num, self.hidden),
                                    name=prefix + 'NTN_U')
        if keep_normal:
            # (relation_num, entity_dim, hidden)
            self.V = shared_rand_matrix(
                (relation_num, self.entity_dim * 2, self.hidden),
                prefix + 'NTN_V', initializer)
            # (relation_num, hidden)
            self.b = shared_zero_matrix((relation_num, self.hidden),
                                        name=prefix + 'NTN_B')
            self.params = [self.W, self.V, self.U, self.b]
            self.norm_params = [self.W, self.V, self.U, self.b]
        else:
            self.params = [self.W]
            self.norm_params = [self.W]
        self.act = Activation(activation)
        self.l1_norm = T.sum(
            [T.sum(T.abs_(param)) for param in self.norm_params])
        self.l2_norm = T.sum([T.sum(param**2) for param in self.norm_params])

        if verbose:
            logger.debug(
                'Architecture of Tensor Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
Code example #16
 def __init__(self,
              in_features,
              out_features,
              input_layer=False,
              fully_connected=True):
     self.in_features = in_features
     self.out_features = out_features
     self.fully_connected = fully_connected
     # changed from v0.0.0 #
     self.weights = np.random.randn(out_features, in_features)
     self.bias = np.random.randn(out_features)
     # last part for emphasis #
     self.next_layer = None
     self.prev_layer = None
     self.input_layer = input_layer
     self.variables = 0
     self.activation = Activation()
Code example #17
    def __init__(self,
                 in_dim,
                 hidden_dim,
                 pooling,
                 activation='tanh',
                 prefix="",
                 initializer=default_initializer,
                 dropout=0,
                 verbose=True):
        if verbose:
            logger.debug('Building {}...'.format(self.__class__.__name__))
        super(RecurrentEncoder, self).__init__(in_dim, hidden_dim, pooling,
                                               activation, dropout)

        self.in_dim = in_dim
        self.out_dim = hidden_dim
        self.hidden_dim = hidden_dim
        self.pooling = pooling
        self.dropout = dropout
        self.act = Activation(activation)
        # Composition Function Weight
        # Feed-Forward Matrix (hidden, in)
        self.W = shared_rand_matrix((self.hidden_dim, self.in_dim),
                                    prefix + 'W_forward', initializer)
        # Bias Term (hidden)
        self.b = shared_zero_matrix((self.hidden_dim, ), prefix + 'b_forward')
        # Recurrent Matrix (hidden, hidden)
        self.U = shared_rand_matrix((self.hidden_dim, self.hidden_dim),
                                    prefix + 'U_forward', initializer)

        self.params = [self.W, self.U, self.b]
        self.norm_params = [self.W, self.U]

        # L1, L2 Norm
        self.l1_norm = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.U))
        self.l2_norm = T.sum(self.W**2) + T.sum(self.U**2)

        if verbose:
            logger.debug('Architecture of {} built finished'.format(
                self.__class__.__name__))
            logger.debug('Input dimension:  %d' % self.in_dim)
            logger.debug('Hidden dimension: %d' % self.hidden_dim)
            logger.debug('Pooling methods:  %s' % self.pooling)
            logger.debug('Activation Func:  %s' % self.act.method)
            logger.debug('Dropout Rate:     %f' % self.dropout)
Code example #18
 def testAE(self):
     # Set parameters
     input = T.matrix(name='input')
     num_in, num_out = 784, 500
     act = Activation('sigmoid')
     is_denoising, is_sparse = True, False
     lambda1 = 1e-4
     mask = 0.7
     rng = RandomStreams(42)
     start_time = time.time()
     ae = AutoEncoder(input, (num_in, num_out),
                      act,
                      is_denoising,
                      is_sparse,
                      lambda1,
                      mask,
                      rng,
                      verbose=True)
     end_time = time.time()
     pprint('Time used to build the AutoEncoder: %f seconds.' %
            (end_time - start_time))
     batch_size = 1000
     num_batches = self.training_set.shape[0] / batch_size
     nepoch = 50
     learn_rate = 1
     start_time = time.time()
     for i in xrange(nepoch):
         rate = learn_rate
         for j in xrange(num_batches):
             train_set = self.training_set[j * batch_size:(j + 1) *
                                           batch_size, :]
             cost = ae.train(train_set, rate)
             pprint('epoch %d, batch %d, cost = %f' % (i, j, cost))
     end_time = time.time()
     pprint('Time used for training AutoEncoder: %f seconds.' %
            (end_time - start_time))
     image = PIL.Image.fromarray(
         imgutils.tile_raster_images(
             X=ae.encode_layer.W.get_value(borrow=True).T,
             img_shape=(28, 28),
             tile_shape=(10, 10),
             tile_spacing=(1, 1)))
     image.save('filters_corruption_%.2f.png' % mask)
     AutoEncoder.save('./autoencoder-mnist.model', ae)
Code example #19
File: dense_layer.py Project: Sameer-Arora/ML-IS-FUN
	def __init__(self, input_units, output_units, activation = None):
		"""
		Params:
		input_units = Number of input nodes
		output_units = Number of output nodes
		activation = The activation layer
		"""
		# self.weights = np.random.normal(0.0, 1.0/np.sqrt(input_units), (input_units, output_units))
		# self.bias = np.random.normal(0.0, 1.0/np.sqrt(input_units), (1, output_units))
		# self.weights = np.random.uniform(-0.01, 0.01, (input_units, output_units))
		self.weights = np.linspace(-0.01, 0.01, num = input_units*output_units)
		self.weights = self.weights.reshape((input_units, output_units))
		self.bias = np.zeros((1,output_units))
		self.activation = Activation(activation)

		# Initialize Other Things as Zero
		self.output_units = None
		self.grad_weights = 0
		self.grad_bias = 0
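A hedged forward-pass sketch for this layer, matching the (input_units, output_units) weight layout used above; the method is not in the original file, and how the Activation object is applied is an assumption:

	def forward(self, x):
		# x: (batch, input_units) -> (batch, output_units)
		z = x @ self.weights + self.bias
		# Applying the activation object directly is an assumption about its interface
		return self.activation(z)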
Code example #20
    def __init__(self,
                 entity_dim,
                 relation_num,
                 hidden=50,
                 activation='tanh',
                 initializer=default_initializer,
                 prefix='',
                 verbose=True):
        super(SingleLayerModel, self).__init__()
        self.hidden = hidden
        self.entity_dim = entity_dim
        self.relation_num = relation_num
        # (relation_num, k, entity_dim)
        self.W_1 = shared_rand_matrix(
            (relation_num, self.hidden, self.entity_dim),
            prefix + 'SingleLayer_W1', initializer)
        # (relation_num, k, entity_dim)
        self.W_2 = shared_rand_matrix(
            (relation_num, self.hidden, self.entity_dim),
            prefix + 'SingleLayer_W2', initializer)
        # (relation_num, k, )
        self.u = shared_ones_matrix((relation_num, self.hidden),
                                    prefix + 'SingleLayer_u')
        self.act = Activation(activation)
        self.params = [self.W_1, self.W_2, self.u]
        self.norm_params = [self.W_1, self.W_2, self.u]
        self.l1_norm = T.sum(T.abs_(self.W_1)) + T.sum(T.abs_(self.W_2)) + T.sum(T.abs_(self.u))
        self.l2_norm = T.sum(self.W_1 ** 2) + T.sum(self.W_2 ** 2) + T.sum(self.u ** 2)

        if verbose:
            logger.debug(
                'Architecture of Single Layer Model built finished, summarized as below:'
            )
            logger.debug('Entity Dimension: %d' % self.entity_dim)
            logger.debug('Hidden Dimension: %d' % self.hidden)
            logger.debug('Relation Number:  %d' % self.relation_num)
            logger.debug('Initializer:      %s' % initializer)
            logger.debug('Activation:       %s' % activation)
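The scoring function itself is not shown. As a hedged note, the standard single-layer model these parameters correspond to scores a triple (e1, r, e2) as the dot product of u_r with act(W1_r e1 + W2_r e2); in Theano terms, roughly:

        # score_r(e1, e2) = T.dot(self.u[r],
        #                         self.act.activate(T.dot(self.W_1[r], e1) + T.dot(self.W_2[r], e2)))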
Code example #21
 def __init__(self,
              word_dim,
              seq_dim,
              hidden_dim,
              activation='tanh',
              initializer=default_initializer):
     # Pass the caller-supplied initializer through to the base class
     super(NNWordBasedAttention,
           self).__init__(word_dim=word_dim,
                          seq_dim=seq_dim,
                          initializer=initializer)
     # (dim, dim)
     self.hidden_dim = hidden_dim
     self.W = shared_rand_matrix((self.word_dim, self.hidden_dim),
                                 'Attention_W', initializer)
     self.U = shared_rand_matrix((self.seq_dim, self.hidden_dim),
                                 'Attention_U', initializer)
     self.v = shared_rand_matrix((self.hidden_dim, ), 'Attention_v',
                                 initializer)
     self.act = Activation(activation)
     self.params = [self.W]
     self.norm_params = [self.W]
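For context, a hedged note on the computation these parameters usually implement (additive, Bahdanau-style attention); the actual scoring code is not part of the snippet:

     # For a word vector w of shape (word_dim,) and a sequence state s of shape (seq_dim,):
     #   e = T.dot(self.v, self.act.activate(T.dot(w, self.W) + T.dot(s, self.U)))
     # The scores e are then normalised with a softmax over the sequence positions.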
Code example #22
    def __init__(self, configs=None, verbose=True):
        '''
		@config: CNNConfiger. Configer used to set the architecture of CNN.
		'''
        if verbose: pprint("Building Convolutional Neural Network...")
        # Make theano symbolic tensor for input and ground truth label
        self.input = T.tensor4(name='input', dtype=floatX)
        self.truth = T.ivector(name='label')
        self.learn_rate = T.scalar(name='learn rate')
        self.batch_size = configs.batch_size
        self.image_row = configs.image_row
        self.image_col = configs.image_col
        # There may be multiple convolution-pooling layers and multilayer perceptrons.
        self.convpool_layers = []
        self.hidden_layers = []
        self.softmax_layers = []
        # Configure activation function
        self.act = Activation(configs.activation)
        # Configuration should be valid
        assert configs.num_convpool == len(configs.convs)
        assert configs.num_convpool == len(configs.pools)
        assert configs.num_hidden == len(configs.hiddens)
        assert configs.num_softmax == len(configs.softmaxs)
        # Construct random number generator
        srng = T.shared_randomstreams.RandomStreams(configs.random_seed)
        # Build architecture of CNN
        # Convolution and Pooling layers
        image_shapes, filter_shapes = [], []
        for i in xrange(configs.num_convpool):
            if i == 0:
                image_shapes.append(
                    (self.batch_size, 1, self.image_row, self.image_col))
                filter_shapes.append(
                    (configs.convs[i][0], 1, configs.convs[i][1],
                     configs.convs[i][2]))
            else:
                image_shapes.append(
                    (self.batch_size, configs.convs[i - 1][0],
                     (image_shapes[i - 1][2] - configs.convs[i - 1][1] + 1) /
                     configs.pools[i - 1][0],
                     (image_shapes[i - 1][3] - configs.convs[i - 1][2] + 1) /
                     configs.pools[i - 1][1]))
                filter_shapes.append(
                    (configs.convs[i][0], configs.convs[i - 1][0],
                     configs.convs[i][1], configs.convs[i][2]))
        for i in xrange(configs.num_convpool):
            if i == 0:
                current_input = self.input
            else:
                current_input = self.convpool_layers[i - 1].output
            self.convpool_layers.append(
                LeNetConvPoolLayer(input=current_input,
                                   filter_shape=filter_shapes[i],
                                   image_shape=image_shapes[i],
                                   poolsize=configs.pools[i],
                                   act=self.act))
        # Multilayer perceptron layers
        for i in xrange(configs.num_hidden):
            if i == 0:
                current_input = T.flatten(
                    self.convpool_layers[configs.num_convpool - 1].output, 2)
            else:
                current_input = self.hidden_layers[i - 1].output
            # Adding dropout to hidden layers
            hidden_layer = HiddenLayer(current_input,
                                       configs.hiddens[i],
                                       act=self.act)
            mask = srng.binomial(n=1,
                                 p=1 - configs.dropout,
                                 size=hidden_layer.shape)
            hidden_layer *= T.cast(mask, floatX)
            self.hidden_layers.append(hidden_layer)
        # Softmax Layer, for most case, the architecture will only contain one softmax layer
        for i in xrange(configs.num_softmax):
            if i == 0:
                current_input = self.hidden_layers[configs.num_hidden -
                                                   1].output
            else:
                current_input = self.softmax_layers[i - 1].output
            self.softmax_layers.append(
                SoftmaxLayer(current_input, configs.softmaxs[i]))
        # Output
        self.pred = self.softmax_layers[configs.num_softmax - 1].prediction()
        # Cost function with ground truth provided
        self.cost = self.softmax_layers[configs.num_softmax - 1].NLL_loss(
            self.truth)
        # Build cost function
        # Stack all the parameters
        self.params = []
        for convpool_layer in self.convpool_layers:
            self.params.extend(convpool_layer.params)
        for hidden_layer in self.hidden_layers:
            self.params.extend(hidden_layer.params)
        for softmax_layer in self.softmax_layers:
            self.params.extend(softmax_layer.params)
        # Compute gradient of self.cost with respect to network parameters
        self.gradparams = T.grad(self.cost, self.params)
        # Stochastic gradient descent learning algorithm
        self.updates = []
        for param, gradparam in zip(self.params, self.gradparams):
            self.updates.append((param, param - self.learn_rate * gradparam))
        # Build objective function
        self.objective = theano.function(
            inputs=[self.input, self.truth, self.learn_rate],
            outputs=self.cost,
            updates=self.updates)
        # Build prediction function
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        if verbose:
            pprint('Architecture building finished, summarized as below: ')
            pprint(
                'There are %d layers (not including the input layer) altogether: '
                % (configs.num_convpool * 2 + configs.num_hidden +
                   configs.num_softmax))
            pprint('%d convolution layers + %d maxpooling layers.' %
                   (len(self.convpool_layers), len(self.convpool_layers)))
            pprint('%d hidden layers.' % (len(self.hidden_layers)))
            pprint('%d softmax layers.' % (len(self.softmax_layers)))
            pprint('=' * 50)
            pprint('Detailed architecture of each layer: ')
            pprint('-' * 50)
            pprint('Convolution and Pooling layers: ')
            for i in xrange(len(self.convpool_layers)):
                pprint('Convolution Layer %d: ' % i)
                pprint(
                    '%d feature maps, each has a filter kernel with size (%d, %d)'
                    % (configs.convs[i][0], configs.convs[i][1],
                       configs.convs[i][2]))
            pprint('-' * 50)
            pprint('Hidden layers: ')
            for i in xrange(len(self.hidden_layers)):
                pprint('Hidden Layer %d: ' % i)
                pprint('Input dimension: %d, Output dimension: %d' %
                       (configs.hiddens[i][0], configs.hiddens[i][1]))
            pprint('-' * 50)
            pprint('Softmax layers: ')
            for i in xrange(len(self.softmax_layers)):
                pprint('Softmax Layer %d: ' % i)
                pprint('Input dimension: %d, Output dimension: %d' %
                       (configs.softmaxs[i][0], configs.softmaxs[i][1]))
Code example #23
 def __init__(self, config=None, verbose=True):
     # Construct two GrCNNEncoders for matching two sentences
     self.encoderL = GrCNNEncoder(config, verbose)
     self.encoderR = GrCNNEncoder(config, verbose)
     # Link the parameters of two parts
     self.params = []
     self.params += self.encoderL.params
     self.params += self.encoderR.params
     # Build three kinds of inputs:
     # 1, inputL, inputR. This pair is used for computing the score after training
     # 2, inputPL, inputPR. This part is used for training positive pairs
     # 3, inputNL, inputNR. This part is used for training negative pairs
     self.inputL = self.encoderL.input
     self.inputR = self.encoderR.input
     # Positive
     self.inputPL = T.matrix(name='inputPL', dtype=floatX)
     self.inputPR = T.matrix(name='inputPR', dtype=floatX)
     # Negative
     self.inputNL = T.matrix(name='inputNL', dtype=floatX)
     self.inputNR = T.matrix(name='inputNR', dtype=floatX)
     # Linking input-output mapping
     self.hiddenL = self.encoderL.output
     self.hiddenR = self.encoderR.output
     # Positive
     self.hiddenPL = self.encoderL.encode(self.inputPL)
     self.hiddenPR = self.encoderR.encode(self.inputPR)
     # Negative
     self.hiddenNL = self.encoderL.encode(self.inputNL)
     self.hiddenNR = self.encoderR.encode(self.inputNR)
     # Activation function
     self.act = Activation(config.activation)
     # MLP Component
     self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=1)
     self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=1)
     self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=1)
     # Build hidden layer
     self.hidden_layer = HiddenLayer(
         self.hidden, (2 * config.num_hidden, config.num_mlp),
         act=Activation(config.hiddenact))
     self.compressed_hidden = self.hidden_layer.output
     self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
     self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
     # Accumulate parameters
     self.params += self.hidden_layer.params
     # Dropout parameter
     srng = T.shared_randomstreams.RandomStreams(config.random_seed)
     mask = srng.binomial(n=1,
                          p=1 - config.dropout,
                          size=self.compressed_hidden.shape)
     maskP = srng.binomial(n=1,
                           p=1 - config.dropout,
                           size=self.compressed_hiddenP.shape)
     maskN = srng.binomial(n=1,
                           p=1 - config.dropout,
                           size=self.compressed_hiddenN.shape)
     self.compressed_hidden *= T.cast(mask, floatX)
     self.compressed_hiddenP *= T.cast(maskP, floatX)
     self.compressed_hiddenN *= T.cast(maskN, floatX)
     # Score layers
     self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
     self.output = self.score_layer.output
     self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
     self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
     # Accumulate parameters
     self.params += self.score_layer.params
     # Build cost function
     self.cost = T.mean(
         T.maximum(T.zeros_like(self.scoreP),
                   1.0 - self.scoreP + self.scoreN))
     # Construct the gradient of the cost function with respect to the model parameters
     self.gradparams = T.grad(self.cost, self.params)
     # Compute the total number of parameters in the model
     self.num_params_encoder = config.num_input * config.num_hidden + \
                       config.num_hidden * config.num_hidden * 2 + \
                       config.num_hidden + \
                       config.num_hidden * 3 * 2 + \
                       3
     self.num_params_encoder *= 2
     self.num_params_classifier = 2 * config.num_hidden * config.num_mlp + \
                                  config.num_mlp + \
                                  config.num_mlp + 1
     self.num_params = self.num_params_encoder + self.num_params_classifier
     # Build class methods
     self.score = theano.function(inputs=[self.inputL, self.inputR],
                                  outputs=self.output)
     self.compute_cost_and_gradient = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
     self.show_scores = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=[self.scoreP, self.scoreN])
     self.show_hiddens = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=[self.hiddenP, self.hiddenN])
     self.show_inputs = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR])
     if verbose:
         logger.debug(
             'Architecture of GrCNNMatchScorer built finished, summarized below: '
         )
         logger.debug('Input dimension: %d' % config.num_input)
         logger.debug(
             'Hidden dimension inside GrCNNMatchScorer pyramid: %d' %
             config.num_hidden)
         logger.debug('Hidden dimension MLP: %d' % config.num_mlp)
         logger.debug('There are 2 GrCNNEncoders used in model.')
         logger.debug('Total number of parameters used in the model: %d' %
                      self.num_params)
Code example #24
 def __init__(self, config, verbose=True):
     '''
     @config: GRCNNConfiger. Configer used to set the architecture of GRCNNEncoder.
     '''
     self.encoder = GrCNNEncoder(config, verbose)
     # Link two parts
     self.input = self.encoder.input
     # Activation function
     self.act = Activation(config.activation)
     # Extract the hierarchical representation, the pyramids, from the encoder
     # Combine the original time series and the compressed time series
     self.pyramids = self.encoder.pyramids
     self.pyramids = T.concatenate([
         self.encoder.hidden0.dimshuffle('x', 0, 1), self.encoder.pyramids
     ])
     self.nsteps = self.pyramids.shape[0]
     # Use another scan function to compress each hierarchical representation
     # into the vector representation
     self.hierarchies, _ = theano.scan(
         fn=self._step_compress,
         sequences=[T.arange(self.nsteps, 0, -1), self.pyramids])
     # Global classifier, MLP, mixture of experts
     self.hidden_layer = HiddenLayer(self.hierarchies,
                                     (config.num_hidden, config.num_mlp),
                                     act=Activation(config.hiddenact))
     # Adding dropout support
     self.hidden = self.hidden_layer.output
     srng = T.shared_randomstreams.RandomStreams(config.random_seed)
     mask = srng.binomial(n=1, p=1 - config.dropout, size=self.hidden.shape)
     self.hidden *= T.cast(mask, floatX)
     # Connect the hidden layer after dropout to a logistic output layer
     self.output_layer = LogisticLayer(self.hidden, config.num_mlp)
     self.experts = self.output_layer.output
     # Global weighting mechanism, voting weights
     self.weight_layer = theano.shared(
         name='Weighting vector',
         value=np.random.rand(config.num_hidden).astype(floatX))
     self.weights = T.nnet.softmax(
         T.dot(self.hierarchies, self.weight_layer))
     # Compute the total number of parameters in the model
     self.num_params = self.encoder.num_params + self.hidden_layer.num_params + \
                       self.output_layer.num_params + config.num_hidden
     # Final decision, bagging
     self.score = T.sum(T.flatten(self.experts) * T.flatten(self.weights))
     # Prediction for classification
     self.pred = self.score >= 0.5
     # Stack all the parameters
     self.params = []
     self.params += self.encoder.params
     self.params += self.hidden_layer.params
     self.params += self.output_layer.params
     self.params += [self.weight_layer]
     # Build objective function for binary classification problem
     self.truth = T.iscalar(name='label')
     self.cost = -self.truth * T.log((self.score+np.finfo(float).eps) / (1+2*np.finfo(float).eps)) - \
                 (1-self.truth) * T.log((1.0-self.score+np.finfo(float).eps) / (1+2*np.finfo(float).eps))
     ## Weight Decay
     if config.weight_decay:
         self.regularizer = self.encoder.L2_loss() + self.hidden_layer.L2_loss() + \
                            self.output_layer.L2_loss() + T.sum(self.weight_layer ** 2)
         self.regularizer *= config.weight_decay_parameter
         self.cost += self.regularizer
     # Construct gradient vectors
     self.gradparams = T.grad(self.cost, self.params)
     # Construct gradient for the input matrix, fine-tuning
     self.input_grads = T.grad(self.cost, self.input)
     # Build and compile theano functions
     self.predict = theano.function(inputs=[self.input], outputs=self.pred)
     self.bagging = theano.function(inputs=[self.input], outputs=self.score)
     self.compute_gradient_and_cost = theano.function(
         inputs=[self.input, self.truth],
         outputs=self.gradparams + [self.cost, self.pred])
     self.compute_input_gradient = theano.function(
         inputs=[self.input, self.truth], outputs=self.input_grads)
     # Theano functions for debugging purposes
     self.show_weights = theano.function(inputs=[self.input],
                                         outputs=self.weights)
     self.show_scores = theano.function(inputs=[self.input],
                                        outputs=self.experts)
     self.show_hierarchy = theano.function(inputs=[self.input],
                                           outputs=self.hierarchies)
     self.show_prob = theano.function(inputs=[self.input],
                                      outputs=self.score)
     self.show_cost = theano.function(inputs=[self.input, self.truth],
                                      outputs=self.cost)
     if verbose:
         logger.debug('GrCNNBagger built finished...')
         logger.debug(
             'Hierarchical structure of GrCNN for classification...')
         logger.debug('Total number of parameters in the model: %d' %
                      self.num_params)
Code example #25
images_path = './data/training_images'
annotations_path = './data/annotations'
classes_file = './data/classes.txt'
X, y = prepare_dataset(images_path, annotations_path, classes_file)
'''TRAINING PROCEDURE'''
from models_final import Sequential
from convolutions_final import Conv2D
from normalizations import BatchNormalization
from poolings import MaxPool2D
from dense_final import Flatten, Dense
from activations import Activation

model = Sequential()
model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer1", X.shape))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer1"))
model.add(Activation('relu'))
model.add(BatchNormalization(1, 0, 1e-5))

model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer2"))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer2"))
model.add(Activation('relu'))
model.add(BatchNormalization(1, 0, 1e-5))

model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer3"))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer3"))
model.add(Activation('relu'))
model.add(BatchNormalization(1, 0, 1e-5))

model.add(Conv2D(10, (3, 3), 1, "valid", "convLayer4"))
model.add(MaxPool2D((2, 2), 2, "valid", "poolLayer4"))
model.add(Activation('relu'))
Code example #26
    def testTrain(self):
        '''
		Train Auto-Encoder + SoftmaxLayer on batch learning mode.
		'''
        input_dim = self.max_length * self.word_embedding.embedding_dim()
        hidden_dim = 500
        # Build AutoEncoder + SoftmaxLayer
        start_time = time.time()
        seed = 1991
        input_matrix = T.matrix(name='input')
        num_in, num_out = input_dim, hidden_dim
        act = Activation('tanh')
        is_denoising, is_sparse = True, False
        lambda1, mask = 1e-4, 0.5
        rng = RandomStreams(seed)
        sent_model = SentModel(input_matrix, (num_in, num_out),
                               act,
                               is_denoising,
                               is_sparse,
                               lambda1,
                               mask,
                               rng,
                               verbose=True)
        end_time = time.time()
        pprint('Time used to build the model: %f seconds.' %
               (end_time - start_time))
        # Loading training data and start batch training mode
        num_batch = self.num_sent / self.batch_size
        learn_rate = 0.1
        # Pretraining
        pprint('Start pretraining...')
        start_time = time.time()
        for i in xrange(self.nepoch):
            # Batch training
            pprint('Training epoch: %d' % i)
            for j in xrange(num_batch):
                train_set = np.zeros(
                    (self.batch_size,
                     self.max_length * self.word_embedding.embedding_dim()),
                    dtype=floatX)
                train_txt = self.train_txt[j * self.batch_size:(j + 1) *
                                           self.batch_size]
                for k, sent in enumerate(train_txt):
                    words = sent.split()
                    vectors = np.asarray(
                        [self.word_embedding.wordvec(word) for word in words])
                    vectors = vectors.flatten()
                    train_set[k, :vectors.shape[0]] = vectors
                rate = learn_rate
                cost = sent_model.pretrain(train_set, rate)
                if (j + 1) % 500 == 0:
                    pprint('Training epoch: %d, Number batch: %d, cost = %f' %
                           (i, j, cost))
            # Saving temporary pretraining model in .gz
            with gzip.GzipFile('./large_pretrain.sent.gz', 'wb') as fout:
                cPickle.dump(sent_model, fout)
        end_time = time.time()
        pprint('Time used for pretraining: %f minutes.' %
               ((end_time - start_time) / 60.0))
        # Fine tuning
        pprint('Start fine-tuning...')
        start_time = time.time()
        for i in xrange(self.nepoch):
            # Batch training
            pprint('Training epoch: %d' % i)
            for j in xrange(num_batch):
                train_set = np.zeros(
                    (self.batch_size,
                     self.max_length * self.word_embedding.embedding_dim()),
                    dtype=floatX)
                train_txt = self.train_txt[j * self.batch_size:(j + 1) *
                                           self.batch_size]
                for k, sent in enumerate(train_txt):
                    words = sent.split()
                    vectors = np.asarray(
                        [self.word_embedding.wordvec(word) for word in words])
                    vectors = vectors.flatten()
                    train_set[k, :vectors.shape[0]] = vectors
                rate = learn_rate
                cost = sent_model.finetune(train_set, rate)
                if (j + 1) % 500 == 0:
                    pprint('Training epoch: %d, Number batch: %d, cost = %f' %
                           (i, j, cost))
            # Saving temporary fine-tuning model in .gz
            with gzip.GzipFile('./large_finetune.sent.gz', 'wb') as fout:
                cPickle.dump(sent_model, fout)
        end_time = time.time()
        pprint('Time used for fine-tuning: %f minutes.' %
               ((end_time - start_time) / 60.0))
Code Example #27
 def __init__(self, config, verbose=True):
     # Construct two BRNNEncoders for matching two sentences
     self.encoderL = BRNNEncoder(config, verbose)
     self.encoderR = BRNNEncoder(config, verbose)
     # Link two parts
     self.params = []
     self.params += self.encoderL.params
     self.params += self.encoderR.params
     # Set up input
     # Note that there are three kinds of inputs altogether, including:
     # 1, inputL, inputR. This pair is used for computing the score after training
     # 2, inputPL, inputPR. This pair is used for training positive pairs
     # 3, inputNL, inputNR. This pair is used for training negative pairs
     self.inputL = self.encoderL.input
     self.inputR = self.encoderR.input
     # Positive
     self.inputPL = T.matrix(name='inputPL', dtype=floatX)
     self.inputPR = T.matrix(name='inputPR', dtype=floatX)
     # Negative
     self.inputNL = T.matrix(name='inputNL', dtype=floatX)
     self.inputNR = T.matrix(name='inputNR', dtype=floatX)
     # Get output of two BRNNEncoders
     self.hiddenL = self.encoderL.output
     self.hiddenR = self.encoderR.output
     # Positive Hidden
     self.hiddenPL = self.encoderL.encode(self.inputPL)
     self.hiddenPR = self.encoderR.encode(self.inputPR)
     # Negative Hidden
     self.hiddenNL = self.encoderL.encode(self.inputNL)
     self.hiddenNR = self.encoderR.encode(self.inputNR)
     # Activation function
     self.act = Activation(config.activation)
     self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=0)
     self.hiddenP = T.concatenate([self.hiddenPL, self.hiddenPR], axis=0)
     self.hiddenN = T.concatenate([self.hiddenNL, self.hiddenNR], axis=0)
     # Build hidden layer
     self.hidden_layer = HiddenLayer(
         self.hidden, (4 * config.num_hidden, config.num_mlp),
         act=Activation(config.hiddenact))
     self.compressed_hidden = self.hidden_layer.output
     self.compressed_hiddenP = self.hidden_layer.encode(self.hiddenP)
     self.compressed_hiddenN = self.hidden_layer.encode(self.hiddenN)
     # Accumulate parameters
     self.params += self.hidden_layer.params
     # Dropout parameter
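      # Sample independent binary dropout masks for the scoring, positive and negative branches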
     srng = T.shared_randomstreams.RandomStreams(config.random_seed)
     mask = srng.binomial(n=1,
                          p=1 - config.dropout,
                          size=self.compressed_hidden.shape)
     maskP = srng.binomial(n=1,
                           p=1 - config.dropout,
                           size=self.compressed_hiddenP.shape)
     maskN = srng.binomial(n=1,
                           p=1 - config.dropout,
                           size=self.compressed_hiddenN.shape)
     self.compressed_hidden *= T.cast(mask, floatX)
     self.compressed_hiddenP *= T.cast(maskP, floatX)
     self.compressed_hiddenN *= T.cast(maskN, floatX)
     # Score layer
     self.score_layer = ScoreLayer(self.compressed_hidden, config.num_mlp)
     self.output = self.score_layer.output
     self.scoreP = self.score_layer.encode(self.compressed_hiddenP)
     self.scoreN = self.score_layer.encode(self.compressed_hiddenN)
     # Accumulate parameters
     self.params += self.score_layer.params
     # Build cost function
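      # Pairwise hinge (margin-ranking) loss: penalized whenever the positive score fails to exceed the negative score by a margin of 1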
     self.cost = T.mean(
         T.maximum(T.zeros_like(self.scoreP),
                   1.0 - self.scoreP + self.scoreN))
      # Compute the gradients of the cost with respect to all model parameters
     self.gradparams = T.grad(self.cost, self.params)
     # Compute the total number of parameters in the model
     self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
      # The MLP input is the concatenation of two bidirectional encodings, i.e. 4 * num_hidden units
      self.num_params_classifier = 4 * config.num_hidden * config.num_mlp + config.num_mlp + \
              config.num_mlp + 1
     self.num_params = self.num_params_encoder + self.num_params_classifier
     # Build class functions
     self.score = theano.function(inputs=[self.inputL, self.inputR],
                                  outputs=self.output)
      # Compute the gradients of the cost together with the cost itself and the positive/negative scores
     self.compute_cost_and_gradient = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=self.gradparams + [self.cost, self.scoreP, self.scoreN])
     # Output function for debugging purpose
     self.show_scores = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=[self.scoreP, self.scoreN])
     self.show_hiddens = theano.function(
         inputs=[self.inputPL, self.inputPR, self.inputNL, self.inputNR],
         outputs=[self.hiddenP, self.hiddenN])
     if verbose:
         logger.debug(
             'Architecture of BRNNMatchScorer built finished, summarized below: '
         )
         logger.debug('Input dimension: %d' % config.num_input)
         logger.debug('Hidden dimension of RNN: %d' % config.num_hidden)
         logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
         logger.debug('There are 2 BRNNEncoders used in the model.')
         logger.debug('Total number of parameters in this model: %d' %
                      self.num_params)
Code Example #28
 def __init__(self, config, verbose=True):
     # Construct two BRNNEncoders for matching two sentences
     self.encoderL = BRNNEncoder(config, verbose)
     self.encoderR = BRNNEncoder(config, verbose)
     # Link two parts
     self.params = []
     self.params += self.encoderL.params
     self.params += self.encoderR.params
     # Set up input
     self.inputL = self.encoderL.input
     self.inputR = self.encoderR.input
     # Get output of two BRNNEncoders
     self.hiddenL = self.encoderL.output
     self.hiddenR = self.encoderR.output
     # Activation function
     self.act = Activation(config.activation)
     # MLP Component
     self.hidden = T.concatenate([self.hiddenL, self.hiddenR], axis=0)
     self.hidden_layer = HiddenLayer(
         self.hidden, (4 * config.num_hidden, config.num_mlp),
         act=Activation(config.hiddenact))
     self.compressed_hidden = self.hidden_layer.output
     # Accumulate parameters
     self.params += self.hidden_layer.params
     # Dropout parameter
     srng = T.shared_randomstreams.RandomStreams(config.random_seed)
     mask = srng.binomial(n=1,
                          p=1 - config.dropout,
                          size=self.compressed_hidden.shape)
     self.compressed_hidden *= T.cast(mask, floatX)
     # Logistic regression
     self.logistic_layer = LogisticLayer(self.compressed_hidden,
                                         config.num_mlp)
     self.output = self.logistic_layer.output
     self.pred = self.logistic_layer.pred
     # Accumulate parameters
     self.params += self.logistic_layer.params
     # Compute the total number of parameters in the model
     self.num_params_encoder = self.encoderL.num_params + self.encoderR.num_params
      # The MLP input is the concatenation of two bidirectional encodings, i.e. 4 * num_hidden units
      self.num_params_classifier = 4 * config.num_hidden * config.num_mlp + config.num_mlp + \
              config.num_mlp + 1
     self.num_params = self.num_params_encoder + self.num_params_classifier
     # Build target function
     self.truth = T.ivector(name='label')
     self.cost = self.logistic_layer.NLL_loss(self.truth)
     # Build computational graph and compute the gradients of the model parameters
     # with respect to the cost function
     self.gradparams = T.grad(self.cost, self.params)
     # Compile theano function
     self.objective = theano.function(
         inputs=[self.inputL, self.inputR, self.truth], outputs=self.cost)
     self.predict = theano.function(inputs=[self.inputL, self.inputR],
                                    outputs=self.pred)
      # Compute the gradients of the cost together with the cost and the prediction
     self.compute_cost_and_gradient = theano.function(
         inputs=[self.inputL, self.inputR, self.truth],
         outputs=self.gradparams + [self.cost, self.pred])
     # Output function for debugging purpose
     self.show_hidden = theano.function(inputs=[self.inputL, self.inputR],
                                        outputs=self.hidden)
     self.show_compressed_hidden = theano.function(
         inputs=[self.inputL, self.inputR], outputs=self.compressed_hidden)
     self.show_output = theano.function(inputs=[self.inputL, self.inputR],
                                        outputs=self.output)
     if verbose:
         logger.debug(
             'Architecture of BRNNMatcher built finished, summarized below: '
         )
         logger.debug('Input dimension: %d' % config.num_input)
         logger.debug('Hidden dimension of RNN: %d' % config.num_hidden)
         logger.debug('Hidden dimension of MLP: %d' % config.num_mlp)
         logger.debug('Number of parameters in the encoder part: %d' %
                      self.num_params_encoder)
         logger.debug('Number of parameters in the classifier: %d' %
                      self.num_params_classifier)
         logger.debug('Total number of parameters in this model: %d' %
                      self.num_params)
Code Example #29
 def __init__(self, config, verbose=True):
     if verbose: logger.debug('Building Bidirectional RNN Encoder...')
     self.input = T.matrix(name='BRNNEncoder_input')
     # Configure Activation function
     self.act = Activation(config.activation)
     # Build Bidirectional RNN
     num_input, num_hidden = config.num_input, config.num_hidden
     self.num_params = 2 * (num_input * num_hidden +
                            num_hidden * num_hidden + num_hidden)
     # Initialize model parameters
     np.random.seed(config.random_seed)
     # 1, Feed-forward matrix for forward direction: W_forward
     W_forward_val = np.random.uniform(low=-1.0,
                                       high=1.0,
                                       size=(num_input, num_hidden))
     W_forward_val = W_forward_val.astype(floatX)
     self.W_forward = theano.shared(value=W_forward_val,
                                    name='W_forward',
                                    borrow=True)
     # 1, Feed-forward matrix for backward direction: W_backward
     W_backward_val = np.random.uniform(low=-1.0,
                                        high=1.0,
                                        size=(num_input, num_hidden))
     W_backward_val = W_backward_val.astype(floatX)
     self.W_backward = theano.shared(value=W_backward_val,
                                     name='W_backward',
                                     borrow=True)
     # 2, Recurrent matrix for forward direction: U_forward
     U_forward_val = np.random.uniform(low=-1.0,
                                       high=1.0,
                                       size=(num_hidden, num_hidden))
     U_forward_val = U_forward_val.astype(floatX)
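      # Orthogonal initialization: keep only the orthonormal factor returned by the SVD (same for the backward recurrent matrix below)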
     U_forward_val, _, _ = np.linalg.svd(U_forward_val)
     self.U_forward = theano.shared(value=U_forward_val,
                                    name='U_forward',
                                    borrow=True)
     # 2, Recurrent matrix for backward direction: U_backward
     U_backward_val = np.random.uniform(low=-1.0,
                                        high=1.0,
                                        size=(num_hidden, num_hidden))
     U_backward_val = U_backward_val.astype(floatX)
     U_backward_val, _, _ = np.linalg.svd(U_backward_val)
     self.U_backward = theano.shared(value=U_backward_val,
                                     name='U_backward',
                                     borrow=True)
     # 3, Bias parameter for the hidden-layer forward direction RNN
     b_forward_val = np.zeros(num_hidden, dtype=floatX)
     self.b_forward = theano.shared(value=b_forward_val,
                                    name='b_forward',
                                    borrow=True)
     # 3, Bias parameter for the hidden-layer backward direction RNN
     b_backward_val = np.zeros(num_hidden, dtype=floatX)
     self.b_backward = theano.shared(value=b_backward_val,
                                     name='b_backward',
                                     borrow=True)
     # h[0], zero vectors, treated as constants
     self.h0_forward = theano.shared(value=np.zeros(num_hidden,
                                                    dtype=floatX),
                                     name='h0_forward',
                                     borrow=True)
     self.h0_backward = theano.shared(value=np.zeros(num_hidden,
                                                     dtype=floatX),
                                      name='h0_backward',
                                      borrow=True)
     # Stack all the parameters
     self.params = [
         self.W_forward, self.W_backward, self.U_forward, self.U_backward,
         self.b_forward, self.b_backward
     ]
     # Compute the forward and backward representation over time
     self.h_forwards, _ = theano.scan(fn=self._forward_step,
                                      sequences=self.input,
                                      outputs_info=[self.h0_forward],
                                      truncate_gradient=config.bptt)
     self.h_backwards, _ = theano.scan(fn=self._backward_step,
                                       sequences=self.input,
                                       outputs_info=[self.h0_backward],
                                       truncate_gradient=config.bptt,
                                       go_backwards=True)
      # Mean-pool the hidden states over time (average compression)
     self.h_forward = T.mean(self.h_forwards, axis=0)
     self.h_backward = T.mean(self.h_backwards, axis=0)
     # Concatenate
     self.output = T.concatenate([self.h_forward, self.h_backward], axis=0)
     # L1, L2 regularization
     self.L1_norm = T.sum(
         T.abs_(self.W_forward) + T.abs_(self.W_backward) +
         T.abs_(self.U_forward) + T.abs_(self.U_backward))
     self.L2_norm = T.sum(self.W_forward ** 2) + T.sum(self.W_backward ** 2) + \
           T.sum(self.U_forward ** 2) + T.sum(self.U_backward ** 2)
     if verbose:
         logger.debug(
             'Finished constructing the structure of BRNN Encoder: ')
         logger.debug('Size of the input dimension: %d' % num_input)
         logger.debug('Size of the hidden dimension: %d' % num_hidden)
         logger.debug('Activation function: %s' % config.activation)
Code Example #30
    def __init__(self, configs, verbose=True):
        if verbose:
            pprint('Build Tied weights Bidirectional Recurrent Neural Network')
        self.input = T.matrix(name='input')
        self.truth = T.ivector(name='label')
        self.learn_rate = T.scalar(name='learn rate')
        # Configure Activation function
        self.act = Activation(configs.activation)
        # Build bidirectional RNN with tied weights
        num_input, num_hidden, num_class = configs.num_input, configs.num_hidden, configs.num_class
        # Stack all the variables together into a vector in order to apply the batch updating algorithm
        # Since there are two directions for the RNN, all the weight matrix associated with RNN will be
        # duplicated
        num_params = 2 * (num_input * num_hidden + \
            num_hidden * num_hidden + \
            num_hidden) + \
            2 * num_hidden * num_class + \
            num_class
        self.num_params = num_params
        self.theta = theano.shared(value=np.zeros(num_params, dtype=floatX),
                                   name='theta',
                                   borrow=True)
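        # Each weight matrix below is a reshaped view into this single flat vector, so one update of theta updates every parameter at once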
        # Incremental index
        param_idx = 0
        # 1, Feed-forward matrix for forward direction: W_forward
        self.W_forward = self.theta[param_idx:param_idx +
                                    num_input * num_hidden].reshape(
                                        (num_input, num_hidden))
        self.W_forward.name = 'W_forward_RNN'
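        # Glorot/Xavier-style uniform initialization, bounds +/- sqrt(6 / (fan_in + fan_out)); the same scheme is used for the remaining weight matrices below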
        W_forward_init = np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (num_input + num_hidden)),
            high=np.sqrt(6.0 / (num_input + num_hidden)),
            size=(num_input, num_hidden)),
                                    dtype=floatX)
        param_idx += num_input * num_hidden
        # 1, Feed-forward matrix for backward direction: W_backward
        self.W_backward = self.theta[param_idx:param_idx +
                                     num_input * num_hidden].reshape(
                                         (num_input, num_hidden))
        self.W_backward.name = 'W_backward_RNN'
        W_backward_init = np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (num_input + num_hidden)),
            high=np.sqrt(6.0 / (num_input + num_hidden)),
            size=(num_input, num_hidden)),
                                     dtype=floatX)
        param_idx += num_input * num_hidden
        # 2, Recurrent matrix for forward direction: U_forward
        self.U_forward = self.theta[param_idx:param_idx +
                                    num_hidden * num_hidden].reshape(
                                        (num_hidden, num_hidden))
        self.U_forward.name = 'U_forward_RNN'
        U_forward_init = np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (num_hidden + num_hidden)),
            high=np.sqrt(6.0 / (num_hidden + num_hidden)),
            size=(num_hidden, num_hidden)),
                                    dtype=floatX)
        param_idx += num_hidden * num_hidden
        # 2, Recurrent matrix for backward direction: U_backward
        self.U_backward = self.theta[param_idx:param_idx +
                                     num_hidden * num_hidden].reshape(
                                         (num_hidden, num_hidden))
        self.U_backward.name = 'U_backward_RNN'
        U_backward_init = np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (num_hidden + num_hidden)),
            high=np.sqrt(6.0 / (num_hidden + num_hidden)),
            size=(num_hidden, num_hidden)),
                                     dtype=floatX)
        param_idx += num_hidden * num_hidden
        # 3, Bias parameter for the hidden-layer forward direction RNN
        self.b_forward = self.theta[param_idx:param_idx + num_hidden]
        self.b_forward.name = 'b_forward_RNN'
        b_forward_init = np.zeros(num_hidden, dtype=floatX)
        param_idx += num_hidden
        # 3, Bias parameter for the hidden-layer backward direction RNN
        self.b_backward = self.theta[param_idx:param_idx + num_hidden]
        self.b_backward.name = 'b_backward_RNN'
        b_backward_init = np.zeros(num_hidden, dtype=floatX)
        param_idx += num_hidden
        # Weight matrix for softmax function
        self.W_softmax = self.theta[param_idx:param_idx +
                                    2 * num_hidden * num_class].reshape(
                                        (2 * num_hidden, num_class))
        self.W_softmax.name = 'W_softmax'
        W_softmax_init = np.asarray(np.random.uniform(
            low=-np.sqrt(6.0 / (2 * num_hidden + num_class)),
            high=np.sqrt(6.0 / (2 * num_hidden + num_class)),
            size=(2 * num_hidden, num_class)),
                                    dtype=floatX)
        param_idx += 2 * num_hidden * num_class
        # Bias vector for softmax function
        self.b_softmax = self.theta[param_idx:param_idx + num_class]
        self.b_softmax.name = 'b_softmax'
        b_softmax_init = np.zeros(num_class, dtype=floatX)
        param_idx += num_class
        # Set all the default parameters into theta
        self.theta.set_value(
            np.concatenate([
                x.ravel()
                for x in (W_forward_init, W_backward_init, U_forward_init,
                          U_backward_init, b_forward_init, b_backward_init,
                          W_softmax_init, b_softmax_init)
            ]))
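        # Sanity check: the offsets consumed while slicing theta must add up to the declared parameter count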
        assert param_idx == num_params
        # h[0], zero vector, treated as constants
        self.h_start = theano.shared(value=np.zeros(num_hidden, dtype=floatX),
                                     name='h_start',
                                     borrow=True)
        self.h_end = theano.shared(value=np.zeros(num_hidden, dtype=floatX),
                                   name='h_end',
                                   borrow=True)

        # recurrent function used to compress a sequence of input vectors
        # the first dimension should correspond to time
        def forward_step(x_t, h_tm1):
            h_t = self.act.activate(T.dot(x_t, self.W_forward) + \
                  T.dot(h_tm1, self.U_forward) + self.b_forward)
            return h_t

        def backward_step(x_t, h_tm1):
            h_t = self.act.activate(T.dot(x_t, self.W_backward) + \
                  T.dot(h_tm1, self.U_backward) + self.b_backward)
            return h_t

        # Forward and backward representation over time
        self.forward_h, _ = theano.scan(fn=forward_step,
                                        sequences=self.input,
                                        outputs_info=[self.h_start],
                                        truncate_gradient=configs.bptt)
        self.backward_h, _ = theano.scan(fn=backward_step,
                                         sequences=self.input,
                                         outputs_info=[self.h_end],
                                         truncate_gradient=configs.bptt,
                                         go_backwards=True)
        # Summarize each direction by averaging its hidden states over time (the commented lines below would keep only the final state instead)
        # self.h_start_star = self.forward_h[-1]
        # self.h_end_star = self.backward_h[-1]
        self.h_start_star = T.mean(self.forward_h, axis=0)
        self.h_end_star = T.mean(self.backward_h, axis=0)
        # L1, L2 regularization
        self.L1_norm = T.sum(T.abs_(self.W_forward) + T.abs_(self.W_backward) + \
              T.abs_(self.U_forward) + T.abs_(self.U_backward) + \
              T.abs_(self.W_softmax))
        self.L2_norm = T.sum(self.W_forward ** 2) + T.sum(self.W_backward ** 2) + \
              T.sum(self.U_forward ** 2) + T.sum(self.U_backward ** 2) + \
              T.sum(self.W_softmax ** 2)
        # Build function to show the learned representation for different sentences
        self.show_forward = theano.function(inputs=[self.input],
                                            outputs=self.h_start_star)
        self.show_backward = theano.function(inputs=[self.input],
                                             outputs=self.h_end_star)
        ##################################################################################
        # Correlated BRNN
        ##################################################################################
        # Concatenate these two vectors into one
        self.h = T.concatenate([self.h_start_star, self.h_end_star], axis=0)
        # Dropout parameter
        srng = T.shared_randomstreams.RandomStreams(configs.random_seed)
        mask = srng.binomial(n=1, p=1 - configs.dropout, size=self.h.shape)
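        # Apply a multiplicative binary dropout mask to the concatenated representation (no inverted-dropout rescaling is applied here)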
        self.h *= T.cast(mask, floatX)
        # Use concatenated vector as input to the Softmax/MLP classifier
        self.output = T.nnet.softmax(
            T.dot(self.h, self.W_softmax) + self.b_softmax)
        self.pred = T.argmax(self.output, axis=1)
        # Build cost function
        self.cost = -T.mean(
            T.log(self.output)[T.arange(self.truth.shape[0]), self.truth])
        if configs.regularization:
            self.cost += configs.lambda1 * self.L2_norm
        # Compute gradient
        self.gradtheta = T.grad(self.cost, self.theta)
        self.gradinput = T.grad(self.cost, self.input)
        # Build objective function
        # Compute the gradients to parameters
        self.compute_cost_and_gradient = theano.function(
            inputs=[self.input, self.truth],
            outputs=[self.cost, self.gradtheta])
        # Compute the gradients to inputs
        self.compute_input_gradient = theano.function(
            inputs=[self.input, self.truth], outputs=self.gradinput)
        # Build prediction function
        self.predict = theano.function(inputs=[self.input], outputs=self.pred)
        if verbose:
            pprint('*' * 50)
            pprint(
                'Finished constructing Bidirectional Recurrent Neural Network (BRNN)'
            )
            pprint('Size of input dimension: %d' % configs.num_input)
            pprint('Size of hidden/recurrent dimension: %d' %
                   configs.num_hidden)
            pprint('Size of output dimension: %d' % configs.num_class)
            pprint('Is regularization applied? %s' %
                   ('yes' if configs.regularization else 'no'))
            if configs.regularization:
                pprint('Coefficient of regularization term: %f' %
                       configs.lambda1)
            pprint('BPTT step: %d' % configs.bptt)
            pprint('Number of free parameters in BRNN: %d' % self.num_params)
            pprint('*' * 50)