Example #1
    def __init__(self, numpy_rng, n_ins=784, n_outs=24, l1_reg=None, l2_reg=None,
                 hidden_layers_sizes=[500, 500],
                 hidden_activation='tanh', output_activation='linear', var_floor=0.01,
                 n_component=1, beta_opt=False, use_rprop=0, rprop_init_update=0.001,
                 eff_sample_size=0.8, mean_log_det=-100.0):

        logger = logging.getLogger("Multi-stream DNN initialization")

        self.sigmoid_layers = []
        self.params = []
        self.delta_params = []

        self.final_layers = []
        
        self.n_outs = n_outs

        self.n_layers = len(hidden_layers_sizes)
        
        self.output_activation = output_activation
        self.var_floor = var_floor

        self.use_rprop = use_rprop
        self.rprop_init_update = rprop_init_update
        
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.beta_opt = beta_opt
        self.eff_sample_size = eff_sample_size
        self.mean_log_det = mean_log_det
        
        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.matrix('y') 

        for i in xrange(self.n_layers):
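            # The first hidden layer reads the network input; deeper layers
            # read the previous hidden layer's output.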
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)  # alternative: T.nnet.sigmoid
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        hidden_output_size = hidden_layers_sizes[-1]
        
        self.final_layer = MixtureDensityOutputLayer(rng=numpy_rng,
                                                     input=sigmoid_layer.output,
                                                     n_in=hidden_output_size,
                                                     n_out=self.n_outs,
                                                     n_component=n_component,
                                                     var_floor=self.var_floor)
        self.params.extend(self.final_layer.params)
        self.delta_params.extend(self.final_layer.delta_params)        

        ### Maximum likelihood
        self.finetune_cost = 0.0

        self.errors = 0.0

        # Convert the requested effective sample size into the beta parameter
        # used by the beta_opt objective below.
        epsd = self.eff_sample_size**(-2.0/(n_outs + 2.0))
        beta = (epsd - 1.0) + math.sqrt(epsd*(epsd - 1.0))

        if self.beta_opt:
            assert n_component == 1, "beta optimisation only implemented for single-component MDNs"
            for i in xrange(n_component):
                sigma = self.final_layer.sigma[:, i*n_outs:(i+1)*n_outs]
                mu    = self.final_layer.mu[:, i*n_outs:(i+1)*n_outs]
                mix_weight = self.final_layer.mix[:, i]
                
                xEx = -0.5 * beta * T.sum(((self.y - mu)**2) * T.inv(sigma), axis=1)
                exponent = (0.5 * (n_outs + 2.0) * T.log(1 + beta)) + xEx
                point_fit = T.exp(exponent) - beta
                
                log_det_mult = -0.5 * beta * T.sum(T.log(sigma), axis=1)
                
                log_det_mult += (0.5 * beta * self.mean_log_det) # normalise by mean_log_det
                
                beta_obj = (mix_weight**2) * point_fit * T.exp(log_det_mult)
                
                self.finetune_cost += -T.mean(beta_obj)
    
            # lines to compute debugging information for later printing
            #self.errors = T.min(T.min(T.log(sigma), axis=1))
            #self.errors = T.mean(T.sum(T.log(sigma), axis=1)) # computes mean_log_det
            #self.errors = -xEx # (vector quantity) should be about 0.5 * beta * n_outs
            #self.errors = point_fit  # (vector quantity) should be about one
            #self.errors = T.mean(T.exp(exponent)) / T.exp(T.max(exponent)) # fraction of the data used, should be about efficiency
            #self.errors = T.mean(point_fit) # should be about one
            #self.errors = log_det_mult # (vector quantity) about zero, or always less if using Rprop
            #self.errors = beta_obj # (vector quantity) objective function terms
            #self.errors = self.finetune_cost # disable this line below when debugging
        else:
            all_mix_prob = []
            
            print n_component
            for i in xrange(n_component):
                sigma = self.final_layer.sigma[:, i*n_outs:(i+1)*n_outs]
                mu    = self.final_layer.mu[:, i*n_outs:(i+1)*n_outs]
                mix_weight = self.final_layer.mix[:, i]

                xEx = -0.5 * T.sum(((self.y - mu)**2) * T.inv(sigma), axis=1)
                normaliser = 0.5 * ( n_outs * T.log(2 * numpy.pi) + T.sum(T.log(sigma), axis=1))
                exponent = xEx + T.log(mix_weight) - normaliser
                all_mix_prob.append(exponent)

            # Log-sum-exp: subtract the per-example maximum log-probability
            # before exponentiating, for numerical stability.
            max_exponent = T.max(all_mix_prob, axis=0, keepdims=True)
            mod_exponent = T.as_tensor_variable(all_mix_prob) - max_exponent

            self.finetune_cost = -T.mean(max_exponent + T.log(T.sum(T.exp(mod_exponent), axis=0)))

            #self.errors = self.finetune_cost
            

        ### L2-norm
        if self.l2_reg is not None:
            for i in xrange(self.n_layers-1):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
            self.finetune_cost += self.l2_reg * T.sqr(self.final_layer.W_mu).sum()
            self.finetune_cost += self.l2_reg * T.sqr(self.final_layer.W_sigma).sum()
            self.finetune_cost += self.l2_reg * T.sqr(self.final_layer.W_mix).sum()

        self.errors = self.finetune_cost # disable this line if debugging beta_opt
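
For reference, the maximum-likelihood branch above (the else case) minimises the negative log-likelihood of a diagonal-covariance Gaussian mixture, evaluated with the log-sum-exp trick. Per target vector y, with mixture weights \pi_i, means \mu_i and variances \sigma_i taken from the final layer, K = n_component and D = n_outs:

    \mathcal{L}(y) = -\log \sum_{i=1}^{K} \pi_i \, (2\pi)^{-D/2} \Big(\prod_{d=1}^{D} \sigma_{i,d}\Big)^{-1/2} \exp\Big(-\frac{1}{2} \sum_{d=1}^{D} \frac{(y_d - \mu_{i,d})^2}{\sigma_{i,d}}\Big)

finetune_cost is the mean of \mathcal{L}(y) over the minibatch.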
Example #2
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 n_outs=10, l1_reg=None, l2_reg=None,
                 hidden_layers_sizes=[500, 500],
                 hidden_activation='tanh', output_activation='linear',
                 projection_insize=100, projection_outsize=10,
                 first_layer_split=True, expand_by_minibatch=False,
                 initial_projection_distrib='gaussian',
                 use_rprop=0, rprop_init_update=0.001):
        ## beginning at label index 1, 5 blocks of 49 inputs each to be projected to 10 dim.

        logger = logging.getLogger("TP-DNN initialization")
        
        self.projection_insize = projection_insize
        self.projection_outsize = projection_outsize

        self.sigmoid_layers = []
        self.params = []
        self.delta_params = []
        self.n_layers = len(hidden_layers_sizes)
        
        self.output_activation = output_activation

        self.use_rprop = use_rprop
        self.rprop_init_update = rprop_init_update
            
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.numpy_rng = numpy_rng

        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        if expand_by_minibatch:
            self.x_proj = T.ivector('x_proj') 
        else:
            self.x_proj = T.matrix('x_proj') 
        self.y = T.matrix('y') 

        # With expand_by_minibatch, x_proj holds one integer token index per
        # minibatch row; expand it on the fly into a one-hot matrix of width
        # projection_insize.
        if expand_by_minibatch:
            z = theano.tensor.zeros((self.x_proj.shape[0], self.projection_insize))
            indexes = self.x_proj 
            one_hot = theano.tensor.set_subtensor(z[theano.tensor.arange(self.x_proj.shape[0]), indexes], 1)
            
            projection_input = one_hot
        else:
            projection_input = self.x_proj

        ## Make projection layer        
        self.projection_layer = TokenProjectionLayer(rng=numpy_rng,
                                        input=projection_input,
                                        projection_insize=self.projection_insize,
                                        projection_outsize=self.projection_outsize,
                                        initial_projection_distrib=initial_projection_distrib)
 
        self.params.extend(self.projection_layer.params)
        self.delta_params.extend(self.projection_layer.delta_params)

        # The projected token representation is concatenated with the main
        # input features to form the first hidden layer's input.
        first_layer_input = T.concatenate([self.x, self.projection_layer.output], axis=1)

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins + self.projection_outsize
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = first_layer_input
            else:
                layer_input = self.sigmoid_layers[-1].output

            # With first_layer_split, the first layer keeps separate weight
            # matrices for the main input (n_in1) and the projected tokens (n_in2).
            if i == 0 and first_layer_split:
                sigmoid_layer = SplitHiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in1=n_ins, n_in2=self.projection_outsize,
                                            n_out=hidden_layers_sizes[i],
                                            activation=T.tanh)  # alternative: T.nnet.sigmoid
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in=input_size,
                                            n_out=hidden_layers_sizes[i],
                                            activation=T.tanh)  # alternative: T.nnet.sigmoid
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        # add final layer
        if self.output_activation == 'linear':
            self.final_layer = LinearLayer(rng=numpy_rng,
                                           input=self.sigmoid_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        elif self.output_activation == 'sigmoid':
            self.final_layer = SigmoidLayer(
                 rng=numpy_rng,
                 input=self.sigmoid_layers[-1].output,
                 n_in=hidden_layers_sizes[-1],
                 n_out=n_outs, activation=T.nnet.sigmoid)
        else:
            logger.critical("Output activation function '%s' is not supported!" % (self.output_activation))
            sys.exit(1)

        self.params.extend(self.final_layer.params)
        self.delta_params.extend(self.final_layer.delta_params)

        ## params for 2 hidden layers, projection, first split layer, will look like this:
        ## [W_proj; W_1a, W_1b, b_1; W_2 b_2; W_o, b_o]

        ### MSE
        self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))

        self.errors = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))
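
The expand_by_minibatch branch above builds the projection input from integer token indices with set_subtensor. A minimal plain-numpy sketch of the same one-hot expansion (the sizes and values are illustrative, not from the source):

import numpy

projection_insize = 5                      # illustrative width
x_proj = numpy.array([1, 3, 0])            # one token index per minibatch row
one_hot = numpy.zeros((x_proj.shape[0], projection_insize))
one_hot[numpy.arange(x_proj.shape[0]), x_proj] = 1
# each row now contains a single 1 in the column given by its token index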
Example #3
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 n_outs=10, l1_reg=None, l2_reg=None,
                 hidden_layers_sizes=[500, 500],
                 hidden_activation='tanh', output_activation='linear',
                 use_rprop=0, rprop_init_update=0.001):

        logger = logging.getLogger("DNN initialization")

        self.sigmoid_layers = []
        self.params = []
        self.delta_params = []
        self.n_layers = len(hidden_layers_sizes)
        
        self.output_activation = output_activation

        self.use_rprop = use_rprop
        self.rprop_init_update = rprop_init_update
            
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.matrix('y') 

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)  # alternative: T.nnet.sigmoid
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        # add final layer
        if self.output_activation == 'linear':
            self.final_layer = LinearLayer(rng=numpy_rng,
                                           input=self.sigmoid_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        elif self.output_activation == 'sigmoid':
            self.final_layer = SigmoidLayer(
                 rng=numpy_rng,
                 input=self.sigmoid_layers[-1].output,
                 n_in=hidden_layers_sizes[-1],
                 n_out=n_outs, activation=T.nnet.sigmoid)
        else:
            logger.critical("Output activation function '%s' is not supported!" % (self.output_activation))
            sys.exit(1)

        self.params.extend(self.final_layer.params)
        self.delta_params.extend(self.final_layer.delta_params)

        ### MSE
        self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))

        self.errors = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))

        ### L1-norm
        if self.l1_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        ### L2-norm
        if self.l2_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
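
For reference, a minimal plain-numpy sketch of this finetuning cost with the L1/L2 penalties applied per weight matrix (array names and sizes are illustrative, not from the source):

import numpy

rng = numpy.random.RandomState(0)
y_pred = rng.randn(4, 10)                  # network outputs for a minibatch of 4
y_true = rng.randn(4, 10)                  # matching targets
W = rng.randn(784, 500)                    # one hidden-layer weight matrix
l1_reg, l2_reg = 1e-5, 1e-5

# MSE as above: sum of squared errors over outputs, mean over the minibatch.
cost = numpy.mean(numpy.sum((y_pred - y_true) ** 2, axis=1))
cost += l1_reg * numpy.abs(W).sum()        # L1 penalty on the weight matrix
cost += l2_reg * numpy.square(W).sum()     # L2 penalty on the weight matrix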