Example #1
    def lenet5(self):

        with tf.name_scope('LeNet5'):

            self.conv1 = tools.conv('conv1',
                                    self.input,
                                    32,
                                    kernel_size=[5, 5],
                                    stride=[1, 1, 1, 1],
                                    is_trainable=self.is_trainable)
            self.pool1 = tools.pool('pool1',
                                    self.conv1,
                                    kernel=[1, 2, 2, 1],
                                    stride=[1, 2, 2, 1],
                                    is_max_pool=True)

            self.conv2 = tools.conv('conv2',
                                    self.pool1,
                                    64,
                                    kernel_size=[5, 5],
                                    stride=[1, 1, 1, 1],
                                    is_trainable=self.is_trainable)
            self.pool2 = tools.pool('pool2',
                                    self.conv2,
                                    kernel=[1, 2, 2, 1],
                                    stride=[1, 2, 2, 1],
                                    is_max_pool=True)

            self.fc1 = tools.fc_layer('fc1', self.pool2, out_nodes=512)
            self.dropout1 = tools.dropout('dropout1', self.fc1, self.keep_prob)

            self.logits = tools.fc_layer('fc2',
                                         self.dropout1,
                                         use_relu=False,
                                         out_nodes=self.n_classes)
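
Example #1 assumes a wrapper module tools (providing conv, pool, fc_layer, and dropout) and a handful of attributes on the surrounding class: self.input, self.keep_prob, self.is_trainable, and self.n_classes. A minimal sketch of how such a class might be set up before calling lenet5() is given below, written against the TensorFlow 1.x graph API; the class name, input shape, and defaults are illustrative assumptions, not part of the original code.

import tensorflow as tf   # TensorFlow 1.x graph API assumed

class LeNet5(object):
    # The lenet5() method shown in Example #1 would be defined on this class.

    def __init__(self, n_classes=10, is_trainable=True):
        # 28x28 grayscale images (e.g. MNIST); the input shape is an assumption.
        self.input = tf.placeholder(tf.float32, [None, 28, 28, 1], name='input')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.n_classes = n_classes
        self.is_trainable = is_trainable

# model = LeNet5(); model.lenet5() then builds model.logits; feed keep_prob=1.0 at evaluation time.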
Example #2
    def gradient(self, weight_vector, training_data, training_targets):
        layer_indexes = range(len(self.layers))[::-1]  # reversed
        self.weights = self.unpack(np.array(weight_vector))
        input_signals, derivatives = self.update(training_data, trace=True)

        out = input_signals[-1]
        cost_derivative = self.cost_function(out,
                                             training_targets,
                                             derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = self.cost_function(out, training_targets)

        layers = []
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # calculate the weight change
            dropped = dropout(
                input_signals[i],
                # dropout probability
                self.hidden_layer_dropout
                if i > 0 else self.input_layer_dropout)

            layers.append(np.dot(delta, add_bias(dropped)).T.flat)

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(self.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]
        #end weight adjustment loop

        return np.hstack(reversed(layers))
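
Since gradient() returns the flattened gradient of the cost with respect to the packed weight vector, it can be sanity-checked against central finite differences. The sketch below is a hypothetical helper that reuses the same interface the example relies on (unpack, update, cost_function); set the dropout rates to zero first, or the random masks will dominate the comparison, and expect agreement only up to the cost function's sample-averaging convention.

import numpy as np

def check_gradient(network, weight_vector, data, targets, eps=1e-6):
    # Scalar loss as a function of the flattened weight vector.
    def loss(w):
        network.weights = network.unpack(np.array(w))
        out = network.update(data)                  # forward pass only
        return network.cost_function(out, targets)

    analytic = np.asarray(network.gradient(weight_vector, data, targets))
    numeric = np.zeros(len(weight_vector))
    for k in range(len(weight_vector)):
        w_plus = np.array(weight_vector, dtype=float)
        w_minus = np.array(weight_vector, dtype=float)
        w_plus[k] += eps
        w_minus[k] -= eps
        numeric[k] = (loss(w_plus) - loss(w_minus)) / (2 * eps)

    # Largest absolute deviation between the analytic and numerical gradients.
    return np.max(np.abs(analytic - numeric))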
Example #3
    def gradient(self, weight_vector, training_data, training_targets ):
        layer_indexes              = range( len(self.layers) )[::-1]    # reversed
        self.weights               = self.unpack( np.array(weight_vector) )
        input_signals, derivatives = self.update( training_data, trace=True )

        out                        = input_signals[-1]
        cost_derivative            = self.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = self.cost_function(out, training_targets )

        layers = []
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # calculate the weight change
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        self.hidden_layer_dropout if i > 0 else self.input_layer_dropout
                    )

            layers.append(np.dot( delta, add_bias(dropped) ).T.flat)

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( self.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
        #end weight adjustment loop

        return np.hstack( reversed(layers) )
Example #4
def parallel_backpropagation_one_process(network, trainingset, block_number, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()):
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )

    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    momentum                   = collections.defaultdict( int )
    epoch                      = 0

    input_signals, derivatives = network.update( training_data, trace=True )

    out                        = input_signals[-1]
    error                      = network.cost_function(out, training_targets )
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]

    while epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
                    )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        error                      = network.cost_function(out, training_targets )
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]

    result = []
    result.append(block_number)
    result.append(out)
    result.append(error)
    result.append(cost_derivative)
    result.append(delta)
    return result
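
Examples #2 through #4 call two helpers that are not shown in this listing, dropout and add_bias. A plausible minimal implementation is sketched below as an assumption about their behaviour (the actual library may rescale the retained activations); add_bias prepends a constant-1 column so that row 0 of each weight matrix acts as the bias, which is why the back-propagation steps index weights[i][1:, :].

import numpy as np

def add_bias(A):
    # Prepend a column of ones: (n_samples, n_in) -> (n_samples, n_in + 1).
    return np.hstack([np.ones((A.shape[0], 1)), A])

def dropout(A, p=0.0):
    # Zero each activation independently with probability p;
    # with p = 0 the input is returned unchanged.
    if p > 0:
        A = A * np.random.binomial(1, 1.0 - p, size=A.shape)
    return A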
Example #5
def VGG16N(x, n_classes, keep_prob, is_pretrain=True):
    # whether the conv layers are trainable (frozen vs. fine-tuned) is controlled by is_pretrain
    with tf.name_scope('VGG16'):
        x = tools.conv('conv1_1', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv1_2', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool1'):
            x = tools.pool('pool1', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv2_1', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv2_2', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool2'):
            x = tools.pool('pool2', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv3_1', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv3_2', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv3_3', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool3'):
            x = tools.pool('pool3', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv4_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv4_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv4_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool4'):
            x = tools.pool('pool4', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv5_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv5_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv5_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool5'):
            x = tools.pool('pool5', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.FC_layer('fc6', x, out_nodes=4096)
        #        with tf.name_scope('batch_norm1'):
        #            x = tools.batch_norm(x)
        x = tools.dropout(x, keep_prob)
        x = tools.FC_layer('fc7', x, out_nodes=4096)
        #        with tf.name_scope('batch_norm2'):
        #            x = tools.batch_norm(x)
        x = tools.dropout(x, keep_prob)
        x = tools.FC_layer('fc8', x, out_nodes=n_classes)

        return x


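
A brief usage sketch for VGG16N follows, assuming the TensorFlow 1.x graph API; the placeholder shapes, one-hot label encoding, and optimizer are illustrative assumptions rather than part of the original code. keep_prob would typically be fed as 0.5 during training and 1.0 at evaluation.

import tensorflow as tf   # TensorFlow 1.x assumed

n_classes = 1000
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
labels = tf.placeholder(tf.float32, [None, n_classes])   # one-hot targets
keep_prob = tf.placeholder(tf.float32)

logits = VGG16N(images, n_classes, keep_prob, is_pretrain=True)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)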
Example #6
    def lenet_300_100(self):

        with tf.name_scope('LeNet_300_100'):

            self.fc1 = tools.fc_layer('fc1', self.input, out_nodes=300)
            self.fc2 = tools.fc_layer('fc2', self.fc1, out_nodes=100)
            # dropout on the second hidden layer (fc2), which feeds the classifier
            self.dropout1 = tools.dropout('dropout1', self.fc2, self.keep_prob)
            self.logits = tools.fc_layer('fc3',
                                         self.dropout1,
                                         use_relu=False,
                                         out_nodes=self.n_classes)
Example #7
 def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()  ):
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
             "ERROR: output size varies from the defined output setting"
     
     
     training_data              = np.array( [instance.features for instance in trainingset ] )
     training_targets           = np.array( [instance.targets  for instance in trainingset ] )
                             
     layer_indexes              = range( len(self.layers) )[::-1]    # reversed
     momentum                   = collections.defaultdict( int )
     MSE                        = ( ) # inf
     epoch                      = 0
     
     input_signals, derivatives = self.update( training_data, trace=True )
     
     out                        = input_signals[-1]
     error                      = (out - training_targets).T
     delta                      = error * derivatives[-1]
     MSE                        = np.mean( np.power(error,2) )
     
     while MSE > ERROR_LIMIT and epoch < max_iterations:
         epoch += 1
         
         for i in layer_indexes:
             # Loop over the weight layers in reversed order to calculate the deltas
             
             # perform dropout
             dropped = dropout( 
                         input_signals[i], 
                         # dropout probability
                         self.hidden_layer_dropout if i else self.input_layer_dropout
                     )
             
             # calculate the weight change
             dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]
             
              if i != 0:
                 """Do not calculate the delta unnecessarily."""
                 # Skip the bias weight
                 weight_delta = np.dot( self.weights[ i ][1:,:], delta )
     
                 # Calculate the delta for the subsequent layer
                 delta = weight_delta * derivatives[i-1]
             
             # Store the momentum
             momentum[i] = dW
                                 
             # Update the weights
             self.weights[ i ] += dW
         #end weight adjustment loop
         
         input_signals, derivatives = self.update( training_data, trace=True )
         out                        = input_signals[-1]
         error                      = (out - training_targets).T
         delta                      = error * derivatives[-1]
         MSE                        = np.mean( np.power(error,2) )
         
         
         if epoch%1000==0:
             # Show the current training status
             print "* current network error (MSE):", MSE
     
     print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
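
A note on the MSE = ( ) and max_iterations = ( ) defaults used above and in several of the later examples: these are Python 2 idioms. In Python 2 any number compares as less than any tuple, so an empty tuple effectively behaves like positive infinity in the loop conditions (Python 3 raises a TypeError for such comparisons).

# Python 2 behaviour the loop conditions rely on:
>>> 10 ** 9 < ()     # epoch < max_iterations is always True
True
>>> () > 1e-3        # MSE > ERROR_LIMIT is True before the first update
True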
Example #9
def backpropagation(network, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()  ):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"


    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )

    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    momentum                   = collections.defaultdict( int )
    epoch                      = 0

    input_signals, derivatives = network.update( training_data, trace=True )

    out                        = input_signals[-1]
    error                      = network.cost_function(out, training_targets )
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
                    )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        error                      = network.cost_function(out, training_targets )
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]


        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network:
        network.save_to_file()
Example #10
    def backpropagation(self,
                        trainingset,
                        ERROR_LIMIT=1e-3,
                        learning_rate=0.3,
                        momentum_factor=0.9):

        assert trainingset[0].features.shape[0] == self.n_inputs, \
                "ERROR: input size varies from the defined input setting"

        assert trainingset[0].targets.shape[0]  == self.n_outputs, \
                "ERROR: output size varies from the defined output setting"

        training_data = np.array(
            [instance.features for instance in trainingset])
        training_targets = np.array(
            [instance.targets for instance in trainingset])

        MSE = ()  # inf
        neterror = None
        momentum = collections.defaultdict(int)

        batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[0]

        epoch = 0
        while MSE > ERROR_LIMIT:
            epoch += 1

            for start in xrange(0, len(training_data), batch_size):
                batch = training_data[start:start + batch_size]
                batch_targets = training_targets[start:start + batch_size]
                # Forward-propagate the current mini-batch (not the full set)
                input_layers = self.update(batch, trace=True)
                out = input_layers[-1]

                error = out - batch_targets
                delta = error
                MSE = np.mean(np.power(error, 2))

                loop = itertools.izip(
                    xrange(len(self.weights) - 1, -1, -1),
                    reversed(self.weights),
                    reversed(input_layers[:-1]),
                )

                for i, weight_layer, input_signals in loop:
                    # Loop over the weight layers in reversed order to calculate the deltas

                    if i == 0:
                        dropped = dropout(
                            add_bias(input_signals).T,
                            self.input_layer_dropout)
                    else:
                        dropped = dropout(
                            add_bias(input_signals).T,
                            self.hidden_layer_dropout)

                    # Calculate weight change
                    dW = learning_rate * np.dot(
                        dropped, delta) + momentum_factor * momentum[i]

                    if i != 0:
                        """Do not calculate the delta unnecessarily."""
                        # Skipping the bias weight during calculation.
                        weight_delta = np.dot(delta, weight_layer[1:, :].T)

                        # Calculate the delta for the subsequent layer
                        delta = np.multiply(
                            weight_delta,
                            self.activation_functions[i - 1](input_signals,
                                                             derivative=True))

                    # Store the momentum
                    momentum[i] = dW

                    # Update the weights
                    self.weights[i] -= dW

            if epoch % 1000 == 0:
                # Show the current training status
                print "* current network error (MSE):", MSE

        print "* Converged to error bound (%.4g) with MSE = %.4g." % (
            ERROR_LIMIT, MSE)
        print "* Trained for %d epochs." % epoch
Example #11
def backpropagation(network,
                    trainingset,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=()):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)

    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out,
                                            training_targets,
                                            derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout
                if i > 0 else network.input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * np.dot(
                delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out,
                                                training_targets,
                                                derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_to_file()
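
Every backpropagation variant in this listing consumes a trainingset (and, later, a testset) of objects that expose .features and .targets as 1-D NumPy arrays. A hypothetical minimal Instance container compatible with that interface is sketched below; the real library's class may differ.

import numpy as np

class Instance(object):
    # One training example: a feature vector and a target vector.
    def __init__(self, features, targets):
        self.features = np.array(features, dtype=float)
        self.targets = np.array(targets, dtype=float)

# For example, XOR with a single output node:
trainingset = [
    Instance([0, 0], [0]),
    Instance([0, 1], [1]),
    Instance([1, 0], [1]),
    Instance([1, 1], [0]),
]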
Example #12
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, save_trained_network = False  ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features  for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
          
    momentum                   = collections.defaultdict( int )
    
    input_signals, derivatives = network.update( training_data, trace=True )
    out                        = input_signals[-1]
    cost_derivative            = cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]
    error                      = cost_function(network.update( test_data ), test_targets )
    
    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    epoch                      = 0
    n_samples                  = float(training_data.shape[0])
    
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas
            
            # perform dropout
            dropped = dropout( 
                        input_signals[i], 
                        # dropout probability
                        hidden_layer_dropout if i > 0 else input_layer_dropout
                    )
            
            # calculate the weight change
            dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/n_samples).T + momentum_factor * momentum[i]
            
            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
            
            # Store the momentum
            momentum[i] = dW
                                
            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop
        
        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        cost_derivative            = cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = cost_function(network.update( test_data ), test_targets )
        
        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
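
Examples #12 and #13 take the cost function as an argument and call it both as cost_function(out, targets) for a scalar error and as cost_function(out, targets, derivative=True) for the gradient with respect to the network output. A hedged sketch of a squared-error cost matching that calling convention is shown below; the library's own cost functions may normalise differently.

import numpy as np

def sum_squared_error(outputs, targets, derivative=False):
    # d(cost)/d(outputs) when derivative=True; the caller transposes it.
    if derivative:
        return outputs - targets
    return 0.5 * np.sum(np.power(outputs - targets, 2))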
Example #13
def backpropagation(network, trainingset, testset, cost_function, evaluation_function = None, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), batch_size = 0, input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, print_rate = 1000, save_trained_network = False  ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    # Whether to use another function for printing the dataset error than the cost function. 
    # This is useful if you train the network with the MSE cost function, but are going to 
    # classify rather than regress on your data.
    calculate_print_error      = evaluation_function if evaluation_function != None else cost_function
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
    
    batch_size                 = batch_size if batch_size != 0 else training_data.shape[0] 
    batch_training_data        = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets     = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices              = range(len(batch_training_data))       # fast reference to batches
    
    error                      = calculate_print_error(network.update( test_data ), test_targets )
    reversed_layer_indexes     = range( len(network.layers) )[::-1]
    momentum                   = collections.defaultdict( int )
    
    epoch                      = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        random.shuffle(batch_indices) # Shuffle the order in which the batches are processed between the iterations
        
        for batch_index in batch_indices:
            batch_data                 = batch_training_data[    batch_index ]
            batch_targets              = batch_training_targets[ batch_index ]
            batch_size                 = float( batch_data.shape[0] )
            
            input_signals, derivatives = network.update( batch_data, trace=True )
            out                        = input_signals[-1]
            cost_derivative            = cost_function( out, batch_targets, derivative=True ).T
            delta                      = cost_derivative * derivatives[-1]
            
            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas
            
                # perform dropout
                dropped = dropout( 
                            input_signals[i], 
                            # dropout probability
                            hidden_layer_dropout if i > 0 else input_layer_dropout
                        )
            
                # calculate the weight change
                dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/batch_size).T + momentum_factor * momentum[i]
            
                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i-1]
            
                # Store the momentum
                momentum[i] = dW
                                
                # Update the weights
                network.weights[ i ] += dW
            #end weight adjustment loop
        
        error = calculate_print_error(network.update( test_data ), test_targets )
        
        if epoch%print_rate==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
Example #14
    def forward(self,
                emb_in,
                length,
                context,
                state_init,
                batch_size=1,
                mask=None,
                cmask=None):
        '''
			Build the computational graph which computes the hidden states.

			:type emb_in: theano variable
			:param emb_in: the input word embeddings

			:type length: theano variable
			:param length: the length of the input

			:type context: theano variable
			:param context: the context vectors

			:type state_init: theano variable
			:param state_init: the initial states

			:type batch_size: int
			:param batch_size: the batch size

			:type mask: theano variable
			:param mask: indicate the length of each sequence in one batch

			:type cmask: theano variable
			:param cmask: indicate the length of each context sequence in one batch
		'''

        # calculate the input vector for inputter, updater and reseter
        att_c = tools.dot3d(context,
                            self.att_context)  # size: (length, batch_size,dim)
        state_in = (tensor.dot(emb_in, self.input_emb) +
                    self.input_emb_offset).reshape(
                        (length, batch_size, self.dim))
        gate_in = tensor.dot(emb_in, self.gate_emb).reshape(
            (length, batch_size, self.dim))
        reset_in = tensor.dot(emb_in, self.reset_emb).reshape(
            (length, batch_size, self.dim))

        if mask:
            scan_inp = [state_in, gate_in, reset_in, mask]
            scan_func = lambda x, g, r, m, h, c, attc, cm: self.forward_step(
                h, x, g, r, c, attc, m, cm)
        else:
            scan_inp = [state_in, gate_in, reset_in]
            scan_func = lambda x, g, r, h, c, attc: self.forward_step(
                h, x, g, r, c, attc)

        if self.verbose:
            outputs_info = [
                state_init, None, None, None, None, None, None, None, None,
                None, None, None, None
            ]
        else:
            outputs_info = [state_init, None, None]

        # calculate hidden states
        hiddens, updates = theano.scan(scan_func,
                                       sequences=scan_inp,
                                       outputs_info=outputs_info,
                                       non_sequences=[context, att_c, cmask],
                                       n_steps=length)
        c = hiddens[1]
        attentions = hiddens[2]

        # Add the initial state and discard the last hidden state
        state_before = tensor.concatenate((state_init.reshape(
            (1, state_init.shape[0], state_init.shape[1])), hiddens[0][:-1]))

        state_in_prev = tensor.dot(emb_in, self.readout_emb).reshape(
            (length, batch_size, self.dim))

        # calculate the energy for each word
        readout_c = tensor.dot(c, self.readout_context)
        readout_h = tensor.dot(state_before, self.readout_hidden)
        readout_h += self.readout_offset
        state_in_prev = tools.shift_one(state_in_prev)
        readout = readout_c + readout_h + state_in_prev
        readout = readout.reshape(
            (readout.shape[0] * readout.shape[1], readout.shape[2]))
        maxout = tools.maxout(readout, self.maxout)
        if self.dropout > 0.:
            logging.info('dropout ratio: ' + str(self.dropout))
            maxout = tools.dropout(maxout, self.dropout)

        outenergy = tensor.dot(maxout, self.probs_emb)
        outenergy_1 = outenergy
        outenergy = tensor.dot(outenergy, self.probs)
        outenergy_2 = outenergy

        outenergy += self.probs_offset
        if self.verbose:
            return hiddens, outenergy, state_in, gate_in, reset_in, state_in_prev, readout, maxout, outenergy_1, outenergy_2
        else:
            return hiddens, outenergy, attentions
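
In Example #14 the decoder applies tools.dropout(maxout, self.dropout) to the maxout features before the readout projection. A plausible Theano implementation of such a helper, using RandomStreams with inverted scaling, is sketched below; it is an assumption about what tools.dropout does, not the project's actual code.

import theano
from theano.tensor.shared_randomstreams import RandomStreams

_srng = RandomStreams(seed=1234)

def dropout(x, ratio):
    # Keep each unit with probability (1 - ratio) and rescale so the
    # expected activation is unchanged at training time.
    mask = _srng.binomial(size=x.shape, n=1, p=1.0 - ratio,
                          dtype=theano.config.floatX)
    return x * mask / (1.0 - ratio)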
Example #15
def backpropagation(network,
                    trainingset,
                    testset,
                    cost_function,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=(),
                    input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0,
                    save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * (np.dot(delta, add_bias(dropped)) /
                                   n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets,
                                        derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
Example #16
def backpropagation(network,
                    trainingset,
                    testset,
                    cost_function,
                    evaluation_function=None,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=(),
                    batch_size=0,
                    input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0,
                    print_rate=1000,
                    save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(
        training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(
        training_targets,
        math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(
        len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(
            batch_indices
        )  # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out,
                                            batch_targets,
                                            derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout)

                # calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(
                    dropped)) / batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            #end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
Example #17
 def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.3, momentum_factor = 0.9  ):
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.n_outputs, \
             "ERROR: output size varies from the defined output setting"
     
     
     training_data    = np.array( [instance.features for instance in trainingset ] )
     training_targets = np.array( [instance.targets for instance in trainingset ] )
     
     MSE              = ( ) # inf
     neterror         = None
     momentum         = collections.defaultdict( int )
     
     batch_size       = self.batch_size if self.batch_size != 0 else training_data.shape[0]
     
     epoch = 0
     while MSE > ERROR_LIMIT:
         epoch += 1
         
          for start in xrange( 0, len(training_data), batch_size ):
              batch             = training_data[start : start+batch_size]
              batch_targets     = training_targets[start : start+batch_size]
              # forward-propagate the current mini-batch (not the full set)
              input_layers      = self.update( batch, trace=True )
              out               = input_layers[-1]

              error             = out - batch_targets
              delta             = error
              MSE               = np.mean( np.power(error,2) )
         
         
             loop  = itertools.izip(
                             xrange(len(self.weights)-1, -1, -1),
                             reversed(self.weights),
                             reversed(input_layers[:-1]),
                         )
         
             for i, weight_layer, input_signals in loop:
                 # Loop over the weight layers in reversed order to calculate the deltas
             
                 if i == 0:
                     dropped = dropout( add_bias(input_signals).T, self.input_layer_dropout  )
                 else:
                     dropped = dropout( add_bias(input_signals).T, self.hidden_layer_dropout )
             
                 # Calculate weight change
                 dW = learning_rate * np.dot( dropped, delta ) + momentum_factor * momentum[i]
             
                  if i != 0:
                     """Do not calculate the delta unnecessarily."""
                     # Skipping the bias weight during calculation.
                     weight_delta = np.dot( delta, weight_layer[1:,:].T )
         
                     # Calculate the delta for the subsequent layer
                     delta = np.multiply(  weight_delta, self.activation_functions[i-1]( input_signals, derivative=True) )
             
                 # Store the momentum
                 momentum[i] = dW
             
                 # Update the weights
                 self.weights[ i ] -= dW
         
         if epoch%1000==0:
             # Show the current training status
             print "* current network error (MSE):", MSE
     
     print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch