def scipyoptimize(self, trainingset, method = "Newton-CG", ERROR_LIMIT = 1e-6, max_iterations = ()  ):
     from scipy.optimize import minimize
     
     training_data       = np.array( [instance.features for instance in trainingset ] )
     training_targets    = np.array( [instance.targets  for instance in trainingset ] )
     options             = {}
     
     if max_iterations < ():  # a numeric limit was supplied; in Python 2 any number compares less than the empty-tuple default
         options["maxiter"] = max_iterations
         
     results = minimize( 
         self.error, self.get_weights(), 
         args = (training_data, training_targets), 
         method = method,
         jac = self.gradient,
         tol = ERROR_LIMIT,                     # honour the ERROR_LIMIT argument (otherwise it goes unused)
         options = options 
     )
     
     optimized_weights = results.x
     
     self.weights = self.unpack( np.array(optimized_weights) )
     
     
     if not results.success:
         print "* ERROR: did not converge"
     
     print "* Error = %.3g." % results.fun
     print "* Trained for %d epochs." % results.nfev
     
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
 def scipyoptimize(self, trainingset, method = "Newton-CG", ERROR_LIMIT = 1e-6, max_iterations = ()  ):
     from scipy.optimize import minimize
     
     training_data        = np.array( [instance.features for instance in trainingset ] )
     training_targets     = np.array( [instance.targets  for instance in trainingset ] )
     minimization_options = {}
     
     if max_iterations < ():
         minimization_options["maxiter"] = max_iterations
         
     results = minimize( 
         self.error,                                     # The function we are minimizing
         self.get_weights(),                             # The vector (parameters) we are minimizing
         args    = (training_data, training_targets),    # Additional arguments to the error and gradient function
         method  = method,                               # The minimization strategy specified by the user
         jac     = self.gradient,                        # The gradient calculating function
         tol     = ERROR_LIMIT,                          # The error limit
         options = minimization_options,                 # Additional options
     )
     
     self.weights = self.unpack( results.x )
     
     
     if not results.success:
         print "[training] WARNING:", results.message
         print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )
     else:
         print "[training] Finished:"
         print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )
         
         if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
             self.save_to_file()
def scipyoptimize(network, trainingset, method = "Newton-CG", ERROR_LIMIT = 1e-6, max_iterations = ()  ):
    from scipy.optimize import minimize

    training_data        = np.array( [instance.features for instance in trainingset ] )
    training_targets     = np.array( [instance.targets  for instance in trainingset ] )
    minimization_options = {}

    if max_iterations < ():
        minimization_options["maxiter"] = max_iterations

    results = minimize(
        network.error,                                     # The function we are minimizing
        network.get_weights(),                             # The vector (parameters) we are minimizing
        args    = (training_data, training_targets),    # Additional arguments to the error and gradient function
        method  = method,                               # The minimization strategy specified by the user
        jac     = network.gradient,                        # The gradient calculating function
        tol     = ERROR_LIMIT,                          # The error limit
        options = minimization_options,                 # Additional options
    )

    network.weights = network.unpack( results.x )


    if not results.success:
        print "[training] WARNING:", results.message
        print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )
    else:
        print "[training] Finished:"
        print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )

        if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
            network.save_to_file()
Example #4
    def check_gradient(self, trainingset, cost_function, epsilon=1e-4):
        assert trainingset[0].features.shape[0] == self.n_inputs, \
            "ERROR: input size varies from the configuration. Configured as %d, instance had %d" % (self.n_inputs, trainingset[0].features.shape[0])
        assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
            "ERROR: output size varies from the configuration. Configured as %d, instance had %d" % (self.layers[-1][0], trainingset[0].targets.shape[0])

        training_data = np.array([
            instance.features for instance in trainingset
        ][:100])  # perform the test with at most 100 instances
        training_targets = np.array(
            [instance.targets for instance in trainingset][:100])

        # assign the weight_vector as the network topology
        initial_weights = np.array(self.get_weights())
        numeric_gradient = np.zeros(initial_weights.shape)
        perturbed = np.zeros(initial_weights.shape)
        n_samples = float(training_data.shape[0])

        print "[gradient check] Running gradient check..."

        for i in xrange(self.n_weights):
            perturbed[i] = epsilon
            right_side = self.error(initial_weights + perturbed, training_data,
                                    training_targets, cost_function)
            left_side = self.error(initial_weights - perturbed, training_data,
                                   training_targets, cost_function)
            numeric_gradient[i] = (right_side - left_side) / (2 * epsilon)
            perturbed[i] = 0
        #end loop

        # Reset the weights
        self.set_weights(initial_weights)

        # Calculate the analytic gradient
        analytic_gradient = self.gradient(self.get_weights(), training_data,
                                          training_targets, cost_function)

        # Compare the numeric and the analytic gradient
        ratio = np.linalg.norm(analytic_gradient -
                               numeric_gradient) / np.linalg.norm(
                                   analytic_gradient + numeric_gradient)

        if not ratio < 1e-6:
            print "[gradient check] WARNING: The numeric gradient check failed! Analytical gradient differed by %g from the numerical." % ratio
            if not confirm("[gradient check] Do you want to continue?"):
                print "[gradient check] Exiting."
                import sys
                sys.exit(2)
        else:
            print "[gradient check] Passed!"

        return ratio
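The same central-difference check can be exercised on any differentiable function. Below is a small standalone sketch using the same ||analytic - numeric|| / ||analytic + numeric|| ratio as check_gradient above; f and analytic_gradient are illustrative, not part of the library.

import numpy as np

def f( w ):
    return np.sum( w**2 )

def analytic_gradient( w ):
    return 2 * w

def numeric_gradient( f, w, epsilon = 1e-4 ):
    # central differences, one weight at a time
    grad      = np.zeros( w.shape )
    perturbed = np.zeros( w.shape )
    for i in xrange( w.shape[0] ):
        perturbed[i] = epsilon
        grad[i]      = ( f( w + perturbed ) - f( w - perturbed ) ) / (2 * epsilon)
        perturbed[i] = 0
    return grad

w        = np.array([ 0.3, -1.2, 2.0 ])
numeric  = numeric_gradient( f, w )
analytic = analytic_gradient( w )

# same relative-error ratio as check_gradient()
ratio = np.linalg.norm( analytic - numeric ) / np.linalg.norm( analytic + numeric )
print "relative error:", ratio                 # far below 1e-6 for this smooth function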
def scipyoptimize(network,
                  trainingset,
                  testset,
                  cost_function,
                  method="Newton-CG",
                  save_trained_network=False):
    from scipy.optimize import minimize

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    error_function_wrapper = lambda weights, training_data, training_targets, test_data, test_targets, cost_function: network.error(
        weights, test_data, test_targets, cost_function)
    gradient_function_wrapper = lambda weights, training_data, training_targets, test_data, test_targets, cost_function: network.gradient(
        weights, training_data, training_targets, cost_function)

    results = minimize(
        error_function_wrapper,  # The function we are minimizing
        network.get_weights(),  # The vector (parameters) we are minimizing
        method=method,  # The minimization strategy specified by the user
        jac=gradient_function_wrapper,  # The gradient calculating function
        args=(training_data, training_targets, test_data, test_targets,
              cost_function
              ),  # Additional arguments to the error and gradient function
    )

    network.set_weights(results.x)

    if not results.success:
        print "[training] WARNING:", results.message
        print "[training]   Terminated with error %.4g." % results.fun
    else:
        print "[training] Finished:"
        print "[training]   Completed with error %.4g." % results.fun
        print "[training]   Measured quality: %.4g" % network.measure_quality(
            training_data, training_targets, cost_function)

        if save_trained_network and confirm(
                promt="Do you wish to store the trained network?"):
            network.save_network_to_file()
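The asserts above insist that a softmax output layer is paired with the softmax_neg_loss cost. One reason the pairing is convenient: with softmax outputs and a negative log-likelihood cost, the derivative with respect to the logits collapses to softmax(z) - targets. The sketch below verifies that numerically with illustrative helper functions, not the library's own softmax_function or softmax_neg_loss.

import numpy as np

def softmax( z ):
    e = np.exp( z - np.max( z ) )
    return e / np.sum( e )

def neg_log_likelihood( z, targets ):
    return -np.sum( targets * np.log( softmax( z ) ) )

z        = np.array([ 0.5, -1.0, 2.0 ])
targets  = np.array([ 0.0,  0.0, 1.0 ])        # one-hot target
analytic = softmax( z ) - targets              # the claimed derivative w.r.t. the logits

numeric = np.zeros( z.shape )
epsilon = 1e-6
for i in xrange( z.shape[0] ):
    d          = np.zeros( z.shape )
    d[i]       = epsilon
    numeric[i] = ( neg_log_likelihood( z + d, targets ) - neg_log_likelihood( z - d, targets ) ) / (2 * epsilon)

print "max difference:", np.max( np.abs( analytic - numeric ) )   # on the order of 1e-9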
Example #6
    def check_gradient(self, trainingset, cost_function, epsilon=1e-4):
        check_network_structure(
            self, cost_function
        )  # check for special case topology requirements, such as softmax

        training_data, training_targets = verify_dataset_shape_and_modify(
            self, trainingset)

        # assign the weight_vector as the network topology
        initial_weights = np.array(self.get_weights())
        numeric_gradient = np.zeros(initial_weights.shape)
        perturbed = np.zeros(initial_weights.shape)
        n_samples = float(training_data.shape[0])

        print "[gradient check] Running gradient check..."

        for i in xrange(self.n_weights):
            perturbed[i] = epsilon
            right_side = self.error(initial_weights + perturbed, training_data,
                                    training_targets, cost_function)
            left_side = self.error(initial_weights - perturbed, training_data,
                                   training_targets, cost_function)
            numeric_gradient[i] = (right_side - left_side) / (2 * epsilon)
            perturbed[i] = 0
            print i, "/", self.n_weights
        #end loop

        # Reset the weights
        self.set_weights(initial_weights)

        # Calculate the analytic gradient
        analytic_gradient = self.gradient(self.get_weights(), training_data,
                                          training_targets, cost_function)

        # Compare the numeric and the analytic gradient
        ratio = np.linalg.norm(analytic_gradient -
                               numeric_gradient) / np.linalg.norm(
                                   analytic_gradient + numeric_gradient)

        if not ratio < 1e-6:
            print "[gradient check] WARNING: The numeric gradient check failed! Analytical gradient differed by %g from the numerical." % ratio
            if not confirm("[gradient check] Do you want to continue?"):
                print "[gradient check] Exiting."
                import sys
                sys.exit(2)
        else:
            print "[gradient check] Passed!"

        return ratio
 def check_gradient(self, trainingset, cost_function, epsilon = 1e-4 ):
     assert trainingset[0].features.shape[0] == self.n_inputs, \
         "ERROR: input size varies from the configuration. Configured as %d, instance had %d" % (self.n_inputs, trainingset[0].features.shape[0])
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
         "ERROR: output size varies from the configuration. Configured as %d, instance had %d" % (self.layers[-1][0], trainingset[0].targets.shape[0])
     
     training_data           = np.array( [instance.features for instance in trainingset ][:100] ) # perform the test with at most 100 instances
     training_targets        = np.array( [instance.targets  for instance in trainingset ][:100] )
     
     # assign the weight_vector as the network topology
     initial_weights         = np.array(self.get_weights())
     numeric_gradient        = np.zeros( initial_weights.shape )
     perturbed               = np.zeros( initial_weights.shape )
     n_samples               = float(training_data.shape[0])
     
     print "[gradient check] Running gradient check..."
     
     for i in xrange( self.n_weights ):
         perturbed[i]        = epsilon
         right_side          = self.error( initial_weights + perturbed, training_data, training_targets, cost_function )
         left_side           = self.error( initial_weights - perturbed, training_data, training_targets, cost_function )
         numeric_gradient[i] = (right_side - left_side) / (2 * epsilon)
         perturbed[i]        = 0
     #end loop
     
     # Reset the weights
     self.set_weights( initial_weights )
     
     # Calculate the analytic gradient
     analytic_gradient       = self.gradient( self.get_weights(), training_data, training_targets, cost_function )
     
     # Compare the numeric and the analytic gradient
     ratio                   = np.linalg.norm(analytic_gradient - numeric_gradient) / np.linalg.norm(analytic_gradient + numeric_gradient)
     
     if not ratio < 1e-6:
         print "[gradient check] WARNING: The numeric gradient check failed! Analytical gradient differed by %g from the numerical." % ratio
         if not confirm("[gradient check] Do you want to continue?"):
             print "[gradient check] Exiting."
             import sys
             sys.exit(2)
     else:
         print "[gradient check] Passed!"
     
     return ratio
def scipyoptimize(network, trainingset, testset, cost_function, method = "Newton-CG", save_trained_network = False  ):
    from scipy.optimize import minimize
    
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features  for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
    
    error_function_wrapper     = lambda weights, training_data, training_targets, test_data, test_targets, cost_function: network.error( weights, test_data, test_targets, cost_function )
    gradient_function_wrapper  = lambda weights, training_data, training_targets, test_data, test_targets, cost_function: network.gradient( weights, training_data, training_targets, cost_function )
        
    results = minimize( 
        error_function_wrapper,                         # The function we are minimizing
        network.get_weights(),                          # The vector (parameters) we are minimizing
        method  = method,                               # The minimization strategy specified by the user
        jac     = gradient_function_wrapper,            # The gradient calculating function
        args    = (training_data, training_targets, test_data, test_targets, cost_function),  # Additional arguments to the error and gradient function
    )
    
    network.set_weights( results.x )
    
    
    if not results.success:
        print "[training] WARNING:", results.message
        print "[training]   Terminated with error %.4g." % results.fun
    else:
        print "[training] Finished:"
        print "[training]   Completed with error %.4g." % results.fun
        print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
        
        if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
            network.save_network_to_file()
Example #9
	print "1.Implant Trojan."
	print "2.Implant File."
	print "3.Get flag."
	print "4.Store flag."
	print "5.Get Score."
	print "6.Singe Rce."
	print "7.Multi Rce."
	print "8.Confirm Config."
	print "9.Monitor Trojan."
	print "*******************************************************"
	choose = int(raw_input("Please Input:"))
	if choose == 1:
		tools.trojan_implant()
		tools.living_check()
	if choose == 2:
		tools.file_implant()
	if choose == 3:
		tools.catch_flag()
	if choose == 4:
		tools.store_flag()
	if choose == 5:
		tools.upload_flag()
	if choose == 6:
		tools.remote_command()
	if choose == 7:
		tools.remote_command_multi()
	if choose == 8:
		tools.confirm()
	if choose == 9:
		tools.living_check()
	 
def scaled_conjugate_gradient(network, trainingset, ERROR_LIMIT = 1e-6, max_iterations = () ):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391

    assert network.input_layer_dropout == 0 and network.hidden_layer_dropout == 0, \
            "ERROR: dropout should not be used with scaled conjugated gradients training"

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"


    training_data       = np.array( [instance.features for instance in trainingset ] )
    training_targets    = np.array( [instance.targets  for instance in trainingset ] )


    ## Variables
    sigma0              = 1.e-6
    lamb                = 1.e-6
    lamb_               = 0

    vector              = network.get_weights() # The (weight) vector we will use SCG to optimize
    N                   = len(vector)
    grad_new            = -network.gradient( vector, training_data, training_targets )
    r_new               = grad_new
    # end

    success             = True
    k                   = 0
    while k < max_iterations:
        k               += 1
        r               = np.copy( r_new     )
        grad            = np.copy( grad_new  )
        mu              = np.dot(  grad,grad )

        if success:
            success     = False
            sigma       = sigma0 / math.sqrt(mu)
            s           = (network.gradient(vector+sigma*grad, training_data, training_targets)-network.gradient(vector,training_data, training_targets))/sigma
            delta       = np.dot( grad.T, s )
        #end

        # scale s
        zetta           = lamb-lamb_
        s               += zetta*grad
        delta           += zetta*mu

        if delta < 0:
            s           += (lamb - 2*delta/mu)*grad
            lamb_       = 2*(lamb - delta/mu)
            delta       -= lamb*mu
            delta       *= -1
            lamb        = lamb_
        #end

        phi             = np.dot( grad.T,r )
        alpha           = phi/delta

        vector_new      = vector+alpha*grad
        f_old, f_new    = network.error(vector,training_data, training_targets), network.error(vector_new,training_data, training_targets)

        comparison      = 2 * delta * (f_old - f_new)/np.power( phi, 2 )

        if comparison >= 0:
            if f_new < ERROR_LIMIT:
                break # done!

            vector      = vector_new
            f_old       = f_new
            r_new       = -network.gradient( vector, training_data, training_targets )

            success     = True
            lamb_       = 0

            if k % N == 0:
                grad_new = r_new
            else:
                beta    = (np.dot( r_new, r_new ) - np.dot( r_new, r ))/phi
                grad_new = r_new + beta * grad

            if comparison > 0.75:
                lamb    = 0.5 * lamb
        else:
            lamb_       = lamb
        # end

        if comparison < 0.25:
            lamb        = 4 * lamb

        if k%1000==0:
            print "[training] Current error:", f_new, "\tEpoch:", k
    #end

    network.weights = network.unpack( np.array(vector_new) )

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, f_new )
    print "[training]   Trained for %d epochs." % k


    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
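In Moller's algorithm above, s approximates a Hessian-vector product by a finite difference of the gradient, delta is the curvature along the search direction, and alpha = phi / delta is the resulting step length. On a quadratic, where the Hessian is known exactly, the approximation is easy to inspect; the sketch below is an illustrative stand-in for network.gradient, not library code.

import numpy as np

H = np.array([[ 3.0, 1.0 ],
              [ 1.0, 2.0 ]])                   # symmetric positive-definite Hessian of f(w) = 0.5 * w.T.dot(H).dot(w)

def gradient( w ):
    return H.dot( w )

w     = np.array([ 1.0, -1.0 ])
d     = -gradient( w )                         # search direction (steepest descent here)
sigma = 1e-6 / np.sqrt( np.dot( d, d ) )       # mirrors sigma = sigma0 / sqrt(mu)

s     = ( gradient( w + sigma * d ) - gradient( w ) ) / sigma   # finite-difference Hessian-vector product
delta = np.dot( d, s )                         # curvature along d, i.e. d.T.dot(H).dot(d)
alpha = np.dot( d, -gradient( w ) ) / delta    # phi / delta: the exact minimizer along d for a quadratic

print "finite-difference s:", s
print "exact H.dot(d):     ", H.dot( d )
print "step length alpha:  ", alpha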
def resilient_backpropagation(network, trainingset, ERROR_LIMIT=1e-3, max_iterations = (), weight_step_max = 50., weight_step_min = 0., start_step = 0.5, learn_max = 1.2, learn_min = 0.5 ):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf
    assert network.input_layer_dropout == 0 and network.hidden_layer_dropout == 0, \
            "ERROR: dropout should not be used with resilient backpropagation"

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"

    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )

    # Data structure to store the previous derivative
    previous_dEdW                  = [ 1 ] * len( network.weights )

    # Storing the current / previous weight step size
    weight_step                = [ np.full( weight_layer.shape, start_step ) for weight_layer in network.weights ]

    # Storing the current / previous weight update
    dW                         = [  np.ones(shape=weight_layer.shape) for weight_layer in network.weights ]


    input_signals, derivatives = network.update( training_data, trace=True )
    out                        = input_signals[-1]
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]
    error                      = network.cost_function(out, training_targets )

    layer_indexes              = range( len(network.layers) )[::-1] # reversed
    prev_error                   = ( )                             # inf
    epoch                      = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch       += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = np.dot( delta, add_bias(input_signals[i]) ).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]


            # Calculate sign changes and note where they have changed
            diffs            = np.multiply( dEdW, previous_dEdW[i] )
            pos_indexes      = np.where( diffs > 0 )
            neg_indexes      = np.where( diffs < 0 )
            zero_indexes     = np.where( diffs == 0 )


            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum( weight_step[i][pos_indexes] * learn_max, weight_step_max )

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply( -np.sign( dEdW[pos_indexes] ), weight_step[i][pos_indexes] )

                # Apply the weight deltas
                network.weights[i][ pos_indexes ] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum( weight_step[i][neg_indexes] * learn_min, weight_step_min )

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][ neg_indexes ] -= dW[i][neg_indexes] # backtrack

                dEdW[ neg_indexes ] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply( -np.sign( dEdW[zero_indexes] ), weight_step[i][zero_indexes] )
                network.weights[i][ zero_indexes ] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        #end weight adjustment loop

        prev_error                 = error

        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = network.cost_function(out, training_targets )

        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
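The core of resilient backpropagation is that only the sign of each partial derivative is used: the per-weight step grows while the sign stays the same and shrinks when it flips. The standalone sketch below shows that mechanism on a two-parameter quadratic; it uses the simpler iRprop- variant (the update after a sign flip is suppressed rather than backtracked), and all names are illustrative.

import numpy as np

target = np.array([ 2.0, -3.0 ])

def gradient( w ):
    return 2 * ( w - target )                  # gradient of sum((w - target)**2)

w             = np.zeros( 2 )
step          = np.full( w.shape, 0.5 )        # start_step
previous_grad = np.zeros( w.shape )
learn_max, learn_min = 1.2, 0.5
step_max,  step_min  = 50.0, 1e-6

for epoch in xrange( 100 ):
    grad  = gradient( w )
    signs = grad * previous_grad               # >0 where the sign is unchanged, <0 where it flipped

    step[ signs > 0 ] = np.minimum( step[ signs > 0 ] * learn_max, step_max )   # same sign: accelerate
    step[ signs < 0 ] = np.maximum( step[ signs < 0 ] * learn_min, step_min )   # sign flip: slow down
    grad[ signs < 0 ] = 0                      # suppress the update after a sign flip (iRprop-)

    w += -np.sign( grad ) * step               # only the sign of the gradient sets the direction
    previous_grad = grad

print "final weights:", w, "target:", target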
Example #12
def backpropagation(network,
                    trainingset,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=()):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)

    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out,
                                            training_targets,
                                            derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout
                if i > 0 else network.input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * np.dot(
                delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out,
                                                training_targets,
                                                derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_to_file()
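The weight change in the loop above is the classical momentum update: the negative gradient scaled by the learning rate plus a fraction of the previous change. A minimal sketch of that rule on a one-dimensional quadratic, with illustrative names only:

learning_rate   = 0.03
momentum_factor = 0.9

def gradient( w ):
    return 2 * ( w - 5.0 )                     # gradient of (w - 5)**2

w, momentum = 0.0, 0.0
for epoch in xrange( 200 ):
    dW       = -learning_rate * gradient( w ) + momentum_factor * momentum
    momentum = dW                              # store the step for the next iteration
    w       += dW

print "w after 200 epochs:", w                 # approaches the minimum at 5.0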
def backpropagation(network,
                    trainingset,
                    testset,
                    cost_function,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=(),
                    input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0,
                    save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * (np.dot(delta, add_bias(input_signals[i])) /
                                   n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets,
                                        derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
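The dropout() helper called above is not shown in these snippets. A common implementation of the masking it performs is sketched below as an assumption about its behavior, not the library's actual code: each activation is zeroed independently with probability p (whether survivors are rescaled by 1/(1-p) depends on the implementation).

import numpy as np

def dropout_sketch( signal, p ):
    # zero each activation independently with probability p
    if p <= 0:
        return signal
    mask = ( np.random.random( signal.shape ) >= p ).astype( signal.dtype )
    return signal * mask

activations = np.random.random( (4, 6) )       # e.g. a batch of hidden-layer outputs
print dropout_sketch( activations, 0.5 )       # roughly half of the entries zeroed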
def scaled_conjugate_gradient(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-6, max_iterations = (), save_trained_network = False ):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391
     
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features  for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )

    ## Variables
    sigma0              = 1.e-6
    lamb                = 1.e-6
    lamb_               = 0

    vector              = network.get_weights() # The (weight) vector we will use SCG to optimize
    N                   = len(vector)
    grad_new            = -network.gradient( vector, training_data, training_targets, cost_function )
    r_new               = grad_new
    # end

    success             = True
    k                   = 0
    while k < max_iterations:
        k               += 1
        r               = np.copy( r_new     )
        grad            = np.copy( grad_new  )
        mu              = np.dot(  grad,grad )
    
        if success:
            success     = False
            sigma       = sigma0 / math.sqrt(mu)
            s           = (network.gradient(vector+sigma*grad, training_data, training_targets, cost_function)-network.gradient(vector,training_data, training_targets,cost_function))/sigma
            delta       = np.dot( grad.T, s )
        #end
    
        # scale s
        zetta           = lamb-lamb_
        s               += zetta*grad
        delta           += zetta*mu
    
        if delta < 0:
            s           += (lamb - 2*delta/mu)*grad
            lamb_       = 2*(lamb - delta/mu)
            delta       -= lamb*mu
            delta       *= -1
            lamb        = lamb_
        #end
    
        phi             = np.dot( grad.T,r )
        alpha           = phi/delta
    
        vector_new      = vector+alpha*grad
        f_old, f_new    = network.error(vector, test_data, test_targets, cost_function), network.error(vector_new, test_data, test_targets, cost_function)
    
        comparison      = 2 * delta * (f_old - f_new)/np.power( phi, 2 )
        
        if comparison >= 0:
            if f_new < ERROR_LIMIT: 
                break # done!
        
            vector      = vector_new
            f_old       = f_new
            r_new       = -network.gradient( vector, training_data, training_targets, cost_function )
        
            success     = True
            lamb_       = 0
        
            if k % N == 0:
                grad_new = r_new
            else:
                beta    = (np.dot( r_new, r_new ) - np.dot( r_new, r ))/phi
                grad_new = r_new + beta * grad
        
            if comparison > 0.75:
                lamb    = 0.5 * lamb
        else:
            lamb_       = lamb
        # end 
    
        if comparison < 0.25: 
            lamb        = 4 * lamb
    
        if k%1000==0:
            print "[training] Current error:", f_new, "\tEpoch:", k
    #end
    
    network.set_weights( np.array(vector_new) )
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, f_new )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % k
    
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
#end scg


# NOT YET IMPLEMENTED
#def generalized_hebbian(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3, learning_rate = 0.001, max_iterations = (), save_trained_network = False ):
#    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
#        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
#    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
#        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
#        
#    assert trainingset[0].features.shape[0] == network.n_inputs, \
#        "ERROR: input size varies from the defined input setting"
#    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
#        "ERROR: output size varies from the defined output setting"
#    
#    training_data              = np.array( [instance.features for instance in trainingset ] )
#    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
#    test_data                  = np.array( [instance.features  for instance in testset ] )
#    test_targets               = np.array( [instance.targets  for instance in testset ] )
#                                
#    layer_indexes               = range( len(network.layers) )
#    epoch                       = 0
#                                
#    input_signals, derivatives  = network.update( training_data, trace=True )
#                                
#    out                         = input_signals[-1]
#    error                       = cost_function( out, training_targets )
#    
#    input_signals[-1] = out - training_targets
#    
#    while error > ERROR_LIMIT and epoch < max_iterations:
#        epoch += 1
#        
#        for i in layer_indexes:
#            forgetting_term     = np.dot(network.weights[i], np.tril(np.dot( input_signals[i+1].T, input_signals[i+1] )))
#            activation_product  = np.dot(add_bias(input_signals[i]).T, input_signals[i+1])
#            network.weights[i] += learning_rate * (activation_product - forgetting_term)
#        #end weight adjustment loop
#        
#        # normalize the weight to prevent the weights from growing unbounded
#        network.weights[i]     /= np.sqrt(np.sum(network.weights[i]**2))
#        
#        input_signals, derivatives = network.update( training_data, trace=True )
#        out                        = input_signals[-1]
#        error                      = cost_function(out, training_targets )
#        
#        if epoch%1000==0:
#            # Show the current training status
#            print "[training] Current error:", error, "\tEpoch:", epoch
#    
#    print "[training] Finished:"
#    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
#    print "[training]   Trained for %d epochs." % epoch
#    
#    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
#        network.save_network_to_file()
## end backprop
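Since the generalized Hebbian routine above is left unimplemented, here is a small standalone sketch of the underlying rule (Sanger's rule) on toy data: with linear outputs y = W.dot(x), the update W += lr * (outer(y, x) - tril(outer(y, y)).dot(W)) pushes the rows of W toward the leading principal components of the input. This illustrates the rule itself and makes no claim about the commented-out function's intended API.

import numpy as np

np.random.seed( 0 )

n_components, n_inputs = 2, 5
W  = np.random.randn( n_components, n_inputs ) * 0.1
lr = 0.01

# toy data whose variance is concentrated along two fixed directions
basis = np.random.randn( 2, n_inputs )
data  = np.random.randn( 2000, 2 ).dot( basis ) + 0.01 * np.random.randn( 2000, n_inputs )

for x in data:
    y  = W.dot( x )                                                        # linear outputs
    W += lr * ( np.outer( y, x ) - np.tril( np.outer( y, y ) ).dot( W ) )  # Sanger's rule

print "row norms (should approach 1):", np.linalg.norm( W, axis = 1 )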
Example #15
def backpropagation(network, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()  ):
    
    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"
    
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"
    
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
                            
    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    momentum                   = collections.defaultdict( int )
    epoch                      = 0
    
    input_signals, derivatives = network.update( training_data, trace=True )
    
    out                        = input_signals[-1]
    error                      = network.cost_function(out, training_targets )
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]
    
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas
            
            # perform dropout
            dropped = dropout( 
                        input_signals[i], 
                        # dropout probability
                        network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
                    )
            
            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]
            
            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
            
            # Store the momentum
            momentum[i] = dW
                                
            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop
        
        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        error                      = network.cost_function(out, training_targets )
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        
        
        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Trained for %d epochs." % epoch
    
    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
def resilient_backpropagation(network,
                              trainingset,
                              testset,
                              cost_function,
                              ERROR_LIMIT=1e-3,
                              max_iterations=(),
                              weight_step_max=50.,
                              weight_step_min=0.,
                              start_step=0.5,
                              learn_max=1.2,
                              learn_min=0.5,
                              print_rate=1000,
                              save_trained_network=False):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    # Storing the current / previous weight step size
    weight_step = [
        np.full(weight_layer.shape, start_step)
        for weight_layer in network.weights
    ]

    # Storing the current / previous weight update
    dW = [
        np.ones(shape=weight_layer.shape) for weight_layer in network.weights
    ]

    # Storing the previous derivative
    previous_dEdW = [1] * len(network.weights)

    # Storing the previous error measurement
    prev_error = ()  # inf

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    n_samples = float(training_data.shape[0])
    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = (np.dot(delta, add_bias(input_signals[i])) / n_samples).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply(dEdW, previous_dEdW[i])
            pos_indexes = np.where(diffs > 0)
            neg_indexes = np.where(diffs < 0)
            zero_indexes = np.where(diffs == 0)

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum(
                    weight_step[i][pos_indexes] * learn_max, weight_step_max)

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply(-np.sign(dEdW[pos_indexes]),
                                                 weight_step[i][pos_indexes])

                # Apply the weight deltas
                network.weights[i][pos_indexes] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum(
                    weight_step[i][neg_indexes] * learn_min, weight_step_min)

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][neg_indexes] -= dW[i][
                        neg_indexes]  # backtrack

                dEdW[neg_indexes] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply(-np.sign(dEdW[zero_indexes]),
                                                  weight_step[i][zero_indexes])
                network.weights[i][zero_indexes] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        #end weight adjustment loop

        prev_error = error

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets,
                                        derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
Example #17
 def scg(self, trainingset, ERROR_LIMIT = 1e-6, max_iterations = () ):
     # Implemented according to the paper by Martin F. Moller
     # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391
     
     assert self.input_layer_dropout == 0 and self.hidden_layer_dropout == 0, \
             "ERROR: dropout should not be used with scaled conjugated gradients training"
             
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
             "ERROR: output size varies from the defined output setting"
     
     
     training_data       = np.array( [instance.features for instance in trainingset ] )
     training_targets    = np.array( [instance.targets  for instance in trainingset ] )
     
 
     ## Variables
     sigma0              = 1.e-6
     lamb                = 1.e-6
     lamb_               = 0
 
     vector              = self.get_weights() # The (weight) vector we will use SCG to optimize
     N                   = len(vector)
     grad_new            = -self.gradient( vector, training_data, training_targets )
     r_new               = grad_new
     # end
 
     success             = True
     k                   = 0
     while k < max_iterations:
         k               += 1
         r               = np.copy( r_new     )
         grad            = np.copy( grad_new  )
         mu              = np.dot(  grad,grad )
     
         if success:
             success     = False
             sigma       = sigma0 / math.sqrt(mu)
             s           = (self.gradient(vector+sigma*grad, training_data, training_targets)-self.gradient(vector,training_data, training_targets))/sigma
             delta       = np.dot( grad.T, s )
         #end
     
         # scale s
         zetta           = lamb-lamb_
         s               += zetta*grad
         delta           += zetta*mu
     
         if delta < 0:
             s           += (lamb - 2*delta/mu)*grad
             lamb_       = 2*(lamb - delta/mu)
             delta       -= lamb*mu
             delta       *= -1
             lamb        = lamb_
         #end
     
         phi             = np.dot( grad.T,r )
         alpha           = phi/delta
     
         vector_new      = vector+alpha*grad
         f_old, f_new    = self.error(vector,training_data, training_targets), self.error(vector_new,training_data, training_targets)
     
         comparison      = 2 * delta * (f_old - f_new)/np.power( phi, 2 )
         
         if comparison >= 0:
             if f_new < ERROR_LIMIT: 
                 break # done!
         
             vector      = vector_new
             f_old       = f_new
             r_new       = -self.gradient( vector, training_data, training_targets )
         
             success     = True
             lamb_       = 0
         
             if k % N == 0:
                 grad_new = r_new
             else:
                 beta    = (np.dot( r_new, r_new ) - np.dot( r_new, r ))/phi
                 grad_new = r_new + beta * grad
         
             if comparison > 0.75:
                 lamb    = 0.5 * lamb
         else:
             lamb_       = lamb
         # end 
     
         if comparison < 0.25: 
             lamb        = 4 * lamb
     
         if k%1000==0: print "* current network error (MSE):", f_new
     #end
     
     self.weights = self.unpack( np.array(vector_new) )
     
     print "* Converged to error bound (%.3g) with MSE = %.3g." % ( ERROR_LIMIT, f_new )
     print "* Trained for %d epochs." % k
     
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
 def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()  ):
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
             "ERROR: output size varies from the defined output setting"
     
     
     training_data              = np.array( [instance.features for instance in trainingset ] )
     training_targets           = np.array( [instance.targets  for instance in trainingset ] )
                             
     layer_indexes              = range( len(self.layers) )[::-1]    # reversed
     momentum                   = collections.defaultdict( int )
     MSE                        = ( ) # inf
     epoch                      = 0
     
     input_signals, derivatives = self.update( training_data, trace=True )
     
     out                        = input_signals[-1]
     error                      = (out - training_targets).T
     delta                      = error * derivatives[-1]
     MSE                        = np.mean( np.power(error,2) )
     
     while MSE > ERROR_LIMIT and epoch < max_iterations:
         epoch += 1
         
         for i in layer_indexes:
             # Loop over the weight layers in reversed order to calculate the deltas
             
             # perform dropout
             dropped = dropout( 
                         input_signals[i], 
                         # dropout probability
                         self.hidden_layer_dropout if i else self.input_layer_dropout
                     )
             
             # calculate the weight change
             dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]
             
             if i!= 0:
                 """Do not calculate the delta unnecessarily."""
                 # Skip the bias weight
                 weight_delta = np.dot( self.weights[ i ][1:,:], delta )
     
                 # Calculate the delta for the subsequent layer
                 delta = weight_delta * derivatives[i-1]
             
             # Store the momentum
             momentum[i] = dW
                                 
             # Update the weights
             self.weights[ i ] += dW
         #end weight adjustment loop
         
         input_signals, derivatives = self.update( training_data, trace=True )
         out                        = input_signals[-1]
         error                      = (out - training_targets).T
         delta                      = error * derivatives[-1]
         MSE                        = np.mean( np.power(error,2) )
         
         
         if epoch%1000==0:
             # Show the current training status
             print "* current network error (MSE):", MSE
     
     print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
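# NOTE: the backpropagation method above leans on two helpers, `dropout` and
# `add_bias`, whose definitions are not shown here. A minimal sketch of what they
# might look like, inferred purely from how they are called (an assumption, not
# the library's actual code): `dropout` zeroes each activation with the given
# probability and rescales the survivors, while `add_bias` prepends a column of
# ones so that row 0 of each weight matrix (skipped via weights[i][1:,:] above)
# acts as the bias.
import numpy as np

def dropout( activations, p = 0. ):
    # Randomly silence activations with probability p (inverted dropout scaling).
    if p > 0:
        keep = 1.0 - p
        mask = np.random.binomial( 1, keep, size = activations.shape )
        return activations * mask / keep
    return activations

def add_bias( activations ):
    # Prepend a column of ones; these line up with the bias weights in row 0.
    return np.hstack(( np.ones(( activations.shape[0], 1 )), activations ))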
def backpropagation(network,
                    trainingset,
                    testset,
                    cost_function,
                    evaluation_function=None,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=(),
                    batch_size=0,
                    input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0,
                    print_rate=1000,
                    save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function is not None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(
        training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(
        training_targets,
        math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(
        len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(batch_indices)  # Shuffle the order in which the batches are processed between the iterations (see the batching sketch after this function)

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out,
                                            batch_targets,
                                            derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout)

                # calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(
                    dropped)) / batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            #end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
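# The batching above (np.array_split + shuffled batch_indices) splits the
# training set once into ceil(n / batch_size) roughly equal chunks and then only
# shuffles the visiting order each epoch. A small standalone illustration of that
# behaviour, independent of the network code:
import math
import random
import numpy as np

data       = np.arange( 10 ).reshape( 10, 1 )    # ten one-feature samples
batch_size = 3
batches    = np.array_split( data, math.ceil( 1.0 * data.shape[0] / batch_size ) )
indices    = range( len( batches ) )             # a plain list in Python 2

random.shuffle( indices )                        # new visiting order, same chunks
for index in indices:
    print "batch", index, "->", batches[index].ravel()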
def scaled_conjugate_gradient(network,
                              trainingset,
                              testset,
                              cost_function,
                              ERROR_LIMIT=1e-6,
                              max_iterations=(),
                              print_rate=1000,
                              save_trained_network=False):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    ## Variables
    sigma0 = 1.e-6
    lamb = 1.e-6
    lamb_ = 0

    vector = network.get_weights()  # The (weight) vector that SCG will optimize
    grad_new = -network.gradient(vector, training_data, training_targets,
                                 cost_function)
    r_new = grad_new
    # end

    success = True
    k = 0
    while k < max_iterations:
        k += 1
        r = np.copy(r_new)
        grad = np.copy(grad_new)
        mu = np.dot(grad, grad)

        if success:
            success = False
            sigma = sigma0 / math.sqrt(mu)
            s = (network.gradient(vector + sigma * grad, training_data,
                                  training_targets, cost_function) -
                 network.gradient(vector, training_data, training_targets,
                                  cost_function)) / sigma
            delta = np.dot(grad.T, s)
        #end

        # scale s
        zetta = lamb - lamb_
        s += zetta * grad
        delta += zetta * mu

        if delta < 0:
            s += (lamb - 2 * delta / mu) * grad
            lamb_ = 2 * (lamb - delta / mu)
            delta -= lamb * mu
            delta *= -1
            lamb = lamb_
        #end

        phi = np.dot(grad.T, r)
        alpha = phi / delta

        vector_new = vector + alpha * grad
        f_old = network.error(vector, test_data, test_targets, cost_function)
        f_new = network.error(vector_new, test_data, test_targets, cost_function)

        comparison = 2 * delta * (f_old - f_new) / np.power(phi, 2)

        if comparison >= 0:
            if f_new < ERROR_LIMIT:
                break  # done!

            vector = vector_new
            f_old = f_new
            r_new = -network.gradient(vector, training_data, training_targets,
                                      cost_function)

            success = True
            lamb_ = 0

            if k % network.n_weights == 0:
                grad_new = r_new
            else:
                beta = (np.dot(r_new, r_new) - np.dot(r_new, r)) / phi
                grad_new = r_new + beta * grad

            if comparison > 0.75:
                lamb = 0.5 * lamb
        else:
            lamb_ = lamb
        # end

        if comparison < 0.25:
            lamb = 4 * lamb

        if k % print_rate == 0:
            print "[training] Current error:", f_new, "\tEpoch:", k
    #end

    network.set_weights(np.array(vector_new))

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, f_new)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % k

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()


#end scg
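# Moller's method adapts the damping term `lamb` like a trust-region radius: the
# comparison parameter above, comparison = 2 * delta * (f_old - f_new) / phi**2,
# measures how well the local quadratic model predicted the actual error
# reduction. The adaptation rule used in the loop, restated on its own for
# clarity (extracted from the code above, not part of the library):
def adjust_damping( comparison, lamb ):
    if comparison > 0.75:        # good quadratic fit -> trust the model, damp less
        lamb = 0.5 * lamb
    if comparison < 0.25:        # poor quadratic fit -> damp more
        lamb = 4.0 * lamb
    return lamb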

## NOT YET IMPLEMENTED
#def generalized_hebbian(network, trainingset, testset, cost_function, learning_rate = 0.001, max_iterations = (), save_trained_network = False ):
#    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
#        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
#    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
#        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
#
#    assert trainingset[0].features.shape[0] == network.n_inputs, \
#        "ERROR: input size varies from the defined input setting"
#    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
#        "ERROR: output size varies from the defined output setting"
#
#    training_data              = np.array( [instance.features for instance in trainingset ] )
#    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
#
#    layer_indexes               = range( len(network.layers) )
#    epoch                       = 0
#
#    input_signals, derivatives  = network.update( training_data, trace=True )
#    error                       = cost_function(input_signals[-1], training_targets )
#    input_signals[-1]          -= training_targets
#
#    while error > 0.01 and epoch < max_iterations:
#        epoch += 1
#
#        for i in layer_indexes:
#            forgetting_term     = np.dot(network.weights[i], np.tril(np.dot( input_signals[i+1].T, input_signals[i+1] )))
#            activation_product  = np.dot(add_bias(input_signals[i]).T, input_signals[i+1])
#            dW                  = learning_rate * (activation_product - forgetting_term)
#            network.weights[i] += dW
#
#            # normalize the weight to prevent the weights from growing unbounded
#            #network.weights[i]     /= np.sqrt(np.sum(network.weights[i]**2))
#        #end weight adjustment loop
#
#        input_signals, derivatives  = network.update( training_data, trace=True )
#        error                       = cost_function(input_signals[-1], training_targets )
#        input_signals[-1]          -= training_targets
#
#        if epoch % 1000 == 0:
#            print "[training] Error:", error
#
#    print "[training] Finished:"
#    print "[training]   Trained for %d epochs." % epoch
#
#    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
#        network.save_network_to_file()
## end hebbian
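# The commented-out trainer above sketches a Hebbian update with a lower-triangular
# "forgetting" term. For reference, a standalone illustration of the classical
# generalized Hebbian algorithm (Sanger's rule) that this structure resembles --
# an independent example, not the library's intended implementation:
import numpy as np

def generalized_hebbian_step( W, x, learning_rate = 0.001 ):
    # W: (n_outputs, n_inputs) weight matrix, x: (n_inputs,) zero-mean sample.
    y  = np.dot( W, x )                                          # linear outputs
    # Sanger's rule: dW = lr * ( y x^T - tril( y y^T ) W )
    dW = learning_rate * ( np.outer( y, x ) - np.dot( np.tril( np.outer( y, y ) ), W ) )
    return W + dW

# Toy usage: with repeated passes over zero-mean data, the rows of W tend toward
# the leading principal components of the samples.
rng      = np.random.RandomState( 0 )
samples  = rng.randn( 500, 3 )
samples -= samples.mean( axis = 0 )
W        = rng.randn( 2, 3 ) * 0.1
for x in samples:
    W = generalized_hebbian_step( W, x )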
Example #21
 def resilient_backpropagation(self, trainingset, ERROR_LIMIT=1e-3, max_iterations = (), weight_step_max = 50., weight_step_min = 0., start_step = 0.5, learn_max = 1.2, learn_min = 0.5 ):
     # Implemented according to iRprop+ 
     # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf
     assert self.input_layer_dropout == 0 and self.hidden_layer_dropout == 0, \
             "ERROR: dropout should not be used with resilient backpropagation"
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
             "ERROR: output size varies from the defined output setting"
     
     training_data              = np.array( [instance.features for instance in trainingset ] )
     training_targets           = np.array( [instance.targets  for instance in trainingset ] )
     
     # Data structure to store the previous derivative
     last_dEdW                  = [ 1 ] * len( self.weights )
     
     # Storing the current / previous weight step size
     weight_step                = [ np.full( weight_layer.shape, start_step ) for weight_layer in self.weights ]
     
     # Storing the current / previous weight update
     dW                         = [  np.ones(shape=weight_layer.shape) for weight_layer in self.weights ]
     
     
     input_signals, derivatives = self.update( training_data, trace=True )
     out                        = input_signals[-1]
     error                      = (out - training_targets).T
     delta                      = error * derivatives[-1]
     MSE                        = np.mean( np.power(error,2) )
     
     layer_indexes              = range( len(self.layers) )[::-1] # reversed
     prev_MSE                   = ( )                             # empty tuple compares greater than any number in Python 2, i.e. +inf
     epoch                      = 0
     
     while MSE > ERROR_LIMIT and epoch < max_iterations:
         epoch       += 1
         
         for i in layer_indexes:
             # Loop over the weight layers in reversed order to calculate the deltas
                    
             # Calculate the delta with respect to the weights
             dEdW = np.dot( delta, add_bias(input_signals[i]) ).T
             
             if i != 0:
                 """Do not calculate the delta unnecessarily."""
                 # Skip the bias weight
                 weight_delta = np.dot( self.weights[ i ][1:,:], delta )
     
                 # Calculate the delta for the subsequent layer
                 delta = weight_delta * derivatives[i-1]
             
             
             # Calculate sign changes and note where they have changed
             diffs            = np.multiply( dEdW, last_dEdW[i] )
             pos_indexes      = np.where( diffs > 0 )
             neg_indexes      = np.where( diffs < 0 )
             zero_indexes     = np.where( diffs == 0 )
             
             
             # positive
             if pos_indexes[0].size: # np.where returns a tuple of index arrays; check it is non-empty
                 # Calculate the weight step size
                 weight_step[i][pos_indexes] = np.minimum( weight_step[i][pos_indexes] * learn_max, weight_step_max )
                 
                 # Calculate the weight step direction
                 dW[i][pos_indexes] = np.multiply( -np.sign( dEdW[pos_indexes] ), weight_step[i][pos_indexes] )
                 
                 # Apply the weight deltas
                 self.weights[i][ pos_indexes ] += dW[i][pos_indexes]
             
             # negative
             if neg_indexes[0].size:
                 weight_step[i][neg_indexes] = np.maximum( weight_step[i][neg_indexes] * learn_min, weight_step_min )
                 
                 if MSE > prev_MSE:
                     # iRprop+ version of resilient backpropagation
                     self.weights[i][ neg_indexes ] -= dW[i][neg_indexes] # backtrack
                 
                 dEdW[ neg_indexes ] = 0
             
             # zeros
             if zero_indexes[0].size:
                 dW[i][zero_indexes] = np.multiply( -np.sign( dEdW[zero_indexes] ), weight_step[i][zero_indexes] )
                 self.weights[i][ zero_indexes ] += dW[i][zero_indexes]
             
             # Store the previous weight step
             last_dEdW[i] = dEdW
         #end weight adjustment loop
         
         prev_MSE                   = MSE
         
         input_signals, derivatives = self.update( training_data, trace=True )
         out                        = input_signals[-1]
         error                      = (out - training_targets).T
         delta                      = error * derivatives[-1]
         MSE                        = np.mean( np.power(error,2) )
         
         if epoch%1000==0: print "* current network error (MSE):", MSE
 
     print "* Converged to error bound (%.3g) with MSE = %.3g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
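# The iRprop+ method above adapts one step size per weight from the sign of the
# gradient alone: grow it (by learn_max, capped at weight_step_max) while the
# gradient keeps its sign, shrink it (by learn_min, floored at weight_step_min)
# when the sign flips, and on a flip undo the previous update if the overall
# error increased. A standalone restatement of that per-weight rule for a single
# scalar weight (illustration only; the helper name is hypothetical):
import numpy as np

def irprop_plus_step( w, dEdW, last_dEdW, step, last_dW, error_increased,
                      learn_max = 1.2, learn_min = 0.5,
                      step_max = 50., step_min = 0. ):
    if dEdW * last_dEdW > 0:                    # same sign: accelerate
        step = min( step * learn_max, step_max )
        dW   = -np.sign( dEdW ) * step
        w   += dW
    elif dEdW * last_dEdW < 0:                  # sign flip: slow down
        step = max( step * learn_min, step_min )
        if error_increased:
            w -= last_dW                        # iRprop+ backtracking
        dW   = last_dW                          # keep the stored update
        dEdW = 0                                # suppress an immediate second flip
    else:                                       # zero product: plain signed step
        dW   = -np.sign( dEdW ) * step
        w   += dW
    return w, dW, dEdW, step                    # caller stores dW and dEdW for the next epoch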
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, save_trained_network = False  ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features  for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
          
    momentum                   = collections.defaultdict( int )
    
    input_signals, derivatives = network.update( training_data, trace=True )
    out                        = input_signals[-1]
    cost_derivative            = cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]
    error                      = cost_function(network.update( test_data ), test_targets )
    
    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    epoch                      = 0
    n_samples                  = float(training_data.shape[0])
    
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas
            
            # perform dropout
            dropped = dropout( 
                        input_signals[i], 
                        # dropout probability
                        hidden_layer_dropout if i > 0 else input_layer_dropout
                    )
            
            # calculate the weight change
            dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/n_samples).T + momentum_factor * momentum[i]
            
            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
            
            # Store the momentum
            momentum[i] = dW
                                
            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop
        
        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        cost_derivative            = cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = cost_function(network.update( test_data ), test_targets )
        
        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
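# The weight change above folds a classical momentum term into each update:
# dW_t = -learning_rate * gradient_t + momentum_factor * dW_{t-1}, with
# momentum[i] starting at 0 via collections.defaultdict(int). A minimal
# standalone illustration of how that recursion smooths successive gradients
# (toy numbers, independent of the network code):
import numpy as np

learning_rate   = 0.03
momentum_factor = 0.9
dW              = np.zeros( 2 )                  # plays the role of momentum[i]

for gradient in [ np.array([ 1.0, -2.0 ]), np.array([ 0.8, -1.5 ]), np.array([ 1.2, -2.5 ]) ]:
    dW = -learning_rate * gradient + momentum_factor * dW
    print "accumulated update:", dW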
Example #23
def backpropagation(network, trainingset, testset, cost_function, evaluation_function = None, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), batch_size = 0, input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, print_rate = 1000, save_trained_network = False  ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    # Whether to use another function for printing the dataset error than the cost function. 
    # This is useful if you train the network with the MSE cost function, but are going to 
    # classify rather than regress on your data.
    calculate_print_error      = evaluation_function if evaluation_function is not None else cost_function
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
    
    batch_size                 = batch_size if batch_size != 0 else training_data.shape[0] 
    batch_training_data        = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets     = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices              = range(len(batch_training_data))       # fast reference to batches
    
    error                      = calculate_print_error(network.update( test_data ), test_targets )
    reversed_layer_indexes     = range( len(network.layers) )[::-1]
    momentum                   = collections.defaultdict( int )
    
    epoch                      = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        random.shuffle(batch_indices) # Shuffle the order in which the batches are processed between the iterations
        
        for batch_index in batch_indices:
            batch_data                 = batch_training_data[    batch_index ]
            batch_targets              = batch_training_targets[ batch_index ]
            batch_size                 = float( batch_data.shape[0] )
            
            input_signals, derivatives = network.update( batch_data, trace=True )
            out                        = input_signals[-1]
            cost_derivative            = cost_function( out, batch_targets, derivative=True ).T
            delta                      = cost_derivative * derivatives[-1]
            
            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas
            
                # perform dropout
                dropped = dropout( 
                            input_signals[i], 
                            # dropout probability
                            hidden_layer_dropout if i > 0 else input_layer_dropout
                        )
            
                # calculate the weight change
                dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/batch_size).T + momentum_factor * momentum[i]
            
                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i-1]
            
                # Store the momentum
                momentum[i] = dW
                                
                # Update the weights
                network.weights[ i ] += dW
            #end weight adjustment loop
        
        error = calculate_print_error(network.update( test_data ), test_targets )
        
        if epoch%print_rate==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()