def Adam(network,
         trainingset,
         testset,
         cost_function,
         beta1=0.9,
         beta2=0.999,
         epsilon=1e-8,
         **kwargs):
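    """
    Adam (Kingma & Ba, 2014): maintains exponentially decaying averages of
    past gradients (first moment, m) and past squared gradients (second
    moment, v), and scales each weight's step by the bias-corrected m
    divided by the square root of the bias-corrected v.
    """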
    configuration = dict(default_configuration)
    configuration.update(kwargs)

    learning_rate = configuration["learning_rate"]
    # First (m) and second (v) moment estimates, one array per weight layer.
    m = [
        np.zeros(shape=weight_layer.shape) for weight_layer in network.weights
    ]
    v = [
        np.zeros(shape=weight_layer.shape) for weight_layer in network.weights
    ]
    # Per-layer update counter, used for bias correction of m and v.
    t = [0] * len(network.weights)

    def calculate_dW(layer_index, dX):
        t[layer_index] += 1
        m[layer_index] = beta1 * m[layer_index] + (1 - beta1) * dX
        v[layer_index] = beta2 * v[layer_index] + (1 - beta2) * (dX**2)

        # Bias-corrected moment estimates; without this correction the first
        # updates are strongly biased toward the zero initialization.
        m_hat = m[layer_index] / (1 - beta1**t[layer_index])
        v_hat = v[layer_index] / (1 - beta2**t[layer_index])

        return -learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

    #end

    return backpropagation_foundation(network, trainingset, testset,
                                      cost_function, calculate_dW,
                                      **configuration)
#end


def RMSprop(network,
            trainingset,
            testset,
            cost_function,
            decay_rate=0.99,
            epsilon=1e-8,
            **kwargs):
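    """
    RMSprop (Tieleman & Hinton): keeps a decaying average of squared
    gradients per weight and divides the current gradient by its square
    root, so each weight gets its own adaptive step size.
    """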
    configuration = dict(default_configuration)
    configuration.update(kwargs)

    learning_rate = configuration["learning_rate"]
    # Decaying average of squared gradients, one array per weight layer.
    cache = [
        np.zeros(shape=weight_layer.shape) for weight_layer in network.weights
    ]

    def calculate_dW(layer_index, dX):
        cache[layer_index] = decay_rate * cache[layer_index] + (
            1 - decay_rate) * dX**2
        return -learning_rate * dX / (np.sqrt(cache[layer_index]) + epsilon)

    #end

    return backpropagation_foundation(network, trainingset, testset,
                                      cost_function, calculate_dW,
                                      **configuration)
#end


def nesterov_momentum(network,
                      trainingset,
                      testset,
                      cost_function,
                      momentum_factor=0.9,
                      **kwargs):
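    """
    Nesterov momentum: applies the momentum step first and evaluates the
    gradient correction at the "looked-ahead" position, which typically
    converges faster than classical momentum.
    """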
    configuration = dict(default_configuration)
    configuration.update(kwargs)

    learning_rate = configuration["learning_rate"]
    # defaultdict(int) makes the momentum term start at zero for each layer.
    momentum = collections.defaultdict(int)

    def calculate_dW(layer_index, dX):
        dW = -learning_rate * dX + momentum_factor * momentum[layer_index]
        weight_change = -momentum_factor * momentum[layer_index] + (
            1 + momentum_factor) * dW

        # Store dW after computing the weight change, since the look-ahead
        # correction above needs the "previous" momentum.
        momentum[layer_index] = dW

        return weight_change

    #end

    return backpropagation_foundation(network, trainingset, testset,
                                      cost_function, calculate_dW,
                                      **configuration)
#end


def vanilla(network, trainingset, testset, cost_function, **kwargs):
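    """
    Vanilla gradient descent: steps each weight in the direction of the
    negative gradient, scaled by a fixed learning rate.
    """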
    configuration = dict(default_configuration)
    configuration.update(kwargs)

    learning_rate = configuration["learning_rate"]

    def calculate_dW(layer_index, dX):
        return -learning_rate * dX

    #end

    return backpropagation_foundation(network, trainingset, testset,
                                      cost_function, calculate_dW,
                                      **configuration)
#end


def classical_momentum(network,
                       trainingset,
                       testset,
                       cost_function,
                       momentum_factor=0.9,
                       **kwargs):
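    """
    Classical momentum: accumulates a decaying sum of past updates and reuses
    it as velocity, damping oscillations along steep directions.
    """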
    configuration = dict(default_configuration)
    configuration.update(kwargs)

    learning_rate = configuration["learning_rate"]
    momentum = collections.defaultdict(int)

    def calculate_dW(layer_index, dX):
        # Blend the current gradient step with the previous update (velocity).
        dW = -learning_rate * dX + momentum_factor * momentum[layer_index]
        momentum[layer_index] = dW
        return dW

    #end

    return backpropagation_foundation(network, trainingset, testset,
                                      cost_function, calculate_dW,
                                      **configuration)
#end
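# Example usage (a sketch, not part of this module: the `network`, dataset
# and `cost_function` objects are assumed to come from the surrounding
# library; `learning_rate` is the only configuration key this module itself
# reads, and 0.001 is an illustrative value):
#
#   Adam(network, trainingset, testset, cost_function, learning_rate=0.001)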