class AveragePooling(BasePooling): """ Average pooling layer. Parameters ---------- mode : {{'include_padding', 'exclude_padding'}} Gives you the choice to include or exclude padding. Defaults to ``include_padding``. {BasePooling.Parameters} Methods ------- {BasePooling.Methods} Attributes ---------- {BasePooling.Attributes} """ mode = ChoiceProperty(default='include_padding', choices={ 'include_padding': 'average_inc_pad', 'exclude_padding': 'average_exc_pad' }) def output(self, input_value): return pool.pool_2d(input_value, ds=self.size, mode=self.mode, ignore_border=True, st=self.stride_size, padding=self.padding)
class AveragePooling(BasePooling): """ Average pooling layer. Parameters ---------- mode : {{``include_padding``, ``exclude_padding``}} Give a choice to include or exclude padding. Defaults to ``include_padding``. {BasePooling.Parameters} Methods ------- {BasePooling.Methods} Attributes ---------- {BasePooling.Attributes} Examples -------- 2D pooling >>> from neupy import layers >>> >>> network = layers.join( ... layers.Input((3, 10, 10)), ... layers.AveragePooling((2, 2)), ... ) >>> network.output_shape (3, 5, 5) 1D pooling >>> from neupy import layers >>> >>> network = layers.join( ... layers.Input((10, 30)), ... layers.Reshape((10, 30, 1)), ... layers.AveragePooling((2, 1)), ... ) >>> network.output_shape (10, 15, 1) """ mode = ChoiceProperty(default='include_padding', choices={ 'include_padding': 'average_inc_pad', 'exclude_padding': 'average_exc_pad' }) def output(self, input_value): return pool.pool_2d(input_value, ws=self.size, mode=self.mode, ignore_border=self.ignore_border, stride=self.stride, pad=self.padding)
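# --- Added illustration (plain numpy, not part of NeuPy) ---
# A minimal sketch of what the 2x2 average pooling described in the
# docstring above does to a single-channel 4x4 input: each non-overlapping
# 2x2 block is replaced by its mean, halving both spatial dimensions.
import numpy as np

x = np.arange(16, dtype=float).reshape(4, 4)

# Split the image into 2x2 blocks and average within each block.
pooled = x.reshape(2, 2, 2, 2).mean(axis=(1, 3))

print(pooled.shape)  # (2, 2)
print(pooled)        # [[ 2.5  4.5] [10.5 12.5]]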
class DiscreteMemory(BaseSkeleton, Configurable): """ Base class for discrete memory networks. Notes ----- * {discrete_data_note} """ __discrete_data_note = """ Input and output data must contain only \ binary values. """ __discrete_params = """mode : {'sync', 'async'} Identifies pattern recovery mode. ``sync`` mode tries to recover a pattern using the whole input vector. ``async`` mode randomly chooses some values from the input vector and repeats this procedure ``n_times`` times. Defaults to ``sync``. n_times : int Available only in ``async`` mode. Identifies number of random trials. Defaults to ``100``. """ shared_docs = { 'discrete_data_note': __discrete_data_note, 'discrete_params': __discrete_params } mode = ChoiceProperty(default='sync', choices=['async', 'sync']) n_times = NonNegativeIntProperty(default=100) def __init__(self, **options): super(DiscreteMemory, self).__init__(**options) self.weight = None if 'n_times' in options and self.mode != 'async': self.logs.warning("You can use `n_times` property only in " "`async` mode.") def discrete_validation(self, matrix): """ Validate discrete matrix. Parameters ---------- matrix : array-like Matrix for validation. Returns ------- bool ``True`` if all ``matrix`` values are discrete (only zeros and ones) and ``False`` otherwise. """ if np_any((matrix != 0) & (matrix != 1)): raise ValueError("This network is discrete. It means that you " "can use only data which contains 0 and 1 values")
class DiscreteMemory(BaseSkeleton, Configurable): """ Base class for discrete memory networks. Notes ----- - Input and output vectors should contain only binary values. Parameters ---------- mode : {{``sync``, ``async``}} Identifies pattern recovery mode. - ``sync`` mode tries to recover pattern using all values from the input vector. - ``async`` mode randomly chooses some values from the input vector and repeats this procedure iteratively. Number of iterations is defined by the ``n_times`` parameter. Defaults to ``sync``. n_times : int Available only in ``async`` mode. Identifies number of random trials. Defaults to ``100``. """ mode = ChoiceProperty(default='sync', choices=['async', 'sync']) n_times = IntProperty(default=100, minval=1) def __init__(self, **options): super(DiscreteMemory, self).__init__(**options) self.weight = None if 'n_times' in options and self.mode != 'async': self.logs.warning("You can use `n_times` property only in " "`async` mode.") def discrete_validation(self, matrix): """ Validate discrete matrix. Parameters ---------- matrix : array-like Matrix for validation. """ if np.any(~np.isin(matrix, [0, 1])): raise ValueError( "This network expects only discrete inputs. It means that " "it's possible to use only matrices with binary values " "(0 and 1)." )
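# --- Added illustration (plain numpy, not part of NeuPy) ---
# The binary check used in discrete_validation above, shown on plain
# numpy arrays: any value other than 0 or 1 triggers the ValueError.
import numpy as np

valid = np.array([[0, 1, 1], [1, 0, 0]])
invalid = np.array([[0, 2, 1], [1, 0, 0]])

print(np.any(~np.isin(valid, [0, 1])))    # False -> passes validation
print(np.any(~np.isin(invalid, [0, 1])))  # True  -> would raise ValueError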
class BaseLayer(with_metaclass(LayerMeta, ChainConnection, BaseConfigurable)): """ Base class for all layers. Parameters ---------- {layer_params} """ __layer_params = """input_size : int Layer input size. weight : 2D array-like or None Defines layer weights. `None` means that weights will be generated randomly depending on the `init_method` property. `None` by default. init_method : {'gauss', 'bounded', 'ortho'} Weight initialization method. `gauss` generates random weights from the standard normal distribution. `bounded` generates uniform random weights within the specified bounds. `ortho` generates a random orthogonal matrix. random_weight_bound : tuple of two int Available only for `init_method` equal to `bounded`, defaults to `(0, 1)`. """ shared_docs = {'layer_params': __layer_params} input_size = IntProperty() weight = ArrayProperty(default=None) random_weight_bound = NumberBoundProperty(default=(0, 1)) init_method = ChoiceProperty(default=GAUSSIAN, choices=[GAUSSIAN, BOUNDED, ORTHOGONAL]) def __init__(self, input_size, **options): super(BaseLayer, self).__init__() self.input_size = input_size self.use_bias = False # Default variables which will change after initialization self.relate_to_layer = None self.size = None # If we assign a function to a class attribute, Python treats it # as a new class method and will call it with `self` as the # first argument. if hasattr(self.__class__, 'activation_function'): self.activation_function = self.__class__.activation_function # Initialize default options BaseConfigurable.__init__(self, **options) def relate_to(self, right_layer): self.relate_to_layer = right_layer def initialize(self, with_bias=False): self.use_bias = with_bias size = self.input_size + self.use_bias self.size = (size, self.relate_to_layer.input_size) self.weight = self._init_weight() # --------------- Weights manipulations --------------- # def _init_weight(self): if self.weight is not None: return self.weight init_method = self.init_method if init_method == GAUSSIAN: return randn(*self.size) elif init_method == BOUNDED: return random_bounded(self.size, *self.random_weight_bound) elif init_method == ORTHOGONAL: return random_orthogonal(self.size) @property def weight_without_bias(self): if self.use_bias: return self.weight[1:, :] return self.weight # --------------- Layer operations --------------- # def summator(self, input_value): return dot(input_value, self.weight) def output(self, input_value): input_data = self.preformat_input(input_value) summated = self.summator(input_data) return self.activation_function(summated) def preformat_input(self, input_data): if self.use_bias: input_data = add_bias_column(input_data) return input_data def __repr__(self): return '{name}({size})'.format(name=self.__class__.__name__, size=self.input_size)
class SOFM(Kohonen): """ Self-Organizing Feature Map (SOFM or SOM). Notes ----- - Training data samples should have normalized features. Parameters ---------- {BaseAssociative.n_inputs} n_outputs : int or None Number of outputs. Parameter is optional in case if ``feature_grid`` was specified. .. code-block:: python if n_outputs is None: n_outputs = np.prod(feature_grid) learning_radius : int Parameter defines radius within which we consider all neurons as neighbours to the winning neuron. The bigger the value the more neurons will be updated after each iteration. The ``0`` values means that we don't update neighbour neurons. Defaults to ``0``. std : int, float Parameters controls learning rate for each neighbour. The further neighbour neuron from the winning neuron the smaller that learning rate for it. Learning rate scales based on the factors produced by the normal distribution with center in the place of a winning neuron and standard deviation specified as a parameter. The learning rate for the winning neuron is always equal to the value specified in the ``step`` parameter and for neighbour neurons it's always lower. The bigger the value for this parameter the bigger learning rate for the neighbour neurons. Defaults to ``1``. features_grid : list, tuple, None Feature grid defines shape of the output neurons. The new shape should be compatible with the number of outputs. It means that the following condition should be true: .. code-block:: python np.prod(features_grid) == n_outputs SOFM implementation supports n-dimensional grids. For instance, in order to specify grid as cube instead of the regular rectangular shape we can set up options as the following: .. code-block:: python SOFM( ... features_grid=(5, 5, 5), ... ) Defaults to ``(n_outputs, 1)``. grid_type : {{``rect``, ``hexagon``}} Defines connection type in feature grid. Type defines which neurons we will consider as closest to the winning neuron during the training. - ``rect`` - Connections between neurons will be organized in hexagonal grid. - ``hexagon`` - Connections between neurons will be organized in hexagonal grid. It works only for 1d or 2d grids. Defaults to ``rect``. distance : {{``euclid``, ``dot_product``, ``cos``}} Defines function that will be used to compute closest weight to the input sample. - ``dot_product``: Just a regular dot product between data sample and network's weights - ``euclid``: Euclidean distance between data sample and network's weights - ``cos``: Cosine distance between data sample and network's weights Defaults to ``euclid``. reduce_radius_after : int or None Every specified number of epochs ``learning_radius`` parameter will be reduced by ``1``. Process continues until ``learning_radius`` equal to ``0``. The ``None`` value disables parameter reduction during the training. Defaults to ``100``. reduce_step_after : int or None Defines reduction rate at which parameter ``step`` will be reduced using the following formula: .. code-block:: python step = step / (1 + current_epoch / reduce_step_after) The ``None`` value disables parameter reduction during the training. Defaults to ``100``. reduce_std_after : int or None Defines reduction rate at which parameter ``std`` will be reduced using the following formula: .. code-block:: python std = std / (1 + current_epoch / reduce_std_after) The ``None`` value disables parameter reduction during the training. Defaults to ``100``. weight : array-like, Initializer or {{``init_pca``, ``sample_from_data``}} Neural network weights. 
Value defined manually should have shape ``(n_inputs, n_outputs)``. Also, it's possible to initialize weights based on the training data. There are two options: - ``sample_from_data`` - Before the training starts, randomly takes a number of training samples equal to the number of expected outputs. - ``init_pca`` - Before the training starts, SOFM applies PCA on a covariance matrix built from the training samples. Weights will be generated based on the two eigenvectors associated with the largest eigenvalues. Defaults to :class:`Normal() <neupy.init.Normal>`. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.signals} {Verbose.verbose} Methods ------- init_weights(train_data) Initializes weights based on the input data. It works only for the `init_pca` and `sample_from_data` options. For other cases it will throw an error. {BaseSkeleton.predict} {BaseAssociative.train} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms, utils >>> >>> utils.reproducible() >>> >>> data = np.array([ ... [0.1961, 0.9806], ... [-0.1961, 0.9806], ... [-0.5812, -0.8137], ... [-0.8137, -0.5812], ... ]) >>> >>> sofm = algorithms.SOFM( ... n_inputs=2, ... n_outputs=2, ... step=0.1, ... learning_radius=0 ... ) >>> sofm.train(data, epochs=100) >>> sofm.predict(data) array([[0, 1], [0, 1], [1, 0], [1, 0]]) """ n_outputs = IntProperty(minval=1, allow_none=True, default=None) weight = SOFMWeightParameter(default=init.Normal(), choices={ 'init_pca': linear_initialization, 'sample_from_data': sample_data, }) features_grid = TypedListProperty(allow_none=True, default=None) DistanceParameter = namedtuple('DistanceParameter', 'name func') distance = ChoiceProperty(default='euclid', choices={ 'dot_product': DistanceParameter(name='dot_product', func=np.dot), 'euclid': DistanceParameter(name='euclid', func=neg_euclid_distance), 'cos': DistanceParameter(name='cosine', func=cosine_similarity), }) GridTypeMethods = namedtuple('GridTypeMethods', 'name find_neighbours find_step_scaler') grid_type = ChoiceProperty( default='rect', choices={ 'rect': GridTypeMethods(name='rectangle', find_neighbours=find_neighbours_on_rect_grid, find_step_scaler=find_step_scaler_on_rect_grid), 'hexagon': GridTypeMethods(name='hexagon', find_neighbours=find_neighbours_on_hexagon_grid, find_step_scaler=find_step_scaler_on_hexagon_grid) }) learning_radius = IntProperty(default=0, minval=0) std = NumberProperty(minval=0, default=1) reduce_radius_after = IntProperty(default=100, minval=1, allow_none=True) reduce_std_after = IntProperty(default=100, minval=1, allow_none=True) reduce_step_after = IntProperty(default=100, minval=1, allow_none=True) def __init__(self, **options): super(BaseAssociative, self).__init__(**options) if self.n_outputs is None and self.features_grid is None: raise ValueError("One of the following parameters has to be " "specified: n_outputs, features_grid") elif self.n_outputs is None: self.n_outputs = np.prod(self.features_grid) n_grid_elements = np.prod(self.features_grid) invalid_feature_grid = (self.features_grid is not None and n_grid_elements != self.n_outputs) if invalid_feature_grid: raise ValueError( "Feature grid should contain the same number of elements " "as in the output layer: {0}, but found: {1} (shape: {2})" "".format(self.n_outputs, n_grid_elements, self.features_grid)) if self.features_grid is None: self.features_grid = (self.n_outputs, 1) if len(self.features_grid) > 2 and self.grid_type.name == 'hexagon': raise ValueError("SOFM with hexagon 
grid type should have " "one or two dimensional feature grid, but got " "{}d instead (shape: {!r})".format( len(self.features_grid), self.features_grid)) is_pca_init = (isinstance(options.get('weight'), six.string_types) and options.get('weight') == 'init_pca') self.initialized = False if not callable(self.weight): super(Kohonen, self).init_weights() self.initialized = True if self.distance.name == 'cosine': self.weight /= np.linalg.norm(self.weight, axis=0) elif is_pca_init and self.grid_type.name != 'rectangle': raise WeightInitializationError( "Cannot apply PCA weight initialization for non-rectangular " "grid. Grid type: {}".format(self.grid_type.name)) def predict_raw(self, X): X = format_data(X, is_feature1d=(self.n_inputs == 1)) if X.ndim != 2: raise ValueError("Only 2D inputs are allowed") n_samples = X.shape[0] output = np.zeros((n_samples, self.n_outputs)) for i, input_row in enumerate(X): output[i, :] = self.distance.func(input_row.reshape(1, -1), self.weight) return output def update_indexes(self, layer_output): neuron_winner = layer_output.argmax(axis=1).item(0) winner_neuron_coords = np.unravel_index(neuron_winner, self.features_grid) learning_radius = self.learning_radius step = self.step std = self.std if self.reduce_radius_after is not None: learning_radius -= self.last_epoch // self.reduce_radius_after learning_radius = max(0, learning_radius) if self.reduce_step_after is not None: step = decay_function(step, self.last_epoch, self.reduce_step_after) if self.reduce_std_after is not None: std = decay_function(std, self.last_epoch, self.reduce_std_after) methods = self.grid_type output_grid = np.reshape(layer_output, self.features_grid) output_with_neighbours = methods.find_neighbours( grid=output_grid, center=winner_neuron_coords, radius=learning_radius) step_scaler = methods.find_step_scaler(grid=output_grid, center=winner_neuron_coords, std=std) index_y, = np.nonzero(output_with_neighbours.reshape(self.n_outputs)) step_scaler = step_scaler.reshape(self.n_outputs) return index_y, step * step_scaler[index_y] def init_weights(self, X_train): if self.initialized: raise WeightInitializationError( "Weights have been already initialized") weight_initializer = self.weight self.weight = weight_initializer(X_train, self.features_grid) self.initialized = True if self.distance.name == 'cosine': self.weight /= np.linalg.norm(self.weight, axis=0) def train(self, X_train, epochs=100): if not self.initialized: self.init_weights(X_train) super(SOFM, self).train(X_train, epochs=epochs) def one_training_update(self, X_train, y_train=None): step = self.step predict = self.predict update_indexes = self.update_indexes error = 0 for input_row in X_train: input_row = np.reshape(input_row, (1, input_row.size)) layer_output = predict(input_row) index_y, step = update_indexes(layer_output) distance = input_row.T - self.weight[:, index_y] updated_weights = (self.weight[:, index_y] + step * distance) if self.distance.name == 'cosine': updated_weights /= np.linalg.norm(updated_weights, axis=0) self.weight[:, index_y] = updated_weights error += np.abs(distance).mean() return error / len(X_train)
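# --- Added illustration (plain numpy, not part of NeuPy) ---
# Two relationships from the SOFM docstring and code above, checked with
# plain numpy: the feature grid must cover exactly n_outputs neurons, and
# the flat index of the winning neuron maps onto grid coordinates with
# np.unravel_index (as done in update_indexes).
import numpy as np

features_grid = (5, 5, 5)
n_outputs = 125
assert np.prod(features_grid) == n_outputs

winner = 37  # flat index of the winning neuron
print(np.unravel_index(winner, features_grid))  # (1, 2, 2)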
class LevenbergMarquardt(NoStepSelection, GradientDescent): """ Levenberg-Marquardt algorithm. Notes ----- * Network minimizes only Mean Squared Error function. Parameters ---------- mu : float Control invertion for J.T * J matrix, defaults to `0.1`. mu_update_factor : float Factor to decrease the mu if update decrese the error, otherwise increse mu by the same factor. Defaults to ``1.2`` error: {{'mse'}} Levenberg-Marquardt works only for quadratic functions. Defaults to ``mse``. {GradientDescent.addons} {ConstructableNetwork.connection} {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} {SupervisedLearning.train} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> lmnet = algorithms.LevenbergMarquardt( ... (2, 3, 1), ... verbose=False ... ) >>> lmnet.train(x_train, y_train) See Also -------- :network:`GradientDescent` : GradientDescent algorithm. """ mu = BoundedProperty(default=0.01, minval=0) mu_update_factor = BoundedProperty(default=1.2, minval=1) error = ChoiceProperty(default='mse', choices={'mse': errors.mse}) def init_variables(self): super(LevenbergMarquardt, self).init_variables() self.variables.update( mu=theano.shared(name='mu', value=asfloat(self.mu)), last_error=theano.shared(name='last_error', value=np.nan), ) def init_train_updates(self): network_output = self.variables.network_output prediction_func = self.variables.train_prediction_func last_error = self.variables.last_error error_func = self.variables.error_func mu = self.variables.mu new_mu = ifelse( T.lt(last_error, error_func), mu * self.mu_update_factor, mu / self.mu_update_factor, ) mse_for_each_sample = T.mean((network_output - prediction_func)**2, axis=1) params = list(iter_parameters(self)) param_vector = parameters2vector(self) J = compute_jaccobian(mse_for_each_sample, params) n_params = J.shape[1] updated_params = param_vector - T.nlinalg.matrix_inverse( J.T.dot(J) + new_mu * T.eye(n_params)).dot( J.T).dot(mse_for_each_sample) updates = [(mu, new_mu)] parameter_updates = setup_parameter_updates(params, updated_params) updates.extend(parameter_updates) return updates def on_epoch_start_update(self, epoch): super(LevenbergMarquardt, self).on_epoch_start_update(epoch) last_error = self.errors.last() if last_error is not None: self.variables.last_error.set_value(last_error)
class QuasiNewton(NoStepSelection, GradientDescent): """ Quasi-Newton algorithm optimization. Parameters ---------- {GradientDescent.Parameters} Attributes ---------- {GradientDescent.Attributes} Methods ------- {GradientDescent.Methods} Examples -------- Simple example >>> import numpy as np >>> from neupy import algorithms >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> qnnet = algorithms.QuasiNewton( ... (2, 3, 1), ... update_function='bfgs', ... verbose=False ... ) >>> qnnet.train(x_train, y_train, epochs=10) See Also -------- :network:`GradientDescent` : GradientDescent algorithm. """ update_function = ChoiceProperty(default='bfgs', choices={ 'bfgs': bfgs, 'dfp': dfp, 'psb': psb, 'sr1': sr1, }) h0_scale = NumberProperty(default=1, minval=0) gradient_tol = ProperFractionProperty(default=1e-5) def init_variables(self): super(QuasiNewton, self).init_variables() n_params = sum(p.get_value().size for p in iter_parameters(self)) self.variables.update( inv_hessian=theano.shared( name='inv_hessian', value=asfloat(self.h0_scale * np.eye(int(n_params))), ), prev_params=theano.shared( name='prev_params', value=asfloat(np.zeros(n_params)), ), prev_full_gradient=theano.shared( name='prev_full_gradient', value=asfloat(np.zeros(n_params)), ), ) def init_train_updates(self): network_input = self.variables.network_input network_output = self.variables.network_output inv_hessian = self.variables.inv_hessian prev_params = self.variables.prev_params prev_full_gradient = self.variables.prev_full_gradient params = list(iter_parameters(self)) param_vector = parameters2vector(self) gradients = T.grad(self.variables.error_func, wrt=params) full_gradient = T.concatenate([grad.flatten() for grad in gradients]) new_inv_hessian = ifelse( T.eq(self.variables.epoch, 1), inv_hessian, self.update_function(inv_hessian, param_vector - prev_params, full_gradient - prev_full_gradient)) param_delta = -new_inv_hessian.dot(full_gradient) def prediction(step): # TODO: I need to update this ugly solution later updated_params = param_vector + step * param_delta layer_input = network_input start_pos = 0 for layer in self.layers: for param in layer.parameters: end_pos = start_pos + param.size parameter_name, parameter_id = param.name.split('_') setattr( layer, parameter_name, T.reshape(updated_params[start_pos:end_pos], param.shape)) start_pos = end_pos layer_input = layer.output(layer_input) return layer_input def phi(step): return self.error(network_output, prediction(step)) def derphi(step): error_func = self.error(network_output, prediction(step)) return T.grad(error_func, wrt=step) step = asfloat(line_search(phi, derphi)) updated_params = param_vector + step * param_delta updates = setup_parameter_updates(params, updated_params) updates.extend([ (inv_hessian, new_inv_hessian), (prev_params, param_vector), (prev_full_gradient, full_gradient), ]) return updates
class BasePooling(BaseLayer): """ Base class for the pooling layers. Parameters ---------- size : tuple with 2 integers Factor by which to downscale ``(vertical, horizontal)``. ``(2, 2)`` will halve the image in each dimension. stride : tuple or int. Stride size, which is the number of shifts over rows/cols to get the next pool region. If stride is None, it is considered equal to ds (no overlap on pooling regions). padding : {{``valid``, ``same``}} ``(pad_h, pad_w)``, pad zeros to extend beyond four borders of the images, pad_h is the size of the top and bottom margins, and pad_w is the size of the left and right margins. {BaseLayer.Parameters} Methods ------- {BaseLayer.Methods} Attributes ---------- {BaseLayer.Attributes} """ size = TypedListProperty(required=True, element_type=int) stride = Spatial2DProperty(allow_none=True) padding = ChoiceProperty(choices=('SAME', 'VALID', 'same', 'valid')) pooling_type = None def __init__(self, size, stride=None, padding='valid', name=None): super(BasePooling, self).__init__(name=name) self.size = size self.stride = stride self.padding = padding def fail_if_shape_invalid(self, input_shape): if input_shape and input_shape.ndims != 4: raise LayerConnectionError( "Pooling layer expects an input with 4 " "dimensions, got {} with shape {}. Layer: {}" "".format(len(input_shape), input_shape, self)) def get_output_shape(self, input_shape): input_shape = tf.TensorShape(input_shape) if input_shape.ndims is None: return tf.TensorShape((None, None, None, None)) self.fail_if_shape_invalid(input_shape) n_samples, rows, cols, n_kernels = input_shape row_filter_size, col_filter_size = self.size stride = self.size if self.stride is None else self.stride row_stride, col_stride = stride output_rows = pooling_output_shape( rows, row_filter_size, self.padding, row_stride) output_cols = pooling_output_shape( cols, col_filter_size, self.padding, col_stride) # In python 2, we can get float number after rounding procedure # and it might break processing in the subsequent layers. return tf.TensorShape((n_samples, output_rows, output_cols, n_kernels)) def output(self, input_value, **kwargs): return tf.nn.pool( input_value, self.size, pooling_type=self.pooling_type, padding=self.padding.upper(), strides=self.stride or self.size, data_format="NHWC") def __repr__(self): return self._repr_arguments( self.size, name=self.name, stride=self.stride, padding=self.padding, )
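# --- Added illustration (not part of NeuPy) ---
# Rough sketch of how the spatial output size is typically computed for
# the two padding modes (TensorFlow convention). The real helper used
# above is pooling_output_shape; its exact behaviour may differ in
# edge cases.
import math

def pooled_size(size, filter_size, stride, padding):
    if padding.lower() == 'valid':
        return (size - filter_size) // stride + 1
    return int(math.ceil(float(size) / stride))  # 'same'

print(pooled_size(10, 2, 2, 'valid'))  # 5
print(pooled_size(11, 2, 2, 'same'))   # 6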
class ConjugateGradient(Backpropagation): """ Conjugate Gradient algorithm. Parameters ---------- update_function : {{'fletcher_reeves', 'polak_ribiere',\ 'hentenes_stiefel', 'conjugate_descent', 'liu_storey', 'dai_yuan'}} Update function. Defaults to ``fletcher_reeves``. {optimizations} {full_params} Methods ------- {supervised_train} {raw_predict} {full_methods} Examples -------- >>> import numpy as np >>> np.random.seed(0) >>> >>> from sklearn import datasets, preprocessing >>> from sklearn.cross_validation import train_test_split >>> from neupy import algorithms, layers >>> from neupy.functions import rmsle >>> >>> dataset = datasets.load_boston() >>> data, target = dataset.data, dataset.target >>> >>> data_scaler = preprocessing.MinMaxScaler() >>> target_scaler = preprocessing.MinMaxScaler() >>> >>> x_train, x_test, y_train, y_test = train_test_split( ... data_scaler.fit_transform(data), ... target_scaler.fit_transform(target), ... train_size=0.85 ... ) >>> >>> cgnet = algorithms.ConjugateGradient( ... connection=[ ... layers.SigmoidLayer(13), ... layers.SigmoidLayer(50), ... layers.OutputLayer(1), ... ], ... search_method='golden', ... update_function='fletcher_reeves', ... optimizations=[algorithms.LinearSearch], ... verbose=False ... ) >>> >>> cgnet.train(x_train, y_train, epochs=100) >>> y_predict = cgnet.predict(x_test) >>> >>> real = target_scaler.inverse_transform(y_test) >>> predicted = target_scaler.inverse_transform(y_predict) >>> >>> error = rmsle(real, predicted.round(1)) >>> error 0.20752676697596578 See Also -------- :network:`Backpropagation`: Backpropagation algorithm. :network:`LinearSearch`: Linear Search important algorithm for step \ selection in Conjugate Gradient algorithm. """ update_function = ChoiceProperty(default='fletcher_reeves', choices={ 'fletcher_reeves': fletcher_reeves, 'polak_ribiere': polak_ribiere, 'hentenes_stiefel': hentenes_stiefel, 'conjugate_descent': conjugate_descent, 'liu_storey': liu_storey, 'dai_yuan': dai_yuan, }) def init_layers(self): super(ConjugateGradient, self).init_layers() self.n_weights = sum(mul(*layer.size) for layer in self.train_layers) def get_weight_delta(self, output_train, target_train): gradients = super(ConjugateGradient, self).get_gradient(output_train, target_train) epoch = self.epoch gradient = matrix_list_in_one_vector(gradients) weight_delta = -gradient if epoch > 1 and epoch % self.n_weights == 0: # Must reset after every N iteration, because algoritm # lose conjugacy. self.logs.info("TRAIN", "Reset conjugate gradient vector") del self.prev_gradient if hasattr(self, 'prev_gradient'): gradient_old = self.prev_gradient weight_delta_old = self.prev_weight_delta beta = self.update_function(gradient_old, gradient, weight_delta_old) weight_delta += beta * weight_delta_old weight_deltas = vector_to_list_of_matrix( weight_delta, (layer.size for layer in self.train_layers)) self.prev_weight_delta = weight_delta.copy() self.prev_gradient = gradient.copy() return weight_deltas
class SOFM(Kohonen): """ Self-Organizing Feature Map (SOFM). Parameters ---------- {BaseAssociative.n_inputs} {BaseAssociative.n_outputs} learning_radius : int Learning radius. features_grid : list, tuple, None Feature grid defines shape of the output neurons. The new shape should be compatible with the number of outputs. Defaults to ``(n_outputs, 1)``. transform : {{``linear``, ``euclid``, ``cos``}} Indicate transformation operation related to the input layer. - The ``linear`` value mean that input data would be multiplied by weights in typical way. - The ``euclid`` method will identify the closest weight vector to the input one. - The ``cos`` transformation identifies cosine similarity between input dataset and network's weights. Defaults to ``linear``. {BaseAssociative.weight} {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} {BaseAssociative.train} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms, environment >>> >>> environment.reproducible() >>> >>> data = np.array([ ... [0.1961, 0.9806], ... [-0.1961, 0.9806], ... [-0.5812, -0.8137], ... [-0.8137, -0.5812], ... ]) >>> >>> sofmnet = algorithms.SOFM( ... n_inputs=2, ... n_outputs=2, ... step=0.1, ... learning_radius=0, ... features_grid=(2, 1), ... ) >>> sofmnet.train(data, epochs=100) >>> sofmnet.predict(data) array([[0, 1], [0, 1], [1, 0], [1, 0]]) """ learning_radius = IntProperty(default=0, minval=0) features_grid = TypedListProperty(allow_none=True, default=None) transform = ChoiceProperty(default='linear', choices={ 'linear': np.dot, 'euclid': neg_euclid_distance, 'cos': cosine_similarity, }) def __init__(self, **options): super(SOFM, self).__init__(**options) invalid_feature_grid = (self.features_grid is not None and mul(*self.features_grid) != self.n_outputs) if invalid_feature_grid: raise ValueError( "Feature grid should contain the same number of elements as " "in the output layer: {0}, but found: {1} ({2}x{3})" "".format(self.n_outputs, mul(*self.features_grid), self.features_grid[0], self.features_grid[1])) if self.features_grid is None: self.features_grid = (self.n_outputs, 1) def predict_raw(self, input_data): input_data = format_data(input_data) n_samples = input_data.shape[0] output = np.zeros((n_samples, self.n_outputs)) for i, input_row in enumerate(input_data): output[i, :] = self.transform(input_row.reshape(1, -1), self.weight) return output def update_indexes(self, layer_output): neuron_winner = layer_output.argmax(axis=1) feature_bound = self.features_grid[1] output_with_neightbours = neuron_neighbours( np.reshape(layer_output, self.features_grid), (neuron_winner // feature_bound, neuron_winner % feature_bound), self.learning_radius) index_y, _ = np.nonzero( np.reshape(output_with_neightbours, (self.n_outputs, 1))) return index_y
class A(Configurable): choice = ChoiceProperty(choices='test')
class A(Configurable): choice = ChoiceProperty(choices=['one', 'two', 'three'], default='two')
class A(Configurable): choice = ChoiceProperty(choices={'one': 1, 'two': 2, 'three': 3})
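# --- Added usage sketch (assumes standard NeuPy ChoiceProperty behaviour) ---
# With dict-based choices the property is assigned by key and read back as
# the mapped value, which is how the pooling and optimizer classes in this
# file use it (e.g. 'include_padding' -> 'average_inc_pad'). Assignments
# outside the listed choices are expected to be rejected with a ValueError.
a = A()
a.choice = 'two'
print(a.choice)  # 2

try:
    a.choice = 'four'  # not listed in choices
except ValueError as exception:
    print(exception)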
class LevenbergMarquardt(BaseOptimizer): """ Levenberg-Marquardt algorithm is a variation of Newton's method. It minimizes the MSE loss. The algorithm approximates the Hessian matrix using the dot product of two Jacobian matrices. Notes ----- - Method requires all training data during propagation, which means it's not allowed to use mini-batches. - Network minimizes only Mean Squared Error (MSE) loss function. - Efficient for small training datasets, because it computes the gradient for each sample separately. - Efficient for small-sized networks. Parameters ---------- {BaseOptimizer.network} mu : float Controls inversion of the ``J.T * J`` matrix. Defaults to ``0.01``. mu_update_factor : float Factor by which ``mu`` is decreased when an update decreases the error, and by which ``mu`` is increased otherwise. Defaults to ``1.2``. loss : {{``mse``}} Levenberg-Marquardt works only for quadratic functions. Defaults to ``mse``. {BaseOptimizer.show_epoch} {BaseOptimizer.shuffle_data} {BaseOptimizer.signals} {BaseOptimizer.verbose} Attributes ---------- {BaseOptimizer.Attributes} Methods ------- {BaseOptimizer.Methods} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> from neupy.layers import * >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> network = Input(2) >> Sigmoid(3) >> Sigmoid(1) >>> optimizer = algorithms.LevenbergMarquardt(network) >>> optimizer.train(x_train, y_train) See Also -------- :network:`BaseOptimizer` : BaseOptimizer algorithm. """ mu = BoundedProperty(default=0.01, minval=0) mu_update_factor = BoundedProperty(default=1.2, minval=1) loss = ChoiceProperty(default='mse', choices={'mse': objectives.mse}) step = WithdrawProperty() regularizer = WithdrawProperty() def init_functions(self): self.variables.update( mu=tf.Variable(self.mu, name='lev-marq/mu'), last_error=tf.Variable(np.nan, name='lev-marq/last-error'), ) super(LevenbergMarquardt, self).init_functions() def init_train_updates(self): training_outputs = self.network.training_outputs last_error = self.variables.last_error error_func = self.variables.loss mu = self.variables.mu new_mu = tf.where( tf.less(last_error, error_func), mu * self.mu_update_factor, mu / self.mu_update_factor, ) err_for_each_sample = flatten((self.target - training_outputs)**2) variables = self.network.variables params = [var for var in variables.values() if var.trainable] param_vector = make_single_vector(params) J = compute_jacobian(err_for_each_sample, params) J_T = tf.transpose(J) n_params = J.shape[1] parameter_update = tf.matrix_solve( tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value), tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1))) updated_params = param_vector - flatten(parameter_update) updates = [(mu, new_mu)] parameter_updates = setup_parameter_updates(params, updated_params) updates.extend(parameter_updates) return updates def one_training_update(self, X_train, y_train): if self.errors.train: last_error = self.errors.train[-1] self.variables.last_error.load(last_error, tensorflow_session()) return super(LevenbergMarquardt, self).one_training_update(X_train, y_train)
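# --- Added reference sketch (plain numpy, not part of NeuPy) ---
# The parameter update computed in init_train_updates above, written out
# explicitly: w_new = w - (J^T J + mu * I)^-1 J^T e, where J is the
# Jacobian of the per-sample errors e with respect to the parameters.
import numpy as np

def levenberg_marquardt_step(w, J, e, mu):
    n_params = J.shape[1]
    hessian_approx = J.T.dot(J) + mu * np.eye(n_params)
    return w - np.linalg.solve(hessian_approx, J.T.dot(e))

w = np.zeros(3)
J = np.random.randn(10, 3)  # 10 samples, 3 parameters
e = np.random.randn(10)     # per-sample errors
print(levenberg_marquardt_step(w, J, e, mu=0.1))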
class ConjugateGradient(NoMultipleStepSelection, GradientDescent): """ Conjugate Gradient algorithm. Parameters ---------- update_function : {{``fletcher_reeves``, ``polak_ribiere``,\ ``hentenes_stiefel``, ``conjugate_descent``, ``liu_storey``,\ ``dai_yuan``}} Update function. Defaults to ``fletcher_reeves``. {GradientDescent.Parameters} Attributes ---------- {GradientDescent.Attributes} Methods ------- {GradientDescent.Methods} Examples -------- >>> from sklearn import datasets, preprocessing >>> from sklearn.model_selection import train_test_split >>> from neupy import algorithms, layers, estimators, environment >>> >>> environment.reproducible() >>> >>> dataset = datasets.load_boston() >>> data, target = dataset.data, dataset.target >>> >>> data_scaler = preprocessing.MinMaxScaler() >>> target_scaler = preprocessing.MinMaxScaler() >>> >>> x_train, x_test, y_train, y_test = train_test_split( ... data_scaler.fit_transform(data), ... target_scaler.fit_transform(target), ... test_size=0.15 ... ) >>> >>> cgnet = algorithms.ConjugateGradient( ... connection=[ ... layers.Input(13), ... layers.Sigmoid(50), ... layers.Sigmoid(1), ... ], ... search_method='golden', ... update_function='fletcher_reeves', ... addons=[algorithms.LinearSearch], ... verbose=False ... ) >>> >>> cgnet.train(x_train, y_train, epochs=100) >>> y_predict = cgnet.predict(x_test).round(1) >>> >>> real = target_scaler.inverse_transform(y_test) >>> predicted = target_scaler.inverse_transform(y_predict) >>> >>> error = estimators.rmsle(real, predicted) >>> error 0.2472330191179734 See Also -------- :network:`GradientDescent`: GradientDescent algorithm. :network:`LinearSearch`: Linear Search important algorithm for step \ selection in Conjugate Gradient algorithm. """ update_function = ChoiceProperty(default='fletcher_reeves', choices={ 'fletcher_reeves': fletcher_reeves, 'polak_ribiere': polak_ribiere, 'hentenes_stiefel': hentenes_stiefel, 'conjugate_descent': conjugate_descent, 'liu_storey': liu_storey, 'dai_yuan': dai_yuan, }) def init_variables(self): super(ConjugateGradient, self).init_variables() n_parameters = count_parameters(self.connection) self.variables.update(prev_delta=theano.shared( name="conj-grad/prev-delta", value=asfloat(np.zeros(n_parameters)), ), prev_gradient=theano.shared( name="conj-grad/prev-gradient", value=asfloat(np.zeros(n_parameters)), )) def init_train_updates(self): step = self.variables.step previous_delta = self.variables.prev_delta previous_gradient = self.variables.prev_gradient n_parameters = count_parameters(self.connection) parameters = parameter_values(self.connection) param_vector = T.concatenate([param.flatten() for param in parameters]) gradients = T.grad(self.variables.error_func, wrt=parameters) full_gradient = T.concatenate([grad.flatten() for grad in gradients]) beta = self.update_function(previous_gradient, full_gradient, previous_delta) parameter_delta = ifelse( T.eq(T.mod(self.variables.epoch, n_parameters), 1), -full_gradient, -full_gradient + beta * previous_delta) updated_parameters = param_vector + step * parameter_delta updates = [ (previous_gradient, full_gradient), (previous_delta, parameter_delta), ] parameter_updates = setup_parameter_updates(parameters, updated_parameters) updates.extend(parameter_updates) return updates
class ConjugateGradient(NoMultipleStepSelection, GradientDescent): """ Conjugate Gradient algorithm. Parameters ---------- update_function : {{'fletcher_reeves', 'polak_ribiere',\ 'hentenes_stiefel', 'conjugate_descent', 'liu_storey', 'dai_yuan'}} Update function. Defaults to ``fletcher_reeves``. {GradientDescent.addons} {ConstructableNetwork.connection} {ConstructableNetwork.error} {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} Methods ------- {BaseSkeleton.predict} {SupervisedLearning.train} {BaseSkeleton.fit} {BaseNetwork.plot_errors} Examples -------- >>> from sklearn import datasets, preprocessing >>> from sklearn.cross_validation import train_test_split >>> from neupy import algorithms, layers, estimators, environment >>> >>> environment.reproducible() >>> >>> dataset = datasets.load_boston() >>> data, target = dataset.data, dataset.target >>> >>> data_scaler = preprocessing.MinMaxScaler() >>> target_scaler = preprocessing.MinMaxScaler() >>> >>> x_train, x_test, y_train, y_test = train_test_split( ... data_scaler.fit_transform(data), ... target_scaler.fit_transform(target), ... train_size=0.85 ... ) >>> >>> cgnet = algorithms.ConjugateGradient( ... connection=[ ... layers.Sigmoid(13), ... layers.Sigmoid(50), ... layers.RoundedOutput(1, decimals=1), ... ], ... search_method='golden', ... update_function='fletcher_reeves', ... addons=[algorithms.LinearSearch], ... verbose=False ... ) >>> >>> cgnet.train(x_train, y_train, epochs=100) >>> y_predict = cgnet.predict(x_test) >>> >>> real = target_scaler.inverse_transform(y_test) >>> predicted = target_scaler.inverse_transform(y_predict) >>> >>> error = estimators.rmsle(real, predicted) >>> error 0.20752676697596578 See Also -------- :network:`GradientDescent`: GradientDescent algorithm. :network:`LinearSearch`: Linear Search important algorithm for step \ selection in Conjugate Gradient algorithm. """ update_function = ChoiceProperty(default='fletcher_reeves', choices={ 'fletcher_reeves': fletcher_reeves, 'polak_ribiere': polak_ribiere, 'hentenes_stiefel': hentenes_stiefel, 'conjugate_descent': conjugate_descent, 'liu_storey': liu_storey, 'dai_yuan': dai_yuan, }) def init_variables(self): super(ConjugateGradient, self).init_variables() n_parameters = count_parameters(self) self.variables.update(prev_delta=theano.shared( name="prev_delta", value=asfloat(np.zeros(n_parameters)), ), prev_gradient=theano.shared( name="prev_gradient", value=asfloat(np.zeros(n_parameters)), )) def init_train_updates(self): step = self.variables.step previous_delta = self.variables.prev_delta previous_gradient = self.variables.prev_gradient n_parameters = count_parameters(self) parameters = list(iter_parameters(self)) param_vector = parameters2vector(self) gradients = T.grad(self.variables.error_func, wrt=parameters) full_gradient = T.concatenate([grad.flatten() for grad in gradients]) beta = self.update_function(previous_gradient, full_gradient, previous_delta) parameter_delta = ifelse( T.eq(T.mod(self.variables.epoch, n_parameters), 1), -full_gradient, -full_gradient + beta * previous_delta) updated_parameters = param_vector + step * parameter_delta updates = [ (previous_gradient, full_gradient), (previous_delta, parameter_delta), ] parameter_updates = setup_parameter_updates(parameters, updated_parameters) updates.extend(parameter_updates) return updates
class LevenbergMarquardt(NoStepSelection, GradientDescent): """ Levenberg-Marquardt algorithm. Notes ----- * Network minimizes only Mean Squared Error function. Parameters ---------- mu : float Control invertion for J.T * J matrix, defaults to `0.1`. mu_update_factor : float Factor to decrease the mu if update decrese the error, otherwise increse mu by the same factor. error: {{'mse'}} Levenberg-Marquardt works only for quadratic functions. Defaults to ``mse``. {GradientDescent.addons} {ConstructableNetwork.connection} {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} {SupervisedLearning.train} {BaseSkeleton.fit} {BaseNetwork.plot_errors} Examples -------- Simple example >>> import numpy as np >>> from neupy import algorithms >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> lmnet = algorithms.LevenbergMarquardt( ... (2, 3, 1), ... verbose=False ... ) >>> lmnet.train(x_train, y_train) Diabets dataset example >>> import numpy as np >>> from sklearn import datasets, preprocessing >>> from sklearn.cross_validation import train_test_split >>> from neupy import algorithms, layers >>> from neupy.estimators import rmsle >>> >>> dataset = datasets.load_diabetes() >>> data, target = dataset.data, dataset.target >>> >>> data_scaler = preprocessing.MinMaxScaler() >>> target_scaler = preprocessing.MinMaxScaler() >>> >>> x_train, x_test, y_train, y_test = train_test_split( ... data_scaler.fit_transform(data), ... target_scaler.fit_transform(target), ... train_size=0.85 ... ) >>> >>> # Network ... lmnet = algorithms.LevenbergMarquardt( ... connection=[ ... layers.Sigmoid(10), ... layers.Sigmoid(40), ... layers.Output(1), ... ], ... mu_update_factor=2, ... mu=0.1, ... step=0.25, ... show_epoch=10, ... use_bias=False, ... verbose=False ... ) >>> lmnet.train(x_train, y_train, epochs=100) >>> y_predict = lmnet.predict(x_test) >>> >>> error = rmsle(target_scaler.inverse_transform(y_test), ... target_scaler.inverse_transform(y_predict).round()) >>> error 0.47548200957888398 See Also -------- :network:`GradientDescent` : GradientDescent algorithm. 
""" mu = BoundedProperty(default=0.01, minval=0) mu_update_factor = BoundedProperty(default=5, minval=1) error = ChoiceProperty(default='mse', choices={'mse': errors.mse}) def init_variables(self): super(LevenbergMarquardt, self).init_variables() self.variables.update( mu=theano.shared(name='mu', value=asfloat(self.mu)), last_error=theano.shared(name='last_error', value=np.nan), ) def init_train_updates(self): network_output = self.variables.network_output prediction_func = self.variables.train_prediction_func last_error = self.variables.last_error error_func = self.variables.error_func mu = self.variables.mu new_mu = ifelse( T.lt(last_error, error_func), mu * self.mu_update_factor, mu / self.mu_update_factor, ) mse_for_each_sample = T.mean((network_output - prediction_func)**2, axis=1) params = list(iter_parameters(self)) param_vector = parameters2vector(self) J = compute_jaccobian(mse_for_each_sample, params) n_params = J.shape[1] updated_params = param_vector - T.nlinalg.matrix_inverse( J.T.dot(J) + new_mu * T.eye(n_params)).dot( J.T).dot(mse_for_each_sample) updates = [(mu, new_mu)] parameter_updates = setup_parameter_updates(params, updated_params) updates.extend(parameter_updates) return updates def on_epoch_start_update(self, epoch): super(LevenbergMarquardt, self).on_epoch_start_update(epoch) last_error = self.errors.last() if last_error is not None: self.variables.last_error.set_value(last_error)
class QuasiNewton(WolfeLineSearchForStep, BaseGradientDescent): """ Quasi-Newton algorithm. On every iteration the quasi-Newton method approximates the inverse Hessian matrix with an iterative update. It doesn't have a ``step`` parameter. Instead, the algorithm applies a line search for a step value that satisfies the strong Wolfe condition. Parameters that control the Wolfe search start with the ``wolfe_`` prefix. Parameters ---------- update_function : ``bfgs``, ``dfp``, ``sr1`` Update function for the iterative inverse Hessian matrix approximation. Defaults to ``bfgs``. - ``bfgs`` - A rank-2 update formula. It can suffer from round-off error and inaccurate line searches. - ``dfp`` - DFP is a method very similar to BFGS. It's a rank-2 update formula. It can suffer from round-off error and inaccurate line searches. - ``sr1`` - Symmetric rank 1 (SR1). Updates the inverse Hessian matrix by adding a symmetric rank-1 matrix. It's possible that there is no rank-1 update for the matrix; in this case the update won't be applied and the original inverse Hessian will be returned. h0_scale : float Default Hessian matrix is an identity matrix. The ``h0_scale`` parameter scales the identity matrix. Defaults to ``1``. epsilon : float Controls numerical stability for the ``update_function`` parameter. Defaults to ``1e-7``. {WolfeLineSearchForStep.Parameters} {BaseGradientDescent.connection} {BaseGradientDescent.error} {BaseGradientDescent.show_epoch} {BaseGradientDescent.shuffle_data} {BaseGradientDescent.epoch_end_signal} {BaseGradientDescent.train_end_signal} {BaseGradientDescent.verbose} {BaseGradientDescent.addons} Notes ----- - Method requires all training data during propagation, which means it's not allowed to use mini-batches. Attributes ---------- {BaseGradientDescent.Attributes} Methods ------- {BaseGradientDescent.Methods} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> qnnet = algorithms.QuasiNewton( ... (2, 3, 1), ... update_function='bfgs' ... ) >>> qnnet.train(x_train, y_train, epochs=10) References ---------- [1] Yang Ding, Enkeleida Lushi, Qingguo Li, Investigation of quasi-Newton methods for unconstrained optimization. http://people.math.sfu.ca/~elushi/project_833.pdf [2] Jorge Nocedal, Stephen J. Wright, Numerical Optimization. Chapter 6, Quasi-Newton Methods, p. 
135-163 """ update_function = ChoiceProperty(default='bfgs', choices={ 'bfgs': bfgs, 'dfp': dfp, 'sr1': sr1, }) epsilon = NumberProperty(default=1e-7, minval=0) h0_scale = NumberProperty(default=1, minval=0) step = WithdrawProperty() def init_variables(self): super(QuasiNewton, self).init_variables() n_parameters = count_parameters(self.connection) self.variables.update( inv_hessian=tf.Variable( asfloat(self.h0_scale) * tf.eye(n_parameters), name="quasi-newton/inv-hessian", dtype=tf.float32, ), prev_params=tf.Variable( tf.zeros([n_parameters]), name="quasi-newton/prev-params", dtype=tf.float32, ), prev_full_gradient=tf.Variable( tf.zeros([n_parameters]), name="quasi-newton/prev-full-gradient", dtype=tf.float32, ), ) def init_train_updates(self): inv_hessian = self.variables.inv_hessian prev_params = self.variables.prev_params prev_full_gradient = self.variables.prev_full_gradient params = parameter_values(self.connection) param_vector = make_single_vector(params) gradients = tf.gradients(self.variables.error_func, params) full_gradient = make_single_vector(gradients) new_inv_hessian = tf.where( tf.equal(self.variables.epoch, 1), inv_hessian, self.update_function(inv_H=inv_hessian, delta_w=param_vector - prev_params, delta_grad=full_gradient - prev_full_gradient, epsilon=self.epsilon)) param_delta = -dot(new_inv_hessian, full_gradient) step = self.find_optimal_step(param_vector, param_delta) updated_params = param_vector + step * param_delta updates = setup_parameter_updates(params, updated_params) # We have to compute these values first, otherwise # parallelization in tensorflow can mix update order # and, for example, previous gradient can be equal to # current gradient value. It happens because tensorflow # try to execute operations in parallel. required_variables = [new_inv_hessian, param_vector, full_gradient] with tf.control_dependencies(required_variables): updates.extend([ inv_hessian.assign(new_inv_hessian), prev_params.assign(param_vector), prev_full_gradient.assign(full_gradient), ]) return updates
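# --- Added reference sketch (plain numpy, not part of NeuPy) ---
# The BFGS inverse-Hessian update behind the ``bfgs`` choice above:
#   H_new = (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
# with s = delta_w, y = delta_grad and rho = 1 / (y^T s). The epsilon
# guard mirrors the role of the ``epsilon`` parameter, though the exact
# safeguard used by NeuPy may differ.
import numpy as np

def bfgs_inverse_hessian_update(inv_H, delta_w, delta_grad, epsilon=1e-7):
    rho = 1.0 / max(delta_grad.dot(delta_w), epsilon)
    identity = np.eye(len(delta_w))
    V = identity - rho * np.outer(delta_w, delta_grad)
    return V.dot(inv_H).dot(V.T) + rho * np.outer(delta_w, delta_w)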
class B(Configurable): choice = ChoiceProperty(choices=[])
class LevenbergMarquardt(StepSelectionBuiltIn, BaseGradientDescent): """ Levenberg-Marquardt algorithm is a variation of the Newton's method. It minimizes MSE error. The algorithm approximates Hessian matrix using dot product between two jacobian matrices. Notes ----- - Method requires all training data during propagation, which means it's not allowed to use mini-batches. - Network minimizes only Mean Squared Error (MSE) loss function. - Efficient for small training datasets, because it computes gradient per each sample separately. - Efficient for small-sized networks. Parameters ---------- {BaseGradientDescent.connection} mu : float Control invertion for J.T * J matrix, defaults to ``0.1``. mu_update_factor : float Factor to decrease the mu if update decrese the error, otherwise increse mu by the same factor. Defaults to ``1.2`` error : {{``mse``}} Levenberg-Marquardt works only for quadratic functions. Defaults to ``mse``. {BaseGradientDescent.show_epoch} {BaseGradientDescent.shuffle_data} {BaseGradientDescent.epoch_end_signal} {BaseGradientDescent.train_end_signal} {BaseGradientDescent.verbose} {BaseGradientDescent.addons} Attributes ---------- {BaseGradientDescent.Attributes} Methods ------- {BaseGradientDescent.Methods} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> lmnet = algorithms.LevenbergMarquardt((2, 3, 1)) >>> lmnet.train(x_train, y_train) See Also -------- :network:`BaseGradientDescent` : BaseGradientDescent algorithm. """ mu = BoundedProperty(default=0.01, minval=0) mu_update_factor = BoundedProperty(default=1.2, minval=1) error = ChoiceProperty(default='mse', choices={'mse': errors.mse}) step = WithdrawProperty() def init_variables(self): super(LevenbergMarquardt, self).init_variables() self.variables.update( mu=tf.Variable(self.mu, name='lev-marq/mu'), last_error=tf.Variable(np.nan, name='lev-marq/last-error'), ) def init_train_updates(self): network_output = self.variables.network_output prediction_func = self.variables.train_prediction_func last_error = self.variables.last_error error_func = self.variables.error_func mu = self.variables.mu new_mu = tf.where( tf.less(last_error, error_func), mu * self.mu_update_factor, mu / self.mu_update_factor, ) err_for_each_sample = flatten((network_output - prediction_func) ** 2) params = parameter_values(self.connection) param_vector = make_single_vector(params) J = compute_jacobian(err_for_each_sample, params) J_T = tf.transpose(J) n_params = J.shape[1] parameter_update = tf.matrix_solve( tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value), tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)) ) updated_params = param_vector - flatten(parameter_update) updates = [(mu, new_mu)] parameter_updates = setup_parameter_updates(params, updated_params) updates.extend(parameter_updates) return updates def on_epoch_start_update(self, epoch): super(LevenbergMarquardt, self).on_epoch_start_update(epoch) last_error = self.errors.last() if last_error is not None: self.variables.last_error.load(last_error, tensorflow_session())
def test_choice_property_on_unknown_instance(self): prop = ChoiceProperty(choices=[1, 2, 3]) self.assertEqual(None, prop.__get__(None, None))
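# --- Added illustration (simplified, not NeuPy's implementation) ---
# A minimal descriptor with the same observable behaviour the test above
# relies on: reading the property without an instance returns None, and
# assigning a value outside of ``choices`` is rejected.
class SimpleChoiceProperty(object):
    def __init__(self, choices, default=None):
        self.choices = choices
        self.default = default

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, instance, owner):
        if instance is None:
            return None
        return instance.__dict__.get(self.name, self.default)

    def __set__(self, instance, value):
        if value not in self.choices:
            raise ValueError("Invalid choice: {!r}".format(value))
        instance.__dict__[self.name] = value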
class QuasiNewton(StepSelectionBuiltIn, GradientDescent): """ Quasi-Newton algorithm optimization. Parameters ---------- update_function : {{'bfgs', 'dfp', 'psb', 'sr1'}} Update function. Defaults to ``bfgs``. h0_scale : float Default Hessian matrix is an identity matrix. The ``h0_scale`` parameter scales identity matrix. Defaults to ``1``. {GradientDescent.connection} {GradientDescent.error} {GradientDescent.show_epoch} {GradientDescent.shuffle_data} {GradientDescent.epoch_end_signal} {GradientDescent.train_end_signal} {GradientDescent.verbose} {GradientDescent.addons} Attributes ---------- {GradientDescent.Attributes} Methods ------- {GradientDescent.Methods} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> x_train = np.array([[1, 2], [3, 4]]) >>> y_train = np.array([[1], [0]]) >>> >>> qnnet = algorithms.QuasiNewton( ... (2, 3, 1), ... update_function='bfgs' ... ) >>> qnnet.train(x_train, y_train, epochs=10) See Also -------- :network:`GradientDescent` : GradientDescent algorithm. """ update_function = ChoiceProperty(default='bfgs', choices={ 'bfgs': bfgs, 'dfp': dfp, 'psb': psb, 'sr1': sr1, }) h0_scale = NumberProperty(default=1, minval=0) step = WithdrawProperty() def init_variables(self): super(QuasiNewton, self).init_variables() n_params = count_parameters(self.connection) self.variables.update( inv_hessian=theano.shared( name='algo:quasi-newton/matrix:inv-hessian', value=asfloat(self.h0_scale * np.eye(int(n_params))), ), prev_params=theano.shared( name='algo:quasi-newton/vector:prev-params', value=asfloat(np.zeros(n_params)), ), prev_full_gradient=theano.shared( name='algo:quasi-newton/vector:prev-full-gradient', value=asfloat(np.zeros(n_params)), ), ) def init_train_updates(self): network_inputs = self.variables.network_inputs network_output = self.variables.network_output inv_hessian = self.variables.inv_hessian prev_params = self.variables.prev_params prev_full_gradient = self.variables.prev_full_gradient params = parameter_values(self.connection) param_vector = T.concatenate([param.flatten() for param in params]) gradients = T.grad(self.variables.error_func, wrt=params) full_gradient = T.concatenate([grad.flatten() for grad in gradients]) new_inv_hessian = ifelse( T.eq(self.variables.epoch, 1), inv_hessian, self.update_function(inv_hessian, param_vector - prev_params, full_gradient - prev_full_gradient)) param_delta = -new_inv_hessian.dot(full_gradient) layers_and_parameters = list(iter_parameters(self.layers)) def prediction(step): updated_params = param_vector + step * param_delta # This trick allow us to replace shared variables # with theano variables and get output from the network start_pos = 0 for layer, attrname, param in layers_and_parameters: end_pos = start_pos + param.size updated_param_value = T.reshape( updated_params[start_pos:end_pos], param.shape) setattr(layer, attrname, updated_param_value) start_pos = end_pos output = self.connection.output(*network_inputs) # Restore previous parameters for layer, attrname, param in layers_and_parameters: setattr(layer, attrname, param) return output def phi(step): return self.error(network_output, prediction(step)) def derphi(step): error_func = self.error(network_output, prediction(step)) return T.grad(error_func, wrt=step) step = asfloat(line_search(phi, derphi)) updated_params = param_vector + step * param_delta updates = setup_parameter_updates(params, updated_params) updates.extend([ (inv_hessian, new_inv_hessian), (prev_params, param_vector), (prev_full_gradient, full_gradient), ]) 
return updates
class ConjugateGradient(WolfeLineSearchForStep, BaseOptimizer): """ Conjugate Gradient algorithm. Parameters ---------- update_function : ``fletcher_reeves``, ``polak_ribiere``,\ ``hentenes_stiefel``, ``dai_yuan``, ``liu_storey`` Update function. Defaults to ``fletcher_reeves``. epsilon : float Ensures computational stability during the division in ``update_function`` when denominator is very small number. Defaults to ``1e-7``. {WolfeLineSearchForStep.Parameters} {BaseOptimizer.network} {BaseOptimizer.loss} {BaseOptimizer.show_epoch} {BaseOptimizer.shuffle_data} {BaseOptimizer.signals} {BaseOptimizer.verbose} {BaseOptimizer.regularizer} Attributes ---------- {BaseOptimizer.Attributes} Methods ------- {BaseOptimizer.Methods} Examples -------- >>> from sklearn import datasets, preprocessing >>> from sklearn.model_selection import train_test_split >>> from neupy import algorithms, layers >>> >>> dataset = datasets.load_boston() >>> data, target = dataset.data, dataset.target >>> >>> data_scaler = preprocessing.MinMaxScaler() >>> target_scaler = preprocessing.MinMaxScaler() >>> >>> x_train, x_test, y_train, y_test = train_test_split( ... data_scaler.fit_transform(data), ... target_scaler.fit_transform(target), ... test_size=0.15 ... ) >>> >>> cgnet = algorithms.ConjugateGradient( ... network=[ ... layers.Input(13), ... layers.Sigmoid(50), ... layers.Sigmoid(1), ... ], ... update_function='fletcher_reeves', ... verbose=False ... ) >>> >>> cgnet.train(x_train, y_train, epochs=100) >>> y_predict = cgnet.predict(x_test).round(1) >>> >>> real = target_scaler.inverse_transform(y_test) >>> predicted = target_scaler.inverse_transform(y_predict) References ---------- [1] Jorge Nocedal, Stephen J. Wright, Numerical Optimization. Chapter 5, Conjugate Gradient Methods, p. 
101-133 """ epsilon = NumberProperty(default=1e-7, minval=0) update_function = ChoiceProperty( default='fletcher_reeves', choices={ 'fletcher_reeves': fletcher_reeves, 'polak_ribiere': polak_ribiere, 'hentenes_stiefel': hentenes_stiefel, 'liu_storey': liu_storey, 'dai_yuan': dai_yuan, } ) step = WithdrawProperty() def init_functions(self): n_parameters = self.network.n_parameters self.variables.update( prev_delta=tf.Variable( tf.zeros([n_parameters]), name="conj-grad/prev-delta", dtype=tf.float32, ), prev_gradient=tf.Variable( tf.zeros([n_parameters]), name="conj-grad/prev-gradient", dtype=tf.float32, ), iteration=tf.Variable( asfloat(self.last_epoch), name='conj-grad/current-iteration', dtype=tf.float32 ), ) super(ConjugateGradient, self).init_functions() def init_train_updates(self): iteration = self.variables.iteration previous_delta = self.variables.prev_delta previous_gradient = self.variables.prev_gradient n_parameters = self.network.n_parameters variables = self.network.variables parameters = [var for var in variables.values() if var.trainable] param_vector = make_single_vector(parameters) gradients = tf.gradients(self.variables.loss, parameters) full_gradient = make_single_vector(gradients) beta = self.update_function( previous_gradient, full_gradient, previous_delta, self.epsilon) parameter_delta = tf.where( tf.equal(tf.mod(iteration, n_parameters), 0), -full_gradient, -full_gradient + beta * previous_delta ) step = self.find_optimal_step(param_vector, parameter_delta) updated_parameters = param_vector + step * parameter_delta updates = setup_parameter_updates(parameters, updated_parameters) # We have to compute these values first, otherwise # parallelization, in tensorflow, can mix update order # and, for example, previous gradient can be equal to # current gradient value. It happens because tensorflow # try to execute operations in parallel. with tf.control_dependencies([full_gradient, parameter_delta]): updates.extend([ previous_gradient.assign(full_gradient), previous_delta.assign(parameter_delta), iteration.assign(iteration + 1), ]) return updates
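# A NumPy sketch of two of the ``update_function`` choices listed above.
# ``beta`` scales the previous search direction; the ``epsilon`` term keeps
# the division stable when the denominator is close to zero. Function names
# and signatures here are illustrative and differ from the TensorFlow
# implementations that the ConjugateGradient class actually uses.
import numpy as np


def fletcher_reeves_beta(gradient_old, gradient_new, epsilon=1e-7):
    return gradient_new.dot(gradient_new) / (
        gradient_old.dot(gradient_old) + epsilon)


def polak_ribiere_beta(gradient_old, gradient_new, epsilon=1e-7):
    return gradient_new.dot(gradient_new - gradient_old) / (
        gradient_old.dot(gradient_old) + epsilon)

# The search direction is then built the same way as in init_train_updates:
# delta = -gradient_new                              # restart iteration
# delta = -gradient_new + beta * previous_delta      # regular iteration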
class LinearSearch(SingleStepConfigurable):
    """
    Linear search for the step selection. Basically, this algorithm
    tries different steps, computes the prediction error for each of
    them and, after a few iterations, chooses the step that produced
    the smallest error.

    Parameters
    ----------
    tol : float
        Tolerance for termination. Can be any number greater
        than zero. Defaults to ``0.1``.

    maxiter : int
        Maximum number of iterations for the ``brent`` search method.
        Defaults to ``10``.

    search_method : {'golden', 'brent'}
        Linear search method. Can be ``golden`` for golden search
        or ``brent`` for Brent's search. Defaults to ``golden``.

    Warns
    -----
    {SingleStepConfigurable.Warns}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.Input(13),
    ...         layers.Sigmoid(50),
    ...         layers.Sigmoid(1),
    ...     ],
    ...     search_method='golden',
    ...     addons=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test).round(1)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = estimators.rmsle(real, predicted)
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`ConjugateGradient`
    """
    tol = BoundedProperty(default=0.1, minval=0)
    maxiter = BoundedProperty(default=10, minval=1)
    search_method = ChoiceProperty(choices=['golden', 'brent'],
                                   default='golden')

    def train_epoch(self, input_train, target_train):
        train_epoch = self.methods.train_epoch
        prediction_error = self.methods.prediction_error

        params = [param for param, _ in self.init_train_updates()]
        param_defaults = [param.get_value() for param in params]

        def setup_new_step(new_step):
            for param_default, param in zip(param_defaults, params):
                param.set_value(param_default)

            self.variables.step.set_value(asfloat(new_step))
            train_epoch(input_train, target_train)

            # The train epoch function returns the error that the
            # network had before the training step, that's why we
            # need to compute the error a second time.
            error = prediction_error(input_train, target_train)
            return np.where(np.isnan(error), np.inf, error)

        options = {'xtol': self.tol}
        if self.search_method == 'brent':
            options['maxiter'] = self.maxiter

        res = minimize_scalar(
            setup_new_step,
            tol=self.tol,
            method=self.search_method,
            options=options,
        )

        return setup_new_step(res.x)
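# A self-contained sketch of the idea behind the LinearSearch add-on: treat
# the training step as a single scalar and let ``scipy.optimize.minimize_scalar``
# pick the value that minimizes the error produced by one training epoch.
# The quadratic ``epoch_error`` below is a hypothetical stand-in for the
# network error; only the minimize_scalar call mirrors what the add-on does.
from scipy.optimize import minimize_scalar


def epoch_error(step):
    # Hypothetical error curve with a minimum near step = 0.3
    return (step - 0.3) ** 2 + 0.05


def find_best_step(tol=0.1, search_method='golden'):
    result = minimize_scalar(
        epoch_error,
        tol=tol,
        method=search_method,
        options={'xtol': tol},
    )
    return result.x  # the step the add-on would assign to the network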
class QuasiNewton(Backpropagation):
    """
    Quasi-Newton :network:`Backpropagation` algorithm optimization.

    Parameters
    ----------
    update_function : {{'bfgs', 'dfp', 'psb', 'sr1'}}
        Update function. Defaults to ``bfgs``.
    h0_scale : float
        Factor that scales the identity matrix H0 on the first
        iteration step. Defaults to ``1``.
    gradient_tol : float
        If the gradient norm is less than this value, the algorithm
        stops the training procedure. Defaults to ``1e-5``.
    {optimizations}
    {raw_predict_param}
    {full_params}

    Methods
    -------
    {supervised_train}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     (2, 3, 1),
    ...     update_function='bfgs',
    ...     verbose=False
    ... )
    >>> qnnet.train(x_train, y_train)

    See Also
    --------
    :network:`Backpropagation` : Backpropagation algorithm.
    """
    update_function = ChoiceProperty(
        default='bfgs',
        choices={
            'bfgs': bfgs,
            'dfp': dfp,
            'psb': psb,
            'sr1': sr1,
        }
    )
    h0_scale = NonNegativeNumberProperty(default=1)
    gradient_tol = BetweenZeroAndOneProperty(default=1e-5)

    default_optimizations = [WolfeSearch]

    def get_weight_delta(self, output_train, target_train):
        gradients = self.get_gradient(output_train, target_train)
        gradient = matrix_list_in_one_vector(gradients)

        if norm(gradient) < self.gradient_tol:
            raise StopIteration("Gradient norm is less than {}"
                                "".format(self.gradient_tol))

        train_layers = self.train_layers
        weight = matrix_list_in_one_vector(
            (layer.weight for layer in train_layers)
        )

        if hasattr(self, 'prev_gradient'):
            inverse_hessian = self.update_function(
                self.prev_inverse_hessian,
                weight - self.prev_weight,
                gradient - self.prev_gradient
            )
        else:
            # On the first epoch we don't have previous weights and
            # gradients, so we skip the quasi-Newton coefficient
            # computation and start from a scaled identity matrix.
            inverse_hessian = self.h0_scale * eye(weight.size, dtype=int)

        self.prev_weight = weight.copy()
        self.prev_gradient = gradient.copy()
        self.prev_inverse_hessian = inverse_hessian

        return vector_to_list_of_matrix(
            -inverse_hessian.dot(gradient),
            (layer.size for layer in train_layers)
        )
class ParameterBasedLayer(BaseLayer):
    """
    Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer input size.
    weight : 2D array-like or None
        Define the layer weights. ``None`` means that the weights will be
        generated randomly based on the ``init_method`` property.
        Defaults to ``None``.
    bias : 1D array-like or None
        Define the layer bias. ``None`` means that the bias will be
        generated randomly based on the ``init_method`` property.
        Defaults to ``None``.
    init_method : {{'bounded', 'normal', 'ortho', 'xavier_normal',\
    'xavier_uniform', 'he_normal', 'he_uniform'}}
        Weight initialization method. Defaults to ``xavier_normal``.

        * ``normal`` generates random weights from a normal distribution \
        with standard deviation equal to ``0.01``.

        * ``bounded`` generates random weights from a uniform distribution.

        * ``ortho`` generates a random orthogonal matrix.

        * ``xavier_normal`` generates a random matrix from a normal \
        distribution where the variance equals \
        :math:`\\frac{{2}}{{fan_{{in}} + fan_{{out}}}}`, where \
        :math:`fan_{{in}}` is the number of layer input units and \
        :math:`fan_{{out}}` is the number of layer output units.

        * ``xavier_uniform`` generates a random matrix from a uniform \
        distribution where :math:`w_{{ij}} \in \
        [-\\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}]`.

        * ``he_normal`` generates a random matrix from a normal \
        distribution where the variance equals \
        :math:`\\frac{{2}}{{fan_{{in}}}}`, where :math:`fan_{{in}}` \
        is the number of layer input units.

        * ``he_uniform`` generates a random matrix from a uniform \
        distribution where :math:`w_{{ij}} \in [\
        -\\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}]`.
    bounds : tuple of two float
        Available only when ``init_method`` equals ``bounded``. The values
        identify the minimum and maximum possible values of the random
        weights. Defaults to ``(0, 1)``.
    """
    size = IntProperty(minval=1)
    weight = SharedArrayProperty(default=None)
    bias = SharedArrayProperty(default=None)
    bounds = TypedListProperty(default=(0, 1), element_type=(int, float))
    init_method = ChoiceProperty(default=XAVIER_NORMAL,
                                 choices=VALID_INIT_METHODS)

    def __init__(self, size, **options):
        if size is not None:
            options['size'] = size
        super(ParameterBasedLayer, self).__init__(**options)

    def weight_shape(self):
        output_size = self.relate_to_layer.size
        return (self.size, output_size)

    def bias_shape(self):
        output_size = self.relate_to_layer.size
        return (output_size,)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.weight = create_shared_parameter(
            value=self.weight,
            name='weight_{}'.format(self.layer_id),
            shape=self.weight_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.bias = create_shared_parameter(
            value=self.bias,
            name='bias_{}'.format(self.layer_id),
            shape=self.bias_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.parameters = [self.weight, self.bias]

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
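# A NumPy sketch of the ``xavier_normal`` initialization described above:
# weights are sampled from a normal distribution whose variance equals
# 2 / (fan_in + fan_out). The helper name is illustrative and is not the
# ``create_shared_parameter`` function that the layer relies on.
import numpy as np


def xavier_normal_weights(fan_in, fan_out):
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.normal(loc=0, scale=std, size=(fan_in, fan_out))

# Usage sketch: weights between a 13-unit layer and a 50-unit layer
# weight = xavier_normal_weights(fan_in=13, fan_out=50)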
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map.

    Parameters
    ----------
    learning_radius : int
        Learning radius. Defaults to ``0``.
    features_grid : tuple of int
        Shape of the two-dimensional grid that organizes the output
        neurons. The grid must contain the same number of elements as
        the output layer. Defaults to ``(n_outputs, 1)``.
    transform : {{'linear', 'euclid', 'cos'}}
        Indicates the transformation operation related to the input
        layer. The ``linear`` value means that the input data will be
        multiplied by the weights in the typical way. The ``euclid``
        method identifies the weight vector closest to the input one.
        The ``cos`` method does the same as ``euclid``, but uses cosine
        similarity instead of the euclidean distance. Defaults to
        ``linear``.
    {BaseAssociative.n_inputs}
    {BaseAssociative.n_outputs}
    {BaseAssociative.weight}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {BaseAssociative.train}
    {BaseSkeleton.fit}
    """
    learning_radius = IntProperty(default=0, minval=0)
    features_grid = TypedListProperty()
    transform = ChoiceProperty(default='linear', choices={
        'linear': dot_product,
        'euclid': neg_euclid_distance,
        'cos': cosine_similarity,
    })

    def __init__(self, **options):
        super(SOFM, self).__init__(**options)

        invalid_feature_grid = (self.features_grid is not None and
                                mul(*self.features_grid) != self.n_outputs)

        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements as "
                "in the output layer: {0}, but found: {1} ({2}x{3})"
                "".format(self.n_outputs, mul(*self.features_grid),
                          self.features_grid[0], self.features_grid[1]))

    def init_properties(self):
        super(SOFM, self).init_properties()

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        output = np.zeros((input_data.shape[0], self.n_outputs))

        for i, input_row in enumerate(input_data):
            output[i, :] = self.transform(input_row.reshape(1, -1),
                                          self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1)
        feature_bound = self.features_grid[1]

        output_with_neightbours = neuron_neighbours(
            np.reshape(layer_output, self.features_grid),
            (neuron_winner // feature_bound, neuron_winner % feature_bound),
            self.learning_radius)

        index_y, _ = np.nonzero(
            np.reshape(output_with_neightbours, (self.n_outputs, 1)))

        return index_y
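# A NumPy sketch of one SOFM recall step under the ``euclid`` transform: the
# winning neuron is the weight column closest to the input vector, and its
# grid neighbours within ``learning_radius`` are the ones updated during
# training. The helper name is illustrative, and the Manhattan-distance
# neighbourhood used here is a simplification of neupy's ``neuron_neighbours``
# helper, which may define the radius differently.
import numpy as np


def winner_and_neighbours(input_row, weight, features_grid, learning_radius):
    # weight has shape (n_inputs, n_outputs); one column per output neuron
    distances = np.linalg.norm(weight - input_row.reshape(-1, 1), axis=0)
    winner = int(distances.argmin())

    n_rows, n_cols = features_grid
    winner_row, winner_col = divmod(winner, n_cols)

    rows, cols = np.indices(features_grid)
    neighbours = (np.abs(rows - winner_row)
                  + np.abs(cols - winner_col)) <= learning_radius

    return winner, neighbours.ravel()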
class LinearSearch(SingleStep):
    """
    Linear search for the step selection. Basically, this algorithm
    tries different steps, computes the prediction error for each of
    them and, after a few iterations, chooses the step that produced
    the smallest error.

    Parameters
    ----------
    tol : float
        Tolerance for termination. Can be any number greater
        than zero. Defaults to ``0.3``.
    search_method : {'golden', 'brent'}
        Linear search method. Can be ``golden`` for golden search
        or ``brent`` for Brent's search. Defaults to ``golden``.

    Attributes
    ----------
    {first_step}

    Warns
    -----
    {bp_depending}

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(0)
    >>>
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers
    >>> from neupy.functions import rmsle
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.SigmoidLayer(13),
    ...         layers.SigmoidLayer(50),
    ...         layers.OutputLayer(1),
    ...     ],
    ...     search_method='golden',
    ...     optimizations=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = rmsle(real, predicted.round(1))
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`ConjugateGradient`
    """
    tol = NonNegativeNumberProperty(default=0.3)
    search_method = ChoiceProperty(choices=['golden', 'brent'],
                                   default='golden')

    def set_weights(self, new_weights):
        for layer, new_weight in zip(self.train_layers, new_weights):
            layer.weight = new_weight.copy()

    def check_updates(self, new_step, weights, delta):
        self.set_weights(weights)
        self.step = new_step

        super(LinearSearch, self).update_weights(delta)
        predicted_output = self.predict(self.input_train)
        return self.error(predicted_output, self.target_train)

    def update_weights(self, weight_deltas):
        real_weights = [layer.weight for layer in self.train_layers]
        res = minimize_scalar(
            self.check_updates,
            args=(real_weights, weight_deltas),
            tol=self.tol,
            method=self.search_method,
            options={'xtol': self.tol},
        )

        self.set_weights(real_weights)
        self.step = res.x

        return super(LinearSearch, self).update_weights(weight_deltas)
class LevenbergMarquardt(StepSelectionBuiltIn, GradientDescent):
    """
    Levenberg-Marquardt algorithm.

    Notes
    -----
    - Network minimizes only Mean Squared Error function.
    - Efficient for small training datasets, because it
      computes the gradient for each sample separately.
    - Efficient for small-sized networks.

    Parameters
    ----------
    {GradientDescent.connection}
    mu : float
        Damping factor that controls the inversion of the ``J.T * J``
        matrix. Defaults to ``0.01``.
    mu_update_factor : float
        Factor by which ``mu`` decreases when an update reduces the
        error, and by which it increases otherwise. Defaults to ``1.2``.
    error : {{``mse``}}
        Levenberg-Marquardt works only for quadratic functions.
        Defaults to ``mse``.
    {GradientDescent.show_epoch}
    {GradientDescent.shuffle_data}
    {GradientDescent.epoch_end_signal}
    {GradientDescent.train_end_signal}
    {GradientDescent.verbose}
    {GradientDescent.addons}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt((2, 3, 1))
    >>> lmnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    step = WithdrawProperty()

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=theano.shared(name='lev-marq/mu', value=asfloat(self.mu)),
            last_error=theano.shared(name='lev-marq/last-error',
                                     value=np.nan),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        se_for_each_sample = ((network_output - prediction_func) ** 2).ravel()

        params = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in params])

        J = compute_jacobian(se_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - slinalg.solve(
            J.T.dot(J) + new_mu * T.eye(n_params),
            J.T.dot(se_for_each_sample))

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.set_value(last_error)
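# A NumPy sketch of the parameter update computed in ``init_train_updates``
# above: solve (J^T J + mu * I) dp = J^T e and subtract ``dp`` from the
# flattened parameter vector. ``jacobian`` is the Jacobian of the per-sample
# squared errors with respect to the parameters; how it is computed is
# outside the scope of this sketch, and the function name is illustrative.
import numpy as np


def levenberg_marquardt_step(param_vector, jacobian, errors_per_sample, mu):
    n_params = jacobian.shape[1]

    hessian_approximation = jacobian.T.dot(jacobian) + mu * np.eye(n_params)
    gradient = jacobian.T.dot(errors_per_sample)

    return param_vector - np.linalg.solve(hessian_approximation, gradient)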