class RBFKMeans(NoStepSelection, UnsupervisedLearningMixin, BaseNetwork): """ Radial basis function K-means for clustering. Parameters ---------- n_clusters : int number of clusters in dataset. {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Attributes ---------- centers : numpy array [n_clusters, n_futures] After training this property will contain coordinates to cluster centers. Methods ------- {UnsupervisedLearningMixin.train} {BaseSkeleton.predict} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy.algorithms import RBFKMeans >>> >>> data = np.array([ ... [0.11, 0.20], ... [0.25, 0.32], ... [0.64, 0.60], ... [0.12, 0.42], ... [0.70, 0.73], ... [0.30, 0.27], ... [0.43, 0.81], ... [0.44, 0.87], ... [0.12, 0.92], ... [0.56, 0.67], ... [0.36, 0.35], ... ]) >>> rbfk_net = RBFKMeans(n_clusters=2, verbose=False) >>> rbfk_net.train(data, epsilon=1e-5) >>> rbfk_net.centers array([[ 0.228 , 0.312 ], [ 0.48166667, 0.76666667]]) >>> >>> new_data = np.array([[0.1, 0.1], [0.9, 0.9]]) >>> rbfk_net.predict(new_data) array([[ 0.], [ 1.]]) """ n_clusters = IntProperty(minval=2) def __init__(self, **options): self.centers = None super(RBFKMeans, self).__init__(**options) def predict(self, input_data): input_data = format_data(input_data) centers = self.centers classes = zeros((input_data.shape[0], 1)) for i, value in enumerate(input_data): classes[i] = argmin(norm(centers - value, axis=1)) return classes def train_epoch(self, input_train, target_train): centers = self.centers old_centers = centers.copy() output_train = self.predict(input_train) for i, center in enumerate(centers): positions = argwhere(output_train[:, 0] == i) if not np_any(positions): continue class_data = take(input_train, positions, axis=0) centers[i, :] = (1 / len(class_data)) * np_sum(class_data, axis=0) return np_abs(old_centers - centers) def train(self, input_train, epsilon=1e-5, epochs=100): n_clusters = self.n_clusters input_train = format_data(input_train) if input_train.shape[0] <= n_clusters: raise ValueError("Count of clusters must be less than count of " "input data.") self.centers = input_train[:n_clusters, :].copy() super(RBFKMeans, self).train(input_train, epsilon=epsilon, epochs=epochs)
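# A minimal, self-contained sketch (plain numpy, no neupy dependency) of the
# center-update rule that ``RBFKMeans.train_epoch`` above implements: every
# sample is assigned to its nearest center and each center moves to the mean
# of its assigned samples. Function and variable names here are illustrative.
import numpy as np

def kmeans_update(data, centers):
    # Assign every sample to the closest center (Euclidean distance)
    distances = np.linalg.norm(data[:, None, :] - centers[None, :, :], axis=2)
    labels = distances.argmin(axis=1)

    new_centers = centers.copy()
    for i in range(len(centers)):
        members = data[labels == i]
        if len(members):  # empty clusters are skipped, just like in train_epoch
            new_centers[i] = members.mean(axis=0)

    # train_epoch returns the absolute center shift, which the epsilon-based
    # stopping criterion presumably compares against
    return new_centers, np.abs(new_centers - centers)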
class GrowingNeuralGas(BaseNetwork): """ Growing Neural Gas (GNG) algorithm. Current algorithm has two modifications that hasn't been mentioned in the paper, but they help to speed up training. - The ``n_start_nodes`` parameter provides possibility to increase number of nodes during initialization step. It's useful when algorithm takes a lot of time building up large amount of neurons. - The ``min_distance_for_update`` parameter allows to speed up training when some data samples has neurons very close to them. The ``min_distance_for_update`` parameter controls threshold for the minimum distance for which we will want to update weights. Parameters ---------- n_inputs : int Number of features in each sample. n_start_nodes : int Number of nodes that algorithm generates from the data during the initialization step. Defaults to ``2``. step : float Step (learning rate) for the neuron winner. Defaults to ``0.2``. neighbour_step : float Step (learning rate) for the neurons that connected via edges with neuron winner. This value typically has to be smaller than ``step`` value. Defaults to ``0.05``. max_edge_age : int It means that if edge won't be updated for ``max_edge_age`` iterations than it would be removed. The larger the value the more updates we allow to do before removing edge. Defaults to ``100``. n_iter_before_neuron_added : int Each ``n_iter_before_neuron_added`` weight update algorithm add new neuron. The smaller the value the more frequently algorithm adds new neurons to the network. Defaults to ``1000``. error_decay_rate : float This error decay rate would be applied to every neuron in the graph after each training iteration. It ensures that old errors will be reduced over time. Defaults to ``0.995``. after_split_error_decay_rate : float This decay rate reduces error for neurons with largest errors after algorithm added new neuron. This value typically lower than ``error_decay_rate``. Defaults to ``0.5``. max_nodes : int Maximum number of nodes that would be generated during the training. This parameter won't stop training when maximum number of nodes will be exceeded. Defaults to ``1000``. min_distance_for_update : float Parameter controls for which neurons we want to apply updates. In case if euclidean distance between data sample and closest neurons will be less than the ``min_distance_for_update`` value than update would be skipped for this data sample. Setting value to zero will disable effect provided by this parameter. Defaults to ``0``. {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.signals} {Verbose.verbose} Methods ------- train(X_train, epochs=100) Network learns topological structure of the data. Learned structure will be stored in the ``graph`` attribute. {BaseSkeleton.fit} initialize_nodes(data) Network initializes nodes randomly sampling ``n_start_nodes`` from the data. It would be applied automatically before the training in case if graph is empty. Note: Node re-initialization can reset network. Notes ----- - Unlike other algorithms this network doesn't make predictions. Instead, it learns topological structure of the data in form of the graph. After that training, structure of the network can be extracted from the ``graph`` attribute. - In order to speed up training, it might be useful to increase the ``n_start_nodes`` parameter. - During the training it happens that nodes learn topological structure of one part of the data better than the other, mostly because of the different data sample density in different places. 
Increasing the ``min_distance_for_update`` parameter can speed up training by skipping updates for neurons that are already very close to the data sample (closer than the specified ``min_distance_for_update`` value). Training stops when none of the neurons has been updated during the training epoch. Attributes ---------- graph : NeuralGasGraph instance This attribute stores all neurons and connections between them in the form of an undirected graph. {BaseNetwork.Attributes} Examples -------- >>> from neupy import algorithms >>> from sklearn.datasets import make_blobs >>> >>> data, _ = make_blobs( ... n_samples=1000, ... n_features=2, ... centers=2, ... cluster_std=0.4, ... ) >>> >>> neural_gas = algorithms.GrowingNeuralGas( ... n_inputs=2, ... shuffle_data=True, ... verbose=True, ... max_edge_age=10, ... n_iter_before_neuron_added=50, ... max_nodes=100, ... ) >>> neural_gas.train(data, epochs=10) >>> neural_gas.graph.n_nodes 100 >>> len(neural_gas.graph.edges) 175 >>> edges = list(neural_gas.graph.edges.keys()) >>> neuron_1, neuron_2 = edges[0] >>> >>> neuron_1.weight array([[-6.77166299, 2.4121606 ]]) >>> neuron_2.weight array([[-6.829309 , 2.27839633]]) References ---------- [1] A Growing Neural Gas Network Learns Topologies, Bernd Fritzke """ n_inputs = IntProperty(minval=1, required=True) n_start_nodes = IntProperty(minval=2, default=2) step = NumberProperty(default=0.2, minval=0) neighbour_step = NumberProperty(default=0.05, minval=0) max_edge_age = IntProperty(default=100, minval=1) max_nodes = IntProperty(default=1000, minval=1) n_iter_before_neuron_added = IntProperty(default=1000, minval=1) after_split_error_decay_rate = ProperFractionProperty(default=0.5) error_decay_rate = ProperFractionProperty(default=0.995) min_distance_for_update = NumberProperty(default=0.0, minval=0) def __init__(self, *args, **kwargs): super(GrowingNeuralGas, self).__init__(*args, **kwargs) self.n_updates = 0 self.graph = NeuralGasGraph() def format_input_data(self, X): is_feature1d = self.n_inputs == 1 X = format_data(X, is_feature1d) if X.ndim != 2: raise ValueError("Cannot make prediction, because input " "data has more than 2 dimensions") n_samples, n_features = X.shape if n_features != self.n_inputs: raise ValueError("Input data expected to have {} features, " "but got {}".format(self.n_inputs, n_features)) return X def initialize_nodes(self, data): self.graph = NeuralGasGraph() for sample in sample_data_point(data, n=self.n_start_nodes): self.graph.add_node(NeuronNode(sample.reshape(1, -1))) def train(self, X_train, epochs=100): X_train = self.format_input_data(X_train) if not self.graph.nodes: self.initialize_nodes(X_train) return super(GrowingNeuralGas, self).train( X_train=X_train, y_train=None, X_test=None, y_test=None, epochs=epochs) def one_training_update(self, X_train, y_train=None): graph = self.graph step = self.step neighbour_step = self.neighbour_step max_nodes = self.max_nodes max_edge_age = self.max_edge_age error_decay_rate = self.error_decay_rate after_split_error_decay_rate = self.after_split_error_decay_rate n_iter_before_neuron_added = self.n_iter_before_neuron_added # We square this value, because we deal with # squared distances during the training.
min_distance_for_update = np.square(self.min_distance_for_update) n_samples = len(X_train) total_error = 0 did_update = False for sample in X_train: nodes = graph.nodes weights = np.concatenate([node.weight for node in nodes]) distance = np.linalg.norm(weights - sample, axis=1) neuron_ids = np.argsort(distance) closest_neuron_id, second_closest_id = neuron_ids[:2] closest_neuron = nodes[closest_neuron_id] second_closest = nodes[second_closest_id] total_error += distance[closest_neuron_id] if distance[closest_neuron_id] < min_distance_for_update: continue self.n_updates += 1 did_update = True closest_neuron.error += distance[closest_neuron_id] closest_neuron.weight += step * (sample - closest_neuron.weight) graph.add_edge(closest_neuron, second_closest) for to_neuron in list(graph.edges_per_node[closest_neuron]): edge_id = graph.find_edge_id(to_neuron, closest_neuron) age = graph.edges[edge_id] if age >= max_edge_age: graph.remove_edge(to_neuron, closest_neuron) if not graph.edges_per_node[to_neuron]: graph.remove_node(to_neuron) else: graph.edges[edge_id] += 1 to_neuron.weight += neighbour_step * ( sample - to_neuron.weight) time_to_add_new_neuron = ( self.n_updates % n_iter_before_neuron_added == 0 and graph.n_nodes < max_nodes) if time_to_add_new_neuron: nodes = graph.nodes largest_error_neuron = max(nodes, key=attrgetter('error')) neighbour_neuron = max( graph.edges_per_node[largest_error_neuron], key=attrgetter('error')) largest_error_neuron.error *= after_split_error_decay_rate neighbour_neuron.error *= after_split_error_decay_rate new_weight = 0.5 * ( largest_error_neuron.weight + neighbour_neuron.weight ) new_neuron = NeuronNode(weight=new_weight.reshape(1, -1)) graph.remove_edge(neighbour_neuron, largest_error_neuron) graph.add_node(new_neuron) graph.add_edge(largest_error_neuron, new_neuron) graph.add_edge(neighbour_neuron, new_neuron) for node in graph.nodes: node.error *= error_decay_rate if not did_update and min_distance_for_update != 0 and n_samples > 1: raise StopTraining( "Distance between every data sample and neurons, closest " "to them, is less then {}".format(min_distance_for_update)) return total_error / n_samples def predict(self, *args, **kwargs): raise NotImplementedError( "Growing Neural Gas algorithm doesn't make prediction. " "It only learns graph structure from the data " "(class has `graph` attribute). ")
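# Hedged usage sketch for the class above: train a GrowingNeuralGas instance
# and read the learned topology out of its ``graph`` attribute. It reuses the
# ``make_blobs`` data from the docstring example; the printed numbers are
# illustrative and depend on the random seed.
import numpy as np
from sklearn.datasets import make_blobs
from neupy import algorithms

data, _ = make_blobs(n_samples=1000, n_features=2, centers=2, cluster_std=0.4)

neural_gas = algorithms.GrowingNeuralGas(
    n_inputs=2,
    max_edge_age=10,
    n_iter_before_neuron_added=50,
    max_nodes=100,
    verbose=False,
)
neural_gas.train(data, epochs=10)

# Every node stores its weight vector; edges connect topologically close nodes
node_positions = np.concatenate([node.weight for node in neural_gas.graph.nodes])
print(node_positions.shape)         # e.g. (100, 2)
print(len(neural_gas.graph.edges))  # number of learned edges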
class LVQ(BaseNetwork): """ Learning Vector Quantization (LVQ) algorithm. Notes ----- - Input data needs to be normalized, because LVQ uses Euclidean distance to find clusters. - Training error is just a ratio of misclassified samples. Parameters ---------- n_inputs : int Number of input units. It should be equal to the number of features in the input data set. n_subclasses : int, None Defines total number of subclasses. Value should be greater than or equal to the number of classes. ``None`` will set the number of subclasses equal to the number of classes. Defaults to ``None`` (or the same as ``n_classes``). n_classes : int Number of classes in the data set. prototypes_per_class : list, None Defines the number of prototypes per class. For instance, if ``n_classes=3`` and ``n_subclasses=8`` then there can be 3 subclasses for the first class, 3 for the second one and 2 for the third one (3 + 3 + 2 == 8). This example can be specified as ``prototypes_per_class=[3, 3, 2]``. There are two rules that apply to this parameter: 1. ``sum(prototypes_per_class) == n_subclasses`` 2. ``len(prototypes_per_class) == n_classes`` The ``None`` value will distribute an approximately equal number of subclasses per class. It's approximate, because when ``n_subclasses % n_classes != 0`` there is no way to distribute an equal number of subclasses per class. Defaults to ``None``. {BaseNetwork.step} n_updates_to_stepdrop : int or None If this option is not equal to ``None`` then LVQ reduces the step size after every update until the number of applied updates reaches the ``n_updates_to_stepdrop`` value. The minimum possible step size is defined by the ``minstep`` parameter. Be aware that the number of updates is not the same as the number of epochs: LVQ applies an update for each sample propagated through the network. The relation between this parameter and the maximum number of epochs is the following: .. code-block:: python n_updates_to_stepdrop = n_samples * n_max_epochs If the parameter is equal to ``None`` then the step size isn't reduced after updates. Defaults to ``None``. minstep : float Step size will never be lower than this value. This property is useful only when ``n_updates_to_stepdrop`` is not ``None``. Defaults to ``1e-5``. {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} {BaseSkeleton.fit} """ n_inputs = IntProperty(minval=1) n_subclasses = IntProperty(minval=2, default=None, allow_none=True) n_classes = IntProperty(minval=2) prototypes_per_class = TypedListProperty(allow_none=True, default=None) weight = Property(expected_type=(np.ndarray, init.Initializer), allow_none=True, default=None) n_updates_to_stepdrop = IntProperty(default=None, allow_none=True, minval=1) minstep = NumberProperty(minval=0, default=1e-5) def __init__(self, **options): self.initialized = False super(LVQ, self).__init__(**options) self.n_updates = 0 if self.n_subclasses is None: self.n_subclasses = self.n_classes if isinstance(self.weight, init.Initializer): weight_shape = (self.n_inputs, self.n_subclasses) self.weight = self.weight.sample(weight_shape) if self.weight is not None: self.initialized = True if self.n_subclasses < self.n_classes: raise ValueError("Number of subclasses should be greater " "or equal to the number of classes.
Network " "was defined with {} subclasses and {} classes" "".format(self.n_subclasses, self.n_classes)) if self.prototypes_per_class is None: whole, reminder = divmod(self.n_subclasses, self.n_classes) self.prototypes_per_class = [whole] * self.n_classes if reminder: # Since we have reminder left, it means that we cannot # have an equal number of subclasses per each class, # therefor we will add +1 to randomly selected class. class_indeces = np.random.choice(self.n_classes, reminder, replace=False) for class_index in class_indeces: self.prototypes_per_class[class_index] += 1 if len(self.prototypes_per_class) != self.n_classes: raise ValueError("LVQ defined for classification problem that has " "{} classes, but the `prototypes_per_class` " "variable has defined data for {} classes." "".format(self.n_classes, len(self.prototypes_per_class))) if sum(self.prototypes_per_class) != self.n_subclasses: raise ValueError("Invalid distribution of subclasses for the " "`prototypes_per_class` variable. Got total " "of {} subclasses ({}) instead of {} expected" "".format(sum(self.prototypes_per_class), self.prototypes_per_class, self.n_subclasses)) self.subclass_to_class = [] for class_id, n_prototypes in enumerate(self.prototypes_per_class): self.subclass_to_class.extend([class_id] * n_prototypes) @property def training_step(self): if self.n_updates_to_stepdrop is None: return self.step updates_ratio = (1 - self.n_updates / self.n_updates_to_stepdrop) return self.minstep + (self.step - self.minstep) * updates_ratio def predict(self, input_data): if not self.initialized: raise NotTrained("LVQ network hasn't been trained yet") input_data = format_data(input_data) subclass_to_class = self.subclass_to_class weight = self.weight predictions = [] for input_row in input_data: output = euclid_distance(input_row, weight) winner_subclass = int(output.argmin(axis=1)) predicted_class = subclass_to_class[winner_subclass] predictions.append(predicted_class) return np.array(predictions) def train(self, input_train, target_train, *args, **kwargs): input_train = format_data(input_train) target_train = format_data(target_train) n_input_samples = len(input_train) if n_input_samples <= self.n_subclasses: raise ValueError("Number of training input samples should be " "greater than number of sublcasses. Training " "method recived {} input samples." "".format(n_input_samples)) if not self.initialized: target_classes = sorted(np.unique(target_train).astype(np.int)) expected_classes = list(range(self.n_classes)) if target_classes != expected_classes: raise ValueError("All classes should be integers from the " "range [0, {}], but got the following " "classes instead {}".format( self.n_classes - 1, target_classes)) weights = [] iterator = zip(target_classes, self.prototypes_per_class) for target_class, n_prototypes in iterator: is_valid_class = (target_train[:, 0] == target_class) is_valid_class = is_valid_class.astype('float64') n_samples_per_class = sum(is_valid_class) is_valid_class /= n_samples_per_class if n_samples_per_class <= n_prototypes: raise ValueError("Input data has {0} samples for class-{1}" ". Number of samples per specified " "class-{1} should be greater than {2}." 
"".format(n_samples_per_class, target_class, n_prototypes)) class_weight_indeces = np.random.choice( np.arange(n_input_samples), n_prototypes, replace=False, p=is_valid_class) class_weight = input_train[class_weight_indeces] weights.extend(class_weight) self.weight = np.array(weights) self.initialized = True super(LVQ, self).train(input_train, target_train, *args, **kwargs) def train_epoch(self, input_train, target_train): weight = self.weight subclass_to_class = self.subclass_to_class n_correct_predictions = 0 for input_row, target in zip(input_train, target_train): step = self.training_step output = euclid_distance(input_row, weight) winner_subclass = int(output.argmin()) predicted_class = subclass_to_class[winner_subclass] weight_update = input_row - weight[winner_subclass, :] is_correct_prediction = (predicted_class == target) if is_correct_prediction: weight[winner_subclass, :] += step * weight_update else: weight[winner_subclass, :] -= step * weight_update n_correct_predictions += is_correct_prediction self.n_updates += 1 n_samples = len(input_train) return 1 - n_correct_predictions / n_samples
class GRU(BaseRNNLayer): """ Gated Recurrent Unit (GRU) Layer. Parameters ---------- {BaseRNNLayer.size} weights : dict or Initializer Weight parameters for different gates. Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`. - In case if application requires the same initialization method for all weights, then it's possible to specify initialization method that would be automaticaly applied to all weight parameters in the GRU layer. .. code-block:: python layers.GRU(2, weights=init.Normal(0.1)) - In case if application requires different initialization values for different weights then it's possible to specify an exact weight by name. .. code-block:: python dict( weight_in_to_updategate=init.XavierUniform(), weight_hid_to_updategate=init.XavierUniform(), weight_in_to_resetgate=init.XavierUniform(), weight_hid_to_resetgate=init.XavierUniform(), weight_in_to_hidden_update=init.XavierUniform(), weight_hid_to_hidden_update=init.XavierUniform(), ) If application requires modification to only one (or multiple) parameter then it's better to specify the one that you need to modify and ignore other parameters .. code-block:: python dict(weight_in_to_updategate=init.Normal(0.1)) Other parameters like ``weight_in_to_resetgate`` will be equal to their default values. biases : dict or Initializer Bias parameters for different gates. Defaults to :class:`Constant(0) <neupy.init.Constant>`. - In case if application requires the same initialization method for all biases, then it's possible to specify initialization method that would be automaticaly applied to all bias parameters in the GRU layer. .. code-block:: python layers.GRU(2, biases=init.Constant(1)) - In case if application requires different initialization values for different weights then it's possible to specify an exact weight by name. .. code-block:: python dict( bias_updategate=init.Constant(0), bias_resetgate=init.Constant(0), bias_hidden_update=init.Constant(0), ) If application requires modification to only one (or multiple) parameter then it's better to specify the one that you need to modify and ignore other parameters .. code-block:: python dict(bias_resetgate=init.Constant(1)) Other parameters like ``bias_updategate`` will be equal to their default values. activation_functions : dict, callable Activation functions for different gates. Defaults to: .. code-block:: python # import theano.tensor as T dict( resetgate=T.nnet.sigmoid, updategate=T.nnet.sigmoid, hidden_update=T.tanh, ) If application requires modification to only one parameter then it's better to specify the one that you need to modify and ignore other parameters .. code-block:: python dict(resetgate=T.tanh) Other parameters like ``updategate`` or ``hidden_update`` will be equal to their default values. learn_init : bool If ``True``, make ``hid_init`` trainable variable. Defaults to ``False``. hid_init : array-like, Theano variable, scalar or Initializer Initializer for initial hidden state (:math:`h_0`). Defaults to :class:`Constant(0) <neupy.init.Constant>`. {BaseRNNLayer.only_return_final} backwards : bool If ``True``, process the sequence backwards and then reverse the output again such that the output from the layer is always from :math:`x_1` to :math:`x_n`. Defaults to ``False``. precompute_input : bool if ``True``, precompute ``input_to_hid`` before iterating through the sequence. This can result in a speed up at the expense of an increase in memory usage. Defaults to ``True``. 
unroll_scan : bool If ``True`` the recursion is unrolled instead of using scan. For some graphs this gives a significant speed up but it might also consume more memory. When ``unroll_scan=True``, backpropagation always includes the full sequence, so ``n_gradient_steps`` must be set to ``-1`` and the input sequence length must be known at compile time (i.e., cannot be given as ``None``). Defaults to ``False``. {BaseLayer.Parameters} Notes ----- Code was adapted from the `Lasagne <https://github.com/Lasagne/Lasagne>`_ library. Examples -------- Sequence classification .. code-block:: python from neupy import layers, algorithms n_time_steps = 40 n_categories = 20 embedded_size = 10 network = algorithms.RMSProp( [ layers.Input(n_time_steps), layers.Embedding(n_categories, embedded_size), layers.GRU(20), layers.Sigmoid(1), ] ) """ weights = MultiParameterProperty( default=dict( weight_in_to_updategate=init.XavierUniform(), weight_hid_to_updategate=init.XavierUniform(), weight_in_to_resetgate=init.XavierUniform(), weight_hid_to_resetgate=init.XavierUniform(), weight_in_to_hidden_update=init.XavierUniform(), weight_hid_to_hidden_update=init.XavierUniform(), )) biases = MultiParameterProperty( default=dict( bias_updategate=init.Constant(0), bias_resetgate=init.Constant(0), bias_hidden_update=init.Constant(0), )) activation_functions = MultiCallableProperty( default=dict( resetgate=T.nnet.sigmoid, updategate=T.nnet.sigmoid, hidden_update=T.tanh, )) learn_init = Property(default=False, expected_type=bool) hid_init = ParameterProperty(default=init.Constant(0)) backwards = Property(default=False, expected_type=bool) unroll_scan = Property(default=False, expected_type=bool) precompute_input = Property(default=True, expected_type=bool) n_gradient_steps = IntProperty(default=-1) gradient_clipping = NumberProperty(default=0, minval=0) def initialize(self): super(GRU, self).initialize() n_inputs = np.prod(self.input_shape[1:]) weights = self.weights biases = self.biases # Update gate parameters self.weight_in_to_updategate = self.add_parameter( value=weights.weight_in_to_updategate, name='weight_in_to_updategate', shape=(n_inputs, self.size)) self.weight_hid_to_updategate = self.add_parameter( value=weights.weight_hid_to_updategate, name='weight_hid_to_updategate', shape=(self.size, self.size)) self.bias_updategate = self.add_parameter( value=biases.bias_updategate, name='bias_updategate', shape=(self.size,)) # Reset gate parameters self.weight_in_to_resetgate = self.add_parameter( value=weights.weight_in_to_resetgate, name='weight_in_to_resetgate', shape=(n_inputs, self.size)) self.weight_hid_to_resetgate = self.add_parameter( value=weights.weight_hid_to_resetgate, name='weight_hid_to_resetgate', shape=(self.size, self.size)) self.bias_resetgate = self.add_parameter( value=biases.bias_resetgate, name='bias_forgetgate', shape=(self.size,)) # Hidden update gate parameters self.weight_in_to_hidden_update = self.add_parameter( value=weights.weight_in_to_hidden_update, name='weight_in_to_hidden_update', shape=(n_inputs, self.size)) self.weight_hid_to_hidden_update = self.add_parameter( value=weights.weight_hid_to_hidden_update, name='weight_hid_to_hidden_update', shape=(self.size, self.size)) self.bias_hidden_update = self.add_parameter( value=biases.bias_hidden_update, name='bias_hidden_update', shape=(self.size,)) self.add_parameter(value=self.hid_init, shape=(1, self.size), name="hid_init", trainable=self.learn_init) def output(self, input_value): # Treat all dimensions after the second as flattened # 
feature dimensions if input_value.ndim > 3: input_value = T.flatten(input_value, 3) # Because scan iterates over the first dimension we # dimshuffle to (n_time_steps, n_batch, n_features) input_value = input_value.dimshuffle(1, 0, 2) seq_len, n_batch, _ = input_value.shape # Stack input weight matrices into a (num_inputs, 3 * num_units) # matrix, which speeds up computation weight_in_stacked = T.concatenate([ self.weight_in_to_updategate, self.weight_in_to_resetgate, self.weight_in_to_hidden_update], axis=1) # Same for hidden weight matrices weight_hid_stacked = T.concatenate([ self.weight_hid_to_updategate, self.weight_hid_to_resetgate, self.weight_hid_to_hidden_update], axis=1) # Stack biases into a (3 * num_units) vector bias_stacked = T.concatenate([ self.bias_updategate, self.bias_resetgate, self.bias_hidden_update], axis=0) if self.precompute_input: # Because the input is given for all time steps, we can # precompute_input the inputs dot weight matrices before scanning. # weight_in_stacked is (n_features, 3 * num_units). # Input: (n_time_steps, n_batch, 3 * num_units). input_value = T.dot(input_value, weight_in_stacked) + bias_stacked # When theano.scan calls step, input_n will be # (n_batch, 3 * num_units). We define a slicing function # that extract the input to each GRU gate def slice_w(x, n): s = x[:, n * self.size:(n + 1) * self.size] if self.size == 1: s = T.addbroadcast(s, 1) # Theano cannot infer this by itself return s # Create single recurrent computation step function # input_n is the n'th vector of the input def one_gru_step(input_n, hid_previous, *args): # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, # and W_{hc} h_{t - 1} hid_input = T.dot(hid_previous, weight_hid_stacked) if self.gradient_clipping: input_n = theano.gradient.grad_clip( input_n, -self.gradient_clipping, self.gradient_clipping) hid_input = theano.gradient.grad_clip( hid_input, -self.gradient_clipping, self.gradient_clipping) if not self.precompute_input: # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, # and W_{xc}x_t + b_c input_n = T.dot(input_n, weight_in_stacked) + bias_stacked # Reset and update gates resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0) resetgate = self.activation_functions.resetgate(resetgate) updategate = slice_w(hid_input, 1) + slice_w(input_n, 1) updategate = self.activation_functions.updategate(updategate) # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1}) hidden_update_in = slice_w(input_n, 2) hidden_update_hid = slice_w(hid_input, 2) hidden_update = hidden_update_in + resetgate * hidden_update_hid if self.gradient_clipping: hidden_update = theano.gradient.grad_clip( hidden_update, -self.gradient_clipping, self.gradient_clipping) hidden_update = self.activation_functions.hidden_update( hidden_update) # Compute (1 - u_t)h_{t - 1} + u_t c_t hid = (1 - updategate) * hid_previous + updategate * hidden_update return hid hid_init = T.dot(T.ones((n_batch, 1)), self.hid_init) # The hidden-to-hidden weight matrix is always used in step non_sequences = [weight_hid_stacked] # When we aren't precomputing the input outside of scan, we need to # provide the input weights and biases to the step function if not self.precompute_input: non_sequences += [weight_in_stacked, bias_stacked] if self.unroll_scan: # Retrieve the dimensionality of the incoming layer n_time_steps = self.input_shape[0] # Explicitly unroll the recurrence instead of using scan hid_out, = unroll_scan( fn=one_gru_step, sequences=[input_value], outputs_info=[hid_init], go_backwards=self.backwards, non_sequences=non_sequences, 
n_steps=n_time_steps) else: # Scan op iterates over first dimension of input and # repeatedly applies the step function hid_out, _ = theano.scan( fn=one_gru_step, sequences=[input_value], outputs_info=[hid_init], go_backwards=self.backwards, non_sequences=non_sequences, truncate_gradient=self.n_gradient_steps, strict=True) # When it is requested that we only return the final sequence step, # we need to slice it out immediately after scan is applied if self.only_return_final: return hid_out[-1] # dimshuffle back to (n_batch, n_time_steps, n_features)) hid_out = hid_out.dimshuffle(1, 0, 2) # if scan is backward reverse the output if self.backwards: hid_out = hid_out[:, ::-1] return hid_out
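# Reference sketch (numpy, no Theano) of a single GRU step that mirrors the
# gate arithmetic in ``one_gru_step`` above. The weights are stacked the same
# way as in ``output``: one input projection and one hidden projection per
# gate, concatenated along the unit axis. Purely illustrative.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_t, h_prev, W_in, W_hid, b):
    # W_in: (n_features, 3 * n_units), W_hid: (n_units, 3 * n_units),
    # b: (3 * n_units,), stacked in the order [update, reset, hidden update]
    n_units = h_prev.shape[-1]
    in_proj = x_t.dot(W_in) + b
    hid_proj = h_prev.dot(W_hid)

    def gate(proj, i):
        return proj[..., i * n_units:(i + 1) * n_units]

    updategate = sigmoid(gate(in_proj, 0) + gate(hid_proj, 0))
    resetgate = sigmoid(gate(in_proj, 1) + gate(hid_proj, 1))
    hidden_update = np.tanh(gate(in_proj, 2) + resetgate * gate(hid_proj, 2))

    # Same interpolation between previous and candidate state as one_gru_step
    return (1 - updategate) * h_prev + updategate * hidden_update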
class SOFM(Kohonen): """ Self-Organizing Feature Map (SOFM). Parameters ---------- {BaseAssociative.n_inputs} {BaseAssociative.n_outputs} learning_radius : int Learning radius. features_grid : list, tuple, None Feature grid defines shape of the output neurons. The new shape should be compatible with the number of outputs. Defaults to ``(n_outputs, 1)``. transform : {{``linear``, ``euclid``, ``cos``}} Indicate transformation operation related to the input layer. - The ``linear`` value mean that input data would be multiplied by weights in typical way. - The ``euclid`` method will identify the closest weight vector to the input one. - The ``cos`` transformation identifies cosine similarity between input dataset and network's weights. Defaults to ``linear``. {BaseAssociative.weight} {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} {BaseAssociative.train} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms, environment >>> >>> environment.reproducible() >>> >>> data = np.array([ ... [0.1961, 0.9806], ... [-0.1961, 0.9806], ... [-0.5812, -0.8137], ... [-0.8137, -0.5812], ... ]) >>> >>> sofmnet = algorithms.SOFM( ... n_inputs=2, ... n_outputs=2, ... step=0.1, ... learning_radius=0, ... features_grid=(2, 1), ... ) >>> sofmnet.train(data, epochs=100) >>> sofmnet.predict(data) array([[0, 1], [0, 1], [1, 0], [1, 0]]) """ learning_radius = IntProperty(default=0, minval=0) features_grid = TypedListProperty(allow_none=True, default=None) transform = ChoiceProperty(default='linear', choices={ 'linear': np.dot, 'euclid': neg_euclid_distance, 'cos': cosine_similarity, }) def __init__(self, **options): super(SOFM, self).__init__(**options) invalid_feature_grid = (self.features_grid is not None and mul(*self.features_grid) != self.n_outputs) if invalid_feature_grid: raise ValueError( "Feature grid should contain the same number of elements as " "in the output layer: {0}, but found: {1} ({2}x{3})" "".format(self.n_outputs, mul(*self.features_grid), self.features_grid[0], self.features_grid[1])) if self.features_grid is None: self.features_grid = (self.n_outputs, 1) def predict_raw(self, input_data): input_data = format_data(input_data) n_samples = input_data.shape[0] output = np.zeros((n_samples, self.n_outputs)) for i, input_row in enumerate(input_data): output[i, :] = self.transform(input_row.reshape(1, -1), self.weight) return output def update_indexes(self, layer_output): neuron_winner = layer_output.argmax(axis=1) feature_bound = self.features_grid[1] output_with_neightbours = neuron_neighbours( np.reshape(layer_output, self.features_grid), (neuron_winner // feature_bound, neuron_winner % feature_bound), self.learning_radius) index_y, _ = np.nonzero( np.reshape(output_with_neightbours, (self.n_outputs, 1))) return index_y
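# Small illustration of how ``update_indexes`` above maps the flat index of
# the winning neuron onto 2D grid coordinates before looking up neighbours.
# Plain numpy; the grid shape and values are illustrative.
import numpy as np

features_grid = (3, 4)                    # 12 output neurons on a 3x4 grid
layer_output = np.random.random((1, 12))

winner = layer_output.argmax(axis=1)
row, col = winner // features_grid[1], winner % features_grid[1]
# equivalent: np.unravel_index(winner, features_grid)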
class SOFM(Kohonen): """ Self-Organizing Feature Map (SOFM or SOM). Notes ----- - Training data samples should have normalized features. Parameters ---------- {BaseAssociative.n_inputs} n_outputs : int or None Number of outputs. Parameter is optional when ``features_grid`` is specified. .. code-block:: python if n_outputs is None: n_outputs = np.prod(features_grid) learning_radius : int Parameter defines the radius within which we consider all neurons as neighbours of the winning neuron. The bigger the value, the more neurons will be updated after each iteration. The ``0`` value means that we don't update neighbour neurons. Defaults to ``0``. std : int, float Parameter controls the learning rate for each neighbour. The further a neighbour neuron is from the winning neuron, the smaller its learning rate. The learning rate scales based on the factors produced by a normal distribution centered at the winning neuron with the standard deviation specified by this parameter. The learning rate for the winning neuron is always equal to the value specified in the ``step`` parameter and for neighbour neurons it's always lower. The bigger the value of this parameter, the bigger the learning rate for the neighbour neurons. Defaults to ``1``. features_grid : list, tuple, None Feature grid defines the shape of the output neurons. The shape should be compatible with the number of outputs, which means that the following condition should be true: .. code-block:: python np.prod(features_grid) == n_outputs The SOFM implementation supports n-dimensional grids. For instance, in order to specify the grid as a cube instead of the regular rectangular shape, we can set up the options as follows: .. code-block:: python SOFM( ... features_grid=(5, 5, 5), ... ) Defaults to ``(n_outputs, 1)``. grid_type : {{``rect``, ``hexagon``}} Defines the connection type in the feature grid. The type defines which neurons we will consider as closest to the winning neuron during the training. - ``rect`` - Connections between neurons will be organized in a rectangular grid. - ``hexagon`` - Connections between neurons will be organized in a hexagonal grid. It works only for 1d or 2d grids. Defaults to ``rect``. distance : {{``euclid``, ``dot_product``, ``cos``}} Defines the function that will be used to find the weight closest to the input sample. - ``dot_product``: Just a regular dot product between data sample and network's weights - ``euclid``: Euclidean distance between data sample and network's weights - ``cos``: Cosine distance between data sample and network's weights Defaults to ``euclid``. reduce_radius_after : int or None Every specified number of epochs the ``learning_radius`` parameter will be reduced by ``1``. The process continues until ``learning_radius`` is equal to ``0``. The ``None`` value disables parameter reduction during the training. Defaults to ``100``. reduce_step_after : int or None Defines the rate at which the ``step`` parameter will be reduced using the following formula: .. code-block:: python step = step / (1 + current_epoch / reduce_step_after) The ``None`` value disables parameter reduction during the training. Defaults to ``100``. reduce_std_after : int or None Defines the rate at which the ``std`` parameter will be reduced using the following formula: .. code-block:: python std = std / (1 + current_epoch / reduce_std_after) The ``None`` value disables parameter reduction during the training. Defaults to ``100``. weight : array-like, Initializer or {{``init_pca``, ``sample_from_data``}} Neural network weights.
Value defined manualy should have shape ``(n_inputs, n_outputs)``. Also, it's possible to initialized weights base on the training data. There are two options: - ``sample_from_data`` - Before starting the training will randomly take number of training samples equal to number of expected outputs. - ``init_pca`` - Before training starts SOFM will applies PCA on a covariance matrix build from the training samples. Weights will be generated based on the two eigenvectors associated with the largest eigenvalues. Defaults to :class:`Normal() <neupy.init.Normal>`. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.signals} {Verbose.verbose} Methods ------- init_weights(train_data) Initialized weights based on the input data. It works only for the `init_pca` and `sample_from_data` options. For other cases it will throw an error. {BaseSkeleton.predict} {BaseAssociative.train} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms, utils >>> >>> utils.reproducible() >>> >>> data = np.array([ ... [0.1961, 0.9806], ... [-0.1961, 0.9806], ... [-0.5812, -0.8137], ... [-0.8137, -0.5812], ... ]) >>> >>> sofm = algorithms.SOFM( ... n_inputs=2, ... n_outputs=2, ... step=0.1, ... learning_radius=0 ... ) >>> sofm.train(data, epochs=100) >>> sofm.predict(data) array([[0, 1], [0, 1], [1, 0], [1, 0]]) """ n_outputs = IntProperty(minval=1, allow_none=True, default=None) weight = SOFMWeightParameter(default=init.Normal(), choices={ 'init_pca': linear_initialization, 'sample_from_data': sample_data, }) features_grid = TypedListProperty(allow_none=True, default=None) DistanceParameter = namedtuple('DistanceParameter', 'name func') distance = ChoiceProperty(default='euclid', choices={ 'dot_product': DistanceParameter(name='dot_product', func=np.dot), 'euclid': DistanceParameter(name='euclid', func=neg_euclid_distance), 'cos': DistanceParameter(name='cosine', func=cosine_similarity), }) GridTypeMethods = namedtuple('GridTypeMethods', 'name find_neighbours find_step_scaler') grid_type = ChoiceProperty( default='rect', choices={ 'rect': GridTypeMethods(name='rectangle', find_neighbours=find_neighbours_on_rect_grid, find_step_scaler=find_step_scaler_on_rect_grid), 'hexagon': GridTypeMethods(name='hexagon', find_neighbours=find_neighbours_on_hexagon_grid, find_step_scaler=find_step_scaler_on_hexagon_grid) }) learning_radius = IntProperty(default=0, minval=0) std = NumberProperty(minval=0, default=1) reduce_radius_after = IntProperty(default=100, minval=1, allow_none=True) reduce_std_after = IntProperty(default=100, minval=1, allow_none=True) reduce_step_after = IntProperty(default=100, minval=1, allow_none=True) def __init__(self, **options): super(BaseAssociative, self).__init__(**options) if self.n_outputs is None and self.features_grid is None: raise ValueError("One of the following parameters has to be " "specified: n_outputs, features_grid") elif self.n_outputs is None: self.n_outputs = np.prod(self.features_grid) n_grid_elements = np.prod(self.features_grid) invalid_feature_grid = (self.features_grid is not None and n_grid_elements != self.n_outputs) if invalid_feature_grid: raise ValueError( "Feature grid should contain the same number of elements " "as in the output layer: {0}, but found: {1} (shape: {2})" "".format(self.n_outputs, n_grid_elements, self.features_grid)) if self.features_grid is None: self.features_grid = (self.n_outputs, 1) if len(self.features_grid) > 2 and self.grid_type.name == 'hexagon': raise ValueError("SOFM with hexagon 
grid type should have " "one or two dimensional feature grid, but got " "{}d instead (shape: {!r})".format( len(self.features_grid), self.features_grid)) is_pca_init = (isinstance(options.get('weight'), six.string_types) and options.get('weight') == 'init_pca') self.initialized = False if not callable(self.weight): super(Kohonen, self).init_weights() self.initialized = True if self.distance.name == 'cosine': self.weight /= np.linalg.norm(self.weight, axis=0) elif is_pca_init and self.grid_type.name != 'rectangle': raise WeightInitializationError( "Cannot apply PCA weight initialization for non-rectangular " "grid. Grid type: {}".format(self.grid_type.name)) def predict_raw(self, X): X = format_data(X, is_feature1d=(self.n_inputs == 1)) if X.ndim != 2: raise ValueError("Only 2D inputs are allowed") n_samples = X.shape[0] output = np.zeros((n_samples, self.n_outputs)) for i, input_row in enumerate(X): output[i, :] = self.distance.func(input_row.reshape(1, -1), self.weight) return output def update_indexes(self, layer_output): neuron_winner = layer_output.argmax(axis=1).item(0) winner_neuron_coords = np.unravel_index(neuron_winner, self.features_grid) learning_radius = self.learning_radius step = self.step std = self.std if self.reduce_radius_after is not None: learning_radius -= self.last_epoch // self.reduce_radius_after learning_radius = max(0, learning_radius) if self.reduce_step_after is not None: step = decay_function(step, self.last_epoch, self.reduce_step_after) if self.reduce_std_after is not None: std = decay_function(std, self.last_epoch, self.reduce_std_after) methods = self.grid_type output_grid = np.reshape(layer_output, self.features_grid) output_with_neighbours = methods.find_neighbours( grid=output_grid, center=winner_neuron_coords, radius=learning_radius) step_scaler = methods.find_step_scaler(grid=output_grid, center=winner_neuron_coords, std=std) index_y, = np.nonzero(output_with_neighbours.reshape(self.n_outputs)) step_scaler = step_scaler.reshape(self.n_outputs) return index_y, step * step_scaler[index_y] def init_weights(self, X_train): if self.initialized: raise WeightInitializationError( "Weights have been already initialized") weight_initializer = self.weight self.weight = weight_initializer(X_train, self.features_grid) self.initialized = True if self.distance.name == 'cosine': self.weight /= np.linalg.norm(self.weight, axis=0) def train(self, X_train, epochs=100): if not self.initialized: self.init_weights(X_train) super(SOFM, self).train(X_train, epochs=epochs) def one_training_update(self, X_train, y_train=None): step = self.step predict = self.predict update_indexes = self.update_indexes error = 0 for input_row in X_train: input_row = np.reshape(input_row, (1, input_row.size)) layer_output = predict(input_row) index_y, step = update_indexes(layer_output) distance = input_row.T - self.weight[:, index_y] updated_weights = (self.weight[:, index_y] + step * distance) if self.distance.name == 'cosine': updated_weights /= np.linalg.norm(updated_weights, axis=0) self.weight[:, index_y] = updated_weights error += np.abs(distance).mean() return error / len(X_train)
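# Sketch of the parameter decay schedule described in the docstring above:
# ``step`` and ``std`` shrink as ``value / (1 + epoch / reduce_after)``, while
# ``learning_radius`` drops by one every ``reduce_radius_after`` epochs and
# never goes below zero. Standalone illustration, not neupy's decay_function.
def decayed_value(value, epoch, reduce_after):
    return value / (1 + epoch / reduce_after)

def decayed_radius(radius, epoch, reduce_radius_after):
    return max(0, radius - epoch // reduce_radius_after)

print(decayed_value(0.1, 200, reduce_after=100))        # 0.1 / 3
print(decayed_radius(2, 250, reduce_radius_after=100))  # 0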
class CMAC(BaseNetwork): """ Cerebellar Model Articulation Controller (CMAC) Network based on memory. Notes ----- - Network always use Mean Absolute Error (MAE). - Network works for multi dimensional target values. Parameters ---------- quantization : int Network transforms every input to discrete value. Quantization value contol number of total possible categories after quantization, defaults to ``10``. associative_unit_size : int Number of associative blocks in memory, defaults to ``2``. {BaseNetwork.Parameters} Attributes ---------- weight : dict Network's weight that contains memorized patterns. Methods ------- {BaseSkeleton.predict} train(input_train, target_train, input_test=None, target_test=None,\ epochs=100, epsilon=None) Train network. You can control network's training procedure with ``epochs`` and ``epsilon`` parameters. The ``input_test`` and ``target_test`` should be presented both in case of you need to validate network's training after each iteration. {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy.algorithms import CMAC >>> >>> train_space = np.linspace(0, 2 * np.pi, 100) >>> test_space = np.linspace(np.pi, 2 * np.pi, 50) >>> >>> input_train = np.reshape(train_space, (100, 1)) >>> input_test = np.reshape(test_space, (50, 1)) >>> >>> target_train = np.sin(input_train) >>> target_test = np.sin(input_test) >>> >>> cmac = CMAC( ... quantization=100, ... associative_unit_size=32, ... step=0.2, ... ) ... >>> cmac.train(input_train, target_train, epochs=100) >>> >>> predicted_test = cmac.predict(input_test) >>> cmac.error(target_test, predicted_test) 0.0023639417543036569 """ quantization = IntProperty(default=10, minval=1) associative_unit_size = IntProperty(default=2, minval=2) def __init__(self, **options): self.weight = {} super(CMAC, self).__init__(**options) def predict(self, input_data): input_data = format_data(input_data) get_memory_coords = self.get_memory_coords get_result_by_coords = self.get_result_by_coords predicted = [] for input_sample in self.quantize(input_data): coords = get_memory_coords(input_sample) predicted.append(get_result_by_coords(coords)) return np.array(predicted) def get_result_by_coords(self, coords): return sum(self.weight.setdefault(coord, 0) for coord in coords) / self.associative_unit_size def get_memory_coords(self, quantized_value): assoc_unit_size = self.associative_unit_size for i in range(assoc_unit_size): point = ((quantized_value + i) / assoc_unit_size).astype(int) yield tuple(np.concatenate([point, [i]])) def quantize(self, input_data): return (input_data * self.quantization).astype(int) def train_epoch(self, input_train, target_train): get_memory_coords = self.get_memory_coords get_result_by_coords = self.get_result_by_coords weight = self.weight step = self.step n_samples = input_train.shape[0] quantized_input = self.quantize(input_train) errors = 0 for input_sample, target_sample in zip(quantized_input, target_train): coords = list(get_memory_coords(input_sample)) predicted = get_result_by_coords(coords) error = target_sample - predicted for coord in coords: weight[coord] += step * error errors += abs(error) return errors / n_samples def prediction_error(self, input_data, target_data): predicted = self.predict(input_data) return np.mean(np.abs(predicted - target_data)) def train(self, input_train, target_train, input_test=None, target_test=None, epochs=100, epsilon=None, summary='table'): is_test_data_partialy_missed = ( (input_test is None and target_test is not None) or (input_test is not None and 
target_test is None)) if is_test_data_partialy_missed: raise ValueError("Input and target test samples should be " "defined together. Either provide both of " "them or neither.") input_train = format_data(input_train) target_train = format_data(target_train) if input_test is not None: input_test = format_data(input_test) if target_test is not None: target_test = format_data(target_test) return super(CMAC, self).train(input_train=input_train, target_train=target_train, input_test=input_test, target_test=target_test, epochs=epochs, epsilon=epsilon, summary=summary)
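# Minimal sketch of the CMAC addressing scheme implemented by ``quantize`` and
# ``get_memory_coords`` above: an input is quantized and mapped onto a small
# set of overlapping associative cells whose stored values are averaged.
# Plain numpy; names and numbers are illustrative.
import numpy as np

def memory_coords(sample, quantization=10, associative_unit_size=2):
    quantized = (np.asarray(sample) * quantization).astype(int)
    for i in range(associative_unit_size):
        cell = ((quantized + i) / associative_unit_size).astype(int)
        yield tuple(np.concatenate([cell, [i]]))

weight = {}  # the memory; CMAC stores it in ``self.weight``
coords = list(memory_coords([0.37]))
prediction = sum(weight.setdefault(coord, 0) for coord in coords) / len(coords)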
class RBFKMeans(StepSelectionBuiltIn, BaseNetwork): """ Radial basis function K-means for clustering. Parameters ---------- n_clusters : int Number of clusters. {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Attributes ---------- centers : array-like with shape (n_clusters, n_futures) Cluster centers. Methods ------- train(input_train, epsilon=1e-5, epochs=100) Trains network. {BaseSkeleton.predict} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy.algorithms import RBFKMeans >>> >>> data = np.array([ ... [0.11, 0.20], ... [0.25, 0.32], ... [0.64, 0.60], ... [0.12, 0.42], ... [0.70, 0.73], ... [0.30, 0.27], ... [0.43, 0.81], ... [0.44, 0.87], ... [0.12, 0.92], ... [0.56, 0.67], ... [0.36, 0.35], ... ]) >>> rbfk_net = RBFKMeans(n_clusters=2, verbose=False) >>> rbfk_net.train(data, epsilon=1e-5) >>> rbfk_net.centers array([[ 0.228 , 0.312 ], [ 0.48166667, 0.76666667]]) >>> >>> new_data = np.array([[0.1, 0.1], [0.9, 0.9]]) >>> rbfk_net.predict(new_data) array([[ 0.], [ 1.]]) """ n_clusters = IntProperty(minval=2) step = WithdrawProperty() def __init__(self, **options): self.centers = None super(RBFKMeans, self).__init__(**options) def predict(self, input_data): input_data = format_data(input_data) centers = self.centers classes = np.zeros((input_data.shape[0], 1)) for i, value in enumerate(input_data): classes[i] = np.argmin(norm(centers - value, axis=1)) return classes def train_epoch(self, input_train, target_train): centers = self.centers old_centers = centers.copy() output_train = self.predict(input_train) for i, center in enumerate(centers): positions = np.argwhere(output_train[:, 0] == i) if not np.any(positions): continue class_data = np.take(input_train, positions, axis=0) centers[i, :] = (1 / len(class_data)) * np.sum(class_data, axis=0) return np.abs(old_centers - centers) def train(self, input_train, epsilon=1e-5, epochs=100): n_clusters = self.n_clusters input_train = format_data(input_train) n_samples = input_train.shape[0] if n_samples <= n_clusters: raise ValueError("Number of samples in the dataset is less than " "spcified number of clusters. Got {} samples, " "expected at least {} (for {} clusters)" "".format(n_samples, n_clusters + 1, n_clusters)) self.centers = input_train[:n_clusters, :].copy() super(RBFKMeans, self).train(input_train, epsilon=epsilon, epochs=epochs)
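# Small illustration of what ``predict`` above computes for the trained model
# from the docstring example: the index of the closest center for each sample.
# The center values are taken from that example.
import numpy as np

centers = np.array([[0.228, 0.312], [0.48166667, 0.76666667]])
new_data = np.array([[0.1, 0.1], [0.9, 0.9]])

distances = np.linalg.norm(centers[None, :, :] - new_data[:, None, :], axis=2)
print(distances.argmin(axis=1))  # [0 1], matching rbfk_net.predict(new_data)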
class BaseNetwork(BaseSkeleton): """ Base class for Neural Network algorithms. Parameters ---------- step : float Learning rate, defaults to ``0.1``. show_epoch : int This property controls how often the network will display information about training. It has to be defined as positive integer. For instance, number ``100`` mean that network shows summary at 1st, 100th, 200th, 300th ... and last epochs. Defaults to ``1``. shuffle_data : bool If it's ``True`` than training data will be shuffled before the training. Defaults to ``True``. signals : dict, list or function Function that will be triggered after certain events during the training. {Verbose.Parameters} Methods ------- {BaseSkeleton.fit} predict(X) Propagetes input ``X`` through the network and returns produced output. plot_errors(logx=False, show=True, **figkwargs) Using errors collected during the training this method generates plot that can give additional insight into the performance reached during the training. Attributes ---------- errors : list Information about errors. It has two main attributes, namely ``train`` and ``valid``. These attributes provide access to the training and validation errors respectively. last_epoch : int Value equals to the last trained epoch. After initialization it is equal to ``0``. n_updates_made : int Number of training updates applied to the network. """ step = NumberProperty(default=0.1, minval=0) show_epoch = IntProperty(minval=1, default=1) shuffle_data = Property(default=False, expected_type=bool) signals = Property(expected_type=object) def __init__(self, *args, **options): super(BaseNetwork, self).__init__(*args, **options) self.last_epoch = 0 self.n_updates_made = 0 self.errors = base_signals.ErrorCollector() signals = list( as_tuple( base_signals.ProgressbarSignal(), base_signals.PrintLastErrorSignal(), self.errors, self.signals, )) for i, signal in enumerate(signals): if inspect.isfunction(signal): signals[i] = base_signals.EpochEndSignal(signal) elif inspect.isclass(signal): signals[i] = signal() self.events = Events(network=self, signals=signals) def one_training_update(self, X_train, y_train=None): """ Function would be trigger before run all training procedure related to the current epoch. Parameters ---------- epoch : int Current epoch number. """ raise NotImplementedError() def score(self, X_test, y_test): raise NotImplementedError() def plot_errors(self, logx=False, show=True, **figkwargs): return plot_optimizer_errors(optimizer=self, logx=logx, show=show, **figkwargs) def train(self, X_train, y_train=None, X_test=None, y_test=None, epochs=100, batch_size=None): """ Method train neural network. Parameters ---------- X_train : array-like y_train : array-like or None X_test : array-like or None y_test : array-like or None epochs : int Defaults to ``100``. epsilon : float or None Defaults to ``None``. 
""" if epochs <= 0: raise ValueError("Number of epochs needs to be a positive number") epochs = int(epochs) first_epoch = self.last_epoch + 1 batch_size = batch_size or getattr(self, 'batch_size', None) self.events.trigger( name='train_start', X_train=X_train, y_train=y_train, epochs=epochs, batch_size=batch_size, store_data=False, ) try: for epoch in range(first_epoch, first_epoch + epochs): self.events.trigger('epoch_start') self.last_epoch = epoch iterator = iters.minibatches( (X_train, y_train), batch_size, self.shuffle_data, ) for X_batch, y_batch in iterator: self.events.trigger('update_start') update_start_time = time.time() train_error = self.one_training_update(X_batch, y_batch) self.n_updates_made += 1 self.events.trigger( name='train_error', value=train_error, eta=time.time() - update_start_time, epoch=epoch, n_updates=self.n_updates_made, n_samples=iters.count_samples(X_batch), store_data=True, ) self.events.trigger('update_end') if X_test is not None: test_start_time = time.time() validation_error = self.score(X_test, y_test) self.events.trigger( name='valid_error', value=validation_error, eta=time.time() - test_start_time, epoch=epoch, n_updates=self.n_updates_made, n_samples=iters.count_samples(X_test), store_data=True, ) self.events.trigger('epoch_end') except StopTraining as err: self.logs.message( "TRAIN", "Epoch #{} was stopped. Message: {}".format(epoch, str(err))) self.events.trigger('train_end')
class RBM(BaseAlgorithm, BaseNetwork, MinibatchTrainingMixin, DumpableObject): """ Boolean/Bernoulli Restricted Boltzmann Machine (RBM). Algorithm assumes that inputs are either binary values or values between 0 and 1. Parameters ---------- n_visible : int Number of visible units. Number of features (columns) in the input data. n_hidden : int Number of hidden units. The large the number the more information network can capture from the data, but it also mean that network is more likely to overfit. batch_size : int Size of the mini-batch. Defaults to ``10``. weight : array-like, Tensorfow variable, Initializer or scalar Default initialization methods you can find :ref:`here <init-methods>`. Defaults to :class:`Normal <neupy.init.Normal>`. hidden_bias : array-like, Tensorfow variable, Initializer or scalar Default initialization methods you can find :ref:`here <init-methods>`. Defaults to :class:`Constant(value=0) <neupy.init.Constant>`. visible_bias : array-like, Tensorfow variable, Initializer or scalar Default initialization methods you can find :ref:`here <init-methods>`. Defaults to :class:`Constant(value=0) <neupy.init.Constant>`. {BaseNetwork.Parameters} Methods ------- train(input_train, epochs=100) Trains network. {BaseSkeleton.fit} visible_to_hidden(visible_input) Populates data throught the network and returns output from the hidden layer. hidden_to_visible(hidden_input) Propagates output from the hidden layer backward to the visible. gibbs_sampling(visible_input, n_iter=1) Makes Gibbs sampling ``n`` times using visible input. Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> data = np.array([ ... [1, 0, 1, 0], ... [1, 0, 1, 0], ... [1, 0, 0, 0], # incomplete sample ... [1, 0, 1, 0], ... ... [0, 1, 0, 1], ... [0, 0, 0, 1], # incomplete sample ... [0, 1, 0, 1], ... [0, 1, 0, 1], ... [0, 1, 0, 1], ... [0, 1, 0, 1], ... ]) >>> >>> rbm = algorithms.RBM(n_visible=4, n_hidden=1) >>> rbm.train(data, epochs=100) >>> >>> hidden_states = rbm.visible_to_hidden(data) >>> hidden_states.round(2) array([[ 0.99], [ 0.99], [ 0.95], [ 0.99], [ 0. ], [ 0.01], [ 0. ], [ 0. ], [ 0. ], [ 0. ]]) References ---------- [1] G. Hinton, A Practical Guide to Training Restricted Boltzmann Machines, 2010. 
http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf """ n_visible = IntProperty(minval=1) n_hidden = IntProperty(minval=1) batch_size = IntProperty(minval=1, default=10) weight = ParameterProperty(default=init.Normal()) hidden_bias = ParameterProperty(default=init.Constant(value=0)) visible_bias = ParameterProperty(default=init.Constant(value=0)) def __init__(self, n_visible, n_hidden, **options): options.update({'n_visible': n_visible, 'n_hidden': n_hidden}) super(RBM, self).__init__(**options) def init_input_output_variables(self): with tf.variable_scope('rbm'): self.weight = create_shared_parameter(value=self.weight, name='weight', shape=(self.n_visible, self.n_hidden)) self.hidden_bias = create_shared_parameter( value=self.hidden_bias, name='hidden-bias', shape=(self.n_hidden, ), ) self.visible_bias = create_shared_parameter( value=self.visible_bias, name='visible-bias', shape=(self.n_visible, ), ) self.variables.update(network_input=tf.placeholder( tf.float32, (None, self.n_visible), name="network-input", ), network_hidden_input=tf.placeholder( tf.float32, (None, self.n_hidden), name="network-hidden-input", )) def init_variables(self): with tf.variable_scope('rbm'): self.variables.update(h_samples=tf.Variable( tf.zeros([self.batch_size, self.n_hidden]), name="hidden-samples", dtype=tf.float32, ), ) def init_methods(self): def free_energy(visible_sample): with tf.name_scope('free-energy'): wx = tf.matmul(visible_sample, self.weight) wx_b = wx + self.hidden_bias visible_bias_term = dot(visible_sample, self.visible_bias) # We can get infinity when wx_b is a relatively large number # (maybe 100). Taking exponent makes it even larger and # for with float32 it can convert it to infinity. But because # number is so large we don't care about +1 value before taking # logarithms and therefore we can just pick value as it is # since our operation won't change anything. hidden_terms = tf.where( # exp(30) is such a big number that +1 won't # make any difference in the outcome. tf.greater(wx_b, 30), wx_b, tf.log1p(tf.exp(wx_b)), ) hidden_term = tf.reduce_sum(hidden_terms, axis=1) return -(visible_bias_term + hidden_term) def visible_to_hidden(visible_sample): with tf.name_scope('visible-to-hidden'): wx = tf.matmul(visible_sample, self.weight) wx_b = wx + self.hidden_bias return tf.nn.sigmoid(wx_b) def hidden_to_visible(hidden_sample): with tf.name_scope('hidden-to-visible'): wx = tf.matmul(hidden_sample, self.weight, transpose_b=True) wx_b = wx + self.visible_bias return tf.nn.sigmoid(wx_b) def sample_hidden_from_visible(visible_sample): with tf.name_scope('sample-hidden-to-visible'): hidden_prob = visible_to_hidden(visible_sample) hidden_sample = random_binomial(hidden_prob) return hidden_sample def sample_visible_from_hidden(hidden_sample): with tf.name_scope('sample-visible-to-hidden'): visible_prob = hidden_to_visible(hidden_sample) visible_sample = random_binomial(visible_prob) return visible_sample network_input = self.variables.network_input network_hidden_input = self.variables.network_hidden_input input_shape = tf.shape(network_input) n_samples = input_shape[0] weight = self.weight h_bias = self.hidden_bias v_bias = self.visible_bias h_samples = self.variables.h_samples step = asfloat(self.step) with tf.name_scope('positive-values'): # We have to use `cond` instead of `where`, because # different if-else cases might have different shapes # and it triggers exception in tensorflow. 
v_pos = tf.cond( tf.equal(n_samples, self.batch_size), lambda: network_input, lambda: random_sample(network_input, self.batch_size)) h_pos = visible_to_hidden(v_pos) with tf.name_scope('negative-values'): v_neg = sample_visible_from_hidden(h_samples) h_neg = visible_to_hidden(v_neg) with tf.name_scope('weight-update'): weight_update = ( tf.matmul(v_pos, h_pos, transpose_a=True) - tf.matmul(v_neg, h_neg, transpose_a=True)) / asfloat(n_samples) with tf.name_scope('hidden-bias-update'): h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0) with tf.name_scope('visible-bias-update'): v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0) with tf.name_scope('flipped-input-features'): # Each row will have random feature marked with number 1 # Other values will be equal to 0 possible_feature_corruptions = tf.eye(self.n_visible) corrupted_features = random_sample(possible_feature_corruptions, n_samples) rounded_input = tf.round(network_input) # If we scale input values from [0, 1] range to [-1, 1] # than it will be easier to flip feature values with simple # multiplication. scaled_rounded_input = 2 * rounded_input - 1 scaled_flipped_rounded_input = ( # for corrupted_features we convert 0 to 1 and 1 to -1 # in this way after multiplication we will flip all # signs where -1 in the transformed corrupted_features (-2 * corrupted_features + 1) * scaled_rounded_input) # Scale it back to the [0, 1] range flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2 with tf.name_scope('pseudo-likelihood-loss'): # Stochastic pseudo-likelihood error = tf.reduce_mean(self.n_visible * tf.log_sigmoid( free_energy(flipped_rounded_input) - free_energy(rounded_input))) with tf.name_scope('gibbs-sampling'): gibbs_sampling = sample_visible_from_hidden( sample_hidden_from_visible(network_input)) initialize_uninitialized_variables() self.methods.update(train_epoch=function( [network_input], error, name='rbm/train-epoch', updates=[ (weight, weight + step * weight_update), (h_bias, h_bias + step * h_bias_update), (v_bias, v_bias + step * v_bias_update), (h_samples, random_binomial(p=h_neg)), ]), prediction_error=function( [network_input], error, name='rbm/prediction-error', ), diff1=function( [network_input], free_energy(flipped_rounded_input), name='rbm/diff1-error', ), diff2=function( [network_input], free_energy(rounded_input), name='rbm/diff2-error', ), visible_to_hidden=function( [network_input], visible_to_hidden(network_input), name='rbm/visible-to-hidden', ), hidden_to_visible=function( [network_hidden_input], hidden_to_visible(network_hidden_input), name='rbm/hidden-to-visible', ), gibbs_sampling=function( [network_input], gibbs_sampling, name='rbm/gibbs-sampling', )) def train(self, input_train, input_test=None, epochs=100, summary='table'): """ Train RBM. Parameters ---------- input_train : 1D or 2D array-like input_test : 1D or 2D array-like or None Defaults to ``None``. epochs : int Number of training epochs. Defaults to ``100``. summary : {'table', 'inline'} Training summary type. Defaults to ``'table'``. """ return super(RBM, self).train(input_train=input_train, target_train=None, input_test=input_test, target_test=None, epochs=epochs, epsilon=None, summary=summary) def train_epoch(self, input_train, target_train=None): """ Train one epoch. 
Parameters ---------- input_train : array-like (n_samples, n_features) Returns ------- float """ errors = self.apply_batches( function=self.methods.train_epoch, input_data=input_train, description='Training batches', show_error_output=True, ) n_samples = len(input_train) return average_batch_errors(errors, n_samples, self.batch_size) def visible_to_hidden(self, visible_input): """ Populates data throught the network and returns output from the hidden layer. Parameters ---------- visible_input : array-like (n_samples, n_visible_features) Returns ------- array-like """ is_input_feature1d = (self.n_visible == 1) visible_input = format_data(visible_input, is_input_feature1d) outputs = self.apply_batches( function=self.methods.visible_to_hidden, input_data=visible_input, description='Hidden from visible batches', show_progressbar=True, show_error_output=False, scalar_output=False, ) return np.concatenate(outputs, axis=0) def hidden_to_visible(self, hidden_input): """ Propagates output from the hidden layer backward to the visible. Parameters ---------- hidden_input : array-like (n_samples, n_hidden_features) Returns ------- array-like """ is_input_feature1d = (self.n_hidden == 1) hidden_input = format_data(hidden_input, is_input_feature1d) outputs = self.apply_batches( function=self.methods.hidden_to_visible, input_data=hidden_input, description='Visible from hidden batches', show_progressbar=True, show_error_output=False, scalar_output=False, ) return np.concatenate(outputs, axis=0) def prediction_error(self, input_data, target_data=None): """ Compute the pseudo-likelihood of input samples. Parameters ---------- input_data : array-like Values of the visible layer Returns ------- float Value of the pseudo-likelihood. """ is_input_feature1d = (self.n_visible == 1) input_data = format_data(input_data, is_input_feature1d) errors = self.apply_batches( function=self.methods.prediction_error, input_data=input_data, description='Validation batches', show_error_output=True, ) return average_batch_errors( errors, n_samples=len(input_data), batch_size=self.batch_size, ) def gibbs_sampling(self, visible_input, n_iter=1): """ Makes Gibbs sampling n times using visible input. Parameters ---------- visible_input : 1d or 2d array n_iter : int Number of Gibbs sampling iterations. Defaults to ``1``. Returns ------- array-like Output from the visible units after perfoming n Gibbs samples. Array will contain only binary units (0 and 1). """ is_input_feature1d = (self.n_visible == 1) visible_input = format_data(visible_input, is_input_feature1d) gibbs_sampling = self.methods.gibbs_sampling input_ = visible_input for iteration in range(n_iter): input_ = gibbs_sampling(input_) return input_
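# Hedged usage sketch (not part of the library): after running the docstring
# example above, the trained ``rbm`` can complete partially observed binary
# rows through Gibbs sampling. The commented output is illustrative only.
#
#     incomplete = np.array([[1, 0, 0, 0], [0, 0, 0, 1]])
#     samples = rbm.gibbs_sampling(incomplete, n_iter=10)
#     # ``samples`` contains binary rows drawn from the learned distribution;
#     # with enough training they tend towards [1, 0, 1, 0] and [0, 1, 0, 1].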
class LocalResponseNorm(BaseLayer): """ Local Response Normalization Layer. Aggregation is purely across channels, not within channels, and performed "pixelwise". If the value of the :math:`i` th channel is :math:`x_i`, the output is .. math:: x_i = \\frac{{x_i}}{{ (k + ( \\alpha \\sum_j x_j^2 ))^\\beta }} where the summation is performed over this position on :math:`n` neighboring channels. Parameters ---------- alpha : float coefficient, see equation above beta : float offset, see equation above k : float exponent, see equation above n : int Number of adjacent channels to normalize over, must be odd {BaseLayer.Parameters} Methods ------- {BaseLayer.Methods} Attributes ---------- {BaseLayer.Attributes} """ alpha = NumberProperty(default=1e-4) beta = NumberProperty(default=0.75) k = NumberProperty(default=2) n = IntProperty(default=5) def __init__(self, **options): super(LocalResponseNorm, self).__init__(**options) if self.n % 2 == 0: raise ValueError("Only works with odd ``n``") def validate(self, input_shape): ndim = len(input_shape) if ndim != 3: raise LayerConnectionError( "Layer `{}` expected input with 3 dimensions, got {}" "".format(self, ndim)) def output(self, input_value): if not self.input_shape: raise LayerConnectionError( "Layer `{}` doesn't have defined input shape. Probably " "it doesn't have an input layer.".format(self)) half = self.n // 2 squared_value = input_value ** 2 n_samples = input_value.shape[0] channel = input_value.shape[1] height = input_value.shape[2] width = input_value.shape[3] zero = asfloat(0) extra_channels = T.alloc(zero, n_samples, channel + 2 * half, height, width) squared_value = T.set_subtensor( extra_channels[:, half:half + channel, :, :], squared_value ) scale = self.k for i in range(self.n): scale += self.alpha * squared_value[:, i:i + channel, :, :] scale = scale ** self.beta return input_value / scale
class LocalResponseNorm(BaseLayer):
    """
    Local Response Normalization Layer.

    Aggregation is purely across channels, not within channels,
    and performed "pixelwise".

    If the value of the :math:`i` th channel is :math:`x_i`, the output is

    .. math::
        x_i = \\frac{{x_i}}{{ (k + ( \\alpha \\sum_j x_j^2 ))^\\beta }}

    where the summation is performed over this position on
    ``depth_radius`` neighboring channels.

    Parameters
    ----------
    alpha : float
        Scaling coefficient :math:`\\alpha`, see equation above.

    beta : float
        Exponent :math:`\\beta`, see equation above.

    k : float
        Additive offset :math:`k`, see equation above.

    depth_radius : int
        Number of adjacent channels to normalize over, must be odd.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    alpha = NumberProperty(default=1e-4)
    beta = NumberProperty(default=0.75)
    k = NumberProperty(default=2)
    depth_radius = IntProperty(default=5)

    def __init__(self, **options):
        super(LocalResponseNorm, self).__init__(**options)

        if self.depth_radius % 2 == 0:
            raise ValueError("Only works with odd ``depth_radius``")

    def validate(self, input_shape):
        ndim = len(input_shape)

        if ndim != 3:
            raise LayerConnectionError(
                "Layer `{}` expected input with 3 dimensions, got {}"
                "".format(self, ndim))

    def output(self, input_value):
        return tf.nn.local_response_normalization(
            input_value,
            depth_radius=self.depth_radius,
            bias=self.k,
            alpha=self.alpha,
            beta=self.beta,
        )
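# Hedged usage sketch: LocalResponseNorm expects a 3-dimensional input shape
# (feature map without the batch dimension). Because the layer delegates to
# ``tf.nn.local_response_normalization``, the channel dimension is assumed to
# be the last one. Shapes and parameter values below are illustrative.
#
#     from neupy import layers
#
#     network = layers.join(
#         layers.Input((24, 24, 16)),                # height, width, channels
#         layers.LocalResponseNorm(depth_radius=5),  # normalize across channels
#     )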
class Embedding(BaseLayer): """ Embedding layer accepts indices as an input and returns rows from the weight matrix associated with these indices. It's useful when inputs are categorical features or for the word embedding tasks. Parameters ---------- input_size : int Layer's input vector dimension. It's, typically, associated with number of categories or number of unique words that input vector has. output_size : int Layer's output vector dimension. weight : array-like, Tensorfow variable, scalar or Initializer Defines layer's weights. Default initialization methods you can find :ref:`here <init-methods>`. Defaults to :class:`HeNormal() <neupy.init.HeNormal>`. {BaseLayer.name} Methods ------- {BaseLayer.Methods} Attributes ---------- {BaseLayer.Attributes} Examples -------- This example converts dataset that has only categorical variables into format that suitable for Embedding layer. >>> import numpy as np >>> from neupy.layers import * >>> >>> dataset = np.array([ ... ['cold', 'high'], ... ['hot', 'low'], ... ['cold', 'low'], ... ['hot', 'low'], ... ]) >>> >>> unique_value, dataset_indices = np.unique( ... dataset, return_inverse=True ... ) >>> dataset_indices = dataset_indices.reshape((4, 2)) >>> dataset_indices array([[0, 1], [2, 3], [0, 3], [2, 3]]) >>> >>> n_features = dataset.shape[1] >>> n_unique_categories = len(unique_value) >>> embedded_size = 1 >>> >>> network = join( ... Input(n_features), ... Embedding(n_unique_categories, embedded_size), ... # Output from the embedding layer is 3D ... # To make output 2D we need to reshape dimensions ... Reshape(), ... ) """ input_size = IntProperty(minval=1) output_size = IntProperty(minval=1) weight = ParameterProperty() def __init__(self, input_size, output_size, weight=init.HeNormal(), name=None): super(Embedding, self).__init__(name=name) self.input_size = input_size self.output_size = output_size self.weight = weight def get_output_shape(self, input_shape): input_shape = tf.TensorShape(input_shape) return input_shape.concatenate(self.output_size) def create_variables(self, input_shape): self.input_shape = input_shape self.weight = self.variable( value=self.weight, name='weight', shape=as_tuple(self.input_size, self.output_size)) def output(self, input_value, **kwargs): input_value = tf.cast(input_value, tf.int32) return tf.gather(self.weight, input_value) def __repr__(self): return self._repr_arguments( self.input_size, self.output_size, name=self.name, weight=self.weight, )
class ART1(BaseNetwork): """ Adaptive Resonance Theory (ART1) Network for binary data clustering. Notes ----- * Weights are not random, so the result will be always reproduceble. Parameters ---------- rho : float Control reset action in training process. Value must be between ``0`` and ``1``, defaults to ``0.5``. n_clusters : int Number of clusters, defaults to ``2``. Min value is also ``2``. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} Methods ------- train(input_data): Network network will train until it clusters all samples. {BaseSkeleton.predict} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> data = np.array([ ... [0, 1, 0], ... [1, 0, 0], ... [1, 1, 0], ... ]) >>>> >>> artnet = algorithms.ART1( ... step=2, ... rho=0.7, ... n_clusters=2, ... verbose=False ... ) >>> artnet.predict(data) array([ 0., 1., 1.]) """ rho = ProperFractionProperty(default=0.5) n_clusters = IntProperty(default=2, minval=2) def train(self, input_data): input_data = format_data(input_data) if input_data.ndim != 2: raise ValueError("Input value must be 2 dimentional, got " "{0}".format(input_data.ndim)) data_size = input_data.shape[1] n_clusters = self.n_clusters step = self.step rho = self.rho if list(sort(unique(input_data))) != [0, 1]: raise ValueError("ART1 Network works only with binary matrix, " "all matix must contains only 0 and 1") if not hasattr(self, 'weight_21'): self.weight_21 = ones((data_size, n_clusters)) if not hasattr(self, 'weight_12'): self.weight_12 = step / (step + n_clusters - 1) * self.weight_21.T weight_21 = self.weight_21 weight_12 = self.weight_12 if data_size != weight_21.shape[0]: raise ValueError( "Data dimention is invalid. Get {} columns data set. " "Must be - {} columns".format(data_size, weight_21.shape[0])) classes = zeros(input_data.shape[0]) # Train network for i, p in enumerate(input_data): disabled_neurons = [] reseted_values = [] reset = True while reset: output1 = p input2 = dot(weight_12, output1.T) output2 = zeros(input2.size) input2[disabled_neurons] = -inf winner_index = input2.argmax() output2[winner_index] = 1 expectation = dot(weight_21, output2) output1 = logical_and(p, expectation).astype(int) reset_value = dot(output1.T, output1) / dot(p.T, p) reset = reset_value < rho if reset: disabled_neurons.append(winner_index) reseted_values.append((reset_value, winner_index)) if len(disabled_neurons) >= n_clusters: # Got this case only if we test all possible clusters reset = False winner_index = None if not reset: if winner_index is not None: weight_12[winner_index, :] = (step * output1) / ( step + dot(output1.T, output1) - 1) weight_21[:, winner_index] = output1 else: # Get result with the best `rho` winner_index = max(reseted_values)[1] classes[i] = winner_index return classes def predict(self, input_data): return self.train(input_data)
class Concatenate(BaseLayer): """ Concatenate multiple input layers in one based on the specified axes. Parameters ---------- axis : int The axis along which the inputs will be joined. Default is ``1``. {BaseLayer.Parameters} Methods ------- {BaseLayer.Methods} Attributes ---------- {BaseLayer.Attributes} Examples -------- >>> from neupy import layers >>> >>> input_1 = layers.Input(10) >>> input_2 = layers.Input(20) >>> >>> network = [input_1, input_2] > layers.Concatenate() >>> >>> network.input_shape [(10,), (20,)] >>> network.output_shape (30,) """ axis = IntProperty(default=1) def validate(self, input_shapes): valid_shape = as_tuple(None, input_shapes[0]) for input_shape in input_shapes[1:]: for axis, axis_size in enumerate(input_shape, start=1): if axis != self.axis and valid_shape[axis] != axis_size: raise LayerConnectionError( "Cannot concatenate layers. Some of them don't " "match over dimension #{} (0-based indeces)." "".format(axis) ) @property def output_shape(self): if not self.input_shape: return axis = self.axis - 1 # because we do not include #0 dim input_shapes = copy.copy(self.input_shape) output_shape = list(input_shapes.pop(0)) for input_shape in input_shapes: output_shape[axis] += input_shape[axis] return tuple(output_shape) def output(self, *input_values): return T.concatenate(input_values, axis=self.axis)
class Oja(BaseNetwork): """ Oja is an unsupervised technique used for the dimensionality reduction tasks. Notes ----- - In practice use step as very small value. For instance, value ``1e-7`` can be a good choice. - Normalize the input data before use Oja algorithm. Input data shouldn't contains large values. - Set up smaller values for weight if error for a few first iterations is big compare to the input values scale. For instance, if your input data have values between ``0`` and ``1`` error value equal to ``100`` is big. - During the training network report mean absolute error (MAE) Parameters ---------- minimized_data_size : int Expected number of features after minimization, defaults to ``1``. weight : array-like or ``None`` Defines networks weights. Defaults to :class:`XavierNormal() <neupy.init.XavierNormal>`. {BaseNetwork.Parameters} Methods ------- reconstruct(X) Reconstruct original dataset from the minimized input. train(X, epochs=100) Trains the network to the data X. Network trains until maximum number of ``epochs`` was reached. predict(X) Returns hidden representation of the input data ``X``. Basically, it applies dimensionality reduction. {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy import algorithms >>> >>> data = np.array([[2, 2], [1, 1], [4, 4], [5, 5]]) >>> >>> ojanet = algorithms.Oja( ... minimized_data_size=1, ... step=0.01, ... verbose=False ... ) >>> >>> ojanet.train(data, epochs=100) >>> minimized = ojanet.predict(data) >>> minimized array([[-2.82843122], [-1.41421561], [-5.65686243], [-7.07107804]]) >>> ojanet.reconstruct(minimized) array([[ 2.00000046, 2.00000046], [ 1.00000023, 1.00000023], [ 4.00000093, 4.00000093], [ 5.00000116, 5.00000116]]) """ minimized_data_size = IntProperty(minval=1) weight = ParameterProperty(default=init.XavierNormal()) def one_training_update(self, X, y_train): weight = self.weight minimized = np.dot(X, weight) reconstruct = np.dot(minimized, weight.T) error = X - reconstruct weight += self.step * np.dot(error.T, minimized) mae = np.sum(np.abs(error)) / X.size # Clean objects from the memory del minimized del reconstruct del error return mae def train(self, X, epochs=100): X = format_data(X) n_input_features = X.shape[1] if isinstance(self.weight, init.Initializer): weight_shape = (n_input_features, self.minimized_data_size) self.weight = self.weight.sample(weight_shape, return_array=True) if n_input_features != self.weight.shape[0]: raise ValueError("Invalid number of features. Expected {}, got {}" "".format(self.weight.shape[0], n_input_features)) super(Oja, self).train(X, epochs=epochs) def reconstruct(self, X): if not isinstance(self.weight, np.ndarray): raise NotTrained("Network hasn't been trained yet") X = format_data(X) if X.shape[1] != self.minimized_data_size: raise ValueError("Invalid input data feature space, expected " "{}, got {}.".format(X.shape[1], self.minimized_data_size)) return np.dot(X, self.weight.T) def predict(self, X): if not isinstance(self.weight, np.ndarray): raise NotTrained("Network hasn't been trained yet") X = format_data(X) return np.dot(X, self.weight)
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map.

    Parameters
    ----------
    learning_radius : int
        Learning radius. Neurons that are within this radius around the
        winning neuron on the features grid will also be updated.
        Defaults to ``0``.

    features_grid : list or tuple
        Shape of the grid that output neurons are organized in.
        Total number of elements in the grid must be equal to the
        ``n_outputs`` value. Defaults to ``None``, which is interpreted
        as the ``(n_outputs, 1)`` grid.

    transform : {{'linear', 'euclid', 'cos'}}
        Indicates which transformation will be applied to the input layer.
        The ``linear`` value means that input data will be multiplied by
        the weights in the usual way. The ``euclid`` method identifies the
        closest weight vector to the input one. The ``cos`` option works
        in the same way as ``euclid``, but it uses cosine similarity
        instead of euclidean distance.
        Defaults to ``linear``.

    {BaseAssociative.n_inputs}
    {BaseAssociative.n_outputs}
    {BaseAssociative.weight}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {BaseAssociative.train}
    {BaseSkeleton.fit}
    """
    learning_radius = IntProperty(default=0, minval=0)
    features_grid = TypedListProperty()
    transform = ChoiceProperty(default='linear', choices={
        'linear': dot_product,
        'euclid': neg_euclid_distance,
        'cos': cosine_similarity,
    })

    def __init__(self, **options):
        super(SOFM, self).__init__(**options)

        invalid_feature_grid = (
            self.features_grid is not None and
            mul(*self.features_grid) != self.n_outputs)

        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements as "
                "in the output layer: {0}, but found: {1} ({2}x{3})"
                "".format(self.n_outputs, mul(*self.features_grid),
                          self.features_grid[0], self.features_grid[1]))

    def init_properties(self):
        super(SOFM, self).init_properties()

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        output = np.zeros((input_data.shape[0], self.n_outputs))

        for i, input_row in enumerate(input_data):
            output[i, :] = self.transform(
                input_row.reshape(1, -1), self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1)
        feature_bound = self.features_grid[1]

        output_with_neighbours = neuron_neighbours(
            np.reshape(layer_output, self.features_grid),
            (neuron_winner // feature_bound,
             neuron_winner % feature_bound),
            self.learning_radius)

        index_y, _ = np.nonzero(
            np.reshape(output_with_neighbours, (self.n_outputs, 1)))

        return index_y
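# Hedged usage sketch for SOFM (parameter values and data are illustrative).
# The network maps every input sample onto the features grid; ``predict`` is
# expected to mark the winning neuron per row, so ``argmax`` over the columns
# recovers a cluster index for each sample.
#
#     import numpy as np
#     from neupy import algorithms
#
#     data = np.array([
#         [0.1, 0.2], [0.2, 0.1], [0.9, 0.8], [0.8, 0.9],
#     ])
#     sofm = algorithms.SOFM(
#         n_inputs=2,
#         n_outputs=4,
#         features_grid=(2, 2),
#         learning_radius=1,
#         step=0.1,
#         verbose=False,
#     )
#     sofm.train(data, epochs=100)
#     clusters = sofm.predict(data).argmax(axis=1)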
class PNN(BaseSkeleton): """ Probabilistic Neural Network (PNN). Network applies only to the classification problems. Notes ----- - PNN Network is sensitive for cases when one input feature has higher values than the other one. Input data has to be normalized before training. - Standard deviation has to match the range of the input features Check ``std`` parameter description for more information. - The bigger training dataset the slower prediction. Algorithm is much more efficient for small datasets. - Network uses lazy learning which mean that network doesn't need iterative training. It just stores parameters and use them to make a predictions. Parameters ---------- std : float Standard deviation for the Probability Density Function (PDF). If your input features have high values than standard deviation should also be high. For instance, if input features from range ``[0, 20]`` that standard deviation should be also a big value like ``10`` or ``15``. Small values will lead to bad prediction. batch_size : int or None Set up min-batch size. The ``None`` value will ensure that all data samples will be propagated through the network at once. Defaults to ``128``. {Verbose.verbose} Methods ------- train(X_train, y_train, copy=True) Network just stores all the information about the data and use it for the prediction. Parameter ``copy`` copies input data before saving it inside the network. The ``y_train`` argument should be a vector or matrix with one feature column. predict(X) Return classes associated with each sample in the ``X``. predict_proba(X) Predict probabilities for each class. {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> >>> from sklearn import datasets, metrics >>> from sklearn.model_selection import train_test_split >>> from neupy import algorithms >>> >>> dataset = datasets.load_digits() >>> x_train, x_test, y_train, y_test = train_test_split( ... dataset.data, dataset.target, test_size=0.3 ... ) >>> >>> pnn = algorithms.PNN(std=10, verbose=False) >>> pnn.train(x_train, y_train) >>> >>> y_predicted = pnn.predict(x_test) >>> metrics.accuracy_score(y_test, y_predicted) 0.98888888888888893 """ std = BoundedProperty(minval=0) batch_size = IntProperty(default=128, minval=0, allow_none=True) def __init__(self, std, batch_size=128, verbose=False): self.std = std self.batch_size = batch_size self.classes = None self.X_train = None self.y_train = None super(PNN, self).__init__(batch_size=batch_size, verbose=verbose) def train(self, X_train, y_train, copy=True): """ Trains network. PNN doesn't actually train, it just stores input data and use it for prediction. Parameters ---------- X_train : array-like (n_samples, n_features) y_train : array-like (n_samples,) Target variable should be vector or matrix with one feature column. copy : bool If value equal to ``True`` than input matrices will be copied. Defaults to ``True``. Raises ------ ValueError In case if something is wrong with input data. 
""" X_train = format_data(X_train, copy=copy) y_train = format_data(y_train, copy=copy, make_float=False) self.X_train = X_train self.y_train = y_train if X_train.shape[0] != y_train.shape[0]: raise ValueError( "Number of samples in the input and " "target datasets are different") if y_train.shape[1] != 1: raise ValueError( "Target value should be vector or " "matrix with only one column") classes = self.classes = np.unique(y_train) n_classes = classes.size n_samples = X_train.shape[0] class_ratios = self.class_ratios = np.zeros(n_classes) row_comb_matrix = self.row_comb_matrix = np.zeros( (n_classes, n_samples)) for i, class_name in enumerate(classes): class_name = classes[i] class_val_positions = (y_train == class_name) row_comb_matrix[i, class_val_positions.ravel()] = 1 class_ratios[i] = np.sum(class_val_positions) def predict_proba(self, X): """ Predict probabilities for each class. Parameters ---------- X : array-like (n_samples, n_features) Returns ------- array-like (n_samples, n_classes) """ outputs = iters.apply_batches( function=self.predict_raw, inputs=format_data(X), batch_size=self.batch_size, show_progressbar=self.logs.enable, ) raw_output = np.concatenate(outputs, axis=1) total_output_sum = raw_output.sum(axis=0).reshape((-1, 1)) return raw_output.T / total_output_sum def predict_raw(self, X): """ Raw prediction. Parameters ---------- X : array-like (n_samples, n_features) Raises ------ NotTrained If network hasn't been trained. ValueError In case if something is wrong with input data. Returns ------- array-like (n_samples, n_classes) """ if self.classes is None: raise NotTrained( "Cannot make a prediction. Network hasn't been trained yet") if X.shape[1] != self.X_train.shape[1]: raise ValueError( "Input data must contain {0} features, got {1}" "".format(self.X_train.shape[1], X.shape[1])) class_ratios = self.class_ratios.reshape((-1, 1)) pdf_outputs = pdf_between_data(self.X_train, X, self.std) return np.dot(self.row_comb_matrix, pdf_outputs) / class_ratios def predict(self, X): """ Predicts class from the input data. Parameters ---------- X : array-like (n_samples, n_features) Returns ------- array-like (n_samples,) """ outputs = iters.apply_batches( function=self.predict_raw, inputs=format_data(X), batch_size=self.batch_size, show_progressbar=self.logs.enable, ) raw_output = np.concatenate(outputs, axis=1) return self.classes[raw_output.argmax(axis=0)]
class BaseLayer(with_metaclass(LayerMeta, ChainConnection, BaseConfigurable)): """ Base class for all layers. Parameters ---------- {layer_params} """ __layer_params = """input_size : int Layer input size. weight : 2D array-like or None Define your layer weights. `None` means that your weights will be generate randomly dependence on property `init_method`. `None` by default. init_method : {'gauss', 'bounded', 'ortho'} Weight initialization method. `gauss` will generate random weights dependence on Standard Normal Distribution. `bounded` generate uniform random weghts in initialized bounds. `ortho` generate random orthogonal matrix. random_weight_bound : tuple of two int Available only for `init_method` eqaul to `bounded`, defaults to `(0, 1)`. """ shared_docs = {'layer_params': __layer_params} input_size = IntProperty() weight = ArrayProperty(default=None) random_weight_bound = NumberBoundProperty(default=(0, 1)) init_method = ChoiceProperty(default=GAUSSIAN, choices=[GAUSSIAN, BOUNDED, ORTHOGONAL]) def __init__(self, input_size, **options): super(BaseLayer, self).__init__() self.input_size = input_size self.use_bias = False # Default variables which will change after initialization self.relate_to_layer = None self.size = None # If you will set class method function variable, python understend # that this is new class method and will call it with `self` # first parameter. if hasattr(self.__class__, 'activation_function'): self.activation_function = self.__class__.activation_function # Initialize default options BaseConfigurable.__init__(self, **options) def relate_to(self, right_layer): self.relate_to_layer = right_layer def initialize(self, with_bias=False): self.use_bias = with_bias size = self.input_size + self.use_bias self.size = (size, self.relate_to_layer.input_size) self.weight = self._init_weight() # --------------- Weights manipulations --------------- # def _init_weight(self): if self.weight is not None: return self.weight init_method = self.init_method if init_method == GAUSSIAN: return randn(*self.size) elif init_method == BOUNDED: return random_bounded(self.size, *self.random_weight_bound) elif init_method == ORTHOGONAL: return random_orthogonal(self.size) @property def weight_without_bias(self): if self.use_bias: return self.weight[1:, :] return self.weight # --------------- Layer operations --------------- # def summator(self, input_value): return dot(input_value, self.weight) def output(self, input_value): input_data = self.preformat_input(input_value) summated = self.summator(input_data) return self.activation_function(summated) def preformat_input(self, input_data): if self.use_bias: input_data = add_bias_column(input_data) return input_data def __repr__(self): return '{name}({size})'.format(name=self.__class__.__name__, size=self.input_size)
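# Hedged usage sketch for the weight-initialization options documented above.
# ``SigmoidLayer`` is used purely as an assumed example of a concrete
# subclass; any layer built on top of this BaseLayer accepts the same options.
#
#     # Gaussian weights (default behaviour)
#     layer = SigmoidLayer(10)
#
#     # Uniform random weights inside explicit bounds
#     layer = SigmoidLayer(10, init_method='bounded',
#                          random_weight_bound=(-0.5, 0.5))
#
#     # Random orthogonal weight matrix
#     layer = SigmoidLayer(10, init_method='ortho')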
class BaseAssociative(BaseNetwork): """ Base class for associative learning. Parameters ---------- n_inputs : int Number of features (columns) in the input data. n_outputs : int Number of outputs in the network. weight : array-like, Initializer Neural network weights. Value defined manualy should have shape ``(n_inputs, n_outputs)``. Defaults to :class:`Normal() <neupy.init.Normal>`. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} train(input_train, summary='table', epochs=100) Train neural network. {BaseSkeleton.fit} """ n_inputs = IntProperty(minval=1, required=True) n_outputs = IntProperty(minval=1, required=True) weight = ParameterProperty(default=init.Normal()) def __init__(self, **options): super(BaseAssociative, self).__init__(**options) self.init_layers() def init_layers(self): valid_weight_shape = (self.n_inputs, self.n_outputs) if isinstance(self.weight, init.Initializer): self.weight = self.weight.sample( valid_weight_shape, return_array=True) if self.weight.shape != valid_weight_shape: raise ValueError( "Weight matrix has invalid shape. Got {}, expected {}" "".format(self.weight.shape, valid_weight_shape)) self.weight = self.weight.astype(float) def format_input_data(self, input_data): is_feature1d = self.n_inputs == 1 input_data = format_data(input_data, is_feature1d) if input_data.ndim != 2: raise ValueError("Cannot make prediction, because input " "data has more than 2 dimensions") n_samples, n_features = input_data.shape if n_features != self.n_inputs: raise ValueError("Input data expected to have {} features, " "but got {}".format(self.n_inputs, n_features)) return input_data def train(self, input_train, summary='table', epochs=100): input_train = self.format_input_data(input_train) return super(BaseAssociative, self).train( input_train=input_train, target_train=None, input_test=None, target_test=None, epochs=epochs, epsilon=None, summary=summary)
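# Hedged note: BaseAssociative is not trained directly; concrete algorithms
# that subclass it (for instance the Kohonen/SOFM networks above) reuse its
# weight initialization and its ``train`` signature. A minimal sketch,
# assuming such a subclass and an unlabelled 2-feature dataset ``data``:
#
#     net = algorithms.Kohonen(n_inputs=2, n_outputs=3, step=0.5,
#                              verbose=False)
#     net.train(data, epochs=100)   # target values are not required
#     winners = net.predict(data)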
class LSTM(BaseRNNLayer): """ Long Short Term Memory (LSTM) Layer. Parameters ---------- {BaseRNNLayer.size} weights : dict or Initializer Weight parameters for different gates. Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`. - In case if application requires the same initialization method for all weights, then it's possible to specify initialization method that would be automaticaly applied to all weight parameters in the LSTM layer. .. code-block:: python layers.LSTM(2, weights=init.Normal(0.1)) - In case if application requires different initialization values for different weights then it's possible to specify an exact weight by name. .. code-block:: python dict( weight_in_to_ingate=init.XavierUniform(), weight_hid_to_ingate=init.XavierUniform(), weight_cell_to_ingate=init.XavierUniform(), weight_in_to_forgetgate=init.XavierUniform(), weight_hid_to_forgetgate=init.XavierUniform(), weight_cell_to_forgetgate=init.XavierUniform(), weight_in_to_outgate=init.XavierUniform(), weight_hid_to_outgate=init.XavierUniform(), weight_cell_to_outgate=init.XavierUniform(), weight_in_to_cell=init.XavierUniform(), weight_hid_to_cell=init.XavierUniform(), ) If application requires modification to only one (or multiple) parameter then it's better to specify the one that you need to modify and ignore other parameters .. code-block:: python dict(weight_in_to_ingate=init.Normal(0.1)) Other parameters like ``weight_cell_to_outgate`` will be equal to their default values. biases : dict or Initializer Bias parameters for different gates. Defaults to :class:`Constant(0) <neupy.init.Constant>`. - In case if application requires the same initialization method for all biases, then it's possible to specify initialization method that would be automaticaly applied to all bias parameters in the LSTM layer. .. code-block:: python layers.LSTM(2, biases=init.Constant(1)) - In case if application requires different initialization values for different weights then it's possible to specify an exact weight by name. .. code-block:: python dict( bias_ingate=init.Constant(0), bias_forgetgate=init.Constant(0), bias_cell=init.Constant(0), bias_outgate=init.Constant(0), ) If application requires modification to only one (or multiple) parameter then it's better to specify the one that you need to modify and ignore other parameters .. code-block:: python dict(bias_ingate=init.Constant(1)) Other parameters like ``bias_cell`` will be equal to their default values. activation_functions : dict, callable Activation functions for different gates. Defaults to: .. code-block:: python # import theano.tensor as T dict( ingate=T.nnet.sigmoid, forgetgate=T.nnet.sigmoid, outgate=T.nnet.sigmoid, cell=T.tanh, ) If application requires modification to only one parameter then it's better to specify the one that you need to modify and ignore other parameters .. code-block:: python dict(ingate=T.tanh) Other parameters like ``forgetgate`` or ``outgate`` will be equal to their default values. learn_init : bool If ``True``, make ``cell_init`` and ``hid_init`` trainable variables. Defaults to ``False``. cell_init : array-like, Theano variable, scalar or Initializer Initializer for initial cell state (:math:`c_0`). Defaults to :class:`Constant(0) <neupy.init.Constant>`. hid_init : array-like, Theano variable, scalar or Initializer Initializer for initial hidden state (:math:`h_0`). Defaults to :class:`Constant(0) <neupy.init.Constant>`. 
backwards : bool If ``True``, process the sequence backwards and then reverse the output again such that the output from the layer is always from :math:`x_1` to :math:`x_n`. Defaults to ``False`` {BaseRNNLayer.only_return_final} precompute_input : bool if ``True``, precompute ``input_to_hid`` before iterating through the sequence. This can result in a speed up at the expense of an increase in memory usage. Defaults to ``True``. peepholes : bool If ``True``, the LSTM uses peephole connections. When ``False``, cell parameters are ignored. Defaults to ``False``. unroll_scan : bool If ``True`` the recursion is unrolled instead of using scan. For some graphs this gives a significant speed up but it might also consume more memory. When ``unroll_scan=True``, backpropagation always includes the full sequence, so ``n_gradient_steps`` must be set to ``-1`` and the input sequence length must be known at compile time (i.e., cannot be given as ``None``). Defaults to ``False``. gradient_clipping : flaot or int If nonzero, the gradient messages are clipped to the given value during the backward pass. Defaults to ``0``. n_gradient_steps : int Number of timesteps to include in the backpropagated gradient. If ``-1``, backpropagate through the entire sequence. Defaults to ``-1``. {BaseLayer.Parameters} Notes ----- Code was adapted from the `Lasagne <https://github.com/Lasagne/Lasagne>`_ library. Examples -------- Sequence classification .. code-block:: python from neupy import layers, algorithms n_time_steps = 40 n_categories = 20 embedded_size = 10 network = algorithms.RMSProp( [ layers.Input(n_time_steps), layers.Embedding(n_categories, embedded_size), layers.LSTM(20), layers.Sigmoid(1), ] ) """ weights = MultiParameterProperty( default=dict( weight_in_to_ingate=init.XavierUniform(), weight_hid_to_ingate=init.XavierUniform(), weight_cell_to_ingate=init.XavierUniform(), weight_in_to_forgetgate=init.XavierUniform(), weight_hid_to_forgetgate=init.XavierUniform(), weight_cell_to_forgetgate=init.XavierUniform(), weight_in_to_outgate=init.XavierUniform(), weight_hid_to_outgate=init.XavierUniform(), weight_cell_to_outgate=init.XavierUniform(), weight_in_to_cell=init.XavierUniform(), weight_hid_to_cell=init.XavierUniform(), )) biases = MultiParameterProperty( default=dict( bias_ingate=init.Constant(0), bias_forgetgate=init.Constant(0), bias_cell=init.Constant(0), bias_outgate=init.Constant(0), )) activation_functions = MultiCallableProperty( default=dict( ingate=T.nnet.sigmoid, forgetgate=T.nnet.sigmoid, outgate=T.nnet.sigmoid, cell=T.tanh, )) learn_init = Property(default=False, expected_type=bool) cell_init = ParameterProperty(default=init.Constant(0)) hid_init = ParameterProperty(default=init.Constant(0)) unroll_scan = Property(default=False, expected_type=bool) backwards = Property(default=False, expected_type=bool) precompute_input = Property(default=True, expected_type=bool) peepholes = Property(default=False, expected_type=bool) n_gradient_steps = IntProperty(default=-1) gradient_clipping = NumberProperty(default=0, minval=0) def initialize(self): super(LSTM, self).initialize() n_inputs = np.prod(self.input_shape[1:]) weights = self.weights biases = self.biases # Input gate parameters self.weight_in_to_ingate = self.add_parameter( value=weights.weight_in_to_ingate, name='weight_in_to_ingate', shape=(n_inputs, self.size)) self.weight_hid_to_ingate = self.add_parameter( value=weights.weight_hid_to_ingate, name='weight_hid_to_ingate', shape=(self.size, self.size)) self.bias_ingate = self.add_parameter( 
value=biases.bias_ingate, name='bias_ingate', shape=(self.size,)) # Forget gate parameters self.weight_in_to_forgetgate = self.add_parameter( value=weights.weight_in_to_forgetgate, name='weight_in_to_forgetgate', shape=(n_inputs, self.size)) self.weight_hid_to_forgetgate = self.add_parameter( value=weights.weight_hid_to_forgetgate, name='weight_hid_to_forgetgate', shape=(self.size, self.size)) self.bias_forgetgate = self.add_parameter( value=biases.bias_forgetgate, name='bias_forgetgate', shape=(self.size,)) # Cell parameters self.weight_in_to_cell = self.add_parameter( value=weights.weight_in_to_cell, name='weight_in_to_cell', shape=(n_inputs, self.size)) self.weight_hid_to_cell = self.add_parameter( value=weights.weight_hid_to_cell, name='weight_hid_to_cell', shape=(self.size, self.size)) self.bias_cell = self.add_parameter( value=biases.bias_cell, name='bias_cell', shape=(self.size,)) # If peephole (cell to gate) connections were enabled, initialize # peephole connections. These are elementwise products with the cell # state, so they are represented as vectors. if self.peepholes: self.weight_cell_to_ingate = self.add_parameter( value=weights.weight_cell_to_ingate, name='weight_cell_to_ingate', shape=(self.size,)) self.weight_cell_to_forgetgate = self.add_parameter( value=weights.weight_cell_to_forgetgate, name='weight_cell_to_forgetgate', shape=(self.size,)) self.weight_cell_to_outgate = self.add_parameter( value=weights.weight_cell_to_outgate, name='weight_cell_to_outgate', shape=(self.size,)) # Output gate parameters self.weight_in_to_outgate = self.add_parameter( value=weights.weight_in_to_outgate, name='weight_in_to_outgate', shape=(n_inputs, self.size)) self.weight_hid_to_outgate = self.add_parameter( value=weights.weight_hid_to_outgate, name='weight_hid_to_outgate', shape=(self.size, self.size)) self.bias_outgate = self.add_parameter( value=biases.bias_outgate, name='bias_outgate', shape=(self.size,)) # Initialization parameters self.add_parameter(value=self.cell_init, shape=(1, self.size), name="cell_init", trainable=self.learn_init) self.add_parameter(value=self.hid_init, shape=(1, self.size), name="hid_init", trainable=self.learn_init) def output(self, input_value): # Treat all dimensions after the second as flattened # feature dimensions if input_value.ndim > 3: input_value = T.flatten(input_value, 3) # Because scan iterates over the first dimension we # dimshuffle to (n_time_steps, n_batch, n_features) input_value = input_value.dimshuffle(1, 0, 2) seq_len, n_batch, _ = input_value.shape # Stack input weight matrices into a (num_inputs, 4 * num_units) # matrix, which speeds up computation weight_in_stacked = T.concatenate([ self.weight_in_to_ingate, self.weight_in_to_forgetgate, self.weight_in_to_cell, self.weight_in_to_outgate], axis=1) # Same for hidden weight matrices weight_hid_stacked = T.concatenate([ self.weight_hid_to_ingate, self.weight_hid_to_forgetgate, self.weight_hid_to_cell, self.weight_hid_to_outgate], axis=1) # Stack biases into a (4 * num_units) vector bias_stacked = T.concatenate([ self.bias_ingate, self.bias_forgetgate, self.bias_cell, self.bias_outgate], axis=0) if self.precompute_input: # Because the input is given for all time steps, we can # precompute_input the inputs dot weight matrices before scanning. # weight_in_stacked is (n_features, 4 * num_units). # Input: (n_time_steps, n_batch, 4 * num_units). input_value = T.dot(input_value, weight_in_stacked) + bias_stacked # When theano.scan calls step, input_n will be # (n_batch, 4 * num_units). 
We define a slicing function # that extract the input to each LSTM gate def slice_w(x, n): return x[:, n * self.size:(n + 1) * self.size] def one_lstm_step(input_n, cell_previous, hid_previous, *args): if not self.precompute_input: input_n = T.dot(input_n, weight_in_stacked) + bias_stacked # Calculate gates pre-activations and slice gates = input_n + T.dot(hid_previous, weight_hid_stacked) # Clip gradients if self.gradient_clipping: gates = theano.gradient.grad_clip( gates, -self.gradient_clipping, self.gradient_clipping) # Extract the pre-activation gate values ingate = slice_w(gates, 0) forgetgate = slice_w(gates, 1) cell_input = slice_w(gates, 2) outgate = slice_w(gates, 3) if self.peepholes: # Compute peephole connections ingate += cell_previous * self.weight_cell_to_ingate forgetgate += cell_previous * self.weight_cell_to_forgetgate # Apply nonlinearities ingate = self.activation_functions.ingate(ingate) forgetgate = self.activation_functions.forgetgate(forgetgate) cell_input = self.activation_functions.cell(cell_input) # Compute new cell value cell = forgetgate * cell_previous + ingate * cell_input if self.peepholes: outgate += cell * self.weight_cell_to_outgate outgate = self.activation_functions.outgate(outgate) # Compute new hidden unit activation hid = outgate * T.tanh(cell) return [cell, hid] ones = T.ones((n_batch, 1)) cell_init = T.dot(ones, self.cell_init) hid_init = T.dot(ones, self.hid_init) non_sequences = [weight_hid_stacked] # When we aren't precomputing the input outside of scan, we need to # provide the input weights and biases to the step function if not self.precompute_input: non_sequences += [weight_in_stacked, bias_stacked] # The "peephole" weight matrices are only used # when self.peepholes=True if self.peepholes: non_sequences += [self.weight_cell_to_ingate, self.weight_cell_to_forgetgate, self.weight_cell_to_outgate] if self.unroll_scan: # Retrieve the dimensionality of the incoming layer n_time_steps = self.input_shape[0] # Explicitly unroll the recurrence instead of using scan _, hid_out = unroll_scan( fn=one_lstm_step, sequences=[input_value], outputs_info=[cell_init, hid_init], go_backwards=self.backwards, non_sequences=non_sequences, n_steps=n_time_steps) else: (_, hid_out), _ = theano.scan( fn=one_lstm_step, sequences=input_value, outputs_info=[cell_init, hid_init], go_backwards=self.backwards, truncate_gradient=self.n_gradient_steps, non_sequences=non_sequences, strict=True) # When it is requested that we only return the final sequence step, # we need to slice it out immediately after scan is applied if self.only_return_final: return hid_out[-1] # dimshuffle back to (n_batch, n_time_steps, n_features)) hid_out = hid_out.dimshuffle(1, 0, 2) # if scan is backward reverse the output if self.backwards: hid_out = hid_out[:, ::-1] return hid_out
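# Hedged training sketch for the sequence classifier from the LSTM docstring
# example above. The random arrays below only illustrate the expected input
# format: integer category indices per time step and one binary label per
# sequence; they are not meaningful training data.
#
#     import numpy as np
#
#     x_train = np.random.randint(0, n_categories, (64, n_time_steps))
#     y_train = np.random.randint(0, 2, (64, 1))
#     network.train(x_train, y_train, epochs=10)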
class BaseAssociative(BaseNetwork): """ Base class for associative learning. Parameters ---------- n_inputs : int Number of input units. n_outputs : int Number of output units. weight : array-like, Initializer Neural network weights. Value defined manualy should have shape ``(n_inputs, n_outputs)``. Defaults to :class:`Normal() <neupy.init.Normal>`. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} train(input_train, epochs=100) Train neural network. {BaseSkeleton.fit} """ n_inputs = IntProperty(minval=1, required=True) n_outputs = IntProperty(minval=1, required=True) weight = ParameterProperty(default=init.Normal()) def __init__(self, **options): super(BaseAssociative, self).__init__(**options) self.init_layers() def init_layers(self): valid_weight_shape = (self.n_inputs, self.n_outputs) if isinstance(self.weight, init.Initializer): self.weight = self.weight.sample(valid_weight_shape) if self.weight.shape != valid_weight_shape: raise ValueError("Weight matrix has invalid shape. Got {}, " "expected {}".format(self.weight.shape, valid_weight_shape)) self.weight = self.weight.astype(float) def train(self, input_train, epochs=100): input_train = format_data(input_train, is_feature1d=True) return super(BaseAssociative, self).train(input_train=input_train, target_train=None, input_test=None, target_test=None, epochs=epochs, epsilon=None, summary='table')
class WolfeLineSearchForStep(StepSelectionBuiltIn, Configurable): """ Class that has all functions required in order to apply line search over step parameter that used during the network training. Parameters ---------- wolfe_maxiter : int Controls maximun number of iteration during the line search that identifies optimal step size during the weight update stage. Defaults to ``20``. wolfe_c1 : float Parameter for Armijo condition rule. It's used during the line search that identifies optimal step size during the weight update stage. Defaults ``1e-4``. wolfe_c2 : float Parameter for curvature condition rule. It's used during the line search that identifies optimal step size during the weight update stage. Defaults ``0.9``. """ wolfe_maxiter = IntProperty(default=20, minval=0) wolfe_c1 = NumberProperty(default=1e-4, minval=0) wolfe_c2 = NumberProperty(default=0.9, minval=0) def find_optimal_step(self, parameter_vector, parameter_update): network_inputs = self.variables.network_inputs network_output = self.variables.network_output layers_and_parameters = list(iter_parameters(self.layers)) def prediction(step): step = asfloat(step) updated_params = parameter_vector + step * parameter_update # This trick allow us to replace shared variables # with tensorflow variables and get output from the network start_pos = 0 for layer, attrname, param in layers_and_parameters: end_pos = start_pos + get_variable_size(param) updated_param_value = tf.reshape( updated_params[start_pos:end_pos], param.shape) setattr(layer, attrname, updated_param_value) start_pos = end_pos output = self.connection.output(*network_inputs) # Restore previous parameters for layer, attrname, param in layers_and_parameters: setattr(layer, attrname, param) return output def phi(step): return self.error(network_output, prediction(step)) def derphi(step): error_func = self.error(network_output, prediction(step)) gradient, = tf.gradients(error_func, step) return gradient return line_search(phi, derphi, self.wolfe_maxiter, self.wolfe_c1, self.wolfe_c2)
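# Hedged usage sketch: this class is a mixin, so the ``wolfe_*`` options are
# passed to an optimizer that inherits from it rather than being instantiated
# directly. ``QuasiNewton`` is assumed here to be such an optimizer and
# ``network`` an already constructed layer graph.
#
#     optimizer = algorithms.QuasiNewton(
#         network,
#         wolfe_maxiter=30,   # allow a longer line search per update
#         wolfe_c1=1e-4,
#         wolfe_c2=0.9,
#     )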
class BaseStepAssociative(BaseAssociative): """ Base class for associative algorithms which have 2 layers and first one is has step function as activation. Parameters ---------- {BaseAssociative.n_inputs} {BaseAssociative.n_outputs} n_unconditioned : int Number of unconditioned units in neraul networks. All these units wouldn't update during the training procedure. Unconditioned should be the first feature in the dataset. weight : array-like Neural network weights. Value defined manualy should have shape ``(n_inputs, n_outputs)``. Defaults to ``None`` which means that all unconditional weights will be equal to ``1``. Other weights equal to ``0``. bias : array-like, Initializer Neural network bias units. Defaults to :class:`Constant(-0.5) <neupy.init.Constant>`. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} {Verbose.verbose} Methods ------- {BaseSkeleton.predict} {BaseAssociative.train} {BaseSkeleton.fit} """ n_inputs = IntProperty(minval=2, required=True) n_unconditioned = IntProperty(minval=1, required=True) weight = ArrayProperty() bias = ParameterProperty(default=init.Constant(-0.5)) def init_layers(self): if self.n_inputs <= self.n_unconditioned: raise ValueError( "Number of uncondition features should be less than total " "number of features. `n_inputs`={} and " "`n_unconditioned`={}".format(self.n_inputs, self.n_unconditioned)) valid_weight_shape = (self.n_inputs, self.n_outputs) valid_bias_shape = (self.n_outputs, ) if self.weight is None: self.weight = np.zeros(valid_weight_shape) self.weight[:self.n_unconditioned, :] = 1 if isinstance(self.bias, init.Initializer): self.bias = self.bias.sample(valid_bias_shape) super(BaseStepAssociative, self).init_layers() if self.bias.shape != valid_bias_shape: raise ValueError("Bias vector has invalid shape. Got {}, " "expected {}".format(self.bias.shape, valid_bias_shape)) self.bias = self.bias.astype(float) def predict(self, input_data): input_data = format_data(input_data, is_feature1d=False) raw_output = input_data.dot(self.weight) + self.bias return np.where(raw_output > 0, 1, 0) def train(self, input_train, *args, **kwargs): input_train = format_data(input_train, is_feature1d=False) return super(BaseStepAssociative, self).train(input_train, *args, **kwargs) def train_epoch(self, input_train, target_train): weight = self.weight n_unconditioned = self.n_unconditioned predict = self.predict weight_delta = self.weight_delta for input_row in input_train: input_row = np.reshape(input_row, (1, input_row.size)) layer_output = predict(input_row) weight[n_unconditioned:, :] += weight_delta( input_row, layer_output)
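# Hedged sketch: ``train_epoch`` above delegates the actual update rule to a
# ``weight_delta`` method that subclasses must provide. The class below is a
# hypothetical minimal subclass, shown only to illustrate the expected shapes;
# it is not part of the library.
#
#     class HebbianExample(BaseStepAssociative):
#         def weight_delta(self, input_row, layer_output):
#             n = self.n_unconditioned
#             # delta shape: (n_inputs - n_unconditioned, n_outputs)
#             return self.step * input_row[:, n:].T.dot(layer_output)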
class CMAC(BaseNetwork): """ Cerebellar Model Articulation Controller (CMAC) Network based on memory. Notes ----- - Network always use Mean Absolute Error (MAE). - Network works for multi dimensional target values. Parameters ---------- quantization : int Network transforms every input to discrete value. Quantization value controls number of total number of categories after quantization, defaults to ``10``. associative_unit_size : int Number of associative blocks in memory, defaults to ``2``. {BaseNetwork.Parameters} Attributes ---------- weight : dict Network's weight that contains memorized patterns. Methods ------- {BaseSkeleton.predict} train(X_train, y_train, X_test=None, y_test=None, epochs=100) Trains the network to the data X. Network trains until maximum number of ``epochs`` was reached. {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy.algorithms import CMAC >>> >>> train_space = np.linspace(0, 2 * np.pi, 100) >>> test_space = np.linspace(np.pi, 2 * np.pi, 50) >>> >>> X_train = np.reshape(train_space, (100, 1)) >>> X_test = np.reshape(test_space, (50, 1)) >>> >>> y_train = np.sin(X_train) >>> y_test = np.sin(X_test) >>> >>> cmac = CMAC( ... quantization=100, ... associative_unit_size=32, ... step=0.2, ... ) ... >>> cmac.train(X_train, y_train, epochs=100) >>> >>> predicted_test = cmac.predict(X_test) >>> cmac.score(y_test, predicted_test) 0.0023639417543036569 """ quantization = IntProperty(default=10, minval=1) associative_unit_size = IntProperty(default=2, minval=2) def __init__(self, **options): self.weight = {} super(CMAC, self).__init__(**options) def predict(self, X): X = format_data(X) get_memory_coords = self.get_memory_coords get_result_by_coords = self.get_result_by_coords predicted = [] for input_sample in self.quantize(X): coords = get_memory_coords(input_sample) predicted.append(get_result_by_coords(coords)) return np.array(predicted) def get_result_by_coords(self, coords): return sum(self.weight.setdefault(coord, 0) for coord in coords) / self.associative_unit_size def get_memory_coords(self, quantized_value): assoc_unit_size = self.associative_unit_size for i in range(assoc_unit_size): point = ((quantized_value + i) / assoc_unit_size).astype(int) yield tuple(np.concatenate([point, [i]])) def quantize(self, X): return (X * self.quantization).astype(int) def one_training_update(self, X_train, y_train): get_memory_coords = self.get_memory_coords get_result_by_coords = self.get_result_by_coords weight = self.weight step = self.step n_samples = X_train.shape[0] quantized_input = self.quantize(X_train) errors = 0 for input_sample, target_sample in zip(quantized_input, y_train): coords = list(get_memory_coords(input_sample)) predicted = get_result_by_coords(coords) error = target_sample - predicted for coord in coords: weight[coord] += step * error errors += sum(abs(error)) return errors / n_samples def score(self, X, y): predicted = self.predict(X) return np.mean(np.abs(predicted - y)) def train(self, X_train, y_train, X_test=None, y_test=None, epochs=100): is_test_data_partialy_missed = ((X_test is None and y_test is not None) or (X_test is not None and y_test is None)) if is_test_data_partialy_missed: raise ValueError("Input and target test samples are missed. " "They must be defined together or none of them.") X_train = format_data(X_train) y_train = format_data(y_train) if X_test is not None: X_test = format_data(X_test) y_test = format_data(y_test) return super(CMAC, self).train(X_train, y_train, X_test, y_test, epochs=epochs)
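# Worked sketch of the memory addressing performed by ``get_memory_coords``
# above (the numbers follow directly from the code; the parameter values are
# illustrative). With ``quantization=100`` and ``associative_unit_size=4``,
# the scalar input 0.37 is quantized to 37 and spread over 4 overlapping
# memory cells, one per associative block:
#
#     ((37 + 0) // 4, 0) -> (9, 0)
#     ((37 + 1) // 4, 1) -> (9, 1)
#     ((37 + 2) // 4, 2) -> (9, 2)
#     ((37 + 3) // 4, 3) -> (10, 3)
#
# During training each of these cells receives ``step * error``, and during
# prediction their stored values are averaged.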
class CMAC(SupervisedLearning, BaseNetwork): """ CMAC Network based on memory. Notes ----- * Network always use Mean Absolute Error (MAE). * Works for multi dimensional target values. Parameters ---------- quantization : int Network transform every input to discrete values. Quantization value contol number of total possible values after quantization, defaults to ``10``. associative_unit_size : int Number of associative blocks in memory, defaults to ``2``. {BaseNetwork.step} {BaseNetwork.show_epoch} {BaseNetwork.shuffle_data} {BaseNetwork.epoch_end_signal} {BaseNetwork.train_end_signal} Attributes ---------- weights : dict Neural network weights that contain memorized patterns. Methods ------- {BaseSkeleton.predict} {SupervisedLearning.train} {BaseSkeleton.fit} Examples -------- >>> import numpy as np >>> from neupy.algorithms import CMAC >>> >>> train_space = np.linspace(0, 2 * np.pi, 100) >>> test_space = np.linspace(np.pi, 2 * np.pi, 50) >>> >>> input_train = np.reshape(train_space, (100, 1)) >>> input_test = np.reshape(test_space, (50, 1)) >>> >>> target_train = np.sin(input_train) >>> target_test = np.sin(input_test) >>> >>> cmac = CMAC( ... quantization=100, ... associative_unit_size=32, ... step=0.2, ... ) ... >>> cmac.train(input_train, target_train, epochs=100) >>> predicted_test = cmac.predict(input_test) >>> cmac.error(target_test, predicted_test) 0.0023639417543036569 """ quantization = IntProperty(default=10, minval=1) associative_unit_size = IntProperty(default=2, minval=2) def __init__(self, **options): self.weights = {} super(CMAC, self).__init__(**options) def predict(self, input_data): input_data = format_data(input_data) get_memory_coords = self.get_memory_coords get_result_by_coords = self.get_result_by_coords predicted = [] for input_sample in self.quantize(input_data): coords = get_memory_coords(input_sample) predicted.append(get_result_by_coords(coords)) return array(predicted) def get_result_by_coords(self, coords): return sum( self.weights.setdefault(coord, 0) for coord in coords ) / self.associative_unit_size def get_memory_coords(self, quantized_value): assoc_unit_size = self.associative_unit_size for i in range(assoc_unit_size): point = ((quantized_value + i) / assoc_unit_size).astype(int) yield tuple(concatenate([point, [i]])) def quantize(self, input_data): return (input_data * self.quantization).astype(int) def train_epoch(self, input_train, target_train): get_memory_coords = self.get_memory_coords get_result_by_coords = self.get_result_by_coords weights = self.weights step = self.step quantized_input = self.quantize(input_train) errors = 0 for input_sample, target_sample in zip(quantized_input, target_train): coords = list(get_memory_coords(input_sample)) predicted = get_result_by_coords(coords) error = target_sample - predicted for coord in coords: weights[coord] += step * error errors += abs(error) return errors / input_train.shape[0]
class ParameterBasedLayer(BaseLayer):
    """
    Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer's output size.

    weight : array-like, Theano variable, scalar or Initializer
        Defines the layer's weights. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`XavierNormal() <neupy.init.XavierNormal>`.

    bias : 1D array-like, Theano variable, scalar, Initializer or None
        Defines the layer's bias. You can find the default
        initialization methods :ref:`here <init-methods>`.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.
        The ``None`` value excludes bias from the calculations and
        does not add it to the list of parameters.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    size = IntProperty(minval=1)
    weight = ParameterProperty(default=init.XavierNormal())
    bias = ParameterProperty(default=init.Constant(value=0), allow_none=True)

    def __init__(self, size, **options):
        super(ParameterBasedLayer, self).__init__(size=size, **options)

    @property
    def weight_shape(self):
        return as_tuple(self.input_shape, self.output_shape)

    @property
    def bias_shape(self):
        if self.bias is not None:
            return as_tuple(self.output_shape)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.add_parameter(value=self.weight, name='weight',
                           shape=self.weight_shape, trainable=True)

        if self.bias is not None:
            self.add_parameter(value=self.bias, name='bias',
                               shape=self.bias_shape, trainable=True)

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
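# Conceptual sketch (plain NumPy, not the library API) of the two parameters
# that a ParameterBasedLayer manages: a weight with shape
# (input_size, output_size) and an optional bias with shape (output_size,).
# Passing ``bias=None`` to a layer simply drops the second term below.

import numpy as np

input_size, output_size = 10, 30
batch = np.random.randn(5, input_size)

weight = 0.01 * np.random.randn(input_size, output_size)  # matches weight_shape
bias = np.zeros(output_size)                              # matches bias_shape, Constant(0)

output = batch.dot(weight) + bias
print(output.shape)  # (5, 30)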
class Oja(UnsupervisedLearning, BaseNetwork):
    """
    Oja is an unsupervised algorithm that minimizes the input data
    feature space.

    Notes
    -----
    * In practice, use a very small step, for example ``1e-7``.

    * Normalize the input data before using the Oja algorithm.
      Input data shouldn't contain large values.

    * Set up smaller initial weights if the error during the first few
      iterations is big compared to the scale of the input values.
      For example, if your input data has values between 0 and 1,
      an error value equal to 100 is big.

    Parameters
    ----------
    minimized_data_size : int
        Expected number of features after minimization, defaults to ``1``.
    weights : array-like or ``None``
        Predefined weights that transform the data in both directions
        (minimization and reconstruction). If ``None``, the algorithm
        generates random weights before training. Defaults to ``None``.
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    reconstruct(input_data)
        Reconstructs the original data from its minimized representation.
    {BaseSkeleton.predict}
    {UnsupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Raises
    ------
    ValueError
        * Reconstruction is attempted before training.
        * The number of input data features is invalid for the
          ``train`` or ``reconstruct`` method.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([[2, 2], [1, 1], [4, 4], [5, 5]])
    >>>
    >>> ojanet = algorithms.Oja(
    ...     minimized_data_size=1,
    ...     step=0.01,
    ...     verbose=False
    ... )
    >>>
    >>> ojanet.train(data, epsilon=1e-5)
    >>> minimized = ojanet.predict(data)
    >>> minimized
    array([[-2.82843122],
           [-1.41421561],
           [-5.65686243],
           [-7.07107804]])
    >>> ojanet.reconstruct(minimized)
    array([[ 2.00000046,  2.00000046],
           [ 1.00000023,  1.00000023],
           [ 4.00000093,  4.00000093],
           [ 5.00000116,  5.00000116]])
    """
    minimized_data_size = IntProperty(minval=1)
    weights = ArrayProperty()

    def init_properties(self):
        del self.shuffle_data
        super(Oja, self).init_properties()

    def train_epoch(self, input_data, target_train):
        weights = self.weights

        minimized = dot(input_data, weights)
        reconstruct = dot(minimized, weights.T)
        error = input_data - reconstruct

        weights += self.step * dot(error.T, minimized)

        mae = np_sum(np_abs(error)) / input_data.size

        # Clear memory
        del minimized
        del reconstruct
        del error

        return mae

    def train(self, input_data, epsilon=1e-2, epochs=100):
        input_data = format_data(input_data)
        n_input_features = input_data.shape[1]

        if self.weights is None:
            self.weights = randn(n_input_features, self.minimized_data_size)

        if n_input_features != self.weights.shape[0]:
            raise ValueError(
                "Invalid number of features. Expected {}, got {}".format(
                    self.weights.shape[0], n_input_features))

        super(Oja, self).train(input_data, epsilon=epsilon, epochs=epochs)

    def reconstruct(self, input_data):
        if self.weights is None:
            raise ValueError("Train the network before using the "
                             "``reconstruct`` method.")

        input_data = format_data(input_data)

        if input_data.shape[1] != self.minimized_data_size:
            raise ValueError("Invalid input data feature space, expected "
                             "{}, got {}.".format(self.minimized_data_size,
                                                  input_data.shape[1]))

        return dot(input_data, self.weights.T)

    def predict(self, input_data):
        if self.weights is None:
            raise ValueError("Train the network before using the "
                             "``predict`` method.")

        input_data = format_data(input_data)
        return dot(input_data, self.weights)
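# The Oja.train_epoch method above boils down to a single reconstruction-error
# update. A minimal NumPy sketch of that update loop, outside of the class
# (variable names are illustrative; small initial weights and a fixed seed,
# as the notes above recommend small starting values):

import numpy as np

np.random.seed(0)

data = np.array([[2., 2.], [1., 1.], [4., 4.], [5., 5.]])
step = 0.01
weights = 0.1 * np.random.randn(2, 1)  # (n_features, minimized_data_size)

for epoch in range(100):
    minimized = data.dot(weights)             # project into the smaller space
    reconstructed = minimized.dot(weights.T)  # project back
    error = data - reconstructed
    weights += step * error.T.dot(minimized)  # same update as train_epoch
    mae = np.abs(error).sum() / data.size

print(mae)  # approaches zero as the weights align with the principal direction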
class ParameterBasedLayer(BaseLayer):
    """
    Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer input size.
    weight : 2D array-like or None
        Defines the layer's weights. ``None`` means that the weights will
        be generated randomly based on the ``init_method`` property.
        Defaults to ``None``.
    bias : 1D array-like or None
        Defines the layer's bias. ``None`` means that the bias will be
        generated randomly based on the ``init_method`` property.
        Defaults to ``None``.
    init_method : {{'bounded', 'normal', 'ortho', 'xavier_normal',\
    'xavier_uniform', 'he_normal', 'he_uniform'}}
        Weight initialization method. Defaults to ``xavier_normal``.

        * ``normal`` generates random weights from a normal distribution \
        with standard deviation equal to ``0.01``.

        * ``bounded`` generates random weights from a uniform distribution.

        * ``ortho`` generates a random orthogonal matrix.

        * ``xavier_normal`` generates a random matrix from a normal \
        distribution with variance equal to :math:`\\frac{{2}}{{fan_{{in}} + \
        fan_{{out}}}}`, where :math:`fan_{{in}}` is the number of \
        layer input units and :math:`fan_{{out}}` is the number of layer \
        output units.

        * ``xavier_uniform`` generates a random matrix from a uniform \
        distribution \
        where :math:`w_{{ij}} \in \
        [-\\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}]`.

        * ``he_normal`` generates a random matrix from a normal \
        distribution with variance equal to :math:`\\frac{{2}}{{fan_{{in}}}}`, \
        where :math:`fan_{{in}}` is the number of layer input units.

        * ``he_uniform`` generates a random matrix from a uniform \
        distribution where :math:`w_{{ij}} \in [\
        -\\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}]`.

    bounds : tuple of two float
        Available only when ``init_method`` is set to ``bounded``.
        The values identify the minimum and maximum possible values
        of the random weights. Defaults to ``(0, 1)``.
    """
    size = IntProperty(minval=1)
    weight = SharedArrayProperty(default=None)
    bias = SharedArrayProperty(default=None)
    bounds = TypedListProperty(default=(0, 1), element_type=(int, float))
    init_method = ChoiceProperty(default=XAVIER_NORMAL,
                                 choices=VALID_INIT_METHODS)

    def __init__(self, size, **options):
        if size is not None:
            options['size'] = size
        super(ParameterBasedLayer, self).__init__(**options)

    def weight_shape(self):
        output_size = self.relate_to_layer.size
        return (self.size, output_size)

    def bias_shape(self):
        output_size = self.relate_to_layer.size
        return (output_size,)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.weight = create_shared_parameter(
            value=self.weight,
            name='weight_{}'.format(self.layer_id),
            shape=self.weight_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.bias = create_shared_parameter(
            value=self.bias,
            name='bias_{}'.format(self.layer_id),
            shape=self.bias_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.parameters = [self.weight, self.bias]

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
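# Hedged sketch (plain NumPy, not the library implementation) of two of the
# initialization formulas documented above for ParameterBasedLayer:

import numpy as np

fan_in, fan_out = 10, 30

# xavier_normal: zero-mean normal distribution with variance 2 / (fan_in + fan_out)
xavier_normal = np.random.normal(
    loc=0.0,
    scale=np.sqrt(2.0 / (fan_in + fan_out)),
    size=(fan_in, fan_out),
)

# he_uniform: uniform distribution bounded by +/- sqrt(6 / fan_in)
bound = np.sqrt(6.0 / fan_in)
he_uniform = np.random.uniform(low=-bound, high=bound, size=(fan_in, fan_out))

print(xavier_normal.var(), 2.0 / (fan_in + fan_out))  # empirical vs. target variance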
class Concatenate(BaseLayer):
    """
    Concatenates multiple inputs into one. Inputs will be concatenated
    over the axis specified by the ``axis`` parameter.

    Parameters
    ----------
    axis : int
        The axis along which the inputs will be concatenated.
        Defaults to ``-1``.

    {BaseLayer.name}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    Examples
    --------
    >>> from neupy.layers import *
    >>> network = (Input(10) | Input(20)) >> Concatenate()
    [(?, 10), (?, 20)] -> [... 3 layers ...] -> (?, 30)
    """
    axis = IntProperty()

    def __init__(self, axis=-1, name=None):
        super(Concatenate, self).__init__(name=name)
        self.axis = axis

    def get_output_shape(self, *input_shapes):
        input_shapes = [tf.TensorShape(shape) for shape in input_shapes]

        # The axis value uses 0-based indices, where index 0 points to
        # the batch dimension of the input. Shapes in neupy do not store
        # information about the batch size, so a ``None`` value occupies
        # the 0th position.
        valid_shape = input_shapes[0]

        if any(shape.ndims is None for shape in input_shapes):
            return tf.TensorShape(None)

        # Avoid using negative indices
        possible_axes = list(range(len(valid_shape)))
        concat_axis = possible_axes[self.axis]

        for input_shape in input_shapes[1:]:
            if len(valid_shape) != len(input_shape):
                raise LayerConnectionError(
                    "Cannot concatenate layers, because inputs have "
                    "different number of dimensions. Shapes: {} and {}"
                    "".format(valid_shape, input_shape))

            for axis, axis_size in enumerate(input_shape):
                if axis != concat_axis and valid_shape[axis] != axis_size:
                    raise LayerConnectionError(
                        "Cannot concatenate layers, because some of them "
                        "don't match over dimension #{} (0-based indices). "
                        "Shapes: {} and {}"
                        "".format(axis, valid_shape, input_shape))

        output_shape = input_shapes.pop(0)
        output_shape = [dim.value for dim in output_shape.dims]

        for input_shape in input_shapes:
            output_shape[self.axis] += input_shape[self.axis]

        return tf.TensorShape(output_shape)

    def output(self, *inputs, **kwargs):
        return tf.concat(inputs, axis=self.axis)
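# Quick check of the shape rule implemented by Concatenate.get_output_shape
# above: every dimension except the concatenation axis must match, and the
# output size along that axis is the sum of the input sizes (NumPy is used
# here only to illustrate the arithmetic, not the layer itself).

import numpy as np

left = np.random.randn(4, 10)
right = np.random.randn(4, 20)

combined = np.concatenate([left, right], axis=-1)
print(combined.shape)  # (4, 30) -- compare with the (?, 30) shape in the example above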