def init_train_updates(self):
    network_inputs = self.variables.network_inputs
    network_output = self.variables.network_output
    inv_hessian = self.variables.inv_hessian
    prev_params = self.variables.prev_params
    prev_full_gradient = self.variables.prev_full_gradient

    params = parameter_values(self.connection)
    param_vector = T.concatenate([param.flatten() for param in params])

    gradients = T.grad(self.variables.error_func, wrt=params)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    new_inv_hessian = ifelse(
        T.eq(self.variables.epoch, 1),
        inv_hessian,
        self.update_function(inv_hessian,
                             param_vector - prev_params,
                             full_gradient - prev_full_gradient))

    param_delta = -new_inv_hessian.dot(full_gradient)
    layers_and_parameters = list(iter_parameters(self.layers))

    def prediction(step):
        updated_params = param_vector + step * param_delta

        # This trick allows us to replace shared variables
        # with Theano variables and get output from the network
        start_pos = 0
        for layer, attrname, param in layers_and_parameters:
            end_pos = start_pos + param.size
            updated_param_value = T.reshape(
                updated_params[start_pos:end_pos],
                param.shape)
            setattr(layer, attrname, updated_param_value)
            start_pos = end_pos

        output = self.connection.output(*network_inputs)

        # Restore previous parameters
        for layer, attrname, param in layers_and_parameters:
            setattr(layer, attrname, param)

        return output

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        return T.grad(error_func, wrt=step)

    step = asfloat(line_search(phi, derphi))
    updated_params = param_vector + step * param_delta
    updates = setup_parameter_updates(params, updated_params)

    updates.extend([
        (inv_hessian, new_inv_hessian),
        (prev_params, param_vector),
        (prev_full_gradient, full_gradient),
    ])

    return updates
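# `update_function` is pluggable and not shown above. Assuming the
# classic BFGS rule, a minimal NumPy sketch of what it computes from
# the parameter difference `s` and gradient difference `y` (the helper
# name and its standalone form are illustrative, not from the source):
import numpy as np

def bfgs_inv_hessian_update(inv_hessian, s, y):
    # rho = 1 / (y^T s); a guard against division by zero is omitted
    rho = 1.0 / np.dot(y, s)
    identity = np.eye(len(s))

    # H_new = (I - rho * s y^T) H (I - rho * y s^T) + rho * s s^T
    left = identity - rho * np.outer(s, y)
    right = identity - rho * np.outer(y, s)
    return left.dot(inv_hessian).dot(right) + rho * np.outer(s, s)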
def load(connection, source, ignore_missed=False):
    """
    Load and set parameters for layers from the specified source.

    Parameters
    ----------
    connection : list of layers or connection

    source : str or dict
        Path to the pickle file that stores parameters, or a
        dictionary that maps layer names to dictionaries that
        store parameter names and their values.

    ignore_missed : bool
        ``False`` means that an error will be triggered if some
        of the layers don't have stored parameters in the
        specified source. Defaults to ``False``.

    Raises
    ------
    TypeError
        In case the source has an invalid data type.
    """
    if isinstance(connection, BaseNetwork):
        connection = connection.connection

    if isinstance(source, six.string_types):
        with open(source, 'rb') as f:
            data = pickle.load(f)

    elif isinstance(source, dict):
        data = source

    else:
        raise TypeError("Source type is unknown. Got {}, expected dict "
                        "or str".format(type(source)))

    for layer, attrname, _ in iter_parameters(connection):
        if layer.name not in data or attrname not in data[layer.name]:
            if ignore_missed:
                continue

            raise ValueError("Cannot load parameters from the specified "
                             "data source. Layer `{}` doesn't have "
                             "stored parameter `{}`."
                             "".format(layer.name, attrname))

        loaded_parameter = data[layer.name][attrname]
        attrvalue = getattr(layer, attrname)
        attrvalue.set_value(asfloat(loaded_parameter))

    # We need to initialize the connection to make sure that each
    # layer will generate shared variables and validate connections
    connection.initialize()
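# A hedged usage sketch: the dictionary form of `source` maps layer
# names to parameter dictionaries. The `network` object, the layer
# name and the shapes below are hypothetical.
import numpy as np

load(network, 'parameters.pkl')

load(network, {
    'hidden': {
        'weight': np.random.randn(10, 20),
        'bias': np.zeros(20),
    },
}, ignore_missed=True)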
def iter_params_and_grads(self):
    layers, parameters = [], []

    for layer, _, parameter in iter_parameters(self.layers):
        layers.append(layer)
        parameters.append(parameter)

    gradients = tf.gradients(self.variables.error_func, parameters)
    iterator = zip(layers, parameters, gradients)

    for layer, parameter, gradient in iterator:
        yield layer, parameter, gradient
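# One way this helper might be consumed, e.g. by plain gradient
# descent (a sketch; this `init_train_updates` is illustrative and
# not part of the source):
def init_train_updates(self):
    step = self.variables.step
    updates = []

    for layer, parameter, gradient in self.iter_params_and_grads():
        updates.append((parameter, parameter - step * gradient))

    return updates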
def init_train_updates(self):
    """
    Initialize updates that would be applied after
    each training epoch.
    """
    updates = []

    for layer, _, parameter in iter_parameters(self.layers):
        updates.extend(self.init_param_updates(layer, parameter))

    for layer in self.layers:
        updates.extend(layer.updates)

    return updates
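# `init_param_updates` is expected to be implemented by each training
# algorithm. For vanilla gradient descent it might look roughly like
# this (a sketch under that assumption):
def init_param_updates(self, layer, parameter):
    step = self.variables.step
    gradient, = tf.gradients(self.variables.error_func, [parameter])
    return [(parameter, parameter - step * gradient)]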
def init_train_updates(self):
    original_updates = super(WeightDecay, self).init_train_updates()
    parameters = [param for _, _, param in iter_parameters(self.layers)]
    modified_updates = []

    step = self.variables.step
    decay_rate = asfloat(self.decay_rate)

    for parameter, updated in original_updates:
        if parameter in parameters:
            updated -= step * decay_rate * parameter
        modified_updates.append((parameter, updated))

    return modified_updates
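# The subtracted term is the gradient of the L2 penalty
# 0.5 * decay_rate * sum(w ** 2), scaled by the step. A standalone
# finite-difference sanity check (illustrative, not from the source):
import numpy as np

def l2_penalty(w, decay_rate):
    return 0.5 * decay_rate * np.sum(w ** 2)

w = np.array([0.5, -1.2, 3.0])
decay_rate, eps = 0.01, 1e-6

for i in range(len(w)):
    shifted = w.copy()
    shifted[i] += eps
    numeric = (l2_penalty(shifted, decay_rate) - l2_penalty(w, decay_rate)) / eps
    assert abs(numeric - decay_rate * w[i]) < 1e-4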
def init_train_updates(self):
    original_updates = super(WeightElimination, self).init_train_updates()
    parameters = [param for _, _, param in iter_parameters(self.layers)]
    modified_updates = []

    step = self.variables.step
    decay_koef = asfloat(self.decay_rate * step)
    zero_weight_square = asfloat(self.zero_weight ** 2)

    for parameter, updated in original_updates:
        if parameter in parameters:
            updated -= decay_koef * (
                (2 * parameter / zero_weight_square) /
                tf.square(1 + tf.square(parameter) / zero_weight_square))
        modified_updates.append((parameter, updated))

    return modified_updates
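# The bracketed expression is the derivative of the weight elimination
# penalty p(w) = (w^2 / w0^2) / (1 + w^2 / w0^2). A standalone
# finite-difference check (illustrative, not from the source):
def penalty(w, w0_sq):
    return (w ** 2 / w0_sq) / (1 + w ** 2 / w0_sq)

def penalty_grad(w, w0_sq):
    # Same expression as in the update above
    return (2 * w / w0_sq) / (1 + w ** 2 / w0_sq) ** 2

w, w0_sq, eps = 0.7, 1.0, 1e-6
numeric = (penalty(w + eps, w0_sq) - penalty(w, w0_sq)) / eps
assert abs(numeric - penalty_grad(w, w0_sq)) < 1e-4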
def parameter_values(connection):
    """
    Collect all of the network's trainable parameters.

    Parameters
    ----------
    connection : layer, connection

    Returns
    -------
    list
        List of Theano shared variables (network's
        trainable parameters).
    """
    parameters = []

    for _, _, parameter in iter_parameters(connection):
        parameters.append(parameter)

    return parameters
def save(connection, filepath):
    """
    Save layer parameters in a pickle file.

    Parameters
    ----------
    connection : network, list of layers or connection
        Connection that needs to be saved.

    filepath : str
        Path to the pickle file that will store
        network's parameters.
    """
    if isinstance(connection, BaseNetwork):
        connection = connection.connection

    data = defaultdict(dict)

    for layer, attrname, parameter in iter_parameters(connection):
        data[layer.name][attrname] = parameter.get_value()

    with open(filepath, 'wb+') as f:
        pickle.dump(data, f)
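# A hedged round-trip sketch combining `save` with the `load` function
# above (the NeuPy-style network construction is an assumption, not
# part of the source):
from neupy import algorithms

network = algorithms.GradientDescent((2, 3, 1))
save(network, 'parameters.pkl')

# ... later, rebuild the same architecture and restore its parameters
network = algorithms.GradientDescent((2, 3, 1))
load(network, 'parameters.pkl')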
def find_optimal_step(self, parameter_vector, parameter_update):
    network_inputs = self.variables.network_inputs
    network_output = self.variables.network_output
    layers_and_parameters = list(iter_parameters(self.layers))

    def prediction(step):
        step = asfloat(step)
        updated_params = parameter_vector + step * parameter_update

        # This trick allows us to replace shared variables
        # with TensorFlow variables and get output from the network
        start_pos = 0
        for layer, attrname, param in layers_and_parameters:
            end_pos = start_pos + get_variable_size(param)
            updated_param_value = tf.reshape(
                updated_params[start_pos:end_pos],
                param.shape)
            setattr(layer, attrname, updated_param_value)
            start_pos = end_pos

        output = self.connection.output(*network_inputs)

        # Restore previous parameters
        for layer, attrname, param in layers_and_parameters:
            setattr(layer, attrname, param)

        return output

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        gradient, = tf.gradients(error_func, step)
        return gradient

    return line_search(phi, derphi, self.wolfe_maxiter,
                       self.wolfe_c1, self.wolfe_c2)
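# phi and derphi are the standard scalar functions a Wolfe line search
# operates on: phi(a) = f(x + a * d) and its derivative. As a rough
# illustration of that contract, a simplified backtracking search over
# numeric phi/derphi (not the Wolfe implementation used above):
def backtracking_line_search(phi, derphi, alpha=1.0, c1=1e-4, rho=0.5):
    # Shrink the step until the Armijo sufficient-decrease condition
    # phi(alpha) <= phi(0) + c1 * alpha * phi'(0) holds; assumes the
    # search direction is a descent direction (phi'(0) < 0)
    phi0, derphi0 = phi(0.0), derphi(0.0)

    while phi(alpha) > phi0 + c1 * alpha * derphi0:
        alpha *= rho

    return alpha

# For example, minimizing f(a) = (a - 2)^2 along a itself:
# backtracking_line_search(lambda a: (a - 2) ** 2,
#                          lambda a: 2 * (a - 2)) returns 1.0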
def init_train_updates(self):
    network_input = self.variables.network_input
    network_output = self.variables.network_output
    inv_hessian = self.variables.inv_hessian
    prev_params = self.variables.prev_params
    prev_full_gradient = self.variables.prev_full_gradient

    params = parameter_values(self.connection)
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=params)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    new_inv_hessian = ifelse(
        T.eq(self.variables.epoch, 1),
        inv_hessian,
        self.update_function(inv_hessian,
                             param_vector - prev_params,
                             full_gradient - prev_full_gradient)
    )

    param_delta = -new_inv_hessian.dot(full_gradient)
    layers_and_parameters = list(iter_parameters(self.layers))

    def prediction(step):
        updated_params = param_vector + step * param_delta

        # This trick allows us to replace shared variables
        # with Theano variables and get output from the network
        start_pos = 0
        for layer, attrname, param in layers_and_parameters:
            end_pos = start_pos + param.size
            updated_param_value = T.reshape(
                updated_params[start_pos:end_pos],
                param.shape
            )
            setattr(layer, attrname, updated_param_value)
            start_pos = end_pos

        output = self.connection.output(network_input)

        # Restore the original shared-variable parameters
        for layer, attrname, param in layers_and_parameters:
            setattr(layer, attrname, param)

        return output

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        return T.grad(error_func, wrt=step)

    step = asfloat(line_search(phi, derphi))
    updated_params = param_vector + step * param_delta
    updates = setup_parameter_updates(params, updated_params)

    updates.extend([
        (inv_hessian, new_inv_hessian),
        (prev_params, param_vector),
        (prev_full_gradient, full_gradient),
    ])

    return updates