def init_functions(self):
    # Training loss is computed from the training-time outputs (e.g.
    # with dropout enabled), while validation uses inference outputs.
    loss = self.loss(self.target, self.network.training_outputs)
    val_loss = self.loss(self.target, self.network.outputs)

    if self.regularizer is not None:
        loss += self.regularizer(self.network)

    self.variables.update(
        step=self.step,
        loss=loss,
        val_loss=val_loss,
    )

    with tf.name_scope('training-updates'):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        with tf.control_dependencies(update_ops):
            training_updates = self.init_train_updates()

        training_updates.extend(update_ops)

    tf_utils.initialize_uninitialized_variables()

    self.functions.update(
        predict=tf_utils.function(
            inputs=as_tuple(self.network.inputs),
            outputs=self.network.outputs,
            name='optimizer/predict'),
        one_training_update=tf_utils.function(
            inputs=as_tuple(self.network.inputs, self.target),
            outputs=loss,
            updates=training_updates,
            name='optimizer/one-update-step'),
        score=tf_utils.function(
            inputs=as_tuple(self.network.inputs, self.target),
            outputs=val_loss,
            name='optimizer/score'))
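# A hedged usage sketch of the three functions compiled above;
# `optimizer`, `x_train`, `y_train`, `x_test` and `y_test` are
# hypothetical names, and attribute-style access on `functions`
# is assumed:
#
#     optimizer.init_functions()
#     training_loss = optimizer.functions.one_training_update(x_train, y_train)
#     validation_loss = optimizer.functions.score(x_test, y_test)
#     predictions = optimizer.functions.predict(x_test)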
def test_function_with_updates(self):
    x = tf.placeholder(name='x', dtype=tf.float32)
    w = tf.Variable(asfloat(np.ones((4, 3))), name='w')
    b = tf.Variable(asfloat(np.ones((3, ))), name='b')
    y = tf.matmul(x, w) + b

    prediction = tf_utils.function([x], y, updates=[
        (b, b - 0.5),
        w.assign(w + 0.5),
    ])
    tf_utils.initialize_uninitialized_variables()

    actual = prediction(np.random.random((7, 4)))
    self.assertEqual(actual.shape, (7, 3))

    np.testing.assert_array_almost_equal(
        self.eval(w),
        1.5 * np.ones((4, 3)),
    )
    np.testing.assert_array_almost_equal(
        self.eval(b),
        0.5 * np.ones((3, )),
    )
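# The test above passes updates in both supported forms: a
# `(variable, new_value)` pair and a ready-made assign operation.
# Below is a minimal sketch of how such mixed specs could be
# normalized into a single grouped operation; this is an assumption
# about the helper's internals, not NeuPy's actual code.
import tensorflow as tf

def normalize_updates(updates):
    ops = []
    for update in updates:
        if isinstance(update, (list, tuple)):
            # A (variable, new_value) pair becomes an assign op.
            variable, new_value = update
            ops.append(variable.assign(new_value))
        else:
            # Anything else is assumed to already be an operation.
            ops.append(update)
    return tf.group(*ops)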
def test_setup_parameter_updates(self):
    w1 = tf.Variable(np.ones((4, 3)))
    b1 = tf.Variable(np.zeros((3, )))
    w2 = tf.Variable(np.ones((3, 2)))
    tf_utils.initialize_uninitialized_variables([w1, b1, w2])

    updates = 2 * tf_utils.make_single_vector([w1, b1, w2]) + 1
    updates = tf_utils.setup_parameter_updates([w1, b1, w2], updates)

    sess = tf_utils.tensorflow_session()
    for parameter, new_value in updates:
        sess.run(parameter.assign(new_value))

    np.testing.assert_array_almost_equal(
        self.eval(w1),
        3 * np.ones((4, 3)),
    )
    np.testing.assert_array_almost_equal(
        self.eval(b1),
        np.ones(3),
    )
    np.testing.assert_array_almost_equal(
        self.eval(w2),
        3 * np.ones((3, 2)),
    )
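# A NumPy sketch of the round trip exercised above: flatten every
# parameter into one vector, transform it, then slice it back into
# per-parameter chunks. The helper names are illustrative, not
# NeuPy's API; row-major (C-order) flattening is assumed.
import numpy as np

def flatten_parameters(arrays):
    return np.concatenate([np.asarray(a).ravel() for a in arrays])

def split_vector(vector, templates):
    chunks, offset = [], 0
    for template in templates:
        template = np.asarray(template)
        # Take as many elements as the template holds and restore
        # its original shape.
        chunks.append(
            vector[offset:offset + template.size].reshape(template.shape))
        offset += template.size
    return chunks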
def test_function_without_updates(self):
    x = tf.placeholder(name='x', dtype=tf.float32)
    w = tf.Variable(asfloat(np.random.random((4, 3))), name='w')
    b = tf.Variable(asfloat(np.random.random((3, ))), name='b')
    y = tf.matmul(x, w) + b

    prediction = tf_utils.function([x], y)
    tf_utils.initialize_uninitialized_variables()

    actual = prediction(np.random.random((7, 4)))
    self.assertEqual(actual.shape, (7, 3))
def test_initialize_uninitialized_variables(self):
    sess = tf_utils.tensorflow_session()

    a = tf.Variable(np.ones((4, 3)), name='a')
    b = tf.Variable(np.ones((4, 3)), name='b')
    tf_utils.initialize_uninitialized_variables()

    actual = sess.run(a + b)
    np.testing.assert_array_almost_equal(actual, 2 * np.ones((4, 3)))

    # Only `c` gets initialized; running an operation that touches
    # the still uninitialized `d` has to raise an exception.
    c = tf.Variable(np.ones((2, 3)), name='c')
    d = tf.Variable(np.ones((2, 3)), name='dx')
    tf_utils.initialize_uninitialized_variables([c])

    with self.assertRaisesRegexp(FailedPreconditionError, "value dx"):
        sess.run(c + d)
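# For context, a helper with the behavior tested above is commonly
# implemented along these lines; this is a sketch under TF1 semantics,
# not necessarily NeuPy's implementation.
import tensorflow as tf

def initialize_uninitialized(session, variables=None):
    if variables is None:
        variables = tf.global_variables()

    # Ask the session which variables it has already initialized.
    flags = session.run(
        [tf.is_variable_initialized(variable) for variable in variables])
    pending = [v for v, is_init in zip(variables, flags) if not is_init]

    if pending:
        session.run(tf.variables_initializer(pending))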
def save_dict(network):
    """
    Save network as a dictionary.

    Parameters
    ----------
    network : network or list of layers

    Returns
    -------
    dict
        Saved parameters and information about the network, stored
        in a dictionary with a specific format. Learn more about
        NeuPy's storage format in the official documentation.

    Examples
    --------
    >>> from neupy import layers, storage
    >>>
    >>> network = layers.Input(10) >> layers.Softmax(3)
    >>> layers_data = storage.save_dict(network)
    >>>
    >>> layers_data.keys()
    ['layers', 'graph', 'metadata']
    """
    network = extract_network(network)
    network.create_variables()

    session = tf_utils.tensorflow_session()
    tf_utils.initialize_uninitialized_variables()

    data = {
        'metadata': {
            'language': 'python',
            'library': 'neupy',
            'version': neupy.__version__,
            'created': strftime("%a, %d %b %Y %H:%M:%S %Z", gmtime()),
        },
        # Store the graph as a list in order to preserve the order
        # of the layers; a dictionary wouldn't guarantee it.
        'graph': network.layer_names_only(),
        'layers': [],
    }

    for layer in network:
        parameters = {}
        configs = {}

        for attrname, parameter in layer.variables.items():
            parameters[attrname] = {
                'value': asfloat(session.run(parameter)),
                'trainable': parameter.trainable,
            }

        for option_name in layer.options:
            if option_name not in parameters:
                configs[option_name] = getattr(layer, option_name)

        data['layers'].append({
            'class_name': layer.__class__.__name__,
            'name': layer.name,
            'parameters': parameters,
            'configs': configs,
        })

    return data
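# Illustrative inspection of the returned dictionary, continuing the
# docstring example; the parameter names ('weight', 'bias') are an
# assumption about the Softmax layer's variables:
#
#     >>> data = storage.save_dict(network)
#     >>> last_layer = data['layers'][-1]
#     >>> last_layer['class_name']
#     'Softmax'
#     >>> sorted(last_layer['parameters'])
#     ['bias', 'weight']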
def init_methods(self):
    def free_energy(visible_sample):
        with tf.name_scope('free-energy'):
            wx = tf.matmul(visible_sample, self.weight)
            wx_b = wx + self.hidden_bias
            visible_bias_term = dot(visible_sample, self.visible_bias)

            # We can get infinity when wx_b is a relatively large
            # number (around 100): taking the exponent makes it even
            # larger and with float32 it overflows to infinity. But
            # because the number is so large, the +1 inside the
            # logarithm makes no difference to the outcome, so we can
            # return the value as is.
            hidden_terms = tf.where(
                # exp(30) is such a big number that +1 won't
                # make any difference in the outcome.
                tf.greater(wx_b, 30),
                wx_b,
                tf.log1p(tf.exp(wx_b)),
            )
            hidden_term = tf.reduce_sum(hidden_terms, axis=1)
            return -(visible_bias_term + hidden_term)

    def visible_to_hidden(visible_sample):
        with tf.name_scope('visible-to-hidden'):
            wx = tf.matmul(visible_sample, self.weight)
            wx_b = wx + self.hidden_bias
            return tf.nn.sigmoid(wx_b)

    def hidden_to_visible(hidden_sample):
        with tf.name_scope('hidden-to-visible'):
            wx = tf.matmul(hidden_sample, self.weight, transpose_b=True)
            wx_b = wx + self.visible_bias
            return tf.nn.sigmoid(wx_b)

    def sample_hidden_from_visible(visible_sample):
        with tf.name_scope('sample-hidden-from-visible'):
            hidden_prob = visible_to_hidden(visible_sample)
            hidden_sample = random_binomial(hidden_prob)
            return hidden_sample

    def sample_visible_from_hidden(hidden_sample):
        with tf.name_scope('sample-visible-from-hidden'):
            visible_prob = hidden_to_visible(hidden_sample)
            visible_sample = random_binomial(visible_prob)
            return visible_sample

    network_input = self.network_input
    network_hidden_input = self.network_hidden_input
    input_shape = tf.shape(network_input)
    n_samples = input_shape[0]

    weight = self.weight
    h_bias = self.hidden_bias
    v_bias = self.visible_bias
    h_samples = self.h_samples
    step = asfloat(self.step)

    with tf.name_scope('positive-values'):
        # We have to use `cond` instead of `where`, because the
        # if-else branches might have different shapes, which
        # triggers an exception in tensorflow.
        v_pos = tf.cond(
            tf.equal(n_samples, self.batch_size),
            lambda: network_input,
            lambda: random_sample(network_input, self.batch_size))
        h_pos = visible_to_hidden(v_pos)

    with tf.name_scope('negative-values'):
        v_neg = sample_visible_from_hidden(h_samples)
        h_neg = visible_to_hidden(v_neg)

    with tf.name_scope('weight-update'):
        weight_update = (
            tf.matmul(v_pos, h_pos, transpose_a=True) -
            tf.matmul(v_neg, h_neg, transpose_a=True)
        ) / asfloat(n_samples)

    with tf.name_scope('hidden-bias-update'):
        h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0)

    with tf.name_scope('visible-bias-update'):
        v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0)

    with tf.name_scope('flipped-input-features'):
        # Each row marks one randomly selected feature with 1;
        # all other values are equal to 0.
        possible_feature_corruptions = tf.eye(self.n_visible)
        corrupted_features = random_sample(
            possible_feature_corruptions, n_samples)

        rounded_input = tf.round(network_input)

        # If we scale input values from the [0, 1] range to [-1, 1],
        # then it becomes easier to flip feature values with a
        # simple multiplication.
        scaled_rounded_input = 2 * rounded_input - 1
        scaled_flipped_rounded_input = (
            # In corrupted_features we convert 0 to 1 and 1 to -1;
            # after the multiplication this flips the sign exactly
            # where the transformed corrupted_features has -1.
            (-2 * corrupted_features + 1) * scaled_rounded_input)

        # Scale it back to the [0, 1] range
        flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2

    with tf.name_scope('pseudo-likelihood-loss'):
        # Stochastic pseudo-likelihood
        error = tf.reduce_mean(
            self.n_visible * tf.log_sigmoid(
                free_energy(flipped_rounded_input) -
                free_energy(rounded_input)))

    with tf.name_scope('gibbs-sampling'):
        gibbs_sampling = sample_visible_from_hidden(
            sample_hidden_from_visible(network_input))

    tf_utils.initialize_uninitialized_variables()

    self.weight_update_one_step = tf_utils.function(
        [network_input], error,
        name='rbm/train-epoch',
        updates=[
            (weight, weight + step * weight_update),
            (h_bias, h_bias + step * h_bias_update),
            (v_bias, v_bias + step * v_bias_update),
            (h_samples, random_binomial(p=h_neg)),
        ])

    self.score_func = tf_utils.function(
        [network_input], error,
        name='rbm/prediction-error',
    )
    self.visible_to_hidden_one_step = tf_utils.function(
        [network_input],
        visible_to_hidden(network_input),
        name='rbm/visible-to-hidden',
    )
    self.hidden_to_visible_one_step = tf_utils.function(
        [network_hidden_input],
        hidden_to_visible(network_hidden_input),
        name='rbm/hidden-to-visible',
    )
    self.gibbs_sampling_one_step = tf_utils.function(
        [network_input], gibbs_sampling,
        name='rbm/gibbs-sampling',
    )
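# The `tf.where` branch inside `free_energy` is a numerically stable
# softplus, log(1 + exp(x)). A standalone NumPy sketch of the same
# idea, reusing the cutoff of 30 from the code above:
import numpy as np

def stable_softplus(x, cutoff=30.0):
    x = np.asarray(x, dtype=np.float32)
    # Clip before exponentiating so the unused branch of `where`
    # doesn't overflow float32; above the cutoff, log1p(exp(x)) is
    # indistinguishable from x anyway.
    safe_x = np.minimum(x, cutoff)
    return np.where(x > cutoff, x, np.log1p(np.exp(safe_x)))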
def training_outputs(self):
    networks_output = self.output(*as_tuple(self.inputs), training=True)
    tf_utils.initialize_uninitialized_variables()
    return networks_output