# Assumed imports for the examples in this section (Graphcore TensorFlow 1.x port):
#   import tensorflow as tf
#   from tensorflow.python.ipu.ops import rand_ops
# dense_layer is assumed to be a user-defined dense + ReLU helper.
def model(lr, outqueue, training: bool, inputs, labels):
    h1Size = 320
    h2Size = 10
    droprate = 0.2

    relu1 = dense_layer(h1Size, inputs, "d1")

    # Use the IPU optimised version of dropout:
    if training:
        drop1 = rand_ops.dropout(relu1, rate=droprate)
    else:
        drop1 = relu1

    relu2 = dense_layer(h2Size, drop1, "d2")

    with tf.variable_scope("metrics", reuse=tf.AUTO_REUSE, use_resource=True):
        acc, acc_op = tf.metrics.accuracy(
            labels=labels,
            predictions=tf.argmax(relu2, axis=1, output_type=tf.dtypes.int32),
            name="accuracy")
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=relu2)

    if training:
        with tf.variable_scope("training", reuse=tf.AUTO_REUSE, use_resource=True):
            optimiser = tf.train.MomentumOptimizer(
                learning_rate=lr, momentum=0.0001,
                use_nesterov=True, name='optimise')
            train_op = optimiser.minimize(loss)
            with tf.control_dependencies([train_op, acc_op]):
                mean_loss = tf.reduce_mean(loss, name='train_loss')
    else:
        with tf.control_dependencies([acc_op]):
            mean_loss = tf.reduce_mean(loss, name='test_loss')

    return outqueue.enqueue({'mean_loss': mean_loss, 'acc': acc})
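# Hedged sketch of the host-side wiring needed to run a model function like the
# one above on an IPU. Everything here is illustrative: run_training, the
# dataset argument and the iteration count are assumptions, and the ipu.* API
# shown (IPUInfeedQueue, IPUOutfeedQueue, ipu_compiler.compile, loops.repeat,
# IPUConfig) matches SDK 2.x; older SDKs require feed_name arguments and use
# ipu.utils.create_ipu_config instead of IPUConfig.
import tensorflow as tf  # TF 1.x Graphcore port assumed
from tensorflow.python import ipu

def run_training(dataset, lr=0.01, iterations=100):
    # dataset is assumed to yield (inputs, labels) tuples.
    infeed = ipu.ipu_infeed_queue.IPUInfeedQueue(dataset)
    outfeed = ipu.ipu_outfeed_queue.IPUOutfeedQueue()

    def body(inputs, labels):
        # 'model' is the function defined above; it returns the enqueue op.
        return model(lr, outfeed, True, inputs, labels)

    def my_net():
        # Run the model body 'iterations' times per session.run call.
        return ipu.loops.repeat(iterations, body, infeed_queue=infeed)

    with ipu.scopes.ipu_scope("/device:IPU:0"):
        run_loop = ipu.ipu_compiler.compile(my_net, inputs=[])

    dequeued = outfeed.dequeue()

    cfg = ipu.config.IPUConfig()
    cfg.auto_select_ipus = 1
    cfg.configure_ipu_system()

    with tf.Session() as sess:
        sess.run(infeed.initializer)
        sess.run(tf.global_variables_initializer())
        sess.run(run_loop)
        # The dequeued arrays have a leading dimension of one entry per enqueue.
        results = sess.run(dequeued)
        print(results['mean_loss'][-1], results['acc'][-1])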
# This stage uses TF's internal module aliases; assumed imports:
#   import numpy as np
#   from tensorflow.python.ops import init_ops, math_ops, variable_scope
#   from tensorflow.python.ipu.ops import rand_ops
def stage(x, name):
    with variable_scope.variable_scope(name, use_resource=True):
        weight = variable_scope.get_variable(
            "w",
            shape=[4, 4],
            dtype=np.float32,
            initializer=init_ops.ones_initializer())
        x = math_ops.matmul(x, weight)
        # The fixed two-element seed makes the dropout mask reproducible.
        x = rand_ops.dropout(x, seed=[10, 10])
        return x
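# Hedged sketch: a stage function like the one above is typically used as one
# computational stage of an IPU pipeline. The wiring below is illustrative
# only; outfeed_queue is assumed to be an IPUOutfeedQueue created elsewhere,
# the stage names and accumulation count are arbitrary, and the
# gradient_accumulation_count keyword was named pipeline_depth in older
# Graphcore SDK releases, so check the API for your version.
import functools
from tensorflow.python.ipu import pipelining_ops

def my_net(x):
    return pipelining_ops.pipeline(
        computational_stages=[functools.partial(stage, name="s1"),
                              functools.partial(stage, name="s2")],
        gradient_accumulation_count=8,
        inputs=[x],
        outfeed_queue=outfeed_queue)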
def _apply_dropout(self, inputs, training):
    if not training:
        return inputs

    # Apply the same dropout mask across the sequence - this function is called
    # when the inputs are shaped as [S, B, N].
    noise_shape = inputs.get_shape().as_list()
    noise_shape[0] = 1

    return rand_ops.dropout(inputs,
                            seed=self._dropout_seed,
                            rate=self._dropout,
                            noise_shape=noise_shape,
                            name=self.name + "_dropout")
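# The noise_shape trick above relies on standard dropout broadcasting
# semantics: any dimension set to 1 in noise_shape shares a single random
# keep/drop mask across that dimension. A minimal self-contained illustration
# using stock tf.nn.dropout (the IPU rand_ops.dropout is assumed to broadcast
# the same way):
import tensorflow as tf

x = tf.ones([4, 2, 3])  # [S, B, N]: sequence, batch, features
y = tf.nn.dropout(x, rate=0.5, noise_shape=[1, 2, 3], seed=0)
# All sequence steps share one mask: y[0], y[1], y[2] and y[3] are identical.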
# Note: training, droprate, fc_layers, iterations_per_dense_grad and
# dense_grad_enabled are free variables captured from the enclosing scope.
def stage_1(lr, inputs, labels):
    # Counter to keep track of the last iteration for the dense-gradient
    # computation
    with tf.variable_scope("counter", reuse=tf.AUTO_REUSE, use_resource=True):
        itr_counter = tf.get_variable("iterations",
                                      shape=[],
                                      dtype=tf.int32,
                                      trainable=False,
                                      initializer=tf.zeros_initializer())
        inc = tf.assign_add(itr_counter, 1)
        mod_itrs = tf.math.floormod(inc, iterations_per_dense_grad)
        last_itr = tf.equal(mod_itrs, 0)

    fc1 = fc_layers['fc1']
    relu1 = fc1(inputs, dense_grad_enabled and last_itr)

    # Use the IPU optimised version of dropout:
    if training:
        drop1 = rand_ops.dropout(relu1, rate=droprate)
    else:
        drop1 = relu1

    return lr, labels, drop1, last_itr
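# The counter above produces a flag that fires once every
# iterations_per_dense_grad steps: inc is the post-increment counter value, so
# floormod(inc, N) == 0 is True on steps N, 2N, 3N, ... A plain-Python
# illustration of the same arithmetic (N = 4 is an arbitrary example value):
N = 4
for inc in range(1, 9):
    last_itr = (inc % N) == 0
    print(inc, last_itr)  # True at inc = 4 and inc = 8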
def model(fc_layers, droprate, lr, iterations_per_step,
          training: bool, last_outqueue, inputs, labels):
    with tf.variable_scope("counter", reuse=tf.AUTO_REUSE, use_resource=True):
        itr_counter = tf.get_variable("iterations",
                                      shape=[],
                                      dtype=tf.int32,
                                      initializer=tf.zeros_initializer())
        mod_itrs = tf.math.floormod(itr_counter, iterations_per_step)
        last_itr = tf.equal(mod_itrs, 0)
        inc = tf.assign_add(itr_counter, 1)

    fc1 = fc_layers['fc1']
    with tf.variable_scope('fc1', reuse=tf.AUTO_REUSE, use_resource=True):
        relu1 = fc1(inputs, last_itr)

    # Use the IPU optimised version of dropout:
    if training:
        drop1 = rand_ops.dropout(relu1, rate=droprate)
    else:
        drop1 = relu1

    fc2 = fc_layers['fc2']
    with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE, use_resource=True):
        relu2 = fc2(drop1, last_itr)

    with tf.variable_scope("metrics", reuse=tf.AUTO_REUSE, use_resource=True):
        acc, acc_op = tf.metrics.accuracy(
            labels=labels,
            predictions=tf.argmax(relu2, axis=1, output_type=tf.dtypes.int32),
            name="accuracy")
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=relu2)

    if training:
        with tf.variable_scope("training", reuse=tf.AUTO_REUSE, use_resource=True):
            optimiser = tf.train.MomentumOptimizer(
                learning_rate=lr, momentum=0.0001,
                use_nesterov=True, name='optimise')
            train_op = optimiser.minimize(loss)
            momentum_slot_names = optimiser.get_slot_names()
            logger.debug(f"Optimiser slot names: {momentum_slot_names}")
            with tf.control_dependencies([train_op, acc_op]):
                mean_loss = tf.reduce_mean(loss, name='train_loss')
    else:
        with tf.control_dependencies([acc_op]):
            mean_loss = tf.reduce_mean(loss, name='test_loss')

    # Prepare results for feeds:
    last_results = {'mean_loss': mean_loss, 'acc': acc}
    for name, fc in fc_layers.items():
        if fc.is_sparse():
            with tf.variable_scope(name, reuse=True):
                weights_tensor = tf.convert_to_tensor(fc.get_values_var())
                last_results[name + '_non_zeros'] = weights_tensor
            if training:
                dense_grad_w = fc.get_dense_grad_w(loss)
                fc.record_momentum_var(optimiser, momentum_slot_names[0])
                last_results[name + '_momentum'] = tf.convert_to_tensor(
                    fc.momentum_var)
                last_results[name + '_grad_w'] = tf.convert_to_tensor(
                    dense_grad_w)

    # When training we only want to return the sparse
    # non-zero weight values on the last iteration.
    if training:
        def enqueue_last_itr():
            enqueue_weights = last_outqueue.enqueue(last_results)
            with tf.control_dependencies([enqueue_weights]):
                return tf.no_op()

        def nop():
            return tf.no_op()

        cond_op = tf.cond(last_itr, enqueue_last_itr, nop)
        enqueue_op = tf.group(inc, cond_op, train_op)
    else:
        enqueue_op = last_outqueue.enqueue({'mean_loss': mean_loss, 'acc': acc})

    return enqueue_op
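# Hedged sketch of consuming the conditional outfeed above on the host.
# Because last_results is only enqueued on the iteration where last_itr is
# True, each dequeue returns one entry per enqueue since the previous dequeue.
# 'train_loop' stands for a compiled ipu.loops.repeat over this model function
# and is an assumption for illustration.
dequeue_op = last_outqueue.dequeue()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_loop)            # run iterations_per_step steps on the IPU
    results = sess.run(dequeue_op)  # sparse weights/momentum from the last step
    print(results['mean_loss'], results['acc'])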
def dropped_inputs():
    return rand_ops.dropout(inputs,
                            seed=self.seed,
                            rate=self.rate,
                            noise_shape=self.noise_shape,
                            name=self.name)
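# dropped_inputs above is typically the training branch of a Keras-style
# dropout layer. A hedged sketch of a surrounding call method, assuming the
# layer stores seed, rate and noise_shape attributes, and using the standard
# tf.keras.backend.in_train_phase helper to select between the dropout and
# identity branches:
def call(self, inputs, training=None):
    def dropped_inputs():
        return rand_ops.dropout(inputs,
                                seed=self.seed,
                                rate=self.rate,
                                noise_shape=self.noise_shape,
                                name=self.name)

    # Apply dropout in training and pass inputs through unchanged otherwise.
    return tf.keras.backend.in_train_phase(dropped_inputs,
                                           lambda: tf.identity(inputs),
                                           training=training)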