def testForward(self):
  """Forward pass of softmax_cross_entropy_with_logits_v2 is deterministic.

  Runs the op twice on identical inputs across several dtypes and seeds
  and requires bit-exact agreement.
  """
  with self.session(), test_util.force_cpu():
    for dtype in (np.float16, np.float32, np.float64):
      for trial in range(5):
        labels, logits = self._generateInputs(dtype, seed=123 + trial)
        # Two runs on the same inputs; any nondeterminism shows up here.
        first = nn_ops.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=logits)
        second = nn_ops.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=logits)
        self.assertAllEqual(first, second)
def testTrainingMomentum(self):
  """Run one MomentumOptimizer step on the IPU and check tile memory."""
  with self.session() as sess:
    x = array_ops.placeholder(datatype, shape=[1, 224, 224, 4])
    y_ = array_ops.placeholder(datatype, shape=[1, 1000])
    with ipu.scopes.ipu_scope("/device:IPU:0"):
      logits = inference(x)
      loss = math_ops.reduce_mean(
          nn_ops.softmax_cross_entropy_with_logits_v2(
              logits=logits, labels=array_ops.stop_gradient(y_)))
      train = momentum.MomentumOptimizer(0.01, 0.9).minimize(loss)

    report = tu.ReportJSON(self, sess)
    sess.run(variables.global_variables_initializer())
    report.reset()

    # Zero-valued inputs are enough: only memory usage is being measured.
    feed = {x: np.zeros([1, 224, 224, 4]), y_: np.zeros([1, 1000])}
    sess.run(train, feed_dict=feed)

    report.parse_log()
    report.assert_total_tile_memory(38642237)
def testTraining(self):
  """Run one gradient-descent step on the IPU and bound total memory."""
  x = array_ops.placeholder(datatype, shape=[1, 224, 224, 4])
  y_ = array_ops.placeholder(datatype, shape=[1, 1000])
  with ipu_ops.ipu_scope("/device:IPU:0"):
    logits = inference(x)
    loss = math_ops.reduce_mean(
        nn_ops.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=array_ops.stop_gradient(y_)))
    train = gradient_descent.GradientDescentOptimizer(0.01).minimize(loss)

  with ops.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()

  utils.configure_ipu_system(utils.create_ipu_config(profiling=True))

  sess = sl.Session()
  sess.run(variables.global_variables_initializer())
  sess.run(report)  # drain any events generated so far

  feed = {x: np.zeros([1, 224, 224, 4]), y_: np.zeros([1, 1000])}
  sess.run(train, feed_dict=feed)

  out = sess.run(report)
  sess.close()

  total = utils.get_memory_size_from_events(utils.extract_all_events(out))
  self.assertTrue(total < 174000000)
def body(x, label):
  """One training step: forward pass, mean loss, and a Momentum update."""
  loss = math_ops.reduce_mean(
      nn_ops.softmax_cross_entropy_with_logits_v2(
          logits=inference(x), labels=array_ops.stop_gradient(label)))
  optimizer = momentum.MomentumOptimizer(0.01, 0.9)
  return x, label, optimizer.minimize(loss)
def init_graph(self):
  """Build the candidate-generation graph (YouTube-DNN style).

  Creates feed placeholders, embeds video ids and the search id, feeds
  the concatenated features through a configurable dense stack, and then
  either produces full logits (inference) or a sampled-softmax training
  loss with clipped gradient-descent updates.
  """
  # Feed placeholders. Names must stay unique and unchanged, since
  # serving code looks the tensors up by name.
  self.video_ids_ph = tf.placeholder(tf.int32, shape=[None, None], name='video_ids')
  self.search_id_ph = tf.placeholder(tf.int32, shape=[None], name='search_id')
  self.age_ph = tf.placeholder(tf.float32, shape=[None], name='age')
  self.gender_ph = tf.placeholder(tf.float32, shape=[None], name='gender')
  self.label_ph = tf.placeholder(tf.float32, shape=[None], name='label_ph')
  # Video and search embeddings; their lookups are concatenated with the
  # age and gender features below.
  # NOTE(review): both embedding tables are 1-D (one scalar per id), so
  # each lookup yields a scalar per id — confirm this is intended rather
  # than shape=[total_num, embedding_dim].
  video_embedding = tf.get_variable('video_embedding', shape=[self.video_total_num],
                                    dtype=tf.float32,
                                    initializer=tf.variance_scaling_initializer())
  video_vecs = tf.nn.embedding_lookup(video_embedding, self.video_ids_ph)
  search_embedding = tf.get_variable(name='search_embedding',
                                     shape=[self.search_total_num],
                                     dtype=tf.float32,
                                     initializer=tf.variance_scaling_initializer())
  search_vec = tf.nn.embedding_lookup(search_embedding, self.search_id_ph)
  # Mean-pool the watched-video embeddings, then concatenate with the
  # search embedding, age and gender into a [batch, 4] input.
  input = tf.concat([tf.reshape(tf.reduce_mean(video_vecs, axis=1), shape=[-1, 1]),
                     tf.reshape(search_vec, shape=[-1, 1]),
                     tf.reshape(self.age_ph, shape=[-1, 1]),
                     tf.reshape(self.gender_ph, shape=[-1, 1])], axis=1)
  # Dense tower; depth and layer widths are hyperparameters chosen by mAP.
  for i in range(self.depth):
    input = tf.layers.dense(inputs=input, units=self.units_list[i],
                            kernel_regularizer=layers.l2_regularizer(0.001),
                            activation=tf.nn.relu, name='fc{}'.format(i),
                            trainable=self.is_training)
    input = tf.layers.batch_normalization(input, training=self.is_training,
                                          name='fc{}_bn'.format(i))
  output = input
  # Output-class ("millions of labels" in the paper) softmax parameters.
  # NOTE(review): weights assume the final dense layer is 128-wide —
  # confirm units_list[-1] == 128.
  weights = tf.get_variable('soft_weight', shape=[self.class_distinct, 128],
                            initializer=tf.variance_scaling_initializer())
  biases = tf.get_variable('soft_bias', shape=[self.class_distinct],
                           initializer=tf.variance_scaling_initializer())
  if not self.is_training:
    # Inference: full logits over all classes.
    self.logits_out = tf.matmul(output, tf.transpose(weights))
  else:
    # Labels must be 2-D for the sampled-logits helper (biases stay 1-D).
    self.labels = tf.reshape(self.label_ph, shape=[-1, 1])
    # Sampled softmax loss; num_true=1 means one positive (one-hot) per
    # example, with 100 sampled negatives.
    self.logits_out, self.labels_out = nn_impl._compute_sampled_logits(
        weights=weights, biases=biases, labels=self.labels, inputs=input,
        num_sampled=100, num_classes=self.class_distinct, num_true=1,
        sampled_values=None, remove_accidental_hits=True,
        partition_strategy="div", name="sampled_softmax_loss", seed=None)
    # Targets are constants w.r.t. the optimization.
    labels = array_ops.stop_gradient(self.labels_out, name="labels_stop_gradient")
    sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(labels=labels,
                                                                 logits=self.logits_out)
    self.loss = tf.reduce_mean(sampled_losses)
    # Gradient descent with global-norm gradient clipping at 5.
    gradient_descent_optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    train_var = tf.trainable_variables()
    clip_gradients, _ = tf.clip_by_global_norm(tf.gradients(self.loss, train_var), 5)
    self.gradient_descent = gradient_descent_optimizer.apply_gradients(
        zip(clip_gradients, train_var), global_step=self.global_step)
def body(x, label):
  """One training step with gradient accumulation over 10 mini-batches."""
  loss = math_ops.reduce_mean(
      nn_ops.softmax_cross_entropy_with_logits_v2(
          logits=inference(x), labels=array_ops.stop_gradient(label)))
  accumulating_opt = gradient_accumulation_optimizer.GradientAccumulationOptimizer(
      momentum.MomentumOptimizer(0.01, 0.9), 10)
  return x, label, accumulating_opt.minimize(loss)
def body(x, label):
  """One training step: forward pass, mean softmax cross-entropy loss,
  and a gradient-descent update.

  Bug fix: the loss previously targeted `y_`, a name captured from an
  enclosing scope, instead of this function's own `label` argument —
  inconsistent with the sibling `body` definitions, which use `label`.
  """
  logits = inference(x)
  loss = math_ops.reduce_mean(
      nn_ops.softmax_cross_entropy_with_logits_v2(
          logits=logits, labels=array_ops.stop_gradient(label)))
  return x, label, gradient_descent.GradientDescentOptimizer(
      0.01).minimize(loss)
def gradients(seed=789):
  """Backprop an injected (random) upstream gradient through the op and
  return d(output * upstream)/d(logits)."""
  np.random.seed(seed)
  upstream = self._randomFloats(output_shape, dtype)
  with backprop.GradientTape(persistent=True) as tape:
    tape.watch(logits)
    # Multiply inside the tape so the injected gradient is recorded.
    injected = nn_ops.softmax_cross_entropy_with_logits_v2(
        labels=labels, logits=logits) * upstream
    return tape.gradient(injected, logits)
def testMergedWeightDownload(self):
  """Check that the IPU weight-download copies are merged.

  Compiles a small inference graph, then inspects the compile report:
  the first child of the Switch program (the download-weights sequence)
  must contain fewer than 6 entries, and overall memory must stay under
  17.6 MB.
  """
  x = array_ops.placeholder(datatype, shape=[16, 4])
  y_ = array_ops.placeholder(datatype, shape=[16, 256])
  with ipu_ops.ipu_scope("/device:IPU:0"):
    logits = inference(x)
    loss = math_ops.reduce_mean(
        nn_ops.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=array_ops.stop_gradient(y_)))
  with ops.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()
  opts = utils.create_ipu_config(profiling=True)
  opts = utils.set_ipu_model_options(opts, True)
  opts = utils.auto_select_ipus(opts, 1)
  utils.configure_ipu_system(opts)
  sess = sl.Session()
  sess.run(variables.global_variables_initializer())
  # Drain events generated by initialization before the measured run.
  sess.run(report)
  data = np.zeros([16, 4])
  labels = np.zeros([16, 256])
  sess.run(loss, feed_dict={x: data, y_: labels})
  out = sess.run(report)
  sess.close()
  evts = utils.extract_all_events(out)
  r = utils.extract_compile_reports(out)
  self.assertEqual(len(r), 1)
  j = json.loads(r[0][1])
  # Find the switch
  switch_index = 0
  for p in j['programs']:
    if p['type'] == 'Switch':
      break
    switch_index = switch_index + 1
  # Find the first case - the download weights sequence
  download_weights_index = j['programs'][switch_index]['children'][0]
  # The download weights sequence should not have lots of entries (because the
  # copies will have been merged)
  self.assertTrue(
      len(j['programs'][download_weights_index]['children']) < 6)
  # Also check the overall size
  size = utils.get_memory_size_from_events(evts)
  self.assertTrue(size < 17600000)
def kd_loss(scores, masks):
  """Per-pixel two-class softmax cross-entropy against soft mask targets.

  `scores` is reshaped to (?, 2) logits; `masks` supplies the foreground
  probability, so the target distribution is [1 - m, m] per pixel.
  """
  flat_logits = tf.reshape(scores, [-1, 2])  # (?, 2)
  foreground = tf.reshape(masks, [-1])  # foreground goes to value 1
  targets = tf.stack([1 - foreground, foreground], axis=1)
  # Targets are treated as constants: no gradient flows into the masks.
  per_pixel_loss = softmax_cross_entropy_with_logits_v2(
      logits=flat_logits,
      labels=tf.stop_gradient(targets),
  )
  return tf.reduce_mean(per_pixel_loss)
def my_model(x, y):
  """Three 3x3 convolutions, spatial max-reduce, softmax cross-entropy.

  Returns [mean loss, sharded gradient-descent train op].
  """
  x = layers.Conv2D(8, 3, padding='same', name="conv1", use_bias=False)(x)
  x = layers.Conv2D(8, 3, padding='same', name="conv2", use_bias=False)(x)
  x = layers.Conv2D(8, 3, padding='same', name="conv3", use_bias=False)(x)
  x = math_ops.reduce_max(x, axis=[1, 2])
  cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
      logits=x, labels=array_ops.stop_gradient(y))
  loss = math_ops.reduce_mean(cross_entropy)
  # NOTE(review): minimize() is given the unreduced cross_entropy rather
  # than `loss` — preserved from the original (same pattern as the other
  # my_model definitions in this file); confirm it is intentional.
  optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
  train = optim.minimize(cross_entropy)
  return [loss, train]
def testExceptionThrowing(self):
  """The op must raise UnimplementedError on GPU when no deterministic
  kernel is available, for every floating dtype."""
  with self.session(force_gpu=True):
    label_values = [[0.2, 0.4], [0.1, 0.2]]
    logit_values = [[0.3, 0.5], [0.5, 0.6]]
    for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
      labels = constant_op.constant(label_values, dtype=dtype)
      logits = constant_op.constant(logit_values, dtype=dtype)
      with self.assertRaisesRegex(
          errors_impl.UnimplementedError,
          "Deterministic GPU implementation of "
          "SoftmaxCrossEntropyWithLogits not available."):
        self.evaluate(
            nn_ops.softmax_cross_entropy_with_logits_v2(labels=labels,
                                                        logits=logits))
def testMarkOpsWithAutoshardingContext(self):
  """Ops built inside ipu_autoshard() must be collected for autosharding.

  Builds a small conv model under the autoshard context, runs automatic
  sharding, then partitions all graph ops into those marked for
  autosharding (fwd) and the rest (bwd) and sanity-checks the split.

  NOTE(review): the with-block nesting below is reconstructed from a
  flattened source line — verify the scope extents against the original
  file.
  """
  with ipu.scopes.ipu_scope("/device:IPU:0"):
    with ipu.autoshard.ipu_autoshard():
      x = array_ops.placeholder(dtypes.float32, shape=[1, 32, 32, 4])
      y = array_ops.placeholder(dtypes.float32, shape=[1, 8])
      inp = x
      # Forward ops are deliberately created under a user scope named
      # 'gradients' to exercise the marking logic.
      with ops.name_scope('gradients'):
        x = layers.Conv2D(8, 3, padding='same', name="conv1",
                          use_bias=False)(x)
        x = layers.Conv2D(8, 3, padding='same', name="conv2",
                          use_bias=False)(x)
        x = layers.Conv2D(8, 3, padding='same', name="conv3",
                          use_bias=False)(x)
        x = math_ops.reduce_max(x, axis=[1, 2])
        cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
            logits=x, labels=array_ops.stop_gradient(y))
        loss = math_ops.reduce_mean(cross_entropy)
      optim = sharded_optimizer.ShardedOptimizer(
          gd.GradientDescentOptimizer(0.01))
      optim.minimize(loss)
    ipu.autoshard.automatic_sharding(2, inp, loss)
  to_autoshard = ops.get_default_graph().get_collection(
      ipu.sharding._IPU_AUTOSHARD)
  fwd_ops = []
  bwd_ops = []
  all_ops = ops.get_default_graph().get_operations()
  # Split every graph op by whether the autoshard context marked it.
  for o in all_ops:
    if o in to_autoshard:
      fwd_ops.append(o)
    else:
      bwd_ops.append(o)
  self.assertTrue(len(fwd_ops) > 10)
  self.assertTrue(len(bwd_ops) > 10)
  # All three convolutions belong to the marked (forward) set.
  self.assertEqual(len([o for o in fwd_ops if o.type == 'Conv2D']), 3)
def testGradientLabelWithV2(self):
  """The gradient w.r.t. labels of the v2 op passes the numeric check."""
  with self.test_session():
    label_values = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5]
    logit_values = [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4]
    l = constant_op.constant(label_values, shape=[3, 4],
                             dtype=dtypes.float64, name="l")
    f = constant_op.constant(logit_values, shape=[3, 4],
                             dtype=dtypes.float64, name="f")
    xent = nn_ops.softmax_cross_entropy_with_logits_v2(
        labels=l, logits=f, name="xent")
    # v2 (unlike v1) propagates gradients into the labels input.
    err = gradient_checker.compute_gradient_error(l, [3, 4], xent, [3])
    self.assertLess(err, 5e-8)
def my_model(loss, x, y):
  """PopnnLSTM model automatically sharded over two IPUs.

  Returns [mean loss, train op]; the `loss` parameter is overwritten.
  """
  with ops.device("/device:IPU:0"):
    inp = x
    cell = popnn_rnn.PopnnLSTM(256, dtype=dtypes.float32)
    x, _ = cell(x, training=True)
    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
        logits=x, labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(cross_entropy)
    # NOTE(review): minimize() targets the unreduced cross_entropy, not
    # `loss` — preserved as in the original.
    optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(0.01))
    train = optim.minimize(cross_entropy)
    autoshard.automatic_sharding(2, inp, loss)
    return [loss, train]
def my_model(loss, x, y):
  """Three-conv model automatically sharded over two IPUs.

  Returns [mean loss, train op]; `lr` is a free variable resolved from
  the enclosing scope. The `loss` parameter is overwritten.
  """
  with ops.device("/device:IPU:0"):
    inp = x
    conv = layers.Conv2D
    x = conv(8, 3, padding='same', name="conv1", use_bias=False)(x)
    x = conv(8, 3, padding='same', name="conv2", use_bias=False)(x)
    x = conv(8, 3, padding='same', name="conv3", use_bias=False)(x)
    x = math_ops.reduce_max(x, axis=[1, 2])
    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
        logits=x, labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(cross_entropy)
    # NOTE(review): minimize() targets the unreduced cross_entropy, not
    # `loss` — preserved as in the original.
    optim = so.ShardedOptimizer(gd.GradientDescentOptimizer(lr))
    train = optim.minimize(cross_entropy)
    autoshard.automatic_sharding(2, inp, loss)
    return [loss, train]
def testMergedWeightDownload(self):
  """Weight-download copies must be merged into few program entries."""
  with self.session() as sess:
    x = array_ops.placeholder(datatype, shape=[16, 4])
    y_ = array_ops.placeholder(datatype, shape=[16, 256])
    with ipu.scopes.ipu_scope("/device:IPU:0"):
      logits = inference(x)
      loss = math_ops.reduce_mean(
          nn_ops.softmax_cross_entropy_with_logits_v2(
              logits=logits, labels=array_ops.stop_gradient(y_)))

    report = ReportJSON(self,
                        sess,
                        compile_ipu_code=True,
                        device_count_override=1)
    sess.run(variables.global_variables_initializer())
    report.reset()

    feed = {x: np.zeros([16, 4]), y_: np.zeros([16, 256])}
    sess.run(loss, feed_dict=feed)
    report.parse_log()

    # The first child of the Switch program is the download-weights
    # sequence.
    download_index = report.get_first_program_of_type('Switch')['children'][0]
    self.assertLess(
        len(report.get_program(download_index)['children']), 12,
        "The download weights sequence should not have lots of entries "
        "(because the copies will have been merged)")

    # Also check the overall size
    report.assert_total_tile_memory(8725954)
def compute_loss():
  """Mean softmax cross-entropy of temperature-scaled logits against y.

  `logits`, `temp`, and `y` are resolved from the enclosing scope.
  """
  scaled_logits = divide(logits, temp)
  per_example = softmax_cross_entropy_with_logits_v2(
      labels=convert_to_tensor(y), logits=scaled_logits)
  return reduce_mean(per_example)