Example #1
 def testGradientAtZero(self):
   with self.test_session():
     logits = constant_op.constant([0.0, 0.0], dtype=dtypes.float64)
     targets = constant_op.constant([0.0, 1.0], dtype=dtypes.float64)
     loss = nn_impl.sigmoid_cross_entropy_with_logits(
         labels=targets, logits=logits)
     grads = gradients_impl.gradients(loss, logits)[0].eval()
   self.assertAllClose(grads, [0.5, -0.5])
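The expected values follow from the closed-form derivative of the logistic loss: d/dx [max(x, 0) - x*z + log(1 + exp(-|x|))] = sigmoid(x) - z, which at x = 0 gives 0.5 - z. A minimal standalone check in TF 2.x eager mode (a sketch, not part of the original test suite):

import tensorflow as tf

logits = tf.constant([0.0, 0.0], dtype=tf.float64)
targets = tf.constant([0.0, 1.0], dtype=tf.float64)
with tf.GradientTape() as tape:
    tape.watch(logits)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets,
                                                   logits=logits)
grads = tape.gradient(loss, logits)
print(grads.numpy())  # sigmoid(0) - z = 0.5 - z, i.e. [0.5, -0.5]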
Example #3
 def testConstructionNamed(self):
     with self.test_session():
         logits, targets, _ = self._Inputs()
         loss = nn_impl.sigmoid_cross_entropy_with_logits(
             labels=targets, logits=logits, name="mylogistic")
     self.assertEqual("mylogistic", loss.op.name)
Example #4
def binary_cross_entropy(labels, logits, name=None):
    """ Computes the binary cross entropy between the labels and logits
        This is a safe version that adds epsilon to logits to prevent log(0)
    """
    return nn_impl.sigmoid_cross_entropy_with_logits(logits=ensure_finite(logits),
                                                     labels=labels,
                                                     name=name)
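Note that `ensure_finite` is a helper defined elsewhere in the source project, not a TensorFlow API. A plausible minimal sketch, under the assumption that it replaces non-finite entries so the loss stays well defined (the project's actual implementation may differ):

import tensorflow as tf

def ensure_finite(x):
    # Hypothetical stand-in: replace NaN/Inf entries with zeros so that
    # sigmoid_cross_entropy_with_logits never sees non-finite logits.
    return tf.where(tf.math.is_finite(x), x, tf.zeros_like(x))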
Example #5
 def testGradient(self):
   sizes = [4, 2]
   with self.test_session():
     logits, targets, _ = self._Inputs(sizes=sizes)
     loss = nn_impl.sigmoid_cross_entropy_with_logits(
         labels=targets, logits=logits)
     err = gradient_checker.compute_gradient_error(logits, sizes, loss, sizes)
   print("logistic loss gradient err = ", err)
   self.assertLess(err, 1e-7)
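`gradient_checker.compute_gradient_error` compares the analytic Jacobian of the loss against a numeric finite-difference estimate and returns the largest absolute discrepancy. The same check can be sketched with the public TF 2.x API `tf.test.compute_gradient` (hypothetical inputs and a looser tolerance, not the original test fixture):

import numpy as np
import tensorflow as tf

def loss_fn(logits):
    targets = tf.constant([[0.0, 1.0], [1.0, 0.0],
                           [0.5, 0.5], [1.0, 1.0]], dtype=tf.float64)
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=targets,
                                                   logits=logits)

logits = tf.random.uniform([4, 2], minval=-2.0, maxval=2.0, dtype=tf.float64)
theoretical, numerical = tf.test.compute_gradient(loss_fn, [logits])
err = np.max(np.abs(theoretical[0] - numerical[0]))
assert err < 1e-4  # loose bound for the finite-difference estimate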
Example #6
 def testLogisticOutputMultiDim(self):
   for use_gpu in [True, False]:
     for dtype in [dtypes.float32, dtypes.float16]:
       with self.test_session(use_gpu=use_gpu):
         logits, targets, losses = self._Inputs(dtype=dtype, sizes=[2, 2, 2])
          loss = nn_impl.sigmoid_cross_entropy_with_logits(
              labels=targets, logits=logits)
         np_loss = np.array(losses).astype(np.float32)
         tf_loss = loss.eval()
       self.assertAllClose(np_loss, tf_loss, atol=0.001)
Example #7
 def testGradient(self):
   sizes = [4, 2]
   with self.cached_session():
     logits, targets, _ = self._Inputs(sizes=sizes)
     loss = nn_impl.sigmoid_cross_entropy_with_logits(
         labels=targets, logits=logits)
     err = gradient_checker.compute_gradient_error(logits, sizes, loss, sizes)
   print("logistic loss gradient err = ", err)
   self.assertLess(err, 1e-7)
Example #8
 def testLogisticOutput(self):
   for use_gpu in [True, False]:
     for dtype in [dtypes.float32, dtypes.float16]:
       with self.cached_session(use_gpu=use_gpu):
         logits, targets, losses = self._Inputs(dtype=dtype)
         loss = nn_impl.sigmoid_cross_entropy_with_logits(
             labels=targets, logits=logits)
         np_loss = np.array(losses).astype(np.float32)
         tf_loss = self.evaluate(loss)
       self.assertAllClose(np_loss, tf_loss, atol=0.001)
Example #10
def elementwise_loss(labels, logits, mask):
    return sigmoid_cross_entropy_with_logits(labels=labels,
                                             logits=logits) * mask
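The mask zeroes out per-element losses at padded or otherwise invalid positions before any reduction, a common pattern for variable-length sequences. A short usage sketch (hypothetical data; `tf.nn.sigmoid_cross_entropy_with_logits` stands in for the bare import assumed above):

import tensorflow as tf

labels = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
logits = tf.constant([[2.0, -1.0, 0.3], [-3.0, 4.0, 0.3]])
mask = tf.constant([[1.0, 1.0, 0.0], [1.0, 1.0, 0.0]])  # last step is padding

per_elem = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                   logits=logits) * mask
# Average over the valid (unmasked) positions only.
loss = tf.reduce_sum(per_elem) / tf.reduce_sum(mask)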
Example #11
 def testConstructionNamed(self):
   with self.cached_session():
     logits, targets, _ = self._Inputs()
     loss = nn_impl.sigmoid_cross_entropy_with_logits(
         labels=targets, logits=logits, name="mylogistic")
   self.assertEqual("mylogistic", loss.op.name)
Example #12
 def testShapeError(self):
   with self.assertRaisesRegex(ValueError, "must have the same shape"):
     nn_impl.sigmoid_cross_entropy_with_logits(labels=[1, 2, 3],
                                               logits=[[2, 1]])
Example #13
def build_distributed_graph():
    batch_size = 4
    shape_0 = [batch_size, 5]
    shape_1 = [batch_size, 6]
    maxval = int(0x7FFF)

    server0 = server_lib.Server.create_local_server()
    server1 = server_lib.Server.create_local_server()
    cluster_def = cluster_pb2.ClusterDef()
    job = cluster_def.job.add()
    job.name = 'worker'
    job.tasks[0] = server0.target[len('grpc://'):]
    job.tasks[1] = server1.target[len('grpc://'):]

    config = config_pb2.ConfigProto(
        cluster_def=cluster_def,
        experimental=config_pb2.ConfigProto.Experimental(
            share_session_state_in_clusterspec_propagation=True, ),
    )
    config.allow_soft_placement = False

    with ops.device('/job:worker/task:0'):
        feat_0 = random_ops.random_uniform(shape_0,
                                           maxval=maxval,
                                           dtype=dtypes.int64)
        feat_0 = array_ops.reshape(feat_0, (-1, ))

        feat_1 = random_ops.random_uniform(shape_1,
                                           maxval=maxval,
                                           dtype=dtypes.int64)
        feat_1 = array_ops.reshape(feat_1, (-1, ))

        var_0 = deo.get_variable(
            name='sp_var_0',
            devices=[
                '/job:worker/task:1',
            ],
            initializer=init_ops.random_normal_initializer(0, 0.005),
        )
        var_1 = deo.get_variable(
            name='sp_var_1',
            devices=[
                '/job:worker/task:1',
            ],
            initializer=init_ops.random_normal_initializer(0, 0.005),
        )
        var_list = [var_0, var_1]

        _, tw_0 = deo.embedding_lookup(
            params=var_0,
            ids=feat_0,
            name='sp_emb_0',
            return_trainable=True,
        )
        _, tw_1 = deo.embedding_lookup(
            params=var_1,
            ids=feat_1,
            name='sp_emb_1',
            return_trainable=True,
        )

        collapse_0 = array_ops.reshape(tw_0, (batch_size, -1))
        collapse_1 = array_ops.reshape(tw_1, (batch_size, -1))

        logits_0 = math_ops.reduce_sum(collapse_0, axis=1)
        logits_1 = math_ops.reduce_sum(collapse_1, axis=1)

        logits = math_ops.add(logits_0, logits_1)
        labels = array_ops.zeros((batch_size, ), dtype=dtypes.float32)

        loss = math_ops.reduce_mean(
            nn_impl.sigmoid_cross_entropy_with_logits(
                logits=logits,
                labels=labels,
            ))
        optimizers = get_multiple_optimizers()

        return server0, server1, config, var_list, optimizers, loss
Example #14
    def common_run_context(self, var_list, opt_list, name):
        save_dir = os.path.join(self.get_temp_dir(), 'save_restore')
        save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), 'restrict')

        batch_size = 2
        sample_length = 3
        emb_domain_list = list()
        tws = list()

        for _v in var_list:
            ids = random_ops.random_uniform((batch_size, sample_length),
                                            maxval=1000000,
                                            dtype=_v.key_dtype)
            ids = array_ops.reshape(ids, (-1, ))

            _, tw = deo.embedding_lookup(_v, ids, return_trainable=True)
            tws.append(tw)
            _collapse = array_ops.reshape(tw, (batch_size, -1))
            _logits = math_ops.reduce_sum(_collapse, axis=1)
            _logits = math_ops.cast(_logits, dtypes.float32)
            emb_domain_list.append(_logits)
        logits = math_ops.add_n(emb_domain_list)

        labels = array_ops.zeros((batch_size, ), dtype=dtypes.float32)
        loss = math_ops.reduce_mean(
            nn_impl.sigmoid_cross_entropy_with_logits(
                logits=logits,
                labels=labels,
            ))

        _train_ops = list()
        for _opt in opt_list:
            _train_ops.append(_opt.minimize(loss))
        train_op = control_flow_ops.group(_train_ops)

        restrictor = dvr.VariableRestrictor(var_list=var_list,
                                            optimizer_list=opt_list)

        policies = list(itertools.chain(*restrictor.policy_group.values()))
        tstp_vars = [policy.tstp_var for policy in policies]
        slot_vars = list()
        for tw in tws:
            for opt in opt_list:
                slot_vars += select_slot_vars(tw, opt)

        update_op = restrictor.update()

        threshold = int(batch_size * sample_length * 1.5)
        factor = 1.2
        restrict_op = restrictor.restrict(threshold=threshold, factor=factor)
        saver = saver_lib.Saver()

        with self.session(config=default_config,
                          use_gpu=test_util.is_gpu_available()) as sess:
            self.evaluate(variables.global_variables_initializer())
            n, MAX_ITER = 0, 1000
            while n < MAX_ITER:
                sess.run([train_op, update_op])
                n += 1
                if all(
                        self.evaluate(var.size()) > threshold * factor
                        for var in var_list):
                    break

            rt_save_path = saver.save(sess, save_path)
            self.assertAllEqual(rt_save_path, save_path)
            sess.close()

        with self.session(config=default_config,
                          use_gpu=test_util.is_gpu_available()) as sess:
            self.evaluate(variables.global_variables_initializer())
            saver.restore(sess, save_path)
            s1 = self.evaluate([var.size() for var in var_list])
            s2 = self.evaluate([tv.size() for tv in tstp_vars])
            s3 = self.evaluate([sv.size() for sv in slot_vars])

            self.assertAllGreater(s1, threshold * factor)
            self.assertAllGreater(s2, threshold * factor)
            if s3:
                self.assertAllGreater(s3, threshold * factor)

            saver.save(sess, save_path)

            sess.run(restrict_op)
            s1 = self.evaluate([var.size() for var in var_list])
            s2 = self.evaluate([tv.size() for tv in tstp_vars])
            s3 = self.evaluate([sv.size() for sv in slot_vars])

            self.assertAllLess(s1, threshold * factor + 1)
            self.assertAllLess(s2, threshold * factor + 1)
            if s3:
                self.assertAllLess(s3, threshold * factor + 1)
            sess.close()
Example #15
    def common_run_context(self, var_list, opt_list, name):
        batch_size = 2
        sample_length = 3
        emb_domain_list = list()
        tws = list()

        cluster = ps_worker_cluster(ps_num=2)
        ps_servers, worker_servers, cluster_def = cluster

        config = config_pb2.ConfigProto(
            cluster_def=cluster_def,
            experimental=config_pb2.ConfigProto.Experimental(
                share_session_state_in_clusterspec_propagation=True, ),
            allow_soft_placement=False,
            inter_op_parallelism_threads=2,
            intra_op_parallelism_threads=2,
            gpu_options=config_pb2.GPUOptions(allow_growth=True),
        )

        dev_placement = device_setter.replica_device_setter(
            ps_tasks=2,
            ps_device='/job:ps',
            worker_device='/job:worker',
            cluster=cluster_def,
        )

        with ops.device(dev_placement):
            shared_var_0 = deo.get_variable('distributed_sp_var_0',
                                            initializer=0.0,
                                            devices=['/job:worker/task:0'],
                                            dim=8)
            shared_var_1 = deo.get_variable('distributed_sp_var_1',
                                            initializer=0.0,
                                            devices=['/job:worker/task:0'],
                                            dim=4)
            opt_list = get_multiple_optimizers()

            distributed_var_list = [shared_var_0, shared_var_1]
            for _v in distributed_var_list:
                ids = random_ops.random_uniform((batch_size, sample_length),
                                                maxval=1000000,
                                                dtype=_v.key_dtype)
                ids = array_ops.reshape(ids, (-1, ))

                _, tw = deo.embedding_lookup(_v, ids, return_trainable=True)
                tws.append(tw)
                _collapse = array_ops.reshape(tw, (batch_size, -1))
                _logits = math_ops.reduce_sum(_collapse, axis=1)
                _logits = math_ops.cast(_logits, dtypes.float32)
                emb_domain_list.append(_logits)
            logits = math_ops.add_n(emb_domain_list)

            labels = array_ops.zeros((batch_size, ), dtype=dtypes.float32)
            loss = math_ops.reduce_mean(
                nn_impl.sigmoid_cross_entropy_with_logits(
                    logits=logits,
                    labels=labels,
                ))

            _train_ops = list()
            for _opt in opt_list:
                _train_ops.append(_opt.minimize(loss))
            train_op = control_flow_ops.group(_train_ops)

            restrictor = dvr.VariableRestrictor(var_list=distributed_var_list,
                                                optimizer_list=opt_list)
            update_op = restrictor.update()
            threshold = int(batch_size * sample_length * 1.5)
            factor = 1.2
            restrict_op = restrictor.restrict(threshold=threshold,
                                              factor=factor)

        policies = list(itertools.chain(*restrictor.policy_group.values()))
        tstp_vars = [policy.tstp_var for policy in policies]
        slot_vars = list()
        for tw in tws:
            for opt in opt_list:
                slot_vars += select_slot_vars(tw, opt)

        with session.Session(worker_servers[0].target, config=config) as sess:
            sess.run(variables.global_variables_initializer())
            n, MAX_ITER = 0, 1000
            while n < MAX_ITER:
                sess.run([train_op, update_op])
                n += 1
                if all(
                        sess.run(var.size()) > threshold * factor
                        for var in distributed_var_list):
                    break

            s1 = sess.run([var.size() for var in distributed_var_list])
            s2 = sess.run([tv.size() for tv in tstp_vars])
            s3 = sess.run([sv.size() for sv in slot_vars])

            self.assertAllGreater(s1, threshold * factor)
            self.assertAllGreater(s2, threshold * factor)
            if s3:
                self.assertAllGreater(s3, threshold * factor)

            sess.run(restrict_op)
            s1 = sess.run([var.size() for var in distributed_var_list])
            s2 = sess.run([tv.size() for tv in tstp_vars])
            s3 = sess.run([sv.size() for sv in slot_vars])

            self.assertAllLess(s1, threshold * factor + 1)
            self.assertAllLess(s2, threshold * factor + 1)
            if s3:
                self.assertAllLess(s3, threshold * factor + 1)
            sess.close()
Example #16
def TFNCELoss(X, target_word, L):
    tf.compat.v1.disable_eager_execution()
    in_embed = tf.compat.v1.placeholder(tf.float32, shape=X.shape)
    in_bias = tf.compat.v1.placeholder(tf.float32, shape=L.b.flatten().shape)
    in_weights = tf.compat.v1.placeholder(tf.float32,
                                          shape=L.W.transpose().shape)
    in_target_word = tf.compat.v1.placeholder(tf.int64)
    in_neg_samples = tf.compat.v1.placeholder(tf.int32)
    in_target_prob = tf.compat.v1.placeholder(tf.float32)
    in_neg_samp_prob = tf.compat.v1.placeholder(tf.float32)

    feed = {
        in_embed: X,
        in_weights: L.W.transpose(),
        in_target_word: target_word,
        in_bias: L.b.flatten(),
        in_neg_samples: L.derived_variables["noise_samples"][0],
        in_target_prob: L.derived_variables["noise_samples"][1],
        in_neg_samp_prob: L.derived_variables["noise_samples"][2],
    }

    nce_unreduced = tf.nn.nce_loss(
        weights=in_weights,
        biases=in_bias,
        labels=in_target_word,
        inputs=in_embed,
        sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob),
        num_sampled=L.num_negative_samples,
        num_classes=L.n_classes,
    )

    loss = tf.reduce_sum(nce_unreduced)
    dLdW = tf.gradients(loss, [in_weights])[0]
    dLdb = tf.gradients(loss, [in_bias])[0]
    dLdX = tf.gradients(loss, [in_embed])[0]

    sampled_logits, sampled_labels = _compute_sampled_logits(
        weights=in_weights,
        biases=in_bias,
        labels=in_target_word,
        inputs=in_embed,
        sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob),
        num_sampled=L.num_negative_samples,
        num_classes=L.n_classes,
        num_true=1,
        subtract_log_q=True,
    )

    sampled_losses = sigmoid_cross_entropy_with_logits(labels=sampled_labels,
                                                       logits=sampled_logits)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        (
            _final_loss,
            _nce_unreduced,
            _dLdW,
            _dLdb,
            _dLdX,
            _sampled_logits,
            _sampled_labels,
            _sampled_losses,
        ) = session.run(
            [
                loss,
                nce_unreduced,
                dLdW,
                dLdb,
                dLdX,
                sampled_logits,
                sampled_labels,
                sampled_losses,
            ],
            feed_dict=feed,
        )
    tf.compat.v1.reset_default_graph()

    return {
        "final_loss": _final_loss,
        "nce_unreduced": _nce_unreduced,
        "dLdW": _dLdW,
        "dLdb": _dLdb,
        "dLdX": _dLdX,
        "out_logits": _sampled_logits,
        "out_labels": _sampled_labels,
        "sampled_loss": _sampled_losses,
    }
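The placeholder plumbing above is TF 1.x graph style; under TF 2.x eager execution, `tf.nn.nce_loss` can be called directly on tensors. A minimal sketch with assumed shapes (the names and sizes here are illustrative, not derived from the layer `L`):

import tensorflow as tf

batch, dim, n_classes, n_sampled = 4, 8, 100, 5
weights = tf.random.normal([n_classes, dim])   # one row per class
biases = tf.zeros([n_classes])
inputs = tf.random.normal([batch, dim])        # embedded context vectors
labels = tf.random.uniform([batch, 1], maxval=n_classes, dtype=tf.int64)

# Per-example NCE loss; internally this reduces to sigmoid cross entropy
# over the true and sampled logits.
loss = tf.nn.nce_loss(weights=weights, biases=biases, labels=labels,
                      inputs=inputs, num_sampled=n_sampled,
                      num_classes=n_classes)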
Example #17
 def cost_function(labels, logits, num_classes):
     sampled_losses = sigmoid_cross_entropy_with_logits(
         labels=labels, logits=logits, name="sampled_losses")
     return _sum_rows(sampled_losses)
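`_sum_rows` is a private helper in TensorFlow's `nn_impl` that collapses a 2-D tensor of per-sample, per-class losses into one value per row; `tf.reduce_sum(..., axis=1)` is the public equivalent. A self-contained sketch of the same cost function (the unused `num_classes` parameter is kept for interface parity):

import tensorflow as tf

def cost_function(labels, logits, num_classes):
    sampled_losses = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=logits, name="sampled_losses")
    # Public equivalent of nn_impl._sum_rows: one loss value per example.
    return tf.reduce_sum(sampled_losses, axis=1)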