def test2DAreaMax(self):
     """Check that 2D area attention in "max" mode stays numerically finite.

     Random queries and features are generated, keys and values are obtained
     from the features through two dense layers, and area attention is run
     with both the key and the value mode set to "max". The test then
     evaluates the attention output and the gradient of a squared-error loss
     with respect to the features, and requires every entry to be finite.
     """
     batch_size, heads, depth = 256, 2, 128
     feature_len, key_len = 100, 6
     memory_height = 10
     max_area_height = max_area_width = 3

     def _uniform(length, bound):
          # Uniform samples in [-bound, bound) shaped
          # [batch_size, heads, length, depth].
          return tf.random_uniform([batch_size, heads, length, depth],
                                   minval=-bound,
                                   maxval=bound)

     queries = _uniform(key_len, 10.0)
     features = _uniform(feature_len, 10.0)
     target_values = _uniform(key_len, 0.2)

     keys = tf.layers.dense(features, units=depth)
     values = tf.layers.dense(features, units=depth)
     max_attention = area_attention.dot_product_area_attention(
          queries,
          keys,
          values,
          bias=None,
          area_key_mode="max",
          area_value_mode="max",
          name="max_key",
          max_area_width=max_area_width,
          max_area_height=max_area_height,
          memory_height=memory_height)

     squared_error = tf.pow(target_values - max_attention, 2)
     max_gradients = tf.gradients(tf.reduce_mean(squared_error), features)

     with self.test_session() as session:
          session.run(tf.global_variables_initializer())
          gradient_values, attention_values = session.run(
               [max_gradients, max_attention])

     # Neither the gradients nor the attention output may contain NaN/Inf.
     for fetched in (gradient_values, attention_values):
          self.assertTrue(np.all(np.isfinite(fetched)))
Exemple #2
0
    def __readImages(self, filename):
        """Read one image file and build the input / target tensors for it.

        Args:
            filename: name of the image file, as accepted by ``tf.read_file``.

        Returns:
            A tuple ``(filename, input, targets, gammadInput)``:
              * ``filename`` - the argument, passed through unchanged.
              * ``input`` - the image raised to the power 2.2, optionally
                passed through ``helpers.logTensor`` (when ``self.logInput``
                is set) and then through ``helpers.preprocess``.
              * ``targets`` - zeros with the shape of ``input``, tiled
                ``self.nbTargetsToRead`` times along a new leading axis.
              * ``gammadInput`` - the float32 image before the 2.2 power.
        """
        image_string = tf.read_file(filename)  # File contents as a string tensor.
        decoded = tf.image.decode_image(image_string)
        float_image = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
        assertion = tf.assert_equal(tf.shape(float_image)[-1], 3,
                                    message="image does not have 3 channels")

        # set_shape() only annotates the static shape and creates no graph op,
        # so by itself it would never cause the assertion to execute. Passing
        # the tensor through tf.identity inside the control-dependency scope
        # makes every downstream consumer depend on the channel check.
        with tf.control_dependencies([assertion]):
            float_image = tf.identity(float_image)
        float_image.set_shape([None, None, 3])

        gammadInput = float_image
        # Correct for the gamma (the gamma is NOT corrected automatically).
        processed = tf.pow(float_image, 2.2)
        # If we want to log the inputs, we do it here.
        if self.logInput:
            processed = helpers.logTensor(processed)
        # Per the original note, preprocess maps values from [0; 1] to [-1; 1].
        processed = helpers.preprocess(processed)

        # All-zero placeholder targets, one copy per target to read.
        targets = tf.zeros(tf.shape(processed))
        targets = tf.expand_dims(targets, axis=0)
        targets = tf.tile(targets, (self.nbTargetsToRead, 1, 1, 1))

        return filename, processed, targets, gammadInput
    def __init__(self, Y, logits, alpha=0.90, gamma=0.5):
        """Build a focal-loss style objective and store it on the instance.

        Args:
            Y: label tensor. Its last axis is indexed at 0 and 1, so it is
                presumably a two-channel one-hot encoding - TODO confirm.
            logits: raw scores; a sigmoid is applied to obtain ``self.pred``.
            alpha: weight used where ``Y[..., 1]`` is set; ``1 - alpha`` is
                used where ``Y[..., 0]`` is set.
            gamma: exponent applied to the modulating factor.

        Sets:
            self.pred: sigmoid of ``logits`` clipped to [1e-10, 1 - 1e-10].
            self.loss: scalar mean of the per-example losses.
        """
        with tf.name_scope("Focal_Loss"):
            eps = 1e-10
            probabilities = tf.clip_by_value(tf.nn.sigmoid(logits), eps,
                                             1 - eps)
            self.pred = probabilities

            # NOTE(review): p_t is the *negated* label-weighted probability,
            # so (1 - p_t) below equals 1 + probability rather than
            # 1 - probability. Kept exactly as written; confirm intent.
            p_t = tf.reduce_sum(-tf.multiply(Y, probabilities), axis=-1)
            cross_entropy = tf.reduce_sum(
                -tf.multiply(Y, tf.log(probabilities)), axis=-1)

            # Per-position class weight chosen by which label channel is set.
            class_weight = Y[..., 1] * alpha + Y[..., 0] * (1. - alpha)

            modulation = tf.pow(class_weight * (1. - p_t), gamma)
            per_position = tf.multiply(modulation, cross_entropy)
            # Average over axes 1-3 first, then over what remains.
            per_example = tf.reduce_mean(per_position, axis=[1, 2, 3])

            self.loss = tf.reduce_mean(per_example)
Exemple #4
0
 def _get_cubic_root(self):
     """Return the real root of the cubic, via Vieta's substitution.

     Derivation notes carried over from the original comments: starting from
     x^2 D^2 + (1-x)^4 * C / h_min^2 with x = sqrt(mu), substituting
     x = y + 1 gives y^3 + p*y = q with p = (D^2 h_min^2) / (2*C) and
     q = -p. Here D is ``self._dist_to_opt_avg`` and C is
     ``self._grad_var``. See
     http://mathworld.wolfram.com/VietasSubstitution.html

     The three inputs are asserted to be neither NaN nor infinite before the
     root is computed.
     """
     operands = (self._dist_to_opt_avg, self._h_min, self._grad_var)
     # All NaN checks first, then all Inf checks, each reporting its operand.
     finite_checks = [
         tf.Assert(tf.logical_not(is_invalid(operand)), [operand])
         for is_invalid in (tf.is_nan, tf.is_inf)
         for operand in operands
     ]
     with tf.control_dependencies(finite_checks):
         p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
         discriminant_root = tf.sqrt(p**2 + 4.0 / 27.0 * p**3)
         w_cubed = (-discriminant_root - p) / 2.0
         # Signed cube root: tf.pow cannot take a fractional power of a
         # negative number directly.
         w = tf.sign(w_cubed) * tf.pow(tf.abs(w_cubed), 1.0 / 3.0)
         y = w - p / 3.0 / w
         root = y + 1
     return root
    def _body(i, update, activation, center):
        """Run one iteration of the EM while loop.

        Args:
            i: loop counter; used to anneal ``beta`` towards ``final_beta``.
            update: routing scores turned into a posterior (softmax over
                axis 2, or ``layers.leaky_routing`` when ``leaky`` is set).
            activation: ignored; it is deleted immediately.
            center: centers from the previous iteration, blended into the
                new ones with weight 0.1.

        Returns:
            ``(prior_update, logit, center)`` for the next iteration.
        """
        del activation
        iteration = tf.cast(i + 1, tf.float32)
        beta = final_beta * (1 - tf.pow(0.95, iteration))

        # route: [outdim, height?, width?, batch, indim]
        posterior = (layers.leaky_routing(update, output_dim)
                     if leaky else tf.nn.softmax(update, dim=2))
        vote_conf = posterior * input_activation

        # masses: [batch, 1, outdim, 1, height, width]
        masses = tf.reduce_sum(vote_conf, axis=1, keep_dims=True) + 0.00001

        # center: [batch, 1, outdim, outatom, height, width]
        weighted_votes = tf.reduce_sum(vote_conf * wx, axis=1, keep_dims=True)
        center = .9 * weighted_votes / masses + .1 * center

        deviation = wx - center
        noise = deviation * deviation
        weighted_noise = tf.reduce_sum(vote_conf * noise,
                                       axis=1,
                                       keep_dims=True)
        variance = min_var + weighted_noise / masses

        # Sum of log-variances over axis 3; p_i is its negation.
        log_det_sigma = tf.reduce_sum(tf.log(variance),
                                      axis=3,
                                      keep_dims=True)
        p_i = -1 * log_det_sigma
        log_2pi = tf.log(2 * math.pi)

        win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
        logit = beta * (win - activation_biases * 5000)
        # min(0, x) - log(1 + exp(-|x|)) is a stable form of log(sigmoid(x)).
        softplus_term = tf.log(1 + tf.exp(-tf.abs(logit)))
        activation_update = tf.minimum(0.0, logit) - softplus_term

        sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
        exp_update = tf.reduce_sum(noise / (2 * variance),
                                   axis=3,
                                   keep_dims=True)
        prior_update = activation_update - sigma_update - exp_update
        return (prior_update, logit, center)
Exemple #6
0
                def metric_fn(per_example_loss, label_ids, logits,
                              is_real_example):
                    """Compute Matthews correlation, accuracy and mean loss.

                    Args:
                        per_example_loss: loss value for each example.
                        label_ids: ground-truth class ids.
                        logits: class scores; the arg-max over the last
                            axis is used as the prediction.
                        is_real_example: per-example weights applied to
                            every metric.

                    Returns:
                        Dict mapping "matthew_corr", "eval_accuracy" and
                        "eval_loss" to ``(value, update_op)`` pairs.
                    """
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    # tf.metrics.* take (labels, predictions); pass them by
                    # keyword so the two cannot be swapped silently.
                    counter_args = dict(labels=label_ids,
                                        predictions=predictions,
                                        weights=is_real_example)
                    # https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
                    tp, tp_op = tf.metrics.true_positives(**counter_args)
                    tn, tn_op = tf.metrics.true_negatives(**counter_args)
                    fp, fp_op = tf.metrics.false_positives(**counter_args)
                    fn, fn_op = tf.metrics.false_negatives(**counter_args)

                    # Matthews correlation; div_no_nan yields 0 when the
                    # denominator is 0.
                    mcc = tf.div_no_nan(
                        tp * tn - fp * fn,
                        tf.pow((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn),
                               0.5))

                    # Compute accuracy
                    accuracy = tf.metrics.accuracy(labels=label_ids,
                                                   predictions=predictions,
                                                   weights=is_real_example)

                    loss = tf.metrics.mean(values=per_example_loss,
                                           weights=is_real_example)

                    return {
                        "matthew_corr":
                        (mcc, tf.group(tp_op, tn_op, fp_op, fn_op)),
                        "eval_accuracy": accuracy,
                        "eval_loss": loss,
                    }
Exemple #7
0
 def fun_w(self, x, low, up):
     """Evaluate a product of per-coordinate bump functions and its gradient.

     Each of the ``self.dim`` columns of ``x`` is rescaled so that
     ``[low, up]`` maps onto ``[-1, 1]``. Inside the open interval the
     factor is ``exp(1 / (s^2 - 1)) / I1``; elsewhere it is zero. The
     per-column factors are multiplied together.

     Args:
         x: tensor split into ``self.dim`` pieces along axis 1.
         low: lower bound of the support.
         up: upper bound of the support.

     Returns:
         ``(w_val, dw)``: the product and its gradient with respect to
         ``x``, with NaN gradient entries replaced by zero.
     """
     I1 = 0.110987  # Normalising constant applied to each factor.
     half_width = (up - low) / 2.0
     columns = tf.split(x, self.dim, 1)

     # Map each coordinate from [low, up] onto [-1, 1].
     scaled_columns = [(column - low - half_width) / half_width
                       for column in columns]

     factors = []
     for scaled in scaled_columns:
         inside_support = tf.greater(1 - tf.abs(scaled), 0)
         bump = tf.exp(1 / (tf.pow(scaled, 2) - 1)) / I1
         factors.append(
             tf.where(inside_support, bump, tf.zeros_like(scaled)))

     w_val = tf.constant(1.0)
     for factor in factors:
         w_val = tf.multiply(w_val, factor)

     dw = tf.gradients(w_val, x, unconnected_gradients='zero')[0]
     # tf.where differentiates both branches, so NaNs can appear in the
     # gradient; replace them with zero.
     dw = tf.where(tf.is_nan(dw), tf.zeros_like(dw), dw)
     return (w_val, dw)
Exemple #8
0
  def _model_fn(input_fea, input_lab):
    """Creates a model, add summary, modes (train or eval), and hooks.

    Args:
      input_fea: input features, or a list of them (laid-out tensors).
      input_lab: input labels, or a list of them (laid-out tensors).

    Returns:
      In 'train' mode, a grouped op of the loss and all update ops.
      In 'eval' mode on TPU, the outfeed enqueue op for the predictions
      followed by the loss and global step; otherwise that list of tensors.
    """

    # input_fea and input_lab should be a list (laid_out_tensors).
    if not isinstance(input_fea, list):
      input_fea = [input_fea]
    if not isinstance(input_lab, list):
      input_lab = [input_lab]

    def _add_summary(lowering, train_or_eval, tf_loss, scalars, global_step):
      """Add all summaries and return the loss tensor that depends on them."""
      # Export any metric that is not already a tf.Tensor.
      for k in list(scalars):
        if not isinstance(scalars[k], tf.Tensor):
          scalars[k] = tf.cast(
              lowering.export_to_tf_tensor(scalars[k]), tf.float32)

      def _host_loss_summary(global_step, tf_loss, **scalars):
        """Add summary.scalar in host side."""
        gs = tf.cast(global_step, tf.int64)
        sum_loss = contrib_summary.scalar(
            '{}_loss'.format(train_or_eval), tf_loss, step=gs)
        sum_ops = [sum_loss.op]
        # dict.items() works on both Python 2 and 3; iteritems() is
        # Python 2 only.
        for description, tf_metric in scalars.items():
          sum_metric = contrib_summary.scalar(
              '{}_{}'.format(train_or_eval, description), tf_metric, step=gs)
          sum_ops.append(sum_metric)
        # Returning an identity of the loss forces the summaries to run.
        with tf.control_dependencies(sum_ops):
          return tf.identity(tf_loss)

      if FLAGS.use_tpu:
        # Cast the global step to tf.int32, since
        # outside_compilation does not support tf.int64.
        tf_loss = tpu.outside_compilation(
            _host_loss_summary,
            tf.cast(global_step, tf.int32),
            tf_loss,
            **scalars)
      else:
        tf_loss = _host_loss_summary(
            tf.cast(global_step, tf.int32),
            tf_loss,
            **scalars)

      return tf_loss

    global_step = tf.train.get_or_create_global_step()
    graph, mesh, mesh_impl = mesh_context.create_graph_mesh_and_mesh_impl()

    with mtf.utils.outside_all_rewrites():
      # Do not tpu_rewrite this part. Inside this unet, if you use TensorFlow
      # instead of Mesh-TensorFlow, it will cause host to tpu send/rec.
      preds, loss, scalars, bn_update_ops = (
          unet.unet_with_spatial_partition(
              mesh, mesh_impl, train_or_eval, input_fea, input_lab))

    if train_or_eval == 'train':
      var_grads = mtf.gradients(
          [loss], [v.outputs[0] for v in graph.trainable_variables])

      # Step-wise decay: multiply by lr_drop_rate every lr_drop_steps steps.
      lr = FLAGS.lr * tf.pow(
          FLAGS.lr_drop_rate,
          tf.floor(tf.cast(global_step, tf.float32) / FLAGS.lr_drop_steps))
      scalars['learning_rate'] = lr

      optimizer = mtf.optimize.AdafactorOptimizer(learning_rate=lr)
      update_ops = optimizer.apply_grads(var_grads, graph.trainable_variables)

      # This is where the actual tf graph got built.
      lowering = mtf.Lowering(graph, {mesh: mesh_impl})

      tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
      tf_update_ops.append(tf.assign_add(global_step, 1))
      tf_update_ops.extend(
          [lowering.lowered_operation(op) for op in bn_update_ops])

    else:  # train_or_eval == 'eval':
      preds = [mtf.anonymize(pred) for pred in preds]

      # This is where the actual tf graph got built.
      lowering = mtf.Lowering(graph, {mesh: mesh_impl})

      tf_preds = [tf.cast(
          lowering.export_to_tf_tensor(pred), tf.float32) for pred in preds]

    tf_loss = tf.cast(lowering.export_to_tf_tensor(loss), tf.float32)
    if FLAGS.write_summary:
      tf_loss = _add_summary(
          lowering, train_or_eval, tf_loss, scalars, global_step)
    master_to_slice_hook = mtf.MtfRestoreHook(lowering)

    if train_or_eval == 'train':
      with mtf.utils.outside_all_rewrites():
        saver = tf.train.Saver(tf.global_variables(),
                               save_relative_paths=True)
        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
        saver_listener = mtf.MtfCheckpointSaverListener(lowering)
        slice_to_master_hook = tf.train.CheckpointSaverHook(
            FLAGS.checkpoint_dir,
            save_steps=FLAGS.save_checkpoints_steps,
            saver=saver, listeners=[saver_listener])
        captured_hooks.capture([master_to_slice_hook, slice_to_master_hook])
        return tf.group([tf_loss] + tf_update_ops)

    else:  # train_or_eval == 'eval':
      if FLAGS.use_tpu:
        tf_preds.extend([tf_loss, global_step])
        tf_preds_dtypes = [tf_pred.dtype for tf_pred in tf_preds]
        tf_preds_shapes = [tf_pred.shape for tf_pred in tf_preds]
        captured_hooks.capture([master_to_slice_hook, None])
        # Record dtypes/shapes so the outfeed can be dequeued elsewhere.
        captured_output_dtypes_shapes.capture(
            [tf_preds_dtypes, tf_preds_shapes])
        return tpu_ops.outfeed_enqueue_tuple(tf_preds)

      else:
        tf_preds.extend([tf_loss, global_step])
        captured_hooks.capture([master_to_slice_hook, None])
        return tf_preds
Exemple #9
0
def loss_function(y_true, x_out):
    """Return the mean squared difference of log(1 + y_true) and log(1 + x_out).

    Args:
        y_true: target values.
        x_out: model outputs.

    Returns:
        Scalar tensor holding the mean over all elements. No regularisation
        term is added here.
    """
    log_target = tf.log(y_true + 1)
    log_output = tf.log(x_out + 1)
    squared_gap = tf.pow(log_target - log_output, 2)
    return tf.reduce_mean(squared_gap)
def add_distance_loss_to_center(labels, logits, groundtruth_coords):
    """Add distance loss function for ClickRegression.

    The target center comes either from the ground-truth box or from the
    center of mass of the label mask. The loss is registered through
    ``tf.losses`` (to be collected later by ``tf.losses.get_total_loss()``);
    nothing is returned.

    Args:
        labels: label mask; entries equal to the dataset's ignore label are
            zeroed out.
        logits: predicted coordinates, divided by ``FLAGS.image_size`` here.
        groundtruth_coords: mapping with 'xmin', 'xmax', 'ymin', 'ymax'.
    """
    ignore_label = (
        model_input.dataset_descriptors[FLAGS.dataset].ignore_label)
    weights = tf.to_int32(tf.not_equal(labels, ignore_label))
    labels *= weights

    # Use GT box to get center if it exists. Less computation required.
    # Otherwise, calculate from label mask.
    if FLAGS.use_groundtruth_box:
        center_x = (groundtruth_coords['xmin'] +
                    groundtruth_coords['xmax']) / 2.0
        center_y = (groundtruth_coords['ymin'] +
                    groundtruth_coords['ymax']) / 2.0
        center = tf.stack([center_y, center_x], axis=1)
    else:
        # One (row, column) coordinate pair per pixel.
        rows, cols = tf.meshgrid(tf.range(FLAGS.image_size),
                                 tf.range(FLAGS.image_size),
                                 indexing='ij')
        flat_rows = tf.reshape(rows, (-1, ))
        flat_cols = tf.reshape(cols, (-1, ))
        coords = tf.cast(tf.stack([flat_rows, flat_cols], axis=-1), tf.int32)

        # Rearrange input into one vector per volume
        volumes_flat = tf.reshape(
            labels, [-1, FLAGS.image_size * FLAGS.image_size * 1, 1])
        # Total mass per volume, offset to prevent division by 0.
        mass_sum = tf.reduce_sum(volumes_flat, axis=1)
        total_mass = tf.cast(mass_sum, tf.float32) + ZERO_DIV_OFFSET
        # Centre of mass, then normalised by the image size.
        weighted_coords = tf.reduce_sum(volumes_flat * coords, axis=1)
        center = tf.cast(weighted_coords, tf.float32) / total_mass
        center = center / FLAGS.image_size

    # Normalize coordinates by size of image
    logits = logits / FLAGS.image_size

    # Calculate loss based on the distance metric specified
    metric = FLAGS.distance_metric
    if metric == 'mse':
        tf.losses.mean_squared_error(center, logits)
        return
    if metric not in ('euclidean', 'euclidean_sqrt', 'euclidean_iter'):
        return

    squared_distance = tf.reduce_sum(tf.square(logits - center), axis=-1)
    distance_to_center = tf.sqrt(squared_distance + ZERO_DIV_OFFSET)
    if FLAGS.ratio_box_distance:
        distance_to_box = calc_distance_to_edge(groundtruth_coords, logits)
        box_distance_to_center = (tf.to_float(distance_to_center) -
                                  distance_to_box)
        loss = distance_to_center / (box_distance_to_center +
                                     ZERO_DIV_OFFSET)
    else:
        loss = distance_to_center

    if metric == 'euclidean_sqrt':
        loss = tf.sqrt(loss)
    elif metric == 'euclidean_iter':
        # The exponent 1 / step shrinks as the global step grows.
        iter_num = tf.to_float(tf.train.get_or_create_global_step())
        step = (iter_num // FLAGS.euclidean_step) + 1.0
        loss = tf.pow(loss, tf.to_float(1.0 / step))
    tf.losses.compute_weighted_loss(loss)
    def _body(i, posterior, activation, center, masses):
        """Body of the EM while loop.

        Args:
            i: loop counter; used to anneal ``beta`` towards ``final_beta``.
            posterior: routing posterior from the previous iteration.
            activation: ignored; it is deleted immediately.
            center: centers from the previous iteration, blended into the
                new ones with weight 0.1.
            masses: ignored on input; it is recomputed before any use.

        Returns:
            ``(posterior, logit, center, masses)`` for the next iteration.
        """
        del activation
        beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
        # beta = final_beta
        # route: [outdim, height?, width?, batch, indim]
        vote_conf = posterior * input_activation
        # masses: [batch, 1, outdim, 1, height, width, 1, 1]
        # Summed over axis 1 and the last two axes; offset avoids division by 0.
        masses = tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            vote_conf, axis=1, keep_dims=True),
                                             axis=-1,
                                             keep_dims=True),
                               axis=-2,
                               keep_dims=True) + 0.0000001
        preactivate_unrolled = vote_conf * wx
        # center: [batch, 1, outdim, outatom, height, width]
        # 0.9 * confidence-weighted mean of the votes + 0.1 * previous center.
        center = .9 * tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            preactivate_unrolled, axis=1, keep_dims=True),
                                                  axis=-1,
                                                  keep_dims=True),
                                    axis=-2,
                                    keep_dims=True) / masses + .1 * center

        # Squared deviation of every vote from its center.
        noise = (wx - center) * (wx - center)
        variance = min_var + tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            vote_conf * noise, axis=1, keep_dims=True),
                                                         axis=-1,
                                                         keep_dims=True),
                                           axis=-2,
                                           keep_dims=True) / masses
        log_variance = tf.log(variance)
        # p_i is the negated sum of log-variances over axis 3.
        p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
        log_2pi = tf.log(2 * math.pi)
        win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
        logit = beta * (win - activation_biases * 5000)
        # min(0, x) - log(1 + exp(-|x|)) is a stable form of log(sigmoid(x)).
        activation_update = tf.minimum(
            0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
        # return activation, center
        # -p_i recovers the sum of log-variances computed above.
        log_det_sigma = -1 * p_i
        sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
        exp_update = tf.reduce_sum(noise / (2 * variance),
                                   axis=3,
                                   keep_dims=True)
        prior_update = activation_update - sigma_update - exp_update
        # Subtract the maximum over the last four axes before exponentiating.
        max_prior_update = tf.reduce_max(tf.reduce_max(tf.reduce_max(
            tf.reduce_max(prior_update, axis=-1, keep_dims=True),
            axis=-2,
            keep_dims=True),
                                                       axis=-3,
                                                       keep_dims=True),
                                         axis=-4,
                                         keep_dims=True)
        prior_normal = tf.add(prior_update, -1 * max_prior_update)
        prior_exp = tf.exp(prior_normal)
        # Swap axes 5 and 6 of the 8-D tensor, then flatten it into an
        # image-like [-1, n * k, n * k, 1] layout for patch extraction.
        t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
        c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
        # Zero-pad both spatial axes by (k - 1)^2 on each side.
        pad_prior = tf.pad(c_prior,
                           [[0, 0], [(k - 1) * (k - 1), (k - 1) * (k - 1)],
                            [(k - 1) * (k - 1),
                             (k - 1) * (k - 1)], [0, 0]], 'CONSTANT')
        # Dilated k x k patches (rate k - 1) taken with stride k.
        patch_prior = tf.extract_image_patches(images=pad_prior,
                                               ksizes=[1, k, k, 1],
                                               strides=[1, k, k, 1],
                                               rates=[1, k - 1, k - 1, 1],
                                               padding='VALID')
        # Sum each patch to get the normaliser.
        # NOTE(review): presumably this totals the priors competing for the
        # same input position - confirm against the layer's layout.
        sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
        sum_prior_patch = tf.extract_image_patches(images=sum_prior,
                                                   ksizes=[1, k, k, 1],
                                                   strides=[1, 1, 1, 1],
                                                   rates=[1, 1, 1, 1],
                                                   padding='VALID')
        # Reshape to an 8-D layout for the division; offset avoids
        # division by 0.
        sum_prior_reshape = tf.reshape(
            sum_prior_patch,
            [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001
        posterior = prior_exp / sum_prior_reshape
        return (posterior, logit, center, masses)
Exemple #12
0
def l2norm_sqrd(a, b):
    """Return the squared L2 distance between ``a`` and ``b`` along axis 1."""
    squared_difference = tf.pow(a - b, 2)
    return tf.reduce_sum(squared_difference, 1)


def l2(a, b):
    """Return the mean squared difference between ``a`` and ``b``."""
    squared_difference = tf.pow(a - b, 2)
    return tf.reduce_mean(squared_difference)
Exemple #13
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """See base class.

        For every (gradient, variable) pair this creates the optimizer slot
        variables, computes the momentum update (optionally with an adaptive
        step size, layer-wise scaling and the IGT correction) and collects
        the assign ops. ``global_step`` is accepted but not used here.

        Returns:
            A grouped op, named ``name``, running every assignment.
        """

        def _zero_slot(prefix, suffix, shape):
            # Non-trainable float32 state variable initialised to zeros.
            return tf.get_variable(name=six.ensure_str(prefix) + suffix,
                                   shape=shape,
                                   dtype=tf.float32,
                                   trainable=False,
                                   initializer=tf.zeros_initializer())

        assignments = []
        for grad, param in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = self._get_variable_name(param.name)

            m = _zero_slot(param_name, "/m", param.shape.as_list())

            # Note: shape is not passed here explicitly since tf.get_variable
            # complains when you do that while passing a Tensor as an initializer.
            prev_w_norm = tf.get_variable(
                name=six.ensure_str(param_name) + "/prev_w_norm",
                dtype=tf.float32,
                trainable=False,
                initializer=lambda w=param: tf.norm(w.initialized_value(),
                                                    ord=2))

            prev_eta = _zero_slot(param_name, "/prev_eta", [])
            prev_beta = _zero_slot(param_name, "/prev_beta", [])

            if self._do_use_weight_decay(param_name):
                grad += self.weight_decay_rate * param

            if not self.use_adaptive:
                eta = self.learning_rate
                beta = self.beta
            else:
                grad_squared_sum = _zero_slot(param_name,
                                              "/grad_squared_sum", [])
                max_grad = _zero_slot(param_name, "/max_grad", [])
                iteration = _zero_slot(param_name, "/iteration", [])

                # Running sum and running maximum of the gradient L2 norm.
                next_grad_squared_sum = grad_squared_sum + tf.norm(grad, 2)
                next_iteration = iteration + 1
                next_max_grad = tf.maximum(max_grad, tf.norm(grad, 2))
                assignments.extend([
                    grad_squared_sum.assign(next_grad_squared_sum),
                    iteration.assign(next_iteration),
                    max_grad.assign(next_max_grad)
                ])

                # Intuitively we should be able to leave g_sum=next_grad_squared_sum,
                # but current theory needs this extra t^1/4 max_grad term.
                g_sum = next_grad_squared_sum + tf.pow(next_iteration,
                                                       0.25) * next_max_grad

                eta_denominator = tf.pow(
                    tf.pow(next_iteration, 3.0) * tf.pow(g_sum, 2.0),
                    1.0 / 7.0)
                eta = self.learning_rate / eta_denominator
                a = tf.minimum(
                    1.0, 1.0 / (next_iteration * tf.pow(eta, 2.0) * g_sum))
                beta = 1.0 - a

            next_m = (tf.multiply(beta, m) + tf.multiply(1.0 - beta, grad))

            ratio = 1.0
            w_norm = tf.norm(param, ord=2)
            if self._do_layer_adaptation(param_name):
                g_norm = tf.norm(next_m, ord=2)
                # Trust ratio w_norm / g_norm, falling back to 1.0 whenever
                # either norm is not positive.
                ratio_if_w_positive = tf.where(tf.math.greater(g_norm, 0),
                                               (w_norm / g_norm), 1.0)
                ratio = self.gamma * tf.where(tf.math.greater(w_norm, 0),
                                              ratio_if_w_positive, 1.0)
            normalized_m_with_lr = ratio * eta * next_m

            if self.use_igt:
                prev_x = self.compute_x(param_name, param, m, prev_w_norm,
                                        prev_eta, prev_beta)
                next_x = prev_x - normalized_m_with_lr
                correction = tf.divide(
                    tf.multiply(beta, normalized_m_with_lr), beta - 1.0)
                next_param = next_x + correction
            else:
                next_param = param - normalized_m_with_lr

            assignments.extend([
                param.assign(next_param),
                m.assign(next_m),
                prev_w_norm.assign(w_norm),
                prev_eta.assign(eta),
                prev_beta.assign(beta)
            ])
        return tf.group(*assignments, name=name)
Exemple #14
0
def simple_linear_regression():
    """Fit PM2.5 against TEMP with a one-variable linear model and report costs.

    Trains ``y = w * x + b`` by per-sample gradient descent on the training
    set, periodically printing the cost and saving/showing a plot of the
    current fit, then evaluates the same halved mean-squared cost on the
    test set and prints the absolute difference between the two costs.

    Relies on module-level ``train_data_path``, ``test_data_path``,
    ``learning_rate``, ``training_epochs`` and ``display_step``.
    """
    raw_train_dataset = library.data_processing(train_data_path)
    # Change the variables here to train using different values
    X_d = pd.DataFrame(raw_train_dataset[['TEMP']]).to_numpy()
    Y_d = pd.DataFrame(raw_train_dataset[['PM2.5']]).to_numpy()

    # The batch dimension is left open: the same placeholder is fed single
    # samples during training, the full training set for the cost, and the
    # test set (a different row count) afterwards.
    X = tf.placeholder(tf.float32, [None, X_d.shape[1]], name='x')
    Y = tf.placeholder(tf.float32, name='y')

    # Scalar weight and bias. tf.Variable's second positional parameter is
    # `trainable`, not a shape, so no shape list is passed.
    w = tf.Variable(np.random.normal(), name='weight')
    b = tf.Variable(np.random.normal(), name='bias')

    y_pred = tf.add(tf.multiply(X, w), b)

    loss = tf.reduce_sum(tf.square(y_pred - Y)) / (2 * X_d.shape[0])
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        count = 0
        # Fit all training data
        for epoch in range(training_epochs):
            for (x, y) in zip(X_d, Y_d):
                # Wrap each row so it is fed as a batch of one sample.
                sess.run(optimizer, feed_dict={X: [x], Y: [y]})

            # Display logs per epoch step
            if epoch % display_step == 0:
                print(
                    "Epoch:", '%04d' % (epoch + 1), "cost=",
                    "{:.9f}".format(sess.run(loss, feed_dict={
                        X: X_d,
                        Y: Y_d
                    })), "W=", sess.run(w), "b=", sess.run(b))

                fig = plt.figure(figsize=(10, 10), dpi=100)
                ax = fig.add_subplot(111)
                ax.set_ylim(0, 1)
                ax.plot(X_d, Y_d, 'ro', label='Original data')

                ax.plot(X_d,
                        sess.run(w) * X_d + sess.run(b),
                        label='Fitted line')
                ax.legend()
                # Save before showing: once a blocking show() returns the
                # figure may already be torn down, giving a blank image.
                fig.savefig('plot_{:05d}.png'.format(count),
                            bbox_inches='tight',
                            dpi=100)
                plt.show()
                count = count + 1
                plt.close(fig)
        print("Optimization Finished!")
        training_cost = sess.run(loss, feed_dict={X: X_d, Y: Y_d})
        t_w = sess.run(w)
        t_b = sess.run(b)
        print("Training cost=", training_cost, "W=", t_w, "b=", t_b, '\n')

        raw_test_dataset = library.data_processing(test_data_path)
        # Change the variables here to test using different values
        X_test_d = pd.DataFrame(raw_test_dataset[['TEMP']]).to_numpy()
        Y_test_d = pd.DataFrame(raw_test_dataset[['PM2.5']]).to_numpy()
        print("Testing... (L2 loss Comparison)")
        # Same cost as in training, normalised by the test-set size.
        testing_cost = sess.run(tf.reduce_sum(tf.pow(y_pred - Y, 2)) /
                                (2 * X_test_d.shape[0]),
                                feed_dict={
                                    X: X_test_d,
                                    Y: Y_test_d
                                })
        print("Testing cost=", testing_cost)
        print("Absolute l2 loss difference:",
              abs(training_cost - testing_cost))
Exemple #15
0
def model_fn(features, labels, mode, params):
    """Construct a TPUEstimatorSpec for a model.

    Each training step built here (i) updates the shared model weights on a
    batch of training examples, (ii) measures the accuracy of the sampled
    architecture on a batch of validation examples, and (iii) applies a
    REINFORCE update to the RL controller, using that accuracy (optionally
    adjusted by an estimated cost) as the reward.

    Args:
      features: Mapping with 'train' and 'validation' entries. Each entry is
        transposed with permutation [3, 0, 1, 2] before being fed to the model.
      labels: Mapping with 'train' and 'validation' entries. The training
        labels are passed as `onehot_labels`; the validation labels are
        reduced with an argmax over axis 1.
      mode: A `tf.estimator.ModeKeys` value. Only TRAIN is supported.
      params: Mapping of hyperparameters; see the `params[...]` lookups in the
        body for the keys that are read.

    Returns:
      A `tf.estimator.tpu.TPUEstimatorSpec` whose loss is the shared model's
      loss and whose train_op is the RL controller update (which in turn
      depends on the shared-weight update).

    Raises:
      NotImplementedError: If `mode` is not TRAIN.
    """
    if mode != tf.estimator.ModeKeys.TRAIN:
        # `!r` is a conversion flag and must precede any `:`; the previous
        # '{:!r}' was an invalid format spec and raised ValueError instead.
        raise NotImplementedError(
            'Expected that mode == TRAIN, but got {!r}'.format(mode))

    # Data was transposed from NHWC to HWCN on the host side. Transpose it back.
    # This transposition will be optimized away by the XLA compiler. It serves
    # as a hint to the compiler that it should expect the input data to come
    # in HWCN format rather than NHWC.
    train_features = tf.transpose(features['train'], [3, 0, 1, 2])
    validation_features = tf.transpose(features['validation'], [3, 0, 1, 2])

    if params['use_bfloat16'] == 'ontpu':
        train_features = tf.cast(train_features, tf.bfloat16)
        validation_features = tf.cast(validation_features, tf.bfloat16)

    global_step = tf.train.get_global_step()

    # Randomly sample a network architecture.
    # The scope is opened only to create it; it is reused below as the name
    # for the controller and to look up its variables and update ops.
    with tf.variable_scope('rl_controller') as rl_scope:
        pass

    model_spec = mobile_classifier_factory.get_model_spec(params['ssd'])

    # Save the (unsampled) model spec next to the checkpoints.
    tf.io.gfile.makedirs(params['checkpoint_dir'])
    model_spec_filename = os.path.join(params['checkpoint_dir'],
                                       'model_spec.json')
    with tf.io.gfile.GFile(model_spec_filename, 'w') as handle:
        handle.write(schema_io.serialize(model_spec))

    increase_ops_prob = custom_layers.linear_decay(
        global_step, params['increase_ops_warmup_steps'])
    increase_filters_prob = custom_layers.linear_decay(
        global_step, params['increase_filters_warmup_steps'])
    model_spec, dist_info = controller.independent_sample(
        model_spec,
        increase_ops_probability=increase_ops_prob,
        increase_filters_probability=increase_filters_prob,
        name=rl_scope)

    if params['enable_cost_model']:
        cost_model_features = mobile_cost_model.coupled_tf_features(model_spec)
        estimated_cost = cost_model_lib.estimate_cost(cost_model_features,
                                                      params['ssd'])

    # We divide the regularization strength by 2 for backwards compatibility with
    # the deprecated tf.contrib.layers.l2_regularizer() function, which was used
    # in our published experiments.
    kernel_regularizer = tf.keras.regularizers.l2(
        params['model_weight_decay'] / 2)

    # Set up the basic TensorFlow training/inference graph.
    model = mobile_classifier_factory.get_model_for_search(
        model_spec, kernel_regularizer=kernel_regularizer)
    model.build(train_features.shape)

    with tf.name_scope('training'):
        model_logits, _ = model.apply(train_features, training=True)
        # Cast back to float32 (effectively only when using use_bfloat16 is true).
        model_logits = tf.cast(model_logits, tf.float32)

        model_empirical_loss = tf.losses.softmax_cross_entropy(
            onehot_labels=labels['train'],
            logits=model_logits,
            label_smoothing=0.1)
        model_regularization_loss = model.regularization_loss()
        model_loss = model_empirical_loss + model_regularization_loss

        # Set up the model weight training logic.
        model_learning_rate = custom_layers.cosine_decay_with_linear_warmup(
            peak_learning_rate=params['model_learning_rate'],
            global_step=global_step,
            max_global_step=params['max_global_step'],
            warmup_steps=params['model_warmup_steps'])

        model_optimizer = tf.tpu.CrossShardOptimizer(
            tf.train.RMSPropOptimizer(model_learning_rate,
                                      decay=0.9,
                                      momentum=params['model_momentum'],
                                      epsilon=1.0))

        model_vars = model.trainable_variables()
        model_update_ops = model.updates()
        with tf.control_dependencies(model_update_ops):
            grads_and_vars = model_optimizer.compute_gradients(
                model_loss, var_list=model_vars)
            if params['use_gradient_sync_barrier']:
                # Force all gradients to be computed before any are applied.
                grads_and_vars = _grads_and_vars_barrier(grads_and_vars)

            # NOTE: We do not pass `global_step` to apply_gradients(), so the global
            # step is not incremented by `model_optimizer`. The global_step will be
            # incremented later on, when we update the RL controller weights. If we
            # incremented it here too, we'd end up incrementing the global_step twice
            # at each training step.
            model_op = model_optimizer.apply_gradients(grads_and_vars)
            if params['use_gradient_sync_barrier']:
                # Finish computing gradients for the shared model weights before we
                # start on the RL update step.
                #
                # NOTE: The barrier above forces TensorFlow to finish computing grads
                # for all of the trainable variables before any of the grads can be
                # consumed. So while the call to with_data_dependencies() here only
                # explicitly depends on grads_and_vars[0][0], the call implicitly forces
                # TensorFlow to finish computing the gradients for *all* trainable
                # variables before computing the validation features.
                validation_features = layers.with_data_dependencies(
                    [grads_and_vars[0][0]], [validation_features])[0]

    with tf.name_scope('validation'):
        # Estimate the model accuracy on a batch of examples from the validation
        # set. Force this logic to run after the model optimization step.
        with tf.control_dependencies([model_op]):
            validation_logits, _ = model.apply(validation_features,
                                               training=False)

        # NOTE(b/130311965): An earlier version of this code cast validation_logits
        # from bfloat16 to float32 before applying an argmax when the --use_bfloat16
        # flag was true. As of cl/240923609, this caused XLA to compute incorrect
        # model accuracies. Please avoid casting from bfloat16 to float32 before
        # taking the argmax.
        is_prediction_correct = tf.equal(
            tf.argmax(validation_logits, axis=1),
            tf.argmax(labels['validation'], axis=1))
        validation_accuracy = tf.reduce_mean(
            tf.cast(is_prediction_correct, tf.float32))

    # Estimate the reward for the current network architecture and update the
    # reward to incorporate the cost of the network architecture.
    if params['enable_cost_model']:
        rl_stats = search_space_utils.reward_for_single_cost_model(
            validation_accuracy,
            rl_reward_function=params['rl_reward_function'],
            estimated_cost=estimated_cost,
            rl_cost_model_target=params['rl_cost_model_target'],
            rl_cost_model_exponent=params['rl_cost_model_exponent'])
        rl_cost_ratio = rl_stats['rl_cost_ratio']
        rl_reward = rl_stats['rl_reward']
        rl_cost_adjustment = rl_stats['rl_cost_adjustment']
    else:
        rl_reward = validation_accuracy

    # Compute a baseline. We first take a cross-replica sum of the rewards
    # for all the TPU shards, then incorporate the result into an exponential
    # moving average. Within a single batch, each TPU shard will select a
    # different set of op masks from the RL controller. Each shard will basically
    # evaluate a different candidate architecture in our search space.

    # Count the number of TPU shards (cores) used for training.
    num_tpu_shards = tf.tpu.cross_replica_sum(
        tf.ones(shape=(), dtype=rl_reward.dtype))
    rl_step_baseline = tf.tpu.cross_replica_sum(rl_reward)
    rl_step_baseline = rl_step_baseline / num_tpu_shards

    rl_baseline = custom_layers.update_exponential_moving_average(
        rl_step_baseline, momentum=params['rl_baseline_momentum'])

    # Apply a REINFORCE update to the RL controller.
    log_prob = dist_info['sample_log_prob']
    rl_advantage = rl_reward - rl_baseline
    # The advantage is treated as a constant so gradients only flow through
    # the log-probability of the sampled architecture.
    rl_empirical_loss = -tf.stop_gradient(rl_advantage) * log_prob

    # We set rl_entropy_loss proportional to (-entropy) so that minimizing the
    # loss will lead to an entropy that is as large as possible.
    rl_entropy = dist_info['entropy']
    rl_entropy_loss = -params['rl_entropy_regularization'] * rl_entropy

    # We use an RL learning rate of 0 for the first N epochs of training. See
    # Appendix A of FBNet. (https://arxiv.org/pdf/1812.03443.pdf). Although they
    # don't mention it explicitly, there are some indications that ProxylessNAS
    # (https://openreview.net/forum?id=HylVB3AqYm) might also be doing this.
    enable_rl_optimizer = tf.cast(
        tf.greater_equal(global_step, params['rl_delay_steps']), tf.float32)
    rl_learning_rate = params['rl_learning_rate'] * enable_rl_optimizer

    if params['use_exponential_rl_learning_rate_schedule']:
        #  rl_learning_rate_progress will be 0 when the RL controller starts
        #  learning and 1 when the search ends.
        rl_learning_rate_progress = tf.nn.relu(
            tf.div(
                tf.cast(global_step - params['rl_delay_steps'], tf.float32),
                max(1, params['max_global_step'] - params['rl_delay_steps'])))
        # exponentially increase the RL learning rate over time.
        rl_learning_rate_multiplier = tf.pow(10.0, rl_learning_rate_progress)
        rl_learning_rate = rl_learning_rate * rl_learning_rate_multiplier

    rl_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, rl_scope.name)
    with tf.control_dependencies(rl_update_ops):
        # In order to evaluate train_op, we must first evaluate validation_accuracy.
        # And to evaluate validation_accuracy, we must first evaluate model_op. So
        # running this op will perform a step of model training followed by
        # a step of RL controller training.
        if params['use_gradient_sync_barrier']:
            transform_grads_fn = _grads_and_vars_barrier
        else:
            transform_grads_fn = None

        train_op = tpu_optimizer_ops.apply_adam(
            rl_empirical_loss,
            regularization_loss=rl_entropy_loss,
            global_step=global_step,
            var_list=tf.trainable_variables(rl_scope.name),
            learning_rate=rl_learning_rate,
            beta1=0.0,
            beta2=0.999,
            epsilon=1e-8,
            transform_grads_fn=transform_grads_fn)

    # TensorBoard logging
    # An OrderedDict keeps keys and values aligned between the stack below and
    # the unstack performed inside host_call_fn.
    tensorboard_scalars = collections.OrderedDict([
        ('model/loss', model_loss),
        ('model/empirical_loss', model_empirical_loss),
        ('model/regularization_loss', model_regularization_loss),
        ('model/learning_rate', model_learning_rate),
        ('rlcontroller/empirical_loss', rl_empirical_loss),
        ('rlcontroller/entropy_loss', rl_entropy_loss),
        ('rlcontroller/validation_accuracy', validation_accuracy),
        ('rlcontroller/reward', rl_reward),
        ('rlcontroller/step_baseline', rl_step_baseline),
        ('rlcontroller/baseline', rl_baseline),
        ('rlcontroller/advantage', rl_advantage),
        ('rlcontroller/log_prob', log_prob),
    ])

    if params['enable_cost_model']:
        tensorboard_scalars['rlcontroller/estimated_cost'] = estimated_cost
        tensorboard_scalars['rlcontroller/cost_ratio'] = rl_cost_ratio
        tensorboard_scalars[
            'rlcontroller/cost_adjustment'] = rl_cost_adjustment
        # NOTE(review): the RL learning rate is only logged when the cost
        # model is enabled -- confirm that this is intentional.
        tensorboard_scalars['rlcontroller/learning_rate'] = rl_learning_rate

    tensorboard_scalars['rlcontroller/increase_ops_prob'] = increase_ops_prob
    tensorboard_scalars['rlcontroller/increase_filters_prob'] = (
        increase_filters_prob)

    # Log the values of all the choices made by the RL controller.
    for name_i, logits_i in dist_info['logits_by_path'].items():
        assert len(logits_i.shape) == 1, logits_i
        for j in range(int(logits_i.shape[0])):
            key = 'rlpathlogits/{:s}/{:d}'.format(name_i, j)
            tensorboard_scalars[key] = logits_i[j]

    for name_i, logits_i in dist_info['logits_by_tag'].items():
        assert len(logits_i.shape) == 1, logits_i
        for j in range(int(logits_i.shape[0])):
            key = 'rltaglogits/{:s}/{:d}'.format(name_i, j)
            tensorboard_scalars[key] = logits_i[j]

    # NOTE: host_call only works on rank-1 tensors. There's also a fairly
    # large performance penalty if we try to pass too many distinct tensors
    # from the TPU to the host at once. We avoid these problems by (i) calling
    # tf.stack to merge all of the float32 scalar values into a single rank-1
    # tensor that can be sent to the host relatively cheaply and (ii) reshaping
    # the remaining values from scalars to rank-1 tensors.
    def host_call_fn(step, scalar_values):
        # Split the stacked tensor back into one scalar per summary key and
        # write them, but only on steps that are a multiple of
        # params['tpu_iterations_per_loop'].
        values = tf.unstack(scalar_values)
        with tf2.summary.create_file_writer(
                params['checkpoint_dir']).as_default():
            with tf2.summary.record_if(
                    tf.math.equal(step[0] % params['tpu_iterations_per_loop'],
                                  0)):
                for key, value in zip(list(tensorboard_scalars.keys()),
                                      values):
                    tf2.summary.scalar(key, value, step=step[0])
                return tf.summary.all_v2_summary_ops()

    host_call_values = tf.stack(list(tensorboard_scalars.values()))
    host_call = (host_call_fn,
                 [tf.reshape(global_step, [1]), host_call_values])

    # Construct the estimator specification.
    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=model_loss,
                                             train_op=train_op,
                                             host_call=host_call)
Exemple #16
0
def order_loss(labels, logits, margin=0.2):
  """Squared-margin penalty for non-label logits that come too close to the label logit.

  Args:
    labels: Tensor multiplied element-wise with `logits`; entries equal to 1
      select the label activation and contribute no penalty themselves.
    logits: Tensor of activations, reduced over its last axis.
    margin: Required gap between the label activation and every other logit.

  Returns:
    A tensor of per-entry penalties: `(logit + margin - label_act) ** 2`
    where `logit > label_act - margin`, scaled by `(1 - labels)`, else 0.
  """
  non_label_mask = 1 - labels
  # Activation of the labelled entry, kept as a size-1 last axis for broadcasting.
  label_act = tf.reduce_sum(labels * logits, axis=-1, keep_dims=True)
  inside_margin = tf.cast(tf.greater(logits, label_act - margin), tf.float32)
  squared_violation = tf.pow(logits + margin - label_act, 2)
  return non_label_mask * inside_margin * squared_violation
Exemple #17
0
# Parameters of a three-layer fully connected network (9 -> 50 -> 50 -> 1).
# Weights are drawn from a truncated normal (stddev 0.1); biases start at 0.1.
weight1 = tf.Variable(tf.truncated_normal([9, 50], stddev=0.1))
bias1 = tf.Variable(tf.constant(0.1, shape=[50]))

weight2 = tf.Variable(tf.truncated_normal([50, 50], stddev=0.1))
bias2 = tf.Variable(tf.constant(0.1, shape=[50]))

weight3 = tf.Variable(tf.truncated_normal([50, 1], stddev=0.1))
bias3 = tf.Variable(tf.constant(0.1, shape=[1]))

# `data`, `x`, `y_` and `hidden_layer` are defined elsewhere in the file.
sample_size = len(data)
# Network output y
y = hidden_layer(x, weight1, bias1, weight2, bias2, weight3, bias3)

# Loss function: sum of squared errors divided by the number of samples.
error_loss = tf.reduce_sum(tf.pow(y_ - y, 2)) / sample_size
tf.add_to_collection("losses", error_loss)

# Add L2 regularization on the three weight matrices.
#regularizer = tf.contrib.layers.l2_regularizer(0.01)
regularizer = tf.keras.regularizers.l2(0.001)
regularization = regularizer(weight1) + regularizer(weight2) + regularizer(
    weight3)
tf.add_to_collection("losses", regularization)

# Total loss is the sum of everything registered in the "losses" collection.
loss = tf.add_n(tf.get_collection("losses"))

# Define the optimizer
train_op = tf.train.AdamOptimizer(0.05).minimize(loss)
#train_op = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
Exemple #18
0
        def body(i, old_adv_x, old_loss, labels=labels):
            """Find example with max loss value amongst batch of perturbations.

            Draws `deltas_shape` random perturbations, adds them to `x`,
            scores every perturbed input with the model, and keeps, per
            example, whichever of the previous best and the new best
            candidate has the larger loss.

            Reads `self`, `x` and `deltas_shape` from the enclosing scope.

            Args:
              i: Loop counter.
              old_adv_x: Best adversarial examples found so far.
              old_loss: Loss values associated with `old_adv_x`.
              labels: Labels for `x`, or None to use the model's own
                predictions as ground truth.

            Returns:
              A tuple `(i + 1, new_adv_x, new_loss)`.

            Raises:
              NotImplementedError: If `self.ord` is not np.inf, 1 or 2.
            """
            deltas = tf.random_uniform(deltas_shape)
            # Every axis after (batch, sample) belongs to a single input.
            # The previous upper bound `len(deltas_shape) - 2` skipped the
            # trailing axes, so the reductions below did not collapse to
            # one value per (batch, sample) pair.
            input_axes = list(range(2, len(deltas_shape)))

            # generate uniform samples from the l^p unit ball interior
            # NOTE(review): only the L-inf branch scales by self.eps; the L1
            # and L2 branches sample from the unit ball -- confirm intended.
            if self.ord == np.inf:
                deltas *= 2. * self.eps
                deltas -= self.eps
            elif self.ord == 1:
                # ref: https://mathoverflow.net/questions/9185/how-to-generate-random-points-in-ell-p-balls  pylint: disable=line-too-long
                exp = -tf.log(deltas)
                shift = -tf.log(tf.random_uniform(deltas_shape[:2]))
                norm = tf.reduce_sum(tf.abs(exp), input_axes)
                scale = tf.reshape(
                    shift + norm,
                    deltas_shape[:2] + [1] * (len(deltas_shape) - 2))
                deltas = exp / scale
            elif self.ord == 2:
                # ref: https://blogs.sas.com/content/iml/2016/04/06/generate-points-uniformly-in-ball.html  pylint: disable=line-too-long
                # Cast to float so that `1. / dims` is a valid float division
                # (an int32 tensor cannot be combined with a Python float).
                dims = tf.cast(tf.reduce_prod(deltas_shape[2:]), deltas.dtype)
                # NOTE(review): the radius is drawn independently per element
                # rather than once per sample -- confirm against the reference.
                deltas = tf.pow(deltas, 1. / dims)
                # random_normal expects a shape, not a tensor of values.
                normal = tf.random_normal(deltas_shape)
                # keepdims belongs to reduce_sum (tf.sqrt does not accept it)
                # so the norm broadcasts against `normal`.
                normal /= tf.sqrt(
                    tf.reduce_sum(normal**2, axis=input_axes, keepdims=True))
                deltas *= normal
            else:
                raise NotImplementedError('Only L-inf, L1 and L2 norms are '
                                          'currently implemented.')

            adv_x = tf.expand_dims(x, 1) + deltas

            if (self.clip_min is not None) and (self.clip_max is not None):
                adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

            # Fold the sample axis into the batch axis for the model call,
            # then restore it on the predictions.
            adv_x_r = tf.reshape(adv_x, [-1] + deltas_shape[2:])
            preds = self.model.get_probs(adv_x_r)
            preds_shape = preds.shape.as_list()
            preds = tf.reshape(preds, deltas_shape[:2] + preds_shape[1:])

            if labels is None:
                # Using model predictions as ground truth to avoid label leaking
                preds_max = tf.reduce_max(preds, -1, keep_dims=True)
                labels = tf.to_float(tf.equal(preds, preds_max))
                labels = tf.stop_gradient(labels)
            else:
                # Repeat the provided labels for every sampled perturbation.
                # This used to run before the None check above, which made
                # the prediction-as-label branch unreachable.
                labels = tf.expand_dims(labels, 1)
                labels = tf.tile(labels, [1, self.num_samples, 1])
            labels = labels / tf.reduce_sum(labels, -1, keep_dims=True)

            # Compute loss
            loss = utils_tf.model_loss(labels, preds, mean=False)
            if self.y_target is not None:
                loss = -loss

            # find the maximum loss value
            # A one-hot mask over the sample axis selects the best candidate.
            input_idx = tf.one_hot(tf.argmax(loss, axis=1),
                                   self.num_samples,
                                   axis=1)
            loss = tf.reduce_sum(loss * input_idx, axis=1)
            input_idx = tf.reshape(
                input_idx, deltas_shape[:2] + [1] * (len(deltas_shape) - 2))
            adv_x = tf.reduce_sum(adv_x * input_idx, axis=1)

            # Keep the previous best wherever it still has the larger loss.
            condition = tf.greater(old_loss, loss)
            new_loss = tf.where(condition, old_loss, loss)
            new_adv_x = tf.where(condition, old_adv_x, adv_x)

            return i + 1, new_adv_x, new_loss
        def _body(i, posterior, center, wx, activation_biases, sigma_biases,
                  input_activation, tile_filter):
            """Body of EM while loop.

            Performs one routing iteration: recomputes the centers and
            variances from the current posterior, derives the activation
            logit, and produces an updated posterior.

            Reads `final_beta`, `layer_name`, `h`, `ih`, `k`, `s`, `b`, `c`,
            `input_dim`, `min_var` and `num_out_atoms` from the enclosing
            scope.

            Returns:
              A tuple `(i + 1, posterior, logit, center, masses)`.
              NOTE(review): this has five elements while the function takes
              eight arguments -- confirm how the caller consumes it.
            """
            tf.logging.info('  Wx: %s', wx)

            # beta approaches final_beta as the iteration count grows.
            beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))

            # Debug print of the posterior's min and max on each evaluation.
            posterior = tf.Print(posterior, [
                layer_name, i, h, ih,
                tf.reduce_min(posterior),
                tf.reduce_max(posterior)
            ],
                                 message='posterior')
            # route: [outdim, height?, width?, batch, indim]
            with tf.name_scope('vote_conf'):
                vote_conf = posterior * input_activation
                vote_conf = tf.maximum(vote_conf, 0.0)

            # masses: [batch, 1, outdim, 1, height, width, 1, 1]
            # The small constant keeps the divisions by `masses` below finite.
            with tf.name_scope('masses'):
                masses = tf.reduce_sum(vote_conf,
                                       axis=[1, -1, -2],
                                       keepdims=True,
                                       name='masses_calculation') + 0.0000001
            with tf.name_scope('preactivate_unrolled'):
                preactivate_unrolled = vote_conf * wx

            # center: [batch, 1, outdim, outatom, height, width]
            # Blend the newly weighted mean (0.9) with the previous center (0.1).
            with tf.name_scope('center'):
                center = .9 * tf.reduce_sum(
                    preactivate_unrolled, axis=[1, -1, -2],
                    keepdims=True) / masses + .1 * center

            # Rematerialization to save GPU memory. (+22ms/-1.6GB)
            # @tf.contrib.layers.recompute_grad
            def compute_noise_and_variance(wx, center, vote_conf, masses):
                # noise: squared distance of each vote from the center;
                # variance: its vote_conf-weighted mean, offset by min_var.
                noise = tf.squared_difference(wx, center)
                variance = min_var + tf.reduce_sum(
                    vote_conf * noise,
                    axis=[1, -1, -2],
                    keepdims=True,
                    name='variance_calculation') / masses
                return noise, variance

            with tf.name_scope('compute_noise_and_variance'):
                noise, variance = compute_noise_and_variance(
                    wx, center, vote_conf, masses)

            with tf.name_scope('win'):
                log_variance = tf.log(variance)
                p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True)
                log_2pi = tf.log(2 * math.pi)
                sigma_b = tf.log(sigma_biases * sigma_biases + min_var)
                win = masses * (p_i - num_out_atoms *
                                (sigma_b + log_2pi + 1.0))
            with tf.name_scope('logit'):
                logit = beta * (win - activation_biases * 50 * num_out_atoms)
            with tf.name_scope('activation_update'):
                # Equals log(sigmoid(logit)), written so that exp() never
                # receives a positive argument.
                activation_update = tf.minimum(
                    0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
            with tf.name_scope('sigma_update'):
                log_det_sigma = -1 * p_i
                sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
            with tf.name_scope('exp_update'):
                exp_update = tf.reduce_sum(noise / (2 * variance),
                                           axis=3,
                                           keep_dims=True)
            prior_update = tf.subtract(activation_update - sigma_update,
                                       exp_update,
                                       name='prior_update_sub')
            # Subtract the maximum before exponentiating so that every
            # argument passed to exp() is <= 0.
            max_prior_update = tf.reduce_max(prior_update,
                                             axis=[2, 3, 4, 5, 6, 7],
                                             keepdims=True,
                                             name='max_prior_opdate')
            prior_normal = tf.add(prior_update, -1 * max_prior_update)
            prior_exp = tf.exp(prior_normal)
            prior_exp_out = tf.reduce_sum(prior_exp,
                                          axis=2,
                                          keepdims=True,
                                          name='prior_exp_out')
            prior_exp_reshape = tf.reshape(prior_exp_out, [-1, h, h, k * k],
                                           name='prior_exp_reshape')

            # Accumulate the per-patch sums back onto the [ih, ih] input grid
            # via a transposed convolution with `tile_filter`.
            sum_prior = tf.nn.conv2d_transpose(prior_exp_reshape,
                                               tile_filter,
                                               output_shape=[b * c, ih, ih, 1],
                                               strides=[1, s, s, 1],
                                               padding='VALID')
            # Lower-bound the normalizer before it is used as a divisor.
            sum_prior = tf.maximum(1e-6, sum_prior)

            sum_prior_patch = utils.kernel_tile(sum_prior,
                                                k,
                                                s,
                                                1,
                                                name='sum_prior_patch')

            with utils.maybe_jit_scope(), tf.name_scope('posterior'):
                sum_prior_reshape = tf.reshape(
                    sum_prior_patch, [-1, input_dim, 1, 1, h, h, k, k])
                # Normalize the exponentiated priors to get the new posterior.
                posterior = prior_exp / sum_prior_reshape

            return (i + 1, posterior, logit, center, masses)
Exemple #20
0
def gelu(x):
    """Apply the tanh approximation of the GeLU activation to `x`.

    Computes 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))).
    """
    cubic_term = x + 0.044715 * tf.pow(x, 3)
    gate = 1 + tf.tanh(np.sqrt(2 / np.pi) * cubic_term)
    return 0.5 * x * gate
Exemple #21
0
 def getDistance(self):
     """Return sqrt(mean((o1 - o2)^2, axis=1) + 1e-6) for the two outputs.

     The 1e-6 term is added inside the square root, so the result is never
     the square root of exactly zero.
     """
     squared_diff = tf.pow(self.o1 - self.o2, 2)
     mean_squared = tf.reduce_mean(squared_diff, axis=1)
     return tf.sqrt(mean_squared + 1e-6)
Exemple #22
0
def gelu(x):
    """GeLU activation, using the tanh-based approximation."""
    coefficient = np.sqrt(2 / np.pi)
    tanh_arg = coefficient * (x + 0.044715 * tf.pow(x, 3))
    return 0.5 * x * (1 + tf.tanh(tanh_arg))
# Training hyperparameters: the learning rate and the number of epochs.
learning_rate = 0.01
print("learning_rate", learning_rate)
training_epochs = 1000
print("training_epochs", training_epochs)

# Next, build the hypothesis, the cost function, and the optimizer. Gradient
# descent is not implemented by hand because TensorFlow provides it. After
# that, create the op that initializes the variables.
# `X`, `Y`, `W`, `b` and `n` are defined earlier in the file.

# Hypothesis: the linear model X * W + b.
y_pred = tf.add(tf.multiply(X, W), b)
print("y_pred=", y_pred)

# Cost function: sum of squared errors divided by 2 * n.
cost = tf.reduce_sum(tf.pow(y_pred - Y, 2)) / (2 * n)
print("cost=", cost)

# Gradient Descent Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
print("optimizer=", optimizer)

# Global Variables Initializer
init = tf.global_variables_initializer()

#Now we will begin the training process inside a Tensorflow Session.

# Starting the Tensorflow Session
with tf.Session() as sess:

    # Initializing the Variables
Exemple #24
0
    def __call__(self,
                 box_outputs,
                 class_outputs,
                 anchor_boxes,
                 image_shape,
                 regression_weights=None,
                 bbox_per_class=True,
                 distill_class_outputs=None):
        """Generate final detections.

        Converts class logits to foreground scores, decodes and clips the
        boxes, and, when NMS is enabled on this instance, runs the
        detection generator on the result.

        Args:
          box_outputs: a tensor of shape of [batch_size, K, num_classes * 4]
            representing the class-specific box coordinates relative to
            anchors.
          class_outputs: a tensor of shape of [batch_size, K, num_classes]
            representing the class logits before applying score activation.
          anchor_boxes: a tensor of shape of [batch_size, K, 4] representing
            the corresponding anchor boxes w.r.t `box_outputs`.
          image_shape: a tensor of shape of [batch_size, 2] storing the image
            height and width w.r.t. the scaled image, i.e. the same image
            space as `box_outputs` and `anchor_boxes`.
          regression_weights: A list of four float numbers to scale
            coordinates. Defaults to [10.0, 10.0, 5.0, 5.0] when None.
          bbox_per_class: A `bool`. If True, perform per-class box regression.
          distill_class_outputs: a float tensor of shape of
            [batch_size, K, num_classes-1] representing the distilled class
            logits before applying score activation, without the background
            class. Only read when `self._feat_distill == 'double_branch'`.

        Returns:
          A dict. When `self._apply_nms` is false it holds 'raw_boxes' (the
          decoded, clipped boxes) and 'raw_scores' (the foreground class
          scores). Otherwise it holds 'num_detections', 'detection_boxes',
          'detection_classes' (offset by 1 so that index 0 stays reserved for
          the background class) and 'detection_scores', as produced by
          `self._generate_detections`.
        """
        logits_shape = tf.shape(class_outputs)
        num_locations = logits_shape[1]
        # Class count including background, taken before any column is removed.
        num_classes = logits_shape[-1]

        # The background class (index 0) is dropped either before the softmax
        # (so it does not take part in the normalization) or after it.
        drop_begin = [0, 0, 1]
        drop_size = [-1, -1, -1]
        if self._discard_background:
            class_outputs = tf.slice(class_outputs, drop_begin, drop_size)
            class_outputs = tf.nn.softmax(class_outputs, axis=-1)
        else:
            class_outputs = tf.nn.softmax(class_outputs, axis=-1)
            class_outputs = tf.slice(class_outputs, drop_begin, drop_size)

        if self._feat_distill == 'double_branch':
            # Geometric mean of three score tensors: the distilled scores, the
            # regular scores, and a rare-mask blend of the two.
            distill_class_outputs = tf.nn.softmax(
                distill_class_outputs, axis=-1)  # [B, num_rois, num_classes]
            blended_scores = (
                (1.0 - self._rare_mask) * distill_class_outputs +
                self._rare_mask * class_outputs)
            score_product = (
                distill_class_outputs * class_outputs * blended_scores)
            class_outputs = tf.pow(score_product, 1.0 / 3.0)

        if bbox_per_class:
            num_foreground = num_classes - 1
            num_detections = num_locations * num_foreground
            # Split out the per-class boxes and drop the background class.
            per_class_boxes = tf.reshape(box_outputs,
                                         [-1, num_locations, num_classes, 4])
            per_class_boxes = tf.slice(per_class_boxes, [0, 0, 1, 0],
                                       [-1, -1, -1, -1])
            # Repeat every anchor once per foreground class.
            tiled_anchors = tf.tile(tf.expand_dims(anchor_boxes, axis=2),
                                    [1, 1, num_foreground, 1])
            box_outputs = tf.reshape(per_class_boxes, [-1, num_detections, 4])
            anchor_boxes = tf.reshape(tiled_anchors, [-1, num_detections, 4])

        # Box decoding.
        weights = ([10.0, 10.0, 5.0, 5.0]
                   if regression_weights is None else regression_weights)
        decoded_boxes = box_utils.decode_boxes(box_outputs,
                                               anchor_boxes,
                                               weights=weights)

        # Box clipping
        decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)

        if bbox_per_class:
            decoded_boxes = tf.reshape(decoded_boxes,
                                       [-1, num_locations, num_classes - 1, 4])
        else:
            # A single class-agnostic box per location.
            decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

        if self._apply_nms:
            nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
                self._generate_detections(decoded_boxes, class_outputs))

            # Adds 1 to offset the background class which has index 0.
            nmsed_classes += 1

            return {
                'num_detections': valid_detections,
                'detection_boxes': nmsed_boxes,
                'detection_classes': nmsed_classes,
                'detection_scores': nmsed_scores,
            }

        return {
            'raw_boxes': decoded_boxes,
            'raw_scores': class_outputs,
        }
Exemple #25
0
def linear_regression_categorical():
    """Fit PM2.5 against the one-hot encoded 'wd' column and report the costs.

    Trains a scalar weight and bias with per-sample gradient descent on the
    training data, plots the fit every `display_step` epochs, then compares
    the training cost with the cost on the test data.

    Reads the module-level `train_data_path`, `test_data_path`,
    `learning_rate`, `training_epochs` and `display_step`.
    """
    raw_train_dataset = library.data_processing(train_data_path)
    dummies = pd.get_dummies(pd.DataFrame(raw_train_dataset[['wd']]))
    X_d = dummies.to_numpy()
    Y_d = pd.DataFrame(raw_train_dataset[['PM2.5']]).to_numpy()

    X = tf.placeholder(tf.float32, name='x')
    Y = tf.placeholder(tf.float32, name='y')

    # A single scalar weight and bias, shared by every one-hot column.
    w = tf.Variable(np.random.normal(), name='weight')
    b = tf.Variable(np.random.normal(), name='bias')

    y_pred = tf.add(tf.multiply(X, w), b)

    # Sum of squared errors divided by twice the number of training rows.
    loss = tf.reduce_sum(tf.square(y_pred - Y)) / (2 * X_d.shape[0])
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        # Fit all training data
        for epoch in range(training_epochs):
            for (x, y) in zip(X_d, Y_d):
                x = x.reshape(1, X_d.shape[1])
                sess.run(optimizer, feed_dict={X: x, Y: y})

            # Display logs per epoch step
            if epoch % display_step == 0:
                print(
                    "Epoch:", '%04d' % (epoch + 1), "cost=",
                    "{:.9f}".format(sess.run(loss, feed_dict={
                        X: X_d,
                        Y: Y_d
                    })), "W=", sess.run(w), "b=", sess.run(b))

                fig = plt.figure(figsize=(10, 10), dpi=100)
                ax = fig.add_subplot(111)
                # Draw on the figure created above. Without `ax=`, pandas
                # opens a second figure that plt.close(fig) never closes,
                # leaving `fig` empty and leaking one figure per logged epoch.
                raw_train_dataset.plot.scatter(x='wd', y='PM2.5', ax=ax)
                ax.set_ylim(0, 1)

                ax.plot(X_d,
                        sess.run(w) * X_d + sess.run(b),
                        label='Fitted line')
                ax.legend()
                plt.show()
                plt.close(fig)
        print("Optimization Finished!")
        training_cost = sess.run(loss, feed_dict={X: X_d, Y: Y_d})
        t_w = sess.run(w)
        t_b = sess.run(b)
        print("Training cost=", training_cost, "W=", t_w, "b=", t_b, '\n')

        raw_test_dataset = library.data_processing(test_data_path)
        # Keep the test matrix under its own name instead of overwriting the
        # training `X_d`; it has one row per test sample, as before.
        X_test_d = pd.get_dummies(
            pd.DataFrame(raw_test_dataset[['wd']])).to_numpy()
        Y_test_d = pd.DataFrame(raw_test_dataset[['PM2.5']]).to_numpy()
        print("Testing... (L2 loss Comparison)")
        testing_cost = sess.run(tf.reduce_sum(tf.pow(y_pred - Y, 2)) /
                                (2 * X_test_d.shape[0]),
                                feed_dict={
                                    X: X_test_d,
                                    Y: Y_test_d
                                })
        print("Testing cost=", testing_cost)
        print("Absolute l2 loss difference:",
              abs(training_cost - testing_cost))
Exemple #26
0
def get_sampling_probability(hparams, is_training):
    """Returns the sampling probability as a tensor based on the hparams.

    Supports three sampling schedules (`hparams.sampling_schedule`):
      constant: `hparams.sampling_rate` is the sampling probability. Must be
        in the interval [0, 1].
      exponential: `hparams.sampling_rate` is the base of the decay
        exponential. Must be in the interval (0, 1). Larger values imply a
        slower increase in sampling.
      inverse_sigmoid: `hparams.sampling_rate` is in the interval [1, inf).
        Larger values imply a slower increase in sampling.

    A constant value of 0 is returned if `hparams.sampling_schedule` is
    undefined or empty, or if it is `constant` with a rate of 0. In that case
    no summary is written.

    If not training and a non-0 sampling schedule is defined, a constant value
    of 1 is returned since this is assumed to be a test/eval job associated
    with a scheduled sampling trainer. Note that in this case `hparams` is
    modified in place: `sampling_schedule` is overwritten with 'constant' and
    `sampling_rate` with 1.0.

    The hparams are validated before any graph op is created, and the global
    step is only looked up for the schedules that depend on it
    (`inverse_sigmoid` and `exponential`).

    Args:
      hparams: An HParams object containing model hyperparameters.
      is_training: Whether or not the model is being used for training.

    Returns:
      A float32 tensor holding the sampling probability. Except for the early
      constant-0 return, the same tensor is also recorded as the
      'sampling_probability' scalar summary.

    Raises:
      ValueError: On an invalid `sampling_schedule` or `sampling_rate` hparam,
        or if a step-dependent schedule is requested but no global step
        exists in the current graph.
    """
    if (not hasattr(hparams, 'sampling_schedule')
            or not hparams.sampling_schedule
            or (hparams.sampling_schedule == 'constant'
                and hparams.sampling_rate == 0)):
        return tf.constant(0.0)

    if not is_training:
        # This is likely an eval/test job associated with a training job using
        # scheduled sampling.
        tf.logging.warning(
            'Setting non-training sampling schedule from %s:%f to constant:1.0.',
            hparams.sampling_schedule, hparams.sampling_rate)
        # Deliberately mutates the caller's hparams (pre-existing behavior).
        hparams.sampling_schedule = 'constant'
        hparams.sampling_rate = 1.0

    schedule = hparams.sampling_schedule
    rate = hparams.sampling_rate

    # Validate everything up front so that a bad configuration fails fast,
    # before any op is added to the graph.
    if schedule == 'constant':
        if not 0 <= rate <= 1:
            raise ValueError(
                '`constant` sampling rate must be in the interval [0, 1]. Got %f.'
                % rate)
    elif schedule == 'inverse_sigmoid':
        if rate < 1:
            raise ValueError(
                '`inverse_sigmoid` sampling rate must be at least 1. Got %f.' %
                rate)
    elif schedule == 'exponential':
        if not 0 < rate < 1:
            raise ValueError(
                '`exponential` sampling rate must be in the interval (0, 1). Got %f.'
                % rate)
    else:
        raise ValueError('Invalid `sampling_schedule`: %s' % schedule)

    if schedule == 'constant':
        # Independent of the training step, so no global step is required.
        sampling_probability = tf.to_float(rate)
    else:
        global_step = tf.train.get_global_step()
        if global_step is None:
            raise ValueError(
                'The `%s` sampling schedule requires a global step, but none '
                'exists in the current graph.' % schedule)
        step = tf.to_float(global_step)
        k = tf.to_float(rate)
        if schedule == 'inverse_sigmoid':
            # Starts near 1 / (k + 1) at step 0 and approaches 1 as the step
            # grows.
            sampling_probability = 1.0 - k / (k + tf.exp(step / k))
        else:  # 'exponential'
            # Starts at 0 (k ** 0 == 1) and approaches 1 as k ** step decays.
            sampling_probability = 1.0 - tf.pow(k, step)

    tf.summary.scalar('sampling_probability', sampling_probability)
    return sampling_probability
Exemple #27
0
# Task 1 - 1
# TIES 4911
# Toni Pikkarainen
# 14.1.2020
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# In[2]:

# Constants
# Lecture02, slides 11-13
# With v2 behavior disabled, each call below only adds a node to the graph;
# the values in the comments are what the nodes yield when run in a session.
a = tf.constant(5)
b = tf.constant(2)
add_op = tf.add(a, b)  # a + b = 7
mul_op = tf.multiply(b, add_op)  # b * (a + b) = 14
pow_op = tf.pow(mul_op, b)  # (b * (a + b)) ** b = 196

# In[3]:

# Variables
# Lecture02, slides 15-17
var1 = tf.Variable(2, name="scalar1")  # initial value 2
var2 = tf.Variable(3, name="scalar2")  # initial value 3
# Only defines the assignment; var2 becomes 10 when assign_op is run.
assign_op = var2.assign(10)

# In[4]:

# Placeholders
# Lecture02, slides 19-20

# float32 vector of length 3 whose value is supplied at run time.
# NOTE: this rebinds the Python name `a`; add_op still refers to the constant
# tensor created earlier, not to this placeholder.
a = tf.placeholder(tf.float32, shape=[3])
Exemple #28
0
    # Hidden layer in the decoder with sigmoid activation 2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2


# Autoencoder model
# The decoder consumes the encoder's output, so decoder_op is the
# reconstruction of X.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# Prediction
# The target is the input itself: the network is trained to reproduce X.
y_pred = decoder_op
y_true = X

# Loss and optimizer, minimize the squared error
# Mean over all elements of the squared reconstruction error.
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

# Initialize the variables
# This only creates the initializer op; it is executed by sess.run(init) below.
init = tf.global_variables_initializer()

# Start Training
# Start a TensorFlow session
with tf.Session() as sess:
    sess.run(init)

    # Training
    # Steps are numbered 1..num_steps inclusive.
    for i in range(1, num_steps + 1):
        # Prepare Data
        # Get the next batch of MNIST data: only the images are kept, the
        # labels (second return value) are discarded.
        batch_x, _ = mnist.train.next_batch(batch_size)
Exemple #29
0
def l2(a, b):
    """Return the mean of the element-wise squared difference of `a` and `b`."""
    difference = a - b
    squared_difference = tf.pow(difference, 2)
    return tf.reduce_mean(squared_difference)


def show_graph_operations():
 def focal(self, target, actual, alpha=1, gamma=2):
     """Return `alpha * |target - actual| ** gamma`, computed element-wise.

     `self` is not used. With the defaults (`alpha=1`, `gamma=2`) this is the
     plain squared error between `target` and `actual`; no reduction is
     applied to the result.
     """
     error_magnitude = tf.abs(target - actual)
     return alpha * tf.pow(error_magnitude, gamma)