def get_box3d_corners_helper(centers, headings, sizes):
    """Compute the eight corners of N rotated 3D boxes (TF layer).

    Args:
        centers: (N, 3) float32 tensor of box centers.
        headings: (N,) float32 tensor of rotation angles about the y axis.
        sizes: (N, 3) float32 tensor whose columns are read as (l, w, h).

    Returns:
        (N, 8, 3) tensor of corner coordinates.

    N may be statically unknown: the constant entries of the rotation
    matrices are derived from `headings` instead of from the static shape
    of `centers`.
    """
    l = tf.slice(sizes, [0, 0], [-1, 1])  # (N,1)
    w = tf.slice(sizes, [0, 1], [-1, 1])  # (N,1)
    h = tf.slice(sizes, [0, 2], [-1, 1])  # (N,1)

    # Corner offsets in the box frame: l along x, h along y, w along z.
    x_corners = tf.concat([l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2], axis=1)  # (N,8)
    y_corners = tf.concat([h/2, h/2, h/2, h/2, -h/2, -h/2, -h/2, -h/2], axis=1)  # (N,8)
    z_corners = tf.concat([w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2], axis=1)  # (N,8)
    corners = tf.stack([x_corners, y_corners, z_corners], axis=1)  # (N,3,8)

    # Per-box rotation matrix about the y axis.
    c = tf.cos(headings)
    s = tf.sin(headings)
    # ones_like / zeros_like follow the runtime shape of `headings`, so the
    # batch size does not have to be known when the graph is built.
    ones = tf.ones_like(c)
    zeros = tf.zeros_like(c)
    row1 = tf.stack([c, zeros, s], axis=1)  # (N,3)
    row2 = tf.stack([zeros, ones, zeros], axis=1)
    row3 = tf.stack([-s, zeros, c], axis=1)
    R = tf.stack([row1, row2, row3], axis=1)  # (N,3,3)

    corners_3d = tf.matmul(R, corners)  # (N,3,8)
    # Translate to the box center; (N,3,1) broadcasts across the 8 corners.
    corners_3d += tf.expand_dims(centers, 2)
    return tf.transpose(corners_3d, perm=[0, 2, 1])  # (N,8,3)
Exemple #2
0
def get_log_rhos(target_action_log_probs, behaviour_action_log_probs):
    """Compute the log importance ratios used by the vtrace calculation.

    Both arguments are sequences of selected log-probabilities (one entry
    per component of a multi-discrete action) for the target and the
    behaviour policy. Each sequence is stacked along a new leading axis and
    the element-wise difference (target - behaviour) is summed over it.
    """
    target = tf.stack(target_action_log_probs)
    behaviour = tf.stack(behaviour_action_log_probs)
    return tf.reduce_sum(target - behaviour, axis=0)
Exemple #3
0
    def generate_system(self, eqs):
        """Build the stacked equation vector, its Jacobians and evaluators.

        Args:
            eqs: mapping from equation name to either a tensor or a string.
                Strings are evaluated with `eval` in the `self.env`
                namespace; other values are used as given.

        Sets `eqn_exp`, `eqn_vec`, `eqn_spec`, `eqn_sz`, `par_jac`,
        `var_jac`, `eqn_fun`, `parjac_fun` and `varjac_fun` on the instance.
        """
        def resolve(eq):
            # NOTE(review): eval() runs arbitrary code; equation strings
            # must come from a trusted source.
            if type(eq) is str:
                return eval(eq, self.env)
            return eq

        self.eqn_exp = dict((name, resolve(eq)) for name, eq in eqs.items())

        # Concatenate all equations along axis 0 and record each length.
        self.eqn_vec = tf.concat(list(self.eqn_exp.values()), 0)
        self.eqn_spec = dict(
            (name, int(expr.get_shape()[0]))
            for name, expr in self.eqn_exp.items())
        self.eqn_sz = sum(self.eqn_spec.values())

        # Split into one piece per row; each piece's gradient is a
        # Jacobian row.
        n_rows = self.eqn_vec.get_shape()[0]
        rows = tf.split(self.eqn_vec, n_rows)

        def jacobian(wrt):
            return tf.stack([tf.gradients(row, wrt)[0] for row in rows])

        self.par_jac = jacobian(self.par_vec)
        self.var_jac = jacobian(self.var_vec)

        def state_evaler(tensor, matrix=False):
            """Return a callable evaluating `tensor` at given (p, v)."""
            def ev(p, v):
                feed = {self.par_vec: p, self.var_vec: v}
                value = tensor.eval(feed)
                if matrix:
                    return ensure_matrix(value)
                return value
            return ev

        self.eqn_fun = state_evaler(self.eqn_vec)
        self.parjac_fun = state_evaler(self.par_jac, matrix=True)
        self.varjac_fun = state_evaler(self.var_jac, matrix=True)
  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Build the radial symmetry function and store it on the layer.

    The cutoff tensor is read from `in_layers[0]`, the distance tensor from
    `in_layers[1]` and, when `self.atomic_number_differentiated` is set,
    the atom numbers from `in_layers[2]`. When `in_layers` is None,
    `self.in_layers` is used. The result is assigned to `self.out_tensor`;
    nothing is returned.
    """
    layers = self.in_layers if in_layers is None else in_layers
    layers = convert_to_layers(layers)

    self.build()
    cutoff = layers[0].out_tensor
    distances = layers[1].out_tensor
    if self.atomic_number_differentiated:
      atom_numbers = layers[2].out_tensor
      atom_embedding = tf.nn.embedding_lookup(self.atom_number_embedding,
                                              atom_numbers)

    # Replicate both inputs `self.length` times along a new axis 3 so they
    # line up with the reshaped Rs / ita parameters.
    cutoff = tf.stack([cutoff] * self.length, axis=3)
    distances = tf.stack([distances] * self.length, axis=3)
    centers = tf.reshape(self.Rs, (1, 1, 1, -1))
    widths = tf.reshape(self.ita, (1, 1, 1, -1))
    radial = tf.exp(-widths * tf.square(distances - centers)) * cutoff

    if not self.atomic_number_differentiated:
      self.out_tensor = tf.reduce_sum(radial, axis=2)
      return

    # One masked sum per atom type, concatenated along the last axis.
    per_type = []
    for atom_type in self.atom_number_cases:
      type_column = atom_embedding[:, :, atom_type]
      type_mask = tf.expand_dims(tf.expand_dims(type_column, axis=1), axis=3)
      per_type.append(tf.reduce_sum(radial * type_mask, axis=2))
    self.out_tensor = tf.concat(per_type, axis=2)
  def iou(self, boxes1, boxes2):
    """Intersection-over-union of a grid of boxes against one box.

    Args:
      boxes1: 4-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4] holding
        (x_center, y_center, w, h).
      boxes2: 1-D tensor [4] holding (x_center, y_center, w, h).
    Return:
      iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    """
    # Convert both inputs from center/size form to corner form
    # (x_min, y_min, x_max, y_max).
    cx = boxes1[:, :, :, 0]
    cy = boxes1[:, :, :, 1]
    half_w = boxes1[:, :, :, 2] / 2
    half_h = boxes1[:, :, :, 3] / 2
    corners1 = tf.stack([cx - half_w, cy - half_h, cx + half_w, cy + half_h])
    # The stack put the coordinate axis first; move it to the end.
    corners1 = tf.transpose(corners1, [1, 2, 3, 0])
    corners2 = tf.stack([boxes2[0] - boxes2[2] / 2,
                         boxes2[1] - boxes2[3] / 2,
                         boxes2[0] + boxes2[2] / 2,
                         boxes2[1] + boxes2[3] / 2])

    # Corners of the overlap rectangle.
    upper_left = tf.maximum(corners1[:, :, :, 0:2], corners2[0:2])
    lower_right = tf.minimum(corners1[:, :, :, 2:], corners2[2:])
    overlap = lower_right - upper_left
    overlap_w = overlap[:, :, :, 0]
    overlap_h = overlap[:, :, :, 1]

    # The overlap only counts when it is positive along both axes.
    is_valid = (tf.cast(overlap_w > 0, tf.float32) *
                tf.cast(overlap_h > 0, tf.float32))
    inter_area = is_valid * (overlap_w * overlap_h)

    area1 = ((corners1[:, :, :, 2] - corners1[:, :, :, 0]) *
             (corners1[:, :, :, 3] - corners1[:, :, :, 1]))
    area2 = (corners2[2] - corners2[0]) * (corners2[3] - corners2[1])

    # The small epsilon guards against division by zero.
    return inter_area / (area1 + area2 - inter_area + 1e-6)
Exemple #6
0
def bboxes_resize(bbox_ref, bboxes, name=None):
    """Re-express bounding boxes relative to a reference bounding box.

    After the transform the reference box itself maps to [0, 0, 1, 1],
    which is what is needed to update boxes after cropping an image to
    `bbox_ref`. `bboxes` may be a tensor or a dict of tensors; for a dict,
    every value is resized and a new dict with the same keys is returned.
    """
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            return {key: bboxes_resize(bbox_ref, boxes)
                    for key, boxes in bboxes.items()}

    with tf.name_scope(name, 'bboxes_resize'):
        # Shift so that the reference box's first corner is the origin.
        origin = tf.stack([bbox_ref[0], bbox_ref[1],
                           bbox_ref[0], bbox_ref[1]])
        shifted = bboxes - origin
        # Divide by the reference box's extent along each axis.
        extent_0 = bbox_ref[2] - bbox_ref[0]
        extent_1 = bbox_ref[3] - bbox_ref[1]
        extent = tf.stack([extent_0, extent_1, extent_0, extent_1])
        return shifted / extent
  def _summarize_input(self, groundtruth_boxes_list, match_list):
    """Creates tensorflow summaries for the input boxes and anchors.

    Four scalar summaries are written, each an average over the images of
    the batch: the number of groundtruth boxes, and the number of anchors
    marked as positive, negative and ignored.

    Args:
      groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
        containing corners of the groundtruth boxes.
      match_list: a list of matcher.Match objects encoding the match between
        anchors and groundtruth boxes for each image of the batch,
        with rows of the Match objects corresponding to groundtruth boxes
        and columns corresponding to anchors.
    """
    boxes_per_image = tf.stack(
        [tf.shape(boxes)[0] for boxes in groundtruth_boxes_list])
    positives_per_image = tf.stack(
        [m.num_matched_columns() for m in match_list])
    negatives_per_image = tf.stack(
        [m.num_unmatched_columns() for m in match_list])
    ignored_per_image = tf.stack(
        [m.num_ignored_columns() for m in match_list])

    summaries = (
        ('Input/AvgNumGroundtruthBoxesPerImage', boxes_per_image),
        ('Input/AvgNumPositiveAnchorsPerImage', positives_per_image),
        ('Input/AvgNumNegativeAnchorsPerImage', negatives_per_image),
        ('Input/AvgNumIgnoredAnchorsPerImage', ignored_per_image),
    )
    for tag, counts in summaries:
      tf.summary.scalar(tag, tf.reduce_mean(tf.to_float(counts)))
      def hard_negative_mining():
        """Pick negatives per image with NMS and sum their class loss.

        Reads `bboxes`, `classification_loss`, `positives`,
        `neg_class_loss_all` and `num_priors` from the enclosing scope.
        Returns a pair: the stacked per-image loss over the kept negatives,
        and the total number of kept negatives across the batch.
        """
        per_image_inputs = zip(
            tf.unstack(bboxes),
            tf.unstack(classification_loss),
            tf.unstack(tf.reduce_sum(positives, axis=-1)),
            tf.unstack(neg_class_loss_all))

        kept_losses = []
        kept_counts = []
        for image_bboxes, image_loss, image_num_pos, image_neg_loss in per_image_inputs:
          # Aim for neg_pos_ratio negatives per positive (never fewer than
          # 3), capped by the number of nonzero negative-loss entries.
          wanted = tf.maximum(self.neg_pos_ratio * image_num_pos, 3)
          available = tf.count_nonzero(image_neg_loss, dtype=tf.float32)
          keep_count = tf.minimum(wanted, available)

          selected = tf.image.non_max_suppression(
              image_bboxes, image_loss, tf.to_int32(keep_count),
              iou_threshold=0.99)
          kept_counts.append(tf.size(selected))

          # Scatter the selected indices into a 0/1 vector over the priors.
          scatter_index = tf.expand_dims(selected, axis=1)  # shape: (num_negatives, 1)
          keep_mask = tf.scatter_nd(
              scatter_index,
              updates=tf.ones_like(selected, dtype=tf.int32),
              shape=tf.shape(image_loss))  # shape: (num_priors,)
          keep_mask = tf.to_float(tf.reshape(keep_mask, [num_priors]))  # shape: (num_priors,)
          kept_losses.append(tf.reduce_sum(image_loss * keep_mask, axis=-1))

        return tf.stack(kept_losses), tf.reduce_sum(tf.stack(kept_counts))
Exemple #9
0
 def _build_predict(self, Xnew, full_cov=False):
     """
     Build the predictive mean and variance of the latent function at the
     new points Xnew. For a derivation of the terms in here, see the
     associated SGPR notebook.

     Returns a (mean, var) pair. With full_cov=True the variance is the
     full covariance over Xnew, otherwise only its diagonal; in both cases
     it is tiled once per column of self.Y.
     """
     num_inducing = len(self.feature)
     residual = self.Y - self.mean_function(self.X)
     Kuf = self.feature.Kuf(self.kern, self.X)
     Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     Kus = self.feature.Kuf(self.kern, Xnew)
     noise_std = tf.sqrt(self.likelihood.variance)

     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / noise_std
     identity = tf.eye(num_inducing, dtype=settings.float_type)
     B = tf.matmul(A, A, transpose_b=True) + identity
     LB = tf.cholesky(B)
     A_residual = tf.matmul(A, residual)
     c = tf.matrix_triangular_solve(LB, A_residual, lower=True) / noise_std

     # Kus pushed through both triangular factors.
     Linv_Kus = tf.matrix_triangular_solve(L, Kus, lower=True)
     LBinv_Linv_Kus = tf.matrix_triangular_solve(LB, Linv_Kus, lower=True)
     mean = tf.matmul(LBinv_Linv_Kus, c, transpose_a=True)

     if full_cov:
         var = self.kern.K(Xnew) \
               + tf.matmul(LBinv_Linv_Kus, LBinv_Linv_Kus, transpose_a=True) \
               - tf.matmul(Linv_Kus, Linv_Kus, transpose_a=True)
         multiples = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), multiples)
     else:
         var = self.kern.Kdiag(Xnew) \
               + tf.reduce_sum(tf.square(LBinv_Linv_Kus), 0) \
               - tf.reduce_sum(tf.square(Linv_Kus), 0)
         multiples = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), multiples)
     return mean + self.mean_function(Xnew), var
def _my_top_k(x, k):
  """GPU-compatible version of top-k that works for very small constant k.

  Calls argmax repeatedly.

  tf.nn.top_k is implemented for GPU, but the gradient, sparse_to_dense,
  seems not to be, so if we use tf.nn.top_k, then both the top_k and its
  gradient go on cpu.  Once this is not an issue, this function becomes
  obsolete and should be replaced by tf.nn.top_k.

  For k > 10 the call is delegated to tf.nn.top_k.

  Args:
    x: a 2d Tensor.
    k: a small integer.

  Returns:
    values: a Tensor of shape [batch_size, k]
    indices: a int32 Tensor of shape [batch_size, k]
  """
  if k > 10:
    return tf.nn.top_k(x, k)
  values = []
  indices = []
  depth = tf.shape(x)[1]
  # `range` instead of the Python-2-only `xrange`; k is at most 10 here, so
  # building a list on Python 2 costs nothing.
  for i in range(k):
    values.append(tf.reduce_max(x, 1))
    argmax = tf.argmax(x, 1)
    indices.append(argmax)
    if i + 1 < k:
      # Push the current maximum far down so the next pass finds the
      # runner-up. `+=` rebinds the local name to a new tensor.
      x += tf.one_hot(argmax, depth, -1e9)
  return tf.stack(values, axis=1), tf.to_int32(tf.stack(indices, axis=1))
Exemple #11
0
  def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """Radial symmetry function.

    Evaluates exp(-ita * (d - Rs)^2) * d_cutoff for `self.radial_length`
    centers Rs spaced evenly over [0, self.radial_cutoff], then sums over
    axis 2. When `self.atomic_number_differentiated` is set, one masked sum
    is produced per entry of `self.atom_cases` and the results are
    concatenated along the last axis.
    """
    # Identity matrix used as a lookup table: row i selects atom number i.
    lookup = tf.eye(np.max(self.atom_cases) + 1)
    atom_embedding = tf.nn.embedding_lookup(lookup, atom_numbers)

    centers_np = np.linspace(0., self.radial_cutoff, self.radial_length)
    # Width parameter derived from the spacing of neighbouring centers.
    widths_np = np.ones_like(centers_np) * 3 / (centers_np[1] - centers_np[0])**2
    centers = tf.cast(np.reshape(centers_np, (1, 1, 1, -1)), tf.float32)
    widths = tf.cast(np.reshape(widths_np, (1, 1, 1, -1)), tf.float32)
    n_centers = widths.get_shape().as_list()[-1]

    # Replicate the inputs along a new axis 3, one copy per center.
    cutoff = tf.stack([d_cutoff] * n_centers, axis=3)
    distances = tf.stack([d] * n_centers, axis=3)

    radial = tf.exp(-widths * tf.square(distances - centers)) * cutoff
    if not self.atomic_number_differentiated:
      return tf.reduce_sum(radial, axis=2)

    per_type = []
    for atom_type in self.atom_cases:
      type_column = atom_embedding[:, :, atom_type]
      type_mask = tf.expand_dims(tf.expand_dims(type_column, axis=1), axis=3)
      per_type.append(tf.reduce_sum(radial * type_mask, axis=2))
    return tf.concat(per_type, axis=2)
Exemple #12
0
  def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian.

    For every shard in `data` and every class, computes
    sqrt(diff * inv(cov + min_var) * diff^T), where diff is the shard minus
    the class mean. The per-class scores are stored in `self._all_scores`
    and the score of each example's assigned class in `self._scores`.

    Args:
      data: iterable of shard tensors.
    """
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    # NOTE(review): tf.concat_v2 exists only in old TensorFlow releases;
    # it is kept as-is because the replacement depends on the TF version
    # this file targets.
    self._all_scores = []
    for shard in data:
      all_scores = []
      shard = tf.expand_dims(shard, 0)
      # `range` instead of the Python-2-only `xrange`.
      for c in range(self._num_classes):
        if self._covariance_type == FULL_COVARIANCE:
          cov = self._covs[c, :, :]
        elif self._covariance_type == DIAG_COVARIANCE:
          cov = tf.diag(self._covs[c, :])
        inverse = tf.matrix_inverse(cov + self._min_var)
        # One copy of the inverse covariance per example, for a batched
        # matmul.
        inv_cov = tf.tile(
            tf.expand_dims(inverse, 0), tf.stack([self._num_examples, 1, 1]))
        diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
        m_left = tf.matmul(diff, inv_cov)
        all_scores.append(
            tf.sqrt(tf.matmul(
                m_left, tf.transpose(
                    diff, perm=[0, 2, 1]))))
      self._all_scores.append(
          tf.reshape(
              tf.concat_v2(all_scores, 1),
              tf.stack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat_v2(self._all_scores, 0)
    assignments = tf.concat_v2(self.assignments(), 0)
    rows = tf.to_int64(tf.range(0, self._num_examples))
    # (row, assigned class) pairs used to pick one score per example.
    indices = tf.concat_v2(
        [tf.expand_dims(rows, 1), tf.expand_dims(assignments, 1)], 1)
    self._scores = tf.gather_nd(self._all_scores, indices)
def get_filters(R, filter_size, P=None, n_rings=None):
    """Perform single-frequency DFT on each ring of a polar-resampled patch.

    Args:
        R: dict mapping an integer order m to a 3-D tensor with a fully
            defined static shape.
        filter_size: side length k of the square output filters.
        P: optional container indexed by m; when given, P[m] is used as a
            phase offset that rotates the (cos, sin) filter pair.
        n_rings: forwarded to `get_interpolation_weights`.

    Returns:
        dict mapping each m to a (ucos, usin) pair of tensors of shape
        [k, k, r.shape[1], r.shape[2]].
    """
    k = filter_size
    filters = {}
    N = n_samples(k)
    from scipy.linalg import dft
    # `.items()` instead of the Python-2-only `.iteritems()`.
    for m, r in R.items():
        rsh = r.get_shape().as_list()
        # Get the basis matrices
        weights = get_interpolation_weights(k, m, n_rings=n_rings)
        DFT = dft(N)[m, :]
        LPF = np.dot(DFT, weights).T

        cosine = tf.constant(np.real(LPF).astype(np.float32))
        sine = tf.constant(np.imag(LPF).astype(np.float32))
        # Project taps on to rotational basis
        r = tf.reshape(r, tf.stack([rsh[0], rsh[1]*rsh[2]]))
        out_shape = tf.stack([k, k, rsh[1], rsh[2]])
        ucos = tf.reshape(tf.matmul(cosine, r), out_shape)
        usin = tf.reshape(tf.matmul(sine, r), out_shape)
        if P is not None:
            # Rotate basis matrices; both outputs are computed from the
            # un-rotated pair.
            ucos_ = tf.cos(P[m])*ucos + tf.sin(P[m])*usin
            usin = -tf.sin(P[m])*ucos + tf.cos(P[m])*usin
            ucos = ucos_
        filters[m] = (ucos, usin)
    return filters
Exemple #14
0
  def _batch_decode(self, box_encodings):
    """Decodes a batch of box encodings with respect to the anchors.

    The anchors are tiled once per batch element, the batch is flattened so
    the box coder can decode everything in one call, and the results are
    reshaped back to batch form.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if present in the input `box_encodings`, None otherwise.
    """
    dims = shape_utils.combined_static_and_dynamic_shape(box_encodings)
    batch_size = dims[0]

    anchors_per_batch = tf.tile(
        tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
    flat_anchors = box_list.BoxList(tf.reshape(anchors_per_batch, [-1, 4]))
    flat_encodings = tf.reshape(box_encodings,
                                [-1, self._box_coder.code_size])
    decoded = self._box_coder.decode(flat_encodings, flat_anchors)

    keypoints = None
    if decoded.has_field(fields.BoxListFields.keypoints):
      keypoints = decoded.get_field(fields.BoxListFields.keypoints)
      num_keypoints = keypoints.get_shape()[1]
      keypoints_shape = tf.stack([dims[0], dims[1], num_keypoints, 2])
      keypoints = tf.reshape(keypoints, keypoints_shape)

    boxes_shape = tf.stack([dims[0], dims[1], 4])
    boxes = tf.reshape(decoded.get(), boxes_shape)
    return boxes, keypoints
def bboxes_crop_or_pad(bboxes,
                       height, width,
                       offset_y, offset_x,
                       target_height, target_width):
    """Adapt bounding boxes to crop or pad operations.

    Boxes go from relative coordinates to pixels of the original image, are
    shifted by the offset, and are then divided by the target size so they
    are relative to the target image.

    Arguments:
      bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max];
      height, width: Original image dimension;
      offset_y, offset_x: Offset to apply,
        negative if cropping, positive if padding;
      target_height, target_width: Target dimension after cropping / padding.
    """
    with tf.name_scope('bboxes_crop_or_pad'):
        box_dtype = bboxes.dtype

        def as_yxyx(y_value, x_value):
            # [y, x, y, x] vector matching the box layout and dtype.
            return tf.cast(
                tf.stack([y_value, x_value, y_value, x_value]), box_dtype)

        in_pixels = bboxes * as_yxyx(height, width)
        shifted = in_pixels + as_yxyx(offset_y, offset_x)
        return shifted / as_yxyx(target_height, target_width)
    def _transform(theta, input_dim, out_size):
        """Sample `input_dim` on a grid warped by the 2x3 matrices `theta`.

        `theta` is reshaped to (-1, 2, 3), `out_size` is read as
        (height, width), and the result has shape
        [num_batch, out_height, out_width, num_channels].
        """
        in_shape = tf.shape(input=input_dim)
        num_batch = in_shape[0]
        num_channels = in_shape[3]
        theta = tf.cast(tf.reshape(theta, (-1, 2, 3)), 'float32')

        out_height, out_width = out_size[0], out_size[1]

        # Grid of (x_t, y_t, 1), eq (1) in ref [1]; flattened, repeated once
        # per batch element, and reshaped to [num_batch, 3, -1].
        base_grid = tf.expand_dims(_meshgrid(out_height, out_width), 0)
        flat_grid = tf.reshape(base_grid, [-1])
        tiled_grid = tf.tile(flat_grid, tf.stack([num_batch]))
        batch_grid = tf.reshape(tiled_grid, tf.stack([num_batch, 3, -1]))

        # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
        source_coords = tf.matmul(theta, batch_grid)
        x_source = tf.slice(source_coords, [0, 0, 0], [-1, 1, -1])
        y_source = tf.slice(source_coords, [0, 1, 0], [-1, 1, -1])
        x_flat = tf.reshape(x_source, [-1])
        y_flat = tf.reshape(y_source, [-1])

        sampled = _interpolate(input_dim, x_flat, y_flat, out_size)

        out_shape = tf.stack([num_batch, out_height, out_width, num_channels])
        return tf.reshape(sampled, out_shape)
Exemple #17
0
    def __init__(self, config):
        """Build the placeholders, encodings and the mean/covariance of Psi.

        Sets `self.inputs`, `self.encodings`, `self.psi_mean` and
        `self.psi_covariance` from the evidence objects in
        `config.evidence`.
        """
        self.inputs = [ev.placeholder(config) for ev in config.evidence]
        exists = [ev.exists(inp)
                  for ev, inp in zip(config.evidence, self.inputs)]
        zero_encoding = tf.zeros([config.batch_size, config.latent_size],
                                 dtype=tf.float32)

        # Denominator shared by the mean and the covariance: one plus the
        # sum of 1 / sigma^2 over the evidences that exist.
        for ev in config.evidence:
            ev.init_sigma(config)
        inv_variances = []
        for ev, present in zip(config.evidence, exists):
            tiled = tf.tile([1. / tf.square(ev.sigma)], [config.batch_size])
            inv_variances.append(
                tf.where(present, tiled, tf.zeros(config.batch_size)))
        total = 1. + tf.reduce_sum(tf.stack(inv_variances), axis=0)
        denom = tf.tile(tf.reshape(total, [-1, 1]), [1, config.latent_size])

        # Compute the mean of Psi
        with tf.variable_scope('mean'):
            # 1. encode every input and scale it by 1 / sigma^2
            self.encodings = [ev.encode(inp, config)
                              for ev, inp in zip(config.evidence, self.inputs)]
            scaled = [enc / tf.square(ev.sigma)
                      for ev, enc in zip(config.evidence, self.encodings)]

            # 2. keep encodings of inputs that exist, zeros otherwise
            masked = [tf.where(present, enc, zero_encoding)
                      for present, enc in zip(exists, scaled)]

            # 3. repeat each encoding `ev.tile` times
            repeated = []
            for ev, enc in zip(config.evidence, masked):
                repeated.extend([enc] * ev.tile)
            stacked = tf.stack(repeated)

            # 4. sum the encodings and normalise
            self.psi_mean = tf.reduce_sum(stacked, axis=0) / denom

        # Compute the covariance of Psi
        with tf.variable_scope('covariance'):
            all_ones = tf.ones([config.batch_size, config.latent_size],
                               dtype=tf.float32)
            self.psi_covariance = all_ones / denom
def hnet_transformation(gt_pts, transformation_coeffcient, name):
    """Project points with H, fit a cubic x = f(y), and map the fit back.

    :param gt_pts: point tensor of shape (N, 3); it is transposed and
        left-multiplied by the 3x3 matrix H.
    :param transformation_coeffcient: 6 coefficients (a, b, c, d, e, f)
        that fill H = [[a, b, c], [0, d, e], [0, f, 1]].
    :param name: variable scope name.
    :return: tensor of shape (3, N): the fitted points mapped back through
        the inverse of H.
    """
    with tf.variable_scope(name):
        # Build H: scatter the six coefficients plus a trailing 1.0 into
        # the flat positions 0, 1, 2, 4, 5, 7, 8 of a 3x3 matrix.
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        # Project the label points.
        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Least-squares fit of a third-order polynomial x = f(y) through
        # the normal equations w = (Y^T Y)^-1 Y^T x.
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))

        # Evaluate the fit and map the predicted points back through H^-1.
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

    return x_transformation_back
  def _transform(theta, input_dim, out_size, z_near, z_far):
    """Sample the 5-D `input_dim` on a grid warped by 4x4 matrices `theta`.

    `theta` is reshaped to (-1, 4, 4), `out_size` is read as
    (depth, height, width), and the batch and channel sizes come from the
    static shape of `input_dim`. The result has shape
    [num_batch, out_depth, out_height, out_width, num_channels].
    """
    with tf.variable_scope('_transform'):
      static_shape = input_dim.get_shape().as_list()
      num_batch = static_shape[0]
      num_channels = static_shape[4]
      theta = tf.cast(tf.reshape(theta, (-1, 4, 4)), 'float32')

      out_depth, out_height, out_width = out_size[0], out_size[1], out_size[2]

      # Target grid, flattened, repeated once per batch element and
      # reshaped to [num_batch, 4, -1].
      base_grid = _meshgrid(out_depth, out_height, out_width, z_near, z_far)
      flat_grid = tf.reshape(tf.expand_dims(base_grid, 0), [-1])
      tiled_grid = tf.tile(flat_grid, tf.stack([num_batch]))
      batch_grid = tf.reshape(tiled_grid, tf.stack([num_batch, 4, -1]))

      # Transform A x (x_t', y_t', 1, d_t)^T -> (x_s, y_s, z_s, 1).
      source_coords = tf.matmul(theta, batch_grid)
      # Rows of the product are read as z, y, x, in that order.
      z_flat = tf.reshape(tf.slice(source_coords, [0, 0, 0], [-1, 1, -1]), [-1])
      y_flat = tf.reshape(tf.slice(source_coords, [0, 1, 0], [-1, 1, -1]), [-1])
      x_flat = tf.reshape(tf.slice(source_coords, [0, 2, 0], [-1, 1, -1]), [-1])

      sampled = _interpolate(input_dim, x_flat, y_flat, z_flat, out_size)

      out_shape = tf.stack(
          [num_batch, out_depth, out_height, out_width, num_channels])
      return tf.reshape(sampled, out_shape)
Exemple #20
0
def language_model(input, vocab_size):
  r"""Form p(x[0], ..., x[timesteps - 1]),

  \prod_{t=0}^{timesteps - 1} p(x[t] | x[:t]),

  To calculate the probability, we call log_prob on
  x = [x[0], ..., x[timesteps - 1]] given
  `input` = [0, x[0], ..., x[timesteps - 2]].

  We implement this separately from the generative model so the
  forward pass, e.g., embedding/dense layers, can be parallelized.

  [batch_size, timesteps] -> [batch_size, timesteps]

  Args:
    input: integer tensor of token ids, one-hot encoded to depth
      `vocab_size` before being fed to the LSTM.
    vocab_size: number of classes of the one-hot encoding and of the
      output logits.

  Returns:
    A Categorical distribution parameterized by the per-timestep logits.
  """
  # The docstring is a raw string because it contains a LaTeX backslash
  # (`\prod`), which is an invalid escape in a normal string literal.
  x = tf.one_hot(input, depth=vocab_size, dtype=tf.float32)
  # Zero initial hidden and cell state, sized by the runtime batch
  # dimension.
  h = tf.fill(tf.stack([tf.shape(x)[0], FLAGS.hidden_size]), 0.0)
  c = tf.fill(tf.stack([tf.shape(x)[0], FLAGS.hidden_size]), 0.0)
  hs = []
  reuse = None
  for t in range(FLAGS.timesteps):
    # Timesteps after the first pass reuse=True for the "lstm" cell.
    if t > 0:
      reuse = True
    xt = x[:, t, :]
    h, c = lstm_cell(xt, h, c, name="lstm", reuse=reuse)
    hs.append(h)

  h = tf.stack(hs, 1)
  logits = tf.layers.dense(h, vocab_size, name="dense")
  output = Categorical(logits=logits)
  return output
def hnet_loss(gt_pts, transformation_coeffcient, name):
    """Loss of a polynomial fit carried out in the H-projected space.

    :param gt_pts: original label points, rows of [x, y, 1]
    :param transformation_coeffcient: parameters of the projection matrix
        (6-parameter matrix) [[a, b, c], [0, d, e], [0, f, 1]]
    :param name: variable scope name
    :return: mean squared difference between the original x coordinates
        and the fitted x coordinates mapped back through the inverse of H
    """
    with tf.variable_scope(name):
        # Assemble H by scattering the coefficients (plus a trailing 1.0)
        # into the flat positions 0, 1, 2, 4, 5, 7, 8 of a 3x3 matrix.
        coefficients = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        flat_positions = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        flat_shape = tf.constant([9])
        H = tf.reshape(
            tf.scatter_nd(flat_positions, coefficients, flat_shape),
            shape=[3, 3])

        # Project the label points.
        gt_pts = tf.transpose(gt_pts)
        projected = tf.matmul(H, gt_pts)

        # Least-squares polynomial fit x = f(y) via the normal equations.
        y_proj = tf.transpose(projected[1, :])
        x_proj = tf.transpose(projected[0, :])
        ones = tf.add(tf.subtract(y_proj, y_proj), tf.constant(1.0, tf.float32))
        design = tf.stack(
            [tf.pow(y_proj, 3), tf.pow(y_proj, 2), y_proj, ones], axis=1)
        gram_inverse = tf.matrix_inverse(
            tf.matmul(tf.transpose(design), design))
        pseudo_inverse = tf.matmul(gram_inverse, tf.transpose(design))
        weights = tf.matmul(pseudo_inverse, tf.expand_dims(x_proj, -1))

        # Evaluate the fit, map it back to the original space and compare
        # the x coordinates.
        x_fit = tf.matmul(design, weights)
        fitted_pts = tf.transpose(
            tf.stack([tf.squeeze(x_fit, -1), y_proj, ones], axis=1))
        back_projected = tf.matmul(tf.matrix_inverse(H), fitted_pts)

        loss = tf.reduce_mean(tf.pow(gt_pts[0, :] - back_projected[0, :], 2))

    return loss
  def testConst(self):
    """Packing constant data yields a Const op, for tf.pack and tf.stack."""
    np.random.seed(7)
    with self.test_session(use_gpu=True):
      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
        data = np.random.randn(*shape).astype(np.float32)
        # A numpy array packs directly into a single tensor, implemented
        # via a Const.
        packed = tf.pack(data)
        self.assertEqual(packed.op.type, "Const")
        self.assertAllEqual(packed.eval(), data)

        # Python lists also work for the 1-D case.
        if len(shape) != 1:
          continue
        data_list = list(data)
        for pack_fn in (tf.pack, tf.stack):
          packed_list = pack_fn(data_list)
          self.assertEqual(packed_list.op.type, "Const")
          self.assertAllEqual(packed_list.eval(), data)

      # Verify that shape induction works with shapes produced via const pack
      a = tf.constant([1, 2, 3, 4, 5, 6])
      for pack_fn in (tf.pack, tf.stack):
        b = tf.reshape(a, pack_fn([2, 3]))
        self.assertAllEqual(b.get_shape(), [2, 3])
Exemple #23
0
def getImage(filenames):
	"""Read one example from TFRecord files and return (label, image).

	The label is one-hot encoded over `nLabel` classes after subtracting 1
	from the stored value. The image bytes are decoded as raw float32 and
	reshaped to a flat vector of FLAGS.width * FLAGS.height * FLAGS.depth
	elements.
	"""
	# Queue of filenames feeding the input pipeline (no epoch limit).
	filename_queue = tf.train.string_input_producer(filenames, num_epochs=None)

	# Read one serialized record from the queue.
	reader = tf.TFRecordReader()
	key, serialized = reader.read(filename_queue)

	# Schema of the stored example.
	feature_spec = {
		'image/height': tf.FixedLenFeature([], tf.int64),
		'image/width': tf.FixedLenFeature([], tf.int64),
		'image/depth': tf.FixedLenFeature([], tf.int64),
		'image/class/label': tf.FixedLenFeature([], tf.int64),
		'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
		'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
		'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
	}
	parsed = tf.parse_single_example(serialized, features=feature_spec)

	raw_label = parsed['image/class/label']
	raw_image = parsed['image/encoded']

	flat_size = FLAGS.width*FLAGS.height*FLAGS.depth
	image = tf.reshape(tf.decode_raw(raw_image, tf.float32), tf.stack([flat_size]))

	label = tf.stack(tf.one_hot(raw_label-1, nLabel))
	return label, image
Exemple #24
0
 def when_singular():
   """Return a single [start, end, count] bucket of width 1 around `min_`.

   Reads `min_` and `data` from the enclosing scope; the count is the
   total number of elements in `data`, cast to float64.
   """
   start = tf.stack([min_ - 0.5])
   end = tf.stack([min_ + 0.5])
   count = tf.stack([tf.cast(tf.size(data), tf.float64)])
   # Stacking gives one row per field; transpose to one row per bucket.
   fields_by_row = tf.stack([start, end, count])
   return tf.transpose(fields_by_row)
Exemple #25
0
    def objective(self, x):
        '''
        Returns scalar to maximize

        For each of `self.n_W_particles` samples of the decoder weights,
        samples z, decodes it and scores the reconstruction. The means of
        the collected log-probabilities are stored on the instance
        (log_px, log_pz, log_qz, log_pW, log_qW, z_elbo) and combined into
        the returned elbo.
        '''
        encoder = NN(self.encoder_net, self.encoder_act_func, self.batch_size)
        decoder = BNN(self.decoder_net, self.decoder_act_func, self.batch_size)

        px_samples, pz_samples, qz_samples = [], [], []
        pW_samples, qW_samples = [], []

        for _ in range(self.n_W_particles):
            # Sample decoder weights  __, [1], [1]
            W, log_pW, log_qW = decoder.sample_weights()

            # Sample z   [P,B,Z], [P,B], [P,B]
            z, log_pz, log_qz = self.sample_z(x, encoder, decoder, W)
            # Flatten particles into the batch: [PB,Z]
            flat_z = tf.reshape(
                z, [self.n_z_particles*self.batch_size, self.z_size])

            # Decode [PB,X], then restore the particle axis: [P,B,X]
            decoded = decoder.feedforward(W, flat_z)
            decoded = tf.reshape(
                decoded, [self.n_z_particles, self.batch_size, self.x_size])

            # Likelihood p(x|z)  [P,B]
            px_samples.append(log_bern(x, decoded))
            pz_samples.append(log_pz)
            qz_samples.append(log_qz)
            pW_samples.append(log_pW)
            qW_samples.append(log_qW)

        all_log_px = tf.stack(px_samples)  # [S,P,B]
        all_log_pz = tf.stack(pz_samples)  # [S,P,B]
        all_log_qz = tf.stack(qz_samples)  # [S,P,B]
        all_log_pW = tf.stack(pW_samples)  # [S]
        all_log_qW = tf.stack(qW_samples)  # [S]

        # Mean log probs, kept on the instance for printing
        self.log_px = tf.reduce_mean(all_log_px)
        self.log_pz = tf.reduce_mean(all_log_pz)
        self.log_qz = tf.reduce_mean(all_log_qz)
        self.log_pW = tf.reduce_mean(all_log_pW)
        self.log_qW = tf.reduce_mean(all_log_qW)
        self.z_elbo = self.log_px + self.log_pz - self.log_qz

        # The weight terms are scaled by batch_frac.
        weight_term = self.batch_frac*(self.log_pW - self.log_qW)
        return self.log_px + self.log_pz - self.log_qz + weight_term
Exemple #26
0
def mtrx2vecBatch(pMtrxBatch,opt):
	"""Convert a batch of 3x3 warp matrices into warp parameter vectors.

	The matrix is unstacked along axis 1 into three rows and each row into
	three entries e<row><col>; the entries selected depend on opt.warpType:
		translation: [e02, e12]
		similarity:  [e00-1, e10, e02, e12]
		affine:      [e00-1, e01, e02, e10, e11-1, e12]
		homography:  [e00-1, e01, e02, e10, e11-1, e12, e20, e21]
		             (after dividing the matrix by its bottom-right entry)

	Args:
		pMtrxBatch: batch of warp matrices, indexed as [batch, row, col].
		opt: options object; only opt.warpType is read.

	Returns:
		Tensor with the selected entries stacked along axis 1.

	Raises:
		ValueError: if opt.warpType is not one of the four supported types
			(previously this surfaced as an UnboundLocalError on return).
	"""
	if opt.warpType not in ("translation","similarity","affine","homography"):
		raise ValueError("unsupported warpType: {0!r}".format(opt.warpType))
	with tf.name_scope("mtrx2vec"):
		if opt.warpType=="homography":
			# normalize so that the bottom-right entry becomes 1
			pMtrxBatch = pMtrxBatch/pMtrxBatch[:,2:3,2:3]
		[row0,row1,row2] = tf.unstack(pMtrxBatch,axis=1)
		[e00,e01,e02] = tf.unstack(row0,axis=1)
		[e10,e11,e12] = tf.unstack(row1,axis=1)
		[e20,e21,e22] = tf.unstack(row2,axis=1)
		# the diagonal entries are stored as offsets from the identity
		if opt.warpType=="translation":
			params = [e02,e12]
		elif opt.warpType=="similarity":
			params = [e00-1,e10,e02,e12]
		elif opt.warpType=="affine":
			params = [e00-1,e01,e02,e10,e11-1,e12]
		else: # homography
			params = [e00-1,e01,e02,e10,e11-1,e12,e20,e21]
		pBatch = tf.stack(params,axis=1)
	return pBatch
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
  """Tile a basis set of anchor boxes over every point of a grid.

  The basis set holds one box per (scale, aspect_ratio) pair taken
  element-wise from `scales` and `aspect_ratios`: e.g. scales=[.1, .2, .2]
  with aspect ratios [2, 2, 1/2] yields three boxes.  Box i has
    height = scales[i] / sqrt(aspect_ratios[i]) * base_anchor_size[0]
    width  = scales[i] * sqrt(aspect_ratios[i]) * base_anchor_size[1]
  and a copy of every basis box is centered on each grid point.  Grid point
  (row, col) sits at
    y = row * anchor_stride[0] + anchor_offset[0]
    x = col * anchor_stride[1] + anchor_offset[1]

  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d (float) tensor with the scale of each basis box.
    aspect_ratios: a 1-d (float) tensor with the aspect ratio of each basis
      box; must have the same length as `scales`.
    base_anchor_size: base anchor size as [height, width]
      (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent
      grid positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
      upper left element of the grid; this should be zero for feature
      networks with only VALID padding and even receptive field size, but
      may need some additional calculation if other padding is used
      (float tensor of shape [2])
  Returns:
    a BoxList holding a collection of N anchor boxes
  """
  sqrt_ratios = tf.sqrt(aspect_ratios)
  box_heights = scales / sqrt_ratios * base_anchor_size[0]
  box_widths = scales * sqrt_ratios * base_anchor_size[1]

  # Center coordinate of every grid row and column
  row_centers = tf.to_float(tf.range(grid_height)) * anchor_stride[0] + anchor_offset[0]
  col_centers = tf.to_float(tf.range(grid_width)) * anchor_stride[1] + anchor_offset[1]
  col_centers, row_centers = ops.meshgrid(col_centers, row_centers)

  # Pair every basis box size with every grid center
  widths_grid, x_grid = ops.meshgrid(box_widths, col_centers)
  heights_grid, y_grid = ops.meshgrid(box_heights, row_centers)

  # Flatten to one (y, x) center and one (height, width) size per anchor
  centers = tf.reshape(tf.stack([y_grid, x_grid], axis=3), [-1, 2])
  sizes = tf.reshape(tf.stack([heights_grid, widths_grid], axis=3), [-1, 2])
  return box_list.BoxList(_center_size_bbox_to_corners_bbox(centers, sizes))
def collapse_mixture_of_tastes(tastes_predictions, tastes_attentions):
    """
    Reduces the per-taste prediction nodes to a single prediction node.
    :param tastes_predictions: list of prediction tensors, one per taste
    :param tastes_attentions: list of attention tensors, one per taste, or None
    :return: the attention-weighted sum of the predictions when attentions
        are given, otherwise the element-wise maximum over the tastes
    """
    predictions = tf.stack(tastes_predictions)

    # Without attention, the largest prediction across the tastes is used
    if tastes_attentions is None:
        return tf.reduce_max(predictions, axis=0)

    # Softmax across the taste axis turns the attentions into mixing weights
    weights = tf.nn.softmax(tf.stack(tastes_attentions), axis=0)

    # Weight every taste's prediction and sum the tastes away
    return tf.reduce_sum(tf.multiply(predictions, weights), axis=0)
Exemple #29
0
def vec2mtrxBatch(pBatch,opt):
	"""Convert a batch of warp parameter vectors into 3x3 warp matrices.

	The parameters are unstacked along axis 1 and placed into a matrix whose
	layout depends on opt.warpType (I and O are per-example ones / zeros):
		translation (tx,ty):       [[I,O,tx],[O,I,ty],[O,O,I]]
		similarity  (pc,ps,tx,ty): [[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]
		affine      (p1..p6):      [[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]
		homography  (p1..p8):      [[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]

	Args:
		pBatch: warp parameters, indexed as [batch, parameter].
		opt: options object; only opt.warpType is read.

	Returns:
		Tensor of warp matrices indexed as [batch, row, col].

	Raises:
		ValueError: if opt.warpType is not one of the four supported types
			(previously this surfaced as an UnboundLocalError on return).
	"""
	if opt.warpType not in ("translation","similarity","affine","homography"):
		raise ValueError("unsupported warpType: {0!r}".format(opt.warpType))
	with tf.name_scope("vec2mtrx"):
		batchSize = tf.shape(pBatch)[0]
		O = tf.zeros([batchSize])
		I = tf.ones([batchSize])
		if opt.warpType=="translation":
			tx,ty = tf.unstack(pBatch,axis=1)
			rows = [[I,O,tx],
					[O,I,ty],
					[O,O,I]]
		elif opt.warpType=="similarity":
			pc,ps,tx,ty = tf.unstack(pBatch,axis=1)
			rows = [[I+pc,-ps,tx],
					[ps,I+pc,ty],
					[O,O,I]]
		elif opt.warpType=="affine":
			p1,p2,p3,p4,p5,p6 = tf.unstack(pBatch,axis=1)
			rows = [[I+p1,p2,p3],
					[p4,I+p5,p6],
					[O,O,I]]
		else: # homography
			p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(pBatch,axis=1)
			rows = [[I+p1,p2,p3],
					[p4,I+p5,p6],
					[p7,p8,I]]
		# the stacked rows are [row, col, batch]; move the batch axis to the front
		pMtrxBatch = tf.transpose(tf.stack(rows),perm=[2,0,1])
	return pMtrxBatch
Exemple #30
0
 def K(self, X, X2=None, presliced=False):
     """Return the kernel matrix: variance on the diagonal, zero elsewhere.

     With X2 omitted the result is a square diagonal matrix with one row per
     row of X, filled with the squeezed self.variance.  With X2 given the
     result is an all-zero matrix of shape [rows of X, rows of X2] in
     settings.float_type.  `presliced` is accepted but not used here.
     """
     n_rows = tf.shape(X)[0]
     if X2 is not None:
         cross_shape = tf.stack([n_rows, tf.shape(X2)[0]])
         return tf.zeros(cross_shape, settings.float_type)
     diagonal = tf.fill(tf.stack([n_rows]), tf.squeeze(self.variance))
     return tf.matrix_diag(diagonal)
Exemple #31
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
    """Builds the graph ops that randomly distort one JPEG training image.

  Running training images through simple distortions (crops, scales, flips,
  brightness changes) mimics the variation seen in the real world and can help
  the model cope with natural data. The ops are built in this order:

  Scaling
  ~~~~~~~
  The decoded image is resized to the model input size multiplied by
  (1 + random_crop/100) and by a random factor drawn uniformly from
  [1, 1 + random_scale/100]. With both percentages at zero the image is
  simply resized to the model input size.

  Cropping
  ~~~~~~~~
  A box of exactly the model input size is cut at a random position out of
  the resized image, so a larger margin/scale leaves more room for the box to
  move:
  <   resized width     >
  +---------------------+
  |                     |
  |    <input width>    |
  |    +------+         |
  |    |      |         |
  |    |      |         |
  |    +------+         |
  |                     |
  +---------------------+

  Flip and brightness
  ~~~~~~~~~~~~~~~~~~~
  The crop is optionally mirrored left/right at random, then every pixel is
  multiplied by a factor drawn uniformly from
  [1 - random_brightness/100, 1 + random_brightness/100].

  Args:
    flip_left_right: Boolean whether to randomly mirror images horizontally.
    random_crop: Integer percentage setting the total margin used around the
    crop box.
    random_scale: Integer percentage of how much to vary the scale by.
    random_brightness: Integer range to randomly multiply the pixel values by.
  Returns:
    The jpeg input layer and the distorted result tensor.
  """
    # Input: raw JPEG bytes -> float image with a leading batch axis
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH)
    image_batch = tf.expand_dims(tf.cast(image, dtype=tf.float32), 0)

    # Overall upscale factor = fixed crop margin * random scale jitter
    crop_margin = tf.constant(1.0 + (random_crop / 100.0))
    scale_jitter = tf.random_uniform(tensor_shape.scalar(),
                                     minval=1.0,
                                     maxval=1.0 + (random_scale / 100.0))
    upscale = tf.multiply(crop_margin, scale_jitter)

    # Resize so that a model-sized crop box fits inside with room to move
    target_width = tf.multiply(upscale, MODEL_INPUT_WIDTH)
    target_height = tf.multiply(upscale, MODEL_INPUT_HEIGHT)
    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)
    resized = tf.image.resize_bilinear(image_batch, target_size)
    resized_3d = tf.squeeze(resized, squeeze_dims=[0])

    # Random crop down to the model input size
    crop_size = [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH, MODEL_INPUT_DEPTH]
    cropped = tf.random_crop(resized_3d, crop_size)

    # Optional random horizontal mirror
    flipped = (tf.image.random_flip_left_right(cropped)
               if flip_left_right else cropped)

    # Random brightness multiplier centered on 1.0
    brightness = tf.random_uniform(
        tensor_shape.scalar(),
        minval=1.0 - (random_brightness / 100.0),
        maxval=1.0 + (random_brightness / 100.0))
    brightened = tf.multiply(flipped, brightness)

    distort_result = tf.expand_dims(brightened, 0, name='DistortResult')
    return jpeg_data, distort_result
Exemple #32
0
def load_and_preprocess_from_path_label(path, x, y):
    """Load the image at `path` and pair it with its normalized (x, y) label.

    The image is resized to [shape, shape]; the label is x divided by the
    module-level `width` and y divided by the module-level `height`, stacked
    into one tensor.
    """
    resized = tf.image.resize(load_and_preprocess_image(path), [shape, shape])
    x_norm = tf.to_float(x)/width
    y_norm = tf.to_float(y)/height
    label = tf.stack([x_norm, y_norm])
    return resized, label
Exemple #33
0
def load_and_preprocess_from_path_label_translate(path, x, y):
    """Load and translate the image at `path`, shifting its (x, y) label too.

    `translate` returns the shifted image and its two offsets; the offsets
    are added to x and y before they are divided by the module-level `width`
    and `height`.  The image is resized to [shape, shape] after translation.
    """
    shifted, translations = translate(load_and_preprocess_image(path))
    resized = tf.image.resize(shifted, [shape, shape])
    x_norm = tf.to_float((x+translations[0]))/width
    y_norm = tf.to_float((y+translations[1]))/height
    label = tf.stack([x_norm, y_norm])
    return resized, label
Exemple #34
0
  # Adam optimizer with learning rate `alpha`; the global step is handed to
  # minimize() together with the loss.
  optimizer = tf.train.AdamOptimizer(learning_rate=alpha)
  train_op = optimizer.minimize(
    loss=loss,
    global_step=tf.train.get_global_step())

  # Collect this loss and training op alongside the ones gathered so far
  # (presumably one entry per ensemble member -- the enclosing loop is not
  # visible here, TODO confirm).
  loss_arr.append(loss)
  train_op_arr.append(train_op)

  # functions for evaluating the network
  # accuracy = mean of the element-wise match between the predicted classes
  # and ph["y"] (presumably the label placeholder -- verify against its definition)
  correct_pred = tf.equal(predictions["classes"], ph["y"])
  accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

  accuracy_arr.append(accuracy)

# combine all of the models together for the ensemble
# The per-model probabilities are stacked on a new leading axis, which is then
# moved last (presumably [model, batch, class] -> [batch, class, model];
# TODO confirm the shape of x["probabilities"]).
all_preds = tf.stack([x["probabilities"] for x in predictions_arr])
all_preds = tf.transpose(all_preds, [1,2,0])

# Per-model view with the last two axes swapped back, its top-10 entries along
# the last axis, and each model's arg-max along that axis.
model_preds = tf.transpose(all_preds, [0, 2, 1])
model_top_10_values, model_top_10_indices = tf.nn.top_k(model_preds, k=10)
model_preds = tf.argmax(model_preds, axis=2, output_type=tf.int32)

if aggregate_method == 'average':
  # average over softmaxes
  test_prob = tf.reduce_mean(all_preds, axis = 2)
  test_class = tf.argmax(test_prob, axis=1, output_type=tf.int32)

elif aggregate_method == 'most_common':
  # every model votes for its arg-max along axis 1
  test_prob = tf.argmax(all_preds, axis=1, output_type=tf.int32)
  # Majority vote per example.  The previous code read `test_prob_max`, which
  # is not defined in the visible code; and tf.bincount applied to the whole
  # vote tensor would count over all examples at once.  Counting row by row
  # yields one class per example, matching the 'average' branch.
  # NOTE(review): confirm this per-example vote is the intended aggregation.
  test_class = tf.map_fn(
    lambda votes: tf.argmax(tf.bincount(votes), output_type=tf.int32),
    test_prob,
    dtype=tf.int32)
Exemple #35
0
    def build(self, n_hidden, update_next=False):
        """Build the RNN next-token graph and attach its tensors to self.

        Overwrites self.logfile with a short header of the build parameters,
        then creates: the placeholders (outputs, inputs, target_len), a
        one-hot encoded BasicRNNCell network with a softmax projection, the
        sequence loss, an Adam training op, prediction / accuracy tensors for
        the last and the second-to-last target position, and a
        tf.train.Saver (self.lstm_saver).

        Args:
            n_hidden: number of units in the RNN cell.
            update_next: if True the loss is weighted by the padding mask, so
                every step inside target_len contributes; otherwise only the
                last target position (index target_len - 1) of each sequence
                contributes.
        """
        with open(self.logfile, 'w') as log_file:
            log_file.write("Build file with Parameters:\n")
            log_file.write("[rnn_units: {}; update_next: {};]\n".format(
                n_hidden, update_next))

        # graph build
        self.outputs = tf.placeholder(
            tf.int32, [None, None],
            name='output_layer')  # batch_size * time_steps
        self.inputs = tf.placeholder(tf.int32, [None, None],
                                     name='input_layer')
        self.target_len = tf.placeholder(tf.int32, [None],
                                         name='target_length')
        n_time_step = tf.reduce_max(self.target_len)

        # one-hot encoding
        onehot_enc = tf.one_hot(self.inputs, self.n_voca, axis=-1)
        rnnCell = tf.contrib.rnn.BasicRNNCell(n_hidden)
        # hidden_output holds the cell outputs: [batch_size, time_steps, rnn_units]
        hidden_output, _ = tf.nn.dynamic_rnn(rnnCell,
                                             onehot_enc,
                                             dtype=tf.float32)
        self.hidden_output = hidden_output
        # flatten to [batch_size*time_steps, n_hidden] so a single matmul
        # projects every step; the loss weights below decide which steps count
        hidden_output = tf.reshape(hidden_output, [-1, n_hidden])

        softmax_weight = tf.Variable(tf.truncated_normal(
            [n_hidden, self.n_voca]),
                                     name="softmax_weight")
        softmax_bias = tf.Variable(tf.constant(0.1, shape=[self.n_voca]))

        # logits for all the time steps; the last one or every step is used
        # for the update
        logits = tf.matmul(hidden_output, softmax_weight) + softmax_bias
        # NOTE(review): regrouping by n_time_step (= max of target_len)
        # assumes the fed inputs are padded to exactly that many steps --
        # confirm against the feeding code.
        logits = tf.reshape(logits, [-1, n_time_step, self.n_voca])
        self.logits = logits

        # one-hot mask selecting position target_len - 1 of each sequence
        index_offset = tf.fill(tf.stack([tf.shape(self.target_len)[0]]), -1)
        target_indices = tf.add(self.target_len, index_offset)
        mask = tf.one_hot(target_indices, n_time_step, dtype=tf.float32)
        # mask_for_padding guarantees an update on every next token
        mask_for_padding = tf.sequence_mask(self.target_len, dtype=tf.float32)

        # supervise every time step when update_next, else only the last one
        loss_weights = mask_for_padding if update_next else mask
        self.loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            self.outputs,
            loss_weights,
            average_across_timesteps=False,
            average_across_batch=True)

        self.optimizer = tf.train.AdamOptimizer().minimize(self.loss)
        # answer
        pred = tf.argmax(logits, 2)  # batch_size*time_step
        self.pred = pred
        mask_pred = tf.multiply(tf.cast(pred, tf.float32), mask)
        self.mask = mask
        self.mask_pred = mask_pred

        # tf.boolean_mask is documented to take a boolean mask; the one-hot
        # mask is float, so convert it explicitly (1.0 -> True, 0.0 -> False)
        pred_logits = tf.boolean_mask(logits, tf.cast(mask, tf.bool))
        pred_logits = tf.reshape(pred_logits, [-1, self.n_voca])

        # output probability of target
        self.softmax_prob = tf.nn.softmax(pred_logits)

        # answer [prediction]: the mask is one-hot, so summing over time
        # leaves the predicted id at the last target position
        self.last_pred = tf.reduce_sum(mask_pred, 1)

        mask_output = tf.multiply(tf.cast(self.outputs, tf.float32), mask)
        # label
        last_output = tf.reduce_sum(mask_output, 1)
        correctPred = tf.equal(tf.cast(self.last_pred, tf.int64),
                               tf.cast(last_output, tf.int64))
        self.last_acc = tf.reduce_mean(tf.cast(correctPred, tf.float32))

        # second last: same computation one position earlier (target_len - 2)
        second_target_indices = tf.add(tf.cast(target_indices, tf.int32),
                                       index_offset)
        mask_second_last = tf.one_hot(second_target_indices,
                                      n_time_step,
                                      dtype=tf.float32)
        second_mask_pred = tf.multiply(tf.cast(pred, tf.float32),
                                       mask_second_last)
        self.second_last_pred = tf.reduce_sum(second_mask_pred, 1)
        second_mask_output = tf.multiply(tf.cast(self.outputs, tf.float32),
                                         mask_second_last)
        second_last_output = tf.reduce_sum(second_mask_output, 1)
        second_correct = tf.equal(tf.cast(self.second_last_pred, tf.int64),
                                  tf.cast(second_last_output, tf.int64))
        self.second_last_acc = tf.reduce_mean(
            tf.cast(second_correct, tf.float32))

        self.lstm_saver = tf.train.Saver()
    def matching(self, gt_bbox, gt_labels, num_crowd=None, threshold_pos=0.5, threshold_neg=0.4, threshold_crowd=0.7):
        """
        Match every anchor to a ground-truth box and build the training targets.

        Each anchor is assigned the ground truth it overlaps most; in addition the
        best-overlapping anchor of every ground truth is reassigned to that ground
        truth. Anchors are then marked positive (1), negative (0) or neutral (-1)
        from their IoU, and neutral is also given to non-positive anchors that
        overlap a crowd box by more than `threshold_crowd`.

        Args:
            gt_bbox: ground-truth boxes; when `num_crowd` > 0 the last `num_crowd`
                rows are the crowd annotations. Height is computed as column 2
                minus column 0 and width as column 3 minus column 1.
            gt_labels: labels, gathered with the matched ground-truth index.
            num_crowd: number of trailing crowd rows in `gt_bbox`; None or 0 means
                there are none.
            threshold_pos: anchors whose IoU is above this are positive.
            threshold_neg: anchors whose IoU is below this are negative; anything
                in between is neutral.
            threshold_crowd: crowd IoU above which a non-positive anchor becomes
                neutral; crowd handling is skipped when this is >= 1.

        Returns:
            target_cls: matched label multiplied by the positiveness flag.
            target_loc: encoded offsets [cx, cy, w, h] between the matched ground
                truth and the anchor, scaled by the variances [0.1, 0.2].
            max_id_for_anchors: matched ground-truth index of every anchor.
            match_positiveness: 1 / 0 / -1 flag of every anchor.
        """
        if num_crowd is not None and num_crowd > 0:
            # split the gt_bbox; the crowd rows must be taken BEFORE gt_bbox is
            # truncated, otherwise they would be read from the non-crowd rows
            crowd_gt_bbox = gt_bbox[-num_crowd:]
            gt_bbox = gt_bbox[:-num_crowd]
        else:
            crowd_gt_bbox = tf.zeros_like(gt_bbox)

        # Matching only for non-crowd annotation
        # --------------------------------------------------------------------------------------------------------------
        num_gt = tf.shape(gt_bbox)[0]
        # pairwise IoU (presumably [num_anchors, num_gt] -- TODO confirm in _pairwise_iou)
        pairwise_iou = self._pairwise_iou(gt_bbox=gt_bbox, is_crowd=False)

        # assign the max overlap gt index for each anchor
        max_iou_for_anchors = tf.reduce_max(pairwise_iou, axis=-1)
        max_id_for_anchors = tf.math.argmax(pairwise_iou, axis=-1)

        # force the anchor that is the best match of each gt to predict that gt
        forced_update_id = tf.cast(tf.range(0, num_gt), tf.int64)

        # overwrite that anchor's IoU / id with the ones of the gt it is forced to,
        # so that no ground truth is left without an anchor
        forced_update_iou = tf.reduce_max(pairwise_iou, axis=0)
        forced_update_indice = tf.expand_dims(tf.math.argmax(pairwise_iou, axis=0), axis=-1)
        max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, forced_update_indice, forced_update_iou)
        max_id_for_anchors = tf.tensor_scatter_nd_update(max_id_for_anchors, forced_update_indice, forced_update_id)

        # decide the anchors to be positive (1), negative (0) or neutral (-1)
        # based on the IoU and the given thresholds
        pos_iou = tf.where(max_iou_for_anchors > threshold_pos)
        max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, pos_iou, tf.ones(tf.size(pos_iou)))
        neg_iou = tf.where(max_iou_for_anchors < threshold_neg)
        max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, neg_iou, tf.zeros(tf.size(neg_iou)))
        neu_iou = tf.where(
            tf.math.logical_and((max_iou_for_anchors <= threshold_pos), max_iou_for_anchors >= threshold_neg))
        max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, neu_iou, -1 * tf.ones(tf.size(neu_iou)))

        # deal with crowd annotations, only affect non-positive
        # --------------------------------------------------------------------------------------------------------------
        if num_crowd is not None and num_crowd > 0 and threshold_crowd < 1:
            # crowd pairwise IoU
            crowd_pairwise_iou = self._pairwise_iou(gt_bbox=crowd_gt_bbox, is_crowd=True)

            # max overlap with any crowd box for each anchor
            crowd_max_iou_for_anchors = tf.reduce_max(crowd_pairwise_iou, axis=-1)

            # non-positive anchors that overlap a crowd box above the crowd threshold
            crowd_neu_iou = tf.where(
                tf.math.logical_and((max_iou_for_anchors <= 0), crowd_max_iou_for_anchors > threshold_crowd))

            # reassign from negative to neutral
            max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, crowd_neu_iou,
                                                              -1 * tf.ones(tf.size(crowd_neu_iou)))
        match_positiveness = max_iou_for_anchors

        # create class target
        # map idx to label[idx]
        match_labels = tf.gather(gt_labels, max_id_for_anchors)

        # element-wise multiplication of label[idx] and positiveness:
        # 1. positive sample will have the correct label
        # 2. negative sample will have 0 * label[idx] = 0
        # 3. neutral sample will have -1 * label[idx] = -1 * label[idx]
        # it can be useful to distinguish positive samples during loss calculation
        target_cls = tf.multiply(tf.cast(match_labels, tf.float32), match_positiveness)

        # create loc target
        map_loc = tf.gather(gt_bbox, max_id_for_anchors)

        # convert anchors to center form [cx, cy, w, h]
        h = self.anchors[:, 2] - self.anchors[:, 0]
        w = self.anchors[:, 3] - self.anchors[:, 1]
        center_anchors = tf.stack([self.anchors[:, 1] + (w / 2), self.anchors[:, 0] + (h / 2), w, h], axis=-1)

        # convert the matched ground truth to center form [cx, cy, w, h]
        h = map_loc[:, 2] - map_loc[:, 0]
        w = map_loc[:, 3] - map_loc[:, 1]
        center_gt = tf.stack([map_loc[:, 1] + (w / 2), map_loc[:, 0] + (h / 2), w, h], axis=-1)
        variances = [0.1, 0.2]

        # calculate offset: center shift relative to the anchor size, and log of
        # the size ratio, each divided by its variance
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2] / variances[0]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3] / variances[0]
        # the log below needs non-negative size ratios
        tf.debugging.assert_non_negative(center_anchors[:, 2] / center_gt[:, 2])
        tf.debugging.assert_non_negative(center_anchors[:, 3] / center_gt[:, 3])
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2]) / variances[1]
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3]) / variances[1]
        target_loc = tf.stack([g_hat_cx, g_hat_cy, g_hat_w, g_hat_h], axis=-1)
        return target_cls, target_loc, max_id_for_anchors, match_positiveness
Exemple #37
0
 def call(self, inputs):
     """Combine the incoming vector with the emission term of the observation.

     inputs[0] is h and inputs[1] is x.  x is augmented to [1-x, x] along
     axis 1, multiplied with the transposed emission matrix (one softmax-ed
     row of self.emission_weight_raw per index below self.cardinality),
     multiplied element-wise with h and passed through get_normalized_pr.
     """
     h, x = inputs[0], inputs[1]
     aug_x = tf.stack([1-x, x], axis=1)
     emission_rows = [tf.nn.softmax(self.emission_weight_raw[i,:])
                      for i in range(self.cardinality)]
     emission = tf.stack(emission_rows, axis=0)
     weighted = h * tf.matmul(aug_x, tf.transpose(emission))
     return get_normalized_pr(weighted)
    def add_prediction_op(self):
        """Unroll the recurrent cell over time and project to class scores:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, keep_prob)
                y_t = o_drop_t U + b_2

        The cell is an RNNCell or a GRUCell depending on Config.cell. The
        variables U and b_2 are created once, the cell's own variables are
        reused from the second time step on, and the value of
        self.dropout_placeholder is passed to tf.nn.dropout as the keep
        probability (1 - p_drop).

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)

        Raises:
            ValueError: if Config.cell is neither "rnn" nor "gru".
        """

        x = self.add_embedding()
        keep_prob = self.dropout_placeholder

        preds = []  # Predicted output at each timestep

        if Config.cell == "rnn":
            cell = RNNCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        elif Config.cell == "gru":
            cell = GRUCell(Config.n_features * Config.embed_size,
                           Config.hidden_size)
        else:
            raise ValueError("Unsuppported cell type: " + Config.cell)

        # Output projection U, b2 and the all-zero initial state h_0
        U = tf.get_variable(
            'U',
            shape=(Config.hidden_size, Config.n_classes),
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        b2 = tf.get_variable(
            'b2',
            shape=(Config.n_classes),
            initializer=tf.contrib.layers.xavier_initializer(seed=2))
        h = tf.zeros(shape=(tf.shape(x)[0], Config.hidden_size))

        with tf.variable_scope("RNN"):
            for time_step in range(self.max_length):
                # From the second step on, reuse the cell's variables instead
                # of creating new ones.
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                output, h = cell(x[:, time_step, :], h)
                output = tf.nn.dropout(output, keep_prob)
                output = tf.matmul(output, U) + b2
                preds.append(output)

        # Stacking puts the time axis first; print the stacked shape (call
        # form works on Python 2 and 3), then move the batch axis to the front.
        preds = tf.stack(preds)
        print(preds.shape)
        preds = tf.transpose(preds, perm=[1, 0, 2])

        assert preds.get_shape().as_list() == [
            None, self.max_length, Config.n_classes
        ], "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, Config.n_classes],
            preds.get_shape().as_list())
        return preds
Exemple #39
0
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
    """Builds a slim convolutional capsule layer.
  Takes a 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`, computes the
  votes with a 2D convolution, refines them with routing and applies the
  Squash non linearity for each capsule.
  Every capsule is a convolutional unit that shares its kernel over the
  position grid and over the capsules of the layer below, so the trainable
  variables created here are:
    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms, 1, 1] (tiled over the vote grid)
  The output of a conv2d layer is a single capsule with channel number of
  atoms, so this layer can be added on top of a conv2d layer with
  num_routing=1, input_dim=1 and input_atoms=conv_channels.
  Args:
    input_tensor: tensor, of rank 5. Last two dimmensions representing height
      and width position grid.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.
  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise, height
      and width is adjusted with same rules as 'VALID' in tf.nn.conv2d.
  """
    with tf.variable_scope(layer_name):
        kernel_shape = [
            kernel_size, kernel_size, input_atoms, output_dim * output_atoms
        ]
        kernel = variables.weight_variable(shape=kernel_shape)
        biases = variables.bias_variable([output_dim, output_atoms, 1, 1])

        # Votes of every input capsule for every output capsule
        votes, votes_shape, input_shape = _depthwise_conv3d(
            input_tensor, kernel, input_dim, output_dim, input_atoms,
            output_atoms, stride, padding)

        with tf.name_scope('routing'):
            vote_height, vote_width = votes_shape[2], votes_shape[3]
            # One routing logit per (example, input capsule, output capsule,
            # grid position)
            logit_shape = tf.stack([
                input_shape[0], input_dim, output_dim, vote_height, vote_width
            ])
            # Repeat the per-capsule bias at every grid position
            biases_replicated = tf.tile(biases,
                                        [1, 1, vote_height, vote_width])
            activations = _update_routing(votes=votes,
                                          biases=biases_replicated,
                                          logit_shape=logit_shape,
                                          num_dims=6,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
Exemple #40
0
def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=False,
                     stddev=1e-3,
                     weight_decay=0.0,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
    """ Transposed 2D convolution followed by bias, optional BN and activation.

    Args:
      inputs: 4-D tensor variable BxHxWxC
      num_output_channels: int
      kernel_size: a list of 2 ints (height, width)
      scope: string, variable scope that owns 'weights' and 'biases'
      stride: a list of 2 ints (height, width)
      padding: 'SAME' or 'VALID'
      use_xavier: bool, forwarded to the weight initializer helper
      stddev: float, forwarded to the weight initializer helper
      weight_decay: float, forwarded to the weight initializer helper
      activation_fn: only tested against None; when it is not None a
        leaky ReLU with alpha=0.2 is applied regardless of the function given
      bn: bool, whether to apply tf.layers.batch_normalization
      bn_decay: accepted for interface compatibility, not used here
      is_training: passed as `training` to the batch normalization layer

    Returns:
      Variable tensor

    Note (original author): the output is shaped so that
      conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride)
      has the shape of a.
    """

    # Output extent of one spatial axis (rule taken from
    # slim.convolution2d_transpose).
    def _upsampled_extent(extent, step, ksize):
        extent = extent * step
        if padding == 'VALID' and extent is not None:
            extent = extent + max(ksize - step, 0)
        return extent

    with tf.variable_scope(scope):
        k_rows, k_cols = kernel_size
        in_channels = inputs.get_shape()[-1].value
        filters = _variable_with_weight_decay(
            'weights',
            # channel axes are swapped compared with a conv2d kernel
            shape=[k_rows, k_cols, num_output_channels, in_channels],
            use_xavier=use_xavier,
            stddev=stddev,
            wd=weight_decay)
        step_rows, step_cols = stride

        # Dynamic output shape, so unknown batch/height/width are supported.
        n_batch = tf.shape(inputs)[0]
        in_rows = tf.shape(inputs)[1]
        in_cols = tf.shape(inputs)[2]
        out_rows = _upsampled_extent(in_rows, step_rows, k_rows)
        out_cols = _upsampled_extent(in_cols, step_cols, k_cols)
        target_shape = tf.stack(
            [n_batch, out_rows, out_cols, num_output_channels], axis=0)

        result = tf.nn.conv2d_transpose(inputs, filters, target_shape,
                                        [1, step_rows, step_cols, 1],
                                        padding=padding)
        offsets = _variable_on_cpu('biases', [num_output_channels],
                                   tf.constant_initializer(0.0))
        result = tf.nn.bias_add(result, offsets)

        if bn:
            result = tf.layers.batch_normalization(
                result, momentum=0.99, epsilon=1e-6, training=is_training)
        if activation_fn is None:
            return result
        return tf.nn.leaky_relu(result, alpha=0.2)
Exemple #41
0
    def __init__(self, args):
        '''
        Initialize the model and build its TensorFlow graph.

        Copies the hyperparameters out of ``args``, creates the embedding
        tables, embeds the context and the entity pool, runs the encoder
        layers and finally builds the dual pointer decoder.

        :param args: dict holding the hyperparameters
        '''
        self.is_train = args["is_train"]
        self.batch_size = args["batch_size"]
        self.keep_pob = args["keep_prob"]
        self.dropout_prob = 1.0 - self.keep_pob
        self.learning_rate = args["learning_rate"]

        self.relation_vocab_size = args["relation_vocab_size"]
        self.entity_vocab_size = args["entity_vocab_size"]
        self.entity_type_emb_size = args["entity_type_emb_size"]
        self.char_vocab_size = args["char_vocab_size"]
        self.char_emb_size = args["char_emb_size"]

        self.max_sentences = args["max_sentences"]
        self.word_maxlen = args["word_maxlen"]
        self.word_emb_table = args["embedding_table"]
        self.word_emb_size = args["word_emb_size"]

        self.filter_size = args["filter_size"]
        self.num_filter = args["num_filter"]

        self.max_entities = args["max_entities"]
        self.entity_max_tokens = args["entity_max_tokens"]
        self.entity_max_chars = args["entity_max_chars"]

        # Encoder / decoder parameters
        self.encoder_stack = args["encoder_stack"]
        self.encoder_max_step = args["encoder_max_step"]
        self.encoder_hidden = args["encoder_hidden"]
        self.decoder_hidden = args["decoder_hidden"]

        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype='int32',
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Initialize the model inputs.
        # NOTE(review): presumably this defines the input attributes read
        # below (self.context_entity_type, self.sentence_id, self.entity_pool,
        # ...) - confirm against _placeholder_init.
        self._placeholder_init()

        # Word embedding table that is fine-tuned together with the model
        finetune_table = tf.get_variable(
            name="word_embedding_table_finetuning",
            initializer=self.word_emb_table,
            trainable=True,
            dtype=tf.float32)

        # Fixed word embedding table that keeps the pre-trained values as-is
        fix_table = tf.get_variable(name="word_embedding_table_fix",
                                    initializer=self.word_emb_table,
                                    trainable=False,
                                    dtype=tf.float32)
        # Randomly initialized character embedding table
        char_emb_table = tf.get_variable(
            "char_emb_table",
            shape=[self.char_vocab_size, self.char_emb_size],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Randomly initialized entity-type embedding table
        entity_type_emb_table = tf.get_variable(
            "entity_type_emb_table",
            shape=[self.entity_vocab_size, self.entity_type_emb_size],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # One-hot embedding table for sentence indices (identity matrix)
        sentence_id_emb_table = tf.eye(num_rows=self.max_sentences)

        # Word embedding of the context sentences
        context_embedding = self._context_embedding_layer(
            fix_table=fix_table,
            finetune_table=finetune_table,
            char_emb_table=char_emb_table)
        # Entity-type embedding of the context
        entity_type_embedding = tf.nn.embedding_lookup(
            entity_type_emb_table, self.context_entity_type)
        # Sentence-index embedding of the context
        sentence_id_embedding = tf.nn.embedding_lookup(sentence_id_emb_table,
                                                       self.sentence_id)

        # entity token, character, type, position, sentence_id embedding
        entity_embedding = self._entity_pool_embedding(
            fix_table=fix_table,
            finetune_table=finetune_table,
            char_emb_table=char_emb_table,
            token_entities=self.entity_pool,
            char_entities=self.char_entity_pool)

        # Fetch the embeddings of the entities that appear in the context.
        # Both tensors are unstacked along axis 0 so that each example looks
        # up its own entity pool (requires a statically known first dimension).
        context_entity_emb = []
        unstack_entity_pool = tf.unstack(entity_embedding, axis=0)
        unstack_context_entity_id = tf.unstack(self.context_entity_id, axis=0)
        for entity_pool, context in zip(unstack_entity_pool,
                                        unstack_context_entity_id):
            context_entity_emb.append(
                tf.nn.embedding_lookup(entity_pool, context))

        context_entity_emb = tf.stack(context_entity_emb, axis=0)

        # context token, character, entity_type, sentence_id embedding
        context_embedding = tf.concat([
            context_embedding, entity_type_embedding, sentence_id_embedding,
            context_entity_emb
        ],
                                      axis=-1)

        # Entity-type embedding and sentence-index embedding of the entity pool
        entity_pool_type_emb = tf.nn.embedding_lookup(entity_type_emb_table,
                                                      self.entity_pool_type)
        entity_pool_sent_emb = tf.nn.embedding_lookup(sentence_id_emb_table,
                                                      self.entity_sent_id)

        entity_pool_emb = tf.concat(
            [entity_embedding, entity_pool_type_emb, entity_pool_sent_emb],
            axis=-1)

        # "none" vector that entities without a relation can point to
        none_emb = tf.get_variable(name="none_emb",
                                   shape=[self.decoder_hidden],
                                   initializer=tf.zeros_initializer)
        pad_emb = tf.get_variable(name="pad_emb",
                                  shape=[self.decoder_hidden],
                                  initializer=tf.zeros_initializer)

        # Replicate each vector batch_size times and add a length-1 axis at
        # position 1 so it can be concatenated with the pointing memory.
        pad_token = tf.expand_dims(tf.stack([pad_emb] * self.batch_size, 0),
                                   axis=1,
                                   name="pad_token")
        none_token = tf.expand_dims(tf.stack([none_emb] * self.batch_size, 0),
                                    axis=1,
                                    name="none_token")

        # Encode the sentences
        encoder_output, encoder_state = self._biGRU_encoding_layer(
            encoder_input=context_embedding,
            encoder_length=self.context_input_length,
            name="encoder_layer")

        # Encode the entities and attend between sentences and entities
        pointing_mem, decoder_state = self._entity_encoding_layer(
            entity_pool_emb, encoder_output, encoder_state)

        # Targets the decoder can point to: pad slot, none slot, then entities
        self.pointing_target = tf.concat([pad_token, none_token, pointing_mem],
                                         axis=1)
        # Decoder input
        decoder_input = tf.concat([entity_pool_emb, pointing_mem], axis=-1)

        # Decoder layer and train op
        self._dual_pointer_decoder(decoder_input=decoder_input,
                                   decoder_init_state=decoder_state,
                                   decoder_hidden=self.decoder_hidden,
                                   pointing_memory=self.pointing_target)
Exemple #42
0
def learn(env,
          benchmark_env,
          q_func,
          replay_memory,
          optimizer,
          exploration=LinearSchedule(1000000, 0.1),
          max_timesteps=50000000,
          batch_size=32,
          learning_starts=50000,
          learning_freq=4,
          target_update_freq=10000,
          grad_clip=None,
          log_every_n_steps=100000,
          mov_avg_size=300,
    ):
    """Train a Q-network by regressing Q(s, a) onto returns from replay memory.

    Builds the TF graph (Q-network, squared-error loss, optional gradient
    clipping), then steps `env` with an epsilon-greedy policy, storing each
    transition in `replay_memory` and periodically training on sampled
    batches.

    Args:
      env: gym environment; its observation space must be exactly
        `gym.spaces.Box` and its action space exactly `gym.spaces.Discrete`.
      benchmark_env: environment wrapped in `HistoryWrapper` and handed to
        `benchmark` before training starts.
      q_func: callable `q_func(obs_ph, n_actions, scope=...)` returning
        `(qvalues, rnn_state_tensor)`; must also provide `is_recurrent()`
        and, for recurrent models, an `rnn_state` feedable attribute.
      replay_memory: object providing `history_len`, `register_refresh_func`,
        `store_frame`, `encode_recent_observation`, `store_effect`,
        `refresh` and `sample`.
      optimizer: TF optimizer exposing `compute_gradients`/`apply_gradients`.
      exploration: schedule whose `value(t)` gives epsilon at timestep t.
      max_timesteps: the loop stops once `t >= max_timesteps`.
      batch_size: number of samples requested from `replay_memory.sample`.
      learning_starts: first timestep at which refreshes/updates happen; must
        be a multiple of `target_update_freq`.
      learning_freq: a training step runs when `t % learning_freq == 0`.
      target_update_freq: `replay_memory.refresh()` runs when
        `t % target_update_freq == 0`.
      grad_clip: if not None, every gradient is clipped element-wise to
        `[-grad_clip, +grad_clip]`.
      log_every_n_steps: statistics are printed when
        `t % log_every_n_steps == 0`.
      mov_avg_size: number of benchmark episodes and maximum length of the
        reward window used for the printed mean/std.

    Returns:
      None.
    """

    assert (learning_starts % target_update_freq) == 0
    assert type(env.observation_space) == gym.spaces.Box
    assert type(env.action_space)      == gym.spaces.Discrete

    # Network input is a stack of `history_len` observations.
    input_shape = (replay_memory.history_len, *env.observation_space.shape)
    n_actions = env.action_space.n
    benchmark_env = HistoryWrapper(benchmark_env, replay_memory.history_len)

    # build model
    session = get_session()

    obs_t_ph  = tf.placeholder(env.observation_space.dtype, [None] + list(input_shape))
    act_t_ph  = tf.placeholder(tf.int32,   [None])
    return_ph = tf.placeholder(tf.float32, [None])

    qvalues, rnn_state_tf = q_func(obs_t_ph, n_actions, scope='q_func')
    greedy_action = tf.argmax(qvalues, axis=1)

    # Pair each batch row with its taken action to pick Q(s, a) via gather_nd.
    action_indices = tf.stack([tf.range(tf.size(act_t_ph)), act_t_ph], axis=-1)
    onpolicy_qvalues = tf.gather_nd(qvalues, action_indices)

    # Mean squared error between the fed returns and Q(s, a).
    td_error = return_ph - onpolicy_qvalues
    total_error = tf.reduce_mean(tf.square(td_error))

    # compute and clip gradients
    grads_and_vars = optimizer.compute_gradients(total_error, var_list=tf.trainable_variables(scope='q_func'))
    if grad_clip is not None:
        grads_and_vars = [(tf.clip_by_value(g, -grad_clip, +grad_clip), v) for g, v in grads_and_vars]
    train_op = optimizer.apply_gradients(grads_and_vars)

    def refresh(states, actions):
        # Returns Q(s, a) for the stored actions plus a boolean mask that is
        # True where the stored action equals the current greedy action.
        onpolicy_qvals, greedy = session.run([onpolicy_qvalues, greedy_action], feed_dict={
            obs_t_ph: states,
            act_t_ph: actions,
        })
        mask = (actions == greedy)
        return onpolicy_qvals, mask

    replay_memory.register_refresh_func(refresh)

    # initialize variables
    session.run(tf.global_variables_initializer())

    def epsilon_greedy(obs, rnn_state, epsilon):
        # Feed-forward policy: rnn_state is ignored and None is returned in
        # its place so both policies share one call signature.
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = session.run(greedy_action, feed_dict={obs_t_ph: obs[None]})[0]
        return action, None

    def epsilon_greedy_rnn(obs, rnn_state, epsilon):
        feed_dict = {obs_t_ph: obs[None]}
        if rnn_state is not None:
            feed_dict[q_func.rnn_state] = rnn_state

        if random.random() < epsilon:
            action = env.action_space.sample()
            # The recurrent state is still advanced on exploratory steps.
            rnn_state = session.run(rnn_state_tf, feed_dict)
        else:
            action, rnn_state = session.run([greedy_action, rnn_state_tf], feed_dict)
            action = action[0]

        return action, rnn_state

    best_mean_reward = -float('inf')
    obs = env.reset()
    rnn_state = None
    n_epochs = 0
    policy = epsilon_greedy_rnn if q_func.is_recurrent() else epsilon_greedy
    # Seed the reward window with a benchmark run at epsilon=1.0.
    # NOTE(review): presumably `benchmark` returns per-episode rewards - confirm.
    rewards = deque(benchmark(benchmark_env, policy, epsilon=1.0, n_episodes=mov_avg_size), maxlen=mov_avg_size)
    start_time = time.time()

    for t in itertools.count():
        if t % log_every_n_steps == 0:
            print('Epoch', n_epochs)
            print('Timestep', t)
            print('Realtime {:.3f}'.format(time.time() - start_time))

            # The deque keeps only the most recent `mov_avg_size` entries.
            rewards.extend(get_episode_rewards(env))
            mean_reward = np.mean(rewards)
            std_reward = np.std(rewards)
            best_mean_reward = max(mean_reward, best_mean_reward)

            print('Episodes', len(get_episode_rewards(env)))
            print('Exploration', exploration.value(t))
            print('Mean reward', mean_reward)
            print('Best mean reward', best_mean_reward)
            print('Standard dev', std_reward)
            print(flush=True)

            n_epochs += 1

        # Checked after logging so the final timestep is still reported.
        if t >= max_timesteps:
            break

        # Store the raw frame, then act on the history-stacked observation.
        replay_memory.store_frame(obs)
        obs = replay_memory.encode_recent_observation()

        action, rnn_state = policy(obs, rnn_state, epsilon=exploration.value(t))
        obs, reward, done, _ = env.step(action)

        replay_memory.store_effect(action, reward, done)

        if done:
            obs = env.reset()
            rnn_state = None

        if t >= learning_starts:
            if t % target_update_freq == 0:
                replay_memory.refresh()

            if t % learning_freq == 0:
                obs_batch, act_batch, ret_batch = replay_memory.sample(batch_size)

                session.run(train_op, feed_dict={
                    obs_t_ph:  obs_batch,
                    act_t_ph:  act_batch,
                    return_ph: ret_batch,
                })
Exemple #43
0
# Packing sequences: run a [batch, step, input] tensor through a static RNN.

n_steps, n_inputs, n_neurons = 2, 3, 5

reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# static_rnn takes a Python list with one tensor per time step, so the step
# axis is moved to the front and the tensor is split along it.
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(
    basic_cell, X_seqs, dtype=tf.float32)
# Re-pack the per-step outputs and restore the batch-major axis order.
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])

init = tf.global_variables_initializer()

X_batch = np.array([
    # t = 0      t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
])

with tf.Session() as sess:
    sess.run(init)
    outputs_val = sess.run(outputs, feed_dict={X: X_batch})
Exemple #44
0
    def build_loss_and_gradients(self, var_list):
        r"""Build the loss and importance-sampled gradients for KL(p || q).

        The objective is

        .. math::
          \text{KL}( p(z \mid x) \| q(z) )
          = \mathbb{E}_{p(z \mid x)} [ \log p(z \mid x) - \log q(z; \lambda) ]

        and stochastic gradients are based on importance sampling.

        The loss function can be estimated as

        .. math::
          \frac{1}{S} \sum_{s=1}^S [
            w_{\text{norm}}(z^s; \lambda)
            (\log p(x, z^s) - \log q(z^s; \lambda)) ],

        where for :math:`z^s \sim q(z; \lambda)`,

        .. math::

          w_{\text{norm}}(z^s; \lambda) =
              w(z^s; \lambda) / \sum_{s=1}^S w(z^s; \lambda)

        normalizes the importance weights, :math:`w(z^s; \lambda) = p(x,
        z^s) / q(z^s; \lambda)`.

        This provides a gradient,

        .. math::
          - \frac{1}{S} \sum_{s=1}^S [
            w_{\text{norm}}(z^s; \lambda) \nabla_{\lambda} \log q(z^s; \lambda) ].

        Args:
          var_list: variables the gradients are taken with respect to.

        Returns:
          A pair ``(loss, grads_and_vars)`` where ``loss`` is a scalar tensor
          and ``grads_and_vars`` is a list of ``(gradient, variable)`` tuples
          in the order of ``var_list``.
        """
        # One accumulated log-density per sample, for the model (p) and for
        # the approximating family (q).
        p_log_prob = [0.0] * self.n_samples
        q_log_prob = [0.0] * self.n_samples
        for s in range(self.n_samples):
            # Form dictionary in order to replace conditioning on prior or
            # observed variable with conditioning on a specific value.
            scope = 'inference_' + str(id(self)) + '/' + str(s)
            dict_swap = {}
            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable):
                    if isinstance(qx, RandomVariable):
                        qx_copy = copy(qx, scope=scope)
                        dict_swap[x] = qx_copy.value()
                    else:
                        dict_swap[x] = qx

            for z, qz in six.iteritems(self.latent_vars):
                # Copy q(z) to obtain new set of posterior samples.
                qz_copy = copy(qz, scope=scope)
                dict_swap[z] = qz_copy.value()
                # The sample is treated as a constant here, so gradients only
                # flow through the parameters of q.
                q_log_prob[s] += tf.reduce_sum(
                    qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))

            for z in six.iterkeys(self.latent_vars):
                z_copy = copy(z, dict_swap, scope=scope)
                p_log_prob[s] += tf.reduce_sum(z_copy.log_prob(dict_swap[z]))

            for x in six.iterkeys(self.data):
                if isinstance(x, RandomVariable):
                    x_copy = copy(x, dict_swap, scope=scope)
                    p_log_prob[s] += tf.reduce_sum(
                        x_copy.log_prob(dict_swap[x]))

        p_log_prob = tf.stack(p_log_prob)
        q_log_prob = tf.stack(q_log_prob)

        if self.logging:
            summary_key = 'summaries_' + str(id(self))
            tf.summary.scalar("loss/p_log_prob",
                              tf.reduce_mean(p_log_prob),
                              collections=[summary_key])
            tf.summary.scalar("loss/q_log_prob",
                              tf.reduce_mean(q_log_prob),
                              collections=[summary_key])

        # Normalize the importance weights in log space (log-sum-exp) before
        # exponentiating.
        log_w = p_log_prob - q_log_prob
        log_w_norm = log_w - tf.reduce_logsumexp(log_w)
        w_norm = tf.exp(log_w_norm)

        loss = tf.reduce_mean(w_norm * log_w)
        # The weights are held constant so that only log q is differentiated.
        grads = tf.gradients(
            -tf.reduce_mean(q_log_prob * tf.stop_gradient(w_norm)), var_list)
        grads_and_vars = list(zip(grads, var_list))
        return loss, grads_and_vars
Exemple #45
0
    def _last_token(x: tf.Tensor, sequence_lengths: tf.Tensor) -> tf.Tensor:
        """Pick, for every batch row of ``x``, the entry at index
        ``sequence_lengths - 1`` along the second axis.

        The index is clamped at 0, so a length of 0 selects position 0.
        """
        final_step = tf.maximum(0, sequence_lengths - 1)
        rows = tf.range(tf.shape(final_step)[0])
        # One (row, step) coordinate pair per batch element.
        return tf.gather_nd(x, tf.stack([rows, final_step], axis=1))
Exemple #46
0
    def _dual_pointer_decoder(self, decoder_input, decoder_init_state,
                              decoder_hidden, pointing_memory):
        '''
        Build the dual pointer-network decoder together with its loss and
        train op.

        Two LSTM cells (object side and subject side) are unrolled for
        ``self.max_entities`` steps over the same inputs. At every step each
        cell output queries ``pointing_memory`` through multi-head attention,
        giving a pointing distribution and a context vector that is projected
        to relation logits.

        :param decoder_input: decoder input, unstacked along axis 1 into one
            tensor per decoding step
        :param decoder_init_state: initial state shared by both decoder cells
        :param decoder_hidden: hidden size of the decoder cells
        :param pointing_memory: memory the decoder points into (used as both
            key and value of the attentions)
        :return: None; predictions, losses, ``loss``, ``train_op`` and
            ``saver`` are stored on ``self``
        '''
        with tf.variable_scope("decoder_v3"):
            with tf.variable_scope("object_cell_define"):
                obj_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    decoder_hidden, dropout_keep_prob=self.keep_pob)
                obj_state = decoder_init_state

            with tf.variable_scope("subject_cell_define"):
                subj_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    decoder_hidden, dropout_keep_prob=self.keep_pob)
                subj_state = decoder_init_state

            with tf.variable_scope("decoder_input_layer"):
                step_inputs = tf.unstack(decoder_input, axis=1)

            with tf.variable_scope("decoding_triple", reuse=tf.AUTO_REUSE):
                # Per-step outputs of the dual pointing loop.
                obj_steps, rel_steps = [], []
                subj_steps, rev_rel_steps = [], []

                for step in range(self.max_entities):
                    step_input = step_inputs[step]
                    obj_out, obj_state = obj_cell(step_input, obj_state)
                    subj_out, subj_state = subj_cell(step_input, subj_state)

                    # Add a length-1 query axis for the attention.
                    obj_query = tf.expand_dims(obj_out, axis=1)
                    subj_query = tf.expand_dims(subj_out, axis=1)

                    # Pointing is performed with multi-head attention.
                    rel_context, obj_pointer = self._multi_head_attention(
                        key=pointing_memory,
                        query=obj_query,
                        value=pointing_memory,
                        attention_name="object_pointing")
                    rev_context, subj_pointer = self._multi_head_attention(
                        key=pointing_memory,
                        query=subj_query,
                        value=pointing_memory,
                        attention_name="subject_pointing")
                    obj_pointer = tf.squeeze(obj_pointer, axis=1)
                    subj_pointer = tf.squeeze(subj_pointer, axis=1)
                    rel_context = tf.squeeze(rel_context, axis=1)
                    rev_context = tf.squeeze(rev_context, axis=1)

                    rel_logit = tf.layers.dense(
                        rel_context,
                        units=self.relation_vocab_size,
                        activation=tf.nn.leaky_relu,
                        name="relation_label")
                    rev_rel_logit = tf.layers.dense(
                        rev_context,
                        units=self.relation_vocab_size,
                        activation=tf.nn.leaky_relu,
                        name="rev_relation_label")

                    obj_steps.append(obj_pointer)
                    rel_steps.append(rel_logit)
                    subj_steps.append(subj_pointer)
                    rev_rel_steps.append(rev_rel_logit)

                # Collect the steps on axis 1.
                object_logits = tf.stack(obj_steps, axis=1)
                relation_logits = tf.stack(rel_steps, axis=1)
                subject_logits = tf.stack(subj_steps, axis=1)
                rev_relation_logits = tf.stack(rev_rel_steps, axis=1)

                self.object_predicts = tf.argmax(object_logits, axis=-1)
                self.relation_predicts = tf.argmax(relation_logits, axis=-1)
                self.subject_predicts = tf.argmax(subject_logits, axis=-1)
                self.rev_relation_predicts = tf.argmax(rev_relation_logits,
                                                       axis=-1)

            with tf.variable_scope("training_layer"):
                # Training ops: four weighted cross-entropy terms.
                object_xent = tf.losses.sparse_softmax_cross_entropy(
                    logits=object_logits,
                    labels=self.object_target,
                    weights=self.relation_weight)
                relation_xent = tf.losses.sparse_softmax_cross_entropy(
                    logits=relation_logits,
                    labels=self.relation_target,
                    weights=self.relation_weight)
                subject_xent = tf.losses.sparse_softmax_cross_entropy(
                    logits=subject_logits,
                    labels=self.subject_target,
                    weights=self.rev_relation_weight)
                rev_relation_xent = tf.losses.sparse_softmax_cross_entropy(
                    logits=rev_relation_logits,
                    labels=self.rev_relation_target,
                    weights=self.rev_relation_weight)

                self.object_loss = tf.reduce_mean(object_xent)
                self.re_loss = tf.reduce_mean(relation_xent)
                self.subject_loss = tf.reduce_mean(subject_xent)
                self.rev_re_loss = tf.reduce_mean(rev_relation_xent)

                # Pointing terms weigh 0.4 each, relation terms 0.1 each.
                self.loss = (0.4 *
                             self.object_loss) + (0.4 * self.subject_loss) + (
                                 0.1 * self.re_loss) + (0.1 * self.rev_re_loss)

                # Adam optimizer followed by an exponential moving average of
                # the trainable variables.
                adam = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate)
                self._gradients = adam.compute_gradients(self.loss)
                apply_step = adam.apply_gradients(
                    self._gradients, global_step=self.global_step)
                averager = tf.train.ExponentialMovingAverage(decay=0.9999)

                # The EMA update only runs after the gradient step is applied.
                with tf.control_dependencies([apply_step]):
                    ema_update = averager.apply(
                        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
                    self.train_op = tf.group(ema_update)

                self.saver = tf.train.Saver(tf.global_variables(),
                                            max_to_keep=10)
Exemple #47
0
 def call(self, dec_output, final_output, attention_weights, encoder_input,
          inp_shape, tar_shape, batch, training):
   """Mix the generation and copy distributions and return their log.

   ``p_gen = self.generator_vec(dec_output)`` weights the softmax over
   ``final_output``; ``1 - p_gen`` weights the head-averaged attention, which
   is scattered onto the token ids found in ``encoder_input``. The result is
   ``log(vocab_dist + copy_probs + 0.001)``.

   Shapes as noted by the original author: p_gen (batch_size, tar_seq_len, 1),
   vocab_dist and copy_probs (batch_size, tar_seq_len, target_vocab_size),
   attention_dist (batch_size, tar_seq_len, inp_seq_len); attention_weights is
   4D with the heads on axis 1.

   The ``batch`` argument is replaced by the runtime batch size of
   ``encoder_input``; ``training`` is not used.
   """
   batch = tf.shape(encoder_input)[0]
   p_gen = self.generator_vec(dec_output)
   tf.debugging.check_numerics(final_output, "Nan's in the final_output")
   generation_probs = tf.math.softmax(final_output, axis=-1)
   vocab_dist = p_gen * generation_probs

   # p_gen and the softmax output must both be strictly positive.
   tf.debugging.assert_non_negative(
       p_gen, message='negative_values_in_p_gen')
   tf.debugging.assert_greater(
       p_gen, tf.cast([0], dtype=tf.float32), message='zeros_in_p_gen')
   tf.debugging.assert_non_negative(
       generation_probs, message='negative_values_in_vocab_dist_')
   tf.debugging.assert_greater(
       generation_probs, tf.cast([0], dtype=tf.float32),
       message='zeros_in_vocab_dist_')

   # Average over the heads axis, then renormalize over input positions.
   head_averaged = tf.reduce_mean(attention_weights, axis=1)
   attention_dist = tf.math.softmax(head_averaged, axis=-1)
   tf.debugging.check_numerics(
       attention_weights, "Nan's in the attention_weights")

   # Probability mass that goes to copying, per input position.
   updates = (1 - p_gen) * attention_dist
   shape = tf.shape(final_output)

   # Build (batch, target step, token id) coordinates for scatter_nd, see
   # https://stackoverflow.com/questions/45162998/proper-usage-of-tf-scatter-nd-in-tensorflow-r1-2
   batch_ids, step_ids = tf.meshgrid(
       tf.range(batch), tf.range(tar_shape), indexing="ij")
   batch_ids = tf.tile(batch_ids[:, :, tf.newaxis], [1, 1, inp_shape])
   step_ids = tf.tile(step_ids[:, :, tf.newaxis], [1, 1, inp_shape])
   # scatter_nd needs integer indices; repeat the input ids for every step.
   token_ids = tf.cast(encoder_input, dtype=tf.int32)
   token_ids = tf.tile(token_ids[:, tf.newaxis, :], [1, tar_shape, 1])
   indices = tf.stack([batch_ids, step_ids, token_ids], axis=-1)
   copy_probs = tf.scatter_nd(indices, updates, shape)

   # Constant offset keeps the log finite.
   smoothed_probs = (vocab_dist + copy_probs) + 0.001
   combined_logits = tf.math.log(smoothed_probs)
   tf.debugging.check_numerics(
       combined_logits, "Nan's in the combined_logits")
   return combined_logits
Exemple #48
0
# Graph inputs: integer ids, a 4-channel float feature map, labels and the
# dropout rate.
with tf.name_scope('inputs') as scope:
    Inp0 = tf.placeholder(tf.int32, [None, None], name='sequence_factors1')
    Inp1 = tf.placeholder(tf.float32, [None, 4, None],
                          name='sequence_factors2')
    labels = tf.placeholder(tf.float32, [None, 1], name='labels')
    dropout = tf.placeholder(tf.float32, name='dropout')

# Embed the ids with a 20 x 5 table and stack the 5 embedding channels with
# the 4 channels of Inp1 into one single-channel "image".
with tf.name_scope('embedding') as scope:
    aa_embeddings = tf.get_variable('aa_embeddings', [20, 5])
    # All 20 rows of the table; not referenced again in the visible code.
    embedded_word_ids = tf.gather(aa_embeddings, range(0, 20))
    embed0 = tf.nn.embedding_lookup(aa_embeddings, Inp0, name='lookup')
    # Move the embedding axis before the sequence axis to match Inp1's layout.
    embed1 = tf.transpose(embed0, (0, 2, 1))
    unstack0 = tf.unstack(Inp1, axis=-2, name='unstack0')
    unstack1 = tf.unstack(embed1, axis=-2, name='unstack1')
    # Inner stack joins the 4 + 5 rows on axis 1; the outer stack appends a
    # trailing axis of size 1 that serves as the channel axis for conv2d.
    layer0 = tf.stack([tf.stack(unstack0 + unstack1, axis=1)],
                      -1,
                      name='stack')

with tf.name_scope('layer1') as scope:
    # NOTE(review): batch_normalization is a helper defined outside this
    # chunk; its second argument looks like a name/scope - confirm.
    layer1_norm = batch_normalization(layer0, 'BN_layer0')
    layer1 = tf.layers.conv2d(layer1_norm,
                              32, (4, 4),
                              padding='same',
                              activation=tf.nn.relu)
    # NOTE(review): no `training` argument is passed; tf.layers.dropout
    # defaults to training=False, which returns the input unchanged - confirm
    # this is intended.
    layer1_DO = tf.layers.dropout(layer1, rate=dropout, name='Drop1')

with tf.name_scope('layer2') as scope:
    layer2_norm = batch_normalization(layer1_DO, 'BN_layer1')
    layer2 = tf.layers.conv2d(layer2_norm,
                              64, (4, 4),
                              padding='same',
Exemple #49
0
def complex2real(x, axis=-1):
    """Stack the real and imaginary parts of `x` along a new axis `axis`."""
    real_part = tf.math.real(x)
    imag_part = tf.math.imag(x)
    return tf.stack((real_part, imag_part), axis=axis)
def ioi_model(input_x, input_x_mask, input_y, input_y_mask, word_emb, keep_rate, conf):
    """Build the stacked context/response interaction graph.

    Each of the ``num_layer`` (7) blocks computes self- and cross-attended
    representations of the context utterances and of the response, derives
    three scaled dot-product similarity maps from them, and summarises those
    maps with a two-stage CNN followed by a GRU run over the turn axis.

    Args:
        input_x: Context token ids. Reshaped below so that the last axis is
            ``conf["max_turn_len"]`` (presumably
            ``[batch, max_turn_num, max_turn_len]`` -- confirm with caller).
        input_x_mask: Mask aligned with ``input_x``; cast to float.
        input_y: Response token ids, ``[batch, len_res]``; its static
            ``shape[1]`` is used as the response length.
        input_y_mask: Mask aligned with ``input_y``; cast to float.
        word_emb: Embedding matrix. Its embedding width has to equal the
            hard-coded ``embed_dim`` (200) for the reshapes below to succeed.
        keep_rate: Keep probability; ``1.0 - keep_rate`` is used as drop rate.
        conf: Mapping providing ``"max_turn_num"`` and ``"max_turn_len"``.

    Returns:
        A tuple ``(last_hidden, fea_list)`` where ``fea_list`` holds the final
        GRU state of every block and ``last_hidden`` is their concatenation
        along the last axis.
    """
    embed_dim = 200
    max_turn = conf["max_turn_num"]
    max_word_len = conf["max_turn_len"]
    max_word_len_a = input_y.shape[1]
    num_layer = 7

    context = input_x
    context_mask = tf.to_float(input_x_mask)
    response = input_y
    response_mask = tf.to_float(input_y_mask)

    # Repeat the response mask once per turn and flatten (batch, turn) into a
    # single leading axis so that every utterance is paired with the response.
    expand_response_mask = tf.tile(tf.expand_dims(response_mask, 1), [1, max_turn, 1])
    expand_response_mask = tf.reshape(expand_response_mask, [-1, max_word_len_a])
    parall_context_mask = tf.reshape(context_mask, [-1, max_word_len])

    context_embeddings = tf.nn.embedding_lookup(word_emb, context)
    response_embeddings = tf.nn.embedding_lookup(word_emb, response)
    # NOTE(review): tf.layers.dropout defaults to training=False, in which
    # case it is an identity op -- confirm that dropout is meant to be off
    # here and in the other tf.layers.dropout calls of this function.
    context_embeddings = tf.layers.dropout(context_embeddings, rate=1.0-keep_rate)
    response_embeddings = tf.layers.dropout(response_embeddings, rate=1.0-keep_rate)
    # Zero out the embeddings of masked positions.
    context_embeddings = tf.multiply(context_embeddings, tf.expand_dims(context_mask, axis=-1))
    response_embeddings = tf.multiply(response_embeddings, tf.expand_dims(response_mask, axis=-1))

    # Same (batch, turn) flattening as for the masks above.
    expand_response_embeddings = tf.tile(tf.expand_dims(response_embeddings, 1), [1, max_turn, 1, 1])
    expand_response_embeddings = tf.reshape(expand_response_embeddings, [-1, max_word_len_a, embed_dim])
    parall_context_embeddings = tf.reshape(context_embeddings, [-1, max_word_len, embed_dim])
    context_rep, response_rep = parall_context_embeddings, expand_response_embeddings

    fea_list = []
    for k in range(num_layer):
        with tf.variable_scope('dense_interaction_{}'.format(k)):
            # get the self rep
            # (`self_attention` is defined outside this excerpt; element [1]
            # of its result is used as the attended representation.)
            context_self_rep = self_attention(context_rep, context_rep, embed_dim,
                                              query_masks=parall_context_mask,
                                              key_masks=parall_context_mask,
                                              num_blocks=1, num_heads=1,
                                              dropout_rate=1.0-keep_rate,
                                              use_residual=True, use_feed=True,
                                              scope='context_self_attention')[1]  # presumably [batch*turn, len_utt, embed_dim]
            response_self_rep = self_attention(response_rep, response_rep, embed_dim,
                                               query_masks=expand_response_mask,
                                               key_masks=expand_response_mask,
                                               num_blocks=1, num_heads=1,
                                               dropout_rate=1.0-keep_rate,
                                               use_residual=True, use_feed=True,
                                               scope='response_self_attention')[1]  # presumably [batch*turn, len_res, embed_dim]

            # get the attended rep
            context_cross_rep = self_attention(context_rep, response_rep, embed_dim,
                                               query_masks=parall_context_mask,
                                               key_masks=expand_response_mask,
                                               num_blocks=1, num_heads=1,
                                               dropout_rate=1.0-keep_rate,
                                               use_residual=True, use_feed=True,
                                               scope='context_cross_attention')[1]  # [batch*turn, len_utt, embed_dim]

            response_cross_rep = self_attention(response_rep, context_rep, embed_dim,
                                                query_masks=expand_response_mask,
                                                key_masks=parall_context_mask,
                                                num_blocks=1, num_heads=1,
                                                dropout_rate=1.0-keep_rate,
                                                use_residual=True, use_feed=True,
                                                scope='response_cross_attention')[1]  # [batch*turn, len_res, embed_dim]

            # Element-wise interaction between each side and its cross-attended view.
            context_inter_feat_multi = tf.multiply(context_rep, context_cross_rep)
            response_inter_feat_multi = tf.multiply(response_rep, response_cross_rep)

            context_concat_rep = tf.concat([context_rep, context_self_rep, context_cross_rep, context_inter_feat_multi], axis=-1)
            response_concat_rep = tf.concat([response_rep, response_self_rep, response_cross_rep, response_inter_feat_multi], axis=-1)

            # Project the 4*embed_dim concatenation back down to embed_dim.
            context_concat_dense_rep = tf.layers.dense(context_concat_rep, embed_dim, activation=tf.nn.relu, use_bias=True,
                                                       name='context_dense1')
            context_concat_dense_rep = tf.layers.dropout(context_concat_dense_rep, rate=1.0-keep_rate)

            response_concat_dense_rep = tf.layers.dense(response_concat_rep, embed_dim, activation=tf.nn.relu, use_bias=True,
                                                        name='response_dense1')
            response_concat_dense_rep = tf.layers.dropout(response_concat_dense_rep, rate=1.0-keep_rate)

            # Scaled dot-product similarity maps, [batch*turn, len_utt, len_res],
            # computed from the representations that ENTER this block.
            inter_feat = tf.matmul(context_rep, tf.transpose(response_rep, perm=[0, 2, 1])) / tf.sqrt(tf.to_float(embed_dim))
            inter_feat_self = tf.matmul(context_self_rep, tf.transpose(response_self_rep, perm=[0, 2, 1])) / tf.sqrt(tf.to_float(embed_dim))
            inter_feat_cross = tf.matmul(context_cross_rep, tf.transpose(response_cross_rep, perm=[0, 2, 1])) / tf.sqrt(tf.to_float(embed_dim))

            # Residual update; from the second block on the raw embeddings
            # are added as well.
            if k == 0:
                context_rep = tf.add(context_rep, context_concat_dense_rep)
                response_rep = tf.add(response_rep, response_concat_dense_rep)
            else:
                context_rep = tf.add_n([parall_context_embeddings, context_rep, context_concat_dense_rep])
                response_rep = tf.add_n([expand_response_embeddings, response_rep, response_concat_dense_rep])

            context_rep = normalize(context_rep, scope='layer_context_normalize')
            response_rep = normalize(response_rep, scope='layer_response_normalize')

            # Re-apply the masks so padded positions stay zero for the next block.
            context_rep = tf.multiply(context_rep, tf.expand_dims(parall_context_mask, axis=-1))
            response_rep = tf.multiply(response_rep, tf.expand_dims(expand_response_mask, axis=-1))

            # The three similarity maps become the channels of a 2-D "image".
            matching_feat = tf.stack([inter_feat, inter_feat_self, inter_feat_cross], axis=-1)

        with tf.variable_scope('CRNN_{}'.format(k)):
            conv1 = tf.layers.conv2d(matching_feat, filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                                     activation=tf.nn.relu, name='conv1')
            pool1 = tf.layers.max_pooling2d(conv1, (3, 3), strides=(3, 3), padding='same', name='max_pooling1')

            conv2 = tf.layers.conv2d(pool1, filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same',
                                     activation=tf.nn.relu, name='conv2')
            pool2 = tf.layers.max_pooling2d(conv2, (3, 3), strides=(3, 3), padding='same', name='max_pooling2')
            flatten = tf.contrib.layers.flatten(pool2)
            flatten = tf.layers.dropout(flatten, rate=1.0-keep_rate)

            matching_vector = tf.layers.dense(flatten, embed_dim,
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              activation=tf.tanh, name='dense_feat')
            # Un-flatten (batch*turn) so the GRU runs over the turn axis.
            matching_vector = tf.reshape(matching_vector, [-1, max_turn, embed_dim])

            final_gru_cell = tf.contrib.rnn.GRUCell(embed_dim, kernel_initializer=tf.orthogonal_initializer())
            _, last_hidden = tf.nn.dynamic_rnn(final_gru_cell, matching_vector, dtype=tf.float32, scope='final_GRU')  # TODO: check time_major
            fea_list.append(last_hidden)

    last_hidden = tf.concat(fea_list, axis=-1)
    # The output of this layer is discarded; the call is kept only because it
    # still creates the `final_v` variables, so the graph's variable set (and
    # therefore existing checkpoints) stays unchanged.
    tf.layers.dense(last_hidden, 50, kernel_initializer=tf.contrib.layers.xavier_initializer(), name='final_v')
    return last_hidden, fea_list
Exemple #51
0
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 3-D tensor of shape `[image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      as the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list (or tuple) of two integers
      `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width.
      The number of image channels should be divisible by
      (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
        the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient than
        running the function with global_pool=False and then performing global
        average pooling.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (Average-pooled cropped regions),
      and K = crop_size when global_pool is False.
  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []

  # Validate the static arguments before any graph op is created.
  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')

    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')

    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # The extent of a single bin is identical for every bin, so it is computed
  # once instead of once per loop iteration.
  step_y = (ymax - ymin) / spatial_bins_y
  step_x = (xmax - xmin) / spatial_bins_x

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    for bin_x in range(spatial_bins_x):
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x,
                        ]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  # One channel group per bin.
  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    # With fully static shapes the matmul-based crop is used; otherwise fall
    # back to the native crop_and_resize op.
    if split.shape.is_fully_defined() and box.shape.is_fully_defined():
      crop = tf.squeeze(
          matmul_crop_and_resize(
              tf.expand_dims(split, axis=0), tf.expand_dims(box, axis=0),
              bin_crop_size),
          axis=0)
    else:
      crop = tf.image.crop_and_resize(
          tf.expand_dims(split, 0), box,
          tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keep_dims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(
          crop, block_size=block_size) for crop in image_crops]

    # Pack image_crops so that first dimension is for position-sensitive boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    # list(...) lets callers pass num_spatial_bins as a tuple as well; a bare
    # `[1] + tuple` concatenation would raise TypeError.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + list(num_spatial_bins),
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        squeeze_dims=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features
Exemple #52
0
    def _build_network(self, layers):
        """Assemble the network described by `layers` on top of self.input_tensor.

        `layers` is a sequence of dicts; each dict's "type" entry selects the
        op appended to the graph and the remaining entries supply that op's
        hyper-parameters. Supported types: DenseLayer, DropOut, EIIE_Dense,
        ConvLayer, MaxPooling, AveragePooling, LocalResponseNormalization,
        EIIE_Output, Output_WithW, EIIE_Output_WithW, EIIE_LSTM and EIIE_RNN.

        The "EIIE_Output_WithW" branch additionally stores the pre-softmax
        tensor in `self.voting`.

        Returns the output tensor of the last layer.
        Raises ValueError for an unknown layer type.
        """
        net = tf.transpose(self.input_tensor, [0, 2, 3, 1])
        # [batch, assets, window, features]
        # Normalise each [batch, asset] slice by its value at the last index
        # of axis 2 and index 0 of axis 3.
        net = net / net[:, :, -1, 0, None, None]

        for layer_number, layer in enumerate(layers):
            kind = layer["type"]

            if kind == "DenseLayer":
                net = tflearn.layers.core.fully_connected(
                    net,
                    int(layer["neuron_number"]),
                    layer["activation_function"],
                    regularizer=layer["regularizer"],
                    weight_decay=layer["weight_decay"])

            elif kind == "DropOut":
                net = tflearn.layers.core.dropout(net, layer["keep_probability"])

            elif kind == "EIIE_Dense":
                # Kernel spans the whole of axis 2, collapsing it to size 1.
                span = net.get_shape()[2]
                net = tflearn.layers.conv_2d(
                    net, int(layer["filter_number"]),
                    [1, span],
                    [1, 1],
                    "valid",
                    layer["activation_function"],
                    regularizer=layer["regularizer"],
                    weight_decay=layer["weight_decay"])

            elif kind == "ConvLayer":
                net = tflearn.layers.conv_2d(
                    net, int(layer["filter_number"]),
                    allint(layer["filter_shape"]),
                    allint(layer["strides"]),
                    layer["padding"],
                    layer["activation_function"],
                    regularizer=layer["regularizer"],
                    weight_decay=layer["weight_decay"])

            elif kind == "MaxPooling":
                net = tflearn.layers.conv.max_pool_2d(net, layer["strides"])

            elif kind == "AveragePooling":
                net = tflearn.layers.conv.avg_pool_2d(net, layer["strides"])

            elif kind == "LocalResponseNormalization":
                net = tflearn.layers.normalization.local_response_normalization(net)

            elif kind == "EIIE_Output":
                span = net.get_shape()[2]
                net = tflearn.layers.conv_2d(
                    net, 1, [1, span], padding="valid",
                    regularizer=layer["regularizer"],
                    weight_decay=layer["weight_decay"])
                net = net[:, :, 0, 0]
                # Prepend a constant column of ones before the softmax.
                bias_column = tf.ones((self.input_num, 1))
                net = tf.concat([bias_column, net], 1)
                net = tflearn.layers.core.activation(net, activation="softmax")

            elif kind == "Output_WithW":
                net = tflearn.flatten(net)
                net = tf.concat([net, self.previous_w], axis=1)
                net = tflearn.fully_connected(
                    net, self._rows+1,
                    activation="softmax",
                    regularizer=layer["regularizer"],
                    weight_decay=layer["weight_decay"])

            elif kind == "EIIE_Output_WithW":
                span = net.get_shape()[2]
                rows = net.get_shape()[1]
                channels = net.get_shape()[3]
                # Fold axes 2 and 3 together, then append previous_w as one
                # extra channel per row.
                net = tf.reshape(
                    net, [self.input_num, int(rows), 1, int(span*channels)])
                prev_w = tf.reshape(self.previous_w, [-1, int(rows), 1, 1])
                net = tf.concat([net, prev_w], axis=3)
                net = tflearn.layers.conv_2d(
                    net, 1, [1, 1], padding="valid",
                    regularizer=layer["regularizer"],
                    weight_decay=layer["weight_decay"])
                net = net[:, :, 0, 0]
                # Prepend a constant column of zeros before the softmax.
                bias_column = tf.zeros((self.input_num, 1))
                net = tf.concat([bias_column, net], 1)
                self.voting = net
                net = tflearn.layers.core.activation(net, activation="softmax")

            elif kind in ("EIIE_LSTM", "EIIE_RNN"):
                net = tf.transpose(net, [0, 2, 3, 1])
                per_row_outputs = []
                for i in range(self._rows):
                    # The first slice creates the recurrent weights; every
                    # later slice reuses them.
                    share_weights = i > 0
                    if kind == "EIIE_LSTM":
                        row_out = tflearn.layers.lstm(
                            net[:, :, :, i],
                            int(layer["neuron_number"]),
                            dropout=layer["dropouts"],
                            scope="lstm"+str(layer_number),
                            reuse=share_weights)
                    else:
                        row_out = tflearn.layers.simple_rnn(
                            net[:, :, :, i],
                            int(layer["neuron_number"]),
                            dropout=layer["dropouts"],
                            scope="rnn"+str(layer_number),
                            reuse=share_weights)
                    per_row_outputs.append(row_out)
                net = tf.stack(per_row_outputs)
                net = tf.transpose(net, [1, 0, 2])
                net = tf.reshape(
                    net, [-1, self._rows, 1, int(layer["neuron_number"])])

            else:
                raise ValueError("the layer {} not supported.".format(layer["type"]))

        return net
Exemple #53
0
def inference_mem(images,
                  cams,
                  depth_num,
                  depth_start,
                  depth_interval,
                  is_master_gpu=True):
    """Infer a depth image from multi-view images and cameras.

    Memory-saving variant: the per-depth feature statistics are accumulated
    view by view in local variables inside a `tf.while_loop` instead of
    materialising all warped views at once.

    Args:
        images: Rank-5 image tensor; axis 1 indexes the views and view 0 is
            the reference (presumably `[batch, view, height, width, channel]`
            -- confirm with caller).
        cams: Rank-5 camera tensor, sliced per view to `[batch, 2, 4, 4]`.
        depth_num: Number of depth hypotheses. Must be a Python int because
            it drives a Python `range` loop.
        depth_start: Per-sample first depth value (indexed as
            `depth_start[i]`).
        depth_interval: Spacing between consecutive depth hypotheses.
        is_master_gpu: When True the network variables are created; any other
            tower reuses them.

    Returns:
        A tuple `(filtered_depth_map, prob_map)`; the depth map is zeroed
        wherever the probability is below 0.8.
    """
    # Only the master GPU creates the network variables.
    reuse_variables = not is_master_gpu

    # dynamic gpu params
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) -
                               1) * depth_interval
    feature_c = 32
    # Floor division: under Python 3 `/` would yield floats, which are not
    # valid dimensions for the tf.zeros shapes built below.
    feature_h = FLAGS.max_h // 4
    feature_w = FLAGS.max_w // 4

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0],
                                    [-1, 1, -1, -1, 3]),
                           axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]),
                         axis=1)

    # image feature extraction
    ref_tower = UniNetDS2({'data': ref_image},
                          is_training=True,
                          reuse=reuse_variables)
    ref_feature = ref_tower.get_output()
    ref_feature2 = tf.square(ref_feature)

    view_features = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0],
                                         [-1, 1, -1, -1, -1]),
                                axis=1)
        view_tower = UniNetDS2({'data': view_image},
                               is_training=True,
                               reuse=True)
        view_features.append(view_tower.get_output())
    view_features = tf.stack(view_features, axis=0)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0],
                                       [-1, 1, 2, 4, 4]),
                              axis=1)
        homographies = get_homographies(ref_cam,
                                        view_cam,
                                        depth_num=depth_num,
                                        depth_start=depth_start,
                                        depth_interval=depth_interval)
        view_homographies.append(homographies)
    view_homographies = tf.stack(view_homographies, axis=0)

    # build cost volume by differentiable homography
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []

        for d in range(depth_num):
            # compute cost (variance of the features across views)
            ave_feature = tf.Variable(
                tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature2 = tf.Variable(
                tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave2',
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            # Seed the running sum / sum of squares with the reference view.
            ave_feature = tf.assign(ave_feature, ref_feature)
            ave_feature2 = tf.assign(ave_feature2, ref_feature2)

            def body(view, ave_feature, ave_feature2):
                """Loop body: add one warped source view to both accumulators."""
                # `d` is read from the enclosing iteration; safe because the
                # while_loop is built before `d` advances.
                homography = tf.slice(view_homographies[view],
                                      begin=[0, d, 0, 0],
                                      size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                warped_view_feature = homography_warping(
                    view_features[view], homography)
                ave_feature = tf.assign_add(ave_feature, warped_view_feature)
                ave_feature2 = tf.assign_add(ave_feature2,
                                             tf.square(warped_view_feature))
                view = tf.add(view, 1)
                return view, ave_feature, ave_feature2

            view = tf.constant(0)
            cond = lambda view, *_: tf.less(view, FLAGS.view_num - 1)
            _, ave_feature, ave_feature2 = tf.while_loop(
                cond,
                body, [view, ave_feature, ave_feature2],
                back_prop=False,
                parallel_iterations=1)

            # mean^2 = (sum / N)^2 ; variance = sum_sq / N - mean^2
            ave_feature = tf.assign(
                ave_feature,
                tf.square(ave_feature) / (FLAGS.view_num * FLAGS.view_num))
            ave_feature2 = tf.assign(
                ave_feature2, ave_feature2 / FLAGS.view_num - ave_feature)
            depth_costs.append(ave_feature2)
        cost_volume = tf.stack(depth_costs, axis=1)

    # filtered cost volume, size of (B, D, H, W, 1)
    filtered_cost_volume_tower = RegNetUS0({'data': cost_volume},
                                           is_training=True,
                                           reuse=reuse_variables)
    filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(),
                                      axis=-1)

    # depth map by softArgmin
    with tf.name_scope('soft_arg_min'):
        # probability volume by soft max
        probability_volume = tf.nn.softmax(tf.scalar_mul(
            -1, filtered_cost_volume),
                                           axis=1,
                                           name='prob_volume')

        # depth image by soft argmin
        volume_shape = tf.shape(probability_volume)
        soft_2d = []
        for i in range(FLAGS.batch_size):
            # Depth hypotheses of sample i, evenly spaced from start to end.
            soft_1d = tf.linspace(depth_start[i], depth_end[i],
                                  tf.cast(depth_num, tf.int32))
            soft_2d.append(soft_1d)
        soft_2d = tf.reshape(tf.stack(soft_2d, axis=0),
                             [volume_shape[0], volume_shape[1], 1, 1])
        soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]])
        # Expected depth under the probability volume.
        estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume,
                                            axis=1)
        estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3)

    # probability map
    prob_map = get_propability_map(probability_volume, estimated_depth_map,
                                   depth_start, depth_interval)

    # Keep only depth estimates whose probability is at least 0.8.
    filtered_depth_map = tf.cast(tf.greater_equal(prob_map, 0.8),
                                 dtype='float32') * estimated_depth_map

    return filtered_depth_map, prob_map
def main(_):
    """Build the training graph for the tweet classifier and run slim training.

    The single positional argument is ignored (presumably argv handed over by
    the application launcher -- confirm). All configuration is read from the
    module-level ``FLAGS``.

    Graph construction proceeds in this order:
      1. Create the ``model_deploy`` deployment config and the global step.
      2. Load the dataset split and build the network function.
      3. Read sparse tweets from the data provider, densify, random-crop and
         batch them; one-hot encode the labels and prefetch the batches.
      4. Create the model clones, each attaching softmax cross-entropy losses.
      5. Collect summaries, configure moving averages and the optimizer.
      6. Group all update ops behind ``train_tensor`` and hand everything to
         ``slim.learning.train``.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = data.dataset.get_split(FLAGS.dataset_split_name,
                                         FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = get_resnet_func(num_classes=(dataset.num_classes -
                                                  FLAGS.labels_offset),
                                     weight_decay=FLAGS.weight_decay,
                                     is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        # TODO: preprocessing
        # The only preprocessing so far is a random crop of the dense tweet
        # matrix to [size, 5000]; the 5000 is hard-coded here.
        # NOTE(review): the parameter name `input` shadows the builtin.
        def random_crop(input, size):
            return tf.random_crop(input, [size, 5000])

        preprocessing_fn = random_crop

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            # Each tweet is stored in sparse form: two index vectors, the
            # values and the dense shape. Rebuild the SparseTensor and densify.
            [ix0, ix1, values, shape] = provider.get(
                ['tweets/ix0', 'tweets/ix1', 'tweets/values', 'tweets/shape'])
            indices = tf.stack([ix0, ix1], axis=1)
            tweet = tf.SparseTensor(indices=indices,
                                    values=values,
                                    dense_shape=shape)
            tweet = sparse_ops.sparse_tensor_to_dense(tweet,
                                                      validate_indices=False)
            [label] = provider.get(['label'])
            label -= FLAGS.labels_offset

            # Falls back to the network's default size when the flag is unset.
            train_tweet_size = FLAGS.train_image_size or network_fn.default_image_size

            tweet = preprocessing_fn(tweet, train_tweet_size)

            tweets, labels = tf.train.batch(
                [tweet, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [tweets, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            tweets, labels = batch_queue.dequeue()
            logits, end_points = network_fn(tweets)

            #############################
            # Specify the loss function #
            #############################
            # The losses are registered through tf.losses; their return values
            # are not used here.
            if 'AuxLogits' in end_points:
                tf.losses.softmax_cross_entropy(
                    logits=end_points['AuxLogits'],
                    onehot_labels=labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            tf.losses.softmax_cross_entropy(
                logits=logits,
                onehot_labels=labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating train_tensor returns total_loss only after every update
        # op (batch-norm updates, moving averages, gradient step) has run.
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        # NOTE(review): the session config hard-codes visible_device_list='0'.
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None,
            session_config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                visible_device_list='0')))
 def _predict_mean(self, Fmu, Fvar):
     """Evaluate ``self._density`` for every class label.

     For each class index in ``range(self.num_classes)`` a column of int64
     labels with as many rows as ``Fmu`` is built and passed to
     ``self._density`` together with ``Fmu`` and ``Fvar``. Each result is
     flattened and the results are returned side by side, one column per
     class.
     """
     per_class = []
     for class_index in range(self.num_classes):
         # (rows of Fmu, 1) tensor filled with the candidate class label.
         label_column = tf.fill(tf.stack([tf.shape(Fmu)[0], 1]),
                                np.array(class_index, dtype=np.int64))
         per_class.append(label_column)

     flattened = []
     for label_column in per_class:
         density = self._density(Fmu, Fvar, label_column)
         flattened.append(density)
     flattened = [tf.reshape(density, (-1,)) for density in flattened]

     # Stacking gives (num_classes, rows); transpose to (rows, num_classes).
     return tf.transpose(tf.stack(flattened))
Exemple #56
0
def inference(images,
              cams,
              depth_num,
              depth_start,
              depth_interval,
              is_master_gpu=True):
    """Infer a depth map and its probability map from multi-view inputs.

    Pipeline: extract features for the reference view and every other view,
    warp the other views onto each depth plane with homographies, build a
    variance cost volume, regularise it, and take the soft argmin over depth.

    Args:
        images: view stack sliced as (batch, view, height, width, channel);
            view 0 is the reference view.
        cams: camera stack sliced as (batch, view, 2, 4, 4).
        depth_num: number of depth planes (iterated with ``range`` below).
        depth_start: first depth value, indexed per batch element.
        depth_interval: spacing between consecutive depth planes.
        is_master_gpu: when true the networks create their variables;
            otherwise they are built with ``reuse=True``.

    Returns:
        Tuple ``(estimated_depth_map, prob_map)``.
    """
    # Only the master tower creates variables; every other tower reuses them.
    reuse_variables = not is_master_gpu

    # Depth of the last plane.
    last_plane_index = tf.cast(depth_num, tf.float32) - 1
    depth_end = depth_start + last_plane_index * depth_interval

    # Reference view: image (first three channels) and camera.
    ref_image_slice = tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3])
    ref_image = tf.squeeze(ref_image_slice, axis=1)
    ref_cam_slice = tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4])
    ref_cam = tf.squeeze(ref_cam_slice, axis=1)

    # Feature extraction: the source views always share the reference weights.
    ref_tower = UniNetDS2({'data': ref_image},
                          is_training=True,
                          reuse=reuse_variables)
    source_views = range(1, FLAGS.view_num)
    view_towers = []
    for view in source_views:
        view_slice = tf.slice(images, [0, view, 0, 0, 0],
                              [-1, 1, -1, -1, -1])
        view_towers.append(
            UniNetDS2({'data': tf.squeeze(view_slice, axis=1)},
                      is_training=True,
                      reuse=True))

    # One stack of homographies (over all depth planes) per source view.
    view_homographies = [
        get_homographies(ref_cam,
                         tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0],
                                             [-1, 1, 2, 4, 4]),
                                    axis=1),
                         depth_num=depth_num,
                         depth_start=depth_start,
                         depth_interval=depth_interval)
        for view in source_views
    ]

    # Cost volume via differentiable homography warping.
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []
        for plane in range(depth_num):
            # Running sums of features and squared features over all views.
            feature_sum = ref_tower.get_output()
            square_sum = tf.square(ref_tower.get_output())
            for tower, homographies in zip(view_towers, view_homographies):
                plane_homography = tf.squeeze(
                    tf.slice(homographies,
                             begin=[0, plane, 0, 0],
                             size=[-1, 1, 3, 3]),
                    axis=1)
                warped = homography_warping(tower.get_output(),
                                            plane_homography)
                feature_sum = feature_sum + warped
                square_sum = square_sum + tf.square(warped)
            # Variance across views: E[x^2] - (E[x])^2.
            feature_mean = feature_sum / FLAGS.view_num
            square_mean = square_sum / FLAGS.view_num
            depth_costs.append(square_mean - tf.square(feature_mean))
        cost_volume = tf.stack(depth_costs, axis=1)

    # Regularised cost volume; the trailing singleton axis is squeezed away.
    regularizer = RegNetUS0({'data': cost_volume},
                            is_training=True,
                            reuse=reuse_variables)
    filtered_cost_volume = tf.squeeze(regularizer.get_output(), axis=-1)

    # Depth map by soft argmin.
    with tf.name_scope('soft_arg_min'):
        # Softmax over the negated cost along the depth axis.
        negated_cost = tf.scalar_mul(-1, filtered_cost_volume)
        probability_volume = tf.nn.softmax(negated_cost,
                                           axis=1,
                                           name='prob_volume')

        # Expected depth under the per-pixel probability distribution.
        volume_shape = tf.shape(probability_volume)
        depth_samples = [
            tf.linspace(depth_start[sample], depth_end[sample],
                        tf.cast(depth_num, tf.int32))
            for sample in range(FLAGS.batch_size)
        ]
        depth_grid = tf.reshape(tf.stack(depth_samples, axis=0),
                                [volume_shape[0], volume_shape[1], 1, 1])
        depth_grid = tf.tile(depth_grid,
                             [1, 1, volume_shape[2], volume_shape[3]])
        expected_depth = tf.reduce_sum(depth_grid * probability_volume,
                                       axis=1)
        estimated_depth_map = tf.expand_dims(expected_depth, axis=3)

    # Probability map
    prob_map = get_propability_map(probability_volume, estimated_depth_map,
                                   depth_start, depth_interval)

    return estimated_depth_map, prob_map
Exemple #57
0
    def create_training_operations(self, config):
        """Build the NAF (normalized advantage function) Q-value graph.

        Three linear heads are created on ``self.training_network.output``:
        the flat action means (scope ``naf_action_means``), the entries of
        the triangular factor ``L`` (scope ``naf_matrix_entries``) and the
        state value. The advantage is ``-0.5 * (a - mean)^T P (a - mean)``
        with ``P = L L^T`` and the Q-value is ``value + advantage``.

        Side effect: ``self.action_taken[name]`` is set to the slice of the
        mean head belonging to each action, reshaped to ``(-1,) + shape``.

        Args:
            config: configuration whose ``actions[name].shape`` is the shape
                tuple of each action in ``self.action``.

        Returns:
            dict mapping each action name to its Q-value tensor, reshaped to
            ``(-1,) + config.actions[name].shape``.
        """
        action_names = sorted(self.action)
        flat_sizes = {
            name: util.prod(config.actions[name].shape)
            for name in action_names
        }
        num_actions = sum(flat_sizes[name] for name in action_names)

        # Get hidden layers from network generator, then add NAF outputs.
        flat_mean = layers['linear'](x=self.training_network.output,
                                     size=num_actions,
                                     scope='naf_action_means')
        offset = 0
        for name in action_names:
            size = flat_sizes[name]
            self.action_taken[name] = tf.reshape(
                tensor=flat_mean[:, offset:offset + size],
                shape=((-1, ) + config.actions[name].shape))
            offset += size

        # Advantage computation
        # Network outputs the entries of the triangular matrix L: the first
        # num_actions entries are the diagonal, the rest are off-diagonal.
        lower_triangular_size = num_actions * (num_actions + 1) // 2
        l_entries = layers['linear'](x=self.training_network.output,
                                     size=lower_triangular_size,
                                     scope='naf_matrix_entries')

        # Exponentiate the diagonal entries BEFORE embedding them in a
        # matrix. Applying exp to the diag matrix itself would turn every
        # off-diagonal zero into exp(0) = 1 and make L dense.
        l_matrix = tf.map_fn(fn=tf.diag,
                             elems=tf.exp(x=l_entries[:, :num_actions]))

        if num_actions > 1:
            offset = num_actions
            l_columns = []
            # `range` (not the Python-2-only `xrange`) keeps this runnable on
            # Python 3. Each slice is left-padded with `zeros` zeros so the
            # stacked result only fills one triangle, off the diagonal.
            for zeros, size in enumerate(range(num_actions - 1, -1, -1), 1):
                column = tf.pad(tensor=l_entries[:, offset:offset + size],
                                paddings=((0, 0), (zeros, 0)))
                l_columns.append(column)
                offset += size
            l_matrix += tf.stack(values=l_columns, axis=1)

        # P = LL^T
        p_matrix = tf.matmul(a=l_matrix,
                             b=tf.transpose(a=l_matrix, perm=(0, 2, 1)))

        # Flatten and concatenate the taken actions in the same (sorted)
        # order as the mean head so they can be subtracted element-wise.
        flat_action = tf.concat(values=[
            tf.reshape(tensor=self.action[name],
                       shape=(-1, flat_sizes[name]))
            for name in action_names
        ],
                                axis=1)
        difference = flat_action - flat_mean

        # A = -0.5 (a - mean)P(a - mean)
        advantage = tf.matmul(a=p_matrix,
                              b=tf.expand_dims(input=difference, axis=2))
        advantage = tf.matmul(a=tf.expand_dims(input=difference, axis=1),
                              b=advantage)
        advantage = tf.squeeze(input=(-advantage / 2.0), axis=2)

        # Q = A + V
        # State-value function
        value = layers['linear'](x=self.training_network.output,
                                 size=num_actions)
        q_value = value + advantage

        # Split the flat Q-values back into one tensor per action.
        q_values = {}
        offset = 0
        for name in action_names:
            size = flat_sizes[name]
            q_values[name] = tf.reshape(
                tensor=q_value[:, offset:offset + size],
                shape=((-1, ) + config.actions[name].shape))
            offset += size
        return q_values
Exemple #58
0
def get_propability_map(cv, depth_map, depth_start, depth_interval):
    """Get the probability map from the probability volume.

    For every pixel the estimated depth is converted to a (fractional) depth
    plane index; the volume is then sampled at the floor and ceil planes and
    at one further plane on each side, and the four samples are summed.

    Works for any batch size: the pixel index grid is built in
    (batch, row, column) order so it lines up with the flattened depth map,
    and the depth range parameters are reshaped to broadcast per sample.

    Args:
        cv: volume indexed as (batch, depth, height, width).
        depth_map: estimated depth, shape (batch, height, width, 1).
        depth_start: first depth value, a scalar or one value per batch
            element.
        depth_interval: depth plane spacing, a scalar or one value per batch
            element.

    Returns:
        Tensor of shape (batch, height, width, 1).
    """
    shape = tf.shape(depth_map)
    batch_size = shape[0]
    height = shape[1]
    width = shape[2]
    depth = tf.shape(cv)[1]

    # byx coordinates of every pixel, flattened. 'ij' indexing yields grids
    # of shape (batch, height, width), i.e. the same element order as the
    # flattened depth map. The default 'xy' indexing would swap the first
    # two axes and only line up when batch_size == 1.
    b_grid, y_grid, x_grid = tf.meshgrid(tf.range(batch_size),
                                         tf.range(height),
                                         tf.range(width),
                                         indexing='ij')
    b_coordinates = tf.reshape(b_grid, [-1])
    y_coordinates = tf.reshape(y_grid, [-1])
    x_coordinates = tf.reshape(x_grid, [-1])

    # Shape the per-sample depth parameters as (batch or 1, 1, 1, 1) so they
    # broadcast against depth_map's leading batch axis and not its trailing
    # singleton axis.
    depth_start = tf.reshape(depth_start, [-1, 1, 1, 1])
    depth_interval = tf.reshape(depth_interval, [-1, 1, 1, 1])

    # d coordinate (floored and ceiled), flattened and clipped to the volume.
    d_coordinates = tf.reshape((depth_map - depth_start) / depth_interval,
                               [-1])
    max_plane = depth - 1
    d_left0 = tf.clip_by_value(tf.cast(tf.floor(d_coordinates), 'int32'), 0,
                               max_plane)
    d_left1 = tf.clip_by_value(d_left0 - 1, 0, max_plane)
    d_right0 = tf.clip_by_value(tf.cast(tf.ceil(d_coordinates), 'int32'), 0,
                                max_plane)
    d_right1 = tf.clip_by_value(d_right0 + 1, 0, max_plane)

    # Gather the volume at (b, d, y, x) for each of the four depth planes and
    # accumulate the samples.
    prob_map = None
    for d_plane in (d_left0, d_left1, d_right0, d_right1):
        voxel_coordinates = tf.stack(
            [b_coordinates, d_plane, y_coordinates, x_coordinates], axis=1)
        sample = tf.gather_nd(cv, voxel_coordinates)
        prob_map = sample if prob_map is None else prob_map + sample
    prob_map = tf.reshape(prob_map, [batch_size, height, width, 1])

    return prob_map
    def __init__(self, emb_dim, char_vocab_size, phoneme_vocab_size, seqlen):
        """Build a character-to-phoneme encoder/decoder graph.

        The default graph is reset first. An LSTM encoder consumes the
        embedded characters step by step; an LSTM decoder starts from the
        final encoder state and a learned <START> embedding, and feeds its
        own greedy prediction back in at each of the ``seqlen`` steps.

        Exposes:
            self.placeholders: dict with the 'chars' and 'phonemes' inputs.
            self.out: dict with 'loss', 'prob', 'pred', 'logits', 'accuracy'.
            self.trainop: Adam step minimising the loss.

        Args:
            emb_dim: embedding size, also used as the LSTM cell size.
            char_vocab_size: number of rows in the character embedding.
            phoneme_vocab_size: number of phoneme classes.
            seqlen: fixed number of encoder and decoder steps.
        """
        tf.reset_default_graph()

        def small_uniform():
            # Fresh initializer for every variable, as in plain inline use.
            return tf.random_uniform_initializer(-0.01, 0.01)

        # Inputs, both (batch, seqlen) integer id matrices.
        chars = tf.placeholder(tf.int32, [None, seqlen], 'chars')
        phonemes = tf.placeholder(tf.int32, [None, seqlen], 'phonemes')
        self.placeholders = {'chars': chars, 'phonemes': phonemes}

        # Dynamic batch size (the second component is not needed below).
        batch_size_, _unused_len = tf.unstack(tf.shape(chars))

        # Non-zero ids per row; computed but not consumed further down.
        seqlens = tf.count_nonzero(chars, axis=-1)

        # Embedding tables. The phoneme table has one extra, last row that
        # serves as the <START> token signalling "start generating".
        chE = tf.get_variable('chE', [char_vocab_size, emb_dim], tf.float32,
                              initializer=small_uniform())
        phE = tf.get_variable('phE', [1 + phoneme_vocab_size, emb_dim],
                              tf.float32,
                              initializer=small_uniform())

        # <START> embedding repeated for every batch element.
        start_token = tf.tile([phE[-1]], [batch_size_, 1])

        # Transposing the ids first makes the lookup result time-major, so
        # indexing its first axis yields one step for the whole batch.
        time_major_emb = tf.nn.embedding_lookup(chE, tf.transpose(chars))

        # Encoder: unrolled for exactly `seqlen` steps.
        encoder_outputs = []
        with tf.variable_scope('encoder'):
            enc_cell = tf.nn.rnn_cell.LSTMCell(emb_dim)
            state = enc_cell.zero_state(batch_size_, tf.float32)
            for step in range(seqlen):
                step_output, state = enc_cell(time_major_emb[step], state)
                encoder_outputs.append(step_output)

        # Output projection parameters.
        Wo = tf.get_variable('Wo',
                             shape=[emb_dim, phoneme_vocab_size],
                             dtype=tf.float32,
                             initializer=small_uniform())
        bo = tf.get_variable('bo',
                             shape=[phoneme_vocab_size],
                             dtype=tf.float32,
                             initializer=small_uniform())

        # Decoder: starts from the last encoder state, greedy feedback.
        step_logits = []
        with tf.variable_scope('decoder'):
            dec_cell = tf.nn.rnn_cell.LSTMCell(emb_dim, name='decoder_cell')
            dec_input = start_token
            for _step in range(seqlen):
                dec_output, state = dec_cell(dec_input, state)
                current_logits = tf.matmul(dec_output, Wo) + bo
                step_logits.append(current_logits)
                greedy = tf.argmax(tf.nn.softmax(current_logits), axis=-1)
                dec_input = tf.nn.embedding_lookup(phE, greedy)

        # Stacked logits are (time, batch, vocab); make them batch-major.
        logits = tf.transpose(tf.stack(step_logits), [1, 0, 2])
        # Probability distribution across the vocabulary and its argmax.
        probs = tf.nn.softmax(logits)
        preds = tf.argmax(probs, axis=-1)

        # Mean cross entropy over every position of every sequence.
        ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=phonemes)
        loss = tf.reduce_mean(ce)

        # Fraction of positions where the prediction equals the target.
        hits = tf.equal(tf.cast(preds, tf.int32), phonemes)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

        self.out = {
            'loss': loss,
            'prob': probs,
            'pred': preds,
            'logits': logits,
            'accuracy': accuracy,
        }

        # Training operation
        self.trainop = tf.train.AdamOptimizer().minimize(loss)
    def __init__(self, actor, critic, experts, obs_dim, memory, observation_shape, action_shape,
        expert_is_np=False,
        param_noise=None, action_noise=None,
        gamma=0.95, tau=0.001, normalize_returns=False, enable_popart=False, normalize_observations=True,
        batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.), return_range=(-np.inf, np.inf),
        critic_l2_reg=0., actor_lr=1e-4, critic_lr=1e-3, clip_norm=None, reward_scale=1.):
        """Build the actor-critic graph with expert critics as extra input.

        Creates the input placeholders, stores the hyper-parameters, sets up
        optional observation/return normalization, copies the actor and
        critic into target networks and wires the expert Q-values into the
        critic calls.

        Each expert critic is evaluated on the first ``obs_dim`` observation
        columns (normalized with that expert's own ``obs_rms``). The expert
        outputs are then combined by a weighted sum whose weights are the
        remaining observation columns ``obs[:, obs_dim:]`` (presumably one
        weight per expert -- confirm against the caller). The combined value
        is passed to the critic behind ``tf.stop_gradient``.

        NOTE(review): ``expert_is_np`` is accepted but not referenced in the
        part of the constructor visible here.
        """
        # Inputs.
        self.obs0 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs0')
        self.obs1 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs1')
        self.terminals1 = tf.placeholder(tf.float32, shape=(None, 1), name='terminals1')
        self.rewards = tf.placeholder(tf.float32, shape=(None, 1), name='rewards')
        self.actions = tf.placeholder(tf.float32, shape=(None,) + action_shape, name='actions')
        self.critic_target = tf.placeholder(tf.float32, shape=(None, 1), name='critic_target')
        self.param_noise_stddev = tf.placeholder(tf.float32, shape=(), name='param_noise_stddev')

        # Parameters.
        self.gamma = gamma
        self.tau = tau
        self.memory = memory
        self.normalize_observations = normalize_observations
        self.normalize_returns = normalize_returns
        self.action_noise = action_noise
        self.param_noise = param_noise
        self.action_range = action_range
        self.return_range = return_range
        self.observation_range = observation_range
        self.critic = critic
        # The actor is copied, the critic is stored as passed in.
        self.actor = copy(actor)
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.clip_norm = clip_norm
        self.enable_popart = enable_popart
        self.reward_scale = reward_scale
        self.batch_size = batch_size
        self.stats_sample = None
        self.critic_l2_reg = critic_l2_reg
        self.experts = experts
        self.obs_dim = obs_dim
        # self.critic_obs0 = self.experts[0].obs0
        # self.critic_obs1 = self.experts[0].obs1
        # self.critic_actor = self.experts[0].use_tf_actor

        # Observation normalization.
        if self.normalize_observations:
            with tf.variable_scope('obs_rms'):
                self.obs_rms = RunningMeanStd(shape=observation_shape)
        else:
            self.obs_rms = None
        normalized_obs0 = tf.clip_by_value(normalize(self.obs0, self.obs_rms),
            self.observation_range[0], self.observation_range[1])
        normalized_obs1 = tf.clip_by_value(normalize(self.obs1, self.obs_rms),
            self.observation_range[0], self.observation_range[1])

        # Return normalization.
        if self.normalize_returns:
            with tf.variable_scope('ret_rms'):
                self.ret_rms = RunningMeanStd()
        else:
            self.ret_rms = None

        # Create target networks.
        target_actor = copy(self.actor)
        target_actor.name = 'target_actor'
        self.target_actor = target_actor
        target_critic = copy(critic)
        target_critic.name = 'target_critic'
        self.target_critic = target_critic

        # Expert Q-values for (obs0, taken actions): every expert sees the
        # first obs_dim observation columns, normalized with its own obs_rms.
        expert0_normalize_obs0 = [tf.clip_by_value(normalize(self.obs0[:, :self.obs_dim], self.experts[i].obs_rms),
            self.observation_range[0], self.observation_range[1]) for i in range(len(self.experts))]
        expert_qv0 = tf.squeeze(tf.stack([experts[i].critic(expert0_normalize_obs0[i], self.actions)\
         for i in range(len(self.experts))]), axis=2)
        # expert_qv0 = tf.Print(expert_qv0, [expert_qv0], '>>>> qv0 :', summarize=10)
        # Weighted sum over experts, weights taken from obs0[:, obs_dim:].
        expert_qv0 = tf.reduce_sum(self.obs0[:, self.obs_dim:] * tf.transpose(expert_qv0), axis=1)

        # Create networks and core TF parts that are shared across setup parts.
        self.actor_tf = self.actor(normalized_obs0)
        self.normalized_critic_tf = critic(normalized_obs0, self.actions, tf.stop_gradient(expert_qv0))
        # NOTE(review): unlike critic_with_actor_tf below, this clipped value
        # is not passed through denormalize() -- confirm this is intended.
        self.critic_tf = tf.clip_by_value(self.normalized_critic_tf, self.return_range[0], self.return_range[1])

        # Same expert combination, evaluated on the actor's own action.
        expert_qv0_with_actor_tf = tf.squeeze(tf.stack([experts[i].critic(expert0_normalize_obs0[i], self.actor_tf) for i in range(len(self.experts))]),
            axis=2)
        expert_qv0_with_actor_tf = tf.reduce_sum(self.obs0[:, self.obs_dim:] * tf.transpose(expert_qv0_with_actor_tf), axis=1)

        self.normalized_critic_with_actor_tf = critic(normalized_obs0, self.actor_tf, tf.stop_gradient(expert_qv0_with_actor_tf))
        self.critic_with_actor_tf = denormalize(tf.clip_by_value(self.normalized_critic_with_actor_tf, self.return_range[0], self.return_range[1]), self.ret_rms)

        # Next-state branch: target actor picks the action, experts and the
        # target critic evaluate it on obs1.
        action1 = target_actor(normalized_obs1)
        expert0_normalize_obs1 = [tf.clip_by_value(normalize(self.obs1[:, :self.obs_dim], self.experts[i].obs_rms),
            self.observation_range[0], self.observation_range[1]) for i in range(len(self.experts))]
        expert_qv1 = tf.squeeze(tf.stack([(experts[i].critic(expert0_normalize_obs1[i], action1)) for i in range(len(self.experts))]), axis=2)
        expert_qv1 = tf.reduce_sum(self.obs1[:, self.obs_dim:] * tf.transpose(expert_qv1), axis=1)

        # NOTE(review): the target critic output is used without
        # denormalize() -- confirm this is intended when returns are normalized.
        self.Q_obs1 = target_critic(normalized_obs1, action1, tf.stop_gradient(expert_qv1))
        # self.Q_obs1 = tf.Print(self.Q_obs1, [self.Q_obs1], '>>>> Q :', summarize=10)
        # self.terminals1 = tf.Print(self.terminals1, [self.terminals1], '>>>> terminal :', summarize=10)

        # One-step bootstrapped target; the bootstrap term is masked out
        # where terminals1 is 1.
        self.target_Q = self.rewards + (1. - self.terminals1) * gamma * self.Q_obs1

        self.expert_qv1 = expert_qv1

        # Set up parts.
        if self.param_noise is not None:
            self.setup_param_noise(normalized_obs0)

        if self.normalize_returns and self.enable_popart:
            self.setup_popart()
        self.setup_stats()
        self.setup_target_network_updates()

        self.initial_state = None # recurrent architectures not supported yet