Beispiel #1
0
    def _partition_and_stitch(self, args, func_name):
        """
        Route every data row to its likelihood, evaluate it, and restore row order.

        :param args: sequence of tensors to be passed to the method named
            ``func_name`` of each likelihood in ``self.likelihood_list``.
            ``args[-1]`` is the 'Y' argument; its last column holds, for each
            row, the index of the likelihood to use. That column is stripped
            before the likelihood methods are called.
        :param func_name: name of the method looked up on every likelihood.
        :return: the per-likelihood results stitched back together so that
            rows appear in the same order as in the inputs.

        The caller's ``args`` sequence is not modified.
        """
        # Work on a private copy: replacing the last entry below must not leak
        # back into the caller's list (and this also lets callers pass a tuple).
        args = list(args)

        # get the index from Y and drop the index column
        Y = args[-1]
        ind = tf.cast(Y[:, -1], tf.int32)
        args[-1] = Y[:, :-1]

        # split up the arguments into chunks corresponding to the relevant likelihoods
        partitioned_args = zip(
            *[tf.dynamic_partition(X, ind, self.num_likelihoods) for X in args])

        # apply the likelihood-function to each section of the data
        with params_as_tensors_for(self, convert=False):
            funcs = [getattr(lik, func_name) for lik in self.likelihood_list]
        results = [f(*args_i) for f, args_i in zip(funcs, partitioned_args)]

        # stitch the results back together: partitioning the row numbers with
        # the same index tells dynamic_stitch where each result row belongs
        partitions = tf.dynamic_partition(tf.range(0, tf.size(ind)), ind, self.num_likelihoods)
        results = tf.dynamic_stitch(partitions, results)

        return results
    def loop(q_, mask, mass_, found_):
        """
        Run one redistribution step and return the updated loop state.

        Entries of ``q_`` where ``mask`` is 1 are rescaled so that they sum to
        ``1 - mass_``; entries where ``mask`` is 0 are kept. Wherever ``u``
        (taken from the enclosing scope) is smaller than the rescaled value,
        the output takes ``u`` instead, ``u`` is added to ``mass_`` and the
        entry is removed from ``mask``.

        :param q_: values to redistribute; partitioned along the first axis.
        :param mask: int tensor of 0/1 flags, one per row of ``q_``.
        :param mass_: running total; incremented by the selected ``u`` values.
        :param found_: incoming flag; its value is not read, only replaced.
        :return: ``(alpha, mask, mass_, found_)`` where ``alpha`` has the shape
            of ``q_`` and ``found_`` tells whether any ``u`` was smaller.
        """
        # Partition index 0 <-> mask is 0 (kept as is), 1 <-> mask is 1 (rescaled).
        kept_q, masked_q = tf.dynamic_partition(q_, mask, 2)
        kept_rows, masked_rows = tf.dynamic_partition(tf.range(tf.shape(q_)[0]), mask, 2)

        rescaled_q = masked_q * (1.0 - mass_) / tf.reduce_sum(masked_q)
        p_new = tf.dynamic_stitch([kept_rows, masked_rows], [kept_q, rescaled_q])

        # Partition index 0 <-> u >= p_new, 1 <-> u < p_new.
        less_mask = tf.cast(tf.less(u, p_new), tf.int32)
        not_less_rows, less_rows = tf.dynamic_partition(
            tf.range(tf.shape(p_new)[0]), less_mask, 2)

        p_not_less = tf.dynamic_partition(p_new, less_mask, 2)[0]
        u_less = tf.dynamic_partition(u, less_mask, 2)[1]

        # Take u where it is smaller than p_new, p_new everywhere else.
        alpha = tf.dynamic_stitch([not_less_rows, less_rows], [p_not_less, u_less])
        mass_ += tf.reduce_sum(u_less)

        # Clear the mask wherever u was selected.
        mask = mask * (tf.ones_like(less_mask) - less_mask)

        found_ = tf.cond(tf.equal(tf.reduce_sum(less_mask), 0),
                         lambda: False,
                         lambda: True)

        return tf.reshape(alpha, q_.shape), mask, mass_, found_
Beispiel #3
0
    def __call__(self, X):
        """
        Evaluate the mean function selected by the last column of ``X`` on each row.

        The last column of ``X`` is cast to int32 and used as an index into
        ``self.meanfunction_list``; the remaining columns are the inputs handed
        to the selected mean function. Results are returned in the original
        row order.
        """
        num_functions = len(self.meanfunction_list)

        # Column access via gather on the transpose: equivalent to X[:, -1]
        # for the index and X[:, :-1] for the inputs.
        X_t = tf.transpose(X)
        last_col = tf.shape(X)[1] - 1
        ind = tf.cast(tf.gather(X_t, last_col), tf.int32)
        X = tf.transpose(tf.gather(X_t, tf.range(0, last_col)))

        # one chunk of rows per mean function
        chunks = tf.dynamic_partition(X, ind, num_functions)
        evaluated = [mean_fn(chunk)
                     for chunk, mean_fn in zip(chunks, self.meanfunction_list)]

        # partition the row numbers the same way so the rows can be put back in order
        row_ids = tf.dynamic_partition(tf.range(0, tf.size(ind)), ind, num_functions)
        return tf.dynamic_stitch(row_ids, evaluated)
Beispiel #4
0
def split_apply_merge(inp, partitions, fns):
    """Split input according to partitions.  Pass results through fns and merge.

    Args:
      inp: the input tensor; it is split along its first dimension.
      partitions: integer tensor with one entry per row of `inp`; each value
        is an index into `fns`.
      fns: sequence of functions; `fns[k]` receives the rows assigned to k.
    Returns:
      the tensor routed, where routed[i] = fns[partitions[i]](inp[i])
    """
    num_fns = len(fns)
    new_inputs = tf.dynamic_partition(inp, partitions, num_fns)
    new_outputs = [fn(x) for fn, x in zip(fns, new_inputs)]
    # Take the row count from the runtime shape so that inputs whose leading
    # dimension is not known statically are handled as well.
    row_ids = tf.range(0, tf.shape(inp)[0])
    new_indices = tf.dynamic_partition(row_ids, partitions, num_fns)
    return tf.dynamic_stitch(new_indices, new_outputs)
Beispiel #5
0
def add_loss(graph, locations, confidences, batched_bboxes, batched_num_bboxes, bbox_priors, cfg):
  """Build the location and confidence losses inside the "loss" name scope.

  The predictions are flattened, the priors are added to the predicted
  location residuals, and `compute_assignments` (run through `tf.py_func`)
  decides which predictions are matched to ground truth boxes.

  Args:
    graph: object whose `name_scope` is used to group the created ops.
    locations: predicted location residuals; reshaped to [-1, 4].
    confidences: predicted confidences; reshaped to [-1].
    batched_bboxes: ground truth boxes, forwarded to `compute_assignments`.
    batched_num_bboxes: number of ground truth boxes per image, forwarded to
      `compute_assignments`.
    bbox_priors: priors, tiled `cfg.BATCH_SIZE` times and added to `locations`.
    cfg: configuration providing `BATCH_SIZE` and `LOCATION_LOSS_ALPHA`.

  Returns:
    (location_loss, confidence_loss, matching)
  """
  with graph.name_scope("loss"):
    flat_locations = tf.reshape(locations, [-1, 4])
    flat_confidences = tf.reshape(confidences, [-1])

    # add the priors to the predicted residuals
    flat_locations = flat_locations + tf.tile(bbox_priors, [cfg.BATCH_SIZE, 1])

    # add a small epsilon to the confidences
    flat_confidences = flat_confidences + small_epsilon

    # matching: one int32 entry per prediction, used as the partition index below
    # stacked_gt_bboxes: float32 ground truth boxes compared against the matched predictions
    matching, stacked_gt_bboxes = tf.py_func(
        compute_assignments,
        [flat_locations, flat_confidences, batched_bboxes, batched_num_bboxes,
         cfg.BATCH_SIZE, cfg.LOCATION_LOSS_ALPHA],
        [tf.int32, tf.float32],
        name="bipartite_matching")

    # partition 0 holds the unmatched ("negative") predictions, partition 1 the matched ("positive") ones
    matched_locations = tf.dynamic_partition(flat_locations, matching, 2)[1]
    unmatched_confidences, matched_confidences = tf.dynamic_partition(flat_confidences, matching, 2)

    # weighted l2 loss between matched predictions and their ground truth boxes
    location_loss = cfg.LOCATION_LOSS_ALPHA * tf.nn.l2_loss(matched_locations - stacked_gt_bboxes)

    # negative log of the matched confidences plus negative log of one minus the unmatched confidences
    matched_term = -1. * tf.reduce_sum(tf.log(matched_confidences))
    unmatched_term = tf.reduce_sum(tf.log((1. - unmatched_confidences) + small_epsilon))
    confidence_loss = matched_term - unmatched_term

  return location_loss, confidence_loss, matching
def apply_factor(tensor, *args, **kwargs):
    """Apply a factor table to its arguments and return the resulting scores.

    Args:
        tensor: the factor. After `slice_out_int_literals` has processed it
            (or directly, when no arguments are given) it is unpacked as a
            triple `(tensor, output_size, error_symbol)`.
        *args: the factor's arguments; passed through `slice_out_int_literals`
            and `make_batch_consistent` before use.
        **kwargs: only `scope` is read (default ""); it names the op scope.

    Returns:
        `one_hot(tensor, output_size)` when there are no arguments or the
        table has no dimensions left after slicing; otherwise one logsumexp
        per output value over the table entries carrying that value,
        squeezed when the inputs were not batched.
    """
    scope = kwargs.pop("scope", "")
    with tf.name_scope(scope):
        # Compare by value: `is 0` tests object identity against an int
        # literal, which only works by accident of small-int caching.
        if len(args) == 0:
            tensor, output_size, error_symbol = tensor
            return one_hot(tensor, output_size, scope=scope)

        tensor, args = slice_out_int_literals(tensor, list(args))
        args, is_batched = make_batch_consistent(args)
        tensor, output_size, error_symbol = tensor

        # handle the case where all arguments were int literals
        tensor_dim_sizes = [dim.value for dim in tensor.get_shape()]
        if not tensor_dim_sizes:
            return one_hot(tensor, output_size, scope=scope)

        # Each arg is batch size x arg dim. Add dimensions to enable broadcasting:
        # argument i receives a singleton axis for every other argument.
        for i in range(len(args)):
            for j in range(len(args)):
                if j != i:
                    args[i] = tf.expand_dims(args[i], j + 1)

        # compute joint before tensor is applied
        joint = 0
        for arg in args:
            joint = joint + arg

        # flatten to |tensor| x batch_size so rows can be grouped by table value
        joint = tf.reshape(joint, (-1, np.prod(tensor_dim_sizes)))
        joint = tf.transpose(joint, [1, 0])

        flat_tensor = tf.reshape(tensor, [-1])
        if error_symbol is not None:
            # one extra partition collects the error entries, which are then discarded
            to_logsumexp = tf.dynamic_partition(joint, flat_tensor, output_size + 1)
            del to_logsumexp[error_symbol]
        else:
            to_logsumexp = tf.dynamic_partition(joint, flat_tensor, output_size)

        # Build a real list: on Python 3 `map` returns an iterator, which
        # cannot be converted to a list of tensors by tf.pack.
        result = tf.pack(
            [logsumexp(x, reduction_indices=0) for x in to_logsumexp])

        result = tf.transpose(result, [1, 0])
        if not is_batched:
            result = tf.squeeze(result)
        return result
def mmd_objective(z, s, sdim):
    """
    Compute the MMD from latent space and nuisance_id

    `z` is partitioned by `s` into `sdim` groups. With exactly two groups the
    MMD between them is returned; otherwise the MMD of every group against
    the full `z` is summed. One random normal row is appended to each group
    so that a group without samples is never empty.

    Notes:
    Reimplementation in tensorflow of the Variational Fair Autoencoder
    https://arxiv.org/abs/1511.00830
    """
    mmd_method = mmd_fourier  # alternative implementation: mmd_rbf

    z_dim = z.get_shape().as_list()[1]

    # STEP 1: one tensor of samples per category of s
    z_part = tf.dynamic_partition(z, s, sdim)

    # STEP 2: pad every group with one noise row and accumulate the mmd
    if len(z_part) == 2:
        first = tf.concat([z_part[0], tf.random_normal([1, z_dim])], 0)
        second = tf.concat([z_part[1], tf.random_normal((1, z_dim))], axis=0)
        return mmd_method(first, second)

    total = 0
    for group in z_part:
        padded_group = tf.concat([group, tf.random_normal([1, z_dim])], 0)
        total += mmd_method(padded_group, z)
    return total
Beispiel #8
0
def mol_conv_layer(atoms, cH_params, aux_params, layer):
    """Apply one convolution step: neighbor sum, per-degree affine map, relu.

    `sum_neigh` aggregates the atoms, the result is partitioned by
    `deg_list_ph - 1`, and each degree's rows go through `affine` with the
    weights 'W<deg>' / 'b<deg>' from `cH_params` (suffixed with '_<layer>'
    when `bool_separate_conv_depths` is set).
    """
    # Sum all neighbors using adjacency matrix
    atom_sum_neigh = sum_neigh(atoms, aux_params, layer)

    # Partition the atom matrix by degree of atoms (degrees are 1-based, partitions 0-based)
    # THIS CREATES PROBLEMS WITH GRADIENTS. NEED TO USE SLICING
    zero_based_deg = tf.sub(deg_list_ph, tf.constant(1, dtype=tf.int32))
    atoms_by_degree = tf.dynamic_partition(atom_sum_neigh, zero_based_deg, max_deg)

    # Hidden affine map for the atoms of each degree
    # NOTE(review): degrees 1..5 are hard-coded here while the partition count
    # is max_deg -- confirm the two always agree.
    transformed = []
    for deg in range(1, 6):
        if bool_separate_conv_depths:
            key_suffix = str(deg) + '_' + str(layer)
        else:
            key_suffix = str(deg)
        transformed.append(
            affine(atoms_by_degree[deg - 1],
                   cH_params['W' + key_suffix],
                   cH_params['b' + key_suffix]))

    # Combine all atoms back into the list
    # NOTE: FOR NOW USE CONCATENATION. MEANS WE CANNOT USE ARBITRARY deg_list ORDER
    hidden_atoms = tf.concat(0, transformed)

    return tf.nn.relu(hidden_atoms)
 def testScalarIndexOutOfRange(self):
   """A scalar partition index outside [0, num_partitions) raises when run."""
   with self.test_session() as sess:
     out_of_range = 17
     zeros = np.zeros(5)
     outputs = tf.dynamic_partition(zeros, out_of_range, num_partitions=7)
     expected_message = r"partitions = 17 is not in \[0, 7\)"
     with self.assertRaisesOpError(expected_message):
       sess.run(outputs)
Beispiel #10
0
def smoothed_l1_loss(input_tensor):
    """Return the summed smooth-L1 loss of `input_tensor`.

    Entries with absolute value at most 1 contribute 0.5 * x**2; larger
    entries contribute |x| - 0.5.
    """
    magnitude = tf.abs(input_tensor)
    is_large = tf.to_int32(magnitude > 1)
    small_vals, large_vals = tf.dynamic_partition(magnitude, is_large, 2)
    quadratic_term = tf.reduce_sum(0.5 * tf.square(small_vals))
    linear_term = tf.reduce_sum(large_vals - 0.5)
    return quadratic_term + linear_term
Beispiel #11
0
  def __call__(self, *parents):
    """Pool atom features into one feature row per molecule.

    Args:
      parents: parent layers. `parents[0].out_tensor` supplies the atom
        features and `parents[2].out_tensor` the membership tensor used to
        split the atoms into `self.batch_size` groups.

    Returns:
      The per-group sum and the per-group max of the atom features,
      concatenated along axis 1 and passed through `self.activation_fn`
      when one is set. The result is also stored in `self.out_tensor`.

    Raises:
      ValueError: if `self.batch_size` is not larger than 1.
    """
    # The previous check asserted on a (condition, message) tuple, which is
    # always truthy, so it could never fire. Validate explicitly instead.
    if self.batch_size <= 1:
      raise ValueError("graph_gather requires batches larger than 1")

    # x = [atom_features, deg_slice, membership, deg_adj_list placeholders...]
    atom_features = parents[0].out_tensor

    # Extract graph topology
    membership = parents[2].out_tensor

    # Obtain the partitions for each of the molecules
    activated_par = tf.dynamic_partition(atom_features, membership,
                                         self.batch_size)

    # Sum and max over atoms for each molecule
    sparse_reps = [
        tf.reduce_sum(activated, 0, keep_dims=True)
        for activated in activated_par
    ]
    max_reps = [
        tf.reduce_max(activated, 0, keep_dims=True)
        for activated in activated_par
    ]

    # Stack the molecules along axis 0, then place sum and max side by side
    sparse_reps = tf.concat(axis=0, values=sparse_reps)
    max_reps = tf.concat(axis=0, values=max_reps)
    mol_features = tf.concat(axis=1, values=[sparse_reps, max_reps])

    if self.activation_fn is not None:
      mol_features = self.activation_fn(mol_features)
    self.out_tensor = mol_features
    return mol_features
Beispiel #12
0
    def _build_graph(self):
        """Create the tensorflow nodes for one round of clustering.

        Sets `self.centroids_in` (variable holding the current centroids),
        `self.clusters` (the rows of `self.arr` grouped by nearest centroid),
        `self.centroids` (the mean of every cluster) and `self.update_roids`
        (op assigning the new centroids to the variable).
        """
        # Start from self.k randomly chosen rows of the data.
        # N.B. without tf.Variable, makes awesome glitchy clustered images
        initial_centroids = tf.slice(tf.random_shuffle(self.arr), [0, 0], [self.k, -1])
        self.centroids_in = tf.Variable(initial_centroids, name="centroids_in")

        # One copy of every data row per centroid;
        # expected shape (self.n_pixels, self.k, size_data = 2 + self.channels)
        tiled_pix = tf.tile(tf.expand_dims(self.arr, 1),
                            multiples=[1, self.k, 1], name="tiled_pix")

        # Squared euclidean distance is enough: the square root would not
        # change which centroid is nearest.
        with tf.name_scope("radical_euclidean"):
            squared_diff = tf.square(tf.sub(tiled_pix, self.centroids_in))

        # expected shape (self.n_pixels, self.k)
        distances = tf.reduce_sum(squared_diff, reduction_indices=2, name="distances")
        # expected shape (self.n_pixels)
        nearest = tf.to_int32(tf.argmin(distances, 1), name="nearest")

        # list of self.k tensors, each holding the rows assigned to one centroid
        self.clusters = tf.dynamic_partition(self.arr, nearest, self.k)
        # one mean row per cluster
        cluster_means = [tf.reduce_mean(cluster, 0) for cluster in self.clusters]
        self.centroids = tf.pack(cluster_means, name="centroids_out")
        self.update_roids = tf.assign(self.centroids_in, self.centroids)
Beispiel #13
0
  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Perform M steps of set2set gather,
        detailed descriptions in: https://arxiv.org/abs/1511.06391

    `in_layers[0].out_tensor` provides the atom features and
    `in_layers[1].out_tensor` the per-atom molecule index. When
    `set_tensors` is true the result is also stored on the layer.
    """
    layers = self.in_layers if in_layers is None else in_layers
    layers = convert_to_layers(layers)

    self.build()
    atom_features = layers[0].out_tensor
    atom_split = layers[1].out_tensor

    state_shape = (self.batch_size, self.n_hidden)
    self.c = tf.zeros(state_shape)
    self.h = tf.zeros(state_shape)

    for _ in range(self.M):
      # Broadcast each molecule's query to its atoms and score every atom
      per_atom_query = tf.gather(self.h, atom_split)
      scores = tf.reduce_sum(atom_features * per_atom_query, 1)
      scores_by_mol = tf.dynamic_partition(scores, atom_split, self.batch_size)
      # Add another value(~-Inf) to prevent error in softmax
      padded_scores = [
          tf.concat([mol_scores, tf.constant([-1000.])], 0)
          for mol_scores in scores_by_mol
      ]
      # Softmax within each molecule, dropping the padding entry again
      attention = tf.concat(
          [tf.nn.softmax(mol_scores)[:-1] for mol_scores in padded_scores], 0)
      readout = tf.segment_sum(
          tf.reshape(attention, [-1, 1]) * atom_features, atom_split)
      # Model using this layer must set pad_batches=True
      q_star = tf.concat([self.h, readout], axis=1)
      self.h, self.c = self.LSTMStep(q_star, self.c)

    out_tensor = q_star
    if set_tensors:
      self.variables = self.trainable_weights
      self.out_tensor = out_tensor
    return out_tensor
Beispiel #14
0
  def call(self, x, mask=None):
    """Sum atom features per molecule, optionally via a Gaussian expansion.

    Parameters
    ----------
    x: list
      [atom_features, membership]
    mask: bool, optional
      Ignored. Present only to shadow superclass call() method.

    Returns
    -------
    outputs: Tensor
      Tensor of molecular features
    """
    # Add trainable weights
    self.build()
    atom_features, membership = x[0], x[1]

    if self.gaussian_expand:
      atom_features = self.gaussian_histogram(atom_features)

    # One tensor of atoms per molecule, each reduced to a single row
    atoms_by_molecule = tf.dynamic_partition(atom_features, membership,
                                             self.batch_size)
    molecule_sums = tf.stack(
        [tf.reduce_sum(atoms, 0) for atoms in atoms_by_molecule])

    if not self.gaussian_expand:
      return molecule_sums

    # Project the expanded features back with the layer's weights
    projected = tf.matmul(molecule_sums, self.W) + self.b
    return self.activation(projected)
Beispiel #15
0
def graph_gather(atoms, membership_placeholder, batch_size):
  """Sum the atom features of every molecule in the batch.

  Parameters
  ----------
  atoms: tf.Tensor
    Of shape (n_atoms, n_feat)
  membership_placeholder: tf.Placeholder
    Of shape (n_atoms,). Molecule each atom belongs to.
  batch_size: int
    Batch size for deep model. Must be larger than 1.

  Returns
  -------
  tf.Tensor
    Of shape (batch_size, n_feat)

  Raises
  ------
  ValueError
    If batch_size is not larger than 1.
  """

  # WARNING: Does not work for Batch Size 1! If batch_size = 1, then use reduce_sum!
  # The previous check asserted on a (condition, message) tuple, which is
  # always truthy, so it could never fire. Validate explicitly instead.
  if batch_size <= 1:
    raise ValueError("graph_gather requires batches larger than 1")

  # Obtain the partitions for each of the molecules
  activated_par = tf.dynamic_partition(atoms, membership_placeholder,
                                       batch_size)

  # Sum over atoms for each molecule, keeping a leading axis of size 1
  sparse_reps = [
      tf.reduce_sum(activated, 0, keep_dims=True) for activated in activated_par
  ]

  # Stack the per-molecule rows into the final representation
  sparse_reps = tf.concat(axis=0, values=sparse_reps)

  return sparse_reps
 def testErrorIndexOutOfRange(self):
   """A partition index outside [0, num_partitions) raises and names its position."""
   with self.test_session() as sess:
     rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 13, 14]]
     data = tf.constant(rows)
     # the entry at position 2 is the invalid one
     indices = tf.constant([0, 2, 99, 2, 2])
     outputs = tf.dynamic_partition(data, indices, num_partitions=4)
     expected_message = r"partitions\[2\] = 99 is not in \[0, 4\)"
     with self.assertRaisesOpError(expected_message):
       sess.run(outputs)
def pair_loss(y_true, y_pred):
    """Return the mean of sigmoid(negative - positive) over score pairs.

    `y_pred` is split by `y_true` (cast to int32) into entries labelled 0
    and entries labelled 1; the two groups are broadcast against each other
    so that every label-0 entry is compared with every label-1 entry.
    """
    labels = tf.cast(y_true, tf.int32)
    neg_scores, pos_scores = tf.dynamic_partition(y_pred, labels, 2)
    # positives along a leading axis, negatives along a trailing axis
    pos_scores = tf.expand_dims(pos_scores, 0)
    neg_scores = tf.expand_dims(neg_scores, -1)
    return K.mean(K.sigmoid(neg_scores - pos_scores))
 def scatter_update(cls, factor, indices, values, sharding_func):
     """Helper function for doing sharded scatter update.

     `factor` is a list of shards. A single shard is updated directly;
     otherwise `sharding_func(indices)` supplies the shard assignment and the
     ids within each shard, and one scatter update is issued per shard.
     Returns a single op that performs all updates.
     """
     assert isinstance(factor, list)
     num_shards = len(factor)

     if num_shards == 1:
         only_shard = factor[0]
         with ops.colocate_with(only_shard):
             # TODO(agarwal): assign instead of scatter update for full batch update.
             return tf.scatter_update(only_shard, indices, values).op

     assignments, new_ids = sharding_func(indices)
     assert assignments is not None
     assignments = tf.cast(assignments, tf.int32)
     ids_per_shard = tf.dynamic_partition(new_ids, assignments, num_shards)
     values_per_shard = tf.dynamic_partition(values, assignments, num_shards)
     updates = [
         tf.scatter_update(shard, shard_ids, shard_values)
         for shard, shard_ids, shard_values
         in zip(factor, ids_per_shard, values_per_shard)
     ]
     return tf.group(*updates)
Beispiel #19
0
def update_centroids(samples, centroids, num_clusters):
    """Return new centroids: the mean of the samples nearest to each centroid.

    Every sample is assigned to the centroid with the smallest squared
    euclidean distance, and the samples of each cluster are averaged.
    """
    # Broadcast samples against centroids: axis 0 runs over centroids, axis 1 over samples.
    samples_row = tf.expand_dims(samples, 0)
    centroids_col = tf.expand_dims(centroids, 1)
    squared_distances = tf.reduce_sum(
        tf.square(tf.sub(samples_row, centroids_col)), 2)

    # index of the closest centroid for every sample
    nearest_centroid = tf.to_int32(tf.argmin(squared_distances, 0))
    clusters = tf.dynamic_partition(samples, nearest_centroid, num_clusters)

    cluster_means = []
    for cluster in clusters:
        cluster_means.append(tf.expand_dims(tf.reduce_mean(cluster, 0), 0))
    return tf.concat(0, cluster_means)
Beispiel #20
0
def hsplit(x):
    """Split `x` into its I and Q parts and return them as `(I, Q)`.

    The split is done on the transpose of `x` using the partition index
    `partIQ` from the enclosing scope: partition 0 becomes Q and partition 1
    becomes I. Each part is transposed back and reshaped to [1, 505, 422, 1].
    """
    # dynamic_partition splits along the first axis, hence the transposition
    transposed = tf.transpose(x)
    q_part, i_part = tf.dynamic_partition(transposed, partIQ, 2)

    # undo the transposition and shape each half into a half-width image
    image_shape = [1, 505, 422, 1]
    Q = tf.reshape(tf.transpose(q_part), image_shape)
    I = tf.reshape(tf.transpose(i_part), image_shape)

    return I, Q
Beispiel #21
0
    def _add_beam_tag_dynamic(self, feat_x, beam_path, cur_size):
        """
        Apply `_add_tag_dynamic` to every path of the beam and concatenate the results.

        `beam_path` is split row by row into `self.beam_size` slots using
        `tf.range(cur_size)` as the partition index. Slots that received no
        row yield an empty tensor of shape
        [0, self.window_size, self.dim_feat_x].
        """
        path_slots = tf.dynamic_partition(beam_path, tf.range(cur_size), self.beam_size)

        def tag_or_empty(path):
            # A slot without rows contributes nothing to the concatenation.
            slot_is_empty = tf.less(tf.shape(path)[0], 1)
            return tf.cond(
                slot_is_empty,
                lambda: tf.zeros(shape=[0, self.window_size, self.dim_feat_x]),
                lambda: self._add_tag_dynamic(feat_x, tf.reshape(path, [-1])))

        return tf.concat(0, [tag_or_empty(path) for path in path_slots])
 def testHigherRank(self):
   """Partitioning with higher-rank partition tensors matches numpy boolean indexing."""
   np.random.seed(7)
   partition_shapes = ((4,), (4, 5), (4, 5, 2))
   trailing_shapes = ((), (6,), (6, 7))
   with self.test_session() as sess:
     for n in (2, 3):
       for shape in partition_shapes:
         partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape)
         for extra_shape in trailing_shapes:
           data = np.random.randn(*(shape + extra_shape))
           outputs = tf.dynamic_partition(data, partitions, num_partitions=n)
           self.assertEqual(n, len(outputs))
           computed = sess.run(outputs)
           for part_id, part in enumerate(computed):
             self.assertAllEqual(part, data[partitions == part_id])
 def testHigherRankIndexOutOfRange(self):
   """An out-of-range entry in a rank-2 partition tensor is reported with its position."""
   with self.test_session() as sess:
     shape = (2, 3)
     indices = tf.placeholder(shape=shape, dtype=np.int32)
     data = np.zeros(shape + (5,))
     partitions = tf.dynamic_partition(data, indices, num_partitions=7)
     # Place the invalid value at every position in turn (row-major order).
     for i, j in np.ndindex(*shape):
       bad = np.zeros(shape, dtype=np.int32)
       bad[i, j] = 17
       expected_message = r"partitions\[%d,%d\] = 17 is not in \[0, 7\)" % (i, j)
       with self.assertRaisesOpError(expected_message):
         sess.run(partitions, feed_dict={indices: bad})
Beispiel #24
0
def triplet_loss(infer, labels, batch_size, radius = 1.0):
    """Add the pair loss to the 'losses' collection and return the collection total.

    `infer` is split into two halves along axis 0 and the squared distance
    between corresponding rows is computed. Pairs labelled 1 contribute that
    distance; pairs labelled 0 contribute max(0, radius**2 - distance).
    `batch_size` is accepted but not used.
    """
    first_half, second_half = tf.split(0, 2, infer)

    # labels are 0 or 1 and select the partition of each pair
    squared_dist = tf.reduce_sum(tf.square(first_half - second_half), 1)
    dist_by_label = tf.dynamic_partition(squared_dist, labels, 2)

    positive_terms = dist_by_label[1]
    negative_terms = tf.maximum(0.0, radius * radius - dist_by_label[0])
    pair_loss_value = tf.reduce_mean(tf.concat(0, [positive_terms, negative_terms]))

    tf.add_to_collection('losses', pair_loss_value)
    return tf.add_n(tf.get_collection('losses'), name = 'total_loss')
  def testSimpleOneDimensional(self):
    """Partitioning a vector groups its entries by index and keeps their order."""
    with self.test_session() as sess:
      data = tf.constant([0, 13, 2, 39, 4, 17])
      indices = tf.constant([0, 0, 2, 3, 2, 1])
      partitions = tf.dynamic_partition(data, indices, num_partitions=4)
      partition_vals = sess.run(partitions)

    expected_vals = [[0, 13], [17], [2, 4], [39]]
    for k in range(4):
      self.assertAllEqual(expected_vals[k], partition_vals[k])
    # Vector data input to DynamicPartition results in
    # `num_partitions` vectors of unknown length.
    for k in range(4):
      self.assertEqual([None], partitions[k].get_shape().as_list())
  def testSimpleTwoDimensional(self):
    """Partitioning a matrix groups whole rows by index and keeps their order."""
    with self.test_session() as sess:
      rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
              [9, 10, 11], [12, 13, 14], [15, 16, 17]]
      data = tf.constant(rows)
      indices = tf.constant([0, 0, 2, 3, 2, 1])
      partitions = tf.dynamic_partition(data, indices, num_partitions=4)
      partition_vals = sess.run(partitions)

    expected_vals = [
        [[0, 1, 2], [3, 4, 5]],
        [[15, 16, 17]],
        [[6, 7, 8], [12, 13, 14]],
        [[9, 10, 11]],
    ]
    for k in range(4):
      self.assertAllEqual(expected_vals[k], partition_vals[k])
    # Matrix data input to DynamicPartition results in
    # `num_partitions` matrices with an unknown number of rows, and 3 columns.
    for k in range(4):
      self.assertEqual([None, 3], partitions[k].get_shape().as_list())
Beispiel #27
0
def triplet_loss(infer, labels, radius = 2.0):
	"""
	Mean pair loss over the two halves of `infer`.

	Args:
		infer: inference concatenate together with 2 * batch_size
		labels: 0 or 1 with batch_size
		radius: margin; pairs labelled 0 contribute
			max(0, radius**2 - squared distance)
	Return:
		loss: mean over the squared distances of the pairs labelled 1 and
			the margin terms of the pairs labelled 0
	"""
	first_half, second_half = tf.split(0, 2, infer)

	squared_dist = tf.reduce_sum(tf.square(first_half - second_half), 1)
	dist_by_label = tf.dynamic_partition(squared_dist, labels, 2)

	positive_terms = dist_by_label[1]
	negative_terms = tf.maximum(0.0, radius * radius - dist_by_label[0])

	return tf.reduce_mean(tf.concat(0, [positive_terms, negative_terms]))
  def testHigherRank(self):
    """Higher-rank partitioning matches numpy indexing and propagates gradients."""
    np.random.seed(7)
    partition_shapes = ((4,), (4, 5), (4, 5, 2))
    trailing_shapes = ((), (6,), (6, 7))
    with self.test_session() as sess:
      for n in (2, 3):
        for shape in partition_shapes:
          partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape)
          for extra_shape in trailing_shapes:
            data = np.random.randn(*(shape + extra_shape))
            partitions_t = tf.constant(partitions, dtype=tf.int32)
            data_t = tf.constant(data)
            outputs = tf.dynamic_partition(
                data_t, partitions_t, num_partitions=n)
            self.assertEqual(n, len(outputs))
            outputs_val = sess.run(outputs)
            for part_id, part in enumerate(outputs_val):
              self.assertAllEqual(part, data[partitions == part_id])

            # Test gradients: scaling every upstream gradient by 7 must
            # scale the gradient with respect to the data by 7.
            upstream_grads = [7 * part for part in outputs_val]
            data_grad, partitions_grad = tf.gradients(
                outputs, [data_t, partitions_t], upstream_grads)
            self.assertEqual(partitions_grad, None)  # Partitions has no gradients
            self.assertAllEqual(7 * data, sess.run(data_grad))
Beispiel #29
0
  def update_centroids(self, nearest_indices):
      """Return one row per cluster: the mean of the rows of `self.v_data` assigned to it.

      `nearest_indices` (cast to int32) gives the cluster of every data row.
      """
      clusters = tf.dynamic_partition(
          self.v_data, tf.to_int32(nearest_indices), self.n_clusters)
      centroid_rows = []
      for members in clusters:
          centroid_rows.append(tf.expand_dims(tf.reduce_mean(members, 0), 0))
      return tf.concat(centroid_rows, 0)
 
      
Beispiel #30
0
 def assign_new_model_parameters(params_1d):
     """Write the flat parameter vector `params_1d` back into the model variables.

     The vector is split with the partition index `part` into `n_tensors`
     pieces; piece i is reshaped to `shapes[i]` and assigned to
     `model.trainable_variables[i]`. `part`, `n_tensors`, `shapes` and
     `model` come from the enclosing scope.
     """
     flat_pieces = tf.dynamic_partition(params_1d, part, n_tensors)
     for idx, (target_shape, piece) in enumerate(zip(shapes, flat_pieces)):
         variable = model.trainable_variables[idx]
         variable.assign(tf.reshape(piece, target_shape))
Beispiel #31
0
def batch_normalization(batch_data_list, types_list, miss_list):
    """Normalise the observed entries of every variable in a batch.

    Each element of `batch_data_list` is split by column i of `miss_list`
    into missing rows (partition 0) and observed rows (partition 1). Only the
    observed rows are transformed, after which both parts are stitched back
    into their original row order.

    Transformation by `types_list[i]['type']`:
      * 'real'  -- standardised with the batch mean/variance of observed rows.
      * 'pos'   -- log(1 + x), then standardised the same way.
      * 'count' -- log(x) only; parameters reported as [0.0, 1.0].
      * other   -- passed through unchanged; parameters [0.0, 1.0].

    Returns:
      (normalized_data, normalization_parameters): two lists aligned with
      `batch_data_list`; each parameter entry is a [mean, variance] pair.
    """
    normalized_data = []
    normalization_parameters = []

    def _standardize(values):
        # Moments over axis 0; variance is clipped to avoid zero values.
        mean, variance = tf.nn.moments(values, 0)
        variance = tf.clip_by_value(variance, 1e-6, 1e20)
        scaled = tf.nn.batch_normalization(values,
                                           mean,
                                           variance,
                                           offset=0.0,
                                           scale=1.0,
                                           variance_epsilon=1e-6)
        return scaled, mean, variance

    for col, column in enumerate(batch_data_list):
        flags = miss_list[:, col]
        # Partition 0 holds the missing rows, partition 1 the observed ones.
        missing_data, observed_data = tf.dynamic_partition(column,
                                                           flags,
                                                           num_partitions=2)
        # Row positions of both partitions, needed to restore the row order.
        row_ids = tf.dynamic_partition(tf.range(tf.shape(column)[0]),
                                       flags,
                                       num_partitions=2)

        var_type = types_list[col]['type']
        if var_type not in ('real', 'pos', 'count'):
            # Categorical and ordinal variables are not normalised.
            normalized_data.append(column)
            normalization_parameters.append([0.0, 1.0])
            continue

        if var_type == 'real':
            transformed, mean, variance = _standardize(observed_data)
            stats = [mean, variance]
        elif var_type == 'pos':
            # Log-normal case: standardise the log of the data.
            transformed, mean, variance = _standardize(
                tf.math.log(1.0 + observed_data))
            stats = [mean, variance]
        else:
            # 'count': feed the log of the data, no standardisation.
            transformed = tf.math.log(observed_data)
            stats = [0.0, 1.0]

        normalized_data.append(
            tf.dynamic_stitch(row_ids, [missing_data, transformed]))
        normalization_parameters.append(stats)

    return normalized_data, normalization_parameters
Beispiel #32
0
 def __update_center(self, data, nearest):
     """Recompute the `self._k` centers as the means of their members.

     `data` is grouped by the cluster index in `nearest` (cast to int32);
     every group is averaged over axis 0 and the means are concatenated in
     cluster order.
     """
     def _mean_row(members):
         # Mean of one cluster, with a leading axis of size 1 for concat.
         return tf.expand_dims(tf.reduce_mean(members, 0), 0)

     groups = tf.dynamic_partition(data, tf.to_int32(nearest), self._k)
     return tf.concat([_mean_row(group) for group in groups], 0)
Beispiel #33
0
  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Besides building the detection model, this function rewrites some inputs
    in place: padded `query` / `query_box` entries are removed with masks
    derived from `query_sec`, `query_box` is split per batch element, the
    groundtruth tensors are reshaped to a leading dimension of
    `batch_size * FLAGS.ref_sec * 3`, and `true_image_shape` is tiled to match.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None. When None, the groundtruth reshaping is skipped.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations (a `TPUEstimatorSpec` when `use_tpu` is set and the
        mode is not EVAL).
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Make sure to set the Keras learning phase. True during training,
    # False for inference.
    tf.keras.backend.set_learning_phase(is_training)
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold = None
    # Must exist on every path: the TPUEstimatorSpec below always reads it,
    # but it is only replaced when a fine-tune checkpoint is restored on TPU.
    scaffold_fn = None
    # NOTE(review): relies on the private Tensor._shape_as_list(); the batch
    # dimension must be statically known for the partitioning below.
    batch_size = features['hash']._shape_as_list()[0]

    # Drop padded query entries; `query_sec * 24` valid entries per example.
    mask = tf.sequence_mask(features['query_sec'] * 24,
                            tf.shape(features['query'])[1])
    features['query'] = tf.boolean_mask(features['query'], mask)

    # Same for the query boxes (`query_sec * 3` valid entries), while keeping
    # track of which batch element every surviving box came from.
    mask = tf.sequence_mask(features['query_sec'] * 3,
                            tf.shape(features['query_box'])[1])
    idx = tf.range(batch_size)
    idx = tf.reshape(idx, [-1, 1])
    idx = tf.tile(idx, [1, tf.shape(features['query_box'])[1]])
    features['query_box'] = tf.boolean_mask(features['query_box'], mask)
    idx = tf.boolean_mask(idx, mask)
    features['query_box'] = tf.dynamic_partition(features['query_box'], idx,
                                                 batch_size)
    features['query_idx'] = idx

    # `labels` is None in PREDICT mode, so only reshape groundtruth when it
    # is actually provided.
    if labels is not None:
      d0 = batch_size * FLAGS.ref_sec * 3
      labels[fields.InputDataFields.num_groundtruth_boxes] = tf.reshape(
        labels[fields.InputDataFields.num_groundtruth_boxes], [-1])
      labels[fields.InputDataFields.groundtruth_boxes] = tf.reshape(
        labels[fields.InputDataFields.groundtruth_boxes], [d0, -1, 4])
      labels[fields.InputDataFields.groundtruth_classes] = tf.reshape(
        labels[fields.InputDataFields.groundtruth_classes], [d0, -1, 2])

    # Repeat every image shape `ref_sec * 3` times and flatten to rows of 3.
    true_im_shape = features[fields.InputDataFields.true_image_shape]
    true_im_shape = tf.expand_dims(true_im_shape, axis=1)
    true_im_shape = tf.tile(true_im_shape, [1, FLAGS.ref_sec * 3, 1])
    features[fields.InputDataFields.true_image_shape] = tf.reshape(
      true_im_shape, [-1, 3])

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      labels = unstack_batch(
        labels,
        unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
          fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      gt_weights_list = None
      if fields.InputDataFields.groundtruth_weights in labels:
        gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
      detection_model.provide_groundtruth(
        groundtruth_boxes_list=gt_boxes_list,
        groundtruth_classes_list=gt_classes_list,
        groundtruth_masks_list=gt_masks_list,
        groundtruth_keypoints_list=gt_keypoints_list,
        groundtruth_weights_list=gt_weights_list)

    prediction_dict = detection_model.predict(features)
    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
      detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        if not train_config.fine_tune_checkpoint_type:
          # train_config.from_detection_checkpoint field is deprecated. For
          # backward compatibility, set train_config.fine_tune_checkpoint_type
          # based on train_config.from_detection_checkpoint.
          if train_config.from_detection_checkpoint:
            train_config.fine_tune_checkpoint_type = 'detection'
          else:
            train_config.fine_tune_checkpoint_type = 'classification'
        asg_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
            train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
          get_variables_available_in_checkpoint(
            asg_map, FLAGS.i3d_ckpt,
            include_global_step=False))
        # NOTE(review): the variable map and the non-TPU saver read
        # FLAGS.i3d_ckpt, while the TPU scaffold initialises from
        # train_config.fine_tune_checkpoint -- confirm this is intended.
        if use_tpu:
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()

          scaffold_fn = tpu_scaffold
        else:
          saver = tf.train.Saver(var_list=available_var_map, reshape=True)

          def init_fn(scaffold, session):
            saver.restore(session, FLAGS.i3d_ckpt)

          scaffold = tf.train.Scaffold(init_fn=init_fn)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape],
        features['ref_sec'])
      losses = list(losses_dict.values())
      if train_config.add_regularization_loss:
        regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
        if regularization_losses:
          regularization_loss = tf.add_n(regularization_losses,
                                         name='regularization_loss')
          losses.append(regularization_loss)
          losses_dict['Loss/regularization_loss'] = regularization_loss
      total_loss = tf.add_n(losses, name='total_loss')
      losses_dict['Loss/total_loss'] = total_loss

      if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
          configs['graph_rewriter_config'], is_training=is_training)
        graph_rewriter_fn()

      # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
      # can write learning rate summaries on TPU without host calls.
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
        train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
      if use_tpu:
        training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
          training_optimizer)
      if FLAGS.multi_gpu:
        training_optimizer = tf.contrib.estimator.TowerOptimizer(
          training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      include_variables = (
        train_config.update_trainable_variables
        if train_config.update_trainable_variables else None)
      exclude_variables = (
        train_config.freeze_variables
        if train_config.freeze_variables else None)
      trainable_variables = tf.contrib.framework.filter_variables(
        tf.trainable_variables(),
        include_patterns=include_variables,
        exclude_patterns=exclude_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
          tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      scaffold = None
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _prepare_groundtruth_for_eval(
        detection_model, class_agnostic)
      use_original_images = fields.InputDataFields.original_image in features
      eval_images = (
        features[fields.InputDataFields.original_image] if use_original_images
        else features[fields.InputDataFields.image])
      eval_dict = eval_util.result_dict_for_single_example(
        eval_images[0:1],
        features[inputs.HASH_KEY][0],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
          eval_input_config.label_map_path)
      img_summary = None
      if not use_tpu and use_original_images:
        detection_and_groundtruth = (
          vis_utils.draw_side_by_side_evaluation_image(
            eval_dict, category_index,
            max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
            min_score_thresh=eval_config.min_score_threshold,
            use_normalized_coordinates=False))
        img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
                                       detection_and_groundtruth)

      # Eval metrics on a single example.
      eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config,
        category_index.values(),
        eval_dict)
      for loss_key, loss_tensor in losses_dict.items():
        eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
      for var in optimizer_summary_vars:
        eval_metric_ops[var.op.name] = (var, tf.no_op())
      if img_summary is not None:
        eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
          img_summary, tf.no_op())
      eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

      if eval_config.use_moving_averages:
        variable_averages = tf.train.ExponentialMovingAverage(0.0)
        variables_to_restore = variable_averages.variables_to_restore()
        keep_checkpoint_every_n_hours = (
          train_config.keep_checkpoint_every_n_hours)
        saver = tf.train.Saver(
          variables_to_restore,
          keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
        scaffold = tf.train.Scaffold(saver=saver)

    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
      return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        scaffold_fn=scaffold_fn,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metric_ops,
        export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=scaffold)
Beispiel #34
0
def update_centroids(samples, nearest_indices, n_clusters):
    """Return the new centroids: the mean of the samples in each cluster.

    `samples` is split into `n_clusters` groups by `nearest_indices` (cast to
    int32). Each group is averaged over axis 0 and the means are concatenated
    in cluster order.
    """
    assignments = tf.to_int32(nearest_indices)
    per_cluster = tf.dynamic_partition(samples, assignments, n_clusters)
    cluster_means = [tf.reduce_mean(members, 0) for members in per_cluster]
    # Give every mean a leading axis of size 1 so they concatenate as rows.
    centroid_rows = [tf.expand_dims(mean, 0) for mean in cluster_means]
    return tf.concat(centroid_rows, 0)
    def run_graph(self,
                  num_features,
                  train_data,
                  val_data,
                  test_data,
                  sample_weights=None):
        '''
        Build the TF1 graph, train it, and evaluate it after every epoch.

        The predicted scale is softplus(linear term [+ factorized term when
        self.k > 0]); together with a trainable shape variable it is passed to
        self.distribution.left_censoring to get the probability that is
        trained with a log loss against `events`. Two header-bidding
        regularizers and an L2 penalty on the looked-up embeddings are added.
        Whenever the validation loss reaches a new minimum, the test-set
        predictions are written to 'output/all_predictions_factorized.csv' and
        the parameters are pickled to 'output/params_k<k>.pkl'.

        :param num_features: number of rows of the embedding variables
        :param train_data: dataset with `num_instances` and
            `make_sparse_batch(batch_size, only_freq=...)`
        :param val_data: dataset providing `make_sparse_batch(only_freq=...)`
        :param test_data: dataset providing `make_sparse_batch(only_freq=...)`
        :param sample_weights: falsy for unweighted losses, or 'time' to
            weight the main loss and accuracy by `times` and the header-bidding
            regularizers by 1 / times
        :return: None
        :raises ValueError: if sample_weights is neither falsy nor 'time'
        '''
        # Any other value would leave the loss and accuracy nodes undefined
        # and only fail later inside TensorFlow.
        if sample_weights and sample_weights != 'time':
            raise ValueError(
                "sample_weights must be None or 'time', got %r" %
                (sample_weights, ))

        # INPUTs
        feature_indice = tf.placeholder(tf.int32, name='feature_indice')
        feature_values = tf.placeholder(tf.float32, name='feature_values')

        min_hbs = tf.placeholder(tf.float32,
                                 name='min_headerbids')  # for regularization
        max_hbs = tf.placeholder(tf.float32,
                                 name='max_headerbids')  # for regularization

        times = tf.placeholder(tf.float32, shape=[None], name='times')
        events = tf.placeholder(tf.int32, shape=[None], name='events')

        # shape: (batch_size, max_nonzero_len)
        embeddings_linear = tf.Variable(
            tf.truncated_normal(shape=(num_features, ), mean=0.0, stddev=1e-5))
        filtered_embeddings_linear = tf.nn.embedding_lookup(
            embeddings_linear, feature_indice) * feature_values
        intercept = tf.Variable(1e-5)
        linear_term = self.linear_function(filtered_embeddings_linear,
                                           intercept)
        scale = linear_term

        embeddings_factorized = None
        filtered_embeddings_factorized = None
        if self.k > 0:
            # shape: (batch_size, max_nonzero_len, k)
            embeddings_factorized = tf.Variable(
                tf.truncated_normal(shape=(num_features, self.k),
                                    mean=0.0,
                                    stddev=1e-5))
            filtered_embeddings_factorized = tf.nn.embedding_lookup(embeddings_factorized, feature_indice) * \
                                      tf.tile(tf.expand_dims(feature_values, axis=-1), [1, 1, 1])
            factorized_term = self.factorization_machines(
                filtered_embeddings_factorized)
            scale += factorized_term

        scale = tf.nn.softplus(scale)

        # if event == 0, right-censoring
        # if event == 1, left-censoring
        shape = tf.Variable(0.2, trainable=True)
        not_survival_proba = self.distribution.left_censoring(
            times, scale, shape)  # the left area

        # Hard 0/1 prediction at a 0.5 threshold, used only for accuracy.
        not_survival_bin = tf.where(tf.greater_equal(not_survival_proba, 0.5),
                                    tf.ones(tf.shape(not_survival_proba)),
                                    tf.zeros(tf.shape(not_survival_proba)))

        if not sample_weights:
            running_acc, acc_update = tf.metrics.accuracy(
                labels=events, predictions=not_survival_bin)
            batch_loss = tf.losses.log_loss(labels=events,
                                            predictions=not_survival_proba,
                                            reduction=tf.losses.Reduction.MEAN)
        else:  # sample_weights == 'time'
            running_acc, acc_update = tf.metrics.accuracy(
                labels=events, predictions=not_survival_bin, weights=times)
            batch_loss = tf.losses.log_loss(labels=events,
                                            predictions=not_survival_proba,
                                            weights=times,
                                            reduction=tf.losses.Reduction.MEAN)
        running_loss, loss_update = tf.metrics.mean(batch_loss)

        # Header Bidding Regularization
        # Rows where adx won, max_hb is present, and times < max_hb.
        hb_adxwon_partitions = tf.cast(
            tf.logical_and(
                tf.equal(events, 0),  # adx won
                tf.logical_and(
                    tf.not_equal(0.0, max_hbs),  # the max_hb is not missing
                    tf.less(times, max_hbs))),
            tf.int32)
        # Rows where adx lost, min_hb is present, and min_hb < times.
        hb_adxlose_partitions = tf.cast(
            tf.logical_and(
                tf.equal(events, 1),  # adx lose
                tf.logical_and(
                    tf.not_equal(0.0, min_hbs),  # the min_hb is not missing
                    tf.less(min_hbs, times))),  # the min hb < the floor
            tf.int32)

        # Using boolean_mask instead of dynamic_partition leads to:
        # "UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory."
        # https://stackoverflow.com/questions/44380727/get-userwarning-while-i-use-tf-boolean-mask?noredirect=1&lq=1
        regable_hb_adxwon = tf.dynamic_partition(max_hbs, hb_adxwon_partitions,
                                                 2)[1]
        regable_hb_adxlose = tf.dynamic_partition(min_hbs,
                                                  hb_adxlose_partitions, 2)[1]
        regable_scale_adxwon = tf.dynamic_partition(scale,
                                                    hb_adxwon_partitions, 2)[1]
        regable_scale_adxlose = tf.dynamic_partition(scale,
                                                     hb_adxlose_partitions,
                                                     2)[1]

        hb_adxwon_pred = self.distribution.left_censoring(
            regable_hb_adxwon, regable_scale_adxwon, shape)
        hb_adxlose_pred = self.distribution.left_censoring(
            regable_hb_adxlose, regable_scale_adxlose, shape)

        if not sample_weights:
            hb_reg_adxwon = tf.losses.log_loss(labels=tf.zeros(
                tf.shape(hb_adxwon_pred)),
                                               predictions=hb_adxwon_pred)
            hb_reg_adxlose = tf.losses.log_loss(labels=tf.zeros(
                tf.shape(hb_adxlose_pred)),
                                                predictions=hb_adxlose_pred)
        else:  # sample_weights == 'time'
            regable_time_adxwon = tf.dynamic_partition(times,
                                                       hb_adxwon_partitions,
                                                       2)[1]
            regable_time_adxlose = tf.dynamic_partition(
                times, hb_adxlose_partitions, 2)[1]
            # NOTE(review): this branch uses all-ones labels for adx-won while
            # the unweighted branch uses zeros -- confirm which is intended.
            hb_reg_adxwon = tf.losses.log_loss(
                labels=tf.ones(tf.shape(hb_adxwon_pred)),
                predictions=hb_adxwon_pred,
                weights=1.0 / regable_time_adxwon)
            hb_reg_adxlose = tf.losses.log_loss(
                labels=tf.zeros(tf.shape(hb_adxlose_pred)),
                predictions=hb_adxlose_pred,
                weights=1.0 / regable_time_adxlose)
        mean_hb_reg_adxwon = tf.reduce_mean(hb_reg_adxwon)
        mean_hb_reg_adxlose = tf.reduce_mean(hb_reg_adxlose)

        # L2 penalty on the embeddings actually looked up for this batch.
        l2_norm = self.lambda_linear * tf.nn.l2_loss(
            filtered_embeddings_linear)
        if embeddings_factorized is not None:
            l2_norm += self.lambda_factorized * tf.nn.l2_loss(
                filtered_embeddings_factorized)

        loss_mean = batch_loss + \
                    tf.constant(self.lambda_hb_adxwon) * mean_hb_reg_adxwon + \
                    tf.constant(self.lambda_hb_adxlose) * mean_hb_reg_adxlose + \
                    l2_norm

        ### gradient clipping
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        gradients, variables = zip(*optimizer.compute_gradients(loss_mean))
        gradients_clipped, _ = tf.clip_by_global_norm(gradients, 5.0)
        training_op = optimizer.apply_gradients(
            zip(gradients_clipped, variables))

        # Isolate the variables stored behind the scenes by the metric operation
        running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES)
        # Define initializer to initialize/reset running variables
        running_vars_initializer = tf.variables_initializer(
            var_list=running_vars)

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        with tf.Session() as sess:
            init.run()

            # Lowest validation loss seen so far (None before the first epoch).
            best_loss_val = None

            num_total_batches = int(
                np.ceil(train_data.num_instances / self.batch_size))
            for epoch in range(1, self.num_epochs + 1):
                sess.run(running_vars_initializer)
                # model training
                num_batch = 0
                start = nowtime()
                for time_batch, event_batch, featidx_batch, featval_batch, minhbs_batch, maxhbs_batch, _ \
                        in train_data.make_sparse_batch(self.batch_size, only_freq=ONLY_FREQ_TRAIN):

                    num_batch += 1

                    _, loss_batch, _ = sess.run(
                        [training_op, loss_mean, acc_update],
                        feed_dict={
                            'feature_indice:0': featidx_batch,
                            'feature_values:0': featval_batch,
                            'min_headerbids:0': minhbs_batch,
                            'max_headerbids:0': maxhbs_batch,
                            'times:0': time_batch,
                            'events:0': event_batch
                        })

                    # Per-batch progress is only reported during epoch 1.
                    if epoch == 1:
                        print(
                            "Epoch %d - Batch %d/%d: batch loss = %.4f" %
                            (epoch, num_batch, num_total_batches, loss_batch))
                        print("                         time: %.4fs" %
                              (nowtime() - start))
                        start = nowtime()

                # evaluation on training data
                eval_nodes_update = [
                    loss_update, acc_update, not_survival_proba, scale, max_hbs
                ]
                eval_nodes_metric = [running_loss, running_acc]
                print()
                print("========== Evaluation at Epoch %d ==========" % epoch)
                print('*** On Training Set:')
                (loss_train, acc_train), _, _, _, _, _ = self.evaluate(
                    train_data.make_sparse_batch(only_freq=ONLY_FREQ_TEST),
                    running_vars_initializer, sess, eval_nodes_update,
                    eval_nodes_metric, sample_weights)

                # evaluation on validation data
                print('*** On Validation Set:')
                (
                    loss_val, acc_val
                ), not_survival_val, _, _, events_val, times_val = self.evaluate(
                    val_data.make_sparse_batch(only_freq=ONLY_FREQ_TEST),
                    running_vars_initializer, sess, eval_nodes_update,
                    eval_nodes_metric, sample_weights)
                print("Validation C-Index = %.4f" %
                      c_index(events_val, not_survival_val, times_val))

                if best_loss_val is None or loss_val < best_loss_val:
                    print("!!! GET THE LOWEST VAL LOSS !!!")
                    best_loss_val = loss_val

                    # evaluation on test data
                    print('*** On Test Set:')
                    (
                        loss_test, acc_test
                    ), not_survival_test, scale_test, max_hbs_test, events_test, times_test = self.evaluate(
                        test_data.make_sparse_batch(only_freq=ONLY_FREQ_TEST),
                        running_vars_initializer, sess, eval_nodes_update,
                        eval_nodes_metric, sample_weights)
                    print("TEST C-Index = %.4f" %
                          c_index(events_test, not_survival_test, times_test))

                    # Store prediction results
                    with open('output/all_predictions_factorized.csv',
                              'w',
                              newline="\n") as outfile:
                        csv_writer = csv.writer(outfile)
                        csv_writer.writerow(('NOT_SURV_PROB', 'EVENTS',
                                             'MAX(RESERVE, REVENUE)', 'MAX_HB',
                                             'SCALE', 'SHAPE'))
                        sh = shape.eval()
                        for p, e, t, h, sc in zip(not_survival_test,
                                                  events_test, times_test,
                                                  max_hbs_test, scale_test):
                            csv_writer.writerow((p, e, t, h, sc, sh))
                    print('All predictions are outputted for error analysis')

                    # Store parameters
                    params = {
                        'embeddings_linear': embeddings_linear.eval(),
                        'intercept': intercept.eval(),
                        'shape': shape.eval(),
                        'distribution_name': type(self.distribution).__name__
                    }
                    if embeddings_factorized is not None:
                        # No trailing comma here: it would wrap the array in a
                        # 1-tuple before pickling.
                        params['embeddings_factorized'] = (
                            embeddings_factorized.eval())
                    # Context manager so the file is flushed and closed.
                    with open('output/params_k%d.pkl' % self.k,
                              'wb') as params_file:
                        pickle.dump(params, params_file)
Beispiel #36
0
    def get_output(self, train=False):
        """Run the forward and reverse layers over the input and merge them.

        The input is fed as-is to ``self.forward`` and time-reversed to
        ``self.reverse``.  When an input mask is present, padding is
        re-aligned so that the outputs of both directions line up step by
        step (Theano: by rolling each sample; other backends: by reversing
        the mask and trimming the padded steps with ``tf.dynamic_partition``).

        The inline ``# 0,0,0,1,2,3,4`` style comments trace one example
        sequence with three padded steps through the computation.

        Args:
            train: forwarded to ``self.get_input`` / ``self.get_input_mask``.

        Returns:
            The merged output according to ``self.merge_mode``
            (``'concat'``, ``'sum'``, ``'ave'`` or ``'mul'``).  Any other
            merge mode falls through and returns ``None``.
        """
        X = self.get_input(train)  # 0,0,0,1,2,3,4

        mask = self.get_input_mask(train)  # 0,0,0,1,1,1,1

        # X_rev = reverse(X): swap to time-major, flip, swap back.
        X_rev = K.permute_dimensions(X, (1, 0, 2))
        X_rev = X_rev[::-1]
        X_rev = K.permute_dimensions(X_rev, (1, 0, 2))  # 4,3,2,1,0,0,0

        Y = self.forward(X, mask)  # 0,0,0,1,3,6,10

        Y_rev = None

        # Compare against None explicitly: a symbolic tensor must not be
        # used as a Python boolean (TensorFlow raises on it).
        if mask is not None:

            if K._BACKEND == 'theano':
                # Convert right padding to left padding by rolling each
                # sample by its number of unmasked steps.
                shifts = K.sum(mask, axis=1)
                import theano
                X_rev, _ = theano.scan(
                    lambda x, i: theano.tensor.roll(x, -i, 0),
                    sequences=[X_rev, shifts])  # 0,0,0,4,3,2,1

                # Get reverse output
                Y_rev = self.reverse(
                    X_rev, mask
                )  # 0,0,0,4,7,9,10 or just 10 if return_sequences = False

                if self.return_sequences:

                    # Fix alignment:
                    # When return_sequences = True, outputs corresponding to
                    # the same input step should be merged.

                    # Reverse Y_rev.
                    # Note: on reversing, left padding becomes right padding.
                    Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
                    Y_rev = Y_rev[::-1]
                    Y_rev = K.permute_dimensions(Y_rev,
                                                 (1, 0, 2))  # 10,9,7,4,0,0,0

                    # Convert right padding back to left padding
                    Y_rev, _ = theano.scan(
                        lambda x, i: theano.tensor.roll(x, -i, 0),
                        sequences=[Y_rev, shifts])  # 0,0,0,10,9,7,4
            else:

                import tensorflow as tf

                # mask_rev = reverse(mask)
                mask_rev = K.permute_dimensions(mask, (1, 0))
                mask_rev = mask_rev[::-1]
                mask_rev = K.permute_dimensions(mask_rev,
                                                (1, 0))  # 1,1,1,1,0,0,0

                # X_rev = 4,3,2,1,0,0,0
                # Get reverse output:
                Y_rev = self.reverse(
                    X_rev, mask_rev)  # 4,7,9,10,g,g,g  (g = Garbage value)

                # Reverse Y_rev
                Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
                Y_rev = Y_rev[::-1]
                Y_rev = K.permute_dimensions(Y_rev,
                                             (1, 0, 2))  # g,g,g,10,9,7,4

                # Trim off garbage values
                [garbage,
                 Y_rev] = tf.dynamic_partition(Y_rev, mask,
                                               2)  # [g,g,g] [10,9,7,4]

                if self.return_sequences:
                    # Pad left with zeros in place of the trimmed steps.
                    zeros = K.zeros_like(garbage)  # 0,0,0
                    Y_rev = K.concatenate([zeros, Y_rev],
                                          axis=1)  # 0,0,0,10,9,7,4
                else:
                    Y_rev = Y_rev[:, 0]  # 10

        else:

            # No padding to handle: let the reverse layer produce the final
            # shape itself and simply flip sequences back into input order.
            self.reverse.return_sequences = self.return_sequences
            Y_rev = self.reverse(X_rev)
            if self.return_sequences:
                Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
                Y_rev = Y_rev[::-1]
                Y_rev = K.permute_dimensions(Y_rev, (1, 0, 2))
            if K._BACKEND != 'theano':
                # Set the reverse layer back to emitting full sequences;
                # presumably the masked non-Theano branch above relies on
                # per-step outputs (it slices Y_rev itself) -- TODO confirm.
                self.reverse.return_sequences = True

        if self.merge_mode == 'concat':
            return K.concatenate([Y, Y_rev])
        elif self.merge_mode == 'sum':
            return Y + Y_rev
        elif self.merge_mode == 'ave':
            return (Y + Y_rev) / 2
        elif self.merge_mode == 'mul':
            return Y * Y_rev
Beispiel #37
0
  def testNeuralLinUCBUpdateNumTrainSteps0(self, batch_size=1, context_dim=10):
    """Verify one `train` call when the encoder is trained for zero steps.

    The agent's `cov_matrix` and `data_vector` after the update are compared
    against per-arm estimates computed here directly from the encoded
    observations and the rewards of the same experience.
    """
    num_actions = 5

    # Build a `Trajectory` from random actions and the helper-made steps.
    initial_step, final_step = _get_initial_and_final_steps(
        batch_size, context_dim)
    action = np.random.randint(num_actions, size=batch_size, dtype=np.int32)
    action_step = _get_action_step(action)
    experience = _get_experience(initial_step, action_step, final_step)

    # Build the agent under test.
    observation_spec = tensor_spec.TensorSpec([context_dim], tf.float32)
    time_step_spec = time_step.time_step_spec(observation_spec)
    action_spec = tensor_spec.BoundedTensorSpec(
        dtype=tf.int32, shape=(), minimum=0, maximum=num_actions - 1)
    encoder = DummyNet(observation_spec)
    encoding_dim = 10
    agent = neural_linucb_agent.NeuralLinUCBAgent(
        time_step_spec=time_step_spec,
        action_spec=action_spec,
        encoding_network=encoder,
        encoding_network_num_train_steps=0,
        encoding_dim=encoding_dim,
        optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=1e-2))

    # Run the update and read back the agent's estimates.
    loss_info = agent.train(experience)
    self.evaluate(agent.initialize())
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(loss_info)
    final_a = self.evaluate(agent.cov_matrix)
    final_b = self.evaluate(agent.data_vector)

    def split_by_arm(values, shape):
      """Casts `values` to float64, reshapes them and groups rows by action."""
      return tf.dynamic_partition(
          data=tf.reshape(tf.cast(values, tf.float64), shape),
          partitions=tf.convert_to_tensor(action),
          num_partitions=num_actions)

    # Compute the expected estimates arm by arm.
    per_arm_observations = split_by_arm(
        experience.observation, [batch_size, context_dim])
    per_arm_rewards = split_by_arm(experience.reward, [batch_size])

    expected_a_per_arm = []
    expected_b_per_arm = []
    for arm_observations, arm_rewards in zip(per_arm_observations,
                                             per_arm_rewards):
      arm_encodings, _ = encoder(arm_observations)
      arm_encodings = tf.cast(arm_encodings, dtype=tf.float64)

      arm_sample_count = tf.cast(tf.shape(arm_rewards)[0], tf.float64)

      # pylint: disable=cell-var-from-loop
      def estimates_from_samples():
        gram = tf.matmul(arm_encodings, arm_encodings, transpose_a=True)
        weighted_sum = bandit_utils.sum_reward_weighted_observations(
            arm_rewards, arm_encodings)
        return gram, weighted_sum

      def zero_estimates():
        return (tf.zeros([encoding_dim, encoding_dim], dtype=tf.float64),
                tf.zeros([encoding_dim], dtype=tf.float64))

      # An arm that received no samples contributes all-zero estimates.
      arm_a, arm_b = tf.cond(
          tf.squeeze(arm_sample_count) > 0,
          estimates_from_samples,
          zero_estimates)

      expected_a_per_arm.append(self.evaluate(arm_a))
      expected_b_per_arm.append(self.evaluate(arm_b))

    # The agent's estimates must match the expectations.
    self.assertAllClose(expected_a_per_arm, final_a)
    self.assertAllClose(expected_b_per_arm, final_b)
# Build a TF1 queue-based input pipeline that splits the PNG files found in
# `dataset_path` into a random 80/20 train/test partition.
dataset_path = "../data/"
HEIGHT = 300
WIDTH = 300
CHANNEL = 3
DIMENSIONS = HEIGHT * WIDTH * CHANNEL

all_filepaths = [dataset_path + fp for fp in listdir(dataset_path)]

all_images = ops.convert_to_tensor(all_filepaths, dtype=dtypes.string)

# Partition vector: 1 marks a test file, 0 a training file.  Exactly
# `test_set_size` entries are 1; shuffling decides which files they hit.
test_set_size = int(0.2 * len(all_filepaths))
paritions = [0] * len(all_filepaths)
paritions[:test_set_size] = [1] * test_set_size
random.shuffle(paritions)

train_images, test_images = tf.dynamic_partition(all_images, paritions, 2)

train_input_queue = tf.train.slice_input_producer([train_images],
                                                  shuffle=False)
test_input_queue = tf.train.slice_input_producer([test_images], shuffle=False)

file_content = tf.read_file(train_input_queue[0])
train_image = tf.image.decode_png(file_content, channels=CHANNEL)

# The test image must be read from the test queue, not the training queue.
file_content = tf.read_file(test_input_queue[0])
test_image = tf.image.decode_png(file_content, channels=CHANNEL)

# Flatten each decoded image into a single column vector.
train_image = tf.reshape(train_image, [DIMENSIONS, 1])
test_image = tf.reshape(test_image, [DIMENSIONS, 1])

BATCH_SIZE = 100
# Script fragment: splits the image paths and labels into random train/test
# partitions and builds the queue-based readers for them.  `all_filepaths`,
# `all_labels` and `FLAGS` are defined outside this fragment.
# test_filepaths = [FLAGS.dataset_path + fp for fp in test_filepaths]

# Merge
# all_filepaths = train_filepaths + test_filepaths
# all_labels = train_labels + test_labels

all_images = ops.convert_to_tensor(all_filepaths, dtype=dtypes.string)
all_labels = ops.convert_to_tensor(all_labels, dtype=dtypes.int32)

# Create a custom random partition: 1 marks a test sample, 0 a training
# sample; the shuffle decides which samples end up in the test set.
partitions = [0] * len(all_filepaths)
TEST_SET_SIZE = int(FLAGS.TEST_DATASET_RATE * len(all_filepaths))
partitions[:TEST_SET_SIZE] = [1] * TEST_SET_SIZE
random.shuffle(partitions)

# The same partition vector is used for paths and labels so they stay paired.
train_images, test_images = tf.dynamic_partition(all_images, partitions, 2)
train_labels, test_labels = tf.dynamic_partition(all_labels, partitions, 2)

# Create the input queues
train_input_queue = tf.train.slice_input_producer([train_images, train_labels],
                                                  shuffle=True)
test_input_queue = tf.train.slice_input_producer([test_images, test_labels],
                                                 shuffle=True)

# Read the image and preprocess it as required by the network definition
# NOTE(review): is_training=False is passed for the *training* image --
# confirm this is intended.
file_content = tf.read_file(train_input_queue[0])
train_image = tf.image.decode_jpeg(file_content, channels=FLAGS.NUM_CHANNELS)
train_image = inception_preprocessing.preprocess_image(train_image,
                                                       FLAGS.NET_IMAGE_SIZE_H,
                                                       FLAGS.NET_IMAGE_SIZE_W,
                                                       is_training=False)
Beispiel #40
0
    def prediction(self):
        """Build the convolutional classifier graph and return its output.

        Word ids in ``self.words`` are looked up in the constant embedding
        table ``self.embs``.  For every width in ``self.filter_sizes`` a
        convolution + ReLU is applied, the result is split along the time
        axis into two groups according to ``self.partitions[i]``, and each
        group is max-pooled over time.  The pooled features feed a
        100-unit ReLU layer followed by a sigmoid output layer of size
        ``self.out_size``.

        Intermediate static shapes are printed while the graph is built.

        Returns:
            The sigmoid output tensor.  As a side effect the tensor is also
            stored on the instance as ``self.prediction``.
        """
        embedding_table = tf.constant(self.embs, tf.float32)
        word_vectors = tf.nn.embedding_lookup(embedding_table, self.words)

        print(word_vectors.get_shape())
        # conv2d needs a trailing channel axis.
        conv_input = tf.expand_dims(word_vectors, -1)
        print(conv_input.get_shape())

        pooled_outputs = []
        for index, width in enumerate(self.filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % width):
                kernel = tf.Variable(
                    tf.truncated_normal(
                        [width, self.word_embed_size, 1, self.num_filters],
                        stddev=0.1),
                    name="W")
                offset = tf.Variable(
                    tf.constant(0.1, shape=[self.num_filters]), name="b")

                feature_map = tf.nn.conv2d(conv_input,
                                           kernel,
                                           strides=[1, 1, 1, 1],
                                           padding="VALID",
                                           name="conv")
                activated = tf.nn.relu(tf.nn.bias_add(feature_map, offset),
                                       name="relu")
                print(activated.get_shape())

                # Drop the width-1 axis, then go time-major so that
                # dynamic_partition groups whole time steps.
                time_major = tf.transpose(
                    tf.reshape(activated, [
                        -1, self.max_length - width + 1, self.num_filters
                    ]), [1, 0, 2])
                print(time_major.get_shape())
                segments = tf.dynamic_partition(time_major,
                                                self.partitions[index], 2)
                print(segments[0].get_shape())

                # Back to batch-major before pooling over time.
                first_segment, second_segment = [
                    tf.transpose(segment, [1, 0, 2]) for segment in segments
                ]
                first_max = tf.reduce_max(first_segment, 1)
                second_max = tf.reduce_max(second_segment, 1)
                print(second_max.get_shape())

                pooled_outputs.append(tf.stack([first_max, second_max], 1))

        # Two pooled segments per filter size.
        num_filters_total = self.num_filters * len(self.filter_sizes) * 2

        features = tf.reshape(tf.concat(pooled_outputs, 2),
                              [-1, num_filters_total])
        print(features.get_shape())

        out_size = self.out_size

        with tf.variable_scope('first_layer_weights'):
            hidden_weights = tf.Variable(
                tf.truncated_normal([num_filters_total, 100], stddev=0.1))

        with tf.variable_scope('first_layer_Bias'):
            hidden_bias = tf.Variable(tf.constant(0.1, shape=[100]))

        hidden = tf.nn.relu(tf.matmul(features, hidden_weights) + hidden_bias)

        with tf.variable_scope('second_layer_weights'):
            output_weights = tf.Variable(
                tf.truncated_normal([100, out_size], stddev=0.1))

        with tf.variable_scope('Bias'):
            output_bias = tf.Variable(tf.constant(0.1, shape=[out_size]))

        self.prediction = tf.sigmoid(
            tf.matmul(hidden, output_weights) + output_bias)
        return self.prediction
Beispiel #41
0
def watch_movie(story, mem, l):
    """Select the first ``l`` leading entries of ``story`` as clips.

    A 0/1 sequence mask of length ``tf.shape(story)[0]`` is built from ``l``
    and ``story`` is partitioned with it; the entries where the mask is 1
    are kept as ``clips``.

    NOTE(review): ``mem`` is unused and nothing is returned here -- the
    remainder of this function appears to be missing from this excerpt.
    """
    story_length = tf.shape(story)[0]
    keep = tf.sequence_mask(l, story_length, dtype=tf.int32)
    clips = tf.dynamic_partition(story, keep, 2)[1]
Beispiel #42
0
def deeplabv3_model_fn(features, labels, mode, params):
    """Estimator model function for DeepLab v3 semantic segmentation.

    Builds the network from `params`, computes per-pixel class predictions
    and, outside PREDICT mode, a cross-entropy + L2 weight-decay loss over
    the pixels whose label is a valid class index.

    Args:
        features: batch of input images; passed to the network and, after
            `preprocessing.mean_image_addition`, to the image summary.
        labels: ground-truth label maps with a trailing channel axis
            (axis 3 is squeezed away below).
        mode: a `tf.estimator.ModeKeys` value.
        params: dict of hyper-parameters.  Keys read here: 'num_classes',
            'output_stride', 'base_architecture', 'pre_trained_model',
            'batch_norm_decay', 'batch_size', 'freeze_batch_norm',
            'tensorboard_images_max_outputs', 'learning_rate_policy',
            'momentum', optionally 'weight_decay' (defaults to
            `_WEIGHT_DECAY`); for the 'piecewise' policy 'num_train'; for
            the 'poly' policy 'initial_learning_rate',
            'initial_global_step', 'max_iter', 'end_learning_rate', 'power'.

    Returns:
        A `tf.estimator.EstimatorSpec`.  In PREDICT mode it only carries
        the predictions; otherwise also loss, train_op (None unless
        training) and the 'px_accuracy' / 'mean_iou' eval metrics.

    Raises:
        ValueError: in TRAIN mode, if params['learning_rate_policy'] is
            neither 'piecewise' nor 'poly'.
    """
    # Images with the mean added back, only used for the TensorBoard summary.
    images = tf.cast(tf.map_fn(preprocessing.mean_image_addition, features),
                     tf.uint8)

    network = deeplab_v3_generator(params['num_classes'],
                                   params['output_stride'],
                                   params['base_architecture'],
                                   params['pre_trained_model'],
                                   params['batch_norm_decay'])

    logits = network(features, mode == tf.estimator.ModeKeys.TRAIN)

    # Per-pixel arg-max class, keeping a trailing channel axis of size 1.
    pred_classes = tf.expand_dims(tf.argmax(logits,
                                            axis=3,
                                            output_type=tf.int32),
                                  axis=3)

    pred_decoded_labels = tf.py_func(
        preprocessing.decode_labels,
        [pred_classes, params['batch_size'], params['num_classes']], tf.uint8)

    predictions = {
        'classes': pred_classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
        'decoded_labels': pred_decoded_labels
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    gt_decoded_labels = tf.py_func(
        preprocessing.decode_labels,
        [labels, params['batch_size'], params['num_classes']], tf.uint8)

    labels = tf.squeeze(labels, axis=3)  # reduce the channel dimension.

    logits_by_num_classes = tf.reshape(logits, [-1, params['num_classes']])
    labels_flat = tf.reshape(labels, [
        -1,
    ])

    # 1 for pixels whose label is a valid class index, 0 otherwise
    # (presumably an "ignore" label above num_classes - 1 -- TODO confirm).
    # Partition [1] of each dynamic_partition below keeps only valid pixels.
    valid_indices = tf.to_int32(labels_flat <= params['num_classes'] - 1)
    valid_logits = tf.dynamic_partition(logits_by_num_classes,
                                        valid_indices,
                                        num_partitions=2)[1]
    valid_labels = tf.dynamic_partition(labels_flat,
                                        valid_indices,
                                        num_partitions=2)[1]

    preds_flat = tf.reshape(pred_classes, [
        -1,
    ])
    valid_preds = tf.dynamic_partition(preds_flat,
                                       valid_indices,
                                       num_partitions=2)[1]
    confusion_matrix = tf.confusion_matrix(valid_labels,
                                           valid_preds,
                                           num_classes=params['num_classes'])

    predictions['valid_preds'] = valid_preds
    predictions['valid_labels'] = valid_labels
    predictions['confusion_matrix'] = confusion_matrix

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(logits=valid_logits,
                                                           labels=valid_labels)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

    # When batch norm is frozen, variables with 'beta' or 'gamma' in their
    # name are excluded from both the weight decay and the optimizer.
    if not params['freeze_batch_norm']:
        train_var_list = [v for v in tf.trainable_variables()]
    else:
        train_var_list = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]

    # Add weight decay to the loss.
    with tf.variable_scope("total_loss"):
        loss = cross_entropy + params.get(
            'weight_decay', _WEIGHT_DECAY) * tf.add_n(
                [tf.nn.l2_loss(v) for v in train_var_list])
    # loss = tf.losses.get_total_loss()  # obtain the regularization losses as well

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images, gt_decoded_labels, pred_decoded_labels]),
            max_outputs=params['tensorboard_images_max_outputs']
        )  # Concatenate row-wise.

        global_step = tf.train.get_or_create_global_step()

        if params['learning_rate_policy'] == 'piecewise':
            # Scale the learning rate linearly with the batch size. When the batch size
            # is 128, the learning rate should be 0.1.
            initial_learning_rate = 0.1 * params['batch_size'] / 128
            batches_per_epoch = params['num_train'] / params['batch_size']
            # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
            boundaries = [
                int(batches_per_epoch * epoch) for epoch in [100, 150, 200]
            ]
            values = [
                initial_learning_rate * decay
                for decay in [1, 0.1, 0.01, 0.001]
            ]
            learning_rate = tf.train.piecewise_constant(
                tf.cast(global_step, tf.int32), boundaries, values)
        elif params['learning_rate_policy'] == 'poly':
            # The step is offset by 'initial_global_step' before being fed
            # to the polynomial decay schedule.
            learning_rate = tf.train.polynomial_decay(
                params['initial_learning_rate'],
                tf.cast(global_step, tf.int32) - params['initial_global_step'],
                params['max_iter'],
                params['end_learning_rate'],
                power=params['power'])
        else:
            raise ValueError(
                'Learning rate policy must be "piecewise" or "poly"')

        # Create a tensor named learning_rate for logging purposes
        tf.identity(learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=params['momentum'])

        # Batch norm requires update ops to be added as a dependency to the train_op
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss,
                                          global_step,
                                          var_list=train_var_list)
    else:
        train_op = None

    # Each tf.metrics.* call returns a (value, update_op) pair.
    accuracy = tf.metrics.accuracy(valid_labels, valid_preds)
    mean_iou = tf.metrics.mean_iou(valid_labels, valid_preds,
                                   params['num_classes'])
    metrics = {'px_accuracy': accuracy, 'mean_iou': mean_iou}

    # Create a tensor named train_accuracy for logging purposes
    tf.identity(accuracy[1], name='train_px_accuracy')
    tf.summary.scalar('train_px_accuracy', accuracy[1])

    def compute_mean_iou(total_cm, name='mean_iou'):
        """Compute the mean intersection-over-union via the confusion matrix."""
        sum_over_row = tf.to_float(tf.reduce_sum(total_cm, 0))
        sum_over_col = tf.to_float(tf.reduce_sum(total_cm, 1))
        cm_diag = tf.to_float(tf.diag_part(total_cm))
        denominator = sum_over_row + sum_over_col - cm_diag

        # The mean is only computed over classes that appear in the
        # label or prediction tensor. If the denominator is 0, we need to
        # ignore the class.
        num_valid_entries = tf.reduce_sum(
            tf.cast(tf.not_equal(denominator, 0), dtype=tf.float32))

        # If the value of the denominator is 0, set it to 1 to avoid
        # zero division.
        denominator = tf.where(tf.greater(denominator, 0), denominator,
                               tf.ones_like(denominator))
        iou = tf.div(cm_diag, denominator)

        # Expose the per-class IoU as named tensors and scalar summaries.
        for i in range(params['num_classes']):
            tf.identity(iou[i], name='train_iou_class{}'.format(i))
            tf.summary.scalar('train_iou_class{}'.format(i), iou[i])

        # If the number of valid entries is 0 (no classes) we return 0.
        result = tf.where(tf.greater(num_valid_entries, 0),
                          tf.reduce_sum(iou, name=name) / num_valid_entries, 0)
        return result

    # mean_iou[1] is the metric's update op, passed here as the confusion matrix.
    train_mean_iou = compute_mean_iou(mean_iou[1])

    tf.identity(train_mean_iou, name='train_mean_iou')
    tf.summary.scalar('train_mean_iou', train_mean_iou)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metrics)
Beispiel #43
0
def z_proposal_GMM_factorized(X, samples_s, miss_list, batch_size, z_dim,
                              reuse):
    """Sample z from a factorized Gaussian proposal built per attribute.

    For every attribute in ``X`` a mean and a log-variance are predicted
    from the observed rows only (concatenated with the matching rows of
    ``samples_s``).  Rows where the attribute is missing get mean 0 and
    log-variance 15, so they barely contribute.  The per-attribute Gaussians
    and a standard-normal prior are then combined as a product of Gaussians,
    and a sample is drawn with the reparameterization trick.

    Args:
        X: iterable of per-attribute data tensors, one row per sample.
        samples_s: tensor concatenated to each attribute's observed rows.
        miss_list: integer mask; column ``i`` holds 0 where attribute ``i``
            is missing and 1 where it is observed.
        batch_size: number of rows; used to size the filler for missing
            rows (assumed to equal the number of rows of each attribute).
        z_dim: dimensionality of z.
        reuse: forwarded to ``tf.layers.dense``.

    Returns:
        ``(samples_z, [mean_qz_joint, log_var_qz_joint])`` where the joint
        log-variance is clipped to ``[-15, 15]``.
    """
    mean_qz = []
    log_var_qz = []

    for i, d in enumerate(X):
        observed_mask = miss_list[:, i]

        # Partition the data in missing data (0) and observed data (1)
        missing_data, observed_data = tf.dynamic_partition(d,
                                                           observed_mask,
                                                           num_partitions=2)
        missing_s, observed_s = tf.dynamic_partition(samples_s,
                                                     observed_mask,
                                                     num_partitions=2)
        # Row indices of each partition, needed to stitch results back in
        # the original row order.
        condition_indices = tf.dynamic_partition(tf.range(tf.shape(d)[0]),
                                                 observed_mask,
                                                 num_partitions=2)

        # Number of rows in which this attribute is observed
        nObs = tf.shape(observed_data)[0]

        encoder_input = tf.concat([observed_data, observed_s], 1)

        # Mean layer
        aux_m = tf.layers.dense(
            inputs=encoder_input,
            units=z_dim,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(stddev=0.05),
            name='layer_1_' + 'mean_enc_z' + str(i),
            reuse=reuse)
        # Reconstruct means with zeros (so they don't affect the mean_joint)
        aux_mean_qz = tf.dynamic_stitch(
            condition_indices,
            [tf.zeros([batch_size - nObs, z_dim], dtype=tf.float32), aux_m])

        # Logvar layer
        aux_lv = tf.layers.dense(
            inputs=encoder_input,
            units=z_dim,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(stddev=0.05),
            name='layer_1_' + 'logvar_enc_z' + str(i),
            reuse=reuse)
        # Set a high value to make the variance in the missing cases negligible
        aux_log_var_qz = tf.dynamic_stitch(
            condition_indices,
            [tf.fill([batch_size - nObs, z_dim], 15.0), aux_lv])

        mean_qz.append(aux_mean_qz)
        log_var_qz.append(aux_log_var_qz)

    # Input prior: standard normal (mean 0, log-variance 0)
    log_var_qz.append(tf.zeros([batch_size, z_dim]))
    mean_qz.append(tf.zeros([batch_size, z_dim]))

    # Product of Gaussians: the joint precision is the sum of the
    # precisions, the joint mean is the precision-weighted sum of the means.
    log_var_qz_joint = -tf.reduce_logsumexp(tf.negative(log_var_qz), 0)
    mean_qz_joint = tf.multiply(
        tf.exp(log_var_qz_joint),
        tf.reduce_sum(tf.multiply(mean_qz, tf.exp(tf.negative(log_var_qz))),
                      0))

    # Avoid numerical problems: clip the log-variance that is actually
    # exponentiated for sampling and returned to the caller.
    log_var_qz_joint = tf.clip_by_value(log_var_qz_joint, -15.0, 15.0)
    # Rep-trick
    eps = tf.random.normal((batch_size, z_dim), 0, 1, dtype=tf.float32)
    samples_z = mean_qz_joint + tf.multiply(tf.exp(log_var_qz_joint / 2), eps)

    return samples_z, [mean_qz_joint, log_var_qz_joint]
Beispiel #44
0
def gmm(data, numClusters):
    """Fit a Gaussian mixture to ``data`` with Adam and cluster the points.

    The means, the (exp-transformed) sigmas and the (log-softmax-transformed)
    mixing weights are initialised from a standard normal and trained for
    400 steps on the negative log-likelihood.  Each point is then assigned
    to the cluster with the highest log-posterior.

    Args:
        data: array whose first two dimensions are (points, features).
        numClusters: number of mixture components.

    Returns:
        ``(clustered, trained_centroids, trained_log_pi, trained_sigma,
        loss_history)`` where ``clustered`` is the list of per-cluster
        groups of points and ``loss_history`` holds the loss of every step.
    """
    num_iterations = 400
    num_points, dim = data.shape[0], data.shape[1]
    K = numClusters
    unit_std = math.sqrt(1)

    ### Build Graph ###
    X = tf.placeholder(dtype=tf.float32, shape=(num_points, dim), name="X")

    # Cluster centres, sampled from a standard normal.
    MU = tf.Variable(
        initial_value=tf.random.normal(
            shape=[K, dim], mean=0, stddev=unit_std, dtype=tf.float32),
        trainable=True,
        name="MU")

    # Unconstrained sigma parameter; exp() keeps the effective sigma positive.
    sigma_raw = tf.Variable(
        initial_value=tf.random_normal(shape=[K, 1], mean=0, stddev=unit_std),
        trainable=True)
    sigma = tf.math.exp(sigma_raw)

    # Unconstrained mixing parameter, normalised through logsoftmax.
    log_pi_raw = tf.Variable(
        initial_value=tf.random.normal(shape=[K, 1], mean=0, stddev=unit_std),
        trainable=True)
    log_pi = logsoftmax(log_pi_raw)

    # Log probability of every point under every component.
    log_PDF = log_GaussPDF(X, MU, sigma)

    # L = - sum_n logsumexp_k(log_PDF + log_pi)
    loss = -1 * tf.reduce_sum(reduce_logsumexp(log_PDF + tf.squeeze(log_pi)))

    train_step = tf.train.AdamOptimizer(
        learning_rate=0.1, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    # Index of the highest-posterior cluster for each point in X.
    assignments = tf.math.argmax(
        log_posterior(log_PDF, log_pi), axis=1, output_type=tf.int32)

    # Data set split into one group per cluster (for output).
    grouped_points = tf.dynamic_partition(
        X, assignments, num_partitions=numClusters)

    init = tf.global_variables_initializer()

    loss_history = []
    with tf.Session() as sess:
        sess.run(init)

        # Training loop
        for _ in range(num_iterations):
            fetched = sess.run([MU, sigma, log_pi, loss, train_step],
                               feed_dict={X: data})
            loss_history.append(fetched[3])

        # Trained parameters
        trained_centroids = MU.eval()
        trained_log_pi = log_pi.eval()
        trained_sigma = sigma.eval()

        # Group the points by their most probable cluster.
        clustered = sess.run(grouped_points, feed_dict={X: data})

    return (clustered, trained_centroids, trained_log_pi, trained_sigma,
            loss_history)
Beispiel #45
0
    def __init__(self):
        """Build the training and validation graphs and expose three callables.

        JPEG-encoded images and heatmaps are fed once through placeholders,
        decoded (``ratio=2``), resized to ``IMAGE_SIZE`` and split into
        train / validation sets that are stored in non-trainable variables.

        Attributes set on the instance:
            initialize_images(sess, images, heatmaps): runs the global
                variable initializer, feeding the encoded data.
            train_on_batch(sess, batch_begin): runs one optimizer step on
                the batch starting at ``batch_begin`` and returns
                ``[cost, accuracy]`` for that batch.
            validate(sess): prints validation loss and accuracy.
        """
        super(Model, self).__init__()

        image_shape = [IMAGE_SIZE, IMAGE_SIZE, CHANNELS]

        with tf.name_scope('input'):
            images_initializer = tf.placeholder(dtype=tf.string,
                                                shape=[DATA_SET_SIZE])
            heatmaps_initializer = tf.placeholder(dtype=tf.string,
                                                  shape=[DATA_SET_SIZE])

            def decode_jpeg_half(encoded):
                return tf.image.decode_jpeg(encoded, ratio=2)

            decoded_images = tf.map_fn(decode_jpeg_half,
                                       images_initializer,
                                       dtype=tf.uint8)
            decoded_heatmaps = tf.map_fn(decode_jpeg_half,
                                         heatmaps_initializer,
                                         dtype=tf.uint8)

            resized_images = tf.image.resize_images(
                decoded_images, [IMAGE_SIZE, IMAGE_SIZE])
            resized_heatmaps = tf.image.resize_images(
                decoded_heatmaps, [IMAGE_SIZE, IMAGE_SIZE])

            # The same partition vector keeps images and heatmaps paired.
            split_vector = create_partition_vector()
            train_images_init, validate_images_init = tf.dynamic_partition(
                resized_images, split_vector, 2)
            train_heatmaps_init, validate_heatmaps_init = tf.dynamic_partition(
                resized_heatmaps, split_vector, 2)

            def frozen_variable(initial_value):
                return tf.Variable(initial_value,
                                   trainable=False,
                                   validate_shape=False)

            train_images = frozen_variable(train_images_init)
            train_heatmaps = frozen_variable(train_heatmaps_init)
            validate_images = frozen_variable(validate_images_init)
            validate_heatmaps = frozen_variable(validate_heatmaps_init)

            # Shapes are not validated at creation, so pin them here.
            for data_variable, set_size in ((train_images, TRAIN_SET_SIZE),
                                            (train_heatmaps, TRAIN_SET_SIZE),
                                            (validate_images,
                                             VALIDATION_SET_SIZE),
                                            (validate_heatmaps,
                                             VALIDATION_SET_SIZE)):
                data_variable.set_shape([set_size] + image_shape)

            validate_images_augmented = augment_many(validate_images)

        def initialize_images(sess, images, heatmaps):
            feed = {
                images_initializer: images,
                heatmaps_initializer: heatmaps
            }
            sess.run(tf.global_variables_initializer(), feed_dict=feed)

        self.initialize_images = initialize_images

        with tf.name_scope('batch'):
            batch_start = tf.placeholder(tf.int32, shape=[])
            slice_begin = [batch_start, 0, 0, 0]
            slice_size = [BATCH_SIZE] + image_shape
            batch_images = tf.slice(train_images, slice_begin, slice_size)
            batch_heatmaps = tf.slice(train_heatmaps, slice_begin, slice_size)

            augmented_batch_images = augment_many(batch_images)
            augmented_batch_heatmaps = augment_many(batch_heatmaps)

        # Training graph
        pred = conv_net(augmented_batch_images)
        ground_truth = tf.div(augmented_batch_heatmaps, 256)

        batch_losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=pred, labels=ground_truth)
        cost = tf.reduce_mean(batch_losses)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=LEARNING_RATE).minimize(cost)

        correct_pred = tf.equal(tf.argmax(pred, 3), tf.argmax(ground_truth, 3))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        def train_on_batch(sess, batch_begin):
            feed = {batch_start: batch_begin}
            sess.run(optimizer, feed_dict=feed)
            return sess.run([cost, accuracy], feed_dict=feed)

        self.train_on_batch = train_on_batch

        # Validation graph: variables of the network are reused from here on.
        tf.get_variable_scope().reuse_variables()

        validation_pred = conv_net(validate_images_augmented)
        validation_pred = tf.reshape(
            validation_pred,
            [-1, IMAGE_TRANSFORMATION_NUMBER] + image_shape)
        validation_pred = tf.map_fn(gather_transformations, validation_pred)
        validation_ground_truth = tf.div(validate_heatmaps, 256)

        validation_losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=validation_pred, labels=validation_ground_truth)
        validation_cost = tf.reduce_mean(validation_losses)
        validation_correct_pred = tf.equal(
            tf.argmax(validation_pred, 3),
            tf.argmax(validation_ground_truth, 3))
        validation_accuracy = tf.reduce_mean(
            tf.cast(validation_correct_pred, tf.float32))

        def validate(sess):
            loss, acc = sess.run([validation_cost, validation_accuracy])
            print("Validation loss %g" % loss)
            print("Validation accuracy %g" % acc)

        self.validate = validate
Beispiel #46
0
def kmeans(data, numClusters, iterations=200):
    """Fit K-means centroids by gradient descent and partition the data.

    The centroids are a trainable variable initialised from a standard normal
    distribution; Adam minimises the sum over points of the distance to the
    closest centroid.

    :param data: array-like with a ``shape`` attribute of at least two
        entries; ``shape[0]`` is taken as the number of points and
        ``shape[1]`` as their dimension
    :param numClusters: number of centroids / partitions
    :param iterations: number of optimizer steps to run (default 200, the
        value that used to be hard-coded)
    :return: tuple ``(clustered, trained_centroids, loss_history)`` where
        ``clustered`` is the list of ``numClusters`` groups produced by
        ``tf.dynamic_partition``, ``trained_centroids`` is the final value of
        the centroid variable and ``loss_history`` holds the loss fetched at
        every step
    """
    # Number of data points and size of each point
    N = data.shape[0]
    d = data.shape[1]

    # Number of clusters/centroids
    K = numClusters

    ### Build Graph ###
    # Placeholder for the data points
    X = tf.placeholder(dtype=tf.float32, shape=(N, d), name="X")
    # Cluster centres, initialised from a standard normal distribution
    MU = tf.Variable(initial_value=tf.random.normal(shape=[K, d],
                                                    mean=0,
                                                    stddev=1.0,
                                                    dtype=tf.float32),
                     trainable=True,
                     name="MU")

    # Distance of each point to each cluster centre
    # (assumes distanceFunc returns one row per point and one column per
    # centroid, as the axis=1 reductions below rely on -- TODO confirm)
    distances = distanceFunc(X, MU)

    # Loss: L(MU) = sigma(n=1 to N) min(k=1 to K) ||X-MU||^2
    loss = tf.math.reduce_sum(tf.math.reduce_min(distances, axis=1),
                              name="loss")

    # Adam Optimizer
    opt = tf.train.AdamOptimizer(learning_rate=0.1,
                                 beta1=0.9,
                                 beta2=0.99,
                                 epsilon=1e-5).minimize(loss)

    # Index of the closest centroid for each point in X
    assign_to_cluster = tf.math.argmin(distances, axis=1, output_type=tf.int32)

    # Split the data set into one group per cluster (for output)
    output = tf.dynamic_partition(X,
                                  assign_to_cluster,
                                  num_partitions=K)

    # Initialize Tensorflow variables
    init = tf.global_variables_initializer()

    loss_history = []
    feed = {X: data}
    with tf.Session() as sess:
        sess.run(init)
        # Training loop: only the loss is needed on the host, so the
        # centroid matrix is no longer fetched on every step.
        for _ in range(iterations):
            step_loss, _ = sess.run([loss, opt], feed_dict=feed)
            loss_history.append(step_loss)

        # Trained centroids
        trained_centroids = sess.run(MU)

        # Assign each point to the cluster with the closest centre
        clustered = sess.run(output, feed_dict=feed)

    return clustered, trained_centroids, loss_history
Beispiel #47
0
def image_classifier(input_tensor, label_tensor, is_training, FLAGS):
    """Build the classification graph and return its tensors/ops in a dict.

    Examples whose label equals -1 are dropped before the network is applied.
    The evaluation tensors are always built; the loss/optimizer part is only
    built when ``is_training`` is truthy, otherwise a softmax output tensor is
    added instead.

    :param input_tensor: batch of inputs passed (after filtering) to ``modelFn``
    :param label_tensor: labels aligned with ``input_tensor``; -1 marks an
        example to be ignored
    :param is_training: Python truth value selecting the training branch; also
        forwarded to ``modelFn`` (and negated for its ``reuse`` argument)
    :param FLAGS: mapping read with the keys "argscope", "class_count",
        "dropout_rate", and in training mode "modelScope" and "learning_rate"
    :return: dict with "global_step", "prediction", "softmax",
        "onehot_labels", "group_sample_number", "group_correct_prediction"
        and "accuracy"; plus "total_loss" and "train_op" when training, or
        "output_tensor_name", "input_tensor" and "output_tensor" otherwise
    """
    return_dict = {}
    global_step = tf.Variable(0, name='global_step', trainable=False)
    return_dict["global_step"] = global_step

    # Partition 1 holds the examples labelled -1; only partition 0 is kept.
    is_bad_file = tf.cast(tf.equal(label_tensor, -1), tf.int32)
    filtered_imgs_tensor = tf.dynamic_partition(input_tensor, is_bad_file, 2)[0]
    filtered_label_tensor = tf.dynamic_partition(label_tensor, is_bad_file, 2)[0]
    # NOTE(review): FLAGS["dropout_rate"] is passed as `dropout_keep_prob`;
    # confirm the flag really stores a keep probability.
    # NOTE(review): `reuse=not is_training` presumably relies on the training
    # graph having created the variables first -- verify against the caller.
    with slim.arg_scope(FLAGS["argscope"]):
        logits, end_points = modelFn(filtered_imgs_tensor,
                                     num_classes=FLAGS["class_count"],
                                     is_training=is_training,
                                     reuse=not is_training,
                                     dropout_keep_prob=FLAGS["dropout_rate"])
    onehot_tensor = tf.one_hot(filtered_label_tensor, FLAGS["class_count"])
    prediction = tf.argmax(logits, 1)
    return_dict["prediction"] = prediction
    return_dict["softmax"] = end_points["Predictions"]
    return_dict["onehot_labels"] = onehot_tensor
    with tf.name_scope('evaluation'):
        with tf.name_scope('correct_prediction'):
            # 1 where the predicted class matches the label, else 0.
            correct_prediction = tf.cast(tf.equal(prediction, tf.argmax(onehot_tensor, 1)), tf.int32)
            # Per-class number of kept examples (summed over the batch axis).
            return_dict["group_sample_number"] = tf.reduce_sum(onehot_tensor, 0, keepdims=True)
            # Per-class number of correct predictions.
            group_correct_prediction = tf.reduce_sum(tf.one_hot(prediction, FLAGS["class_count"]) * onehot_tensor, 0)
            return_dict["group_correct_prediction"] = group_correct_prediction
        with tf.name_scope('accuracy'):
            # Correct predictions divided by the number of kept examples.
            # NOTE(review): the denominator is 0 if every label in the batch
            # is -1.
            accuracy = tf.reduce_sum(correct_prediction) / tf.shape(prediction)[0]
            # print(accuracy)
            return_dict["accuracy"] = accuracy
            tf.summary.scalar('accuracy', accuracy)
    if is_training:
        # Cross-entropy on the auxiliary head (if the model has one) and on
        # the main logits; both are registered in the tf.losses collection.
        if 'AuxLogits' in end_points:
            tf.losses.softmax_cross_entropy(onehot_tensor, end_points['AuxLogits'], weights=np.exp(1) * 0.1,
                                            scope='aux_loss')
        tf.losses.softmax_cross_entropy(onehot_tensor, logits, weights=np.exp(2) * 0.1)
        # Regularise only trainable variables under "<modelScope>/Logits"
        # whose name contains "weight", using sum(log(1 + w^2)).
        Logits_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             scope="{}/Logits".format(FLAGS["modelScope"]))
        Logits_weights = list(filter(lambda x: x.name.find("weight") != -1, Logits_variables))
        # regularizer = tf.nn.l2_loss(Logits_weights)
        regularizer = tf.reduce_sum(tf.log(1 + tf.square(Logits_weights)))
        tf.losses.add_loss(regularizer * 0.001)

        # trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=fully_connected_layer_name)
        # FCL_weights_tensor_list = list(filter(lambda x: x.name.find("weight") != -1, trainable_variables))
        # regularizer = tf.add_n([tf.nn.l2_loss(w) for w in FCL_weights_tensor_list]) * 0.00001 * np.exp(1)
        # tf.losses.add_loss(regularizer)
        # tf.losses.add_loss(loss_AE * 0.00001 * np.exp(2))
        # Record the three loss weights used above.
        makeLog("losses weights\t{}\t{}\t{}".format(np.exp(1) * 0.1, np.exp(2) * 0.1, 0.001))

        total_loss = tf.losses.get_total_loss()
        return_dict["total_loss"] = total_loss
        tf.summary.scalar('total_loss', total_loss)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=FLAGS["learning_rate"],
                                              momentum=0.9,
                                              decay=0.9,
                                              epsilon=1.0)
        # Make the train op depend on the ops in the UPDATE_OPS collection so
        # they run with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies([tf.group(*update_ops)]):
            train_op = optimizer.minimize(loss=total_loss, global_step=global_step)
        return_dict["train_op"] = train_op
    else:
        # Inference/evaluation graph: expose the softmax output and its op
        # name (tensor name without the ":<index>" suffix).
        output_tensor = tf.nn.softmax(logits)
        return_dict["output_tensor_name"] = output_tensor.name.split(":")[0]
        makeLog("output_tensor_name: {}".format(output_tensor.name.split(":")[0]))
        return_dict["input_tensor"] = input_tensor
        return_dict["output_tensor"] = output_tensor
    return return_dict
Beispiel #48
0
    def loss_crf_scan(self, _, current_input):
        """
        Scan function for log likelihood computation.

        For one sequence, the unary scores are extended with two extra
        classes ("start" and "end"), a start row is prepended, padding rows
        are dropped, an end row is appended, and the labels are wrapped with
        the start/end label ids before calling
        ``tf.contrib.crf.crf_log_likelihood``.

        :param _: previous output (ignored; rebound to the second return
            value of ``crf_log_likelihood`` further down)
        :param current_input: current tensor line, indexed as
            ``[0]`` unary scores (rows are tokens, columns are classes; the
            column count must be statically known), ``[1]`` sequence length,
            ``[2]`` labels
        :return: sequence log likelihood (sum of the returned log-likelihoods)
        """

        # TILING

        # Create tiling for "start" and "end" scores
        # (one row of two -10000.0 scores per row of the unary score matrix)
        tile = tf.tile(tf.constant(-10000.0, shape=[1, 2], dtype=tf.float32),
                       [tf.shape(current_input[0])[0], 1])

        # Add two scores for each token in each sequence
        tiled_tensor = tf.concat([current_input[0], tile], 1)

        # -----------------------------------------------------------
        # ADDING START TOKEN

        # Static number of classes before the two extra ones are added
        cur_nb_class = current_input[0].get_shape().as_list()[1]

        # Create start and end token unary scores
        # (0.0 in the "start" resp. "end" column, -10000.0 everywhere else)
        start_unary_scores = [[-10000.0] * cur_nb_class + [0.0, -10000.0]]
        end_unary_tensor = [[-10000.0] * cur_nb_class + [-10000.0, 0.0]]

        # Concatenate start unary scores to the tiled vector
        tensor_start = tf.concat([start_unary_scores, tiled_tensor], 0)

        # -----------------------------------------------------------
        # ADDING END TOKEN

        # Creating mask to fetch elements of the sequence
        # The mask covers the flattened score matrix: the first
        # (length + 1) * nb_columns entries are 1, i.e. the start row plus the
        # `length` real token rows; everything after that is 0.
        mask = tf.sequence_mask(
            (tf.cast(tf.reshape(current_input[1], [-1]), dtype=tf.int32) + 1) *
            tf.shape(tensor_start)[1],
            tf.shape(tensor_start)[1] * tf.shape(tensor_start)[0],
            dtype=tf.int32)

        # Flattening unary scores and partitioning
        unary_scores_reshaped = tf.reshape(tensor_start, [1, -1])
        slices = tf.dynamic_partition(unary_scores_reshaped, mask, 2)

        # Reshaping slice one
        # (partition 1 = kept entries, restored to one row per token)
        slice_1 = tf.reshape(slices[1], [-1, tf.shape(tensor_start)[1]])

        # Concatenating and reshaping
        # (append the end row, then add a leading batch dimension of 1)
        tensor_start_end = tf.concat([slice_1, end_unary_tensor], 0)
        tensor_start_end_reshaped = tf.reshape(
            tensor_start_end,
            [1,
             tf.shape(tensor_start_end)[0],
             tf.shape(tensor_start_end)[1]])

        # Setting shape to tensor
        # (restores the static class dimension lost by the dynamic reshapes)
        tensor_start_end_reshaped.set_shape([1, None, cur_nb_class + 2])

        # -----------------------------------------------------------
        # ADDING START AND END LABELS

        # Creating mask for target
        # (1 for the first `length` label positions, 0 for the padding)
        mask_y = tf.sequence_mask(
            (tf.cast(tf.reshape(current_input[1], [-1]), dtype=tf.int32)),
            tf.shape(current_input[0])[0],
            dtype=tf.int32)

        # Flattening label tensor and partitioning
        y_reshaped = tf.reshape(current_input[2], [1, -1])
        slices_y = tf.dynamic_partition(y_reshaped, mask_y, 2)

        # Concatenating and reshaping
        # (label id cur_nb_class is "start", cur_nb_class + 1 is "end")
        new_y = tf.concat([[cur_nb_class], slices_y[1], [cur_nb_class + 1]],
                          axis=0)
        new_y_reshaped = tf.reshape(new_y, [1, -1])

        # -----------------------------------------------------------
        # COMPUTING LOG LIKELIHOOD

        # NOTE(review): the sequence was lengthened by the start and end
        # tokens, but the original length `current_input[1]` is what is passed
        # as the sequence length here -- confirm this is intended.
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            tensor_start_end_reshaped,
            new_y_reshaped,
            current_input[1],
            transition_params=self.transition_params)

        return tf.reduce_sum(log_likelihood)
Beispiel #49
0
    def model(self, seq_length, img_ph, pnt_ph, aud_ph, partitions_ph, train_ph, prompts_ph, variable_scope,
              variable_scope2, var_img, var_pnt, var_aud, var_lstm, incep_reuse=True):
        """Build the multi-input conv -> LSTM -> fully-connected graph.

        Each enabled input (``self.graphbuild[i]``) is reshaped, passed through
        its own convolutional front end, flattened to one feature vector per
        time step and concatenated with the others along the feature axis.
        The combined sequence is fed to an LSTM; the LSTM outputs selected by
        ``partitions_ph`` are then passed through two affine layers.

        :param seq_length: passed as ``sequence_length`` to ``tf.nn.dynamic_rnn``
        :param img_ph: image input, processed by ``inception_resnet_v2``
            when ``self.graphbuild[0]`` is set
        :param pnt_ph: input for the 3-layer "pnt" conv stack
            (``self.graphbuild[1]``)
        :param aud_ph: input for the 3-layer "aud" conv stack
            (``self.graphbuild[2]``)
        :param partitions_ph: partition ids for ``tf.dynamic_partition`` over
            the LSTM outputs; only partition 1 is kept
        :param train_ph: forwarded to the conv helpers, which do not use it
        :param prompts_ph: currently unused (only referenced in commented code)
        :param variable_scope: scope entered while building the pnt/aud conv
            stacks and combining the inputs
        :param variable_scope2: scope entered while building the LSTM and the
            affine layers
        :param var_img: currently unused
        :param var_pnt: dict with keys "W1", "b1", "W2", "b2", "W3", "b3"
        :param var_aud: dict with keys "W1", "b1", "W2", "b2", "W3", "b3"
        :param var_lstm: dict with keys "W_lstm", "b_lstm", "W_fc", "b_fc"
        :param incep_reuse: passed as ``reuse`` to ``inception_resnet_v2``
        :return: output tensor of the second affine layer
        """

        def process_vars(seq, data_type):
            """Cast ``seq`` to float32 and reshape it to
            (batch, -1, cmp_h, cmp_w, num_c) using the sizes in ``data_type``."""
            # cast inputs to the correct data type
            seq_inp = tf.cast(seq, tf.float32)
            return tf.reshape(seq_inp,
                              (self.__batch_size, -1, data_type["cmp_h"], data_type["cmp_w"], data_type["num_c"]))

        def convolve_data_inception(input_data, val, n, dtype):
            """Run the frames through ``inception_resnet_v2`` (inference mode)
            and return its logits. ``val``, ``n`` and ``dtype`` are unused."""
            data = tf.reshape(input_data, [-1, 299, 299, 3])
            logits, end_points = inception_resnet_v2(data,
                                                     num_classes=output_sizes[-1] * output_sizes[-1] * layer_elements[
                                                         -2], is_training=False, reuse=incep_reuse)
            return logits

        def convolve_data_3layer_pnt(input_data, val, variables, n, dtype):
            """Apply three padded VALID conv+ReLU layers with square padding
            and stride; every layer output is summarised and checked to be
            finite. ``val`` is unused."""

            def pad_tf(x, p):
                # zero-pad height and width by p on each side
                return tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]], "CONSTANT")

            def gen_convolved_output(sequence, W, b, stride, num_hidden, new_size, train_ph, padding='SAME'):
                # num_hidden, new_size and train_ph are accepted but not used
                conv = tf.nn.conv2d(sequence, W, strides=[1, stride, stride, 1], padding=padding) + b
                return tf.nn.relu(conv)

            input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                    name=n + "_inp_reshape")

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out1_n: ")
            input_data = pad_tf(input_data, padding_size[0])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W1"], variables["b1"], stride_sizes[0],
                                              layer_elements[1], output_sizes[0], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv1")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - ",
                name="conv1_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out2_n: ")
            input_data = pad_tf(input_data, padding_size[1])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W2"], variables["b2"], stride_sizes[1],
                                              layer_elements[2], output_sizes[1], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv2")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - ",
                name="conv2_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out3_n: ")
            input_data = pad_tf(input_data, padding_size[2])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W3"], variables["b3"], stride_sizes[-1],
                                              layer_elements[-2], output_sizes[-1], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv3")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - ",
                name="conv3_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out4_n: ")

            return input_data

        def convolve_data_3layer_aud(input_data, val, variables, n, dtype):
            """Same structure as the pnt stack, but padding and stride are
            (height, width) pairs taken from the ``aud_*`` tables. ``val`` is
            unused."""

            def pad_tf(x, padding):
                # zero-pad height by padding[0] and width by padding[1] per side
                return tf.pad(x, [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]], "CONSTANT")

            def gen_convolved_output(sequence, W, b, stride, num_hidden, new_size, train_ph, padding='SAME'):
                # num_hidden, new_size and train_ph are accepted but not used
                conv = tf.nn.conv2d(sequence, W, strides=[1, stride[0], stride[1], 1], padding=padding) + b
                return tf.nn.relu(conv)

            input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                    name=n + "_inp_reshape")

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out1_a: ")
            input_data = pad_tf(input_data, aud_padding_size[0])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W1"], variables["b1"], aud_stride_sizes[0],
                                              aud_layer_elements[1], aud_output_sizes[0], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv1")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - conv1_" + n,
                name="conv1_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out2_a: ")
            input_data = pad_tf(input_data, aud_padding_size[1])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W2"], variables["b2"], aud_stride_sizes[1],
                                              aud_layer_elements[2], aud_output_sizes[1], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv2")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - conv2_" + n,
                name="conv2_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out3_a: ")
            input_data = pad_tf(input_data, aud_padding_size[2])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W3"], variables["b3"], aud_stride_sizes[2],
                                              aud_layer_elements[3], aud_output_sizes[2], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv3")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - conv3_" + n,
                name="conv3_" + n
            )

            return input_data

        # pass different data types through conv networks
        inp_data = [0] * TOTAL_PARAMS
        conv_inp = [0] * TOTAL_PARAMS

        # with tf.device('/gpu:0'):
        with tf.device('/gpu:1'):
            if (self.graphbuild[0]):
                val = 0
                inp_data[val] = process_vars(img_ph, img_dtype)
                conv_inp[val] = convolve_data_inception(inp_data[val], val, "img", img_dtype)

            with variable_scope as scope:
                # with tf.device('/gpu:1'):

                if (self.graphbuild[1]):
                    val = 1
                    inp_data[val] = process_vars(pnt_ph, pnt_dtype)
                    conv_inp[val] = convolve_data_3layer_pnt(inp_data[val], val, var_pnt, "pnt", pnt_dtype)
                if (self.graphbuild[2]):
                    val = 2
                    inp_data[val] = process_vars(aud_ph, aud_dtype)
                    conv_inp[val] = convolve_data_3layer_aud(inp_data[val], val, var_aud, "aud", aud_dtype)

                # combine different inputs together
                combined_data = None
                for i in range(TOTAL_PARAMS):

                    if (self.graphbuild[i]):
                        # (A bare `tf.Print(...)` call used to sit here; its
                        # result was discarded, so it never printed anything
                        # and only added an unused op to the graph.)
                        if (i < 2):
                            # img / pnt features: one flat vector per time step
                            conv_inp[i] = tf.reshape(conv_inp[i], [self.__batch_size, -1,
                                                                   output_sizes[-1] * output_sizes[-1] * layer_elements[
                                                                       -2]], name="combine_reshape")
                        else:
                            # print(">>", aud_output_sizes[-1][0]*aud_output_sizes[-1][0]*aud_layer_elements[-2])
                            # NOTE(review): index [0] is used for both factors;
                            # if the audio output is not square the second one
                            # is presumably meant to be [1] -- confirm.
                            conv_inp[i] = tf.reshape(conv_inp[i], [self.__batch_size, -1,
                                                                   aud_output_sizes[-1][0] * aud_output_sizes[-1][0] *
                                                                   aud_layer_elements[-2]], name="combine_reshape_aud")
                        # tf.Print(conv_inp[i], [tf.shape(conv_inp[i])])
                        # Identity check: `== None` on a tensor is not a
                        # reliable way to test for "not set yet".
                        if combined_data is None:
                            combined_data = conv_inp[i]
                        else:
                            # concatenate along the feature axis
                            combined_data = tf.concat([combined_data, conv_inp[i]], 2)

                W_lstm = var_lstm["W_lstm"]
                b_lstm = var_lstm["b_lstm"]
                W_fc = var_lstm["W_fc"]
                b_fc = var_lstm["b_fc"]

                combined_data = tf.verify_tensor_all_finite(
                    combined_data,
                    "ERR: Tensor not finite - combined_data",
                    name="combined_data"
                )
            # combined_data = tf.Print(combined_data, [tf.shape(combined_data)], message="combined_data")

        with variable_scope2 as scope:
            # lstm_cell = BNLSTMCell(layer_elements[-2], is_training_tensor=train_ph, max_bn_steps=MAX_BN_LEN)

            lstm_cell = tf.contrib.rnn.LSTMCell(layer_elements[-2],
                                                use_peepholes=False,
                                                cell_clip=None,
                                                initializer=None,
                                                num_proj=None,
                                                proj_clip=None,
                                                forget_bias=1.0,
                                                state_is_tuple=True,
                                                activation=None,
                                                reuse=None
                                                )

            outputs, states = tf.nn.dynamic_rnn(
                cell=lstm_cell,
                inputs=combined_data,
                dtype=tf.float32,
                sequence_length=seq_length,
                time_major=False
            )

            # Replace NaNs in the LSTM outputs by zeros before the finiteness
            # check below.
            outputs = tf.where(tf.is_nan(outputs), tf.zeros_like(outputs), outputs)
            # outputs = tf.Print(outputs, [outputs], message="outputs", summarize=100)
            # outputs = tf.Print(outputs, [tf.reduce_max(outputs)], message="outputs", summarize=100)
            outputs = tf.verify_tensor_all_finite(
                outputs,
                "ERR: Tensor not finite - outputs",
                name="outputs"
            )

            # Keep only the LSTM outputs whose partition id is 1.
            num_partitions = 2
            res_out = tf.dynamic_partition(outputs, partitions_ph, num_partitions)[1]
            # res_out = tf.Print(res_out, [res_out], message="res_out")

            # tf.where(tf.is_nan(res_out), tf.zeros_like(res_out), res_out)

            # res_out = tf.Print(res_out, [res_out], message="res_out", summarize=100)
            # res_out = tf.Print(res_out, [tf.reduce_max(res_out)], message="res_out", summarize=100)


            # First affine layer on the selected outputs
            rnn_x = tf.matmul(res_out, W_lstm) + b_lstm

            self.variable_summaries(rnn_x, "lstm")

            rnn_x = tf.verify_tensor_all_finite(
                rnn_x,
                "ERR: Tensor not finite - fc1",
                name="fc1"
            )

            # prompts_ph = tf.reshape(prompts_ph, [-1, 1])
            x_tensor = rnn_x  # tf.concat([rnn_x, prompts_ph], 1)

            # Second affine layer
            rnn_x = tf.matmul(x_tensor, W_fc) + b_fc
            self.variable_summaries(rnn_x, "fc")

            rnn_x = tf.verify_tensor_all_finite(
                rnn_x,
                "ERR: Tensor not finite - fc2",
                name="fc2"
            )

            return rnn_x
    def testLinearThompsonSamplingUpdateWithForgetting(self, batch_size,
                                                       context_dim, dtype):
        """Verify one training step of the agent with forgetting factor gamma.

        After a single ``train`` call, each arm's weight covariance and
        parameter estimator must equal gamma times its initial value plus the
        contribution of the observations/rewards routed to that arm.
        """
        gamma = 0.9
        num_actions = 5

        # Build a `Trajectory` from random actions and the helper-generated
        # initial/final steps.
        initial_step, final_step = _get_initial_and_final_steps(
            batch_size, context_dim)
        action = np.random.randint(
            num_actions, size=batch_size, dtype=np.int32)
        experience = _get_experience(
            initial_step, _get_action_step(action), final_step)

        # Create the agent and snapshot its state before training.
        observation_spec = tensor_spec.TensorSpec([context_dim], tf.float32)
        action_spec = tensor_spec.BoundedTensorSpec(
            dtype=tf.int32, shape=(), minimum=0, maximum=num_actions - 1)
        agent = lin_ts_agent.LinearThompsonSamplingAgent(
            time_step_spec=time_step.time_step_spec(observation_spec),
            action_spec=action_spec,
            gamma=gamma,
            dtype=dtype)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        covariances_before = self.evaluate(agent._weight_covariances)
        estimators_before = self.evaluate(agent._parameter_estimators)

        # Train once and read back the updated state.
        self.evaluate(agent.train(experience))
        covariances_after = self.evaluate(agent.weight_covariances)
        estimators_after = self.evaluate(agent.parameter_estimators)

        # Route observations and rewards to the arm that was played.
        arm_ids = tf.convert_to_tensor(action)
        observations_per_arm = tf.dynamic_partition(
            data=tf.reshape(experience.observation, [batch_size, context_dim]),
            partitions=arm_ids,
            num_partitions=num_actions)
        rewards_per_arm = tf.dynamic_partition(
            data=tf.reshape(experience.reward, [batch_size]),
            partitions=arm_ids,
            num_partitions=num_actions)

        # Expected values: gamma * old + update computed from this batch.
        expected_covariances = []
        expected_estimators = []
        for arm in range(num_actions):
            arm_observations = observations_per_arm[arm]
            arm_rewards = rewards_per_arm[arm]

            gram = tf.matmul(
                arm_observations, arm_observations, transpose_a=True)
            expected_covariances.append(
                self.evaluate(gamma * covariances_before[arm] + gram))

            weighted_sum = bandit_utils.sum_reward_weighted_observations(
                arm_rewards, arm_observations)
            expected_estimators.append(
                self.evaluate(gamma * estimators_before[arm] + weighted_sum))

        self.assertAllClose(expected_covariances, covariances_after)
        self.assertAllClose(expected_estimators, estimators_after)
def update_centroids(samples, nearest_indices, n_clusters):
    """Return new centroids as the mean of the samples assigned to each one.

    :param samples: tensor of samples; the first axis indexes samples
    :param nearest_indices: for each sample, the index of the centroid it is
        assigned to (cast to int32 here)
    :param n_clusters: number of centroids / partitions
    :return: tensor with one row per cluster, concatenated along axis 0

    NOTE(review): a cluster with no assigned sample yields the mean of an
    empty tensor for its row -- confirm callers can cope with that.
    """
    nearest_indices = tf.cast(nearest_indices, tf.int32)
    partitions = tf.dynamic_partition(samples, nearest_indices, n_clusters)
    # Mean over the samples of each partition, kept as a 1-row tensor so the
    # rows can be concatenated.
    cluster_means = [tf.expand_dims(tf.reduce_mean(partition, 0), 0)
                     for partition in partitions]
    # tf.concat takes (values, axis) since TensorFlow 1.0; the previous call
    # used the pre-1.0 (axis, values) order.
    return tf.concat(cluster_means, 0)
Beispiel #52
0
    # word_embedding_uniform = tf.concat([word_embedding_0, word_embedding_1], axis=0)

    fact_description = tf.nn.embedding_lookup(word_embedding, fact_input)
    law_description = tf.nn.embedding_lookup(word_embedding, law_input)

    max_graph = len(graph_list_1)
    deg_list = [len(neigh_index[i]) for i in range(n_law)]
    graph_list = list(zip(*graph_membership))[1]

    gold_matrix_law = tf.one_hot(law_labels, 118, dtype=tf.float32)
    gold_matrix_accu = tf.one_hot(accu_labels, 130, dtype=tf.float32)
    gold_matrix_time = tf.one_hot(time_labels, 12, dtype=tf.float32)

    #############----------------------###################
    graph_label = tf.dynamic_partition(
        tf.transpose(gold_matrix_law, [1, 0]), graph_list,
        max_graph)  # size: [batch_size, graph_num, N_each_graph])
    label = []
    for i in range(max_graph):
        label.append(tf.reduce_sum(graph_label[i], 0, keepdims=True))

    graph_label = tf.transpose(tf.concat(label, 0),
                               [1, 0])  # size: [batch_size, graph_num]
    #############----------------------###################

    neigh_index = sorted(neigh_index.items(), key=lambda x: len(x[1]))
    max_deg = len(neigh_index[-1][1])
    t = 0
    adj_list = [[]]
    for i in range(n_law):
        each = neigh_index[i]
Beispiel #53
0
def model_fn(features, labels, mode, params):
    """Model function for PASCAL VOC.

    Builds the DeepLab v3 network, the masked cross-entropy loss with L2
    weight decay, the training op, TensorBoard summaries and evaluation
    metrics, and wraps them in an EstimatorSpec.

    Args:
        features: Input image batch, or a dict holding it under 'data'.
        labels: Label maps with a channel axis at position 3 (squeezed away
            before the loss is computed). Not used in PREDICT mode.
        mode: One of the `tf.estimator.ModeKeys` values.
        params: Dict of hyper-parameters. 'num_classes' is always read;
            'tensorboard_images_max_outputs' and 'learning_rate' are read in
            TRAIN mode; 'weight_decay' is optional and falls back to
            `_WEIGHT_DECAY`.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    if isinstance(features, dict):
        features = features['data']
    images = features

    num_classes = params['num_classes']
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    network = deeplab_v3_generator(num_classes,
                                   _OUTPUT_STRIDE,
                                   _BASE_ARCHITECTURE,
                                   None,
                                   _BATCH_NORM_DECAY)
    logits = network(features, is_training)

    # Per-pixel argmax over the class axis, with the channel axis restored.
    class_map = tf.argmax(logits, axis=3, output_type=tf.int32)
    pred_classes = tf.expand_dims(class_map, axis=3)
    pred_decoded_labels = tf.cast(pred_classes, tf.uint8)

    predictions = {
        'classes': pred_classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
        'decoded_labels': pred_decoded_labels,
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        # 'decoded_labels' is kept out of the exported signature because
        # custom functions produce an error when used with saved_model.
        exportable = {key: tensor for key, tensor in predictions.items()
                      if key != 'decoded_labels'}
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'preds': tf.estimator.export.PredictOutput(exportable),
            })

    gt_decoded_labels = tf.cast(labels, tf.uint8)
    squeezed_labels = tf.squeeze(labels, axis=3)  # drop the channel axis

    flat_logits = tf.reshape(logits, [-1, num_classes])
    flat_labels = tf.reshape(squeezed_labels, [-1, ])

    # 1 for pixels whose label is a real class id, 0 for everything else.
    valid_indices = tf.to_int32(flat_labels <= num_classes - 1)

    def select_valid(tensor):
        """Keep only the entries of `tensor` flagged by `valid_indices`."""
        return tf.dynamic_partition(tensor, valid_indices, num_partitions=2)[1]

    valid_logits = select_valid(flat_logits)
    valid_labels = select_valid(flat_labels)

    flat_preds = tf.reshape(pred_classes, [-1, ])
    valid_preds = select_valid(flat_preds)
    confusion_matrix = tf.confusion_matrix(
        valid_labels, valid_preds, num_classes=num_classes)

    predictions.update({
        'valid_preds': valid_preds,
        'valid_labels': valid_labels,
        'confusion_matrix': confusion_matrix,
    })

    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        logits=valid_logits, labels=tf.cast(valid_labels, tf.int32))

    # Named copy plus scalar summary so the value can be logged.
    tf.identity(cross_entropy, name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

    if _FREEZE_BATCH_NORM:
        # Leave the batch-norm scale/offset variables out of training.
        train_var_list = [
            v for v in tf.trainable_variables()
            if not ('beta' in v.name or 'gamma' in v.name)
        ]
    else:
        train_var_list = list(tf.trainable_variables())

    # Total loss = cross entropy + weight decay over the trained variables.
    with tf.variable_scope("total_loss"):
        weight_decay = params.get('weight_decay', _WEIGHT_DECAY)
        l2_penalty = tf.add_n([tf.nn.l2_loss(v) for v in train_var_list])
        loss = cross_entropy + weight_decay * l2_penalty

    train_op = None
    if is_training:
        # Channels 0..2 are shown as RGB, channel 3 as a separate image.
        rgb = images[:, :, :, 0:3]
        # NOTE(review): this divides by the global max rather than by
        # (max - min); confirm that is the intended scaling.
        rgb_norm = ((rgb - tf.reduce_min(rgb)) / tf.reduce_max(rgb)) * 255
        ir = tf.expand_dims(images[:, :, :, 3], -1) * 255

        max_outputs = params['tensorboard_images_max_outputs']
        image_summaries = [
            ('images', rgb_norm),
            ('ir_near', ir),
            ('labels', gt_decoded_labels * 255),
            ('output', pred_decoded_labels * 255),
        ]
        for tag, tensor in image_summaries:
            tf.summary.image(tag, tensor, max_outputs=max_outputs)

        global_step = tf.train.get_or_create_global_step()

        # The learning rate is taken as-is from params; no decay schedule is
        # applied here.
        learning_rate = params['learning_rate']
        tf.identity(learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=_MOMENTUM)

        # Batch norm requires its update ops to run before the train op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(
                loss, global_step, var_list=train_var_list)

    accuracy = tf.metrics.accuracy(valid_labels, valid_preds)
    mean_iou = tf.metrics.mean_iou(valid_labels, valid_preds, num_classes)
    metrics = {'px_accuracy': accuracy, 'mean_iou': mean_iou}

    # Element [1] of a tf.metrics pair is the update op.
    tf.identity(accuracy[1], name='train_px_accuracy')
    tf.summary.scalar('train_px_accuracy', accuracy[1])

    def compute_mean_iou(total_cm, name='mean_iou'):
        """Compute the mean intersection-over-union via the confusion matrix."""
        axis0_totals = tf.to_float(tf.reduce_sum(total_cm, 0))
        axis1_totals = tf.to_float(tf.reduce_sum(total_cm, 1))
        diagonal = tf.to_float(tf.diag_part(total_cm))
        denominator = axis0_totals + axis1_totals - diagonal

        # Only classes that occur in the labels or the predictions count
        # towards the mean; a zero denominator marks an absent class.
        num_valid_entries = tf.reduce_sum(
            tf.cast(tf.not_equal(denominator, 0), dtype=tf.float32))

        # Replace zero denominators by one to avoid dividing by zero.
        safe_denominator = tf.where(
            tf.greater(denominator, 0),
            denominator,
            tf.ones_like(denominator))
        iou = tf.div(diagonal, safe_denominator)

        for class_id in range(num_classes):
            tag = 'train_iou_class{}'.format(class_id)
            tf.identity(iou[class_id], name=tag)
            tf.summary.scalar(tag, iou[class_id])

        # With no valid class at all the result is 0.
        return tf.where(
            tf.greater(num_valid_entries, 0),
            tf.reduce_sum(iou, name=name) / num_valid_entries,
            0)

    train_mean_iou = compute_mean_iou(mean_iou[1])

    tf.identity(train_mean_iou, name='train_mean_iou')
    tf.summary.scalar('train_mean_iou', train_mean_iou)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics)
Beispiel #54
0
    def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta,
                             epsilon, lr, max_step):
        """
        Builds the PPO loss terms and the op that minimizes them.

        Sets self.returns_holder, self.advantage, self.old_value,
        self.learning_rate, self.value_loss, self.policy_loss, self.loss and
        self.update_batch. Reads self.global_step, self.mask and
        self.use_curiosity (plus self.forward_loss / self.inverse_loss when
        curiosity is enabled).
        :param probs: Current policy probabilities
        :param old_probs: Past policy probabilities
        :param value: Current value estimate
        :param entropy: Current policy entropy
        :param beta: Entropy regularization strength
        :param epsilon: Value for policy-divergence threshold
        :param lr: Learning rate
        :param max_step: Total number of training steps.
        """

        def linear_decay(start, end):
            # power=1.0 makes the polynomial schedule a straight line from
            # `start` to `end` over max_step steps.
            return tf.train.polynomial_decay(start,
                                             self.global_step,
                                             max_step,
                                             end,
                                             power=1.0)

        def masked_mean(tensor):
            # Average only the entries whose mask value selects partition 1.
            return tf.reduce_mean(
                tf.dynamic_partition(tensor, self.mask, 2)[1])

        # Inputs fed at training time.
        self.returns_holder = tf.placeholder(
            shape=[None], dtype=tf.float32, name='discounted_rewards')
        self.advantage = tf.placeholder(
            shape=[None, 1], dtype=tf.float32, name='advantages')
        self.old_value = tf.placeholder(
            shape=[None], dtype=tf.float32, name='old_value_estimates')

        # Schedules for the learning rate, clip range and entropy weight.
        self.learning_rate = linear_decay(lr, 1e-10)
        decay_epsilon = linear_decay(epsilon, 0.1)
        decay_beta = linear_decay(beta, 1e-5)

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

        # Value loss: the larger of the clipped and unclipped squared errors.
        value_delta = tf.reduce_sum(value, axis=1) - self.old_value
        clipped_value_estimate = self.old_value + tf.clip_by_value(
            value_delta, -decay_epsilon, decay_epsilon)

        unclipped_value_error = tf.squared_difference(
            self.returns_holder, tf.reduce_sum(value, axis=1))
        clipped_value_error = tf.squared_difference(
            self.returns_holder, clipped_value_estimate)
        self.value_loss = masked_mean(
            tf.maximum(unclipped_value_error, clipped_value_error))

        # PPO policy loss. In continuous control it is computed independently
        # for each action gaussian and then averaged, which performs
        # significantly better than treating the probability as an average of
        # probabilities or as a joint probability.
        ratio = tf.exp(probs - old_probs)
        unclipped_objective = ratio * self.advantage
        lower_bound = 1.0 - decay_epsilon
        upper_bound = 1.0 + decay_epsilon
        clipped_objective = tf.clip_by_value(
            ratio, lower_bound, upper_bound) * self.advantage
        self.policy_loss = -masked_mean(
            tf.minimum(unclipped_objective, clipped_objective))

        # Combined objective: policy term, half the value term, minus the
        # entropy bonus.
        mean_entropy = masked_mean(entropy)
        self.loss = (self.policy_loss + 0.5 * self.value_loss
                     - decay_beta * mean_entropy)

        if self.use_curiosity:
            curiosity_loss = (0.2 * self.forward_loss +
                              0.8 * self.inverse_loss)
            self.loss += 10 * curiosity_loss

        self.update_batch = optimizer.minimize(self.loss)
Beispiel #55
0
def one_dimensional_calibration_layer(uncalibrated_tensor,
                                      num_keypoints,
                                      signal_name,
                                      keypoints_initializers=None,
                                      keypoints_initializer_fns=None,
                                      bound=False,
                                      monotonic=None,
                                      missing_input_value=None,
                                      missing_output_value=None,
                                      **regularizer_amounts):
    """Creates a calibration layer for one single continuous signal.

    Returns a calibrated tensor of the uncalibrated continuous signal, the
    projection op(s) and the regularization term.

    Args:
      uncalibrated_tensor: Tensor of shape [batch_size] of one single signal.
        It must have rank 1.
      num_keypoints: Number of keypoints to use.
      signal_name: (Required) Used as a prefix of the variable names
        (e.g. `<signal_name>_keypoints_inputs`).
      keypoints_initializers: For evaluation or inference (or when resuming
        training from a checkpoint) the values will be loaded from disk, so
        they don't need to be given -- but in this case num_keypoints needs to
        be accurate. Two tensors of shape [num_keypoints]. See
        load_keypoints_from_quantiles or uniform_keypoints_for_signal on how
        to generate these (module keypoints_initialization).
      keypoints_initializer_fns: Like keypoints_initializers but using lambda
        initializers. They should be compatible with
        tf.compat.v1.get_variable. If this is set, then keypoints_initializers
        must be None.
      bound: Whether the output of the calibration must be bound. When truthy,
        the output keypoints are projected into [bound_min, bound_max], where
        both bounds are stored in variables.
      monotonic: Whether the calibration has to be kept monotonic: None or 0
        means no monotonicity. Positive or negative values mean increasing or
        decreasing monotonicity respectively.
      missing_input_value: If set, inputs equal to this value are treated as
        missing; their output is either a learned value or the fixed value
        given by missing_output_value. Limitation: it only works for scalars.
      missing_output_value: Requires missing_input_value to also be set. If
        set, missing inputs are mapped to this constant value.
      **regularizer_amounts: Keyword args of regularization amounts passed to
        regularizers.calibrator_regularization(). Keyword names should be
        among supported regularizers.CALIBRATOR_REGULARIZERS and values should
        be float.

    Returns:
      A tuple of:
      * calibrated tensor of shape [batchsize]
      * None or projection ops, that must be applied at each
        step (or every so many steps) to project the model to a feasible
        space: used for bounding the outputs or for imposing monotonicity.
      * None or a regularization loss, if regularization is configured.

    Raises:
      ValueError: if keypoints_initializers and keypoints_initializer_fns are
        both set.
      ValueError: if uncalibrated_tensor is not of rank 1.
      ValueError: if missing_output_value is set without missing_input_value.
      ValueError: if the dtypes of uncalibrated_tensor and
        keypoints_initializers are incompatible.
    """
    if (keypoints_initializers is not None
            and keypoints_initializer_fns is not None):
        raise ValueError(
            'keypoints_initializers and keypoints_initializer_fns '
            'cannot both be set.')
    with tf.compat.v1.variable_scope('pwl_calibration'):
        # Sanity checks.
        if uncalibrated_tensor.get_shape().ndims != 1:
            raise ValueError(
                'one_dimensional_calibration_layer can only be used for a single '
                'signal, so uncalibrated shape must be of form (batchsize), got %s'
                % uncalibrated_tensor.get_shape())
        if missing_output_value is not None and missing_input_value is None:
            raise ValueError(
                'missing_output_value can only be set if a misisng_input_value is '
                'also set, missing_input_value=None, missing_output_values=%s'
                % missing_output_value)

        # Create variables: only uses initializer if they are given.
        kp_in_name = signal_name + '_keypoints_inputs'
        kp_out_name = signal_name + '_keypoints_outputs'
        missing_out_calibrated_name = signal_name + '_calibrated_missing_output'

        if keypoints_initializers is not None:
            # Case 1: keypoints are initialized from concrete tensors.
            kp_in, kp_out = keypoints_initializers[0], keypoints_initializers[
                1]
            if (uncalibrated_tensor.dtype != kp_in.dtype
                    or uncalibrated_tensor.dtype != kp_out.dtype):
                raise ValueError(
                    'incompatible types for signal \'%s\': uncalibrated=%s, '
                    'keypoints_initializers[input=%s, output=%s]' %
                    (signal_name, uncalibrated_tensor.dtype, kp_in.dtype,
                     kp_out.dtype))
            tools.assert_shape(kp_in, [num_keypoints],
                               'keypoints_initializers[input]')
            tools.assert_shape(kp_out, [num_keypoints],
                               'keypoints_initializers[output]')
            keypoints_inputs = tf.compat.v1.get_variable(kp_in_name,
                                                         initializer=kp_in)
            keypoints_outputs = tf.compat.v1.get_variable(kp_out_name,
                                                          initializer=kp_out)

            if missing_input_value is not None:
                # Value to be taken by missing features.
                if missing_output_value is not None:
                    missing_out_calibrated = tf.constant(
                        missing_output_value, dtype=uncalibrated_tensor.dtype)
                else:
                    # Learned missing value, initialized by the first value of kp_out.
                    missing_out_calibrated = tf.compat.v1.get_variable(
                        missing_out_calibrated_name, initializer=kp_out[0])
        elif keypoints_initializer_fns is not None:
            # Case 2: keypoints are initialized by initializer callables, so
            # the variable shapes have to be given explicitly.
            kp_in, kp_out = keypoints_initializer_fns[
                0], keypoints_initializer_fns[1]
            keypoints_inputs = tf.compat.v1.get_variable(kp_in_name,
                                                         shape=[num_keypoints],
                                                         initializer=kp_in)
            keypoints_outputs = tf.compat.v1.get_variable(
                kp_out_name, shape=[num_keypoints], initializer=kp_out)

            if missing_input_value is not None:
                # Value to be taken by missing features.
                if missing_output_value is not None:
                    missing_out_calibrated = tf.constant(
                        missing_output_value, dtype=uncalibrated_tensor.dtype)
                else:
                    # Learned missing value, initialized by the first value of kp_out.
                    # The wrapper calls the output initializer and keeps only
                    # its first element, to fit the scalar variable below.
                    def first_kp_out(*args, **kwargs):
                        return kp_out(*args, **kwargs)[0]

                    missing_out_calibrated = tf.compat.v1.get_variable(
                        missing_out_calibrated_name,
                        shape=[],
                        initializer=first_kp_out)
        else:
            # Case 3: when loading a model, no initializer is passed; the
            # variables are declared by shape and dtype only.
            keypoints_inputs = tf.compat.v1.get_variable(
                kp_in_name,
                shape=[num_keypoints],
                dtype=uncalibrated_tensor.dtype)
            keypoints_outputs = tf.compat.v1.get_variable(
                kp_out_name,
                shape=[num_keypoints],
                dtype=uncalibrated_tensor.dtype)
            if missing_input_value is not None:
                if missing_output_value is not None:
                    missing_out_calibrated = tf.constant(
                        missing_output_value, dtype=uncalibrated_tensor.dtype)
                else:
                    missing_out_calibrated = tf.compat.v1.get_variable(
                        missing_out_calibrated_name,
                        shape=[],
                        dtype=uncalibrated_tensor.dtype)

        # Split missing values from normal values.
        # Future work: move the handling of missing values to C++ land.
        if missing_input_value is not None:
            missing_mask = tf.equal(uncalibrated_tensor,
                                    tf.constant(missing_input_value))
            # Remember the original batch positions of both groups so that the
            # results can be stitched back in the original order.
            mask_indices = tf.range(tf.shape(uncalibrated_tensor)[0])
            mask_indices = tf.dynamic_partition(
                mask_indices, tf.cast(missing_mask, tf.int32), 2)
            # Partition 0 holds the present inputs, partition 1 the missing
            # ones; from here on uncalibrated_tensor only holds present inputs.
            (uncalibrated_tensor, missing_values) = tf.dynamic_partition(
                uncalibrated_tensor, tf.cast(missing_mask, tf.int32), 2)

            # Assign value to missing_values.
            missing_values = tf.ones_like(missing_values)
            missing_values *= missing_out_calibrated

        # Dense implementation: weight the output keypoints by the
        # interpolation weights and sum over axis 1.
        interpolation = pwl_calibration_ops.pwl_indexing_calibrator(
            uncalibrated_tensor, keypoints_inputs)
        calibrated = tf.reduce_sum(interpolation * keypoints_outputs, 1)
        projection_ops = None

        # Re-join missing values.
        if missing_input_value is not None:
            calibrated = tf.dynamic_stitch(mask_indices,
                                           [calibrated, missing_values])

        # Boundness.
        projected_keypoints_outputs = None
        if bound:
            bound_min_name = signal_name + '_bound_min'
            bound_max_name = signal_name + '_bound_max'
            # Set bound_min/max from min/max values initialized.
            if keypoints_initializers is not None:
                # Store bound_min and bound_max in variables because their values (from
                # kp_out) are only available during train (when keypoints_initializers
                # is available). During inference the value is not available. Storing
                # them in variables make them available during inference.
                bound_min = tf.compat.v1.get_variable(
                    bound_min_name,
                    dtype=uncalibrated_tensor.dtype,
                    initializer=tf.reduce_min(kp_out))
                bound_max = tf.compat.v1.get_variable(
                    bound_max_name,
                    dtype=uncalibrated_tensor.dtype,
                    initializer=tf.reduce_max(kp_out))
            elif keypoints_initializer_fns is not None:
                # Store bound_min and bound_max in variables because their values (from
                # kp_out) are only available during train (when
                # keypoints_initializer_fns is available). During inference the value
                # is not available. Storing them in variables make them available
                # during inference.
                def min_kp_out(*args, **kwargs):
                    return tf.reduce_min(kp_out(*args, **kwargs))

                def max_kp_out(*args, **kwargs):
                    return tf.reduce_max(kp_out(*args, **kwargs))

                bound_min = tf.compat.v1.get_variable(
                    bound_min_name,
                    dtype=uncalibrated_tensor.dtype,
                    shape=[],
                    initializer=min_kp_out)
                bound_max = tf.compat.v1.get_variable(
                    bound_max_name,
                    dtype=uncalibrated_tensor.dtype,
                    shape=[],
                    initializer=max_kp_out)
            else:
                # No need to initialize, since presumably their values will be read
                # from some checkpoint.
                bound_min = tf.compat.v1.get_variable(
                    bound_min_name, dtype=uncalibrated_tensor.dtype, shape=[])
                bound_max = tf.compat.v1.get_variable(
                    bound_max_name, dtype=uncalibrated_tensor.dtype, shape=[])
            # Clamp the output keypoints into [bound_min, bound_max].
            projected_keypoints_outputs = tf.minimum(
                tf.maximum(keypoints_outputs, bound_min), bound_max)

        # Monotonicity.
        if monotonic:
            # First a soft-enforcement: might not break indirect constraints.
            # Starts from the bounded outputs when bounding is enabled,
            # otherwise from the raw output keypoints.
            if projected_keypoints_outputs is None:
                projected_keypoints_outputs = keypoints_outputs
            projected_keypoints_outputs = pwl_calibration_ops.monotonic_projection(
                increasing=bool(monotonic > 0),
                values=projected_keypoints_outputs,
                name='project_calibration_to_monotonic')

        # Make assign_add op to projected output: adding the difference moves
        # keypoints_outputs onto the projected (feasible) values.
        if projected_keypoints_outputs is not None:
            constrained_diff = projected_keypoints_outputs - keypoints_outputs
            projection_ops = tf.compat.v1.assign_add(keypoints_outputs,
                                                     constrained_diff,
                                                     use_locking=None,
                                                     name='project_feasible')
            # Only a learned missing output (a variable) needs projecting; a
            # fixed missing_output_value is a constant and is left alone.
            if (bound and missing_input_value is not None
                    and missing_output_value is None):
                # Include op bounding calibrated missing value.
                projected_missing_out_calibrated = tf.minimum(
                    tf.maximum(missing_out_calibrated, bound_min), bound_max)
                projected_missing_out_calibrated_diff = (
                    projected_missing_out_calibrated - missing_out_calibrated)
                projected_missing_out_calibrated_op = tf.compat.v1.assign_add(
                    missing_out_calibrated,
                    projected_missing_out_calibrated_diff,
                    use_locking=None,
                    name='project_missing_calibration_to_bounds')
                projection_ops = tf.group(projection_ops,
                                          projected_missing_out_calibrated_op)

        # Regularization
        regularization = regularizers.calibrator_regularization(
            keypoints_outputs,
            name=signal_name + '_calibrator_regularization',
            **regularizer_amounts)
    return calibrated, projection_ops, regularization
Beispiel #56
0
    def build(self):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.softmax_temperature = tf.maximum( \
                                              self.config.max_temperature-tf.cast(tf.divide(self.global_step, tf.constant(self.config.linear_steps)), dtype=tf.float32), \
                                              self.config.min_temperature)

        with tf.name_scope('t_variables'):
            self.sample = self.t_variables['sample']

            self.batch_l = self.t_variables['batch_l']
            self.doc_l = self.t_variables['doc_l']
            self.sent_l = self.t_variables['sent_l']
            self.dec_sent_l = self.t_variables[
                'dec_sent_l']  # batch_l x max_doc_l

            self.max_doc_l = tf.reduce_max(self.doc_l)
            self.max_sent_l = tf.reduce_max(self.sent_l)
            self.max_dec_sent_l = tf.reduce_max(
                self.dec_sent_l)  # = max_sent_l + 1

            self.mask_doc = tf.sequence_mask(self.doc_l, dtype=tf.float32)
            self.mask_sent = tf.sequence_mask(self.sent_l, dtype=tf.float32)

            mask_bow = np.zeros(self.config.n_vocab)
            mask_bow[self.config.bow_idxs] = 1.
            self.mask_bow = tf.constant(mask_bow, dtype=tf.float32)

            self.enc_keep_prob = self.t_variables['enc_keep_prob']

        # ------------------------------Encoder ------------------------------
        with tf.variable_scope('emb'):
            with tf.variable_scope('word', reuse=False):
                pad_embedding = tf.zeros([1, self.config.dim_emb],
                                         dtype=tf.float32)
                nonpad_embeddings = tf.get_variable('emb', [self.config.n_vocab-1, self.config.dim_emb], dtype=tf.float32, \
                                                                initializer=tf.contrib.layers.xavier_initializer())
                self.embeddings = tf.concat([pad_embedding, nonpad_embeddings],
                                            0)  # n_vocab x dim_emb
                self.bow_embeddings = tf.nn.embedding_lookup(
                    self.embeddings, self.config.bow_idxs)  # dim_bow x dim_emb

                # get sentence embeddings
                self.enc_input_idxs = tf.one_hot(
                    self.t_variables['enc_input_idxs'],
                    depth=self.config.n_vocab
                )  # batch_l x max_doc_l x max_sent_l x n_vocab
                self.enc_inputs = tf.tensordot(
                    self.enc_input_idxs, self.embeddings,
                    axes=[[-1],
                          [0]])  # batch_l x max_doc_l x max_sent_l x dim_emb

            with tf.variable_scope('sent', reuse=False):
                self.sent_outputs, self.sent_state = \
                    encode_inputs(self, enc_inputs=self.enc_inputs, sent_l=self.sent_l) # batch_l x max_doc_l x dim_hidden*2

        with tf.variable_scope('enc'):
            # get sentence latents
            with tf.variable_scope('latents_sent', reuse=False):
                self.w_topic_posterior = tf.get_variable(
                    'topic_posterior/kernel', [
                        self.config.n_topic, self.sent_state.shape[-1],
                        self.config.dim_hidden
                    ],
                    dtype=tf.float32)
                self.b_topic_posterior = tf.get_variable(
                    'topic_posterior/bias',
                    [1, self.config.n_topic, self.config.dim_hidden],
                    dtype=tf.float32)

                self.topic_state = tf.reduce_sum(
                    self.sent_state * tf.expand_dims(self.mask_doc, -1),
                    -2) / tf.reduce_sum(self.mask_doc, -1, keepdims=True)
                self.hidden_topic_posterior = tf.tensordot(
                    self.topic_state, self.w_topic_posterior, axes=[[1], [1]]
                ) + self.b_topic_posterior  # batch_l x n_topic x dim_hidden

        # ------------------------------Discriminator------------------------------
        with tf.variable_scope('disc'):
            with tf.variable_scope('prob_topic', reuse=False):
                # encode by TSNTM
                self.probs_sent_topic_posterior, _, _ = \
                    encode_gsm_probs_topic_posterior(self, self.hidden_topic_posterior.get_shape()[-1], self.hidden_topic_posterior, self.mask_doc, self.config) # batch_l x max_doc_l x n_topic

            with tf.name_scope('latents_topic'):
                # get topic sentence posterior distribution for each document
                self.probs_topic_posterior = tf.reduce_sum(
                    self.probs_sent_topic_posterior, 1)  # batch_l x n_topic

                self.means_sent_topic_posterior = tf.multiply(tf.expand_dims(self.probs_sent_topic_posterior, -1), \
                        tf.expand_dims(self.means_sent_posterior, -2)) # batch_l x max_doc_l x n_topic x dim_latent
                self.means_topic_posterior_ = tf.reduce_sum(self.means_sent_topic_posterior, 1) / \
                        tf.expand_dims(self.probs_topic_posterior, -1) # batch_l x n_topic x dim_latent
                self.means_topic_posterior = tf_clip_means(
                    self.means_topic_posterior_, self.probs_topic_posterior)

                diffs_sent_topic_posterior = tf.expand_dims(self.means_sent_posterior, 2) - \
                        tf.expand_dims(self.means_topic_posterior, 1) # batch_l x max_doc_l x n_topic x dim_latent
                self.covs_sent_topic_posterior = tf.multiply(tf.expand_dims(tf.expand_dims(self.probs_sent_topic_posterior, -1), -1), \
                        tf.matrix_diag(tf.expand_dims(tf.exp(self.logvars_sent_posterior), 2)) + tf.matmul(tf.expand_dims(diffs_sent_topic_posterior, -1), \
                        tf.expand_dims(diffs_sent_topic_posterior, -2))) # batch_l x max_doc_l x n_topic x dim_latent x dim_latent
                self.covs_topic_posterior_ = tf.reduce_sum(self.covs_sent_topic_posterior, 1) / \
                        tf.expand_dims(tf.expand_dims(self.probs_topic_posterior, -1), -1) # batch_l x n_topic x dim_latent x dim_latent
                self.covs_topic_posterior = tf_clip_covs(
                    self.covs_topic_posterior_, self.probs_topic_posterior)

                self.latents_topic_posterior = sample_latents_fullcov(self.means_topic_posterior, self.covs_topic_posterior, \
                                                                      seed=self.config.seed, sample=self.sample)

                self.means_topic_prior = tf.zeros(
                    [
                        self.batch_l, self.config.n_topic,
                        self.config.dim_latent
                    ],
                    dtype=tf.float32)  # batch_l x n_topic x dim_latent
                self.covs_topic_prior = tf.eye(
                    self.config.dim_latent,
                    batch_shape=[self.batch_l, self.config.n_topic],
                    dtype=tf.float32) * self.config.cov_root

        # ------------------------------Decoder----------------------------------
        with tf.variable_scope('dec'):
            # decode for training sent
            with tf.variable_scope(
                    'sent',
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32,
                    reuse=False):
                self.dec_cell = tf.contrib.rnn.GRUCell(self.config.dim_hidden)
                self.dec_cell = tf.contrib.rnn.DropoutWrapper(
                    self.dec_cell,
                    output_keep_prob=self.t_variables['dec_keep_prob'])
                self.dec_sent_cell = self.dec_cell
                self.latent_hidden_layer = tf.layers.Dense(
                    units=self.config.dim_hidden,
                    activation=tf.nn.relu,
                    name='latent_hidden_linear')
                self.dec_sent_initial_state = self.latent_hidden_layer(
                    self.latents_sent_posterior
                )  # batch_l x max_doc_l x dim_hidden
                self.output_layer = tf.layers.Dense(self.config.n_vocab,
                                                    use_bias=False,
                                                    name='out')

                if self.config.attention:
                    self.sent_outputs_flat = tf.reshape(
                        self.sent_outputs, [
                            self.batch_l * self.max_doc_l, self.max_sent_l,
                            self.config.dim_hidden * 2
                        ])
                    self.att_sent_l_flat = tf.reshape(
                        tf.maximum(self.sent_l, 1),
                        [self.batch_l * self.max_doc_l])
                    self.att_sent_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=self.config.dim_hidden,
                                                                                memory=self.sent_outputs_flat, \
                                                                                memory_sequence_length=self.att_sent_l_flat)
                    self.att_cell = tf.contrib.seq2seq.AttentionWrapper(
                        self.dec_cell,
                        attention_mechanism=self.att_sent_mechanism,
                        attention_layer_size=self.config.dim_hidden)
                    self.dec_sent_cell = self.att_cell

                # teacher forcing
                self.dec_input_idxs = self.t_variables[
                    'dec_input_idxs']  # batch_l x max_doc_l x max_dec_sent_l
                self.dec_inputs = tf.nn.embedding_lookup(
                    self.embeddings, self.dec_input_idxs
                )  # batch_l x max_doc_l x max_dec_sent_l x dim_emb

                # output_sent_l == dec_sent_l
                self.output_logits_flat, self.output_sent_l_flat = decode_output_logits_flat(
                    self,
                    dec_cell=self.dec_sent_cell,
                    dec_initial_state=self.dec_sent_initial_state,
                    dec_inputs=self.dec_inputs,
                    dec_sent_l=self.dec_sent_l,
                    latents_input=self.latents_sent_posterior
                )  # batch_l*max_doc_l x max_output_sent_l x n_vocab

                self.output_sent_l = tf.reshape(self.output_sent_l_flat,
                                                [self.batch_l, self.max_doc_l])
                self.max_output_sent_l = tf.reduce_max(self.output_sent_l)
                self.output_logits = tf.reshape(self.output_logits_flat, \
                                    [self.batch_l, self.max_doc_l, self.max_output_sent_l, self.config.n_vocab], name='output_logits')
                if self.config.disc_gumbel:
                    self.output_input_idxs = sample_gumbels(
                        self.output_logits, self.softmax_temperature,
                        self.config.seed, self.sample
                    )  # batch_l x max_doc_l x max_output_sent_l  x n_vocab
                else:
                    self.output_input_idxs = self.output_logits

            # decode for training topic probs
            with tf.variable_scope(
                    'sent',
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32,
                    reuse=True):
                self.dec_topic_cell = self.dec_cell
                if self.config.attention:
                    self.topic_outputs_flat = tf.contrib.seq2seq.tile_batch(tf.reshape(self.sent_outputs, \
                                            [self.batch_l, self.max_doc_l*self.max_sent_l, self.sent_outputs.get_shape()[-1]]), \
                                            multiplier=self.config.n_topic) # batch_l*n_topic x max_doc_l*max_sent_l x dim_hidden*2
                    self.score_mask = tf.contrib.seq2seq.tile_batch(tf.reshape(tf.sequence_mask(self.sent_l), \
                                            [self.batch_l, self.max_doc_l*self.max_sent_l]), multiplier=self.config.n_topic) # batch_l*n_topic x max_doc_l*max_sent_l
                    self.hier_score = tf.reshape(tf.transpose(self.probs_sent_topic_posterior, [0, 2, 1]), \
                                            [self.batch_l*self.config.n_topic, self.max_doc_l]) # batch_l*n_topic x max_doc_l

                    self.att_topic_mechanism = HierarchicalAttention(
                        num_units=self.config.dim_hidden,
                        memory=self.topic_outputs_flat,
                        score_mask=self.score_mask,
                        hier_score=self.hier_score)
                    self.att_topic_cell = AttentionWrapper(
                        self.dec_cell,
                        attention_mechanism=self.att_topic_mechanism,
                        attention_layer_size=self.config.dim_hidden)
                    self.dec_topic_cell = self.att_topic_cell

                if not self.config.disc_mean:
                    self.dec_topic_initial_state = self.latent_hidden_layer(
                        self.latents_topic_posterior)
                    dec_topic_outputs, self.summary_sent_l_flat = decode_output_sample_flat(
                        self,
                        dec_cell=self.dec_topic_cell,
                        dec_initial_state=self.dec_topic_initial_state,
                        softmax_temperature=self.softmax_temperature,
                        sample=self.sample,
                        latents_input=self.latents_topic_posterior
                    )  # batch_l*max_doc_l x max_summary_sent_l x n_vocab
                else:
                    self.dec_topic_initial_state = self.latent_hidden_layer(
                        self.means_topic_posterior)
                    dec_topic_outputs, self.summary_sent_l_flat = decode_output_sample_flat(
                        self,
                        dec_cell=self.dec_topic_cell,
                        dec_initial_state=self.dec_topic_initial_state,
                        softmax_temperature=self.softmax_temperature,
                        sample=self.sample,
                        latents_input=self.means_topic_posterior
                    )  # batch_l*max_doc_l x max_summary_sent_l x n_vocab

                self.summary_sent_l = tf.reshape(
                    self.summary_sent_l_flat,
                    [self.batch_l, self.config.n_topic])
                self.max_summary_sent_l = tf.reduce_max(self.summary_sent_l)
                if self.config.disc_gumbel:
                    summary_input_idxs_flat = dec_topic_outputs.sample_id
                else:
                    summary_input_idxs_flat = dec_topic_outputs.rnn_output
                self.summary_input_idxs = tf.reshape(summary_input_idxs_flat, \
                                                     [self.batch_l, self.config.n_topic, self.max_summary_sent_l, self.config.n_vocab], name='summary_input_idxs')

                # re-encode topic sentence outputs
                self.summary_inputs = tf.tensordot(
                    self.summary_input_idxs, self.embeddings, axes=[[-1], [
                        0
                    ]])  # batch_l x n_topic x max_summary_sent_l x dim_emb
                self.summary_input_sent_l = self.summary_sent_l - 1  # to remove EOS
                self.mask_summary_sent = tf.sequence_mask(self.summary_input_sent_l, \
                                                          maxlen=self.max_summary_sent_l, dtype=tf.float32) # batch_l x n_topic x max_summary_sent_l
                self.mask_summary_doc = tf.ones(
                    [self.batch_l, self.config.n_topic], dtype=tf.float32)

            # beam decode for inference of original sentences
            with tf.variable_scope(
                    'sent',
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32,
                    reuse=True):
                self.beam_dec_sent_cell = self.dec_cell
                if self.config.attention:
                    self.beam_sent_outputs_flat = tf.contrib.seq2seq.tile_batch(
                        self.sent_outputs_flat,
                        multiplier=self.config.beam_width)
                    self.beam_att_sent_l_flat = tf.contrib.seq2seq.tile_batch(
                        self.att_sent_l_flat,
                        multiplier=self.config.beam_width)
                    self.beam_att_sent_mechanism = tf.contrib.seq2seq.LuongAttention(
                        num_units=self.config.dim_hidden,
                        memory=self.beam_sent_outputs_flat,
                        memory_sequence_length=self.beam_att_sent_l_flat)
                    self.beam_dec_sent_cell = tf.contrib.seq2seq.AttentionWrapper(
                        self.beam_dec_sent_cell,
                        attention_mechanism=self.beam_att_sent_mechanism,
                        attention_layer_size=self.config.dim_hidden)

                # infer original sentences
                self.beam_output_idxs, _, _ = decode_beam_output_token_idxs(
                    self,
                    beam_dec_cell=self.beam_dec_sent_cell,
                    dec_initial_state=self.dec_sent_initial_state,
                    latents_input=self.means_sent_posterior,
                    name='beam_output_idxs')

            # beam decode for inference of topic sentences
            with tf.variable_scope(
                    'sent',
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32,
                    reuse=True):
                self.beam_dec_topic_cell = self.dec_cell
                if self.config.attention:
                    self.beam_topic_outputs_flat = tf.contrib.seq2seq.tile_batch(
                        self.topic_outputs_flat,
                        multiplier=self.config.beam_width)
                    self.beam_score_mask = tf.contrib.seq2seq.tile_batch(
                        self.score_mask, multiplier=self.config.beam_width)
                    self.beam_hier_score = tf.contrib.seq2seq.tile_batch(
                        self.hier_score, multiplier=self.config.beam_width)
                    self.beam_att_topic_mechanism = HierarchicalAttention(
                        num_units=self.config.dim_hidden,
                        memory=self.beam_topic_outputs_flat,
                        score_mask=self.beam_score_mask,
                        hier_score=self.beam_hier_score)
                    self.beam_dec_topic_cell = AttentionWrapper(
                        self.beam_dec_topic_cell,
                        attention_mechanism=self.beam_att_topic_mechanism,
                        attention_layer_size=self.config.dim_hidden)

                # infer topic sentences
                self.beam_summary_idxs, _, _ = decode_beam_output_token_idxs(
                    self,
                    beam_dec_cell=self.beam_dec_topic_cell,
                    dec_initial_state=self.dec_topic_initial_state,
                    latents_input=self.latents_topic_posterior,
                    name='beam_summary_idxs')

                self.beam_mask_summary_sent = tf.logical_not(tf.equal(self.beam_summary_idxs, \
                                                                      self.config.EOS_IDX)) # batch_l x n_topic x max_summary_sent_l
                self.beam_summary_input_sent_l = tf.reduce_sum(
                    tf.cast(self.beam_mask_summary_sent, tf.int32),
                    -1)  # batch_l x n_topic
                beam_summary_soft_idxs = tf.one_hot(tf.where(self.beam_mask_summary_sent, \
                                                                            self.beam_summary_idxs, tf.zeros_like(self.beam_summary_idxs)), depth=self.config.n_vocab)
                self.beam_summary_inputs = tf.tensordot(beam_summary_soft_idxs, \
                                                        self.embeddings, [[-1], [0]]) # batch_l x n_topic x max_beam_summary_sent_l x dim_emb

        # ------------------------------Discriminator------------------------------
        # encode by MLP
        if self.config.enc == 'mlp':
            with tf.variable_scope('disc'):
                with tf.variable_scope('prob_topic', reuse=True):
                    self.summary_state = encode_states(self, enc_inputs=self.summary_inputs, mask_sent=self.mask_summary_sent, \
                                                                   enc_keep_prob=self.enc_keep_prob, config=self.config) # batch_l x n_topic x dim_hidden
        elif self.config.enc == 'bow':
            with tf.variable_scope('disc'):
                with tf.variable_scope('prob_topic', reuse=True):
                    self.bow_summary_input_idxs = tf.multiply(
                        self.summary_input_idxs, self.mask_bow)
                    self.bow_summary_inputs = tf.tensordot(
                        self.bow_summary_input_idxs,
                        self.embeddings,
                        axes=[[-1], [0]
                              ])  # batch_l x max_doc_l x max_sent_l x dim_emb
                    self.mask_summary_bow = tf.reduce_sum(
                        self.bow_summary_input_idxs, -1)
                    self.summary_state = encode_states(self, enc_inputs=self.bow_summary_inputs, mask_sent=self.mask_summary_bow, \
                                                                   enc_keep_prob=self.enc_keep_prob, config=self.config) # batch_l x max_doc_l x dim_hidden
        elif self.config.enc == 'rnn':
            with tf.variable_scope('emb'):
                with tf.variable_scope('sent', reuse=True):
                    _, self.summary_state = encode_inputs(
                        self,
                        enc_inputs=self.summary_inputs,
                        sent_l=self.summary_input_sent_l
                    )  # batch_l x max_doc_l x dim_hidden*2
                    _, self.beam_summary_state = encode_inputs(
                        self,
                        enc_inputs=self.beam_summary_inputs,
                        sent_l=self.beam_summary_input_sent_l
                    )  # batch_l x max_doc_l x dim_hidden*2

        with tf.variable_scope('disc'):
            with tf.variable_scope('prob_topic', reuse=True):
                self.probs_summary_topic_posterior, _, _ = \
                        encode_gsm_probs_topic_posterior(self, self.summary_state.get_shape()[-1], self.summary_state, self.mask_summary_doc, self.config)
                self.logits_summary_topic_posterior_ = tf_log(
                    tf.matrix_diag_part(self.probs_summary_topic_posterior)
                )  # batch_l x n_topic
                self.logits_summary_topic_posterior = tf_clip_vals(
                    self.logits_summary_topic_posterior_,
                    self.probs_topic_posterior)

        # ------------------------------Optimizer and Loss------------------------------
        with tf.name_scope('opt'):
            partition_doc = tf.cast(self.mask_doc, dtype=tf.int32)
            self.n_sents = tf.cast(tf.reduce_sum(self.doc_l), dtype=tf.float32)
            self.n_tokens = tf.reduce_sum(self.dec_sent_l)

            # ------------------------------Reconstruction Loss of Language Model------------------------------
            # target and mask
            self.dec_target_idxs = self.t_variables[
                'dec_target_idxs']  # batch_l x max_doc_l x max_dec_sent_l
            self.dec_sent_l = self.t_variables[
                'dec_sent_l']  # batch_l x max_doc_l
            self.max_dec_sent_l = tf.reduce_max(
                self.dec_sent_l)  # = max_sent_l + 1
            self.dec_mask_sent = tf.sequence_mask(self.dec_sent_l,
                                                  maxlen=self.max_dec_sent_l,
                                                  dtype=tf.float32)
            self.dec_target_idxs_flat = tf.reshape(
                self.dec_target_idxs,
                [self.batch_l * self.max_doc_l, self.max_dec_sent_l])
            self.dec_mask_sent_flat = tf.reshape(
                self.dec_mask_sent,
                [self.batch_l * self.max_doc_l, self.max_dec_sent_l])

            # nll for each token (summed over sentence)
            self.recon_max_sent_l = tf.minimum(
                self.max_dec_sent_l,
                self.max_output_sent_l) if self.config.sample else None
            losses_recon_flat = tf.reduce_sum(
                tf.contrib.seq2seq.sequence_loss(
                    self.output_logits_flat[:, :self.recon_max_sent_l, :],
                    self.dec_target_idxs_flat[:, :self.recon_max_sent_l],
                    self.dec_mask_sent_flat[:, :self.recon_max_sent_l],
                    average_across_timesteps=False,
                    average_across_batch=False), -1)  # batch_l*max_doc_l
            self.losses_recon = tf.reshape(losses_recon_flat,
                                           [self.batch_l, self.max_doc_l])
            self.loss_recon = tf.reduce_mean(
                tf.dynamic_partition(
                    self.losses_recon, partition_doc,
                    num_partitions=2)[1])  # average over doc x batch

            # ------------------------------KL divergence Loss of Topic Probability Distribution------------------------------
            if self.config.topic_model:
                self.probs_sent_topic_prior = tf.expand_dims(
                    self.probs_doc_topic_posterior, 1)  # batch_l x 1 x n_topic
            else:
                self.probs_sent_topic_prior = tf.ones_like(self.probs_sent_topic_posterior, dtype=tf.float32) / \
                                                        self.config.n_topic # batch_l x max_doc_l x n_topic, uniform distribution over topics
            self.losses_kl_prob = tf.reduce_sum(tf.multiply(self.probs_sent_topic_posterior, \
                                                            (tf_log(self.probs_sent_topic_posterior)-tf_log(self.probs_sent_topic_prior))), -1)
            self.loss_kl_prob = tf.reduce_mean(
                tf.dynamic_partition(
                    self.losses_kl_prob, partition_doc,
                    num_partitions=2)[1])  # average over doc x batch

            # ------------------------------KL divergence Loss of Sentence Latents Distribution------------------------------
            self.losses_kl_sent_gauss = compute_kl_losses_sent_gauss(
                self
            )  # batch_l x max_doc_l x n_topic, sum over latent dimension
            self.losses_kl_sent_gmm = tf.reduce_sum(
                tf.multiply(self.probs_sent_topic_posterior,
                            self.losses_kl_sent_gauss),
                -1)  # batch_l x max_doc_l, sum over topics
            self.loss_kl_sent_gmm = tf.reduce_mean(
                tf.dynamic_partition(
                    self.losses_kl_sent_gmm, partition_doc,
                    num_partitions=2)[1])  # average over doc x batch

            # ------------------------------KL divergence Loss of Topic Latents Distribution------------------------------
            if self.config.reverse_kl:
                self.losses_kl_topic_pairs_gauss = compute_kl_losses_topic_paris_gauss(
                    self)
                self.losses_kl_topic_gauss_reverse = tf.reduce_sum(self.losses_kl_topic_pairs_gauss * self.config.mask_tree[None, None, :, :], -1) / \
                                        np.maximum(np.sum(self.config.mask_tree[None, None, :, :], -1), 1) # batch_l x 1 x n_topic, mean over other child topics
                self.losses_kl_topic_gmm_reverse = tf.reduce_sum(
                    tf.multiply(self.probs_sent_topic_posterior,
                                self.losses_kl_topic_gauss_reverse),
                    -1)  # batch_l x max_doc_l, sum over topics
                self.loss_kl_topic_gmm_reverse = tf.reduce_mean(
                    tf.dynamic_partition(self.losses_kl_topic_gmm_reverse,
                                         partition_doc,
                                         num_partitions=2)[1])
            else:
                self.loss_kl_topic_gmm_reverse = tf.constant(0.,
                                                             dtype=tf.float32)

            # for monitor
            self.losses_kl_topic_gauss = compute_kl_losses_topic_gauss(
                self)  # batch_l x 1 x n_topic, sum over latent dimension
            self.losses_kl_topic_gmm = tf.reduce_sum(
                tf.multiply(self.probs_sent_topic_posterior,
                            self.losses_kl_topic_gauss),
                -1)  # batch_l x max_doc_l, sum over topics
            self.loss_kl_topic_gmm = tf.reduce_mean(
                tf.dynamic_partition(self.losses_kl_topic_gmm,
                                     partition_doc,
                                     num_partitions=2)[1])

            # ------------------------------KL divergence Loss of Root State Distribution------------------------------
            if self.config.prior_root:
                self.losses_kl_root = compute_kl_losses(
                    self.means_state_root_posterior,
                    self.logvars_state_root_posterior)  # batch_l x max_doc_l
                self.loss_kl_root = tf.reduce_sum(
                    self.losses_kl_root) / tf.cast(
                        tf.reduce_sum(self.doc_l),
                        dtype=tf.float32)  # average over doc x batch
            else:
                self.loss_kl_root = tf.constant(0, dtype=tf.float32)

            # ------------------------------Discriminator Loss------------------------------
            if self.config.disc_topic:
                self.losses_disc_topic = -tf.reduce_sum(
                    self.logits_summary_topic_posterior,
                    -1)  # batch_l, sum over topic
                self.loss_disc_topic = tf.reduce_sum(
                    self.losses_disc_topic
                ) / self.n_sents  # average over doc x batch
            else:
                self.loss_disc_topic = tf.constant(0, dtype=tf.float32)

            # ------------------------------Loss of Topic Model------------------------------
            if self.config.topic_model:
                # recon
                self.topic_losses_recon = -tf.reduce_sum(
                    tf.multiply(self.t_variables['doc_bows'], self.logits_bow),
                    -1)  # n_batch, sum over n_bow
                self.topic_loss_recon = tf.reduce_mean(
                    self.topic_losses_recon)  # average over doc x batch

                # kl_bow
                self.means_topic_bow_prior = tf.squeeze(get_params_topic_prior(self, tf.expand_dims(self.means_topic_bow_posterior, 0), \
                                                                    tf.zeros([1, self.config.dim_latent], dtype=tf.float32)), 0) # n_topic x dim_latent
                self.logvars_topic_bow_prior = tf.squeeze(get_params_topic_prior(self, tf.expand_dims(self.logvars_topic_bow_posterior, 0), \
                                                                                tf.zeros([1, self.config.dim_latent], dtype=tf.float32)), 0) # n_topic x dim_latent
                self.topic_losses_kl_bow = compute_kl_losses(self.means_topic_bow_posterior, self.logvars_topic_bow_posterior, \
                                                                            means_prior=self.means_topic_bow_prior, logvars_prior=self.logvars_topic_bow_prior) # n_topic
                self.topic_loss_kl_bow = tf.reduce_mean(
                    self.topic_losses_kl_bow)  # average over doc x batch

                # kl_prob
                self.topic_losses_kl_prob = compute_kl_losses(
                    self.means_probs_doc_topic_posterior,
                    self.logvars_probs_doc_topic_posterior)  # batch_l
                self.topic_loss_kl_prob = tf.reduce_mean(
                    self.topic_losses_kl_prob)  # average over doc x batch
            else:
                self.topic_loss_recon = tf.constant(0, dtype=tf.float32)
                self.topic_loss_kl_bow = tf.constant(0, dtype=tf.float32)
                self.topic_loss_kl_prob = tf.constant(0, dtype=tf.float32)

            # ------------------------------Topic Regularization Loss------------------------------
            if self.config.reg != '':
                if self.config.reg == 'mean':
                    self.topic_dots = self.get_topic_dots(
                        self.means_topic_posterior
                    )  # batch_l x n_topic-1 x n_topic-1
                elif self.config.reg == 'bow':
                    self.topic_dots = self.get_topic_dots(
                        tf.expand_dims(
                            self.topic_bow,
                            0))  # batch_l(=1) x n_topic-1 x n_topic-1

                self.losses_reg = tf.reduce_sum(tf.square(self.topic_dots - tf.eye(len(self.config.all_child_idxs))) * self.config.mask_tree_reg, [1, 2])\
                                        / tf.reduce_sum(self.config.mask_tree_reg) # batch_l
                self.loss_reg = tf.reduce_mean(
                    self.losses_reg)  # average over batch
            else:
                self.loss_reg = tf.constant(0, dtype=tf.float32)

            # ------------------------------Optimizer------------------------------
            if self.config.anneal == 'linear':
                self.tau = tf.cast(tf.divide(
                    self.global_step, tf.constant(self.config.linear_steps)),
                                   dtype=tf.float32)
                self.beta = tf.minimum(1., self.config.beta_init + self.tau)
            elif self.config.anneal == 'cycle':
                self.tau = tf.cast(tf.divide(
                    tf.mod(self.global_step,
                           tf.constant(self.config.cycle_steps)),
                    tf.constant(self.config.cycle_steps)),
                                   dtype=tf.float32)
                self.beta = tf.minimum(
                    1., self.config.beta_init + self.tau /
                    (1. - self.config.r_cycle))
            else:
                self.beta = tf.constant(1.)

            self.beta_disc = self.beta if self.config.beta_disc else tf.constant(
                1.)

            def get_opt(loss, var_list, lr, global_step=None):
                """Build a training op for ``loss`` with element-wise gradient clipping.

                The optimizer class is selected from ``self.config.opt`` and
                every gradient is clipped to the closed interval
                ``[-self.config.grad_clip, self.config.grad_clip]`` before
                being applied.

                Args:
                    loss: loss tensor handed to ``compute_gradients``.
                    var_list: variables to compute gradients for.
                    lr: learning rate passed to the optimizer constructor.
                    global_step: optional value forwarded unchanged to
                        ``apply_gradients``.

                Returns:
                    A tuple ``(opt, grad_vars, clipped_grad_vars)``: the op
                    returned by ``apply_gradients``, the raw
                    (gradient, variable) pairs, and the clipped pairs. Pairs
                    whose gradient is ``None`` are dropped from the clipped
                    list only.

                Raises:
                    ValueError: if ``self.config.opt`` is neither ``'adam'``
                        nor ``'adagrad'``.
                """
                if self.config.opt == 'adam':
                    Optimizer = tf.train.AdamOptimizer
                elif self.config.opt == 'adagrad':
                    Optimizer = tf.train.AdagradOptimizer
                else:
                    # Fail loudly here; otherwise `Optimizer` would be unbound
                    # and the next line would raise an opaque UnboundLocalError.
                    raise ValueError(
                        "Unsupported optimizer %r; expected 'adam' or 'adagrad'"
                        % (self.config.opt,))

                optimizer = Optimizer(lr)
                grad_vars = optimizer.compute_gradients(loss=loss,
                                                        var_list=var_list)
                # Variables that do not influence `loss` yield a None gradient;
                # tf.clip_by_value cannot take None, so those pairs are skipped.
                clipped_grad_vars = [
                    (tf.clip_by_value(grad, -self.config.grad_clip,
                                      self.config.grad_clip), var)
                    for grad, var in grad_vars if grad is not None
                ]
                opt = optimizer.apply_gradients(clipped_grad_vars,
                                                global_step=global_step)
                return opt, grad_vars, clipped_grad_vars

            # ------------------------------Loss Setting------------------------------
            if self.config.turn:
                self.loss = self.loss_recon + \
                             self.beta * tf.maximum(tf.maximum(self.loss_kl_sent_gmm, self.config.capacity_gmm) \
                                                            - self.loss_kl_topic_gmm_reverse, self.config.margin_gmm) + \
                             self.beta * self.loss_kl_root + \
                             self.topic_loss_recon + \
                             self.beta * self.topic_loss_kl_bow + \
                             self.beta * self.topic_loss_kl_prob + \
                             self.config.lam_reg * self.loss_reg

                self.opt, self.grad_vars, self.clipped_grad_vars = \
                    get_opt(self.loss, var_list=list(tf.trainable_variables('emb') + tf.trainable_variables('enc') + tf.trainable_variables('dec')), \
                                lr=self.config.lr, global_step=self.global_step)

                self.loss_disc = self.beta_disc * self.config.lam_disc * self.loss_disc_topic + \
                                    self.beta * tf.maximum(self.loss_kl_prob, self.config.capacity_prob)

                self.opt_disc, self.grad_vars_disc, self.clipped_grad_vars_disc = \
                    get_opt(self.loss_disc, var_list=list(tf.trainable_variables('emb') + tf.trainable_variables('disc')), lr=self.config.lr_disc)

            else:
                self.loss = self.loss_recon + \
                             self.beta * tf.maximum(tf.maximum(self.loss_kl_sent_gmm, self.config.capacity_gmm) \
                                                            - self.loss_kl_topic_gmm_reverse, self.config.margin_gmm) + \
                             self.beta * self.loss_kl_root + \
                             self.topic_loss_recon + \
                             self.beta * self.topic_loss_kl_bow + \
                             self.beta * self.topic_loss_kl_prob + \
                             self.beta_disc * self.config.lam_disc * self.loss_disc_topic + \
                             self.beta * tf.maximum(self.loss_kl_prob, self.config.capacity_prob) + \
                             self.config.lam_reg * self.loss_reg
                self.loss_disc = tf.constant(0, dtype=tf.float32)

                self.opt, self.grad_vars, self.clipped_grad_vars = \
                    get_opt(self.loss, var_list=tf.trainable_variables(), lr=self.config.lr, global_step=self.global_step)
                self.opt_disc = tf.constant(0, dtype=tf.float32)

            # ------------------------------Evaluation------------------------------
            self.loss_list_train = [self.loss, self.loss_disc, self.loss_recon, self.loss_kl_prob, self.loss_kl_sent_gmm, self.loss_kl_topic_gmm_reverse, \
                self.loss_kl_root, self.loss_disc_topic, self.topic_loss_recon, self.topic_loss_kl_bow, self.topic_loss_kl_prob, self.loss_reg, tf.constant(0)]
            self.loss_list_eval = [self.loss, self.loss_disc, self.loss_recon, self.loss_kl_prob, self.loss_kl_sent_gmm, self.loss_kl_topic_gmm_reverse, \
                self.loss_kl_root, self.loss_disc_topic, self.topic_loss_recon, self.topic_loss_kl_bow, self.topic_loss_kl_prob, self.loss_reg, self.loss_kl_topic_gmm]
            self.loss_sum = (self.loss_recon + self.loss_kl_prob + self.loss_kl_sent_gmm + self.loss_kl_root + self.loss_disc_topic + \
                                 self.topic_loss_recon + self.topic_loss_kl_bow + self.topic_loss_kl_prob) * self.n_sents
Beispiel #57
0
  def _sample_n(self, n, seed=None):
    """Draw `n` samples from the mixture.

    Two code paths are implemented:

    * If `self._use_static_graph` is set, every component is sampled `n`
      times, the results are stacked, and a one-hot mask built from the
      categorical draws keeps one component per sample slot.
    * Otherwise the categorical draws partition the sample slots by
      component (`tf.dynamic_partition`), component `c` is sampled once per
      slot assigned to it, the matching batch entries are gathered, and the
      pieces are put back in order with `tf.dynamic_stitch`.

    Args:
      n: Number of samples. In the dynamic path it is converted to a tensor
        and, when its value is statically known, back to a Python `int`.
      seed: Seed handed to `self.cat.sample`. A fresh seed is derived from
        it with `distribution_util.gen_new_seed` before each component is
        sampled, in both code paths.

    Returns:
      A tensor of samples. In the dynamic path it is reshaped to the shape
      of the categorical draws followed by the event shape.
    """
    if self._use_static_graph:
      # This sampling approach is almost the same as the approach used by
      # `MixtureSameFamily`. The differences are due to having a list of
      # `Distribution` objects rather than a single object, and maintaining
      # random seed management that is consistent with the non-static code path.
      samples = []
      cat_samples = self.cat.sample(n, seed=seed)
      for c in range(self.num_components):
        # Derive a new seed per component, chained from the previous one.
        seed = distribution_util.gen_new_seed(seed, "mixture")
        samples.append(self.components[c].sample(n, seed=seed))
      x = tf.stack(samples, -self._static_event_shape.ndims - 1)  # [n, B, k, E]
      npdt = x.dtype.as_numpy_dtype
      # One-hot over the component axis, in the same dtype as the samples so
      # it can be multiplied with them directly.
      mask = tf.one_hot(
          indices=cat_samples,  # [n, B]
          depth=self._num_components,  # == k
          on_value=np.ones([], dtype=npdt),
          off_value=np.zeros([], dtype=npdt))  # [n, B, k]
      mask = distribution_utils.pad_mixture_dimensions(
          mask, self, self._cat,
          self._static_event_shape.ndims)                   # [n, B, k, [1]*e]
      # Summing over the component axis leaves only the selected component.
      return tf.reduce_sum(
          x * mask, axis=-1 - self._static_event_shape.ndims)  # [n, B, E]

    with tf.control_dependencies(self._assertions):
      n = tf.convert_to_tensor(n, name="n")
      static_n = tensor_util.constant_value(n)
      # Prefer a Python int when the value of n is known at graph-build time.
      n = int(static_n) if static_n is not None else n
      cat_samples = self.cat.sample(n, seed=seed)

      # For each of samples / batch / event shape: use the static shape when
      # it is fully defined, otherwise fall back to the dynamic shape tensor.
      static_samples_shape = cat_samples.get_shape()
      if static_samples_shape.is_fully_defined():
        samples_shape = static_samples_shape.as_list()
        samples_size = static_samples_shape.num_elements()
      else:
        samples_shape = tf.shape(cat_samples)
        samples_size = tf.size(cat_samples)
      static_batch_shape = self.batch_shape
      if static_batch_shape.is_fully_defined():
        batch_shape = static_batch_shape.as_list()
        batch_size = static_batch_shape.num_elements()
      else:
        batch_shape = self.batch_shape_tensor()
        batch_size = tf.reduce_prod(batch_shape)
      static_event_shape = self.event_shape
      if static_event_shape.is_fully_defined():
        event_shape = np.array(static_event_shape.as_list(), dtype=np.int32)
      else:
        event_shape = self.event_shape_tensor()

      # Get indices into the raw cat sampling tensor. We will
      # need these to stitch sample values back out after sampling
      # within the component partitions.
      samples_raw_indices = tf.reshape(tf.range(0, samples_size), samples_shape)

      # Partition the raw indices so that we can use
      # dynamic_stitch later to reconstruct the samples from the
      # known partitions.
      partitioned_samples_indices = tf.dynamic_partition(
          data=samples_raw_indices,
          partitions=cat_samples,
          num_partitions=self.num_components)

      # Copy the batch indices n times, as we will need to know
      # these to pull out the appropriate rows within the
      # component partitions.
      batch_raw_indices = tf.reshape(
          tf.tile(tf.range(0, batch_size), [n]), samples_shape)

      # Explanation of the dynamic partitioning below:
      #   batch indices are i.e., [0, 1, 0, 1, 0, 1]
      # Suppose partitions are:
      #     [1 1 0 0 1 1]
      # After partitioning, batch indices are cut as:
      #     [batch_indices[x] for x in 2, 3]
      #     [batch_indices[x] for x in 0, 1, 4, 5]
      # i.e.
      #     [1 1] and [0 0 0 0]
      # Now we sample n=2 from part 0 and n=4 from part 1.
      # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
      # and for part 1 we want samples from batch entries 0, 0, 0, 0
      #   (samples 0, 1, 2, 3).
      partitioned_batch_indices = tf.dynamic_partition(
          data=batch_raw_indices,
          partitions=cat_samples,
          num_partitions=self.num_components)
      samples_class = [None for _ in range(self.num_components)]

      for c in range(self.num_components):
        # Number of sample slots that the categorical draw assigned to c.
        n_class = tf.size(partitioned_samples_indices[c])
        seed = distribution_util.gen_new_seed(seed, "mixture")
        samples_class_c = self.components[c].sample(n_class, seed=seed)

        # Pull out the correct batch entries from each index.
        # To do this, we may have to flatten the batch shape.

        # For sample s, batch element b of component c, we get the
        # partitioned batch indices from
        # partitioned_batch_indices[c]; and shift each element by
        # the sample index. The final lookup can be thought of as
        # a matrix gather along locations (s, b) in
        # samples_class_c where the n_class rows correspond to
        # samples within this component and the batch_size columns
        # correspond to batch elements within the component.
        #
        # Thus the lookup index is
        #   lookup[c, i] = batch_size * s[i] + b[c, i]
        # for i = 0 ... n_class[c] - 1.
        lookup_partitioned_batch_indices = (
            batch_size * tf.range(n_class) + partitioned_batch_indices[c])
        # Flatten (sample, batch) into one leading axis so a single gather
        # with the flat lookup index picks the wanted rows.
        samples_class_c = tf.reshape(
            samples_class_c, tf.concat([[n_class * batch_size], event_shape],
                                       0))
        samples_class_c = tf.gather(
            samples_class_c,
            lookup_partitioned_batch_indices,
            name="samples_class_c_gather")
        samples_class[c] = samples_class_c

      # Stitch back together the samples across the components.
      lhs_flat_ret = tf.dynamic_stitch(
          indices=partitioned_samples_indices, data=samples_class)
      # Reshape back to proper sample, batch, and event shape.
      ret = tf.reshape(
          lhs_flat_ret, tf.concat(
              [samples_shape, self.event_shape_tensor()], 0))
      # Re-attach whatever static shape information is available.
      ret.set_shape(
          tf.TensorShape(static_samples_shape).concatenate(self.event_shape))
      return ret
Beispiel #58
0
def generate_dynamic_mask(inputs, lengths, present_rate, mask_id, boa_id,
                          eoa_id, pad_id, partition_num):
    """Mask `partition_num` spans per row of `inputs` and build the answers.

    The span positions are computed in NumPy (`_fill_mask_py_func`, run
    through `tf.py_func`). The concatenated answers are then split into one
    tensor per span with `tf.dynamic_partition`, and the templates come from
    `_prepare_squeezed_template` (defined outside this function).

    :param inputs: token ids; indexed as `inputs[row][start:end]`, so
        presumably [batch_size, seq_len] -- confirm against the caller.
    :param lengths: per-row sequence lengths.
    :param present_rate: fraction of tokens kept; each row masks
        `(length - 2) * (1 - present_rate)` tokens, truncated to an integer.
    :param mask_id: passed on to `_prepare_squeezed_template`.
    :param boa_id: id prepended to every padded answer segment.
    :param eoa_id: id appended to every answer segment before padding.
    :param pad_id: passed to `_pad_array_list` and
        `_prepare_squeezed_template`.
    :param partition_num: number of masked spans per row (Python int; also
        used as `num_partitions` for `tf.dynamic_partition`).
    :return: tuple `(masks, answers, after_pad_ans_lens, true_ans_lens,
        templates, template_masks, start_positions, end_positions)`, where
        `answers` is a list with one tensor per span.
    """
    def _fill_mask(inputs, lengths, present_rate, eoa_id, pad_id,
                   partition_num):
        """
        Wrap `_fill_mask_py_func` in `tf.py_func`.

        :param inputs: token ids, forwarded to the NumPy routine.
        :param lengths: per-row sequence lengths.
        :param present_rate: fraction of tokens left unmasked.
        :param eoa_id: id appended to each answer segment.
        :param pad_id: id used when padding answer segments.
        :param partition_num: number of masked spans per row.
        :return: seven tensors: masks, start_positions, end_positions,
            answers, after_pad_ans_lens, mask_lengths, partitions.
            `answers` concatenates the padded answer segments along axis 1;
            `start_positions` / `end_positions` mark the range of each span;
            `partitions` gives the span index of every `answers` column.
        """
        def _fill_mask_py_func(inputs, lengths, present_rate, eoa_id, pad_id,
                               partition_num):
            # NumPy implementation; `boa_id` is taken from the enclosing
            # generate_dynamic_mask scope rather than passed in.
            # TODO(wanrong): bound check
            def _get_split_pos(masked_num):
                # split masked_num into partition_num segments
                # NOTE(review): this branch returns partition_num - 1 entries,
                # while the general branch below returns partition_num + 1
                # cumulative boundaries -- confirm this is intended.
                if masked_num <= 1:
                    return [1] * (partition_num - 1)

                splitted = np.array_split(range(masked_num), partition_num)
                split_positions = [a.size for a in splitted]
                # Turn the segment sizes into a running total ...
                for i in range(1, partition_num):
                    split_positions[i] += split_positions[i - 1]
                # ... and prepend 0 so consecutive pairs delimit segments.
                return np.insert(split_positions, 0, 0, axis=0)

            batch_size = inputs.shape[0]
            # Number of tokens to mask per row.
            masked_nums = ((lengths - 2) * (1 - present_rate)).astype(
                np.int64)  # [batch_size]
            split_positions = \
                [_get_split_pos(masked_num) for masked_num in masked_nums]  # [batch_size, partition_num+1]

            # calculate the length of each mask segment
            mask_lengths = np.zeros(shape=(batch_size, partition_num),
                                    dtype=np.int64)
            left_len = np.zeros(shape=(batch_size, partition_num + 1),
                                dtype=np.int64)  # one extra trailing column, dropped below
            for bid, split_position in enumerate(split_positions):
                for idx, (prev, cur) in enumerate(
                        zip(split_position[:-1], split_position[1:])):
                    mask_lengths[bid][idx] = cur - prev
                left_len[bid][-1] = 0  # leave <EOS> unmasked
                # Walk right-to-left: left_len[idx] accumulates
                # (mask length + 1) over segments idx .. last.
                for idx, cur_len in reversed(list(enumerate(
                        mask_lengths[bid]))):
                    left_len[bid][idx] = left_len[bid][idx + 1] + cur_len + 1
            left_len = left_len[:, :-1]  # remove last column

            # splitting
            # start/end_positions begin with a zero column that serves as the
            # "previous end" for the first segment; it is stripped on return.
            start_positions = np.zeros(shape=(batch_size, 1))
            end_positions = np.zeros(shape=(batch_size, 1))
            answers = np.zeros((batch_size, 0))
            partitions = np.array([])
            masks = np.full_like(inputs, 0)
            after_pad_ans_lens = np.zeros(shape=partition_num)
            boa = np.full(shape=(batch_size, 1), fill_value=boa_id)
            for i in range(1, partition_num + 1):
                idx = i - 1  # ignore padding 0 in start/end_positions
                # get start and end position for current mask
                cur_start_pos = np.zeros(shape=(batch_size, 1), dtype=np.int64)
                cur_end_pos = np.zeros(shape=(batch_size, 1), dtype=np.int64)
                cur_answers = []
                for bid in range(batch_size):
                    # Candidate window [s, e): s is just past the previous
                    # segment's end, e is derived from left_len for this
                    # segment.
                    s = end_positions[bid][idx] + 1
                    e = lengths[bid] - left_len[bid][idx] + 1
                    # Stored into an int64 array, so the fractional part of
                    # the offset is dropped.
                    cur_start_pos[bid][0] = s + (e - s) / (partition_num + 1)
                    cur_end_pos[bid][
                        0] = cur_start_pos[bid][0] + mask_lengths[bid][idx]
                    # Answer = the masked tokens followed by eoa_id.
                    cur_answers.append(
                        np.append(
                            inputs[bid]
                            [cur_start_pos[bid][0]:cur_end_pos[bid][0]],
                            eoa_id))
                    # update mask
                    for j in range(cur_start_pos[bid][0], cur_end_pos[bid][0]):
                        masks[bid][j] = 1  # set masked element to 1
                start_positions = np.concatenate(
                    (start_positions, cur_start_pos), axis=1)
                end_positions = np.concatenate((end_positions, cur_end_pos),
                                               axis=1)

                # pad cur_answers to same length
                # (_pad_array_list is defined elsewhere; it is used here as
                # returning the padded array and its padded length.)
                cur_padded_ans, cur_max_len = _pad_array_list(
                    cur_answers, mask_lengths[:, idx], pad_id)
                # Prepend the boa_id column to every answer of this segment.
                cur_padded_ans = np.concatenate((boa, cur_padded_ans), axis=1)
                after_pad_ans_lens[idx] = cur_max_len
                answers = np.concatenate((answers, cur_padded_ans), axis=1)

                # generate current partition index
                # (one entry equal to idx for every column just appended)
                cur_idx = np.full_like(cur_padded_ans[0], idx)
                partitions = np.concatenate((partitions, cur_idx), axis=0)

            # Drop the leading zero column of start/end_positions and cast
            # everything to the dtypes declared in the tf.py_func call.
            return masks, start_positions[:, 1:].astype(np.int64),\
                   end_positions[:, 1:].astype(np.int64),\
                   answers.astype(np.int64), after_pad_ans_lens.astype(np.int64), \
                   mask_lengths.astype(np.int32), partitions.astype(np.int32)

        # These scalars are wrapped in tf.Variable before being handed to
        # tf.py_func as inputs.
        eoa_id = tf.Variable(eoa_id, dtype=tf.int64)
        present_rate = tf.Variable(present_rate, dtype=tf.float32)
        partition_num = tf.Variable(partition_num, dtype=tf.int64)
        return tf.py_func(
            _fill_mask_py_func,
            [inputs, lengths, present_rate, eoa_id, pad_id, partition_num], [
                tf.int64, tf.int64, tf.int64, tf.int64, tf.int64, tf.int32,
                tf.int32
            ])

    masks, start_positions, end_positions, answers, after_pad_ans_lens, true_ans_lens, partitions = \
        _fill_mask(inputs, lengths, present_rate, eoa_id, pad_id, partition_num)
    # dynamic_partition splits along the first axis, so move the answer
    # columns to the front, split them by span index, and transpose back.
    answers = tf.dynamic_partition(
        data=tf.transpose(answers, perm=[1, 0]),  # [sum(lens), batch_size]
        partitions=partitions,
        num_partitions=partition_num)
    answers = [tf.transpose(ans, perm=[1, 0]) for ans in answers]
    mask_id = tf.Variable(mask_id, dtype=tf.int64)
    pad_id = tf.Variable(pad_id, dtype=tf.int64)
    templates, template_masks = \
        _prepare_squeezed_template(inputs, masks, start_positions, end_positions, mask_id, pad_id)

    return masks, answers, after_pad_ans_lens, true_ans_lens, templates, template_masks, \
           start_positions, end_positions
# K-means graph construction (legacy TensorFlow API: tf.sub, tf.concat(dim, values)).

# Initial centroids: the rows of `vector_values` selected by `centroid_indices`.
centroids = tf.Variable(tf.gather(vector_values, centroid_indices))

# Insert singleton axes so the subtraction broadcasts every vector against
# every centroid.
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroids = tf.expand_dims(centroids, 1)

vectors_subtration = tf.sub(expanded_vectors, expanded_centroids)
# Squared Euclidean distance, summed over the coordinate axis.
euclidean_distances = tf.reduce_sum(tf.square(vectors_subtration), 2)

# For each vector, the index of its nearest centroid.
assignments = tf.to_int32(tf.argmin(euclidean_distances, 0))

# tf.dynamic_partition groups the rows of its data by partition id, e.g.:
#     partitions = [0, 0, 1, 1, 0]
#     num_partitions = 2
#     data = [10, 20, 30, 40, 50]
#     outputs[0] = [10, 20, 50]
#     outputs[1] = [30, 40]
# (This example used to be pasted here as live statements; `outputs` was
# undefined, so the script raised NameError before building the graph.)
partitions = tf.dynamic_partition(vectors, assignments, num_clusters)

# New centroid of each cluster = mean of the vectors assigned to it.
means = tf.concat(0,
                  [tf.expand_dims(tf.reduce_mean(partition, 0), 0)
                   for partition in partitions])
# Write the means back into the variable. Without the assignment, running
# `update_centroids` leaves `centroids` at its initial value, so the
# training loop that fetches `centroids` would never see any progress.
update_centroids = tf.assign(centroids, means)


init_op = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init_op)
for step in xrange(num_steps):
   _, centroid_values, assignment_values =\
      sess.run([update_centroids,\
                centroids,\
Beispiel #60
0
    def step_m(self, x):
        """Run one controller + memory step on `x` and return the output.

        The method feeds `x` together with the previous read vectors through
        two tanh layers, splits the resulting interface vector into ten
        fields with `tf.dynamic_partition`, and then updates, in this order:
        `usage_vec`, `write_weights`, `mem_mat`, `link_mat`,
        `precedence_weight`, `read_weights` and `read_vecs`. `nn_out` and
        `interface_vec` are stored on `self` as well.

        Returns `self.nn_out` plus the new read vectors projected through
        `self.read_vecs_out_weight`.
        """
        heads, width = self.num_heads, self.word_size
        flat_reads = [1, heads * width]

        # Controller input: x with the flattened previous read vectors
        # appended along axis 1.
        controller_in = tf.concat(
            [x, tf.reshape(self.read_vecs, flat_reads)], 1)

        # Two-layer tanh controller.
        hidden1 = tf.nn.tanh(tf.matmul(controller_in, self.W1) + self.b1)
        hidden2 = tf.nn.tanh(tf.matmul(hidden1, self.W2) + self.b2)

        # Direct output and the interface vector that drives the memory.
        self.nn_out = tf.matmul(hidden2, self.nn_out_weights)
        self.interface_vec = tf.matmul(hidden2, self.interface_weights)

        # Width of each of the ten interface fields, in partition-id order:
        # read keys, read strengths, write key, write strength, erase vector,
        # write vector, free gates, allocation gate, write gate, read modes.
        field_widths = [heads * width, heads, width, 1, width, width,
                        heads, 1, 1, heads * 3]
        field_ids = []
        for field_id, field_width in enumerate(field_widths):
            field_ids.extend([field_id] * field_width)
        partition = tf.constant([field_ids], dtype=tf.int32)

        fields = tf.dynamic_partition(self.interface_vec, partition, 10)
        (raw_read_keys, raw_read_str, raw_write_key, raw_write_str,
         raw_erase, raw_write_vec, raw_free, raw_alloc, raw_write_gate,
         raw_modes) = fields

        # Read-head parameters.
        read_keys = tf.reshape(raw_read_keys, [heads, width])
        read_str = 1 + tf.nn.softplus(tf.expand_dims(raw_read_str, 0))

        # Write-head parameters (a leading axis of size 1 is added).
        write_key = tf.expand_dims(raw_write_key, 0)
        write_str = 1 + tf.nn.softplus(tf.expand_dims(raw_write_str, 0))
        erase_vec = tf.nn.sigmoid(tf.expand_dims(raw_erase, 0))
        write_vec = tf.expand_dims(raw_write_vec, 0)

        # Gates, squashed into (0, 1).
        free_gates = tf.nn.sigmoid(tf.expand_dims(raw_free, 0))
        alloc_gate = tf.nn.sigmoid(raw_alloc)
        write_gate = tf.nn.sigmoid(raw_write_gate)

        # Row 0 weights the backward read, row 1 the content lookup and
        # row 2 the forward read (see their use further down).
        # NOTE(review): tf.nn.softmax normalises over the last axis, i.e.
        # across heads rather than across the three modes -- confirm intended.
        read_modes = tf.nn.softmax(tf.reshape(raw_modes, [3, heads]))

        # Usage update; uses the write weights from the previous step.
        retention_vec = tf.reduce_prod(
            1 - free_gates * self.read_weights, reduction_indices=1)
        prev_usage, prev_write = self.usage_vec, self.write_weights
        self.usage_vec = (
            prev_usage + prev_write - prev_usage * prev_write) * retention_vec

        # New write weights: gated blend of allocation-based and
        # content-based addressing. Called after the usage update above.
        alloc_weights = self.allocation_weighting()
        write_lookup_weights = self.content_lookup(write_key, write_str)
        self.write_weights = write_gate * (
            alloc_gate * alloc_weights
            + (1 - alloc_gate) * write_lookup_weights)

        # Erase, then add, at the written locations.
        keep_factor = 1 - tf.matmul(self.write_weights, erase_vec)
        self.mem_mat = (self.mem_mat * keep_factor
                        + tf.matmul(self.write_weights, write_vec))

        # Temporal link matrix update, followed by zeroing its diagonal.
        spread = tf.matmul(self.write_weights, tf.ones([1, self.num_words]))
        links = ((1 - spread - tf.transpose(spread)) * self.link_mat
                 + tf.matmul(self.write_weights, self.precedence_weight,
                             transpose_b=True))
        off_diagonal = (
            tf.ones([self.num_words, self.num_words])
            - tf.constant(np.identity(self.num_words, dtype=np.float32)))
        self.link_mat = links * off_diagonal

        # Precedence weights decay by the total amount just written.
        total_written = tf.reduce_sum(self.write_weights, reduction_indices=0)
        self.precedence_weight = (
            (1 - total_written) * self.precedence_weight + self.write_weights)

        # Read weights: mode-weighted sum of forward, content and backward
        # addressing, all computed from the previous read weights.
        forward_reads = read_modes[2] * tf.matmul(
            self.link_mat, self.read_weights)
        content_reads = read_modes[1] * self.content_lookup(
            read_keys, read_str)
        backward_reads = read_modes[0] * tf.matmul(
            self.link_mat, self.read_weights, transpose_a=True)
        self.read_weights = backward_reads + content_reads + forward_reads

        # Read vectors: memory transposed times read weights, transposed back.
        self.read_vecs = tf.transpose(
            tf.matmul(self.mem_mat, self.read_weights, transpose_a=True))

        # Project the flattened read vectors and add them to the output.
        read_contribution = tf.matmul(
            tf.reshape(self.read_vecs, flat_reads), self.read_vecs_out_weight)
        return self.nn_out + read_contribution