def get_box3d_corners_helper(centers, headings, sizes):
    """TF layer. Input: (N,3), (N,), (N,3), Output: (N,8,3)"""
    N = centers.get_shape()[0].value
    l = tf.slice(sizes, [0, 0], [-1, 1])  # (N,1)
    w = tf.slice(sizes, [0, 1], [-1, 1])  # (N,1)
    h = tf.slice(sizes, [0, 2], [-1, 1])  # (N,1)
    x_corners = tf.concat([l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2], axis=1)  # (N,8)
    y_corners = tf.concat([h/2, h/2, h/2, h/2, -h/2, -h/2, -h/2, -h/2], axis=1)  # (N,8)
    z_corners = tf.concat([w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2], axis=1)  # (N,8)
    corners = tf.concat([tf.expand_dims(x_corners, 1),
                         tf.expand_dims(y_corners, 1),
                         tf.expand_dims(z_corners, 1)], axis=1)  # (N,3,8)
    c = tf.cos(headings)
    s = tf.sin(headings)
    ones = tf.ones([N], dtype=tf.float32)
    zeros = tf.zeros([N], dtype=tf.float32)
    row1 = tf.stack([c, zeros, s], axis=1)  # (N,3)
    row2 = tf.stack([zeros, ones, zeros], axis=1)
    row3 = tf.stack([-s, zeros, c], axis=1)
    R = tf.concat([tf.expand_dims(row1, 1),
                   tf.expand_dims(row2, 1),
                   tf.expand_dims(row3, 1)], axis=1)  # (N,3,3)
    corners_3d = tf.matmul(R, corners)  # (N,3,8)
    corners_3d += tf.tile(tf.expand_dims(centers, 2), [1, 1, 8])  # (N,3,8)
    corners_3d = tf.transpose(corners_3d, perm=[0, 2, 1])  # (N,8,3)
    return corners_3d
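
# A minimal usage sketch (an assumption, not part of the original source):
# corners of a single 4 x 2 x 1.5 box with zero heading, under TF 1.x graph mode.
with tf.Session() as sess:
    centers = tf.constant([[0., 0., 0.]])   # (1,3) box center
    headings = tf.constant([0.])            # (1,) yaw in radians
    sizes = tf.constant([[4., 2., 1.5]])    # (1,3) length, width, height
    corners = get_box3d_corners_helper(centers, headings, sizes)
    print(sess.run(corners).shape)          # (1, 8, 3)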
def get_log_rhos(target_action_log_probs, behaviour_action_log_probs):
    """Computes the log importance-sampling ratios (log_rhos) for V-trace from
    the selected log-probs of the target and behaviour policies over
    multi-discrete actions."""
    t = tf.stack(target_action_log_probs)
    b = tf.stack(behaviour_action_log_probs)
    log_rhos = tf.reduce_sum(t - b, axis=0)
    return log_rhos
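
# Hedged example (assumed values): two action dimensions over three timesteps;
# identical target and behaviour policies give log_rhos of zero.
target_lp = [tf.log(tf.constant([0.5, 0.4, 0.9])), tf.log(tf.constant([0.3, 0.8, 0.2]))]
behaviour_lp = [tf.log(tf.constant([0.5, 0.4, 0.9])), tf.log(tf.constant([0.3, 0.8, 0.2]))]
log_rhos = get_log_rhos(target_lp, behaviour_lp)  # shape (3,), all zeros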
def generate_system(self, eqs):
    resolve = lambda eq: eval(eq, self.env) if type(eq) is str else eq
    self.eqn_exp = {nm: resolve(eq) for (nm, eq) in eqs.items()}  # equation system
    self.eqn_vec = tf.concat(list(self.eqn_exp.values()), 0)
    self.eqn_spec = {nm: int(eq.get_shape()[0]) for (nm, eq) in self.eqn_exp.items()}
    self.eqn_sz = sum(self.eqn_spec.values())

    # find gradients
    n_eqns = self.eqn_vec.get_shape()[0]
    eqn_list = tf.split(self.eqn_vec, n_eqns)
    self.par_jac = tf.stack([tf.gradients(eqn, self.par_vec)[0] for eqn in eqn_list])
    self.var_jac = tf.stack([tf.gradients(eqn, self.var_vec)[0] for eqn in eqn_list])

    # create functions
    def state_evaler(f, matrix=False):
        def ev(p, v):
            y = f.eval({self.par_vec: p, self.var_vec: v})
            return ensure_matrix(y) if matrix else y
        return ev

    self.eqn_fun = state_evaler(self.eqn_vec)
    self.parjac_fun = state_evaler(self.par_jac, matrix=True)
    self.varjac_fun = state_evaler(self.var_jac, matrix=True)
def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Generate Radial Symmetry Function"""
    if in_layers is None:
        in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)
    self.build()
    d_cutoff = in_layers[0].out_tensor
    d = in_layers[1].out_tensor
    if self.atomic_number_differentiated:
        atom_numbers = in_layers[2].out_tensor
        atom_number_embedded = tf.nn.embedding_lookup(self.atom_number_embedding,
                                                      atom_numbers)
    d_cutoff = tf.stack([d_cutoff] * self.length, axis=3)
    d = tf.stack([d] * self.length, axis=3)
    Rs = tf.reshape(self.Rs, (1, 1, 1, -1))
    ita = tf.reshape(self.ita, (1, 1, 1, -1))
    out_tensor = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
        out_tensors = []
        for atom_type in self.atom_number_cases:
            selected_atoms = tf.expand_dims(
                tf.expand_dims(atom_number_embedded[:, :, atom_type], axis=1), axis=3)
            out_tensors.append(tf.reduce_sum(out_tensor * selected_atoms, axis=2))
        self.out_tensor = tf.concat(out_tensors, axis=2)
    else:
        self.out_tensor = tf.reduce_sum(out_tensor, axis=2)
def iou(self, boxes1, boxes2):
    """Calculate IoUs.

    Args:
      boxes1: 4-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4]
        ====> (x_center, y_center, w, h)
      boxes2: 1-D tensor [4] ===> (x_center, y_center, w, h)
    Return:
      iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    """
    # convert both box formats to corner form [x1, y1, x2, y2]
    boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2,
                       boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2,
                       boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2,
                       boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2])
    boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])
    boxes2 = tf.stack([boxes2[0] - boxes2[2] / 2,
                       boxes2[1] - boxes2[3] / 2,
                       boxes2[0] + boxes2[2] / 2,
                       boxes2[1] + boxes2[3] / 2])

    # calculate the left-up point and the right-down point
    lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
    rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])

    # intersection
    intersection = rd - lu
    inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]
    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * \
           tf.cast(intersection[:, :, :, 1] > 0, tf.float32)
    inter_square = mask * inter_square

    # calculate the boxes1 square and boxes2 square
    square1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * \
              (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
    square2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])

    return inter_square / (square1 + square2 - inter_square + 1e-6)
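
# Hedged usage sketch (assumed: `net` is an instance of the class defining
# iou(), with a 7x7 grid and 2 boxes per cell as in YOLO-style detectors).
boxes1 = tf.ones([7, 7, 2, 4]) * tf.constant([0.5, 0.5, 0.2, 0.2])
boxes2 = tf.constant([0.5, 0.5, 0.2, 0.2])
iou_map = net.iou(boxes1, boxes2)  # (7, 7, 2); identical boxes give IoU close to 1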
def bboxes_resize(bbox_ref, bboxes, name=None):
    """Resize bounding boxes based on a reference bounding box, assuming that
    the latter is [0, 0, 1, 1] after the transform. Useful for updating a
    collection of boxes after cropping an image.
    """
    # bboxes can be a dictionary of class -> boxes; recurse over its values.
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            d_bboxes = {}
            for c in bboxes.keys():
                d_bboxes[c] = bboxes_resize(bbox_ref, bboxes[c])
            return d_bboxes

    # Tensor inputs.
    with tf.name_scope(name, 'bboxes_resize'):
        # Translate.
        v = tf.stack([bbox_ref[0], bbox_ref[1], bbox_ref[0], bbox_ref[1]])
        bboxes = bboxes - v
        # Scale.
        s = tf.stack([bbox_ref[2] - bbox_ref[0],
                      bbox_ref[3] - bbox_ref[1],
                      bbox_ref[2] - bbox_ref[0],
                      bbox_ref[3] - bbox_ref[1]])
        bboxes = bboxes / s
        return bboxes
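
# Hedged example (assumed values): after cropping to bbox_ref, a box identical
# to the reference maps to the unit box [0, 0, 1, 1].
bbox_ref = tf.constant([0.2, 0.2, 0.8, 0.8])
boxes = tf.constant([[0.2, 0.2, 0.8, 0.8],
                     [0.2, 0.2, 0.5, 0.5]])
resized = bboxes_resize(bbox_ref, boxes)  # rows: [0, 0, 1, 1] and [0, 0, 0.5, 0.5]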
def _summarize_input(self, groundtruth_boxes_list, match_list):
    """Creates tensorflow summaries for the input boxes and anchors.

    This function creates four summaries corresponding to the average number
    (over images in a batch) of (1) groundtruth boxes, (2) anchors marked as
    positive, (3) anchors marked as negative, and (4) anchors marked as
    ignored.

    Args:
      groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
        containing corners of the groundtruth boxes.
      match_list: a list of matcher.Match objects encoding the match between
        anchors and groundtruth boxes for each image of the batch,
        with rows of the Match objects corresponding to groundtruth boxes
        and columns corresponding to anchors.
    """
    num_boxes_per_image = tf.stack(
        [tf.shape(x)[0] for x in groundtruth_boxes_list])
    pos_anchors_per_image = tf.stack(
        [match.num_matched_columns() for match in match_list])
    neg_anchors_per_image = tf.stack(
        [match.num_unmatched_columns() for match in match_list])
    ignored_anchors_per_image = tf.stack(
        [match.num_ignored_columns() for match in match_list])
    tf.summary.scalar('Input/AvgNumGroundtruthBoxesPerImage',
                      tf.reduce_mean(tf.to_float(num_boxes_per_image)))
    tf.summary.scalar('Input/AvgNumPositiveAnchorsPerImage',
                      tf.reduce_mean(tf.to_float(pos_anchors_per_image)))
    tf.summary.scalar('Input/AvgNumNegativeAnchorsPerImage',
                      tf.reduce_mean(tf.to_float(neg_anchors_per_image)))
    tf.summary.scalar('Input/AvgNumIgnoredAnchorsPerImage',
                      tf.reduce_mean(tf.to_float(ignored_anchors_per_image)))
def hard_negative_mining():
    bboxes_per_batch = tf.unstack(bboxes)
    classification_loss_per_batch = tf.unstack(classification_loss)
    num_positives_per_batch = tf.unstack(tf.reduce_sum(positives, axis=-1))
    neg_class_loss_per_batch = tf.unstack(neg_class_loss_all)

    neg_class_losses = []
    total_negatives = []

    for bboxes_per_image, classification_loss_per_image, num_positives_per_image, neg_class_loss_per_image in \
            zip(bboxes_per_batch, classification_loss_per_batch,
                num_positives_per_batch, neg_class_loss_per_batch):
        min_negatives_keep = tf.maximum(self.neg_pos_ratio * num_positives_per_image, 3)
        num_negatives_keep = tf.minimum(min_negatives_keep,
                                        tf.count_nonzero(neg_class_loss_per_image, dtype=tf.float32))

        indices = tf.image.non_max_suppression(bboxes_per_image,
                                               classification_loss_per_image,
                                               tf.to_int32(num_negatives_keep),
                                               iou_threshold=0.99)
        num_negatives = tf.size(indices)
        total_negatives.append(num_negatives)
        expanded_indexes = tf.expand_dims(indices, axis=1)  # shape: (num_negatives, 1)
        negatives_keep = tf.scatter_nd(expanded_indexes,
                                       updates=tf.ones_like(indices, dtype=tf.int32),
                                       shape=tf.shape(classification_loss_per_image))  # shape: (num_priors,)
        negatives_keep = tf.to_float(tf.reshape(negatives_keep, [num_priors]))  # shape: (batch_size, num_priors)
        neg_class_losses.append(tf.reduce_sum(classification_loss_per_image * negatives_keep, axis=-1))  # shape: (1,)

    return tf.stack(neg_class_losses), tf.reduce_sum(tf.stack(total_negatives))
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    num_inducing = len(self.feature)
    err = self.Y - self.mean_function(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kus = self.feature.Kuf(self.kern, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
def _my_top_k(x, k):
    """GPU-compatible version of top-k that works for very small constant k.

    Calls argmax repeatedly.

    tf.nn.top_k is implemented for GPU, but the gradient, sparse_to_dense,
    seems not to be, so if we use tf.nn.top_k, then both the top_k and its
    gradient go on cpu. Once this is not an issue, this function becomes
    obsolete and should be replaced by tf.nn.top_k.

    Args:
      x: a 2d Tensor.
      k: a small integer.

    Returns:
      values: a Tensor of shape [batch_size, k]
      indices: an int32 Tensor of shape [batch_size, k]
    """
    if k > 10:
        return tf.nn.top_k(x, k)
    values = []
    indices = []
    depth = tf.shape(x)[1]
    for i in xrange(k):
        values.append(tf.reduce_max(x, 1))
        argmax = tf.argmax(x, 1)
        indices.append(argmax)
        if i + 1 < k:
            # mask out the current maximum so the next pass finds the runner-up
            x += tf.one_hot(argmax, depth, -1e9)
    return tf.stack(values, axis=1), tf.to_int32(tf.stack(indices, axis=1))
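
# Hedged sanity check (assumed values): for small k the helper should agree
# with tf.nn.top_k on the values.
x = tf.constant([[3., 1., 4., 1., 5.],
                 [9., 2., 6., 5., 3.]])
vals, idx = _my_top_k(x, 2)            # vals ~ [[5., 4.], [9., 6.]]
ref_vals, ref_idx = tf.nn.top_k(x, 2)  # same values, possibly tie-broken differently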
def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """Radial Symmetry Function"""
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
    Rs = tf.cast(np.reshape(Rs, (1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(ita, (1, 1, 1, -1)), tf.float32)
    length = ita.get_shape().as_list()[-1]

    d_cutoff = tf.stack([d_cutoff] * length, axis=3)
    d = tf.stack([d] * length, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
        out_tensors = []
        for atom_type in self.atom_cases:
            selected_atoms = tf.expand_dims(
                tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1), axis=3)
            out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
        return tf.concat(out_tensors, axis=2)
    else:
        return tf.reduce_sum(out, axis=2)
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian."""
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
        all_scores = []
        shard = tf.expand_dims(shard, 0)
        for c in xrange(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[c, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[c, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            inv_cov = tf.tile(
                tf.expand_dims(inverse, 0),
                tf.stack([self._num_examples, 1, 1]))
            diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
            m_left = tf.matmul(diff, inv_cov)
            all_scores.append(
                tf.sqrt(tf.matmul(
                    m_left, tf.transpose(diff, perm=[0, 2, 1]))))
        self._all_scores.append(
            tf.reshape(
                tf.concat_v2(all_scores, 1),
                tf.stack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat_v2(self._all_scores, 0)
    assignments = tf.concat_v2(self.assignments(), 0)
    rows = tf.to_int64(tf.range(0, self._num_examples))
    indices = tf.concat_v2(
        [tf.expand_dims(rows, 1), tf.expand_dims(assignments, 1)], 1)
    self._scores = tf.gather_nd(self._all_scores, indices)
def get_filters(R, filter_size, P=None, n_rings=None):
    """Perform single-frequency DFT on each ring of a polar-resampled patch."""
    k = filter_size
    filters = {}
    N = n_samples(k)
    from scipy.linalg import dft
    for m, r in R.iteritems():
        rsh = r.get_shape().as_list()
        # Get the basis matrices
        weights = get_interpolation_weights(k, m, n_rings=n_rings)
        DFT = dft(N)[m, :]
        LPF = np.dot(DFT, weights).T

        cosine = np.real(LPF).astype(np.float32)
        sine = np.imag(LPF).astype(np.float32)
        # Reshape for multiplication with radial profile
        cosine = tf.constant(cosine)
        sine = tf.constant(sine)
        # Project taps onto rotational basis
        r = tf.reshape(r, tf.stack([rsh[0], rsh[1]*rsh[2]]))
        ucos = tf.reshape(tf.matmul(cosine, r), tf.stack([k, k, rsh[1], rsh[2]]))
        usin = tf.reshape(tf.matmul(sine, r), tf.stack([k, k, rsh[1], rsh[2]]))
        if P is not None:
            # Rotate basis matrices
            ucos_ = tf.cos(P[m])*ucos + tf.sin(P[m])*usin
            usin = -tf.sin(P[m])*ucos + tf.cos(P[m])*usin
            ucos = ucos_
        filters[m] = (ucos, usin)
    return filters
def _batch_decode(self, box_encodings):
    """Decodes a batch of box encodings with respect to the anchors.

    Args:
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if present in the input `box_encodings`, None otherwise.
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = combined_shape[0]
    tiled_anchor_boxes = tf.tile(
        tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
    tiled_anchors_boxlist = box_list.BoxList(
        tf.reshape(tiled_anchor_boxes, [-1, 4]))
    decoded_boxes = self._box_coder.decode(
        tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
        tiled_anchors_boxlist)
    decoded_keypoints = None
    if decoded_boxes.has_field(fields.BoxListFields.keypoints):
        decoded_keypoints = decoded_boxes.get_field(
            fields.BoxListFields.keypoints)
        num_keypoints = decoded_keypoints.get_shape()[1]
        decoded_keypoints = tf.reshape(
            decoded_keypoints,
            tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
    decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack(
        [combined_shape[0], combined_shape[1], 4]))
    return decoded_boxes, decoded_keypoints
def bboxes_crop_or_pad(bboxes,
                       height, width,
                       offset_y, offset_x,
                       target_height, target_width):
    """Adapt bounding boxes to crop or pad operations.
    Coordinates are always supposed to be relative to the image.

    Arguments:
      bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max];
      height, width: Original image dimension;
      offset_y, offset_x: Offset to apply,
        negative if cropping, positive if padding;
      target_height, target_width: Target dimension after cropping / padding.
    """
    with tf.name_scope('bboxes_crop_or_pad'):
        # Rescale bounding boxes in pixels.
        scale = tf.cast(tf.stack([height, width, height, width]), bboxes.dtype)
        bboxes = bboxes * scale
        # Add offset.
        offset = tf.cast(tf.stack([offset_y, offset_x, offset_y, offset_x]), bboxes.dtype)
        bboxes = bboxes + offset
        # Rescale to target dimension.
        scale = tf.cast(tf.stack([target_height, target_width,
                                  target_height, target_width]), bboxes.dtype)
        bboxes = bboxes / scale
        return bboxes
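
# Hedged example (assumed values): padding a 100x100 image by 20 px on each
# side to 140x140 shifts and rescales the relative coordinates.
boxes = tf.constant([[0.1, 0.1, 0.9, 0.9]])
out = bboxes_crop_or_pad(boxes, height=100, width=100,
                         offset_y=20, offset_x=20,
                         target_height=140, target_width=140)
# (0.1 * 100 + 20) / 140 = 0.214..., (0.9 * 100 + 20) / 140 = 0.785...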
def _transform(theta, input_dim, out_size):
    num_batch = tf.shape(input=input_dim)[0]
    num_channels = tf.shape(input=input_dim)[3]
    theta = tf.reshape(theta, (-1, 2, 3))
    theta = tf.cast(theta, 'float32')

    # grid of (x_t, y_t, 1), eq (1) in ref [1]
    out_height = out_size[0]
    out_width = out_size[1]
    grid = _meshgrid(out_height, out_width)
    grid = tf.expand_dims(grid, 0)
    grid = tf.reshape(grid, [-1])
    grid = tf.tile(grid, tf.stack([num_batch]))
    grid = tf.reshape(grid, tf.stack([num_batch, 3, -1]))

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = tf.matmul(theta, grid)
    x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1])
    y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1])
    x_s_flat = tf.reshape(x_s, [-1])
    y_s_flat = tf.reshape(y_s, [-1])

    input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, out_size)

    output = tf.reshape(
        input_transformed,
        tf.stack([num_batch, out_height, out_width, num_channels]))
    return output
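
# Hedged usage sketch (assumed: _meshgrid and _interpolate from the same
# spatial-transformer module are in scope): an identity affine warp.
images = tf.placeholder(tf.float32, [None, 28, 28, 1])
theta = tf.tile(tf.constant([[1., 0., 0., 0., 1., 0.]]), [tf.shape(images)[0], 1])
warped = _transform(theta, images, out_size=(28, 28))  # same as the input, resampled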
def __init__(self, config):
    self.inputs = [ev.placeholder(config) for ev in config.evidence]
    exists = [ev.exists(i) for ev, i in zip(config.evidence, self.inputs)]
    zeros = tf.zeros([config.batch_size, config.latent_size], dtype=tf.float32)

    # Compute the denominator used for mean and covariance
    for ev in config.evidence:
        ev.init_sigma(config)
    d = [tf.where(exist,
                  tf.tile([1. / tf.square(ev.sigma)], [config.batch_size]),
                  tf.zeros(config.batch_size))
         for ev, exist in zip(config.evidence, exists)]
    d = 1. + tf.reduce_sum(tf.stack(d), axis=0)
    denom = tf.tile(tf.reshape(d, [-1, 1]), [1, config.latent_size])

    # Compute the mean of Psi
    with tf.variable_scope('mean'):
        # 1. compute encoding
        self.encodings = [ev.encode(i, config) for ev, i in zip(config.evidence, self.inputs)]
        encodings = [encoding / tf.square(ev.sigma)
                     for ev, encoding in zip(config.evidence, self.encodings)]

        # 2. pick only encodings from valid inputs that exist, otherwise pick zero encoding
        encodings = [tf.where(exist, enc, zeros) for exist, enc in zip(exists, encodings)]

        # 3. tile the encodings according to each evidence type
        encodings = [[enc] * ev.tile for ev, enc in zip(config.evidence, encodings)]
        encodings = tf.stack(list(chain.from_iterable(encodings)))

        # 4. compute the mean of non-zero encodings
        self.psi_mean = tf.reduce_sum(encodings, axis=0) / denom

    # Compute the covariance of Psi
    with tf.variable_scope('covariance'):
        I = tf.ones([config.batch_size, config.latent_size], dtype=tf.float32)
        self.psi_covariance = I / denom
def hnet_transformation(gt_pts, transformation_coeffcient, name):
    """
    :param gt_pts: original ground-truth points [x, y, 1]
    :param transformation_coeffcient: the six projection-matrix parameters
    :param name: variable scope name
    :return: the fitted points back-projected into the original space
    """
    with tf.variable_scope(name):
        # First, project the original label points
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Solve the least-squares parameter matrix of the polynomial fit
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))

        # Use the fitted polynomial parameters to compute the fitted positions
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        preds_fit = tf.stack([tf.squeeze(x_preds, -1), Y], axis=1)
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

        return x_transformation_back
def _transform(theta, input_dim, out_size, z_near, z_far):
    with tf.variable_scope('_transform'):
        num_batch = input_dim.get_shape().as_list()[0]
        num_channels = input_dim.get_shape().as_list()[4]
        theta = tf.reshape(theta, (-1, 4, 4))
        theta = tf.cast(theta, 'float32')

        out_depth = out_size[0]
        out_height = out_size[1]
        out_width = out_size[2]
        grid = _meshgrid(out_depth, out_height, out_width, z_near, z_far)
        grid = tf.expand_dims(grid, 0)
        grid = tf.reshape(grid, [-1])
        grid = tf.tile(grid, tf.stack([num_batch]))
        grid = tf.reshape(grid, tf.stack([num_batch, 4, -1]))

        # Transform A x (x_t', y_t', 1, d_t)^T -> (x_s, y_s, z_s, 1).
        t_g = tf.matmul(theta, grid)
        z_s = tf.slice(t_g, [0, 0, 0], [-1, 1, -1])
        y_s = tf.slice(t_g, [0, 1, 0], [-1, 1, -1])
        x_s = tf.slice(t_g, [0, 2, 0], [-1, 1, -1])
        z_s_flat = tf.reshape(z_s, [-1])
        y_s_flat = tf.reshape(y_s, [-1])
        x_s_flat = tf.reshape(x_s, [-1])

        input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, z_s_flat, out_size)

        output = tf.reshape(
            input_transformed,
            tf.stack([num_batch, out_depth, out_height, out_width, num_channels]))
        return output
def language_model(input, vocab_size):
    """Form p(x[0], ..., x[timesteps - 1]),

    \prod_{t=0}^{timesteps - 1} p(x[t] | x[:t]),

    To calculate the probability, we call log_prob on
    x = [x[0], ..., x[timesteps - 1]] given
    `input` = [0, x[0], ..., x[timesteps - 2]].

    We implement this separately from the generative model so the
    forward pass, e.g., embedding/dense layers, can be parallelized.

    [batch_size, timesteps] -> [batch_size, timesteps]
    """
    x = tf.one_hot(input, depth=vocab_size, dtype=tf.float32)
    h = tf.fill(tf.stack([tf.shape(x)[0], FLAGS.hidden_size]), 0.0)
    c = tf.fill(tf.stack([tf.shape(x)[0], FLAGS.hidden_size]), 0.0)
    hs = []
    reuse = None
    for t in range(FLAGS.timesteps):
        if t > 0:
            reuse = True
        xt = x[:, t, :]
        h, c = lstm_cell(xt, h, c, name="lstm", reuse=reuse)
        hs.append(h)

    h = tf.stack(hs, 1)
    logits = tf.layers.dense(h, vocab_size, name="dense")
    output = Categorical(logits=logits)
    return output
def hnet_loss(gt_pts, transformation_coeffcient, name):
    """
    :param gt_pts: original ground-truth points [x, y, 1]
    :param transformation_coeffcient: the six projection-matrix parameters
        [[a, b, c], [0, d, e], [0, f, 1]]
    :param name: variable scope name
    :return: the fitting loss
    """
    with tf.variable_scope(name):
        # First, project the original label points
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Solve the least-squares parameter matrix of the polynomial fit
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))

        # Compute the fitted positions from the polynomial parameters and
        # back-project into the original space to compute the loss
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

        loss = tf.reduce_mean(tf.pow(gt_pts[0, :] - x_transformation_back[0, :], 2))

        return loss
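
# Hedged example (assumed values, TF 1.x): with identity-like coefficients the
# projection H is the identity, so the loss is just the polynomial-fit residual.
# At least four points are needed, since Y_stack has four columns.
gt_pts = tf.constant([[1.0, 1., 1.], [1.2, 2., 1.], [1.3, 3., 1.],
                      [1.4, 4., 1.], [1.8, 5., 1.]])
coeffs = tf.constant([1., 0., 0., 1., 0., 0.])
loss = hnet_loss(gt_pts, coeffs, 'hnet_loss')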
def testConst(self):
    np.random.seed(7)
    with self.test_session(use_gpu=True):
        for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
            data = np.random.randn(*shape).astype(np.float32)
            # Pack back into a single tensorflow tensor directly using np array
            c = tf.pack(data)
            # This is implemented via a Const:
            self.assertEqual(c.op.type, "Const")
            self.assertAllEqual(c.eval(), data)

            # Python lists also work for 1-D case:
            if len(shape) == 1:
                data_list = list(data)
                cl = tf.pack(data_list)
                self.assertEqual(cl.op.type, "Const")
                self.assertAllEqual(cl.eval(), data)
                cl = tf.stack(data_list)
                self.assertEqual(cl.op.type, "Const")
                self.assertAllEqual(cl.eval(), data)

        # Verify that shape induction works with shapes produced via const pack
        a = tf.constant([1, 2, 3, 4, 5, 6])
        b = tf.reshape(a, tf.pack([2, 3]))
        self.assertAllEqual(b.get_shape(), [2, 3])
        b = tf.reshape(a, tf.stack([2, 3]))
        self.assertAllEqual(b.get_shape(), [2, 3])
def getImage(filenames):
    # convert filenames to a queue for an input pipeline
    filenameQ = tf.train.string_input_producer(filenames, num_epochs=None)

    # object to read records
    recordReader = tf.TFRecordReader()

    # read the full set of features for a single example
    key, fullExample = recordReader.read(filenameQ)

    # parse the full example into its component features
    features = tf.parse_single_example(
        fullExample,
        features={
            'image/height': tf.FixedLenFeature([], tf.int64),
            'image/width': tf.FixedLenFeature([], tf.int64),
            'image/depth': tf.FixedLenFeature([], tf.int64),
            'image/class/label': tf.FixedLenFeature([], tf.int64),
            'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
        })

    label = features['image/class/label']
    image_buffer = features['image/encoded']
    image = tf.decode_raw(image_buffer, tf.float32)
    image = tf.reshape(image, tf.stack([FLAGS.width * FLAGS.height * FLAGS.depth]))
    label = tf.stack(tf.one_hot(label - 1, nLabel))
    return label, image
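
# Hedged usage sketch (assumed TF 1.x queue runners; 'train.tfrecords' is a
# hypothetical file path, and FLAGS/nLabel are defined as in the surrounding code).
label, image = getImage(['train.tfrecords'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    lbl, img = sess.run([label, image])
    coord.request_stop()
    coord.join(threads)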
def when_singular():
    center = min_
    bucket_starts = tf.stack([center - 0.5])
    bucket_ends = tf.stack([center + 0.5])
    bucket_counts = tf.stack([tf.cast(tf.size(data), tf.float64)])
    return tf.transpose(
        tf.stack([bucket_starts, bucket_ends, bucket_counts]))
def objective(self, x):
    """Returns scalar to maximize."""
    encoder = NN(self.encoder_net, self.encoder_act_func, self.batch_size)
    decoder = BNN(self.decoder_net, self.decoder_act_func, self.batch_size)

    log_px_list = []
    log_pz_list = []
    log_qz_list = []
    log_pW_list = []
    log_qW_list = []

    for W_i in range(self.n_W_particles):
        # Sample decoder weights  __, [1], [1]
        W, log_pW, log_qW = decoder.sample_weights()

        # Sample z  [P,B,Z], [P,B], [P,B]
        z, log_pz, log_qz = self.sample_z(x, encoder, decoder, W)
        # z: [PB,Z]
        z = tf.reshape(z, [self.n_z_particles * self.batch_size, self.z_size])

        # Decode [PB,X]
        y = decoder.feedforward(W, z)
        # y: [P,B,X]
        y = tf.reshape(y, [self.n_z_particles, self.batch_size, self.x_size])

        # Likelihood p(x|z)  [P,B]
        log_px = log_bern(x, y)

        # Store for later
        log_px_list.append(log_px)
        log_pz_list.append(log_pz)
        log_qz_list.append(log_qz)
        log_pW_list.append(log_pW)
        log_qW_list.append(log_qW)

    log_px = tf.stack(log_px_list)  # [S,P,B]
    log_pz = tf.stack(log_pz_list)  # [S,P,B]
    log_qz = tf.stack(log_qz_list)  # [S,P,B]
    log_pW = tf.stack(log_pW_list)  # [S]
    log_qW = tf.stack(log_qW_list)  # [S]

    # Calculate log probs for printing
    self.log_px = tf.reduce_mean(log_px)
    self.log_pz = tf.reduce_mean(log_pz)
    self.log_qz = tf.reduce_mean(log_qz)
    self.log_pW = tf.reduce_mean(log_pW)
    self.log_qW = tf.reduce_mean(log_qW)
    self.z_elbo = self.log_px + self.log_pz - self.log_qz

    # Calc elbo
    elbo = self.log_px + self.log_pz - self.log_qz + self.batch_frac * (self.log_pW - self.log_qW)

    return elbo
def mtrx2vecBatch(pMtrxBatch, opt):
    with tf.name_scope("mtrx2vec"):
        if opt.warpType == "translation":
            [row0, row1, row2] = tf.unstack(pMtrxBatch, axis=1)
            [e00, e01, e02] = tf.unstack(row0, axis=1)
            [e10, e11, e12] = tf.unstack(row1, axis=1)
            [e20, e21, e22] = tf.unstack(row2, axis=1)
            pBatch = tf.stack([e02, e12], axis=1)
        elif opt.warpType == "similarity":
            [row0, row1, row2] = tf.unstack(pMtrxBatch, axis=1)
            [e00, e01, e02] = tf.unstack(row0, axis=1)
            [e10, e11, e12] = tf.unstack(row1, axis=1)
            [e20, e21, e22] = tf.unstack(row2, axis=1)
            pBatch = tf.stack([e00 - 1, e10, e02, e12], axis=1)
        elif opt.warpType == "affine":
            [row0, row1, row2] = tf.unstack(pMtrxBatch, axis=1)
            [e00, e01, e02] = tf.unstack(row0, axis=1)
            [e10, e11, e12] = tf.unstack(row1, axis=1)
            [e20, e21, e22] = tf.unstack(row2, axis=1)
            pBatch = tf.stack([e00 - 1, e01, e02, e10, e11 - 1, e12], axis=1)
        elif opt.warpType == "homography":
            pMtrxBatch = pMtrxBatch / pMtrxBatch[:, 2:3, 2:3]
            [row0, row1, row2] = tf.unstack(pMtrxBatch, axis=1)
            [e00, e01, e02] = tf.unstack(row0, axis=1)
            [e10, e11, e12] = tf.unstack(row1, axis=1)
            [e20, e21, e22] = tf.unstack(row2, axis=1)
            pBatch = tf.stack([e00 - 1, e01, e02, e10, e11 - 1, e12, e20, e21], axis=1)
    return pBatch
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
    """Create a tiled set of anchors strided along a grid in image space.

    This op creates a set of anchor boxes by placing a "basis" collection of
    boxes with user-specified scales and aspect ratios centered at evenly
    distributed points along a grid. The basis collection is specified via the
    scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
    and aspect_ratios=[2, 2, 1/2] means that we create three boxes: one with
    scale .1, aspect ratio 2; one with scale .2, aspect ratio 2; and one with
    scale .2 and aspect ratio 1/2. Each box is multiplied by base_anchor_size
    before placing it over its respective center.

    Grid points are specified via grid_height, grid_width parameters as well as
    the anchor_stride and anchor_offset parameters.

    Args:
      grid_height: size of the grid in the y direction (int or int scalar tensor)
      grid_width: size of the grid in the x direction (int or int scalar tensor)
      scales: a 1-d (float) tensor representing the scale of each box in the
        basis set.
      aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
        box in the basis set. The length of the scales and aspect_ratios tensors
        must be equal.
      base_anchor_size: base anchor size as [height, width]
        (float tensor of shape [2])
      anchor_stride: difference in centers between base anchors for adjacent
        grid positions (float tensor of shape [2])
      anchor_offset: center of the anchor with scale and aspect ratio 1 for the
        upper left element of the grid, this should be zero for
        feature networks with only VALID padding and even receptive field size,
        but may need some additional calculation if other padding is used
        (float tensor of shape [2])

    Returns:
      a BoxList holding a collection of N anchor boxes
    """
    ratio_sqrts = tf.sqrt(aspect_ratios)
    heights = scales / ratio_sqrts * base_anchor_size[0]
    widths = scales * ratio_sqrts * base_anchor_size[1]

    # Get a grid of box centers
    y_centers = tf.to_float(tf.range(grid_height))
    y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
    x_centers = tf.to_float(tf.range(grid_width))
    x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
    x_centers, y_centers = ops.meshgrid(x_centers, y_centers)

    widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
    heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
    bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
    bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
    bbox_centers = tf.reshape(bbox_centers, [-1, 2])
    bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
    bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
    return box_list.BoxList(bbox_corners)
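
# Hedged usage sketch (assumed: the surrounding module's ops and box_list
# imports from the TF Object Detection API are available).
anchors = tile_anchors(grid_height=2, grid_width=2,
                       scales=tf.constant([0.5, 1.0]),
                       aspect_ratios=tf.constant([1.0, 2.0]),
                       base_anchor_size=tf.constant([256., 256.]),
                       anchor_stride=tf.constant([16., 16.]),
                       anchor_offset=tf.constant([8., 8.]))
# anchors.get() holds 2*2*2 = 8 boxes as [ymin, xmin, ymax, xmax] corners.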
def collapse_mixture_of_tastes(tastes_predictions, tastes_attentions):
    """Collapses a list of prediction nodes into a single prediction node.

    :param tastes_predictions: per-taste prediction tensors
    :param tastes_attentions: per-taste attention tensors, or None
    :return: the collapsed prediction tensor
    """
    stacked_predictions = tf.stack(tastes_predictions)

    # If there is attention, the attentions are used to weight each prediction
    if tastes_attentions is not None:
        # Stack the attentions and perform softmax across the tastes
        stacked_attentions = tf.stack(tastes_attentions)
        softmax_attentions = tf.nn.softmax(stacked_attentions, axis=0)

        # The softmax'd attentions serve as weights for the taste predictions
        weighted_predictions = tf.multiply(stacked_predictions, softmax_attentions)
        result_prediction = tf.reduce_sum(weighted_predictions, axis=0)

    # If there is no attention, the max prediction is returned
    else:
        result_prediction = tf.reduce_max(stacked_predictions, axis=0)

    return result_prediction
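
# Hedged example (assumed values): three tastes over a batch of two.
preds = [tf.constant([0.2, 0.9]), tf.constant([0.5, 0.1]), tf.constant([0.4, 0.3])]
attns = [tf.constant([1.0, 0.0]), tf.constant([0.0, 2.0]), tf.constant([0.0, 0.0])]
blended = collapse_mixture_of_tastes(preds, attns)  # attention-weighted sum
hardmax = collapse_mixture_of_tastes(preds, None)   # element-wise max: [0.5, 0.9]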
def vec2mtrxBatch(pBatch, opt):
    with tf.name_scope("vec2mtrx"):
        batchSize = tf.shape(pBatch)[0]
        O = tf.zeros([batchSize])
        I = tf.ones([batchSize])
        if opt.warpType == "translation":
            tx, ty = tf.unstack(pBatch, axis=1)
            pMtrxBatch = tf.transpose(tf.stack([[I, O, tx],
                                                [O, I, ty],
                                                [O, O, I]]), perm=[2, 0, 1])
        elif opt.warpType == "similarity":
            pc, ps, tx, ty = tf.unstack(pBatch, axis=1)
            pMtrxBatch = tf.transpose(tf.stack([[I + pc, -ps, tx],
                                                [ps, I + pc, ty],
                                                [O, O, I]]), perm=[2, 0, 1])
        elif opt.warpType == "affine":
            p1, p2, p3, p4, p5, p6 = tf.unstack(pBatch, axis=1)
            pMtrxBatch = tf.transpose(tf.stack([[I + p1, p2, p3],
                                                [p4, I + p5, p6],
                                                [O, O, I]]), perm=[2, 0, 1])
        elif opt.warpType == "homography":
            p1, p2, p3, p4, p5, p6, p7, p8 = tf.unstack(pBatch, axis=1)
            pMtrxBatch = tf.transpose(tf.stack([[I + p1, p2, p3],
                                                [p4, I + p5, p6],
                                                [p7, p8, I]]), perm=[2, 0, 1])
    return pMtrxBatch
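
# Hedged round-trip sketch (assumed: opt only needs a warpType attribute here).
from argparse import Namespace
opt = Namespace(warpType="translation")
p = tf.constant([[0.1, -0.2]])      # (B,2) translation parameters
M = vec2mtrxBatch(p, opt)           # (B,3,3) homogeneous warp matrices
p_back = mtrx2vecBatch(M, opt)      # recovers the original (B,2) vector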
def K(self, X, X2=None, presliced=False):
    if X2 is None:
        d = tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
        return tf.matrix_diag(d)
    else:
        shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
        return tf.zeros(shape, settings.float_type)
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
    """Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect the
    kind of variations we expect in the real world, and so can help train the
    model to cope with natural data more effectively. Here we take the supplied
    parameters and construct a network of operations to apply them to an image.

    Cropping
    ~~~~~~~~

    Cropping is done by placing a bounding box at a random position in the full
    image. The cropping parameter controls the size of that box relative to the
    input image. If it's zero, then the box is the same size as the input and no
    cropping is performed. If the value is 50%, then the crop box will be half
    the width and height of the input. In a diagram it looks like this:

    <       width         >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    +---------------------+

    Scaling
    ~~~~~~~

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example
    if the scale percentage is zero, then the bounding box is the same size as
    the input and no scaling is applied. If it's 50%, then the bounding box will
    be in a random range between half the width and height and full size.

    Args:
      flip_left_right: Boolean whether to randomly mirror images horizontally.
      random_crop: Integer percentage setting the total margin used around the
        crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer range to randomly multiply the pixel values by.

    Returns:
      The jpeg input layer and the distorted result tensor.
    """
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH)
    decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(tensor_shape.scalar(),
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH)
    precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0])
    cropped_image = tf.random_crop(
        precropped_image_3d,
        [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH, MODEL_INPUT_DEPTH])
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image
    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(tensor_shape.scalar(),
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
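
# Hedged usage sketch (assumed: the MODEL_INPUT_* constants are defined as in
# the surrounding retraining script; 'img.jpg' is a hypothetical file).
jpeg_data, distorted = add_input_distortions(flip_left_right=True,
                                             random_crop=10,
                                             random_scale=10,
                                             random_brightness=10)
with tf.Session() as sess:
    out = sess.run(distorted, feed_dict={jpeg_data: open('img.jpg', 'rb').read()})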
def load_and_preprocess_from_path_label(path, x, y):
    image = load_and_preprocess_image(path)
    image = tf.image.resize(image, [shape, shape])
    return image, tf.stack([tf.to_float(x) / width, tf.to_float(y) / height])
def load_and_preprocess_from_path_label_translate(path, x, y):
    image = load_and_preprocess_image(path)
    image, translations = translate(image)
    image = tf.image.resize(image, [shape, shape])
    return image, tf.stack([tf.to_float(x + translations[0]) / width,
                            tf.to_float(y + translations[1]) / height])
optimizer = tf.train.AdamOptimizer(learning_rate=alpha)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
loss_arr.append(loss)
train_op_arr.append(train_op)

# functions for evaluating the network
correct_pred = tf.equal(predictions["classes"], ph["y"])
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
accuracy_arr.append(accuracy)

# combine all of the models together for the ensemble
all_preds = tf.stack([x["probabilities"] for x in predictions_arr])
all_preds = tf.transpose(all_preds, [1, 2, 0])

model_preds = tf.transpose(all_preds, [0, 2, 1])
model_top_10_values, model_top_10_indices = tf.nn.top_k(model_preds, k=10)
model_preds = tf.argmax(model_preds, axis=2, output_type=tf.int32)

if aggregate_method == 'average':
    # average over softmaxes
    test_prob = tf.reduce_mean(all_preds, axis=2)
    test_class = tf.argmax(test_prob, axis=1, output_type=tf.int32)
elif aggregate_method == 'most_common':
    # majority vote over the per-model argmax predictions
    test_prob = tf.argmax(all_preds, axis=1, output_type=tf.int32)
    test_class = tf.argmax(tf.bincount(test_prob), output_type=tf.int32)
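
# Hedged illustration (assumed values): the 'average' aggregation above is just
# a mean over the stacked per-model softmax outputs.
m1 = tf.constant([[0.7, 0.3]])
m2 = tf.constant([[0.5, 0.5]])
avg = tf.reduce_mean(tf.stack([m1, m2]), axis=0)  # [[0.6, 0.4]]
ens_class = tf.argmax(avg, axis=1)                # [0]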
def build(self, n_hidden, update_next=False):
    with open(self.logfile, 'w') as file:
        file.write("Build file with Parameters:\n")
        file.write("[rnn_units: {}; update_next: {};]\n".format(n_hidden, update_next))

    # graph build
    self.outputs = tf.placeholder(tf.int32, [None, None], name='output_layer')  # batch_size * time_steps
    self.inputs = tf.placeholder(tf.int32, [None, None], name='input_layer')
    self.target_len = tf.placeholder(tf.int32, [None], name='target_length')
    n_time_step = tf.reduce_max(self.target_len)

    # task-specific embedding
    # print_op = tf.print("tensors:", {'outputs': outputs}, output_stream=sys.stdout)
    # with tf.control_dependencies([print_op]):
    # embedding_mat = tf.Variable(tf.truncated_normal([self.n_voca, n_embedding]), name="task-specific-embedding-mat")
    # embedding_vec = tf.fill(tf.stack([tf.shape(inputs)[0], max_len, embed_size]), 0.0)

    # one-hot encoding
    onehot_enc = tf.one_hot(self.inputs, self.n_voca, axis=-1)
    rnnCell = tf.contrib.rnn.BasicRNNCell(n_hidden)
    # hidden_output holds the output vectors of the RNN cells
    # [batch_size * time_steps * rnn_units]
    hidden_output, _ = tf.nn.dynamic_rnn(rnnCell, onehot_enc, dtype=tf.float32)
    self.hidden_output = hidden_output

    # flatten it; when updating, choose either the target token or the entire
    # sequence to update the network
    hidden_output = tf.reshape(hidden_output, [-1, n_hidden])
    # reshaped from [batch_size, time_steps, n_hidden] to [batch_size*timesteps, n_hidden]

    softmax_weight = tf.Variable(tf.truncated_normal([n_hidden, self.n_voca]),
                                 name="softmax_weight")
    softmax_bias = tf.Variable(tf.constant(0.1, shape=[self.n_voca]))
    # hidden_output = tf.transpose(hidden_output, [1, 0, 2])
    # batch_size * time_steps * lstm_units --> time_steps * batch_size * lstm_units
    # target_output = tf.gather(hidden_output, int(hidden_output.get_shape()[0]) - 1)

    # pred contains the vectors of all the timesteps; use the last one or
    # every step for the update
    logits = tf.matmul(hidden_output, softmax_weight) + softmax_bias
    logits = tf.reshape(logits, [-1, n_time_step, self.n_voca])
    self.logits = logits
    # assert logits.get_shape().as_list() == [None, None, self.n_voca]
    # targets = tf.one_hot(outputs, self.n_voca, axis=-1)

    index_offset = tf.fill(tf.stack([tf.shape(self.target_len)[0]]), -1)
    target_indices = tf.add(self.target_len, index_offset)
    mask = tf.one_hot(target_indices, n_time_step, dtype=tf.float32)
    # mask_for_padding guarantees an update on every next token
    mask_for_padding = tf.sequence_mask(self.target_len, dtype=tf.float32)

    if update_next:  # use every time step to supervise the model
        self.loss = tf.contrib.seq2seq.sequence_loss(logits,
                                                     self.outputs,
                                                     mask_for_padding,
                                                     average_across_timesteps=False,
                                                     average_across_batch=True)
    else:
        self.loss = tf.contrib.seq2seq.sequence_loss(logits,
                                                     self.outputs,
                                                     mask,
                                                     average_across_timesteps=False,
                                                     average_across_batch=True)
    self.optimizer = tf.train.AdamOptimizer().minimize(self.loss)

    # answer
    pred = tf.argmax(logits, 2)  # batch_size * time_step
    self.pred = pred
    mask_pred = tf.multiply(tf.cast(pred, tf.float32), mask)
    self.mask = mask
    self.mask_pred = mask_pred
    pred_logits = tf.boolean_mask(logits, mask)
    pred_logits = tf.reshape(pred_logits, [-1, self.n_voca])
    # output probability of target
    self.softmax_prob = tf.nn.softmax(pred_logits)

    # answer [prediction]
    self.last_pred = tf.reduce_sum(mask_pred, 1)
    mask_output = tf.multiply(tf.cast(self.outputs, tf.float32), mask)
    # label
    last_output = tf.reduce_sum(mask_output, 1)
    correctPred = tf.equal(tf.cast(self.last_pred, tf.int64),
                           tf.cast(last_output, tf.int64))
    self.last_acc = tf.reduce_mean(tf.cast(correctPred, tf.float32))

    # second last
    second_target_indices = tf.add(tf.cast(target_indices, tf.int32), index_offset)
    mask_second_last = tf.one_hot(second_target_indices, n_time_step, dtype=tf.float32)
    second_mask_pred = tf.multiply(tf.cast(pred, tf.float32), mask_second_last)
    self.second_last_pred = tf.reduce_sum(second_mask_pred, 1)
    second_mask_output = tf.multiply(tf.cast(self.outputs, tf.float32), mask_second_last)
    second_last_output = tf.reduce_sum(second_mask_output, 1)
    second_correct = tf.equal(tf.cast(self.second_last_pred, tf.int64),
                              tf.cast(second_last_output, tf.int64))
    self.second_last_acc = tf.reduce_mean(tf.cast(second_correct, tf.float32))

    self.lstm_saver = tf.train.Saver()
def matching(self, gt_bbox, gt_labels, num_crowd=None,
             threshold_pos=0.5, threshold_neg=0.4, threshold_crowd=0.7):
    """Match anchors to ground-truth boxes.

    Args:
        gt_bbox: ground-truth boxes.
        gt_labels: ground-truth class labels.
        num_crowd: number of crowd annotations at the end of gt_bbox.
        threshold_pos: IoU threshold above which an anchor is positive.
        threshold_neg: IoU threshold below which an anchor is negative.
        threshold_crowd: crowd IoU threshold above which a negative anchor is
            reassigned to neutral.
    """
    if num_crowd > 0:
        # split off the crowd boxes; take the crowd slice before truncating
        # gt_bbox so both slices come from the original tensor
        crowd_gt_bbox = gt_bbox[-num_crowd:]
        gt_bbox = gt_bbox[:-num_crowd]
    else:
        crowd_gt_bbox = tf.zeros_like(gt_bbox)

    # Matching only for non-crowd annotations
    # ------------------------------------------------------------------------
    num_gt = tf.shape(gt_bbox)[0]
    # tf.print("num gt", num_gt)

    # pairwise IoU
    pairwise_iou = self._pairwise_iou(gt_bbox=gt_bbox, is_crowd=False)

    # assign the max-overlap gt index to each anchor
    max_iou_for_anchors = tf.reduce_max(pairwise_iou, axis=-1)
    max_id_for_anchors = tf.math.argmax(pairwise_iou, axis=-1)

    # force the best-matched anchor of each gt to predict the corresponding gt
    forced_update_id = tf.cast(tf.range(0, num_gt), tf.int64)
    # force the IoU over the threshold so no training data is wasted
    forced_update_iou = tf.reduce_max(pairwise_iou, axis=0)
    forced_update_indice = tf.expand_dims(tf.math.argmax(pairwise_iou, axis=0), axis=-1)
    max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors,
                                                      forced_update_indice,
                                                      forced_update_iou)
    max_id_for_anchors = tf.tensor_scatter_nd_update(max_id_for_anchors,
                                                     forced_update_indice,
                                                     forced_update_id)

    # decide whether anchors are positive or negative based on IoU thresholds
    pos_iou = tf.where(max_iou_for_anchors > threshold_pos)
    max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, pos_iou,
                                                      tf.ones(tf.size(pos_iou)))
    neg_iou = tf.where(max_iou_for_anchors < threshold_neg)
    max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, neg_iou,
                                                      tf.zeros(tf.size(neg_iou)))
    neu_iou = tf.where(
        tf.math.logical_and((max_iou_for_anchors <= threshold_pos),
                            max_iou_for_anchors >= threshold_neg))
    max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors, neu_iou,
                                                      -1 * tf.ones(tf.size(neu_iou)))

    # deal with crowd annotations; only affects non-positive anchors
    # ------------------------------------------------------------------------
    if num_crowd > 0 and threshold_crowd < 1:
        # crowd pairwise IoU
        crowd_pairwise_iou = self._pairwise_iou(gt_bbox=crowd_gt_bbox, is_crowd=True)

        # assign the max-overlap crowd IoU to each anchor
        crowd_max_iou_for_anchors = tf.reduce_max(crowd_pairwise_iou, axis=-1)

        # make neutral those negative anchors whose crowd IoU exceeds the threshold
        crowd_neu_iou = tf.where(
            tf.math.logical_and((max_iou_for_anchors <= 0),
                                crowd_max_iou_for_anchors > threshold_crowd))

        # reassign from negative to neutral
        max_iou_for_anchors = tf.tensor_scatter_nd_update(max_iou_for_anchors,
                                                          crowd_neu_iou,
                                                          -1 * tf.ones(tf.size(crowd_neu_iou)))
    match_positiveness = max_iou_for_anchors

    # create class target by mapping idx to label[idx]
    # match_labels = tf.map_fn(lambda x: gt_labels[x], max_id_for_anchors)
    match_labels = tf.gather(gt_labels, max_id_for_anchors)
    # Element-wise multiplication of label[idx] and positiveness:
    #   1. positive samples keep the correct label
    #   2. negative samples become 0 * label[idx] = 0
    #   3. neutral samples become -1 * label[idx]
    # This makes it easy to distinguish positive samples during loss calculation.
    target_cls = tf.multiply(tf.cast(match_labels, tf.float32), match_positiveness)

    # create loc target
    # map_loc = tf.map_fn(lambda x: gt_bbox[x], max_id_for_anchors, dtype=tf.float32)
    map_loc = tf.gather(gt_bbox, max_id_for_anchors)

    # convert anchors to center form [cx, cy, w, h]
    # center_anchors = tf.map_fn(lambda x: map_to_center_form(x), self.anchors)
    h = self.anchors[:, 2] - self.anchors[:, 0]
    w = self.anchors[:, 3] - self.anchors[:, 1]
    center_anchors = tf.stack([self.anchors[:, 1] + (w / 2),
                               self.anchors[:, 0] + (h / 2), w, h], axis=-1)

    # convert the matched gt boxes to center form
    # center_gt = tf.map_fn(lambda x: map_to_center_form(x), map_loc)
    h = map_loc[:, 2] - map_loc[:, 0]
    w = map_loc[:, 3] - map_loc[:, 1]
    center_gt = tf.stack([map_loc[:, 1] + (w / 2),
                          map_loc[:, 0] + (h / 2), w, h], axis=-1)
    variances = [0.1, 0.2]

    # calculate offsets
    # target_loc = tf.map_fn(lambda x: map_to_offset(x), tf.stack([center_gt, center_anchors], axis=-1))
    g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2] / variances[0]
    g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3] / variances[0]
    tf.debugging.assert_non_negative(center_anchors[:, 2] / center_gt[:, 2])
    tf.debugging.assert_non_negative(center_anchors[:, 3] / center_gt[:, 3])
    g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2]) / variances[1]
    g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3]) / variances[1]
    target_loc = tf.stack([g_hat_cx, g_hat_cy, g_hat_w, g_hat_h], axis=-1)
    return target_cls, target_loc, max_id_for_anchors, match_positiveness
def call(self, inputs):
    h = inputs[0]
    x = inputs[1]
    aug_x = tf.stack([1 - x, x], axis=1)
    emission = tf.stack([tf.nn.softmax(self.emission_weight_raw[i, :])
                         for i in range(0, self.cardinality)], axis=0)
    return get_normalized_pr(h * tf.matmul(aug_x, tf.transpose(emission)))
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
          https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell.
          See https://www.tensorflow.org/versions/master/how_tos/variable_scope/
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.pack (similar to np.asarray) useful to
        assemble a list of tensors into a larger tensor.
        https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
    Hint: You will find the function tf.transpose and the perms argument useful
        to shuffle the indices of the tensor.
        https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose

    Remember:
        * Use the xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument. The keep probability should be set to the value of
          self.dropout_placeholder

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    preds = []  # Predicted output at each timestep should go here!

    # Use the cell defined below. For Q2, we will just be using the
    # RNNCell you defined, but for Q3, we will run this code again
    # with a GRU cell!
    if Config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
    elif Config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size, Config.hidden_size)
    else:
        raise ValueError("Unsupported cell type: " + Config.cell)

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    ### YOUR CODE HERE (~4-6 lines)
    U = tf.get_variable(
        'U',
        shape=(Config.hidden_size, Config.n_classes),
        initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable(
        'b2',
        shape=(Config.n_classes),
        initializer=tf.contrib.layers.xavier_initializer(seed=2))
    h = tf.zeros(shape=(tf.shape(x)[0], Config.hidden_size))
    ### END YOUR CODE

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            ### YOUR CODE HERE (~6-10 lines)
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            output, h = cell(x[:, time_step, :], h)
            output = tf.nn.dropout(output, self.dropout_placeholder)
            output = tf.matmul(output, U) + b2
            preds.append(output)
            ### END YOUR CODE

    # Make sure to reshape @preds here.
    ### YOUR CODE HERE (~2-4 lines)
    preds = tf.stack(preds)
    preds = tf.transpose(preds, perm=[1, 0, 2])
    ### END YOUR CODE

    assert preds.get_shape().as_list() == [
        None, self.max_length, Config.n_classes
    ], "predictions are not of the right shape. Expected {}, got {}".format(
        [None, self.max_length, Config.n_classes], preds.get_shape().as_list())
    return preds
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
    """Builds a slim convolutional capsule layer.

    This layer performs 2D convolution given a 5D input tensor of shape
    `[batch, input_dim, input_atoms, input_height, input_width]`. Then refines
    the votes with routing and applies the squash non-linearity for each
    capsule. Each capsule in this layer is a convolutional unit and shares its
    kernel over the position grid and different capsules of the layer below.
    Therefore, the number of trainable variables in this layer is:

      kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
      bias: [output_dim, output_atoms]

    The output of a conv2d layer is a single capsule with channel number of
    atoms. Therefore conv_slim_capsule is suitable to be added on top of a
    conv2d layer with num_routing=1, input_dim=1 and input_atoms=conv_channels.

    Args:
      input_tensor: tensor, of rank 5. Last two dimensions representing height
        and width position grid.
      input_dim: scalar, number of capsules in the layer below.
      output_dim: scalar, number of capsules in this layer.
      layer_name: string, Name of this layer.
      input_atoms: scalar, number of units in each capsule of input layer.
      output_atoms: scalar, number of units in each capsule of output layer.
      stride: scalar, stride of the convolutional kernel.
      kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
      padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
      **routing_args: dictionary {leaky, num_routing}, args to be passed to the
        update_routing function.

    Returns:
      Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise, height
      and width is adjusted with same rules as 'VALID' in tf.nn.conv2d.
    """
    with tf.variable_scope(layer_name):
        kernel = variables.weight_variable(shape=[
            kernel_size, kernel_size, input_atoms, output_dim * output_atoms
        ])
        biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
        votes, votes_shape, input_shape = _depthwise_conv3d(
            input_tensor, kernel, input_dim, output_dim, input_atoms,
            output_atoms, stride, padding)

        with tf.name_scope('routing'):
            logit_shape = tf.stack([
                input_shape[0], input_dim, output_dim, votes_shape[2],
                votes_shape[3]
            ])
            biases_replicated = tf.tile(biases,
                                        [1, 1, votes_shape[2], votes_shape[3]])
            activations = _update_routing(
                votes=votes,
                biases=biases_replicated,
                logit_shape=logit_shape,
                num_dims=6,
                input_dim=input_dim,
                output_dim=output_dim,
                **routing_args)
        return activations
def conv2d_transpose(inputs,
                     num_output_channels,
                     kernel_size,
                     scope,
                     stride=[1, 1],
                     padding='SAME',
                     use_xavier=False,
                     stddev=1e-3,
                     weight_decay=0.0,
                     activation_fn=tf.nn.relu,
                     bn=False,
                     bn_decay=None,
                     is_training=None):
    """2D convolution transpose with non-linear operation.

    Args:
      inputs: 4-D tensor variable BxHxWxC
      num_output_channels: int
      kernel_size: a list of 2 ints
      scope: string
      stride: a list of 2 ints
      padding: 'SAME' or 'VALID'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor

    Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a
    """
    with tf.variable_scope(scope) as sc:
        kernel_h, kernel_w = kernel_size
        num_in_channels = inputs.get_shape()[-1].value
        # the kernel shape is reversed compared to conv2d
        kernel_shape = [kernel_h, kernel_w, num_output_channels, num_in_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride

        # from slim.convolution2d_transpose
        def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
            dim_size *= stride_size
            if padding == 'VALID' and dim_size is not None:
                dim_size += max(kernel_size - stride_size, 0)
            return dim_size

        # calculate output shape
        batch_size = tf.shape(inputs)[0]
        height = tf.shape(inputs)[1]
        width = tf.shape(inputs)[2]
        out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
        out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
        output_shape = tf.stack([batch_size, out_height, out_width, num_output_channels],
                                axis=0)

        outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
                                         [1, stride_h, stride_w, 1],
                                         padding=padding)
        biases = _variable_on_cpu('biases', [num_output_channels],
                                  tf.constant_initializer(0.0))
        outputs = tf.nn.bias_add(outputs, biases)

        if bn:
            # outputs = batch_norm_for_conv2d(outputs, is_training,
            #                                 bn_decay=bn_decay, scope='bn')
            outputs = tf.layers.batch_normalization(outputs, momentum=0.99,
                                                    epsilon=1e-6, training=is_training)

        if activation_fn is not None:
            # outputs = activation_fn(outputs)
            outputs = tf.nn.leaky_relu(outputs, alpha=0.2)
        return outputs
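
# Hedged usage sketch (assumed TF 1.x): upsample a 16x16 feature map by 2x;
# 'SAME' padding with stride 2 doubles the spatial size to 32x32.
feats = tf.placeholder(tf.float32, [None, 16, 16, 64])
up = conv2d_transpose(feats, num_output_channels=32, kernel_size=[3, 3],
                      scope='deconv1', stride=[2, 2], padding='SAME')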
def __init__(self, args):
    """Initialize the model.

    :param args: dict holding the hyperparameters
    """
    self.is_train = args["is_train"]
    self.batch_size = args["batch_size"]
    self.keep_pob = args["keep_prob"]
    self.dropout_prob = 1.0 - self.keep_pob
    self.learning_rate = args["learning_rate"]
    self.relation_vocab_size = args["relation_vocab_size"]
    self.entity_vocab_size = args["entity_vocab_size"]
    self.entity_type_emb_size = args["entity_type_emb_size"]
    self.char_vocab_size = args["char_vocab_size"]
    self.char_emb_size = args["char_emb_size"]
    self.max_sentences = args["max_sentences"]
    self.word_maxlen = args["word_maxlen"]
    self.word_emb_table = args["embedding_table"]
    self.word_emb_size = args["word_emb_size"]
    self.filter_size = args["filter_size"]
    self.num_filter = args["num_filter"]
    self.max_entities = args["max_entities"]
    self.entity_max_tokens = args["entity_max_tokens"]
    self.entity_max_chars = args["entity_max_chars"]

    # encoder and decoder parameters
    self.encoder_stack = args["encoder_stack"]
    self.encoder_max_step = args["encoder_max_step"]
    self.encoder_hidden = args["encoder_hidden"]
    self.decoder_hidden = args["decoder_hidden"]

    self.global_step = tf.get_variable('global_step', shape=[], dtype='int32',
                                       initializer=tf.constant_initializer(0),
                                       trainable=False)

    # initialize the model's input placeholders
    self._placeholder_init()

    # word embedding table fine-tuned together with the model
    finetune_table = tf.get_variable(name="word_embedding_table_finetuning",
                                     initializer=self.word_emb_table,
                                     trainable=True,
                                     dtype=tf.float32)
    # fixed word embedding table that keeps the pretrained values
    fix_table = tf.get_variable(name="word_embedding_table_fix",
                                initializer=self.word_emb_table,
                                trainable=False,
                                dtype=tf.float32)
    # randomly initialized character embedding table
    char_emb_table = tf.get_variable(
        "char_emb_table",
        shape=[self.char_vocab_size, self.char_emb_size],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # randomly initialized entity-type embedding table
    entity_type_emb_table = tf.get_variable(
        "entity_type_emb_table",
        shape=[self.entity_vocab_size, self.entity_type_emb_size],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # one-hot embedding table for sentence indices
    sentence_id_emb_table = tf.eye(num_rows=self.max_sentences)

    # sentence word embeddings
    context_embedding = self._context_embedding_layer(
        fix_table=fix_table,
        finetune_table=finetune_table,
        char_emb_table=char_emb_table)
    # sentence entity-type embeddings
    entity_type_embedding = tf.nn.embedding_lookup(entity_type_emb_table,
                                                   self.context_entity_type)
    # sentence-index embeddings
    sentence_id_embedding = tf.nn.embedding_lookup(sentence_id_emb_table,
                                                   self.sentence_id)

    # entity token, character, type, position, sentence_id embedding
    entity_embedding = self._entity_pool_embedding(
        fix_table=fix_table,
        finetune_table=finetune_table,
        char_emb_table=char_emb_table,
        token_entities=self.entity_pool,
        char_entities=self.char_entity_pool)

    # gather the embeddings of the entities appearing in each sentence
    context_entity_emb = []
    unstack_entity_pool = tf.unstack(entity_embedding, axis=0)
    unstack_context_entity_id = tf.unstack(self.context_entity_id, axis=0)
    for entity_pool, context in zip(unstack_entity_pool, unstack_context_entity_id):
        context_entity_emb.append(tf.nn.embedding_lookup(entity_pool, context))
    context_entity_emb = tf.stack(context_entity_emb, axis=0)

    # context token, character, entity_type, sentence_id embedding
    context_embedding = tf.concat([
        context_embedding, entity_type_embedding, sentence_id_embedding,
        context_entity_emb
    ], axis=-1)

    # entity embeddings and entity sentence-index embeddings
    entity_pool_type_emb = tf.nn.embedding_lookup(entity_type_emb_table,
                                                  self.entity_pool_type)
    entity_pool_sent_emb = tf.nn.embedding_lookup(sentence_id_emb_table,
                                                  self.entity_sent_id)
    entity_pool_emb = tf.concat([entity_embedding,
entity_pool_type_emb, entity_pool_sent_emb], axis=-1) # 관계 없는 개체가 포인팅하게 할 none 벡터 none_emb = tf.get_variable(name="none_emb", shape=[self.decoder_hidden], initializer=tf.zeros_initializer) pad_emb = tf.get_variable(name="pad_emb", shape=[self.decoder_hidden], initializer=tf.zeros_initializer) pad_token = tf.expand_dims(tf.stack([pad_emb] * self.batch_size, 0), axis=1, name="pad_token") none_token = tf.expand_dims(tf.stack([none_emb] * self.batch_size, 0), axis=1, name="none_token") # 문장 인코딩 encoder_output, encoder_state = self._biGRU_encoding_layer( encoder_input=context_embedding, encoder_length=self.context_input_length, name="encoder_layer") # 개체 인코딩 및 문장 개체 간 주의 집중 pointing_mem, decoder_state = self._entity_encoding_layer( entity_pool_emb, encoder_output, encoder_state) # 디코더에서 포인팅 할 타겟 self.pointing_target = tf.concat([pad_token, none_token, pointing_mem], axis=1) # 디코더 입력 decoder_input = tf.concat([entity_pool_emb, pointing_mem], axis=-1) # 디코더 레이어 및 train op self._dual_pointer_decoder(decoder_input=decoder_input, decoder_init_state=decoder_state, decoder_hidden=self.decoder_hidden, pointing_memory=self.pointing_target)
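# Hedged aside: the unstack/lookup/stack loop above performs one embedding
# lookup per batch element. The same batched gather can be written without
# the Python loop via tf.gather_nd. Minimal self-contained sketch (names
# and shapes are made up, not taken from the model above):
import tensorflow as tf

def batched_entity_lookup(entity_embedding, context_entity_id):
    """entity_embedding: [B, E, D]; context_entity_id: [B, T] int32 -> [B, T, D]."""
    batch = tf.shape(context_entity_id)[0]
    steps = tf.shape(context_entity_id)[1]
    b_idx = tf.tile(tf.expand_dims(tf.range(batch), 1), [1, steps])  # [B, T]
    idx = tf.stack([b_idx, context_entity_id], axis=-1)              # [B, T, 2]
    return tf.gather_nd(entity_embedding, idx)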
def learn(env, benchmark_env, q_func, replay_memory, optimizer, exploration=LinearSchedule(1000000, 0.1), max_timesteps=50000000, batch_size=32, learning_starts=50000, learning_freq=4, target_update_freq=10000, grad_clip=None, log_every_n_steps=100000, mov_avg_size=300, ): assert (learning_starts % target_update_freq) == 0 assert type(env.observation_space) == gym.spaces.Box assert type(env.action_space) == gym.spaces.Discrete input_shape = (replay_memory.history_len, *env.observation_space.shape) n_actions = env.action_space.n benchmark_env = HistoryWrapper(benchmark_env, replay_memory.history_len) # build model session = get_session() obs_t_ph = tf.placeholder(env.observation_space.dtype, [None] + list(input_shape)) act_t_ph = tf.placeholder(tf.int32, [None]) return_ph = tf.placeholder(tf.float32, [None]) qvalues, rnn_state_tf = q_func(obs_t_ph, n_actions, scope='q_func') greedy_action = tf.argmax(qvalues, axis=1) action_indices = tf.stack([tf.range(tf.size(act_t_ph)), act_t_ph], axis=-1) onpolicy_qvalues = tf.gather_nd(qvalues, action_indices) td_error = return_ph - onpolicy_qvalues total_error = tf.reduce_mean(tf.square(td_error)) # compute and clip gradients grads_and_vars = optimizer.compute_gradients(total_error, var_list=tf.trainable_variables(scope='q_func')) if grad_clip is not None: grads_and_vars = [(tf.clip_by_value(g, -grad_clip, +grad_clip), v) for g, v in grads_and_vars] train_op = optimizer.apply_gradients(grads_and_vars) def refresh(states, actions): onpolicy_qvals, greedy = session.run([onpolicy_qvalues, greedy_action], feed_dict={ obs_t_ph: states, act_t_ph: actions, }) mask = (actions == greedy) return onpolicy_qvals, mask replay_memory.register_refresh_func(refresh) # initialize variables session.run(tf.global_variables_initializer()) def epsilon_greedy(obs, rnn_state, epsilon): if random.random() < epsilon: action = env.action_space.sample() else: action = session.run(greedy_action, feed_dict={obs_t_ph: obs[None]})[0] return action, None def epsilon_greedy_rnn(obs, rnn_state, epsilon): feed_dict = {obs_t_ph: obs[None]} if rnn_state is not None: feed_dict[q_func.rnn_state] = rnn_state if random.random() < epsilon: action = env.action_space.sample() rnn_state = session.run(rnn_state_tf, feed_dict) else: action, rnn_state = session.run([greedy_action, rnn_state_tf], feed_dict) action = action[0] return action, rnn_state best_mean_reward = -float('inf') obs = env.reset() rnn_state = None n_epochs = 0 policy = epsilon_greedy_rnn if q_func.is_recurrent() else epsilon_greedy rewards = deque(benchmark(benchmark_env, policy, epsilon=1.0, n_episodes=mov_avg_size), maxlen=mov_avg_size) start_time = time.time() for t in itertools.count(): if t % log_every_n_steps == 0: print('Epoch', n_epochs) print('Timestep', t) print('Realtime {:.3f}'.format(time.time() - start_time)) rewards.extend(get_episode_rewards(env)) mean_reward = np.mean(rewards) std_reward = np.std(rewards) best_mean_reward = max(mean_reward, best_mean_reward) print('Episodes', len(get_episode_rewards(env))) print('Exploration', exploration.value(t)) print('Mean reward', mean_reward) print('Best mean reward', best_mean_reward) print('Standard dev', std_reward) print(flush=True) n_epochs += 1 if t >= max_timesteps: break replay_memory.store_frame(obs) obs = replay_memory.encode_recent_observation() action, rnn_state = policy(obs, rnn_state, epsilon=exploration.value(t)) obs, reward, done, _ = env.step(action) replay_memory.store_effect(action, reward, done) if done: obs = env.reset() rnn_state = None if t >= 
learning_starts: if t % target_update_freq == 0: replay_memory.refresh() if t % learning_freq == 0: obs_batch, act_batch, ret_batch = replay_memory.sample(batch_size) session.run(train_op, feed_dict={ obs_t_ph: obs_batch, act_t_ph: act_batch, return_ph: ret_batch, })
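# Hedged aside on the gradient clipping above: tf.clip_by_value clips each
# gradient element independently, which can change the gradient direction.
# A common alternative (an assumption here, not what this snippet uses) is
# global-norm clipping, which rescales all gradients jointly:
#
#   grads, variables = zip(*grads_and_vars)
#   grads, _ = tf.clip_by_global_norm(grads, grad_clip)
#   train_op = optimizer.apply_gradients(zip(grads, variables))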
# Packing sequences n_steps = 2 n_inputs = 3 n_neurons = 5 reset_graph() X = tf.placeholder(tf.float32, [None, n_steps, n_inputs]) X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2])) basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons) output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs, dtype=tf.float32) outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2]) init = tf.global_variables_initializer() X_batch = np.array([ # t = 0 t = 1 [[0, 1, 2], [9, 8, 7]], # instance 1 [[3, 4, 5], [0, 0, 0]], # instance 2 [[6, 7, 8], [6, 5, 4]], # instance 3 [[9, 0, 1], [3, 2, 1]], # instance 4 ]) with tf.Session() as sess: init.run() outputs_val = outputs.eval(feed_dict={X: X_batch})
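# With the X_batch above (shape (4, 2, 3)), static_rnn consumes n_steps
# time-major tensors and the final transpose re-packs them batch-major, so:
#
#   print(outputs_val.shape)  # (4, 2, 5) == (batch, n_steps, n_neurons)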
def build_loss_and_gradients(self, var_list):
    """Build loss function

    .. math::
      \\text{KL}( p(z \mid x) \| q(z) )
      = \mathbb{E}_{p(z \mid x)} [ \log p(z \mid x) - \log q(z; \lambda) ]

    and stochastic gradients based on importance sampling.

    The loss function can be estimated as

    .. math::
      \\frac{1}{S} \sum_{s=1}^S [
        w_{\\text{norm}}(z^s; \lambda) (\log p(x, z^s) - \log q(z^s; \lambda)) ],

    where for :math:`z^s \sim q(z; \lambda)`,

    .. math::
      w_{\\text{norm}}(z^s; \lambda) = w(z^s; \lambda) / \sum_{s=1}^S w(z^s; \lambda)

    normalizes the importance weights,
    :math:`w(z^s; \lambda) = p(x, z^s) / q(z^s; \lambda)`.

    This provides a gradient,

    .. math::
      - \\frac{1}{S} \sum_{s=1}^S [
        w_{\\text{norm}}(z^s; \lambda) \\nabla_{\lambda} \log q(z^s; \lambda) ].
    """
    p_log_prob = [0.0] * self.n_samples
    q_log_prob = [0.0] * self.n_samples
    for s in range(self.n_samples):
      # Form dictionary in order to replace conditioning on prior or
      # observed variable with conditioning on a specific value.
      scope = 'inference_' + str(id(self)) + '/' + str(s)
      dict_swap = {}
      for x, qx in six.iteritems(self.data):
        if isinstance(x, RandomVariable):
          if isinstance(qx, RandomVariable):
            qx_copy = copy(qx, scope=scope)
            dict_swap[x] = qx_copy.value()
          else:
            dict_swap[x] = qx

      for z, qz in six.iteritems(self.latent_vars):
        # Copy q(z) to obtain new set of posterior samples.
        qz_copy = copy(qz, scope=scope)
        dict_swap[z] = qz_copy.value()
        q_log_prob[s] += tf.reduce_sum(
            qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))

      for z in six.iterkeys(self.latent_vars):
        z_copy = copy(z, dict_swap, scope=scope)
        p_log_prob[s] += tf.reduce_sum(z_copy.log_prob(dict_swap[z]))

      for x in six.iterkeys(self.data):
        if isinstance(x, RandomVariable):
          x_copy = copy(x, dict_swap, scope=scope)
          p_log_prob[s] += tf.reduce_sum(x_copy.log_prob(dict_swap[x]))

    p_log_prob = tf.stack(p_log_prob)
    q_log_prob = tf.stack(q_log_prob)

    if self.logging:
      summary_key = 'summaries_' + str(id(self))
      tf.summary.scalar("loss/p_log_prob", tf.reduce_mean(p_log_prob),
                        collections=[summary_key])
      tf.summary.scalar("loss/q_log_prob", tf.reduce_mean(q_log_prob),
                        collections=[summary_key])

    log_w = p_log_prob - q_log_prob
    log_w_norm = log_w - tf.reduce_logsumexp(log_w)
    w_norm = tf.exp(log_w_norm)
    loss = tf.reduce_mean(w_norm * log_w)
    grads = tf.gradients(
        -tf.reduce_mean(q_log_prob * tf.stop_gradient(w_norm)), var_list)
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars
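# Hedged numeric sketch of the self-normalized importance weights used
# above (plain numpy, values made up): subtracting logsumexp before
# exponentiating keeps the weights numerically stable and summing to 1.
import numpy as np

log_w = np.array([-1.0, -3.0, -2.0])             # log p(x,z^s) - log q(z^s)
log_w_norm = log_w - np.logaddexp.reduce(log_w)  # log-sum-exp normalization
w_norm = np.exp(log_w_norm)
assert abs(w_norm.sum() - 1.0) < 1e-12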
def _last_token(x: tf.Tensor, sequence_lengths: tf.Tensor) -> tf.Tensor: last_sequence_index = tf.maximum(0, sequence_lengths - 1) batch_index = tf.range(tf.shape(last_sequence_index)[0]) indices = tf.stack([batch_index, last_sequence_index], axis=1) return tf.gather_nd(x, indices)
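# Hedged usage sketch for _last_token (values made up): for a padded batch
# x of shape [batch, time, ...] it picks x[b, sequence_lengths[b] - 1];
# tf.maximum(0, ...) guards against empty sequences.
#
#   x = tf.constant([[10, 20, 30],
#                    [40, 50, 60]])   # [2, 3]
#   lengths = tf.constant([2, 3])
#   _last_token(x, lengths)           # -> [20, 60]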
def _dual_pointer_decoder(self, decoder_input, decoder_init_state,
                          decoder_hidden, pointing_memory):
    '''
    Dual pointer network decoder and training-op layer.
    :param decoder_input: decoder input
    :param decoder_init_state: initial decoder state; the final encoder state is used
    :param decoder_hidden: decoder hidden-layer size
    :param pointing_memory: targets the decoder points to
    :return:
    '''
    with tf.variable_scope("decoder_v3"):
        init_state = decoder_init_state
        with tf.variable_scope("object_cell_define"):
            object_decoder_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                decoder_hidden, dropout_keep_prob=self.keep_pob)
            object_cell_pre_state = init_state
        with tf.variable_scope("subject_cell_define"):
            subject_decoder_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                decoder_hidden, dropout_keep_prob=self.keep_pob)
            subject_cell_pre_state = init_state
        with tf.variable_scope("decoder_input_layer"):
            decoder_input_per_step = tf.unstack(decoder_input, axis=1)

        with tf.variable_scope("decoding_triple", reuse=tf.AUTO_REUSE):
            # dual pointing part
            object_logits = []
            relation_logits = []
            subject_logits = []
            rev_relation_logits = []
            for i in range(self.max_entities):
                input = decoder_input_per_step[i]
                object_decoder_output, object_state = object_decoder_cell(
                    input, object_cell_pre_state)
                subject_decoder_output, subject_state = subject_decoder_cell(
                    input, subject_cell_pre_state)
                object_decoder_output = tf.expand_dims(
                    object_decoder_output, axis=1)
                subject_decoder_output = tf.expand_dims(
                    subject_decoder_output, axis=1)

                # pointing is performed with multi-head attention
                relation_output, object_pointing = self._multi_head_attention(
                    key=pointing_memory, query=object_decoder_output,
                    value=pointing_memory, attention_name="object_pointing")
                rev_output, subject_pointing = self._multi_head_attention(
                    key=pointing_memory, query=subject_decoder_output,
                    value=pointing_memory, attention_name="subject_pointing")

                object_pointing = tf.squeeze(object_pointing, axis=1)
                subject_pointing = tf.squeeze(subject_pointing, axis=1)
                relation_output = tf.squeeze(relation_output, axis=1)
                rev_output = tf.squeeze(rev_output, axis=1)

                relation_logit = tf.layers.dense(
                    relation_output, units=self.relation_vocab_size,
                    activation=tf.nn.leaky_relu, name="relation_label")
                rev_relation_logit = tf.layers.dense(
                    rev_output, units=self.relation_vocab_size,
                    activation=tf.nn.leaky_relu, name="rev_relation_label")

                object_logits.append(object_pointing)
                relation_logits.append(relation_logit)
                subject_logits.append(subject_pointing)
                rev_relation_logits.append(rev_relation_logit)
                object_cell_pre_state = object_state
                subject_cell_pre_state = subject_state

            object_logits = tf.stack(object_logits, axis=1)
            relation_logits = tf.stack(relation_logits, axis=1)
            subject_logits = tf.stack(subject_logits, axis=1)
            rev_relation_logits = tf.stack(rev_relation_logits, axis=1)

            self.object_predicts = tf.argmax(object_logits, axis=-1)
            self.relation_predicts = tf.argmax(relation_logits, axis=-1)
            self.subject_predicts = tf.argmax(subject_logits, axis=-1)
            self.rev_relation_predicts = tf.argmax(rev_relation_logits, axis=-1)

        with tf.variable_scope("training_layer"):
            # training-op part
            self.object_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=object_logits, labels=self.object_target,
                weights=self.relation_weight)
            self.re_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=relation_logits, labels=self.relation_target,
                weights=self.relation_weight)
            self.subject_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=subject_logits, labels=self.subject_target,
                weights=self.rev_relation_weight)
            self.rev_re_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=rev_relation_logits, labels=self.rev_relation_target,
                weights=self.rev_relation_weight)

            self.object_loss = tf.reduce_mean(self.object_loss)
            self.re_loss = tf.reduce_mean(self.re_loss)
            self.subject_loss = tf.reduce_mean(self.subject_loss)
            self.rev_re_loss = tf.reduce_mean(self.rev_re_loss)
            self.loss = (0.4 * self.object_loss) + (0.4 * self.subject_loss) + (
                0.1 * self.re_loss) + (0.1 * self.rev_re_loss)

            # Adam optimizer with an exponential moving average (EMA)
            # over the trainable parameters
            _optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate)
            self._gradients = _optimizer.compute_gradients(self.loss)
            _apply_op = _optimizer.apply_gradients(
                self._gradients, global_step=self.global_step)
            _ema = tf.train.ExponentialMovingAverage(decay=0.9999)
            with tf.control_dependencies([_apply_op]):
                _ema_op = _ema.apply(
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
            self.train_op = tf.group(_ema_op)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
def call(self, dec_output, final_output, attention_weights, encoder_input,
         inp_shape, tar_shape, batch, training):
    # dec_output might contain negative values; earlier experiments clamped it:
    # dec_output = tf.math.abs(dec_output)
    # dec_output = dec_output + 0.0001  # small constant for numerical stability

    # p_gen (batch_size, tar_seq_len, 1)
    # note: the `batch` argument is recomputed from encoder_input below
    batch = tf.shape(encoder_input)[0]
    p_gen = self.generator_vec(dec_output)
    tf.debugging.check_numerics(final_output, "Nan's in the final_output")
    vocab_dist_ = tf.math.softmax(final_output, axis=-1)
    # vocab_dist (batch_size, tar_seq_len, target_vocab_size)
    vocab_dist = p_gen * vocab_dist_
    tf.debugging.assert_non_negative(p_gen,
                                     message='negative_values_in_p_gen')
    # catches zeros; negatives should already be caught above
    tf.debugging.assert_greater(p_gen, tf.cast([0], dtype=tf.float32),
                                message='zeros_in_p_gen')
    tf.debugging.assert_non_negative(vocab_dist_,
                                     message='negative_values_in_vocab_dist_')
    tf.debugging.assert_greater(vocab_dist_, tf.cast([0], dtype=tf.float32),
                                message='zeros_in_vocab_dist_')
    # attention_dist (batch_size, tar_seq_len, inp_seq_len)
    # attention_weights is 4D, so take the mean over the second dimension (num_heads)
    attention_weights_ = tf.reduce_mean(attention_weights, axis=1)
    attention_dist = tf.math.softmax(attention_weights_, axis=-1)
    tf.debugging.check_numerics(attention_weights,
                                "Nan's in the attention_weights")
    # updates (batch_size, tar_seq_len, inp_seq_len)
    updates = (1 - p_gen) * attention_dist
    shape = tf.shape(final_output)
    # represent the token indices in 3D using meshgrid and tile
    # https://stackoverflow.com/questions/45162998/proper-usage-of-tf-scatter-nd-in-tensorflow-r1-2
    i1, i2 = tf.meshgrid(tf.range(batch), tf.range(tar_shape), indexing="ij")
    i1 = tf.tile(i1[:, :, tf.newaxis], [1, 1, inp_shape])
    i2 = tf.tile(i2[:, :, tf.newaxis], [1, 1, inp_shape])
    # convert to int32 since that is what scatter_nd expects
    indices_ = tf.cast(encoder_input, dtype=tf.int32)
    # tile over tar_seq_len so the input vocab can be copied to the output
    indices_x = tf.tile(indices_[:, tf.newaxis, :], [1, tar_shape, 1])
    indices = tf.stack([i1, i2, indices_x], axis=-1)
    # copy_probs (batch_size, tar_seq_len, target_vocab_size)
    copy_probs = tf.scatter_nd(indices, updates, shape)
    combined_probs = vocab_dist + copy_probs
    combined_probs += 0.001  # avoid log(0)
    combined_logits = tf.math.log(combined_probs)
    tf.debugging.check_numerics(combined_logits,
                                "Nan's in the combined_logits")
    return combined_logits
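# Hedged mini-example of the scatter_nd copy mechanism above (made-up
# numbers): attention mass on repeated input tokens is summed into the same
# vocabulary slot, which is exactly what duplicate indices do in tf.scatter_nd.
import tensorflow as tf

enc_ids = tf.constant([[4, 1, 4]])                # (batch=1, inp_len=3)
attn = tf.constant([[[0.2, 0.3, 0.5],
                     [0.6, 0.3, 0.1]]])           # (1, tar_len=2, 3)
batch, tar_len, inp_len, vocab = 1, 2, 3, 5
i1, i2 = tf.meshgrid(tf.range(batch), tf.range(tar_len), indexing="ij")
i1 = tf.tile(i1[:, :, tf.newaxis], [1, 1, inp_len])
i2 = tf.tile(i2[:, :, tf.newaxis], [1, 1, inp_len])
i3 = tf.tile(enc_ids[:, tf.newaxis, :], [1, tar_len, 1])
indices = tf.stack([i1, i2, i3], axis=-1)
copy_probs = tf.scatter_nd(indices, attn, [batch, tar_len, vocab])
# copy_probs[0, 0] == [0, 0.3, 0, 0, 0.7]; token id 4 appears twice in the
# input, so its attention mass (0.2 + 0.5) is accumulated.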
with tf.name_scope('inputs') as scope:
    Inp0 = tf.placeholder(tf.int32, [None, None], name='sequence_factors1')
    Inp1 = tf.placeholder(tf.float32, [None, 4, None], name='sequence_factors2')
    labels = tf.placeholder(tf.float32, [None, 1], name='labels')
    dropout = tf.placeholder(tf.float32, name='dropout')

with tf.name_scope('embedding') as scope:
    aa_embeddings = tf.get_variable('aa_embeddings', [20, 5])
    embedded_word_ids = tf.gather(aa_embeddings, range(0, 20))
    embed0 = tf.nn.embedding_lookup(aa_embeddings, Inp0, name='lookup')
    embed1 = tf.transpose(embed0, (0, 2, 1))
    unstack0 = tf.unstack(Inp1, axis=-2, name='unstack0')
    unstack1 = tf.unstack(embed1, axis=-2, name='unstack1')
    layer0 = tf.stack([tf.stack(unstack0 + unstack1, axis=1)], -1, name='stack')

with tf.name_scope('layer1') as scope:
    layer1_norm = batch_normalization(layer0, 'BN_layer0')
    layer1 = tf.layers.conv2d(layer1_norm, 32, (4, 4), padding='same',
                              activation=tf.nn.relu)
    layer1_DO = tf.layers.dropout(layer1, rate=dropout, name='Drop1')

with tf.name_scope('layer2') as scope:
    layer2_norm = batch_normalization(layer1_DO, 'BN_layer1')
    layer2 = tf.layers.conv2d(layer2_norm, 64, (4, 4), padding='same',
                              activation=tf.nn.relu)  # assumed: closes the truncated call, mirroring layer1
def complex2real(x, axis=-1): return tf.stack((tf.math.real(x), tf.math.imag(x)), axis=axis)
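# Hedged usage note for complex2real (made-up values): a complex tensor of
# shape (N,) becomes shape (N, 2) with the real part first.
#
#   z = tf.constant([1 + 2j, 3 - 4j], dtype=tf.complex64)
#   complex2real(z)   # -> [[1., 2.], [3., -4.]]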
def ioi_model(input_x, input_x_mask, input_y, input_y_mask, word_emb, keep_rate, conf): if True: embed_dim = 200 max_turn = conf["max_turn_num"] max_word_len = conf["max_turn_len"] max_word_len_a = input_y.shape[1] num_layer = 7 context = input_x context_mask = tf.to_float(input_x_mask) response = input_y response_mask = tf.to_float(input_y_mask) expand_response_mask = tf.tile(tf.expand_dims(response_mask, 1), [1, max_turn, 1]) expand_response_mask = tf.reshape(expand_response_mask, [-1, max_word_len_a]) parall_context_mask = tf.reshape(context_mask, [-1, max_word_len]) context_embeddings = tf.nn.embedding_lookup(word_emb, context) response_embeddings = tf.nn.embedding_lookup(word_emb, response) context_embeddings = tf.layers.dropout(context_embeddings, rate=1.0-keep_rate) response_embeddings = tf.layers.dropout(response_embeddings, rate=1.0-keep_rate) context_embeddings = tf.multiply(context_embeddings, tf.expand_dims(context_mask, axis=-1)) response_embeddings = tf.multiply(response_embeddings, tf.expand_dims(response_mask, axis=-1)) expand_response_embeddings = tf.tile(tf.expand_dims(response_embeddings, 1), [1, max_turn, 1, 1]) expand_response_embeddings = tf.reshape(expand_response_embeddings, [-1, max_word_len_a, embed_dim]) parall_context_embeddings = tf.reshape(context_embeddings, [-1, max_word_len, embed_dim]) context_rep, response_rep = parall_context_embeddings, expand_response_embeddings losses_list = [] y_pred_list = [] logits_list=[] fea_list = [] for k in range(num_layer): inter_feat_collection = [] with tf.variable_scope('dense_interaction_{}'.format(k)): # get the self rep context_self_rep = self_attention(context_rep, context_rep, embed_dim, query_masks=parall_context_mask, key_masks=parall_context_mask, num_blocks=1, num_heads=1, dropout_rate=1.0-keep_rate, use_residual=True, use_feed=True, scope='context_self_attention')[1] # [batch*turn, len_utt, embed_dim, 2] response_self_rep = self_attention(response_rep, response_rep, embed_dim, query_masks=expand_response_mask, key_masks=expand_response_mask, num_blocks=1, num_heads=1, dropout_rate=1.0-keep_rate, use_residual=True, use_feed=True, scope='response_self_attention')[1] # [batch*turn, len_res, embed_dims, 2] # get the attended rep context_cross_rep = self_attention(context_rep, response_rep, embed_dim, query_masks=parall_context_mask, key_masks=expand_response_mask, num_blocks=1, num_heads=1, dropout_rate=1.0-keep_rate, use_residual=True, use_feed=True, scope='context_cross_attention')[1] # [batch*turn, len_utt, embed_dim] response_cross_rep = self_attention(response_rep, context_rep, embed_dim, query_masks=expand_response_mask, key_masks=parall_context_mask, num_blocks=1, num_heads=1, dropout_rate=1.0-keep_rate, use_residual=True, use_feed=True, scope='response_cross_attention')[1] # [batch*turn, len_res, embed_dim] context_inter_feat_multi = tf.multiply(context_rep, context_cross_rep) response_inter_feat_multi = tf.multiply(response_rep, response_cross_rep) context_concat_rep = tf.concat([context_rep, context_self_rep, context_cross_rep, context_inter_feat_multi], axis=-1) response_concat_rep = tf.concat([response_rep, response_self_rep, response_cross_rep, response_inter_feat_multi], axis=-1) context_concat_dense_rep = tf.layers.dense(context_concat_rep, embed_dim, activation=tf.nn.relu, use_bias=True, name='context_dense1') context_concat_dense_rep = tf.layers.dropout(context_concat_dense_rep, rate=1.0-keep_rate) response_concat_dense_rep = tf.layers.dense(response_concat_rep, embed_dim, activation=tf.nn.relu, 
use_bias=True, name='response_dense1')
                response_concat_dense_rep = tf.layers.dropout(response_concat_dense_rep, rate=1.0-keep_rate)

                inter_feat = tf.matmul(context_rep, tf.transpose(response_rep, perm=[0, 2, 1])) / tf.sqrt(tf.to_float(embed_dim))
                inter_feat_self = tf.matmul(context_self_rep, tf.transpose(response_self_rep, perm=[0, 2, 1])) / tf.sqrt(tf.to_float(embed_dim))
                inter_feat_cross = tf.matmul(context_cross_rep, tf.transpose(response_cross_rep, perm=[0, 2, 1])) / tf.sqrt(tf.to_float(embed_dim))

                inter_feat_collection.append(inter_feat)
                inter_feat_collection.append(inter_feat_self)
                inter_feat_collection.append(inter_feat_cross)

                if k == 0:
                    context_rep = tf.add(context_rep, context_concat_dense_rep)
                    response_rep = tf.add(response_rep, response_concat_dense_rep)
                else:
                    context_rep = tf.add_n([parall_context_embeddings, context_rep, context_concat_dense_rep])
                    response_rep = tf.add_n([expand_response_embeddings, response_rep, response_concat_dense_rep])

                context_rep = normalize(context_rep, scope='layer_context_normalize')
                response_rep = normalize(response_rep, scope='layer_response_normalize')
                context_rep = tf.multiply(context_rep, tf.expand_dims(parall_context_mask, axis=-1))
                response_rep = tf.multiply(response_rep, tf.expand_dims(expand_response_mask, axis=-1))

                matching_feat = tf.stack(inter_feat_collection, axis=-1)

            with tf.variable_scope('CRNN_{}'.format(k)):
                conv1 = tf.layers.conv2d(matching_feat, filters=32, kernel_size=(3, 3), strides=(1, 1),
                                         padding='same', activation=tf.nn.relu, name='conv1')
                pool1 = tf.layers.max_pooling2d(conv1, (3, 3), strides=(3, 3), padding='same', name='max_pooling1')
                conv2 = tf.layers.conv2d(pool1, filters=16, kernel_size=(3, 3), strides=(1, 1),
                                         padding='same', activation=tf.nn.relu, name='conv2')
                pool2 = tf.layers.max_pooling2d(conv2, (3, 3), strides=(3, 3), padding='same', name='max_pooling2')
                flatten = tf.contrib.layers.flatten(pool2)
                flatten = tf.layers.dropout(flatten, rate=1.0-keep_rate)
                matching_vector = tf.layers.dense(flatten, embed_dim,
                                                  kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                                  activation=tf.tanh, name='dense_feat')
                matching_vector = tf.reshape(matching_vector, [-1, max_turn, embed_dim])
                final_gru_cell = tf.contrib.rnn.GRUCell(embed_dim, kernel_initializer=tf.orthogonal_initializer())
                _, last_hidden = tf.nn.dynamic_rnn(final_gru_cell, matching_vector, dtype=tf.float32,
                                                   scope='final_GRU')  # TODO: check time_major
                fea_list.append(last_hidden)

        last_hidden = tf.concat(fea_list, axis=-1)
        tf.layers.dense(last_hidden, 50, kernel_initializer=tf.contrib.layers.xavier_initializer(), name='final_v')
        return last_hidden, fea_list

        # NOTE: everything below is unreachable as written (it follows the
        # return above) and references `target`, `logits`, and FLAGS values
        # that are not defined in this function; it looks like a leftover of
        # the original per-layer loss computation.
        if True:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=logits)
            loss = tf.reduce_mean(tf.clip_by_value(loss, -FLAGS.clip_value, FLAGS.clip_value))
            y_pred = tf.nn.softmax(logits)
            losses_list.append(loss)
            y_pred_list.append(y_pred)
            logits_list.append(logits)

        if FLAGS.use_loss_decay:
            loss = sum([((idx + 1) / float(FLAGS.num_layer)) * item for idx, item in enumerate(losses_list)])
        else:
            loss = sum(losses_list)
        loss_list = losses_list
        y_pred = sum(y_pred_list)

        if FLAGS.use_globalLoss:
            logits_sum = tf.add_n(logits_list)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=logits_sum)
            loss = tf.reduce_mean(tf.clip_by_value(loss, -FLAGS.clip_value, FLAGS.clip_value))
            loss_list = [loss]
            y_pred = tf.nn.softmax(logits_sum)

        correct = tf.equal(tf.cast(tf.argmax(y_pred, axis=1), tf.int32), tf.to_int32(target))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 3-D tensor of shape `[image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      as the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width.
      The number of image channels should be divisible by
      (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
      the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
      over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more
      efficient than running the function with global_pool=False and then
      performing global average pooling.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (average-pooled cropped regions),
      and K = crop_size when global_pool is False.

  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
      constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []
  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')
    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')
    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    step_y = (ymax - ymin) / spatial_bins_y
    for bin_x in range(spatial_bins_x):
      step_x = (xmax - xmin) / spatial_bins_x
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    if split.shape.is_fully_defined() and box.shape.is_fully_defined():
      crop = tf.squeeze(
          matmul_crop_and_resize(
              tf.expand_dims(split, axis=0), tf.expand_dims(box, axis=0),
              bin_crop_size),
          axis=0)
    else:
      crop = tf.image.crop_and_resize(
          tf.expand_dims(split, 0), box,
          tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keep_dims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(
          crop, block_size=block_size) for crop in image_crops]

    # Pack image_crops so that first dimension is for position-sensitive boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + num_spatial_bins,
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        squeeze_dims=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features
def _build_network(self, layers):
    network = tf.transpose(self.input_tensor, [0, 2, 3, 1])
    # [batch, assets, window, features]
    network = network / network[:, :, -1, 0, None, None]
    for layer_number, layer in enumerate(layers):
        if layer["type"] == "DenseLayer":
            network = tflearn.layers.core.fully_connected(network,
                                                          int(layer["neuron_number"]),
                                                          layer["activation_function"],
                                                          regularizer=layer["regularizer"],
                                                          weight_decay=layer["weight_decay"])
        elif layer["type"] == "DropOut":
            network = tflearn.layers.core.dropout(network, layer["keep_probability"])
        elif layer["type"] == "EIIE_Dense":
            width = network.get_shape()[2]
            network = tflearn.layers.conv_2d(network, int(layer["filter_number"]),
                                             [1, width], [1, 1], "valid",
                                             layer["activation_function"],
                                             regularizer=layer["regularizer"],
                                             weight_decay=layer["weight_decay"])
        elif layer["type"] == "ConvLayer":
            network = tflearn.layers.conv_2d(network, int(layer["filter_number"]),
                                             allint(layer["filter_shape"]),
                                             allint(layer["strides"]),
                                             layer["padding"],
                                             layer["activation_function"],
                                             regularizer=layer["regularizer"],
                                             weight_decay=layer["weight_decay"])
        elif layer["type"] == "MaxPooling":
            network = tflearn.layers.conv.max_pool_2d(network, layer["strides"])
        elif layer["type"] == "AveragePooling":
            network = tflearn.layers.conv.avg_pool_2d(network, layer["strides"])
        elif layer["type"] == "LocalResponseNormalization":
            network = tflearn.layers.normalization.local_response_normalization(network)
        elif layer["type"] == "EIIE_Output":
            width = network.get_shape()[2]
            network = tflearn.layers.conv_2d(network, 1, [1, width],
                                             padding="valid",
                                             regularizer=layer["regularizer"],
                                             weight_decay=layer["weight_decay"])
            network = network[:, :, 0, 0]
            btc_bias = tf.ones((self.input_num, 1))
            network = tf.concat([btc_bias, network], 1)
            network = tflearn.layers.core.activation(network, activation="softmax")
        elif layer["type"] == "Output_WithW":
            network = tflearn.flatten(network)
            network = tf.concat([network, self.previous_w], axis=1)
            network = tflearn.fully_connected(network, self._rows + 1,
                                              activation="softmax",
                                              regularizer=layer["regularizer"],
                                              weight_decay=layer["weight_decay"])
        elif layer["type"] == "EIIE_Output_WithW":
            width = network.get_shape()[2]
            height = network.get_shape()[1]
            features = network.get_shape()[3]
            network = tf.reshape(network, [self.input_num, int(height), 1, int(width * features)])
            w = tf.reshape(self.previous_w, [-1, int(height), 1, 1])
            network = tf.concat([network, w], axis=3)
            network = tflearn.layers.conv_2d(network, 1, [1, 1],
                                             padding="valid",
                                             regularizer=layer["regularizer"],
                                             weight_decay=layer["weight_decay"])
            network = network[:, :, 0, 0]
            btc_bias = tf.zeros((self.input_num, 1))
            network = tf.concat([btc_bias, network], 1)
            self.voting = network
            network = tflearn.layers.core.activation(network, activation="softmax")
        elif layer["type"] == "EIIE_LSTM" or layer["type"] == "EIIE_RNN":
            network = tf.transpose(network, [0, 2, 3, 1])
            resultlist = []
            reuse = False
            for i in range(self._rows):
                if i > 0:
                    reuse = True
                if layer["type"] == "EIIE_LSTM":
                    result = tflearn.layers.lstm(network[:, :, :, i],
                                                 int(layer["neuron_number"]),
                                                 dropout=layer["dropouts"],
                                                 scope="lstm" + str(layer_number),
                                                 reuse=reuse)
                else:
                    result = tflearn.layers.simple_rnn(network[:, :, :, i],
                                                       int(layer["neuron_number"]),
                                                       dropout=layer["dropouts"],
                                                       scope="rnn" + str(layer_number),
                                                       reuse=reuse)
                resultlist.append(result)
            network = tf.stack(resultlist)
            network = tf.transpose(network, [1, 0, 2])
            network = tf.reshape(network, [-1, self._rows, 1, int(layer["neuron_number"])])
        else:
            raise ValueError("layer {} is not supported.".format(layer["type"]))
    return network
def inference_mem(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True):
    """ infer depth image from multi-view images and cameras """

    # dynamic gpu params
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval
    feature_c = 32
    feature_h = FLAGS.max_h // 4
    feature_w = FLAGS.max_w // 4

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction
    if is_master_gpu:
        ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=False)
    else:
        ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=True)
    ref_feature = ref_tower.get_output()
    ref_feature2 = tf.square(ref_feature)

    view_features = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(
            tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UniNetDS2({'data': view_image}, is_training=True, reuse=True)
        view_features.append(view_tower.get_output())
    view_features = tf.stack(view_features, axis=0)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num,
                                        depth_start=depth_start,
                                        depth_interval=depth_interval)
        view_homographies.append(homographies)
    view_homographies = tf.stack(view_homographies, axis=0)

    # build cost volume by differentiable homography
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []
        for d in range(depth_num):
            # compute cost (standard deviation feature)
            ave_feature = tf.Variable(
                tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave', trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature2 = tf.Variable(
                tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]),
                name='ave2', trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            ave_feature = tf.assign(ave_feature, ref_feature)
            ave_feature2 = tf.assign(ave_feature2, ref_feature2)

            def body(view, ave_feature, ave_feature2):
                """Loop body."""
                homography = tf.slice(view_homographies[view],
                                      begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                warped_view_feature = homography_warping(view_features[view], homography)
                ave_feature = tf.assign_add(ave_feature, warped_view_feature)
                ave_feature2 = tf.assign_add(ave_feature2, tf.square(warped_view_feature))
                view = tf.add(view, 1)
                return view, ave_feature, ave_feature2

            view = tf.constant(0)
            cond = lambda view, *_: tf.less(view, FLAGS.view_num - 1)
            _, ave_feature, ave_feature2 = tf.while_loop(
                cond, body, [view, ave_feature, ave_feature2],
                back_prop=False, parallel_iterations=1)

            ave_feature = tf.assign(
                ave_feature, tf.square(ave_feature) / (FLAGS.view_num * FLAGS.view_num))
            ave_feature2 = tf.assign(
                ave_feature2, ave_feature2 / FLAGS.view_num - ave_feature)
            depth_costs.append(ave_feature2)
        cost_volume = tf.stack(depth_costs, axis=1)

    # filtered cost volume, size of (B, D, H, W, 1)
    if is_master_gpu:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume},
                                               is_training=True, reuse=False)
    else:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume},
                                               is_training=True, reuse=True)
    filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1)

    # depth map by softArgmin
    with tf.name_scope('soft_arg_min'):
        # probability volume by soft max
        probability_volume = tf.nn.softmax(
            tf.scalar_mul(-1, filtered_cost_volume), axis=1, name='prob_volume')
        # depth image by soft argmin
        volume_shape = tf.shape(probability_volume)
        soft_2d = []
        for i in range(FLAGS.batch_size):
            soft_1d = tf.linspace(depth_start[i], depth_end[i],
                                  tf.cast(depth_num, tf.int32))
            soft_2d.append(soft_1d)
        soft_2d = tf.reshape(tf.stack(soft_2d, axis=0),
                             [volume_shape[0], volume_shape[1], 1, 1])
        soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]])
        estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1)
        estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3)

    # probability map
    prob_map = get_propability_map(probability_volume, estimated_depth_map,
                                   depth_start, depth_interval)

    # keep only depths whose probability is at least 0.8
    filtered_depth_map = tf.cast(tf.greater_equal(prob_map, 0.8),
                                 dtype='float32') * estimated_depth_map

    return filtered_depth_map, prob_map
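# Hedged numeric sketch of the soft argmin above (made-up values): the
# estimated depth is the probability-weighted sum over the depth hypotheses.
import numpy as np

depths = np.linspace(425.0, 935.0, 4)     # e.g. depth_start .. depth_end
probs = np.array([0.1, 0.6, 0.2, 0.1])    # softmax over the negated cost
expected_depth = (probs * depths).sum()   # a differentiable "argmin"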
def main(_): if not FLAGS.dataset_dir: raise ValueError( 'You must supply the dataset directory with --dataset_dir') tf.logging.set_verbosity(tf.logging.INFO) with tf.Graph().as_default(): ####################### # Config model_deploy # ####################### deploy_config = model_deploy.DeploymentConfig( num_clones=FLAGS.num_clones, clone_on_cpu=FLAGS.clone_on_cpu, replica_id=FLAGS.task, num_replicas=FLAGS.worker_replicas, num_ps_tasks=FLAGS.num_ps_tasks) # Create global_step with tf.device(deploy_config.variables_device()): global_step = slim.create_global_step() ###################### # Select the dataset # ###################### dataset = data.dataset.get_split(FLAGS.dataset_split_name, FLAGS.dataset_dir) ###################### # Select the network # ###################### network_fn = get_resnet_func(num_classes=(dataset.num_classes - FLAGS.labels_offset), weight_decay=FLAGS.weight_decay, is_training=True) ##################################### # Select the preprocessing function # ##################################### # TODO: preprocessing def random_crop(input, size): return tf.random_crop(input, [size, 5000]) preprocessing_fn = random_crop ############################################################## # Create a dataset provider that loads data from the dataset # ############################################################## with tf.device(deploy_config.inputs_device()): provider = slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=FLAGS.num_readers, common_queue_capacity=20 * FLAGS.batch_size, common_queue_min=10 * FLAGS.batch_size) [ix0, ix1, values, shape] = provider.get( ['tweets/ix0', 'tweets/ix1', 'tweets/values', 'tweets/shape']) indices = tf.stack([ix0, ix1], axis=1) tweet = tf.SparseTensor(indices=indices, values=values, dense_shape=shape) tweet = sparse_ops.sparse_tensor_to_dense(tweet, validate_indices=False) [label] = provider.get(['label']) label -= FLAGS.labels_offset train_tweet_size = FLAGS.train_image_size or network_fn.default_image_size tweet = preprocessing_fn(tweet, train_tweet_size) tweets, labels = tf.train.batch( [tweet, label], batch_size=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * FLAGS.batch_size) labels = slim.one_hot_encoding( labels, dataset.num_classes - FLAGS.labels_offset) batch_queue = slim.prefetch_queue.prefetch_queue( [tweets, labels], capacity=2 * deploy_config.num_clones) #################### # Define the model # #################### def clone_fn(batch_queue): """Allows data parallelism by creating multiple clones of network_fn.""" tweets, labels = batch_queue.dequeue() logits, end_points = network_fn(tweets) ############################# # Specify the loss function # ############################# if 'AuxLogits' in end_points: tf.losses.softmax_cross_entropy( logits=end_points['AuxLogits'], onehot_labels=labels, label_smoothing=FLAGS.label_smoothing, weights=0.4, scope='aux_loss') tf.losses.softmax_cross_entropy( logits=logits, onehot_labels=labels, label_smoothing=FLAGS.label_smoothing, weights=1.0) return end_points # Gather initial summaries. summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) first_clone_scope = deploy_config.clone_scope(0) # Gather update_ops from the first clone. These contain, for example, # the updates for the batch_norm variables created by network_fn. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) # Add summaries for end_points. 
end_points = clones[0].outputs for end_point in end_points: x = end_points[end_point] summaries.add(tf.summary.histogram('activations/' + end_point, x)) summaries.add( tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x))) # Add summaries for losses. for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) # Add summaries for variables. for variable in slim.get_model_variables(): summaries.add(tf.summary.histogram(variable.op.name, variable)) ################################# # Configure the moving averages # ################################# if FLAGS.moving_average_decay: moving_average_variables = slim.get_model_variables() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) else: moving_average_variables, variable_averages = None, None ######################################### # Configure the optimization procedure. # ######################################### with tf.device(deploy_config.optimizer_device()): learning_rate = _configure_learning_rate(dataset.num_samples, global_step) optimizer = _configure_optimizer(learning_rate) summaries.add(tf.summary.scalar('learning_rate', learning_rate)) if FLAGS.sync_replicas: # If sync_replicas is enabled, the averaging will be done in the chief # queue runner. optimizer = tf.train.SyncReplicasOptimizer( opt=optimizer, replicas_to_aggregate=FLAGS.replicas_to_aggregate, variable_averages=variable_averages, variables_to_average=moving_average_variables, replica_id=tf.constant(FLAGS.task, tf.int32, shape=()), total_num_replicas=FLAGS.worker_replicas) elif FLAGS.moving_average_decay: # Update ops executed locally by trainer. update_ops.append( variable_averages.apply(moving_average_variables)) # Variables to train. variables_to_train = _get_variables_to_train() # and returns a train_tensor and summary_op total_loss, clones_gradients = model_deploy.optimize_clones( clones, optimizer, var_list=variables_to_train) # Add total_loss to summary. summaries.add(tf.summary.scalar('total_loss', total_loss)) # Create gradient updates. grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, name='train_op') # Add the summaries from the first clone. These contain the summaries # created by model_fn and either optimize_clones() or _gather_clone_loss(). summaries |= set( tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) # Merge all summaries together. summary_op = tf.summary.merge(list(summaries), name='summary_op') ########################### # Kicks off the training. # ########################### slim.learning.train( train_tensor, logdir=FLAGS.train_dir, master=FLAGS.master, is_chief=(FLAGS.task == 0), init_fn=_get_init_fn(), summary_op=summary_op, number_of_steps=FLAGS.max_number_of_steps, log_every_n_steps=FLAGS.log_every_n_steps, save_summaries_secs=FLAGS.save_summaries_secs, save_interval_secs=FLAGS.save_interval_secs, sync_optimizer=optimizer if FLAGS.sync_replicas else None, session_config=tf.ConfigProto(gpu_options=tf.GPUOptions( visible_device_list='0')))
def _predict_mean(self, Fmu, Fvar): possible_outputs = [tf.fill(tf.stack([tf.shape(Fmu)[0], 1]), np.array(i, dtype=np.int64)) for i in range(self.num_classes)] ps = [self._density(Fmu, Fvar, po) for po in possible_outputs] ps = tf.transpose(tf.stack([tf.reshape(p, (-1,)) for p in ps])) return ps
def inference(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True):
    """ infer depth image from multi-view images and cameras """

    # dynamic gpu params
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction
    if is_master_gpu:
        ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=False)
    else:
        ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=True)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(
            tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UniNetDS2({'data': view_image}, is_training=True, reuse=True)
        view_towers.append(view_tower)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num,
                                        depth_start=depth_start,
                                        depth_interval=depth_interval)
        view_homographies.append(homographies)

    # build cost volume by differentiable homography
    with tf.name_scope('cost_volume_homography'):
        depth_costs = []
        for d in range(depth_num):
            # compute cost (variation metric)
            ave_feature = ref_tower.get_output()
            ave_feature2 = tf.square(ref_tower.get_output())
            for view in range(0, FLAGS.view_num - 1):
                homography = tf.slice(view_homographies[view],
                                      begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
                ave_feature = ave_feature + warped_view_feature
                ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
            ave_feature = ave_feature / FLAGS.view_num
            ave_feature2 = ave_feature2 / FLAGS.view_num
            cost = ave_feature2 - tf.square(ave_feature)
            depth_costs.append(cost)
        cost_volume = tf.stack(depth_costs, axis=1)

    # filtered cost volume, size of (B, D, H, W, 1)
    if is_master_gpu:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume},
                                               is_training=True, reuse=False)
    else:
        filtered_cost_volume_tower = RegNetUS0({'data': cost_volume},
                                               is_training=True, reuse=True)
    filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1)

    # depth map by softArgmin
    with tf.name_scope('soft_arg_min'):
        # probability volume by soft max
        probability_volume = tf.nn.softmax(
            tf.scalar_mul(-1, filtered_cost_volume), axis=1, name='prob_volume')
        # depth image by soft argmin
        volume_shape = tf.shape(probability_volume)
        soft_2d = []
        for i in range(FLAGS.batch_size):
            soft_1d = tf.linspace(depth_start[i], depth_end[i],
                                  tf.cast(depth_num, tf.int32))
            soft_2d.append(soft_1d)
        soft_2d = tf.reshape(tf.stack(soft_2d, axis=0),
                             [volume_shape[0], volume_shape[1], 1, 1])
        soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]])
        estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1)
        estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3)

    # probability map
    prob_map = get_propability_map(probability_volume, estimated_depth_map,
                                   depth_start, depth_interval)

    return estimated_depth_map, prob_map  # , filtered_depth_map, probability_volume
def create_training_operations(self, config):
    num_actions = sum(util.prod(config.actions[name].shape)
                      for name in sorted(self.action))

    # Get hidden layers from network generator, then add NAF outputs;
    # the same is done for the target network.
    flat_mean = layers['linear'](x=self.training_network.output,
                                 size=num_actions, scope='naf_action_means')
    n = 0
    for name in sorted(self.action):
        shape = config.actions[name].shape
        self.action_taken[name] = tf.reshape(
            tensor=flat_mean[:, n:n + util.prod(shape)],
            shape=((-1,) + shape))
        n += util.prod(shape)

    # Advantage computation
    # Network outputs entries of lower triangular matrix L
    lower_triangular_size = num_actions * (num_actions + 1) // 2
    l_entries = layers['linear'](x=self.training_network.output,
                                 size=lower_triangular_size,
                                 scope='naf_matrix_entries')

    l_matrix = tf.exp(x=tf.map_fn(fn=tf.diag, elems=l_entries[:, :num_actions]))

    if num_actions > 1:
        offset = num_actions
        l_columns = list()
        for zeros, size in enumerate(range(num_actions - 1, -1, -1), 1):
            column = tf.pad(tensor=l_entries[:, offset:offset + size],
                            paddings=((0, 0), (zeros, 0)))
            l_columns.append(column)
            offset += size
        l_matrix += tf.stack(values=l_columns, axis=1)

    # P = LL^T
    p_matrix = tf.matmul(a=l_matrix, b=tf.transpose(a=l_matrix, perm=(0, 2, 1)))

    flat_action = list()
    for name in sorted(self.action):
        shape = config.actions[name].shape
        flat_action.append(
            tf.reshape(tensor=self.action[name], shape=(-1, util.prod(shape))))
    flat_action = tf.concat(values=flat_action, axis=1)
    difference = flat_action - flat_mean

    # A = -0.5 (a - mean)P(a - mean)
    advantage = tf.matmul(a=p_matrix, b=tf.expand_dims(input=difference, axis=2))
    advantage = tf.matmul(a=tf.expand_dims(input=difference, axis=1), b=advantage)
    advantage = tf.squeeze(input=(-advantage / 2.0), axis=2)

    # Q = A + V
    # State-value function
    value = layers['linear'](x=self.training_network.output, size=num_actions)
    q_value = value + advantage

    q_values = dict()
    n = 0
    for name in sorted(self.action):
        shape = (-1,) + config.actions[name].shape
        flat_size = util.prod(shape[1:])
        q_values[name] = tf.reshape(tensor=q_value[:, n:n + flat_size], shape=shape)
        n += flat_size
    return q_values
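# Hedged numpy sketch of the triangular assembly above (made-up numbers,
# num_actions = 2): the first num_actions entries become the exponentiated
# diagonal, the remaining entries fill the off-diagonal triangle, and
# P = L L^T is positive definite by construction.
import numpy as np

l_entries = np.array([0.1, -0.3, 0.7])   # lower_triangular_size = 2*3//2 = 3
L = np.diag(np.exp(l_entries[:2]))
L[1, 0] = l_entries[2]
P = L @ L.T
assert np.all(np.linalg.eigvals(P) > 0)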
def get_propability_map(cv, depth_map, depth_start, depth_interval): """ get probability map from cost volume """ def _repeat_(x, num_repeats): """ repeat each element num_repeats times """ x = tf.reshape(x, [-1]) ones = tf.ones((1, num_repeats), dtype='int32') x = tf.reshape(x, shape=(-1, 1)) x = tf.matmul(x, ones) return tf.reshape(x, [-1]) shape = tf.shape(depth_map) batch_size = shape[0] height = shape[1] width = shape[2] depth = tf.shape(cv)[1] # byx coordinate, batched & flattened b_coordinates = tf.range(batch_size) y_coordinates = tf.range(height) x_coordinates = tf.range(width) b_coordinates, y_coordinates, x_coordinates = tf.meshgrid( b_coordinates, y_coordinates, x_coordinates) b_coordinates = _repeat_(b_coordinates, batch_size) y_coordinates = _repeat_(y_coordinates, batch_size) x_coordinates = _repeat_(x_coordinates, batch_size) # d coordinate (floored and ceiled), batched & flattened d_coordinates = tf.reshape((depth_map - depth_start) / depth_interval, [-1]) d_coordinates_left0 = tf.clip_by_value( tf.cast(tf.floor(d_coordinates), 'int32'), 0, depth - 1) d_coordinates_left1 = tf.clip_by_value(d_coordinates_left0 - 1, 0, depth - 1) d_coordinates1_right0 = tf.clip_by_value( tf.cast(tf.ceil(d_coordinates), 'int32'), 0, depth - 1) d_coordinates1_right1 = tf.clip_by_value(d_coordinates1_right0 + 1, 0, depth - 1) # voxel coordinates voxel_coordinates_left0 = tf.stack( [b_coordinates, d_coordinates_left0, y_coordinates, x_coordinates], axis=1) voxel_coordinates_left1 = tf.stack( [b_coordinates, d_coordinates_left1, y_coordinates, x_coordinates], axis=1) voxel_coordinates_right0 = tf.stack( [b_coordinates, d_coordinates1_right0, y_coordinates, x_coordinates], axis=1) voxel_coordinates_right1 = tf.stack( [b_coordinates, d_coordinates1_right1, y_coordinates, x_coordinates], axis=1) # get probability image by gathering and interpolation prob_map_left0 = tf.gather_nd(cv, voxel_coordinates_left0) prob_map_left1 = tf.gather_nd(cv, voxel_coordinates_left1) prob_map_right0 = tf.gather_nd(cv, voxel_coordinates_right0) prob_map_right1 = tf.gather_nd(cv, voxel_coordinates_right1) prob_map = prob_map_left0 + prob_map_left1 + prob_map_right0 + prob_map_right1 prob_map = tf.reshape(prob_map, [batch_size, height, width, 1]) return prob_map
def __init__(self, emb_dim, char_vocab_size, phoneme_vocab_size, seqlen):
    tf.reset_default_graph()

    # define placeholders
    chars = tf.placeholder(tf.int32, [None, seqlen], 'chars')
    phonemes = tf.placeholder(tf.int32, [None, seqlen], 'phonemes')

    # expose placeholders
    self.placeholders = {'chars': chars, 'phonemes': phonemes}

    # infer dimensions of batch
    batch_size_, seq_len_ = tf.unstack(tf.shape(chars))

    # actual length of sequences considering padding
    seqlens = tf.count_nonzero(chars, axis=-1)

    # Character and Phoneme Embedding Matrices
    chE = tf.get_variable('chE', [char_vocab_size, emb_dim], tf.float32,
                          initializer=tf.random_uniform_initializer(-0.01, 0.01))
    phE = tf.get_variable('phE', [1 + phoneme_vocab_size, emb_dim], tf.float32,
                          initializer=tf.random_uniform_initializer(-0.01, 0.01))
    # +1 corresponds to <START> token to signal "start generating"

    # <START> token
    PH_START = tf.tile([phE[-1]], [batch_size_, 1])

    # lookup character embedding; chars is transposed first, so the result
    # is already time-major and can be indexed per time step
    chars_emb = tf.nn.embedding_lookup(chE, tf.transpose(chars))
    chars_emb_list = chars_emb

    # encoder
    encoder_outputs = []
    with tf.variable_scope('encoder') as scope:
        enc_cell = tf.nn.rnn_cell.LSTMCell(emb_dim)
        enc_state = enc_cell.zero_state(batch_size_, tf.float32)
        for i in range(seqlen):
            output, enc_state = enc_cell(chars_emb_list[i], enc_state)
            # accumulate outputs at each step
            encoder_outputs.append(output)

    # output projection parameters
    Wo = tf.get_variable('Wo', shape=[emb_dim, phoneme_vocab_size], dtype=tf.float32,
                         initializer=tf.random_uniform_initializer(-0.01, 0.01))
    bo = tf.get_variable('bo', shape=[phoneme_vocab_size], dtype=tf.float32,
                         initializer=tf.random_uniform_initializer(-0.01, 0.01))

    llogits = []
    with tf.variable_scope('decoder') as scope:
        dec_cell = tf.nn.rnn_cell.LSTMCell(emb_dim, name='decoder_cell')
        dec_state = enc_state
        input_ = PH_START
        # start generation
        for i in range(seqlen):
            output, dec_state = dec_cell(input_, dec_state)
            logits = tf.matmul(output, Wo) + bo  # linear projection
            llogits.append(logits)
            prediction = tf.argmax(tf.nn.softmax(logits), axis=-1)
            input_ = tf.nn.embedding_lookup(phE, prediction)

    # stack the list of logits and convert from time-major to batch-major
    logits = tf.transpose(tf.stack(llogits), [1, 0, 2])

    # probability distribution across vocabulary
    probs = tf.nn.softmax(logits)
    # predictions
    preds = tf.argmax(probs, axis=-1)

    # Cross Entropy
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=phonemes)
    # reduce to scalar
    loss = tf.reduce_mean(ce)

    # Accuracy
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.cast(preds, tf.int32), phonemes), tf.float32))

    self.out = {
        'loss': loss,
        'prob': probs,
        'pred': preds,
        'logits': logits,
        'accuracy': accuracy
    }

    # training operation
    self.trainop = tf.train.AdamOptimizer().minimize(loss)
def __init__(self, actor, critic, experts, obs_dim, memory, observation_shape, action_shape, expert_is_np=False, param_noise=None, action_noise=None, gamma=0.95, tau=0.001, normalize_returns=False, enable_popart=False, normalize_observations=True, batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.), return_range=(-np.inf, np.inf), critic_l2_reg=0., actor_lr=1e-4, critic_lr=1e-3, clip_norm=None, reward_scale=1.): # Inputs. self.obs0 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs0') self.obs1 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs1') self.terminals1 = tf.placeholder(tf.float32, shape=(None, 1), name='terminals1') self.rewards = tf.placeholder(tf.float32, shape=(None, 1), name='rewards') self.actions = tf.placeholder(tf.float32, shape=(None,) + action_shape, name='actions') self.critic_target = tf.placeholder(tf.float32, shape=(None, 1), name='critic_target') self.param_noise_stddev = tf.placeholder(tf.float32, shape=(), name='param_noise_stddev') # Parameters. self.gamma = gamma self.tau = tau self.memory = memory self.normalize_observations = normalize_observations self.normalize_returns = normalize_returns self.action_noise = action_noise self.param_noise = param_noise self.action_range = action_range self.return_range = return_range self.observation_range = observation_range self.critic = critic self.actor = copy(actor) self.actor_lr = actor_lr self.critic_lr = critic_lr self.clip_norm = clip_norm self.enable_popart = enable_popart self.reward_scale = reward_scale self.batch_size = batch_size self.stats_sample = None self.critic_l2_reg = critic_l2_reg self.experts = experts self.obs_dim = obs_dim # self.critic_obs0 = self.experts[0].obs0 # self.critic_obs1 = self.experts[0].obs1 # self.critic_actor = self.experts[0].use_tf_actor # Observation normalization. if self.normalize_observations: with tf.variable_scope('obs_rms'): self.obs_rms = RunningMeanStd(shape=observation_shape) else: self.obs_rms = None normalized_obs0 = tf.clip_by_value(normalize(self.obs0, self.obs_rms), self.observation_range[0], self.observation_range[1]) normalized_obs1 = tf.clip_by_value(normalize(self.obs1, self.obs_rms), self.observation_range[0], self.observation_range[1]) # Return normalization. if self.normalize_returns: with tf.variable_scope('ret_rms'): self.ret_rms = RunningMeanStd() else: self.ret_rms = None # Create target networks. target_actor = copy(self.actor) target_actor.name = 'target_actor' self.target_actor = target_actor target_critic = copy(critic) target_critic.name = 'target_critic' self.target_critic = target_critic expert0_normalize_obs0 = [tf.clip_by_value(normalize(self.obs0[:, :self.obs_dim], self.experts[i].obs_rms), self.observation_range[0], self.observation_range[1]) for i in range(len(self.experts))] expert_qv0 = tf.squeeze(tf.stack([experts[i].critic(expert0_normalize_obs0[i], self.actions)\ for i in range(len(self.experts))]), axis=2) # expert_qv0 = tf.Print(expert_qv0, [expert_qv0], '>>>> qv0 :', summarize=10) expert_qv0 = tf.reduce_sum(self.obs0[:, self.obs_dim:] * tf.transpose(expert_qv0), axis=1) # Create networks and core TF parts that are shared across setup parts. 
self.actor_tf = self.actor(normalized_obs0) self.normalized_critic_tf = critic(normalized_obs0, self.actions, tf.stop_gradient(expert_qv0)) self.critic_tf = tf.clip_by_value(self.normalized_critic_tf, self.return_range[0], self.return_range[1]) expert_qv0_with_actor_tf = tf.squeeze(tf.stack([experts[i].critic(expert0_normalize_obs0[i], self.actor_tf) for i in range(len(self.experts))]), axis=2) expert_qv0_with_actor_tf = tf.reduce_sum(self.obs0[:, self.obs_dim:] * tf.transpose(expert_qv0_with_actor_tf), axis=1) self.normalized_critic_with_actor_tf = critic(normalized_obs0, self.actor_tf, tf.stop_gradient(expert_qv0_with_actor_tf)) self.critic_with_actor_tf = denormalize(tf.clip_by_value(self.normalized_critic_with_actor_tf, self.return_range[0], self.return_range[1]), self.ret_rms) action1 = target_actor(normalized_obs1) expert0_normalize_obs1 = [tf.clip_by_value(normalize(self.obs1[:, :self.obs_dim], self.experts[i].obs_rms), self.observation_range[0], self.observation_range[1]) for i in range(len(self.experts))] expert_qv1 = tf.squeeze(tf.stack([(experts[i].critic(expert0_normalize_obs1[i], action1)) for i in range(len(self.experts))]), axis=2) expert_qv1 = tf.reduce_sum(self.obs1[:, self.obs_dim:] * tf.transpose(expert_qv1), axis=1) self.Q_obs1 = target_critic(normalized_obs1, action1, tf.stop_gradient(expert_qv1)) # self.Q_obs1 = tf.Print(self.Q_obs1, [self.Q_obs1], '>>>> Q :', summarize=10) # self.terminals1 = tf.Print(self.terminals1, [self.terminals1], '>>>> terminal :', summarize=10) self.target_Q = self.rewards + (1. - self.terminals1) * gamma * self.Q_obs1 self.expert_qv1 = expert_qv1 # Set up parts. if self.param_noise is not None: self.setup_param_noise(normalized_obs0) if self.normalize_returns and self.enable_popart: self.setup_popart() self.setup_stats() self.setup_target_network_updates() self.initial_state = None # recurrent architectures not supported yet
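# Hedged reading of the expert mixture above (shapes and numbers made up):
# the stacked critics yield per-expert Q-values [n_experts, batch]; the tail
# of the observation holds mixture weights, so the combined value is a
# per-example weighted sum.
import numpy as np

expert_q = np.array([[1.0, 2.0],               # expert 0, batch of 2
                     [3.0, 4.0]])              # expert 1
weights = np.array([[0.25, 0.75],              # obs[:, obs_dim:] per example
                    [0.5, 0.5]])
combined = (weights * expert_q.T).sum(axis=1)  # -> [2.5, 3.0]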