def __call__(self, numberlayers):
    """Build the evaluation/target Q-network graph and open a session on it.

    A fresh ``tf.Graph`` is populated with the input placeholders, an
    evaluation network and a target network (each a ReLU hidden layer of
    ``numberlayers`` units followed by a linear layer of ``self.actionDim``
    units), the TD target, the mean squared-error loss, an RMSProp training
    op and the assign ops that copy evaluation variables onto target
    variables. Each tensor/op is stored in a graph collection under a
    string key so it can be retrieved from the graph later.

    Args:
        numberlayers: Width of the hidden layer used by both networks.

    Returns:
        ``(Writer, model)``: the ``tf.summary.FileWriter`` created for the
        new graph and the ``tf.Session`` in which the global variables have
        already been initialized.
    """
    graph = tf.Graph()
    with graph.as_default():
        weight_init = tf.random_normal_initializer(0, 0.3)
        bias_init = tf.constant_initializer(0.1)

        def register(*named_items):
            # Store every (collection key, value) pair on the default graph.
            for key, item in named_items:
                tf.add_to_collection(key, item)

        def affine(inputs, weight_name, bias_name, in_dim, out_dim):
            # One fully connected layer without activation; variables are
            # created in whatever variable scope is currently active.
            weight = tf.get_variable(weight_name, [in_dim, out_dim],
                                     initializer=weight_init)
            bias = tf.get_variable(bias_name, [1, out_dim],
                                   initializer=bias_init)
            return weight, bias, tf.matmul(inputs, weight) + bias

        with tf.name_scope('inputs'):
            states = tf.placeholder(tf.float32, [None, self.stateDim])
            next_states = tf.placeholder(tf.float32, [None, self.stateDim])
            rewards = tf.placeholder(tf.float32, [None])
            actions = tf.placeholder(tf.float32, [None, self.actionDim])
            register(('states_', states),
                     ('nextstates_', next_states),
                     ("reward_", rewards),
                     ("action_", actions))

        with tf.name_scope("trainingParams"):
            # Both are zero-valued constants in the graph.
            # NOTE(review): presumably overridden through feed_dict at run
            # time -- confirm against the training loop.
            learning_rate = tf.constant(0, dtype=tf.float32)
            discount = tf.constant(0, dtype=tf.float32)
            register(("learningRate_", learning_rate), ("gamma_", discount))

        with tf.variable_scope('evalnet'):
            with tf.variable_scope('layer1'):
                eval_w1, eval_b1, eval_pre = affine(
                    states, "w1", "b1", self.stateDim, numberlayers)
                eval_hidden = tf.nn.relu(eval_pre)
                register(('Weight1', eval_w1),
                         ('Bias1', eval_b1),
                         ('layer1', eval_hidden))
            with tf.variable_scope('layer2'):
                eval_w2, eval_b2, q_eval = affine(
                    eval_hidden, "w2", "b2", numberlayers, self.actionDim)
                register(('Weight2', eval_w2),
                         ('Bias2', eval_b2),
                         ('Qevalvalue_', q_eval))

        with tf.variable_scope('targetnet'):
            with tf.variable_scope('layer1'):
                target_w1, target_b1, target_pre = affine(
                    next_states, "w1", "b1", self.stateDim, numberlayers)
                target_hidden = tf.nn.relu(target_pre)
                # The 'layer1' collection receives both hidden layers:
                # the evaluation one first, then this one.
                register(('weight1', target_w1),
                         ('bias1', target_b1),
                         ('layer1', target_hidden))
            with tf.variable_scope('layer2'):
                target_w2, target_b2, q_next = affine(
                    target_hidden, "w2", "b2", numberlayers, self.actionDim)
                register(('weight2', target_w2),
                         ('bias2', target_b2),
                         ('Qnext', q_next))

        with tf.variable_scope('Qtarget'):
            best_next_q = tf.reduce_max(q_next, axis=1)
            # No gradient flows into the target network through the target.
            q_target = tf.stop_gradient(rewards + discount * best_next_q)
            register(("Qtarget", q_target))

        with tf.variable_scope('Qevalaction'):
            # `action_` weights the Q-values per action before summing.
            q_eval_action = tf.reduce_sum(tf.multiply(q_eval, actions),
                                          reduction_indices=1)
            register(("Qevalaction", q_eval_action))

        with tf.variable_scope('loss'):
            loss = tf.reduce_mean(
                tf.squared_difference(q_target, q_eval_action))
            register(("loss_", loss))

        with tf.variable_scope('train'):
            # The optimizer is RMSProp; the op name 'adamOptimizer' is kept
            # as-is because it is part of the graph's naming.
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  name='adamOptimizer')
            train_op = optimizer.minimize(loss)
            register(("trainopt", train_op))

        with tf.name_scope("replaceParameters"):
            eval_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope='evalnet')
            target_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                              scope='targetnet')
            replace_ops = [
                tf.assign(target_var, eval_var)
                for target_var, eval_var in zip(target_params, eval_params)
            ]
            register(("evalParams_", eval_params),
                     ("targetParams_", target_params),
                     ("ReplaceTargetParam_", replace_ops))

        merged_summary = tf.summary.merge_all()
        register(("summaryOps", merged_summary))

        session = tf.Session(graph=graph)
        session.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter('/path/to/logs', graph=graph)
        register(("Writer", writer))
    return writer, session
def coordinates_to_iou(y_grid, x_grid, blist, channels_onehot, weights=None):
    """Computes a per-pixel IoU heatmap against the groundtruth boxes.

    For every pixel and every groundtruth box, a hypothetical box with the
    groundtruth's height and width is centered on the pixel and its IoU with
    the groundtruth box is computed. The result is spread over channels with
    the one-hot labels and reduced with a maximum over boxes.

    Args:
      y_grid: A 2D tensor with shape [height, width] which contains the grid
        y-coordinates given in the (output) image dimensions.
      x_grid: A 2D tensor with shape [height, width] which contains the grid
        x-coordinates given in the (output) image dimensions.
      blist: A BoxList object with `num_instances` number of boxes.
      channels_onehot: A 2D tensor with shape [num_instances, num_channels]
        representing the one-hot encoded channel labels for each point.
      weights: Optional 1D tensor with shape [num_instances] holding the
        weight of each instance; when given, each box's IoU map is scaled
        by its weight.

    Returns:
      iou_heatmap: A [height, width, num_channels] shaped float tensor
        denoting the IoU based heatmap.
    """
    image_height, image_width = tf.shape(y_grid)[0], tf.shape(y_grid)[1]
    num_pixels = image_height * image_width

    _, _, box_heights, box_widths = blist.get_center_coordinates_and_sizes()
    num_boxes = tf.shape(box_heights)[0]
    flat_size = num_pixels * num_boxes

    # Broadcast grids against per-box half sizes: [num_boxes, height, width].
    ys = y_grid[tf.newaxis, :, :]
    xs = x_grid[tf.newaxis, :, :]
    half_heights = box_heights[:, tf.newaxis, tf.newaxis] / 2.0
    half_widths = box_widths[:, tf.newaxis, tf.newaxis] / 2.0
    edges = [
        ys - half_heights,  # ymin
        xs - half_widths,   # xmin
        ys + half_heights,  # ymax
        xs + half_widths,   # xmax
    ]

    # [num_boxes, height, width] -> [num_boxes * height * width]
    flat_edges = [tf.reshape(edge, [flat_size]) for edge in edges]
    per_pixel_blist = box_list.BoxList(tf.stack(flat_edges, axis=1))

    # Repeat every groundtruth box once per pixel:
    # [num_boxes, height * width, 4] -> [num_boxes * height * width, 4]
    tiled_targets = tf.tile(blist.get()[:, tf.newaxis, :],
                            [1, num_pixels, 1])
    target_blist = box_list.BoxList(tf.reshape(tiled_targets, [flat_size, 4]))

    flat_ious = box_list_ops.matched_iou(target_blist, per_pixel_blist)
    ious = tf.reshape(flat_ious, [num_boxes, image_height, image_width])

    # [num_boxes, height, width, num_channels]
    per_class_iou = (ious[:, :, :, tf.newaxis] *
                     channels_onehot[:, tf.newaxis, tf.newaxis, :])
    if weights is not None:
        per_class_iou = (
            per_class_iou * weights[:, tf.newaxis, tf.newaxis, tf.newaxis])

    # Clamp at zero, then keep the best box per pixel and channel.
    return tf.reduce_max(tf.maximum(per_class_iou, 0.0), axis=0)
def _build_single_q_network(self, observations, head, state_t, state_tp1,
                            done_mask, reward_t, error_weight):
    """Builds the computational graph for a single Q network.

    Briefly, this part is calculating the following two quantities:
    1. q_value = q_fn(observations)
    2. td_error = q_fn(state_t) - reward_t - gamma * q_fn(state_tp1)
    The optimization target is to minimize the td_error.

    Three variable scopes are used: 'q_fn' holds the online network (created
    for `observations`, then reused for `state_t` and, with double Q, for
    `state_tp1`), and 'q_tp1' holds a separate set of variables used to
    evaluate `state_tp1`.

    Args:
      observations: shape = [batch_size, hparams.fingerprint_length].
        The input of the Q function.
      head: shape = [1].
        The index of the head chosen for decision in bootstrap DQN.
      state_t: shape = [batch_size, hparams.fingerprint_length].
        The state at time step t.
      state_tp1: a list of tensors, with total number of batch_size,
        each has shape = [num_actions, hparams.fingerprint_length].
        Note that the num_actions can be different for each tensor.
        The state at time step t+1, tp1 is short for t plus 1.
      done_mask: shape = [batch_size, 1]
        Whether state_tp1 is the terminal state.
      reward_t: shape = [batch_size, 1]
        the reward at time step t.
      error_weight: shape = [batch_size, 1]
        weight for the loss.

    Returns:
      q_values: Tensor of [batch_size, 1]. The q values for the observations,
        gathered at index `head` along the last axis.
      td_error: Tensor of [batch_size, 1]. The TD error. When bootstrap heads
        are enabled it is additionally masked per head and averaged over
        axis 1.
      weighted_error: The Huber-style loss of the TD error multiplied by
        error_weight and averaged over all elements (`tf.reduce_mean` is
        called without an axis, so this is a single value).
      q_fn_vars: List of tf.Variables.
        The variables of q_fn when computing the q_values of state_t
      q_tp1_vars: List of tf.Variables.
        The variables of q_fn when computing the q_values of state_tp1
    """
    with tf.variable_scope('q_fn'):
        # q_value have shape [batch_size, 1].
        q_values = tf.gather(self.q_fn(observations), head, axis=-1)

    # calculating q_fn(state_t)
    # The Q network shares parameters with the action graph.
    with tf.variable_scope('q_fn', reuse=True):
        q_t = self.q_fn(state_t, reuse=True)
    # Collected outside the 'q_fn' scope so that the filter is
    # "<enclosing scope>/q_fn".
    q_fn_vars = tf.trainable_variables(scope=tf.get_variable_scope().name +
                                       '/q_fn')

    # calculating q_fn(state_tp1)
    # AUTO_REUSE: the first list element creates the variables, the remaining
    # elements share them.
    with tf.variable_scope('q_tp1', reuse=tf.AUTO_REUSE):
        q_tp1 = [self.q_fn(s_tp1, reuse=tf.AUTO_REUSE) for s_tp1 in state_tp1]
    q_tp1_vars = tf.trainable_variables(scope=tf.get_variable_scope().name +
                                        '/q_tp1')

    if self.double_q:
        # Double Q: the online network picks the action, the 'q_tp1' network
        # supplies its value.
        with tf.variable_scope('q_fn', reuse=True):
            q_tp1_online = [self.q_fn(s_tp1, reuse=True) for s_tp1 in state_tp1]
        if self.num_bootstrap_heads:
            num_heads = self.num_bootstrap_heads
        else:
            num_heads = 1
        # determine the action to choose based on online Q estimator.
        # Each index row pairs the best action (argmax over axis 0) with its
        # head number.
        q_tp1_online_idx = [
            tf.stack(
                [tf.argmax(q, axis=0),
                 tf.range(num_heads, dtype=tf.int64)],
                axis=1) for q in q_tp1_online
        ]
        # use the index from max online q_values to compute the value
        # function
        v_tp1 = tf.stack(
            [tf.gather_nd(q, idx) for q, idx in zip(q_tp1, q_tp1_online_idx)],
            axis=0)
    else:
        v_tp1 = tf.stack([tf.reduce_max(q) for q in q_tp1], axis=0)

    # if s_{t+1} is the terminal state, we do not evaluate the Q value of
    # the state.
    q_tp1_masked = (1.0 - done_mask) * v_tp1

    q_t_target = reward_t + self.gamma * q_tp1_masked

    # stop gradient from flowing to the computing graph which computes
    # the Q value of s_{t+1}.
    # td_error has shape [batch_size, 1]
    td_error = q_t - tf.stop_gradient(q_t_target)

    # If use bootstrap, each head is trained with a different subset of the
    # training sample. Like the idea of dropout.
    if self.num_bootstrap_heads:
        head_mask = tf.keras.backend.random_binomial(
            shape=(1, self.num_bootstrap_heads), p=0.6)
        td_error = tf.reduce_mean(td_error * head_mask, axis=1)
    # The loss comes from a traditional trick in convex optimization:
    # http://web.stanford.edu/~boyd/cvxbook/.
    # See Chapter 6 pp. 298
    # It makes the optimization robust.
    # Specifically, the loss will use l1 instead of l2 loss when the td error
    # gets larger than 1.0. The l2 loss has the disadvantage that it has
    # the tendency to be dominated by outliers. In terms of estimation theory,
    # the asymptotic relative efficiency of the l1 loss estimator is better
    # for heavy-tailed distributions.
    errors = tf.where(
        tf.abs(td_error) < 1.0,
        tf.square(td_error) * 0.5,
        1.0 * (tf.abs(td_error) - 0.5))
    weighted_error = tf.reduce_mean(error_weight * errors)
    return q_values, td_error, weighted_error, q_fn_vars, q_tp1_vars
def ensemble_q(self, qs):
    """Blend the smallest and largest Q estimates along the last axis.

    Args:
        qs: Tensor of Q estimates; the reduction runs over its last axis.

    Returns:
        ``lambda * min(qs) + (1 - lambda) * max(qs)`` where ``lambda`` is
        ``self._ensemble_q_lambda``.
    """
    min_weight = self._ensemble_q_lambda
    lowest_q = tf.reduce_min(qs, axis=-1)
    highest_q = tf.reduce_max(qs, axis=-1)
    return min_weight * lowest_q + (1 - min_weight) * highest_q
def build_embedding_layer(features, mode, params, reuse=False):
    """Common embedding layer for feature and kernel functions.

    Builds up to four kinds of embeddings for the context and the question:
    self-trained character embeddings, GloVe word embeddings, translation
    ("MT ELMO") embeddings and ELMo embeddings. Each kind is switched by an
    entry in `params`; a disabled kind yields `None` for both of its outputs.

    Args:
      features: A dictionary containing features, directly copied from
        `model_fn`.
      mode: Mode.
      params: Contains parameters, directly copied from `model_fn`.
      reuse: Reuse variables.

    Returns:
      An 8-tuple `(xc, qc, xw, qw, x_mt, q_mt, x_elmo, q_elmo)`; each pair is
      the (context, question) embedding for, in order, characters, GloVe
      words, translation vectors and ELMo. A pair is `(None, None)` when the
      corresponding option is off.
    """
    with tf.variable_scope('embedding_layer', reuse=reuse):
        # Dropout and the ELMo helpers behave differently while training.
        training = mode == tf.estimator.ModeKeys.TRAIN
        with tf.variable_scope('embedding'):
            # Character embeddings are on unless 'use_char' is explicitly
            # falsy.
            if params.get('use_char', True):
                tf.logging.info('# Char embeddings')
                # self-trained character embedding
                char_emb_mat = tf.get_variable(
                    'char_emb_mat',
                    [params['char_vocab_size'], params['char_emb_size']])
                if training:
                    # noise_shape [vocab, 1] drops whole rows, i.e. whole
                    # character vectors, rather than single components.
                    char_emb_mat = tf.nn.dropout(
                        char_emb_mat,
                        keep_prob=1.0 - params['char_embedding_dropout'],
                        noise_shape=[params['char_vocab_size'], 1])
                # The first and last positions along axis 1 are skipped.
                # NOTE(review): presumably sentence boundary tokens -- confirm.
                xc = tf.nn.embedding_lookup(
                    char_emb_mat, features['indexed_context_chars'][:, 1:-1, :])
                qc = tf.nn.embedding_lookup(
                    char_emb_mat, features['indexed_question_chars'][:, 1:-1, :])
                # Max-pool over axis 2 of the looked-up embeddings.
                xc = tf.reduce_max(xc, 2)
                qc = tf.reduce_max(qc, 2)
            else:
                xc, qc = None, None

            # glove embedding
            if params['use_glove']:
                _, xw, qw = squad_helper.glove_layer(features, mode, params)
            else:
                xw, qw = None, None

            # MT ELMO
            x_mt, q_mt = None, None
            # Translation vectors are placed on device index 1; ELMo (below)
            # uses the next index.
            gpu_id = 1
            if params['mt_elmo']:
                tf.logging.info('# MT ELMO gpu_id %d/%d', gpu_id,
                                params['num_gpus'])
                with tf.device(
                        misc_util.get_device_str(gpu_id, params['num_gpus'])):
                    # Translation vectors
                    x_mt = squad_helper.embed_translation(
                        features['context_words'],
                        features['context_num_words'],
                        params['mt_ckpt_path'],
                        params['include_mt_embeddings'])
                    q_mt = squad_helper.embed_translation(
                        features['question_words'],
                        features['question_num_words'],
                        params['mt_ckpt_path'],
                        params['include_mt_embeddings'])

            # ELMO
            x_elmo, q_elmo = None, None
            if params['elmo']:
                # Incremented whether or not the MT branch above ran.
                gpu_id += 1
                tf.logging.info('# ELMO gpu_id %d/%d', gpu_id,
                                params['num_gpus'])
                with tf.device(
                        misc_util.get_device_str(gpu_id, params['num_gpus'])):
                    # elmo vectors
                    if params['elmo_option'] == 'elmo':
                        # Character-id based ELMo.
                        x_elmo = squad_helper.embed_elmo_chars(
                            features['indexed_context_chars'], 128,
                            params['elmo_path'], training,
                            params['num_gpus'], params['base_gpu_elmo'])
                        q_elmo = squad_helper.embed_elmo_chars(
                            features['indexed_question_chars'], 128,
                            params['elmo_path'], training,
                            params['num_gpus'], params['base_gpu_elmo'])
                    else:
                        # Any other option is forwarded to the sentence-based
                        # ELMo helper.
                        x_elmo = squad_helper.embed_elmo_sentences(
                            features['tokenized_context'], MAX_BATCH_SIZE,
                            params['elmo_path'], training,
                            params['elmo_option'])
                        q_elmo = squad_helper.embed_elmo_sentences(
                            features['tokenized_question'], MAX_BATCH_SIZE,
                            params['elmo_path'], training,
                            params['elmo_option'])

    tf.logging.info('# Done build_embedding_layer')

    return xc, qc, xw, qw, x_mt, q_mt, x_elmo, q_elmo
def model_function(features, labels, mode, params, embeddings):
    """A model function satisfying the tf.estimator API.

    Scores every candidate context of a question, prepends a fixed zero
    score for the NULL answer, and builds the loss, train op and metrics
    (train/eval) or the exported predictions (predict).

    Args:
      features: Dictionary of feature tensors with keys:
        - question_tok: <string> [batch_size, max_question_len]
        - context_tok: <string> [batch_size, max_num_context, max_context_len]
        - question_tok_len: <int32> [batch_size]
        - num_context: <int32> [batch_size]
        - context_tok_len: <int32> [batch_size]
        - question_tok_wid: <int32> [batch_size, max_question_len]
        - context_tok_wid: <int32> [batch_size, max_num_context,
          max_context_len]
        - long_answer_indices: <int32> [batch_size]
      labels: <int32> [batch_size] for answer index (-1 = NULL).
      mode: One of the keys from tf.estimator.ModeKeys.
      params: Dictionary of hyperparameters (unused).
      embeddings: An embedding_utils.PretrainedWordEmbeddings object.

    Returns:
      estimator_spec: A tf.estimator.EstimatorSpec object.
    """
    del params  # Unused.

    is_predict = mode == tf_estimator.ModeKeys.PREDICT
    if is_predict:
        # The exported predictor receives a single example, so give every
        # feature a dummy batch dimension.
        features = {
            name: tf.expand_dims(tensor, 0)
            for name, tensor in features.items()
        }

    embedding_weights, embedding_scaffold = embeddings.get_params(
        trainable=False)

    # Features.
    question_lens = features["question_tok_len"]
    question_wids = features["question_tok_wid"]
    context_wids = features["context_tok_wid"]
    num_contexts = features["num_context"]
    context_lens = features["context_tok_len"]

    # Truncate the contexts and labels to a certain maximum length.
    context_wids, num_contexts, context_lens = nq_long_utils.truncate_contexts(
        context_token_ids=context_wids,
        num_contexts=num_contexts,
        context_len=context_lens,
        max_contexts=FLAGS.max_contexts,
        max_context_len=FLAGS.max_context_len)

    raw_scores = nq_long_decatt_model.build_model(
        question_tok_wid=question_wids,
        question_lens=question_lens,
        context_tok_wid=context_wids,
        context_lens=context_lens,
        embedding_weights=embedding_weights,
        mode=mode)

    # Mask out contexts that are padding: log(1) = 0 leaves real contexts
    # untouched while log(0) = -inf removes padded ones.
    padding_mask = tf.log(
        tf.sequence_mask(num_contexts,
                         tensor_utils.shape(raw_scores, 1),
                         dtype=tf.float32))
    non_null_context_scores = raw_scores + padding_mask

    # <float> [batch_size, 1]
    null_score = tf.zeros([tf.shape(question_wids)[0], 1])

    # Offset everything by 1 to account for null context.
    # [batch_size, 1 + max_contexts]
    context_scores = tf.concat([null_score, non_null_context_scores], 1)

    if is_predict:
        loss = None
        train_op = None
        eval_metric_ops = {}
    else:
        # In the data, NULL is given index -1 but this is not compatible
        # with softmax so shift by 1.
        shifted_labels = (
            nq_long_utils.truncate_labels(labels, FLAGS.max_contexts) + 1)

        # Reweight null examples.
        example_weights = nq_long_utils.compute_null_weights(
            shifted_labels, FLAGS.null_weight)

        # When computing the loss we take only the first label.
        first_labels = shifted_labels[:, 0]

        # []
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=first_labels,
            logits=context_scores,
            weights=example_weights)

        optimizer = tf.train.AdagradOptimizer(
            learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())

        # <int32> [batch_size]
        eval_predictions = tf.to_int32(tf.argmax(context_scores, 1))

        match, gold, predicted = nq_long_utils.compute_match_stats(
            eval_predictions, shifted_labels)

        precision, precision_op = tf.metrics.mean(match, weights=predicted)
        recall, recall_op = tf.metrics.mean(match, weights=gold)
        f1, f1_op = nq_long_utils.f1_metric(
            precision=precision,
            precision_op=precision_op,
            recall=recall,
            recall_op=recall_op)

        # Bogus metric until we figure out how to connect Ming Wei's eval
        # code.
        eval_metric_ops = {
            "precision": (precision, precision_op),
            "recall": (recall, recall_op),
            "f1": (f1, f1_op)
        }

    # In the export, we never predict NULL since the eval metric will
    # compute the best possible F1.
    predictions = {
        "idx": tf.to_int32(tf.argmax(non_null_context_scores, 1)),
        "score": tf.reduce_max(non_null_context_scores, 1),
    }

    if is_predict:
        # Remove the dummy batch dimension if we are exporting the predictor.
        predictions = {
            name: tf.squeeze(tensor, 0)
            for name, tensor in predictions.items()
        }

    return tf_estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        predictions=predictions,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        scaffold=embedding_scaffold)
def add_metric_fn_inputs(params, cls_outputs, box_outputs, metric_fn_inputs,
                         max_detection_points=anchors.MAX_DETECTION_POINTS):
    """Selects top-k predictions and adds the selected to metric_fn_inputs.

    The per-level outputs are flattened and concatenated across levels, then
    either pruned to the `max_detection_points` highest class logits or, when
    pruning is disabled, reduced to the best class per anchor.

    `cls_outputs` and `box_outputs` are only read: when the data format is
    'channels_first' the transposed tensors are kept in local variables
    instead of being written back into the caller's dictionaries, so calling
    this function more than once (or using the dictionaries afterwards) does
    not see doubly transposed tensors.

    Args:
      params: a parameter dictionary that includes `min_level`, `max_level`,
        `batch_size`, `num_classes`, and `data_format`.
      cls_outputs: an OrderDict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: an OrderDict with keys representing levels and values
        representing box regression targets in
        [batch_size, height, width, num_anchors * 4].
      metric_fn_inputs: a dictionary that will hold the top-k selections. The
        keys `cls_outputs_all`, `box_outputs_all`, `indices_all` and
        `classes_all` are written.
      max_detection_points: an integer specifying the maximum detection
        points to keep before NMS. Keep all anchors if
        max_detection_points <= 0.
    """
    batch_size = params['batch_size']
    num_classes = params['num_classes']
    cls_outputs_all = []
    box_outputs_all = []
    # Concatenates class and box of all levels into one tensor.
    for level in range(params['min_level'], params['max_level'] + 1):
        cls_level = cls_outputs[level]
        box_level = box_outputs[level]
        if params['data_format'] == 'channels_first':
            # Move channels last on local tensors only; the input
            # dictionaries must stay untouched.
            cls_level = tf.transpose(cls_level, [0, 2, 3, 1])
            box_level = tf.transpose(box_level, [0, 2, 3, 1])

        cls_outputs_all.append(
            tf.reshape(cls_level, [batch_size, -1, num_classes]))
        box_outputs_all.append(tf.reshape(box_level, [batch_size, -1, 4]))
    cls_outputs_all = tf.concat(cls_outputs_all, 1)
    box_outputs_all = tf.concat(box_outputs_all, 1)

    if max_detection_points > 0:
        # Prune anchors and detections to only keep max_detection_points.
        # Due to some issues, top_k is currently slow in graph mode.
        cls_outputs_all_reshape = tf.reshape(cls_outputs_all,
                                             [batch_size, -1])
        _, cls_topk_indices = tf.math.top_k(cls_outputs_all_reshape,
                                            k=max_detection_points,
                                            sorted=False)
        # A flat index over (anchor, class) splits into the anchor index and
        # the class index.
        indices = cls_topk_indices // num_classes
        classes = cls_topk_indices % num_classes
        cls_indices = tf.stack([indices, classes], axis=2)

        cls_outputs_all_after_topk = tf.gather_nd(cls_outputs_all,
                                                  cls_indices,
                                                  batch_dims=1)
        box_outputs_all_after_topk = tf.gather_nd(box_outputs_all,
                                                  tf.expand_dims(indices, 2),
                                                  batch_dims=1)
    else:
        # Keep all anchors, but for each anchor, just keep the max
        # probability for each class.
        cls_outputs_idx = tf.math.argmax(cls_outputs_all,
                                         axis=-1,
                                         output_type=tf.int32)
        num_anchors = cls_outputs_all.shape[1]

        classes = cls_outputs_idx
        indices = tf.tile(tf.expand_dims(tf.range(num_anchors), axis=0),
                          [batch_size, 1])
        cls_outputs_all_after_topk = tf.reduce_max(cls_outputs_all, -1)
        box_outputs_all_after_topk = box_outputs_all

    metric_fn_inputs['cls_outputs_all'] = cls_outputs_all_after_topk
    metric_fn_inputs['box_outputs_all'] = box_outputs_all_after_topk
    metric_fn_inputs['indices_all'] = indices
    metric_fn_inputs['classes_all'] = classes
def create_policy(*infeed_data):
    """Run the policy network and return log-probabilities of sampled actions.

    The first four infeed tensors are treated as discrete observations, the
    next four as continuous observations and the last one as the initial
    LSTM state. Actions are sampled from the policy logits and the
    log-softmax value of each sampled action is returned.

    Args:
        *infeed_data: Infeed tensors in the order described above.

    Returns:
        Tensor with the log-probability of the sampled action for every
        (batch, time step) entry.
    """
    discrete_inputs = list(infeed_data[:4])
    continuous_inputs = list(infeed_data[4:8])
    state_in = infeed_data[-1]

    # One embedding table per discrete observation.
    embedded = []
    with tf.variable_scope("popnn_lookup"):
        for index, obs in enumerate(discrete_inputs):
            table = tf.get_variable(
                f'emb_matrix{index}',
                [DIS_OBS_CARDINALITY[index], DIS_OBS_EMB_SIZE[index]],
                DTYPE)
            looked_up = embedding_ops.embedding_lookup(
                table, obs, name=f'emb_lookup{index}')
            embedded.append(looked_up)

    # Only the last continuous observation is clipped.
    continuous_inputs[-1] = tf.clip_by_value(
        continuous_inputs[-1], -5.0, 5.0, name="clip")

    # Pair each embedded discrete observation with a continuous one.
    groups = [
        tf.concat([emb, cont], axis=3, name="concat_obs")
        for emb, cont in zip(embedded, continuous_inputs)
    ]

    # Fully connected transformation of the last group only.
    num_output = 8
    groups[-1] = Dense(num_output, dtype=DTYPE)(groups[-1])

    # Max-pool every group over axis 2, then join the groups.
    pooled = [tf.reduce_max(group, axis=2) for group in groups]
    batch_major_input = tf.concat(pooled, axis=2, name="concat_all")

    # PopnnLSTM uses time-major tensors, so swap the first two axes on the
    # way in and again on the way out.
    time_major_input = tf.transpose(
        batch_major_input, perm=[1, 0, 2], name="pre_lstm_transpose")
    lstm = rnn_ops.PopnnLSTM(num_units=LSTM_HIDDEN_SIZE,
                             dtype=DTYPE,
                             partials_dtype=DTYPE,
                             name="lstm")
    start_state = tf.nn.rnn_cell.LSTMStateTuple(state_in[:, 0],
                                                state_in[:, 1])
    time_major_output, _ = lstm(time_major_input,
                                training=True,
                                initial_state=start_state)
    lstm_output = tf.transpose(
        time_major_output, perm=[1, 0, 2], name="post_lstm_transpose")

    logits = Dense(NUM_ACTIONS, name="logits", dtype=DTYPE)(lstm_output)
    log_prob = tf.nn.log_softmax(logits, name="prob")

    # Sample integer actions from the policy, one per (batch, time step).
    flat_logits = tf.reshape(logits, (-1, NUM_ACTIONS))
    sampled = tf.random.categorical(logits=flat_logits, num_samples=1)
    sampled = tf.reshape(sampled, (args.batch_size, args.time_steps))

    # The one-hot mask selects the log-probability of each sampled action.
    selection = tf.one_hot(sampled, NUM_ACTIONS, dtype=DTYPE)
    return tf.reduce_sum(selection * log_prob, axis=-1)
def box_matching(boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

    Every input box is assigned the groundtruth box with which it has the
    highest IoU. Boxes whose best IoU is not positive are treated as
    background.

    Args:
      boxes: a tensor of shape of [batch_size, N, 4] representing the box
        coordinates to be matched to groundtruth boxes.
      gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4]
        representing the groundtruth box coordinates. It is padded with -1s
        to indicate the invalid boxes.
      gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth
        box classes. It is padded with -1s to indicate the invalid classes.

    Returns:
      matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
        the matched groundtruth box coordinates for each input box. If the
        box does not overlap with any groundtruth boxes, the matched boxes of
        it will be set to all 0s.
      matched_gt_classes: a tensor of shape of [batch_size, N], representing
        the matched groundtruth classes for each input box. If the box does
        not overlap with any groundtruth boxes, the matched box classes of it
        will be set to 0, which corresponds to the background class.
      matched_gt_indices: a tensor of shape of [batch_size, N], representing
        the indices of the matched groundtruth boxes in the original gt_boxes
        tensor. If the box does not overlap with any groundtruth boxes, the
        index of the matched groundtruth will be set to -1.
      matched_iou: a tensor of shape of [batch_size, N], representing the IoU
        between the box and its matched groundtruth box. The matched IoU is
        the maximum IoU of the box and all the groundtruth boxes.
      iou: a tensor of shape of [batch_size, N, K], representing the IoU
        matrix between boxes and the groundtruth boxes. The IoU between a box
        and the invalid groundtruth boxes whose coordinates are
        [-1, -1, -1, -1] is -1.
    """
    # [batch_size, N, K] pairwise overlaps.
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # [batch_size, N]; 0.0 means no overlap with any groundtruth, -1.0 means
    # the best match is a padded (invalid) groundtruth.
    matched_iou = tf.reduce_max(iou, axis=-1)

    # <bool> [batch_size, N]
    is_background = tf.less_equal(matched_iou, 0.0)

    best_gt = tf.argmax(iou, axis=-1, output_type=tf.int32)

    # Pair each argmax with its batch position to form gather_nd indices of
    # shape [batch_size, N, 2].
    best_gt_shape = tf.shape(best_gt)
    batch_column = tf.expand_dims(tf.range(best_gt_shape[0]), axis=-1)
    batch_ids = batch_column * tf.ones([1, best_gt_shape[-1]], dtype=tf.int32)
    gather_indices = tf.stack([batch_ids, best_gt], axis=-1)

    # Boxes: zero out the coordinates of background matches.
    gathered_boxes = tf.gather_nd(gt_boxes, gather_indices)
    coordinate_mask = tf.tile(
        tf.expand_dims(is_background, axis=-1), [1, 1, 4])
    matched_gt_boxes = tf.where(
        coordinate_mask,
        tf.zeros_like(gathered_boxes, dtype=tf.float32),
        gathered_boxes)

    # Classes: background matches get class 0.
    gathered_classes = tf.gather_nd(gt_classes, gather_indices)
    matched_gt_classes = tf.where(
        is_background,
        tf.zeros_like(gathered_classes),
        gathered_classes)

    # Indices: background matches get index -1.
    matched_gt_indices = tf.where(
        is_background,
        -tf.ones_like(best_gt),
        best_gt)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
def _scan_step_fn(state, example, packed_length, queue_size, spacing,
                  num_sequences, token_dtype):  # pylint: disable=g-doc-args
    """Transform function used by tf.data.experimental.scan to process an example.

    This is written as a stateless function rather than a class method because
    we trace it with AutoGraph (in order to simplify the conditional), and this
    way we don't have to worry about handling re-tracing semantics.

    Each call places `example` into one slot of the packing queue. If some
    slot has enough room in every sequence, the example goes into the first
    such slot in queue order and a dummy output is emitted. Otherwise the slot
    at `top_index` is emitted as a packed example, cleared, and reused for the
    incoming example.

    Args:
      See the SequenceDatasetPacker class.

    Returns:
      The updated queue state `(availability, contents, top_index)`, and a
      pair `(flag, output_contents)`. `flag` is the negation of "some slot
      could fit the example"; it is true exactly when `output_contents` holds
      a flushed packed example rather than the dummy sequence which will be
      filtered out downstream.
    """

    # Unpack the queue state; the TensorArrays are rebound below each time
    # they are written.
    availability, contents, top_index = state

    # One length per sequence of the example.
    lengths = tf.concat([tf.shape(i) for i in example], axis=0)

    # A slot can take the example only if every sequence has enough space.
    start_availability = availability.stack()
    can_fit = tf.reduce_all(tf.greater_equal(start_availability, lengths),
                            axis=1)
    any_can_fit = tf.reduce_any(can_fit, axis=0)

    # AutoGraph will convert this block to a tf.cond
    if any_can_fit:
        # This indicates where in the FFD queue rotation a given index sits
        shifted_range = (
            tf.range(queue_size, dtype=INDEX_DTYPE) - top_index) % queue_size

        # Mark any indices which cannot accommodate the current example.
        exclusion_mask = tf.cast(tf.logical_not(can_fit),
                                 INDEX_DTYPE) * queue_size

        # Index in [0, queue_size) in which to place the sample. Note, this
        # index is the position in the actual TensorArray, not the index of
        # the FFD queue.
        queue_index = (tf.reduce_min(shifted_range + exclusion_mask) +
                       top_index) % queue_size

        # NOTE(taylorrobie): We emit a non-empty Tensor for downstream checks.
        output_contents = -tf.ones((1, num_sequences), dtype=token_dtype)

    else:
        # Nothing fits: emit the rows of the slot at the top of the queue.
        index_range = top_index * packed_length + tf.range(packed_length)
        output_contents = contents.gather(index_range)

        # Reset the queue state.
        availability = availability.write(
            top_index,
            packed_length * tf.ones((num_sequences, ), dtype=INDEX_DTYPE))
        empty_contents = tf.zeros((packed_length, num_sequences * 2),
                                  dtype=token_dtype)
        contents = contents.scatter(index_range, empty_contents)

        # The freed slot receives the incoming example and the queue rotates.
        queue_index = top_index
        top_index = (top_index + 1) % queue_size

    # Reserve room for the example plus `spacing` in the chosen slot.
    pre_assign_availability = availability.read(queue_index)
    space_left = pre_assign_availability - lengths - spacing
    availability = availability.write(queue_index, space_left)

    # =========================================================================
    # == Update contents ======================================================
    # =========================================================================
    # Consider the following case for a seq-to-seq packing:
    #   (padding is represented as underscores)
    #
    #   Queue starting state:
    #     [1, 3, 2, 4, 6, 1, _, _, _, _, _, ...]
    #     [5, 9, _, _, _, _, _, _, _, _, _, ...]
    #
    #   Examples:
    #     [4, 2, 4], [3]
    #
    #   Desired new queue state:
    #     [1, 3, 2, 4, 6, 1, _, _, 4, 2, 4, _, _, ...]
    #     [5, 9, _, _, 3, _, _, _, _, _, _, _, _, ...]
    #
    # This could be accomplished by creating a TensorArray for each of the two
    # sequences, and scattering into the respective arrays. However TensorArray
    # writes are extremely expensive relative to other operations. So instead
    # we store the contents in a single TensorArray of shape
    # (packed_length, 2), and we pad and concatenate the examples such that
    # they can be added in a single assign:
    #
    #              [_, _, _, _, 4, 2, 4]
    #              [3, _, _, _, _, _, _]
    #                        +
    #  [1, 3, 2, 4, 6, 1, _, _, _, _, _, ...]
    #  [5, 9, _, _, _, _, _, _, _, _, _, ...]
    #
    # And in practice, the extra work of padding is negligible compared to
    # the gain from vectorizing the TensorArray assign. We also store a bit
    # mask denoting where sequences start which is used to compute segment and
    # position metadata:
    #
    #              [_, _, _, _, 1, _, _]
    #              [1, _, _, _, _, _, _]
    #                        +
    #  [1, _, _, _, _, _, _, _, _, _, _, ...]
    #  [1, _, _, _, _, _, _, _, _, _, _, ...]
    #
    # Both the contents and the mask are concatenated in the same TensorArray
    # for performance.

    # Per-sequence write window inside the slot.
    start_index = packed_length - pre_assign_availability
    end_index = start_index + lengths
    # The update covers the union of all windows.
    leftmost = tf.reduce_min(start_index, axis=0)
    rightmost = tf.reduce_max(end_index, axis=0)
    delta = rightmost - leftmost
    # (left pad, right pad) that brings each sequence to length `delta`.
    pad_indices = [
        tf.stack((start_index[i] - leftmost, rightmost - end_index[i]))
        for i in range(num_sequences)
    ]

    padded_examples = [
        tf.pad(ex, padding[tf.newaxis, :])
        for ex, padding in zip(example, pad_indices)
    ]
    padded_examples = tf.transpose(tf.stack(padded_examples))
    # One-hot rows marking where each sequence starts within the window.
    mask_update = tf.one_hot(start_index - leftmost, delta,
                             dtype=contents.dtype, axis=0)

    content_update = tf.concat([padded_examples, mask_update], axis=1)

    index_range = (
        queue_index * packed_length +  # Offset into the right section.
        tf.range(delta, dtype=INDEX_DTYPE) + leftmost)
    # Add the update onto the rows already stored for this slot.
    contents = contents.scatter(index_range,
                                contents.gather(index_range) + content_update)

    state = (availability, contents, top_index)
    return state, (tf.logical_not(any_can_fit), output_contents)
def test_max_reduce(self):
    """Convert a graph holding a keep-dims max reduction over the last axis."""
    image_batch = tf.placeholder(dtype=tf.float32, shape=(4, 32, 32, 3))
    # Only the op added to the graph matters; its result is not used here.
    _ = tf.reduce_max(image_batch, axis=3, keepdims=True)
    self._test_conversion('max_reduce')
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, use_one_hot_embeddings, use_tpu):
    """Creates a classification model.

    A BERT encoder produces one embedding per example (either the first
    token or the mean over tokens, selected by ``FLAGS.emb_rep``). Logits are
    the pairwise dot products between all embeddings in the batch with the
    diagonal (self-similarity) zeroed, reshaped to
    ``[n, real_num, num_classes]`` and reduced over the ``real_num`` axis by
    ``FLAGS.reduce_method`` ("mean", "max" or "random").

    Args:
      bert_config: Passed to ``modeling.BertModel`` as ``config``.
      is_training: Passed to the BERT model; also enables dropout on the
        embeddings.
      input_ids: Passed to the BERT model.
      input_mask: Passed to the BERT model.
      segment_ids: Passed to the BERT model as ``token_type_ids``.
      labels: Label tensor; gathered/sliced alongside the embeddings and
        logged, but not used in the loss itself (the targets are built from
        identity matrices).
      use_one_hot_embeddings: Passed to the BERT model.
      use_tpu: When true, embeddings and labels are concatenated across
        shards and batch padding is removed.

    Returns:
      Tuple ``(loss, per_example_loss, return_logits, probabilities)``.
    """
    tpu_split = FLAGS.tpu_split if use_tpu else 1
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    output_final_layer = model.get_sequence_output()  # shape: bsz, max_seq_len, hidden
    # NOTE(review): `embedding` stays unbound if FLAGS.emb_rep is neither
    # "cls" nor "mean".
    if FLAGS.emb_rep == "cls":
        embedding = tf.squeeze(output_final_layer[:, 0:1, :], axis=1)
    elif FLAGS.emb_rep == "mean":
        embedding = tf.reduce_mean(output_final_layer, axis=1)
    tf.logging.info("per tpu slice")
    tf.logging.info("emebdding size: %s", embedding.shape)
    tf.logging.info("label size: %s", labels.shape)
    tf.logging.info("=======" * 10)
    if use_tpu:
        # for tpu usage: combine embeddings after splitting 8 ways
        # [global_batch_size]
        labels = tpu_utils.cross_shard_concat(labels)
        tf.logging.info("label size: %s", labels.shape)
        tf.logging.info("=======" * 10)
        # [global_batch_size, hidden_size]
        embedding = tpu_utils.cross_shard_concat(embedding)
        tf.logging.info("Global batch size: %s", tensor_utils.shape(embedding, 0))
        tf.logging.info("emebdding size: %s", embedding.shape)
        tf.logging.info("label size: %s", labels.shape)
        tf.logging.info("num tpu shards: %s", tpu_utils.num_tpu_shards())
        tf.logging.info("=======" * 10)
    num_known_classes = FLAGS.num_domains * FLAGS.num_labels_per_domain
    num_unknown_classes = NUM_CLASSES - num_known_classes
    # NOTE(review): `num_classes` / `n_examples` stay unbound if
    # FLAGS.continual_learning is neither "pretrain" nor "few_shot".
    if FLAGS.continual_learning == "pretrain":
        num_classes = num_known_classes
        n_examples = FLAGS.known_num_shots
    elif FLAGS.continual_learning == "few_shot":
        num_classes = num_unknown_classes
        n_examples = FLAGS.few_shot
        if FLAGS.few_shot_known_neg:
            # All classes appear as columns; only the unknown ones are kept
            # as rows further below (see `real_num_classes`).
            num_classes = NUM_CLASSES
            real_num_classes = num_unknown_classes
    # remove padding in each batch
    if use_tpu:
        real_shift = math.ceil(
            num_classes / FLAGS.batch_size) * FLAGS.batch_size
        # if use TPU, then embedding.shape[0] will be
        # (num_classes + pad_num) * 8
        real_indices = tf.range(num_classes)
        for i in range(1, tpu_split):
            real_indices = tf.concat(
                [real_indices, tf.range(num_classes) + real_shift * i], axis=0)
        embedding = tf.gather(embedding, real_indices)
        labels = tf.gather(labels, real_indices)
        tf.logging.info("emebdding size after removing padding in batch: %s",
                        embedding.shape)
        tf.logging.info("label size after removing padding in batch: %s",
                        labels.shape)
        # remove padded batch
        if n_examples < tpu_split:
            real_batch_total = n_examples * num_classes
            embedding = embedding[:real_batch_total]
            labels = labels[:real_batch_total]
            real_num = n_examples
        else:
            real_num = tpu_split
    else:
        # not use TPUs
        if n_examples < tpu_split:
            real_num = n_examples
        else:
            real_num = tpu_split
        real_batch_total = real_num * num_classes
        embedding = embedding[:real_batch_total]
        labels = labels[:real_batch_total]
    tf.logging.info("real emebdding size: %s", embedding.shape)
    tf.logging.info("real label size: %s", labels.shape)
    # The static batch dimension is required: it sizes the identity matrices
    # and reshapes below.
    n = embedding.shape[0].value
    assert n == real_num * num_classes, "n: %d; real_num: %d: num_classes: %d" % (
        n, real_num, num_classes)
    with tf.variable_scope("loss", reuse=tf.AUTO_REUSE):
        if is_training:
            # I.e., 0.1 dropout
            embedding = tf.nn.dropout(embedding, keep_prob=1 - DROPOUT_PROB)
        # Pairwise similarities; the diagonal (each example with itself) is
        # zeroed out.
        logits = tf.matmul(embedding, embedding, transpose_b=True)
        diagonal_matrix = tf.eye(n, n)
        logits = logits - diagonal_matrix * logits
        logits_reshape = tf.reshape(logits, [n, real_num, num_classes])
        if FLAGS.reduce_method == "mean":
            all_logits_sum = tf.reduce_sum(logits_reshape, 1)
            num_counts = tf.ones([n, num_classes]) * real_num
            label_diagonal = tf.eye(num_classes, num_classes)
            label_diagonal = tf.tile(label_diagonal, tf.constant([real_num, 1]))
            # The zeroed self-similarity must not count towards the mean of
            # an example's own class column.
            num_counts = num_counts - label_diagonal
            mean_logits = tf.divide(all_logits_sum, num_counts)
            if FLAGS.few_shot_known_neg:
                real_logits_indices = tf.range(real_num_classes)
                for i in range(1, n_examples):
                    real_logits_indices = tf.concat([
                        real_logits_indices,
                        tf.range(real_num_classes) + num_classes * i
                    ], axis=0)
                mean_logits = tf.gather(mean_logits, real_logits_indices)
                label_diagonal = tf.eye(real_num_classes, num_classes)
                label_diagonal = tf.tile(label_diagonal,
                                         tf.constant([real_num, 1]))
            probabilities = tf.nn.softmax(mean_logits, axis=-1)
            log_probs = tf.nn.log_softmax(mean_logits, axis=-1)
            return_logits = mean_logits
        elif FLAGS.reduce_method == "max":
            max_logits = tf.reduce_max(logits_reshape, 1)
            if FLAGS.min_max:
                # Because the diagonal is 0, we need to assign a large number to get the
                # true min.
                large_number = 50000
                added_logits = logits + diagonal_matrix * large_number
                added_reshape_logits = tf.reshape(added_logits,
                                                  [n, real_num, num_classes])
                min_logits = tf.reduce_min(added_reshape_logits, 1)  # n * num_classes
                masks = tf.tile(tf.eye(num_classes, num_classes),
                                tf.constant([real_num, 1]))
                # Use the min for an example's own class column and the max
                # for every other column.
                max_logits = masks * min_logits + (1 - masks) * max_logits
            label_diagonal = tf.eye(num_classes, num_classes)
            if FLAGS.few_shot_known_neg:
                real_logits_indices = tf.range(real_num_classes)
                # WARNING: current implementation may not be correct for few_shot > 8 on
                # tpus in the following for loop, it should be for i in
                # range(1, real_num) instead of in range(1, n_examples).
                assert n_examples < 8, (
                    "current implementation may not be correct for "
                    "few_shot > 8 on tpus. Need to check")
                # Note: n_examples here is 2 or 5, which is less than tpu_split.
                for i in range(1, n_examples):
                    real_logits_indices = tf.concat([
                        real_logits_indices,
                        tf.range(real_num_classes) + num_classes * i
                    ], axis=0)
                max_logits = tf.gather(max_logits, real_logits_indices)
                label_diagonal = label_diagonal[:real_num_classes]
            label_diagonal = tf.tile(label_diagonal, tf.constant([real_num, 1]))
            probabilities = tf.nn.softmax(max_logits, axis=-1)
            log_probs = tf.nn.log_softmax(max_logits, axis=-1)
            return_logits = max_logits
        elif FLAGS.reduce_method == "random":
            # For every row pick one random index along axis 0 of the second
            # dimension (size real_num) of `logits_reshape`.
            indice_0 = tf.expand_dims(tf.range(n), axis=1)  # n x 1
            indice_1 = tf.random.uniform([n, 1],
                                         minval=0,
                                         maxval=real_num,
                                         dtype=tf.dtypes.int32)
            random_indices = tf.concat([indice_0, indice_1], axis=1)
            random_logits = tf.gather_nd(logits_reshape, random_indices)
            label_diagonal = tf.eye(num_classes, num_classes)
            if FLAGS.few_shot_known_neg:
                real_logits_indices = tf.range(real_num_classes)
                for i in range(1, n_examples):
                    real_logits_indices = tf.concat([
                        real_logits_indices,
                        tf.range(real_num_classes) + num_classes * i
                    ], axis=0)
                random_logits = tf.gather(random_logits, real_logits_indices)
                label_diagonal = label_diagonal[:real_num_classes]
            label_diagonal = tf.tile(label_diagonal, tf.constant([real_num, 1]))
            probabilities = tf.nn.softmax(random_logits, axis=-1)
            log_probs = tf.nn.log_softmax(random_logits, axis=-1)
            return_logits = random_logits
        # NOTE(review): `label_diagonal`, `log_probs`, `return_logits` and
        # `probabilities` are only bound for the three reduce methods above.
        per_example_loss = -tf.reduce_sum(label_diagonal * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, return_logits, probabilities)
def rescale(image):
    """Rescale to full [0, 255] range.

    The image is cast to float32 and mapped linearly so that its global
    minimum becomes 0 and its global maximum becomes 255.

    Args:
      image: Tensor (or value convertible by ``tf.cast``) to rescale.

    Returns:
      A float32 tensor with the same shape as ``image``. A constant image
      (max == min) yields all zeros rather than NaN.
    """
    image = tf.cast(image, tf.float32)
    lowest = tf.reduce_min(image)
    span = tf.reduce_max(image) - lowest
    # A constant image has span == 0; a plain division would give 0/0 = NaN
    # everywhere. divide_no_nan returns 0 for those entries instead.
    return tf.math.divide_no_nan(image - lowest, span) * 255.
# Set-abstraction stages as (npoint, radius, mlp widths), applied in order.
# Every stage uses nsample=32.
_SA_STAGES = (
    (1024, 0.1, (32, 32, 64)),
    (256, 0.2, (64, 64, 128)),
    (64, 0.4, (128, 128, 256)),
    (16, 0.8, (256, 256, 512)),
)

# Feature-propagation MLP widths, ordered from the coarsest level back to the
# input points.
_FP_MLPS = ((256, 256), (256, 256), (256, 128), (128, 128, 128))


def _decode_branch(prefix, xyz, feats, is_training, bn_decay):
    """Propagate the coarsest features back to level 0 for one task branch.

    Runs one ``pointnet_fp_module`` per entry of ``_FP_MLPS`` (scopes
    ``<prefix>_fa_layer1`` .. ``<prefix>_fa_layer4``) followed by the
    ``<prefix>_fc1`` 1x1 convolution with 128 outputs.

    Args:
      prefix: Scope prefix of the branch ('sem' or 'ins').
      xyz: Per-level coordinates, index 0 being the input points.
      feats: Per-level features, aligned with ``xyz``.
      is_training: Forwarded to the layers.
      bn_decay: Forwarded to the layers.

    Returns:
      The output of the ``<prefix>_fc1`` convolution.
    """
    upsampled = feats[-1]
    for step, mlp in enumerate(_FP_MLPS, start=1):
        fine = len(feats) - 1 - step
        upsampled = pointnet_fp_module(xyz[fine],
                                       xyz[fine + 1],
                                       feats[fine],
                                       upsampled,
                                       list(mlp),
                                       is_training,
                                       bn_decay,
                                       scope='%s_fa_layer%d' % (prefix, step))
    return tf_util.conv1d(upsampled,
                          128,
                          1,
                          padding='VALID',
                          bn=True,
                          is_training=is_training,
                          scope='%s_fc1' % prefix,
                          bn_decay=bn_decay)


def get_model(point_cloud, is_training, num_class, bn_decay=None):
    """Joint semantic / instance segmentation PointNet++ graph.

    The first three channels of ``point_cloud`` are used as coordinates and
    the remaining channels as per-point features. A shared four-stage
    set-abstraction encoder feeds two feature-propagation decoders
    ('sem' and 'ins'). The instance branch adds a 'sem_cache' projection of
    the semantic features; the semantic branch max-pools its features over
    the k=30 neighbours selected in the instance-embedding space.

    Args:
      point_cloud: Input tensor indexed as [batch, point, channel].
      is_training: Forwarded to batch norm / dropout layers.
      num_class: Number of output channels of the semantic head.
      bn_decay: Forwarded to the batch norm layers.

    Returns:
      Tuple ``(net_sem, net_ins)``: the semantic head output (``num_class``
      channels) and the instance head output (5 channels).
    """
    # Level 0 is the raw input; each encoder stage appends one coarser level.
    xyz = [point_cloud[:, :, :3]]
    feats = [point_cloud[:, :, 3:]]
    for level, (npoint, radius, mlp) in enumerate(_SA_STAGES, start=1):
        level_xyz, level_feats, _ = pointnet_sa_module(xyz[-1],
                                                       feats[-1],
                                                       npoint=npoint,
                                                       radius=radius,
                                                       nsample=32,
                                                       mlp=list(mlp),
                                                       mlp2=None,
                                                       group_all=False,
                                                       is_training=is_training,
                                                       bn_decay=bn_decay,
                                                       scope='layer%d' % level)
        xyz.append(level_xyz)
        feats.append(level_feats)

    # Semantic decoder, plus the projection that is shared with the instance
    # branch.
    net_sem = _decode_branch('sem', xyz, feats, is_training, bn_decay)
    net_sem_cache = tf_util.conv1d(net_sem,
                                   128,
                                   1,
                                   padding='VALID',
                                   bn=True,
                                   is_training=is_training,
                                   scope='sem_cache',
                                   bn_decay=bn_decay)

    # Instance decoder and head.
    net_ins = _decode_branch('ins', xyz, feats, is_training, bn_decay)
    net_ins = net_ins + net_sem_cache
    net_ins = tf_util.dropout(net_ins,
                              keep_prob=0.5,
                              is_training=is_training,
                              scope='ins_dp1')
    net_ins = tf_util.conv1d(net_ins,
                             5,
                             1,
                             padding='VALID',
                             activation_fn=None,
                             scope='ins_fc4')

    # Neighbours are chosen in the instance-embedding space; no gradient
    # flows through the index selection.
    k = 30
    adj_matrix = tf_util.pairwise_distance_l1(net_ins)
    nn_idx = tf_util.knn_thres(adj_matrix, k=k)
    nn_idx = tf.stop_gradient(nn_idx)

    net_sem = tf_util.get_local_feature(net_sem, nn_idx=nn_idx, k=k)  # [b, n, k, c]
    # Reduce over the neighbour axis. The reduced axis is dropped by default,
    # so the deprecated `keep_dims=False` keyword is not needed.
    net_sem = tf.reduce_max(net_sem, axis=-2)
    net_sem = tf_util.dropout(net_sem,
                              keep_prob=0.5,
                              is_training=is_training,
                              scope='sem_dp1')
    net_sem = tf_util.conv1d(net_sem,
                             num_class,
                             1,
                             padding='VALID',
                             activation_fn=None,
                             scope='sem_fc4')
    return net_sem, net_ins
def _log_prob(self, data, num_samples=1):
    """Return a lower bound on the log likelihood of each row of `data`.

    The bound is ``proposal.log_prob(data) + energy_fn(data) - Z_hat`` where
    ``Z_hat`` is a log-sum-exp estimate built from the data's own energy and
    the energies of ``K - 1`` proposal samples shared across the batch.

    Args:
      data: Batch of observations; its leading dimension is the batch size.
      num_samples: Only forwarded to ``self.proposal.log_prob`` (when that
        accepts it). The bound itself always uses a single sample set.

    Returns:
      Tensor of per-example lower bounds.
    """
    # Due to memory issues the estimator itself is fixed to one sample set;
    # the caller's value is kept aside for the proposal.
    proposal_num_samples = num_samples
    num_samples = 1
    n_data = tf.shape(data)[0]

    # Draw the "unseen" samples once and share them across the batch.
    negatives = self.proposal.sample(num_samples * (self.K - 1))
    if not self.reparameterize_proposal_samples:
        negatives = tf.stop_gradient(negatives)

    # Energies of the negatives, reshaped to [num_samples, K - 1].
    flat_negatives = tf.reshape(negatives, [-1] + self.data_dim)
    negative_energy = tf.reshape(self.energy_fn(flat_negatives),
                                 [num_samples, self.K - 1])
    tf.summary.histogram("log_energy_proposal", negative_energy)
    tf.summary.scalar("min_log_energy_proposal",
                      tf.reduce_min(negative_energy))
    tf.summary.scalar("max_log_energy_proposal",
                      tf.reduce_max(negative_energy))

    # [num_samples] -> tiled to [batch_size, num_samples]
    negative_lse = tf.reduce_logsumexp(negative_energy, axis=1)
    negative_lse_tiled = tf.tile(negative_lse[tf.newaxis, :], [n_data, 1])

    # Energies of the observed data, reshaped to [batch_size].
    log_energy_data = tf.reshape(self.energy_fn(data), [n_data])
    tf.summary.histogram("log_energy_data", log_energy_data)
    tf.summary.scalar("min_log_energy_data", tf.reduce_min(log_energy_data))
    tf.summary.scalar("max_log_energy_data", tf.reduce_max(log_energy_data))
    # [batch_size, num_samples]
    data_energy_tiled = tf.tile(log_energy_data[:, tf.newaxis],
                                [1, num_samples])

    # Combine the data weight with the proposal-sample weights:
    # [batch_size, num_samples]
    paired = tf.stack([data_energy_tiled, negative_lse_tiled], axis=-1)
    log_z_hat = tf.reduce_logsumexp(paired, axis=-1)
    log_z_hat = log_z_hat - tf.log(tf.to_float(self.K))
    # IWAE-style log-mean-exp over the sample axis: [batch_size]
    log_z_hat = tf.reduce_logsumexp(log_z_hat, axis=1) - tf.log(
        tf.to_float(num_samples))

    try:
        # Proposals that are themselves lower bounds may accept num_samples.
        proposal_lp = self.proposal.log_prob(
            data, num_samples=proposal_num_samples)
    except TypeError:
        proposal_lp = self.proposal.log_prob(data)

    return proposal_lp + log_energy_data - log_z_hat
def __call__(self, inputs, training):
    """Add operations to classify a batch of input images.

    Args:
      inputs: A Tensor representing a batch of input images.
      training: A boolean. Set to True to add operations required only when
        training the classifier.

    Returns:
      A logits Tensor with shape [<batch_size>, self.num_classes].
    """
    channels_first = self.data_format == 'channels_first'

    with self._model_variable_scope():
        net = inputs
        if channels_first:
            # NHWC -> NCHW. This provides a large performance boost on GPU,
            # see
            # https://www.tensorflow.org/performance/performance_guide#data_formats
            net = tf.transpose(a=net, perm=[0, 3, 1, 2])

        net = conv2d_fixed_padding(inputs=net,
                                   filters=self.num_filters,
                                   kernel_size=self.kernel_size,
                                   strides=self.conv_stride,
                                   data_format=self.data_format,
                                   name='initial_input')
        net = tf.identity(net, 'initial_conv')

        # V2 omits batch norm and activation after the initial conv because
        # the first ResNet unit performs them for both the shortcut and
        # non-shortcut paths as part of the first block's projection.
        # Cf. Appendix of [2].
        if self.resnet_version == 1:
            net = tf.nn.relu(batch_norm(net, training, self.data_format))

        if self.first_pool_size:
            net = tf.compat.v1.layers.max_pooling2d(
                inputs=net,
                pool_size=self.first_pool_size,
                strides=self.first_pool_stride,
                padding='SAME',
                data_format=self.data_format)
            net = tf.identity(net, 'initial_max_pool')

        for stage, num_blocks in enumerate(self.block_sizes):
            # The filter count doubles per block layer but is capped at 4x
            # the base width; the first block layer skips the projection
            # shortcut.
            net = block_layer(inputs=net,
                              filters=self.num_filters * min((2**stage), 4),
                              bottleneck=self.bottleneck,
                              block_fn=self.block_fn,
                              blocks=num_blocks,
                              strides=self.block_strides[stage],
                              training=training,
                              name='block_layer{}'.format(stage + 1),
                              data_format=self.data_format,
                              shortcut=stage != 0)

        # No BN + ReLU is applied after the last block layer here.

        # The current top layer has shape
        # `batch_size x pool_size x pool_size x final_size`. Global average
        # pooling is done with reduce_mean (it performs better than
        # AveragePooling2D); a global max-pool is computed as well and the
        # two results are concatenated.
        spatial_axes = [2, 3] if channels_first else [1, 2]
        mean_feat = tf.squeeze(
            tf.reduce_mean(input_tensor=net, axis=spatial_axes, keepdims=True),
            spatial_axes)
        peak_feat = tf.squeeze(
            tf.reduce_max(input_tensor=net, axis=spatial_axes, keepdims=True),
            spatial_axes)
        net = tf.identity(tf.concat([mean_feat, peak_feat], axis=1),
                          'final_pooling')

        logits = tf.compat.v1.layers.dense(inputs=net,
                                           units=self.num_classes,
                                           reuse=tf.AUTO_REUSE)
        return tf.identity(logits, 'final_dense')
def softmax(self, target, axis, name=None):
    """Softmax of `target` along `axis`, computed with max-subtraction.

    Args:
      target: Input tensor.
      axis: Axis (or axes) to normalise over.
      name: Optional name for the final division op.

    Returns:
      Tensor of the same shape as `target` whose entries sum to one along
      `axis`.
    """
    # Subtracting the per-axis maximum keeps the exponentials bounded by 1.
    peak = tf.reduce_max(target, axis, keepdims=True)
    shifted_exp = tf.exp(target - peak)
    partition = tf.reduce_sum(shifted_exp, axis, keepdims=True)
    return tf.div(shifted_exp, partition, name)
def call(self, inputs, training=True):
    """
    Run the filter on a batch of sequences and collect its outputs

    The initial state is optionally perturbed with noise, the initial state
    tuple for the configured filter type is assembled, ``self.rnn_layer`` is
    run, and the outputs are reshaped into a common layout. Histogram
    summaries are added for the first (up to three) steps and the last step.

    Parameters
    ----------
    inputs : list of tensors
        the input tensors, unpacked as (raw_observations, actions,
        initial_observations, initial_state, info, step, noise)
    training : bool
        if the model is run in training or test mode

    Returns
    -------
    res : list of tensors
        the prediction output: [particles, weights, state_sequence, covars,
        initial_state, self.covar_start, z, R, Q]
    """
    raw_observations, actions, initial_observations, initial_state, \
        info, step, noise = inputs

    if self.param['problem'] == 'pushing':
        # inform the context about the current objects (for dealing
        # with different rotational symmetries)
        self.context.ob = info[0]

        # get the initial segmented glimpse
        initial_image, initial_tip_pos = initial_observations
        initial_glimpse, initial_pix, _ = \
            self.context.get_initial_glimpse(initial_image, training)
        # append the glimpse, repeated for every step of the sequence, to
        # both observation tuples
        raw_observations = \
            tuple(list(raw_observations) +
                  [tf.tile(initial_glimpse[:, None, :, :, :],
                           [1, self.sequence_length, 1, 1, 1])])
        initial_observations = \
            tuple(list(initial_observations) +
                  [tf.tile(initial_glimpse[:, None, :, :, :],
                           [1, self.sequence_length, 1, 1, 1])])

    if self.param['add_initial_noise']:
        # rescale the noise
        ns = []
        for k in range(self.dim_x):
            ns += [noise[:, k] / self.scale]
        noise = tf.stack(ns, axis=-1)

        if self.param['problem'] == 'kitti':
            # set the position and orientation noise to zero for kitti
            noise = tf.concat([tf.zeros_like(noise[:, :3]), noise[:, 3:]],
                              axis=-1)
        elif self.param['problem'] == 'pushing':
            # set the orientation noise to zero for pushing
            noise = tf.concat(
                [noise[:, :2],
                 tf.zeros_like(noise[:, 2:3]), noise[:, 3:]],
                axis=-1)

        initial_state = initial_state + noise
        if self.param['problem'] == 'pushing':
            initial_state = self.context.correct_state(initial_state,
                                                       diff=False)
        else:
            initial_state = self.context.correct_state(initial_state)

    # ----------------------------------------------------------------
    # construct the initial filter state
    # ----------------------------------------------------------------
    if self.param['filter'] == 'lstm' and \
            self.param['lstm_structure'] == 'lstm2':
        # four zero tensors of width cell.num_units, then state,
        # covariance and a zero column
        init_state = \
            (tf.zeros([self.batch_size, self.cell.num_units],
                      dtype=tf.float32),
             tf.zeros([self.batch_size, self.cell.num_units],
                      dtype=tf.float32),
             tf.zeros([self.batch_size, self.cell.num_units],
                      dtype=tf.float32),
             tf.zeros([self.batch_size, self.cell.num_units],
                      dtype=tf.float32),
             initial_state,
             tf.reshape(self.covar_start, [self.batch_size, -1]),
             tf.zeros([self.batch_size, 1]))
    elif self.param['filter'] == 'lstm' and \
            self.param['lstm_structure'] == 'lstm1':
        # same layout with only two zero tensors of width cell.num_units
        init_state = \
            (tf.zeros([self.batch_size, self.cell.num_units],
                      dtype=tf.float32),
             tf.zeros([self.batch_size, self.cell.num_units],
                      dtype=tf.float32),
             initial_state,
             tf.reshape(self.covar_start, [self.batch_size, -1]),
             tf.zeros([self.batch_size, 1]))
    elif self.param['filter'] != 'pf':
        # every other non-particle filter: state, flattened covariance and
        # a zero column
        init_state = \
            (initial_state,
             tf.reshape(self.covar_start, [self.batch_size, -1]),
             tf.zeros([self.batch_size, 1]))
    else:
        # particle filter: sample the initial particle set around the
        # initial state
        particles, weights = \
            self.cell.sample_from_start(initial_state,
                                        self.covar_start_raw)
        # flatten to [-1, dim_x] for the state correction, then restore
        # the batch dimension
        particles = tf.reshape(particles, [-1, self.dim_x])
        if self.param['problem'] == 'pushing':
            particles = self.context.correct_state(particles, diff=False)
        else:
            particles = self.context.correct_state(particles)
        particles = tf.reshape(particles,
                               [self.batch_size, -1, self.dim_x])

        if self.param['problem'] == 'kitti':
            # set the position and orientation variations of particles
            # to zero for kitti
            init_pose = tf.tile(initial_state[:, None, :3],
                                [1, self.param['num_samples'], 1])
            particles = \
                tf.concat([init_pose, particles[:, :, 3:]], axis=-1)
        elif self.param['problem'] == 'pushing':
            # set the orientation variations of particles to zero for
            # pushing
            particles = \
                tf.concat([particles[:, :, :2],
                           tf.zeros_like(particles[:, :, 2:3]),
                           particles[:, :, 3:]], axis=-1)

        init_state = \
            (tf.reshape(particles, [self.batch_size, -1]),
             tf.reshape(weights, [self.batch_size, -1]),
             tf.zeros([self.batch_size, 1]))

    # ----------------------------------------------------------------
    # run the filter
    # ----------------------------------------------------------------
    # inputs are the raw observation inputs and actions
    inputs = (raw_observations, actions)
    out = self.rnn_layer(inputs, training=training,
                         initial_state=init_state)

    # ----------------------------------------------------------------
    # collect the results
    # ----------------------------------------------------------------
    # out contains the full sequence of values for the outputs defined
    # in the cell
    if self.param['filter'] == 'lstm':
        state_sequence, covariance_sequence, z = out
        # the lstm returns no noise matrices, so the context's Q and R are
        # repeated for every batch element and step
        Q = tf.tile(self.context.Q[None, None, :, :],
                    [self.batch_size, self.sequence_length, 1, 1])
        R = tf.tile(self.context.R[None, None, :, :],
                    [self.batch_size, self.sequence_length, 1, 1])
        # placeholders so that the result list has the same layout for
        # every filter type
        particles = tf.zeros([1, 1, self.dim_x, self.dim_x])
        weights = tf.zeros([])
    elif self.param['filter'] != 'pf':
        state_sequence, covariance_sequence, z, R, Q = out
        # placeholders so that the result list has the same layout for
        # every filter type
        particles = tf.zeros([1, 1, self.dim_x, self.dim_x])
        weights = tf.zeros([])
    else:
        particles, weights, state_sequence, covariance_sequence, \
            z, R, Q = out
        particles = tf.reshape(
            particles,
            [self.batch_size, -1, self.cell.num_particles, self.dim_x])
        weights = tf.reshape(
            weights, [self.batch_size, -1, self.cell.num_particles])
        # weights are in log scale, to turn them into a distribution,
        # we exponentiate and normalize them == apply the softmax
        # transform
        weights = tf.nn.softmax(weights, axis=-1)
        # remove nans and infs
        weights = tf.where(tf.math.is_finite(weights), weights,
                           tf.zeros_like(weights))

    # bring all outputs into a [batch_size, steps, ...] layout
    Q = tf.reshape(Q, [self.batch_size, -1, self.dim_x, self.dim_x])
    R = tf.reshape(R, [self.batch_size, -1, self.dim_z, self.dim_z])
    z = tf.reshape(z, [self.batch_size, -1, self.dim_z])
    covars = \
        tf.reshape(covariance_sequence,
                   [self.batch_size, -1, self.dim_x, self.dim_x])

    res = [
        particles, weights, state_sequence, covars, initial_state,
        self.covar_start, z, R, Q
    ]

    #######################################################################
    # add summaries
    ######################################################################
    # per-step summaries for the first (at most three) steps, taken from
    # the first batch element
    for i in range(min(self.sequence_length, 3)):
        # square root of the absolute diagonal of R at step i
        diag_r = tf.linalg.diag_part(
            tf.slice(R, [0, i, 0, 0], [1, 1, -1, -1]))
        diag_r = tf.squeeze(tf.sqrt(tf.abs(diag_r)))
        for k in range(self.dim_z):
            name = 'r/' + self.context.z_names[k] + '_' + str(i)
            tf.summary.histogram(name, diag_r[k:k + 1] * self.scale)

        if self.param['diagonal_covar']:
            diag_q = tf.linalg.diag_part(
                tf.slice(Q, [0, i, 0, 0], [1, 1, -1, -1]))
            diag_q = tf.squeeze(tf.sqrt(tf.abs(diag_q)))
            for k in range(self.dim_x):
                name = 'q/' + self.context.x_names[k] + '_' + str(i)
                tf.summary.histogram(name, diag_q[k:k + 1] * self.scale)
        else:
            # full covariance: log every entry of the upper triangle of Q
            # NOTE(review): these names do not include the step index i
            for k in range(self.dim_x):
                for j in np.arange(k, self.dim_x):
                    if k != j:
                        n = 'q_sqr/' + self.context.x_names[k] + \
                            '_' + self.context.x_names[j]
                    else:
                        n = 'q_sqr/' + self.context.x_names[k]
                    tf.summary.histogram(n,
                                         Q[0, i, k, j] * self.scale**2)

        diag_c = tf.linalg.diag_part(
            tf.slice(covars, [0, i, 0, 0], [1, 1, -1, -1]))
        diag_c = tf.squeeze(tf.sqrt(tf.abs(diag_c)))
        for k in range(self.dim_x):
            name = 'c/' + self.context.x_names[k] + '_' + str(i)
            tf.summary.histogram(name, diag_c[k:k + 1] * self.scale)

        if self.param['filter'] == 'pf':
            tf.summary.histogram('weights/' + str(i), weights[0, i])
            tf.summary.histogram('weights/max_' + str(i),
                                 tf.reduce_max(weights[:, i, :], axis=-1))

            # count the number of extremely small weights
            small = \
                tf.where(tf.less(weights, 1e-7), tf.ones_like(weights),
                         tf.zeros_like(weights))
            small = tf.reduce_sum(small, axis=-1)
            small = tf.reshape(small, [self.batch_size, -1])
            tf.summary.histogram('weights/small_' + str(i), small[:, i])

    # first and last observation of the first batch element
    for k in range(self.dim_z):
        tf.summary.histogram(
            'observations/start' + self.context.z_names[k],
            z[0, 0, k:k + 1] * self.scale)
        tf.summary.histogram('observations/end' + self.context.z_names[k],
                             z[0, -1, k:k + 1] * self.scale)

    if self.param['filter'] == 'pf':
        # `small` is the value left over from the last iteration of the
        # loop above
        tf.summary.histogram('weights/small_end', small[:, -1])

    # covariance diagonal at the last step of the first batch element
    diag_c = tf.squeeze(tf.linalg.diag_part(covars[0, -1]))
    diag_c = tf.sqrt(tf.abs(diag_c + 1e-3))
    for k in range(self.dim_x):
        # NOTE(review): `i` here is the leftover index of the summary loop
        # above, not the index of the last step - confirm this is intended
        name = 'c_end/' + self.context.x_names[k] + '_' + str(i)
        tf.summary.histogram(name, diag_c[k:k + 1] * self.scale)
    return res
def _stitch(features):
    """Stitch features on the first dimension.

    The per-step entries of each sequence (second axis of the feature
    tensors) are merged into one example per sequence: the step-wise 'task'
    tokens are joined with randomly chosen connector tokens, the `*_refs`
    spans are shifted by the token offset of their step, and an eos marker
    (id 1) is appended to 'task' and 'verbs'.

    Token ids greater than 1 are treated as content throughout - presumably
    0 is padding and 1 is eos; confirm against the vocabulary.

    Args:
      features: Dict of feature tensors. It is modified in place ('task'
        and the squeezed per-step features are reassigned).

    Returns:
      A new dict with the stitched features.
    """
    full_mask = tf.greater(features['task'], 1)
    # A step is non-empty if it has at least one content token.
    step_mask = tf.reduce_any(full_mask, axis=-1)
    # Shift left by one: true for steps that are followed by another
    # non-empty step, i.e. steps that need a connector after them.
    step_mask_exclude_last = tf.pad(step_mask, [[0, 0], [0, 1]],
                                    constant_values=False)[:, 1:]
    num_sequences = common_layers.shape_list(features['task'])[0]
    num_steps = common_layers.shape_list(features['task'])[1]
    connectors = tf.constant(PADDED_CONCATENATORS)
    # Select connectors
    connector_indices = tf.random.uniform([num_sequences * num_steps],
                                          minval=0,
                                          maxval=len(PADDED_CONCATENATORS),
                                          dtype=tf.int32)
    selected_connectors = tf.reshape(
        tf.gather(connectors, connector_indices),
        [num_sequences, num_steps,
         len(PADDED_CONCATENATORS[0])])
    # Zero out the connectors of steps that have no successor.
    selected_connectors = tf.multiply(selected_connectors,
                                      tf.expand_dims(
                                          tf.to_int32(step_mask_exclude_last),
                                          2),
                                      name='connector_mask')
    features['task'] = tf.concat([features['task'], selected_connectors],
                                 axis=-1)
    # Number of content tokens (including connectors) that precede each
    # step; added to the reference positions further below.
    ref_offsets = tf.expand_dims(
        tf.cumsum(tf.reduce_sum(tf.to_int32(tf.greater(features['task'], 1)),
                                -1),
                  exclusive=True,
                  axis=-1), 2)
    features['task'] = tf.reshape(features['task'], [num_sequences, -1])
    full_mask = tf.greater(features['task'], 1)
    full_mask_int = tf.to_int32(full_mask)
    # Left-align the content tokens of every sequence: scatter the masked
    # values into the first `count` positions of each row via a sparse
    # tensor.
    indices = tf.where(
        tf.sequence_mask(lengths=tf.reduce_sum(full_mask_int, -1)))
    values = tf.boolean_mask(tf.reshape(features['task'], [-1]),
                             tf.reshape(full_mask, [-1]))
    sparse_task = tf.sparse.SparseTensor(indices=indices,
                                         values=values,
                                         dense_shape=tf.to_int64(
                                             tf.shape(features['task'])))
    # Stitch task and raw_task
    stitched_features = {}
    stitched_features['task'] = tf.sparse_tensor_to_dense(sparse_task)
    # Trim trailing columns that are padding in every sequence.
    max_len = tf.reduce_max(
        tf.reduce_sum(tf.to_int32(tf.greater(stitched_features['task'], 1)),
                      -1))
    stitched_features['task'] = stitched_features['task'][:, :max_len]
    if 'raw_task' in features:
        # String counterpart of the connectors chosen above (same indices).
        connector_strs = tf.reshape(
            tf.gather(tf.constant(CONCATENATORS_STR), connector_indices),
            [num_sequences, num_steps])
        masked_connector_strs = tf.where(step_mask_exclude_last,
                                         connector_strs,
                                         tf.fill(tf.shape(connector_strs), ''))
        # Join each step with its connector, then join all steps.
        stitched_features['raw_task'] = tf.strings.reduce_join(
            tf.strings.reduce_join(tf.concat([
                tf.expand_dims(features['raw_task'], 2),
                tf.expand_dims(masked_connector_strs, 2)
            ],
                                             axis=2),
                                   axis=-1), -1)
    # Stitch screen sequences
    # A step counts as an action if its verb reference span is non-empty
    # (end index > start index).
    action_lengths = tf.reduce_sum(
        tf.to_int32(
            tf.greater(features['verb_refs'][:, :, 0, 1],
                       features['verb_refs'][:, :, 0, 0])), -1)
    max_action_length = tf.reduce_max(action_lengths)

    def _pad(tensor, padding_value=0):
        """Truncate axis 1 to max_action_length and append one padded slot."""
        shape_list = common_layers.shape_list(tensor)
        assert len(shape_list) >= 2
        padding_list = [[0, 0], [0, 1]] + [[0, 0]] * (len(shape_list) - 2)
        return tf.pad(tensor[:, :max_action_length],
                      padding_list,
                      constant_values=padding_value)

    for key in features.keys():
        if key.endswith('_refs'):
            features[key] = tf.squeeze(features[key], 2)
            # Only non-empty spans (start != end) are shifted by the step
            # offset; empty spans are zeroed.
            ref_mask = tf.expand_dims(
                tf.to_int32(
                    tf.not_equal(features[key][:, :, 0],
                                 features[key][:, :, 1])), 2)
            stitched_features[key] = tf.multiply((features[key] + ref_offsets),
                                                 ref_mask,
                                                 name='ref_mask')
            stitched_features[key] = _pad(stitched_features[key])
        # NOTE(review): the '*_refs' names in this list can never reach this
        # branch because the branch above already matches them.
        elif key in [
                'verbs', 'objects', 'consumed', 'obj_dom_pos', 'obj_text',
                'obj_type', 'obj_clickable', 'obj_screen_pos', 'verb_refs',
                'obj_refs', 'input_refs', 'obj_dom_dist'
        ]:
            features[key] = tf.squeeze(features[key], 2)
            stitched_features[key] = features[key]
            stitched_features[key] = _pad(
                stitched_features[key],
                padding_value=-1 if key == 'obj_type' else 0)
        elif key not in ['task', 'raw_task']:
            # Every remaining feature keeps only its first step.
            stitched_features[key] = features[key][:, 0]
    # Append eos to 'task'
    stitched_features['task'] = tf.pad(stitched_features['task'],
                                       [[0, 0], [0, 1]])
    task_mask = tf.to_int32(tf.greater(stitched_features['task'], 1))
    # The shifted mask minus the mask is 1 exactly at the first position
    # after the content tokens, so adding it writes id 1 there.
    task_eos_mask = tf.pad(task_mask, [[0, 0], [1, 0]],
                           constant_values=1)[:, :-1]
    stitched_features['task'] = stitched_features['task'] + (task_eos_mask -
                                                             task_mask)
    # Append eos
    verb_mask = tf.to_int32(tf.greater(stitched_features['verbs'], 1))
    verb_eos_mask = tf.pad(verb_mask, [[0, 0], [1, 0]],
                           constant_values=1)[:, :-1]
    verb_eos = verb_eos_mask - verb_mask
    stitched_features['verbs'] = stitched_features['verbs'] + verb_eos
    # Append last step refs to 'verb_refs'
    # Column index of the eos token (id 1) in every task row.
    task_lengths = tf.where(tf.equal(stitched_features['task'], 1))[:, 1]
    eos_pos = tf.to_int32(tf.stack([task_lengths, task_lengths + 1], axis=1))
    action_mask = tf.to_int32(
        tf.sequence_mask(action_lengths, max_action_length + 1))
    action_and_eos_mask = tf.pad(action_mask, [[0, 0], [1, 0]],
                                 constant_values=1)[:, :-1]
    # 1 only at the slot right after the last action of each sequence.
    verb_ref_eos = action_and_eos_mask - action_mask
    eos_refs = tf.multiply(tf.tile(tf.expand_dims(eos_pos, 1),
                                   [1, max_action_length + 1, 1]),
                           tf.expand_dims(verb_ref_eos, 2),
                           name='verb_ref_eos')
    stitched_features['verb_refs'] += eos_refs
    return stitched_features
def assertTensorsAlmostEqual(self, x, y, msg=None):
    """Assert that every element of `x` and `y` differs by less than `eps`.

    The tolerance `eps` is not a parameter; it is looked up in the enclosing
    scope (expected to be defined at module level).

    Args:
      x: First tensor.
      y: Second tensor; `x - y` must be a valid operation.
      msg: Optional message prefix. The maximum absolute difference is
        always appended to it.
    """
    abs_delta = tf.abs(tf.cast(x - y, tf.float32))
    detail = "(max delta: {:.4E})".format(tf.reduce_max(abs_delta).numpy())
    # `msg` defaults to None, and `None += str` raises TypeError before the
    # comparison is ever made; fall back to the detail text alone.
    msg = detail if msg is None else msg + detail
    self.assertTrue(tf.reduce_all(abs_delta < eps), msg=msg)
def specgram_summaries(spec,
                       name,
                       hparams,
                       rows=4,
                       columns=4,
                       image=True,
                       phase=True,
                       audio=True):
    """Post summaries of a specgram (Image and Audio).

    For image summaries, creates a rows x columns composite image from the
    batch. Also can create audio summaries for raw audio, but
    hparams.raw_audio must be True.

    If the number of summarised examples is not a multiple of `columns`, a
    square grid of side floor(sqrt(b)) is used instead and only that many
    examples are drawn.

    Args:
      spec: Batch of spectrograms.
      name: String prepended to summaries.
      hparams: Hyperparameters.
      rows: Int, number of rows in image.
      columns: Int, number of columns in image.
      image: Bool, create image summary.
      phase: Bool, create image summary from second channel in the batch.
        Only has an effect when `image` is also set.
      audio: Bool, create audio summaries for each spectrogram in the batch.
    """
    batch_size, n_freq, n_time, unused_channels = spec.get_shape().as_list()
    # Must divide minibatch evenly
    b = min(batch_size, rows * columns)

    if hparams.raw_audio:
        spec = tf.squeeze(spec)
        spec /= tf.expand_dims(tf.reduce_max(spec, axis=1), axis=1)
        tf.summary.audio(name,
                         tf.squeeze(spec),
                         hparams.samples_per_second,
                         max_outputs=b)
    else:
        if image:
            # Grid dimensions must be plain ints: np.floor returns a float
            # and `/` is true division on Python 3.
            if b % columns != 0:
                rows = int(np.floor(np.sqrt(b)))
                columns = rows
            else:
                rows = b // columns
            # Pass exactly as many images as the grid has cells; with the
            # square fallback this can be fewer than b.
            # NOTE(review): assumes form_image_grid needs the image count to
            # equal rows * columns - confirm against the helper.
            n_tiles = rows * columns
            tf.summary.image(
                "Mag/%s" % name,
                form_image_grid(spec[:n_tiles, :, :, :1], [rows, columns],
                                [n_freq, n_time], 1))
            if phase:
                tf.summary.image(
                    "Phase/%s" % name,
                    form_image_grid(spec[:n_tiles, :, :, 1:], [rows, columns],
                                    [n_freq, n_time], 1))
        if audio:
            tf.summary.audio(name,
                             tf_ispecgram(spec,
                                          n_fft=hparams.n_fft,
                                          hop_length=hparams.hop_length,
                                          mask=hparams.mask,
                                          log_mag=hparams.log_mag,
                                          pad=hparams.pad,
                                          re_im=hparams.re_im,
                                          dphase=hparams.dphase,
                                          mag_only=hparams.mag_only),
                             hparams.samples_per_second,
                             max_outputs=b)
def __init__(self, player, session, optimizer, store_replay_every=5,
             max_gradient=5, summary_writer=None):
    """Build the Q-network, the target network, the loss and the update ops.

    Args:
        player: Forwarded to ``create_network`` for both networks.
        session: TensorFlow session used to initialize the variables.
        optimizer: Optimizer exposing ``compute_gradients`` and
            ``apply_gradients``.
        store_replay_every: Stored on the instance; not read in this method.
        max_gradient: Norm to which every individual gradient is clipped.
        summary_writer: Optional writer; when given, it receives the
            session graph.
    """
    self.player = player
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer

    self.memory_buffer = MemoryBuffer(queue_length=REPLAY_QUEUE_SIZE)
    self.batch_size = BATCH_UPDATE_SIZE

    # Exploration schedule parameters.
    self.exploration = INITIAL_EPSILON
    self.init_exp = INITIAL_EPSILON
    self.final_exp = FINAL_EPSILON
    self.anneal_steps = ANNEAL_EPSILON

    self.discount_factor = DISCOUNT_RATE
    self.target_update_rate = TARGET_UPDATE_RATE

    self.max_gradient = max_gradient
    self.reg_param = REGULARIZATION_CONSTANT

    self.store_replay_every = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration = 0

    # Greedy action from the current state: argmax_a Q(s_t, a).
    with tf.name_scope("predict_actions"):
        self.states = tf.placeholder(tf.float32, (None, BOARD_DIMENSIONS),
                                     name="states")
        with tf.variable_scope("q_network"):
            self.q_outputs = create_network(self.states, self.player)
        self.action_scores = tf.identity(self.q_outputs,
                                         name="action_scores")
        # `axis` replaces the deprecated `dimension` argument.
        self.predicted_actions = tf.argmax(self.action_scores, axis=1,
                                           name="predicted_actions")

    # Calculate rewards using next state:
    # r(s_t, a_t) + discount * max_a Q(s_{t+1}, a)
    with tf.name_scope("estimate_future_rewards"):
        self.next_states = tf.placeholder(tf.float32,
                                          (None, BOARD_DIMENSIONS),
                                          name="next_states")
        self.next_state_mask = tf.placeholder(tf.float32, (None,),
                                              name="next_state_masks")
        with tf.variable_scope("target_network"):
            self.target_outputs = create_network(self.next_states,
                                                 self.player)
        # The target network is not trained through this loss.
        self.next_action_scores = tf.stop_gradient(self.target_outputs)
        # `axis` replaces the deprecated `reduction_indices` argument.
        self.target_values = tf.reduce_max(
            self.next_action_scores, axis=[1]) * self.next_state_mask
        self.rewards = tf.placeholder(tf.float32, (None,), name="rewards")
        self.future_rewards = (
            self.rewards + self.discount_factor * self.target_values)

    with tf.name_scope("compute_temporal_differences"):
        self.action_mask = tf.placeholder(tf.float32, (None, TOTAL_ACTIONS),
                                          name="action_mask")
        # Multiply the scores by the action mask and sum over actions.
        self.masked_action_scores = tf.reduce_sum(
            self.action_scores * self.action_mask, axis=[1])

        # Defining the mean-squared loss function
        self.temp_diff = self.masked_action_scores - self.future_rewards
        self.td_loss = tf.reduce_mean(tf.square(self.temp_diff))

        # regularization loss
        learning_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="q_network")
        self.reg_loss = self.reg_param * tf.reduce_sum(
            [tf.reduce_sum(tf.square(x))
             for x in learning_network_variables])

        self.loss = self.td_loss + self.reg_loss
        gradients = self.optimizer.compute_gradients(self.loss)
        # Clip each gradient individually by norm.
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, self.max_gradient),
                                var)

        # add histograms for gradients.
        for grad, var in gradients:
            tf.summary.histogram(var.name, var)
            if grad is not None:
                tf.summary.histogram(var.name + '/gradients', grad)
        self.train_op = self.optimizer.apply_gradients(gradients)

    # update target network with Q-Learning network
    with tf.name_scope("update_target_network"):
        self.target_network_update = []
        # slowly update target network parameters with Q network parameters
        learning_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="q_network")
        target_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="target_network")
        for v_source, v_target in zip(learning_network_variables,
                                      target_network_variables):
            # this is equivalent to
            # target = (1-alpha) * target + alpha * source
            update_op = v_target.assign_sub(
                self.target_update_rate * (v_target - v_source))
            self.target_network_update.append(update_op)
        self.target_network_update = tf.group(*self.target_network_update)

    tf.summary.histogram("action_scores", self.action_scores)
    tf.summary.histogram("next_action_scores", self.next_action_scores)

    # Merge only after every summary has been registered: merge_all() picks
    # up the summaries that exist when it is called, so calling it before
    # the two histograms above would leave them out of `self.summarize`.
    self.summarize = tf.summary.merge_all()
    self.no_op = tf.no_op()

    var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    self.session.run(tf.variables_initializer(var_lists))
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
        self.summary_writer.add_graph(self.session.graph)
    # Set unconditionally so the attribute exists with or without a writer.
    self.summary_every = SUMMARY_PRINT_RATE
def debugprint(x, name=''):
    """Return `x` passed through tf.Print, reporting its min, mean and max.

    The printed message is `name`, a tab, and the tensor's own name.
    """
    label = name + '\t' + x.name
    stats = [tf.reduce_min(x), tf.reduce_mean(x), tf.reduce_max(x)]
    return tf.Print(x, stats, label)
def _body(i, posterior, activation, center, masses):
    """Body of the EM while loop.

    One iteration: re-estimates masses, centers and variances from the
    current posterior, derives the activation logit, and recomputes the
    posterior by normalizing the exponentiated priors over patches.

    Args:
        i: Iteration index; it scales `beta`.
        posterior: Current posterior assignment of votes.
        activation: Ignored (deleted immediately).
        center: Previous centers, blended into the new estimate.
        masses: Ignored on input; recomputed from the posterior.

    Returns:
        Tuple `(posterior, logit, center, masses)`.
    """
    del activation

    def _sum_votes(tensor):
        """Sum over axis 1, then the last axis, then the second-to-last."""
        for ax in (1, -1, -2):
            tensor = tf.reduce_sum(tensor, axis=ax, keep_dims=True)
        return tensor

    def _max_trailing(tensor):
        """Max over the four trailing axes, innermost first."""
        for ax in (-1, -2, -3, -4):
            tensor = tf.reduce_max(tensor, axis=ax, keep_dims=True)
        return tensor

    beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))

    # route: [outdim, height?, width?, batch, indim]
    vote_conf = posterior * input_activation
    # masses: [batch, 1, outdim, 1, height, width, 1, 1]
    masses = _sum_votes(vote_conf) + 0.0000001

    preactivate_unrolled = vote_conf * wx
    # center: [batch, 1, outdim, outatom, height, width]
    weighted_votes = _sum_votes(preactivate_unrolled)
    center = .9 * weighted_votes / masses + .1 * center

    noise = (wx - center) * (wx - center)
    variance = min_var + _sum_votes(vote_conf * noise) / masses
    log_variance = tf.log(variance)
    p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
    log_2pi = tf.log(2 * math.pi)

    win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
    logit = beta * (win - activation_biases * 5000)
    # min(0, logit) - log(1 + exp(-|logit|))
    activation_update = tf.minimum(
        0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))

    log_det_sigma = -1 * p_i
    sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
    exp_update = tf.reduce_sum(noise / (2 * variance), axis=3,
                               keep_dims=True)
    prior_update = activation_update - sigma_update - exp_update

    # Shift by the maximum before exponentiating.
    max_prior_update = _max_trailing(prior_update)
    prior_normal = tf.add(prior_update, -1 * max_prior_update)
    prior_exp = tf.exp(prior_normal)

    t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
    c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
    border = (k - 1) * (k - 1)
    pad_prior = tf.pad(
        c_prior,
        [[0, 0], [border, border], [border, border], [0, 0]],
        'CONSTANT')
    patch_prior = tf.extract_image_patches(
        images=pad_prior,
        ksizes=[1, k, k, 1],
        strides=[1, k, k, 1],
        rates=[1, k - 1, k - 1, 1],
        padding='VALID')
    sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
    sum_prior_patch = tf.extract_image_patches(
        images=sum_prior,
        ksizes=[1, k, k, 1],
        strides=[1, 1, 1, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
    sum_prior_reshape = tf.reshape(
        sum_prior_patch,
        [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001

    posterior = prior_exp / sum_prior_reshape
    return (posterior, logit, center, masses)
def _quantizable_concat(self,
                        inputs,
                        axis,
                        is_training,
                        is_quantized=True,
                        default_min=0,
                        default_max=6,
                        ema_decay=0.999,
                        scope='quantized_concat'):
    """Concatenate tensors, optionally fake-quantizing them to a shared range.

    With quantization enabled, every input is fake-quantized using the same
    min/max pair, so all concat inputs share one range.

    Args:
      inputs: list of tensors to concatenate.
      axis: dimension along which to concatenate.
      is_training: true if the graph is a training graph.
      is_quantized: flag to enable/disable quantization.
      default_min: default min value for fake quant op.
      default_max: default max value for fake quant op.
      ema_decay: the moving average decay for the quantization variables.
      scope: Optional scope for variable_scope.

    Returns:
      Tensor resulting from concatenation of input tensors
    """
    if not is_quantized:
        return tf.concat(inputs, axis=axis)

    with tf.variable_scope(scope):
        min_var = self._quant_var('min', default_min)
        max_var = self._quant_var('max', default_max)

        if is_training:
            # Training graph: track moving averages of the observed range.
            concat_tensors = tf.concat(inputs, axis=axis)
            tf.logging.info(
                'concat_tensors: {}'.format(concat_tensors))
            # TFLite requires that 0.0 is always in the [min; max] range.
            range_min = tf.minimum(tf.reduce_min(concat_tensors), 0.0,
                                   name='SafeQuantRangeMin')
            range_max = tf.maximum(tf.reduce_max(concat_tensors), 0.0,
                                   name='SafeQuantRangeMax')
            lower = moving_averages.assign_moving_average(
                min_var, range_min, ema_decay, name='AssignMinEma')
            upper = moving_averages.assign_moving_average(
                max_var, range_max, ema_decay, name='AssignMaxEma')
        else:
            # Eval graph: use the values stored in the variables directly.
            lower, upper = min_var, max_var

        quant_inputs = [
            tf.fake_quant_with_min_max_vars(tensor, lower, upper)
            for tensor in inputs
        ]
        outputs = tf.concat(quant_inputs, axis=axis)
    return outputs
def model_fn(features, labels, mode, params=None):
    """Build model and optimizer.

    Builds the base network, adds either the contrastive head (pretrain) or
    the supervised head (finetune), and returns a TPUEstimatorSpec for
    training or evaluation.

    `model`, `num_classes` and `num_train_examples` are not parameters; they
    are resolved from a scope outside this function.

    Args:
      features: Input tensor; split into `num_transforms` pieces along the
        last axis and re-stacked along axis 0.
      labels: Mapping read via the keys 'labels' and 'mask'.
      mode: Estimator mode; compared against tf.estimator.ModeKeys.TRAIN.
      params: Mapping read via 'batch_size' and, optionally, 'context'.
        NOTE(review): the default is None, but `params` is indexed
        unconditionally below -- confirm callers always pass a mapping.

    Returns:
      A tf.estimator.tpu.TPUEstimatorSpec.

    Raises:
      ValueError: On an unknown train_mode or optimizer, or when layer
        freezing is requested during pretraining.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Check training mode.
    if FLAGS.train_mode == 'pretrain':
        num_transforms = FLAGS.num_transforms
        if FLAGS.fine_tune_after_block > -1:
            raise ValueError(
                'Does not support layer freezing during pretraining,'
                'should set fine_tune_after_block<=-1 for safety.')
    elif FLAGS.train_mode == 'finetune':
        num_transforms = 1
    else:
        raise ValueError('Unknown train_mode {}'.format(FLAGS.train_mode))

    # Split channels, and optionally apply extra batched augmentation.
    features_list = tf.split(features, num_or_size_splits=num_transforms, axis=-1)
    if FLAGS.use_blur and is_training and FLAGS.train_mode == 'pretrain':
        features_list = data_util.batch_random_blur(
            features_list, FLAGS.image_size, FLAGS.image_size)
    features = tf.concat(features_list, 0)  # (num_transforms * bsz, h, w, c)

    # Base network forward pass.
    with tf.variable_scope('base_model'):
        if FLAGS.train_mode == 'finetune' and FLAGS.fine_tune_after_block >= 4:
            # Finetune just supervised (linear) head will not update BN stats.
            model_train_mode = False
        else:
            # Pretrain or finetune anything else will update BN stats.
            model_train_mode = is_training
        hiddens = model(features, is_training=model_train_mode)

    # Add head and loss.
    if FLAGS.train_mode == 'pretrain':
        tpu_context = params['context'] if 'context' in params else None
        hiddens_proj = model_util.projection_head(hiddens, is_training)
        # More than two augmented views use the multi-augmentation loss.
        loss_func = obj_lib.add_contrastive_loss
        if FLAGS.num_transforms > 2:
            loss_func = obj_lib.add_contrastive_loss_multi_aug
        contrast_loss, logits_con, labels_con = loss_func(
            hiddens_proj,
            hidden_norm=FLAGS.hidden_norm,
            temperature=FLAGS.temperature,
            tpu_context=tpu_context if is_training else None)
        # Placeholder supervised logits so later code can reference them.
        logits_sup = tf.zeros([params['batch_size'], num_classes])
    else:
        # Placeholder contrastive outputs so later code can reference them.
        contrast_loss = tf.zeros([])
        logits_con = tf.zeros([params['batch_size'], 10])
        labels_con = tf.zeros([params['batch_size'], 10])
        logits_sup = model_util.supervised_head(hiddens, num_classes, is_training)
        obj_lib.add_supervised_loss(labels=labels['labels'], logits=logits_sup, weights=labels['mask'])

    # Add weight decay to loss, for non-LARS optimizers.
    model_util.add_weight_decay(adjust_per_optimizer=True)
    loss = tf.losses.get_total_loss()

    if FLAGS.train_mode == 'pretrain':
        variables_to_train = tf.trainable_variables()
    else:
        # Gather variables from the per-block collections after the frozen
        # blocks.
        collection_prefix = 'trainable_variables_inblock_'
        variables_to_train = []
        for j in range(FLAGS.fine_tune_after_block + 1, 6):
            variables_to_train += tf.get_collection(collection_prefix + str(j))
        assert variables_to_train, 'variables_to_train shouldn\'t be empty!'

    tf.logging.info(
        '===============Variables to train (begin)===============')
    tf.logging.info(variables_to_train)
    tf.logging.info(
        '================Variables to train (end)================')

    learning_rate = model_util.learning_rate_schedule(
        FLAGS.learning_rate, num_train_examples)

    if is_training:
        if FLAGS.train_summary_steps > 0:
            # Compute stats for the summary.
            prob_con = tf.nn.softmax(logits_con)
            # The 1e-8 keeps the log finite when a probability is zero.
            entropy_con = -tf.reduce_mean(
                tf.reduce_sum(prob_con * tf.math.log(prob_con + 1e-8), -1))

            summary_writer = tf2.summary.create_file_writer(
                FLAGS.model_dir)
            # TODO(iamtingchen): remove this control_dependencies in the future.
            with tf.control_dependencies([summary_writer.init()]):
                with summary_writer.as_default():
                    # Record only on steps divisible by train_summary_steps.
                    should_record = tf.math.equal(
                        tf.math.floormod(tf.train.get_global_step(),
                                         FLAGS.train_summary_steps), 0)
                    with tf2.summary.record_if(should_record):
                        contrast_acc = tf.equal(
                            tf.argmax(labels_con, 1), tf.argmax(logits_con, axis=1))
                        contrast_acc = tf.reduce_mean(
                            tf.cast(contrast_acc, tf.float32))
                        label_acc = tf.equal(
                            tf.argmax(labels['labels'], 1), tf.argmax(logits_sup, axis=1))
                        label_acc = tf.reduce_mean(
                            tf.cast(label_acc, tf.float32))
                        tf2.summary.scalar('train_contrast_loss', contrast_loss, step=tf.train.get_global_step())
                        tf2.summary.scalar('train_contrast_acc', contrast_acc, step=tf.train.get_global_step())
                        tf2.summary.scalar('train_label_accuracy', label_acc, step=tf.train.get_global_step())
                        tf2.summary.scalar('contrast_entropy', entropy_con, step=tf.train.get_global_step())
                        tf2.summary.scalar('learning_rate', learning_rate, step=tf.train.get_global_step())
                        tf2.summary.scalar('input_mean', tf.reduce_mean(features), step=tf.train.get_global_step())
                        tf2.summary.scalar('input_max', tf.reduce_max(features), step=tf.train.get_global_step())
                        tf2.summary.scalar('input_min', tf.reduce_min(features), step=tf.train.get_global_step())
                        tf2.summary.scalar('num_labels', tf.reduce_mean(
                            tf.reduce_sum(
                                labels['labels'], -1)), step=tf.train.get_global_step())

        if FLAGS.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum, use_nesterov=True)
        elif FLAGS.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif FLAGS.optimizer == 'lars':
            optimizer = LARSOptimizer(
                learning_rate,
                momentum=FLAGS.momentum,
                weight_decay=FLAGS.weight_decay,
                exclude_from_weight_decay=['batch_normalization', 'bias'])
        else:
            raise ValueError('Unknown optimizer {}'.format(
                FLAGS.optimizer))

        if FLAGS.use_tpu:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # Make the train op depend on the UPDATE_OPS collection and, when
        # summaries are enabled, on the v2 summary ops.
        control_deps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if FLAGS.train_summary_steps > 0:
            control_deps.extend(tf.summary.all_v2_summary_ops())
        with tf.control_dependencies(control_deps):
            train_op = optimizer.minimize(
                loss,
                global_step=tf.train.get_or_create_global_step(),
                var_list=variables_to_train)

        if FLAGS.checkpoint:

            def scaffold_fn():
                """Scaffold function to restore non-logits vars from checkpoint."""
                # Map each selected global variable onto the checkpoint
                # entry of the same name.
                tf.train.init_from_checkpoint(
                    FLAGS.checkpoint, {
                        v.op.name: v.op.name
                        for v in tf.global_variables(FLAGS.variable_schema)
                    })

                if FLAGS.zero_init_logits_layer:
                    # Init op that initializes output layer parameters to zeros.
                    output_layer_parameters = [
                        var for var in tf.trainable_variables()
                        if var.name.startswith('head_supervised')
                    ]
                    tf.logging.info(
                        'Initializing output layer parameters %s to zero',
                        [x.op.name for x in output_layer_parameters])
                    # Zeroing is ordered after the global initializer.
                    with tf.control_dependencies(
                            [tf.global_variables_initializer()]):
                        init_op = tf.group([
                            tf.assign(x, tf.zeros_like(x))
                            for x in output_layer_parameters
                        ])
                    return tf.train.Scaffold(init_op=init_op)
                else:
                    return tf.train.Scaffold()
        else:
            scaffold_fn = None

        return tf.estimator.tpu.TPUEstimatorSpec(mode=mode, train_op=train_op, loss=loss, scaffold_fn=scaffold_fn)
    else:

        def metric_fn(logits_sup, labels_sup, logits_con, labels_con, mask, **kws):
            """Inner metric function."""
            # Every extra keyword tensor is reported as a mask-weighted mean.
            metrics = {
                k: tf.metrics.mean(v, weights=mask)
                for k, v in kws.items()
            }
            metrics['label_top_1_accuracy'] = tf.metrics.accuracy(
                tf.argmax(labels_sup, 1), tf.argmax(logits_sup, axis=1), weights=mask)
            metrics['label_top_5_accuracy'] = tf.metrics.recall_at_k(
                tf.argmax(labels_sup, 1), logits_sup, k=5, weights=mask)
            metrics['contrastive_top_1_accuracy'] = tf.metrics.accuracy(
                tf.argmax(labels_con, 1), tf.argmax(logits_con, axis=1), weights=mask)
            metrics['contrastive_top_5_accuracy'] = tf.metrics.recall_at_k(
                tf.argmax(labels_con, 1), logits_con, k=5, weights=mask)
            return metrics

        # Scalar losses are expanded to a length-batch_size vector with
        # tf.fill before being handed to metric_fn.
        metrics = {
            'logits_sup': logits_sup,
            'labels_sup': labels['labels'],
            'logits_con': logits_con,
            'labels_con': labels_con,
            'mask': labels['mask'],
            'contrast_loss': tf.fill((params['batch_size'], ), contrast_loss),
            'regularization_loss': tf.fill((params['batch_size'], ), tf.losses.get_regularization_loss()),
        }

        return tf.estimator.tpu.TPUEstimatorSpec(mode=mode, loss=loss, eval_metrics=(metric_fn, metrics), scaffold_fn=None)
def main(_):
    """Build a small policy network graph for FLAGS.game and write it out.

    The graph holds named placeholders ("input", "output", "legals_mask"),
    a masked softmax ("policy_softmax"), an action sampler
    ("sampled_actions"), a training op ("train") and an initializer
    ("init_all_vars_op"). It is serialized in binary form to
    FLAGS.dir/FLAGS.filename.
    """
    game = pyspiel.load_game(FLAGS.game)

    # Input width: product of the observation tensor dimensions.
    obs_size = np.prod(game.observation_tensor_shape())
    # Output width: one logit per distinct action.
    n_actions = game.num_distinct_actions()

    with tf.Session() as sess:
        net_input = tf.placeholder(tf.float32, [None, obs_size], name="input")
        # Not wired into the network; created only for its name.
        tf.placeholder(tf.float32, [None, n_actions], name="output")
        legals_mask = tf.placeholder(tf.float32, [None, n_actions],
                                     name="legals_mask")

        # Two ReLU hidden layers of width 128, then a linear logit layer.
        hidden = net_input
        for _ in range(2):
            hidden = tf.layers.dense(hidden, 128, activation=tf.nn.relu)
        logits = tf.layers.dense(hidden, n_actions)

        # Subtracting the per-row max helps numerical stability, but does not
        # remove every problem: if the max sits on an illegal action, all
        # legal logits can be very negative, their exponentials can all be 0,
        # and the normalization below then yields NaN. One remedy is to set
        # illegal logits to -inf and use a custom cross entropy op that
        # ignores the illegal actions.
        row_max = tf.reduce_max(logits, axis=-1, keepdims=True)
        logits = logits - row_max
        masked_exp_logit = tf.multiply(tf.exp(logits), legals_mask)
        renormalizing_factor = tf.reduce_sum(masked_exp_logit, axis=-1,
                                             keepdims=True)
        is_illegal = tf.equal(legals_mask, 0.)
        zeros = tf.zeros_like(masked_exp_logit)
        normalized = tf.divide(masked_exp_logit, renormalizing_factor)
        policy_softmax = tf.where(is_illegal, zeros, normalized,
                                  name="policy_softmax")

        policy_targets = tf.placeholder(shape=[None, n_actions],
                                        dtype=tf.float32)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=policy_targets)
        policy_cost = tf.reduce_mean(cross_entropy, axis=0)

        # We make one sample.
        tf.random.categorical(tf.log(policy_softmax), 1,
                              name="sampled_actions")

        tf.train.AdamOptimizer(0.0001).minimize(policy_cost, name="train")
        tf.variables_initializer(tf.global_variables(),
                                 name="init_all_vars_op")

        print("Writing file: {}/{}".format(FLAGS.dir, FLAGS.filename))
        tf.train.write_graph(sess.graph_def, FLAGS.dir, FLAGS.filename,
                             as_text=False)
def crop_proposal():
    """Sample candidate crops and select one that passes every check.

    `boxes` and `num_boxes` are resolved from a scope outside this function.

    Returns:
        Tuple `(use_crop, output_ltrb, output_masks)`: whether any candidate
        was valid, the selected crop as (left, top, right, bottom) (zeros if
        none was valid), and the per-box mask of boxes whose center lies in
        the selected crop.
    """

    def rand_vec(minval, maxval):
        """Draw one uniform value per crop pass, shaped (NUM_CROP_PASSES, 1)."""
        return tf.random_uniform(
            shape=(ssd_constants.NUM_CROP_PASSES, 1),
            minval=minval,
            maxval=maxval,
            dtype=tf.float32)

    def tiled_center(offset):
        """Box centers along one axis, repeated once per crop pass."""
        midpoint = 0.5 * (boxes[:, offset + 0] + boxes[:, offset + 2])
        return tf.tile(midpoint[tf.newaxis, :],
                       (ssd_constants.NUM_CROP_PASSES, 1))

    width = rand_vec(0.3, 1)
    height = rand_vec(0.3, 1)
    left = rand_vec(0, 1 - width)
    top = rand_vec(0, 1 - height)
    right = left + width
    bottom = top + height
    ltrb = tf.concat([left, top, right, bottom], axis=1)

    min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
    ious = calc_iou_tensor(ltrb, boxes)

    # discard any bboxes whose center not in the cropped image
    xc = tiled_center(0)
    yc = tiled_center(1)
    per_box = (1, num_boxes)
    inside_checks = [
        tf.greater(xc, tf.tile(left, per_box)),
        tf.less(xc, tf.tile(right, per_box)),
        tf.greater(yc, tf.tile(top, per_box)),
        tf.less(yc, tf.tile(bottom, per_box)),
    ]
    masks = tf.reduce_all(tf.stack(inside_checks, axis=2), axis=2)

    # Checks of whether a crop is valid.
    valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                  tf.less(width / height, 2))
    valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1,
                               keepdims=True)
    valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)
    all_checks = tf.concat([valid_aspect, valid_ious, valid_masks], axis=1)
    valid_all = tf.cast(tf.reduce_all(all_checks, axis=1), tf.int32)

    # One indexed, as zero is needed for the case of no matches.
    index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

    # Either one-hot, or zeros if there is no valid crop.
    selection = tf.equal(tf.reduce_max(index * valid_all), index)
    use_crop = tf.reduce_any(selection)

    selection_weights = tf.tile(
        tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))
    output_ltrb = tf.reduce_sum(tf.multiply(ltrb, selection_weights), axis=0)

    selection_per_box = tf.tile(selection[:, tf.newaxis], (1, num_boxes))
    output_masks = tf.reduce_any(tf.logical_and(masks, selection_per_box),
                                 axis=0)

    return use_crop, output_ltrb, output_masks
def __init__(
        self,
        *,
        scope,
        ob_space,
        ac_space,
        stochpol_fn,
        nsteps,
        nepochs=4,
        nminibatches=1,
        gamma=0.99,
        gamma_ext=0.99,
        lam=0.95,
        ent_coef=0,
        cliprange=0.2,
        max_grad_norm=1.0,
        vf_coef=1.0,
        lr=30e-5,
        adam_hps=None,
        testing=False,
        comm=None,
        comm_train=None,
        use_news=False,
        update_ob_stats_every_step=True,
        int_coeff=None,
        ext_coeff=None,
        obs_save_flag=False,
):
    """Build the PPO loss, optimizer and reporting ops for the agent.

    All arguments are keyword-only.

    Args:
        scope: Variable scope name under which the policy and training ops
            are created.
        ob_space: Observation space; stored on the instance.
        ac_space: Action space; stored on the instance.
        stochpol_fn: Zero-argument callable that returns the stochastic
            policy.
        nsteps, nepochs, nminibatches: Stored on the instance.
        gamma, gamma_ext, lam: Stored on the instance.
        ent_coef: Coefficient of the entropy term in the loss.
        cliprange: Stored on the instance; the loss itself uses the
            `ph_cliprange` placeholder.
        max_grad_norm: If truthy, a clip-by-global-norm op is built (see the
            review note at that line).
        vf_coef: Coefficient of the two value-function losses.
        lr: Stored on the instance; the optimizer uses `ph_lr`.
        adam_hps: Optional dict of extra keyword arguments for the optimizer.
        testing: Whether this worker is testing; rank 0 of `comm` may not be.
        comm: Optional MPI communicator used for logging when it has more
            than one member.
        comm_train: Optional MPI communicator for training; defaults to the
            logging communicator.
        use_news, update_ob_stats_every_step, int_coeff, ext_coeff,
        obs_save_flag: Stored on the instance.
    """
    self.lr = lr
    self.ext_coeff = ext_coeff
    self.int_coeff = int_coeff
    self.use_news = use_news
    self.update_ob_stats_every_step = update_ob_stats_every_step
    # Absolute scope name: enclosing variable scope plus `scope`, without a
    # leading slash when there is no enclosing scope.
    self.abs_scope = (tf.get_variable_scope().name + '/' + scope).lstrip('/')
    self.testing = testing
    # Default to a single-process communicator for logging.
    self.comm_log = MPI.COMM_SELF
    if comm is not None and comm.Get_size() > 1:
        self.comm_log = comm
        assert not testing or comm.Get_rank(
        ) != 0, "Worker number zero can't be testing"
    if comm_train is not None:
        self.comm_train, self.comm_train_size = comm_train, comm_train.Get_size(
        )
    else:
        self.comm_train, self.comm_train_size = self.comm_log, self.comm_log.Get_size(
        )
    self.is_log_leader = self.comm_log.Get_rank() == 0
    self.is_train_leader = self.comm_train.Get_rank() == 0
    self.obs_save_flag = obs_save_flag
    if self.is_log_leader:
        # 100 empty action/observation records; only the log leader has them.
        self.obs_rec = [{'acs': [], 'obs': []} for i in range(100)]
    with tf.variable_scope(scope):
        self.best_ret = -np.inf
        self.local_best_ret = -np.inf
        self.rooms = []
        self.local_rooms = []
        self.scores = []
        self.ob_space = ob_space
        self.ac_space = ac_space
        self.stochpol = stochpol_fn()
        self.nepochs = nepochs
        self.cliprange = cliprange
        self.nsteps = nsteps
        self.nminibatches = nminibatches
        self.gamma = gamma
        self.gamma_ext = gamma_ext
        self.lam = lam
        self.adam_hps = adam_hps or dict()
        # Placeholders fed at training time.
        self.ph_adv = tf.placeholder(tf.float32, [None, None])
        self.ph_ret_int = tf.placeholder(tf.float32, [None, None])
        self.ph_ret_ext = tf.placeholder(tf.float32, [None, None])
        self.ph_oldnlp = tf.placeholder(tf.float32, [None, None])
        self.ph_oldvpred = tf.placeholder(tf.float32, [None, None])
        self.ph_lr = tf.placeholder(tf.float32, [])
        self.ph_lr_pred = tf.placeholder(tf.float32, [])
        self.ph_cliprange = tf.placeholder(tf.float32, [])

        #Define loss.
        neglogpac = self.stochpol.pd_opt.neglogp(self.stochpol.ph_ac)
        entropy = tf.reduce_mean(self.stochpol.pd_opt.entropy())
        # Separate squared-error value losses for the intrinsic and
        # extrinsic value predictions.
        vf_loss_int = (0.5 * vf_coef) * tf.reduce_mean(
            tf.square(self.stochpol.vpred_int_opt - self.ph_ret_int))
        vf_loss_ext = (0.5 * vf_coef) * tf.reduce_mean(
            tf.square(self.stochpol.vpred_ext_opt - self.ph_ret_ext))
        vf_loss = vf_loss_int + vf_loss_ext
        ratio = tf.exp(self.ph_oldnlp - neglogpac)  # p_new / p_old
        negadv = -self.ph_adv
        # Clipped surrogate: element-wise max of the unclipped and clipped
        # negated-advantage terms.
        pg_losses1 = negadv * ratio
        pg_losses2 = negadv * tf.clip_by_value(
            ratio, 1.0 - self.ph_cliprange, 1.0 + self.ph_cliprange)
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses1, pg_losses2))
        ent_loss = (-ent_coef) * entropy
        # Diagnostics computed from the squared change in negative
        # log-probability.
        approxkl = .5 * tf.reduce_mean(
            tf.square(neglogpac - self.ph_oldnlp))
        maxkl = .5 * tf.reduce_max(tf.square(neglogpac - self.ph_oldnlp))
        # Fraction of samples whose ratio deviates from 1 by more than the
        # clip range.
        clipfrac = tf.reduce_mean(
            tf.to_float(tf.greater(tf.abs(ratio - 1.0), self.ph_cliprange)))
        loss = pg_loss + ent_loss + vf_loss + self.stochpol.aux_loss

        #Create optimizer.
        params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.abs_scope)
        logger.info("PPO: using MpiAdamOptimizer connected to %i peers" % self.comm_train_size)
        trainer = MpiAdamOptimizer(self.comm_train, learning_rate=self.ph_lr, **self.adam_hps)
        grads_and_vars = trainer.compute_gradients(loss, params)
        grads, vars = zip(*grads_and_vars)
        if max_grad_norm:
            # NOTE(review): the clipped gradients are discarded (bound to
            # `_`) and the unclipped `grads` are applied below, so
            # `max_grad_norm` currently has no effect on the update. Confirm
            # whether this is intentional before changing it; enabling
            # clipping would alter training behavior.
            _, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        global_grad_norm = tf.global_norm(grads)
        grads_and_vars = list(zip(grads, vars))
        self._train = trainer.apply_gradients(grads_and_vars)

    #Quantities for reporting.
    # `loss_names` below labels the entries of `_losses` position by position.
    self._losses = [
        loss, pg_loss, vf_loss, entropy, clipfrac, approxkl, maxkl,
        self.stochpol.aux_loss, self.stochpol.feat_var,
        self.stochpol.max_feat, global_grad_norm
    ]
    self.loss_names = [
        'tot', 'pg', 'vf', 'ent', 'clipfrac', 'approxkl', 'maxkl',
        "auxloss", "featvar", "maxfeat", "gradnorm"
    ]
    self.I = None
    self.disable_policy_update = None
    # Initialize only the variables under this agent's scope, using the
    # default session.
    allvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.abs_scope)
    if self.is_log_leader:
        tf_util.display_var_info(allvars)
    tf.get_default_session().run(tf.variables_initializer(allvars))
    sync_from_root(tf.get_default_session(), allvars)  #Syncs initialization across mpi workers.
    self.t0 = time.time()
    self.global_tcount = 0
def softmax(x, axis=-1):
    """Softmax of `x` along `axis`.

    The maximum along `axis` is subtracted before exponentiating, so the
    largest exponent is exp(0).
    """
    shifted = x - tf.reduce_max(x, axis=axis, keepdims=True)
    exps = tf.exp(shifted)
    normalizer = tf.reduce_sum(exps, axis=axis, keepdims=True)
    return exps / normalizer