def compute_grid_positions(boxes, boundaries, output_size, sample_offset):
  """Compute the grid position w.r.t. the corresponding feature map.

  Args:
    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
      information of each box w.r.t. the corresponding feature map.
      boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
      corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
      in terms of the number of pixels of the corresponding feature map size.
    boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
      the boundary (in (y, x)) of the corresponding feature map for each box.
      Any resampled grid points that go beyond the boundary will be clipped.
    output_size: a scalar indicating the output crop size.
    sample_offset: a float number in [0, 1] indicating the subpixel sample
      offset from grid point.

  Returns:
    kernel_y: Tensor of size [batch_size, num_boxes, output_size, 2, 1].
    kernel_x: Tensor of size [batch_size, num_boxes, output_size, 2, 1].
    box_gridy0y1: Tensor of size [batch_size, num_boxes, output_size, 2].
    box_gridx0x1: Tensor of size [batch_size, num_boxes, output_size, 2].
  """
  batch_size, num_boxes, _ = boxes.get_shape().as_list()
  box_grid_x = []
  box_grid_y = []
  for i in range(output_size):
    box_grid_x.append(boxes[:, :, 1] +
                      (i + sample_offset) * boxes[:, :, 3] / output_size)
    box_grid_y.append(boxes[:, :, 0] +
                      (i + sample_offset) * boxes[:, :, 2] / output_size)
  box_grid_x = tf.stack(box_grid_x, axis=2)
  box_grid_y = tf.stack(box_grid_y, axis=2)

  box_grid_y0 = tf.floor(box_grid_y)
  box_grid_x0 = tf.floor(box_grid_x)
  box_grid_x0 = tf.maximum(0., box_grid_x0)
  box_grid_y0 = tf.maximum(0., box_grid_y0)
  box_grid_x0 = tf.minimum(box_grid_x0,
                           tf.expand_dims(boundaries[:, :, 1], -1))
  box_grid_x1 = tf.minimum(box_grid_x0 + 1,
                           tf.expand_dims(boundaries[:, :, 1], -1))
  box_grid_y0 = tf.minimum(box_grid_y0,
                           tf.expand_dims(boundaries[:, :, 0], -1))
  box_grid_y1 = tf.minimum(box_grid_y0 + 1,
                           tf.expand_dims(boundaries[:, :, 0], -1))

  box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1)
  box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1)

  # The RoIAlign feature f can be computed by bilinear interpolation of four
  # neighboring feature points f0, f1, f2, and f3.
  # f(y, x) = [hy, ly] * [[f00, f01],  * [hx, lx]^T
  #                       [f10, f11]]
  # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (ly*lx)f11
  # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
  ly = box_grid_y - box_grid_y0
  lx = box_grid_x - box_grid_x0
  hy = 1.0 - ly
  hx = 1.0 - lx
  kernel_y = tf.reshape(tf.stack([hy, ly], axis=3),
                        [batch_size, num_boxes, output_size, 2, 1])
  kernel_x = tf.reshape(tf.stack([hx, lx], axis=3),
                        [batch_size, num_boxes, output_size, 2, 1])
  return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1
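# Illustration (not from the original source) of one way to consume the
# returned kernels. Assume a hypothetical tensor `features_4pt` of shape
# [batch_size, num_boxes, output_size, output_size, 2, 2] holding, for every
# output cell, the feature values at its four (y0, y1) x (x0, x1) neighbors;
# the separable kernels then blend them bilinearly.
ky = tf.squeeze(kernel_y, -1)  # [batch, boxes, output_size, 2]
kx = tf.squeeze(kernel_x, -1)  # [batch, boxes, output_size, 2]
# Blend along y: sum_i ky[b,n,y,i] * f[b,n,y,x,i,j] -> [b, n, y, x, j]
blend_y = tf.reduce_sum(features_4pt * ky[:, :, :, None, :, None], axis=4)
# Blend along x: sum_j kx[b,n,x,j] * blend_y[b,n,y,x,j] -> [b, n, y, x]
interp = tf.reduce_sum(blend_y * kx[:, :, None, :, :], axis=-1)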
def lrelu(x, leak=0.2, name="lrelu"):
  # Pass `name` through to the op; it was accepted but unused before.
  return tf.maximum(x, leak * x, name=name)
def train_dvrl(self, perf_metric):
  """Trains DVRL based on the specified objective function.

  Args:
    perf_metric: 'auc', 'accuracy', 'log_loss' for classification;
      'mae', 'mse', 'rmspe' for regression
  """
  # Generates selection probability
  est_data_value = self.data_value_evaluator()

  # Generator loss (REINFORCE algorithm)
  prob = tf.reduce_sum(
      self.s_input * tf.log(est_data_value + self.epsilon) +
      (1 - self.s_input) * tf.log(1 - est_data_value + self.epsilon))
  dve_loss = (-self.reward_input * prob) + \
      1e3 * (tf.maximum(tf.reduce_mean(est_data_value) - self.threshold, 0) +
             tf.maximum((1 - self.threshold) - tf.reduce_mean(est_data_value),
                        0))

  # Variable
  dve_vars = [v for v in tf.trainable_variables()
              if v.name.startswith('data_value_estimator')]

  # Solver
  dve_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(
      dve_loss, var_list=dve_vars)

  # Baseline performance
  if self.flag_sgd:
    y_valid_hat = self.ori_model.predict(self.x_valid)
  else:
    if self.problem == 'classification':
      y_valid_hat = self.ori_model.predict_proba(self.x_valid)
    elif self.problem == 'regression':
      y_valid_hat = self.ori_model.predict(self.x_valid)

  if perf_metric == 'auc':
    valid_perf = metrics.roc_auc_score(self.y_valid, y_valid_hat[:, 1])
  elif perf_metric == 'accuracy':
    valid_perf = metrics.accuracy_score(self.y_valid,
                                        np.argmax(y_valid_hat, axis=1))
  elif perf_metric == 'log_loss':
    valid_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
  elif perf_metric == 'rmspe':
    valid_perf = dvrl_metrics.rmspe(self.y_valid, y_valid_hat)
  elif perf_metric == 'mae':
    valid_perf = metrics.mean_absolute_error(self.y_valid, y_valid_hat)
  elif perf_metric == 'mse':
    valid_perf = metrics.mean_squared_error(self.y_valid, y_valid_hat)

  # Prediction differences
  if self.flag_sgd:
    y_train_valid_pred = self.val_model.predict(self.x_train)
  else:
    if self.problem == 'classification':
      y_train_valid_pred = self.val_model.predict_proba(self.x_train)
    elif self.problem == 'regression':
      y_train_valid_pred = self.val_model.predict(self.x_train)
      y_train_valid_pred = np.reshape(y_train_valid_pred, [-1, 1])

  if self.problem == 'classification':
    y_pred_diff = np.abs(self.y_train_onehot - y_train_valid_pred)
  elif self.problem == 'regression':
    y_pred_diff = \
        np.abs(self.y_train_onehot - y_train_valid_pred) / self.y_train_onehot

  # Main session
  sess = tf.Session()
  sess.run(tf.global_variables_initializer())

  # Model save at the end
  saver = tf.train.Saver(dve_vars)

  for _ in tqdm.tqdm(range(self.outer_iterations)):
    # Batch selection
    batch_idx = \
        np.random.permutation(len(self.x_train[:, 0]))[:self.batch_size]

    x_batch = self.x_train[batch_idx, :]
    y_batch_onehot = self.y_train_onehot[batch_idx]
    y_batch = self.y_train[batch_idx]
    y_hat_batch = y_pred_diff[batch_idx]

    # Generates selection probability
    est_dv_curr = sess.run(est_data_value,
                           feed_dict={self.x_input: x_batch,
                                      self.y_input: y_batch_onehot,
                                      self.y_hat_input: y_hat_batch})

    # Samples the selection probability
    sel_prob_curr = np.random.binomial(1, est_dv_curr, est_dv_curr.shape)

    # Exception (when selection probability is 0)
    if np.sum(sel_prob_curr) == 0:
      est_dv_curr = 0.5 * np.ones(np.shape(est_dv_curr))
      sel_prob_curr = np.random.binomial(1, est_dv_curr, est_dv_curr.shape)

    # Trains predictor
    # If the predictor is a neural network
    if 'summary' in dir(self.pred_model):
      new_model = self.pred_model
      new_model.load_weights('tmp/pred_model.h5')
      # Train the model
      new_model.fit(x_batch, y_batch_onehot,
                    sample_weight=sel_prob_curr[:, 0],
                    batch_size=self.batch_size_predictor,
                    epochs=self.inner_iterations, verbose=False)
      y_valid_hat = new_model.predict(self.x_valid)
    else:
      new_model = self.pred_model
      new_model.fit(x_batch, y_batch, sel_prob_curr[:, 0])

    # Prediction
    if 'summary' in dir(new_model):
      y_valid_hat = new_model.predict(self.x_valid)
    else:
      if self.problem == 'classification':
        y_valid_hat = new_model.predict_proba(self.x_valid)
      elif self.problem == 'regression':
        y_valid_hat = new_model.predict(self.x_valid)

    # Reward computation
    if perf_metric == 'auc':
      dvrl_perf = metrics.roc_auc_score(self.y_valid, y_valid_hat[:, 1])
    elif perf_metric == 'accuracy':
      dvrl_perf = metrics.accuracy_score(self.y_valid,
                                         np.argmax(y_valid_hat, axis=1))
    elif perf_metric == 'log_loss':
      dvrl_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
    elif perf_metric == 'rmspe':
      dvrl_perf = dvrl_metrics.rmspe(self.y_valid, y_valid_hat)
    elif perf_metric == 'mae':
      dvrl_perf = metrics.mean_absolute_error(self.y_valid, y_valid_hat)
    elif perf_metric == 'mse':
      dvrl_perf = metrics.mean_squared_error(self.y_valid, y_valid_hat)

    if self.problem == 'classification':
      reward_curr = dvrl_perf - valid_perf
    elif self.problem == 'regression':
      reward_curr = valid_perf - dvrl_perf

    # Trains the generator
    _, _ = sess.run([dve_solver, dve_loss],
                    feed_dict={self.x_input: x_batch,
                               self.y_input: y_batch_onehot,
                               self.y_hat_input: y_hat_batch,
                               self.s_input: sel_prob_curr,
                               self.reward_input: reward_curr})

  # Saves trained model
  saver.save(sess, self.checkpoint_file_name)

  # Trains DVRL predictor
  # Generate data values
  final_data_value = sess.run(
      est_data_value,
      feed_dict={self.x_input: self.x_train,
                 self.y_input: self.y_train_onehot,
                 self.y_hat_input: y_pred_diff})[:, 0]

  # Trains final model
  # If the final model is a neural network
  if 'summary' in dir(self.pred_model):
    self.final_model.load_weights('tmp/pred_model.h5')
    # Train the model
    self.final_model.fit(self.x_train, self.y_train_onehot,
                         sample_weight=final_data_value,
                         batch_size=self.batch_size_predictor,
                         epochs=self.inner_iterations, verbose=False)
  else:
    self.final_model.fit(self.x_train, self.y_train, final_data_value)
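# Toy illustration (not part of the original source) of the REINFORCE signal
# trained above: the log-likelihood of the sampled selection mask under the
# estimated data values, weighted by the scalar validation reward.
import numpy as np

est_dv = np.array([0.9, 0.2, 0.7])              # estimated data values
sel = np.random.binomial(1, est_dv)             # sampled selection mask
eps = 1e-8
log_prob = np.sum(sel * np.log(est_dv + eps) +
                  (1 - sel) * np.log(1 - est_dv + eps))
reward = 0.05                                   # dvrl_perf - valid_perf
dve_loss = -reward * log_prob                   # minimized w.r.t. the estimator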
def _leaky_relu(x):
  return tf.maximum(0.2 * x, x)
def _update_critic_td3(self, obs, action, next_obs, reward, mask):
  """Updates parameters of the TD3 critic given samples from the batch.

  Args:
    obs: A tfe.Variable with a batch of observations.
    action: A tfe.Variable with a batch of actions.
    next_obs: A tfe.Variable with a batch of next observations.
    reward: A tfe.Variable with a batch of rewards.
    mask: A tfe.Variable with a batch of masks.
  """
  # Avoid using tensorflow random functions since it's impossible to get
  # the state of the random number generator used by TensorFlow.
  target_action_noise = np.random.normal(
      size=action.get_shape(), scale=self.policy_noise).astype('float32')
  target_action_noise = contrib_eager_python_tfe.Variable(target_action_noise)
  target_action_noise = tf.clip_by_value(target_action_noise,
                                         -self.policy_noise_clip,
                                         self.policy_noise_clip)
  noisy_action_targets = self.actor_target(next_obs) + target_action_noise
  clipped_noisy_action_targets = tf.clip_by_value(noisy_action_targets, -1, 1)

  if self.use_absorbing_state:
    # Starting from the goal state we can execute only non-actions.
    a_mask = tf.maximum(0, mask)
    q_next1, q_next2 = self.critic_target(
        next_obs, clipped_noisy_action_targets * a_mask)
    q_next = tf.reduce_min(tf.concat([q_next1, q_next2], -1), -1,
                           keepdims=True)
    q_target = reward + self.discount * q_next
  else:
    q_next1, q_next2 = self.critic_target(next_obs,
                                          clipped_noisy_action_targets)
    q_next = tf.reduce_min(tf.concat([q_next1, q_next2], -1), -1,
                           keepdims=True)
    q_target = reward + self.discount * mask * q_next

  with tf.GradientTape() as tape:
    q_pred1, q_pred2 = self.critic(obs, action)
    critic_loss = (tf.losses.mean_squared_error(q_target, q_pred1) +
                   tf.losses.mean_squared_error(q_target, q_pred2))
  grads = tape.gradient(critic_loss, self.critic.variables)
  self.critic_optimizer.apply_gradients(zip(grads, self.critic.variables),
                                        global_step=self.critic_step)

  if self.use_absorbing_state:
    with contrib_summary.record_summaries_every_n_global_steps(
        100, self.critic_step):
      a_mask = tf.maximum(0, -mask)
      if tf.reduce_sum(a_mask).numpy() > 0:
        contrib_summary.scalar(
            'critic/absorbing_reward',
            tf.reduce_sum(reward * a_mask) / tf.reduce_sum(a_mask),
            step=self.critic_step)

  with contrib_summary.record_summaries_every_n_global_steps(
      100, self.critic_step):
    contrib_summary.scalar('critic/loss', critic_loss,
                           step=self.critic_step)
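# Recap (comments only) of the target computed above, following the TD3
# recipe: target-policy smoothing plus a clipped double-Q backup.
#   a' = clip(actor_target(s') + clip(noise, -c, c), -1, 1)
#   y  = r + gamma * mask * min(Q1'(s', a'), Q2'(s', a'))
# Taking the minimum of the two target critics counteracts the overestimation
# bias that a single bootstrapped critic accumulates.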
def leaky_relu(input_, **kwargs):
  if input_.dtype in [tf.complex64, tf.complex128]:
    raise TypeError('leaky-relu currently does not support complex input')
  leak = kwargs.get('leak', 0.1)
  return tf.maximum(input_, input_ * leak, name='lrelu')
def mask(config: configure_pretraining.PretrainingConfig,
         inputs: pretrain_data.Inputs,
         mask_prob,
         proposal_distribution=1.0,
         disallow_from_mask=None,
         already_masked=None):
  """Implementation of dynamic masking.

  The optional arguments aren't needed for BERT/ELECTRA and are from early
  experiments in "strategically" masking out tokens instead of uniformly at
  random.

  Args:
    config: configure_pretraining.PretrainingConfig
    inputs: pretrain_data.Inputs containing input input_ids/input_mask
    mask_prob: percent of tokens to mask
    proposal_distribution: for non-uniform masking can be a [B, L] tensor of
      scores for masking each position.
    disallow_from_mask: a boolean tensor of [B, L] of positions that should
      not be masked out
    already_masked: a boolean tensor of [B, N] of already masked-out tokens
      for multiple rounds of masking

  Returns:
    a pretrain_data.Inputs with masking added
  """
  # Get the batch size, sequence length, and max masked-out tokens
  N = config.max_predictions_per_seq
  B, L = modeling.get_shape_list(inputs.input_ids)

  # Find indices where masking out a token is allowed
  vocab = tokenization.FullTokenizer(
      config.vocab_file, config.model_sentencepiece_path,
      do_lower_case=config.do_lower_case).vocab
  candidates_mask = _get_candidates_mask(inputs, vocab, disallow_from_mask)

  # Set the number of tokens to mask out per example
  num_tokens = tf.cast(tf.reduce_sum(inputs.input_mask, -1), tf.float32)
  num_to_predict = tf.maximum(1, tf.minimum(
      N, tf.cast(tf.round(num_tokens * mask_prob), tf.int32)))
  masked_lm_weights = tf.cast(tf.sequence_mask(num_to_predict, N), tf.float32)
  if already_masked is not None:
    masked_lm_weights *= (1 - already_masked)

  # Get a probability of masking each position in the sequence
  candidate_mask_float = tf.cast(candidates_mask, tf.float32)
  sample_prob = (proposal_distribution * candidate_mask_float)
  sample_prob /= tf.reduce_sum(sample_prob, axis=-1, keepdims=True)

  # Sample the positions to mask out
  sample_prob = tf.stop_gradient(sample_prob)
  sample_logits = tf.log(sample_prob)
  masked_lm_positions = tf.random.categorical(sample_logits, N,
                                              dtype=tf.int32)
  masked_lm_positions *= tf.cast(masked_lm_weights, tf.int32)

  # Get the ids of the masked-out tokens
  shift = tf.expand_dims(L * tf.range(B), -1)
  flat_positions = tf.reshape(masked_lm_positions + shift, [-1, 1])
  masked_lm_ids = tf.gather_nd(tf.reshape(inputs.input_ids, [-1]),
                               flat_positions)
  masked_lm_ids = tf.reshape(masked_lm_ids, [B, -1])
  masked_lm_ids *= tf.cast(masked_lm_weights, tf.int32)

  # Update the input ids
  replace_with_mask_positions = masked_lm_positions * tf.cast(
      tf.less(tf.random.uniform([B, N]), 0.85), tf.int32)
  inputs_ids, _ = scatter_update(
      inputs.input_ids, tf.fill([B, N], vocab["[MASK]"]),
      replace_with_mask_positions)

  return pretrain_data.get_updated_inputs(
      inputs,
      input_ids=tf.stop_gradient(inputs_ids),
      masked_lm_positions=masked_lm_positions,
      masked_lm_ids=masked_lm_ids,
      masked_lm_weights=masked_lm_weights)
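# Worked example of the flat-index gather above, assuming B = 2 and L = 4:
#   shift                = [[0], [4]]     # L * range(B), expanded
#   masked_lm_positions  = [[1, 3],
#                           [0, 2]]
#   flat positions       = [[1, 3],       # position + shift indexes into
#                           [4, 6]]       # the flattened [B * L] input_ids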
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    dither=1.0 / np.iinfo(np.int16).max,
                                    preemphasis=0.97,
                                    frame_length=25,
                                    frame_step=10,
                                    fft_length=None,
                                    window_fn=functools.partial(
                                        tf.signal.hann_window, periodic=True),
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
  """Implement mel-filterbank extraction using tf ops.

  Args:
    waveforms: float32 tensor with shape [batch_size, max_len]
    sample_rate: sampling rate of the waveform
    dither: stddev of Gaussian noise added to waveform to prevent
      quantization artefacts
    preemphasis: waveform high-pass filtering constant
    frame_length: frame length in ms
    frame_step: frame step in ms
    fft_length: number of fft bins
    window_fn: windowing function
    lower_edge_hertz: lowest frequency of the filterbank
    upper_edge_hertz: highest frequency of the filterbank
    num_mel_bins: filterbank size
    log_noise_floor: clip small values to prevent numeric overflow in log
    apply_mask: When working on a batch of samples, set padding frames to
      zero

  Returns:
    filterbanks: a float32 tensor with shape [batch_size, len, num_bins, 1]
  """
  # `stfts` is a complex64 Tensor representing the short-time Fourier
  # Transform of each signal in `signals`. Its shape is
  # [batch_size, ?, fft_unique_bins]
  # where fft_unique_bins = fft_length // 2 + 1

  # Find the wave length: the largest index for which the value is != 0.
  # Note that waveform samples that are exactly 0.0 are quite common, so
  # simply doing sum(waveforms != 0, axis=-1) will not work correctly.
  wav_lens = tf.reduce_max(
      tf.expand_dims(tf.range(tf.shape(waveforms)[1]), 0) *
      tf.to_int32(tf.not_equal(waveforms, 0.0)),
      axis=-1) + 1
  if dither > 0:
    waveforms += tf.random_normal(tf.shape(waveforms), stddev=dither)
  if preemphasis > 0:
    waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1]
    wav_lens -= 1
  frame_length = int(frame_length * sample_rate / 1e3)
  frame_step = int(frame_step * sample_rate / 1e3)
  if fft_length is None:
    fft_length = int(2**(np.ceil(np.log2(frame_length))))

  stfts = tf.signal.stft(waveforms,
                         frame_length=frame_length,
                         frame_step=frame_step,
                         fft_length=fft_length,
                         window_fn=window_fn,
                         pad_end=True)

  stft_lens = (wav_lens + (frame_step - 1)) // frame_step
  masks = tf.to_float(
      tf.less_equal(tf.expand_dims(tf.range(tf.shape(stfts)[1]), 0),
                    tf.expand_dims(stft_lens, 1)))

  # An energy spectrogram is the magnitude of the complex-valued STFT.
  # A float32 Tensor of shape [batch_size, ?, 257].
  magnitude_spectrograms = tf.abs(stfts)

  # Warp the linear-scale, magnitude spectrograms into the mel-scale.
  num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
  linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
      num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
      upper_edge_hertz)
  mel_spectrograms = tf.tensordot(magnitude_spectrograms,
                                  linear_to_mel_weight_matrix, 1)
  # Note: Shape inference for tensordot does not currently handle this case.
  mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
      linear_to_mel_weight_matrix.shape[-1:]))

  log_mel_sgram = tf.log(tf.maximum(log_noise_floor, mel_spectrograms))

  if apply_mask:
    log_mel_sgram *= tf.expand_dims(tf.to_float(masks), -1)

  return tf.expand_dims(log_mel_sgram, -1, name="mel_sgrams")
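# Hypothetical usage (the placeholder and shapes are assumptions, not part
# of the original source): extract 80-bin log-mel features at 16 kHz.
waveforms = tf.placeholder(tf.float32, [None, None])  # [batch, samples]
mel = compute_mel_filterbank_features(waveforms, sample_rate=16000)
# mel has shape [batch, frames, 80, 1]; padding frames are zeroed out
# because apply_mask defaults to True.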
def make_graph(ops, op_types, interpreter):
  height = 144
  width = 256

  tensors = {}
  input_details = interpreter.get_input_details()
  # output_details = interpreter.get_output_details()
  print(input_details)
  for input_detail in input_details:
    tensors[input_detail['index']] = tf.placeholder(
        dtype=input_detail['dtype'],
        shape=input_detail['shape'],
        name=input_detail['name'])

  for op in ops:
    print('@@@@@@@@@@@@@@ op:', op)
    op_type = op_types[op['opcode_index']]
    if op_type == 'CONV_2D':
      input_tensor = tensors[op['inputs'][0]]
      weights = tensors[op['inputs'][1]].transpose(1, 2, 3, 0)
      bias = tensors[op['inputs'][2]]
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      options = op['builtin_options']
      output_tensor = tf.nn.conv2d(
          input_tensor,
          weights,
          strides=[1, options['stride_h'], options['stride_w'], 1],
          padding=options['padding'],
          dilations=[1, options['dilation_h_factor'],
                     options['dilation_w_factor'], 1],
          name=output_detail['name'] + '/conv2d')
      output_tensor = tf.add(output_tensor, bias,
                             name=output_detail['name'])
      if output_detail['name'].split('/')[-1] == 'Relu6':
        output_tensor = tf.nn.relu6(output_tensor)
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'DEPTHWISE_CONV_2D':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      weights = tensors[op['inputs'][1]].transpose(1, 2, 3, 0)
      bias = tensors[op['inputs'][2]]
      options = op['builtin_options']
      output_tensor = tf.nn.depthwise_conv2d(
          input_tensor,
          weights,
          strides=[1, options['stride_h'], options['stride_w'], 1],
          padding=options['padding'],
          # dilations=[1, options['dilation_h_factor'],
          #            options['dilation_w_factor'], 1],
          name=output_detail['name'] + '/depthwise_conv2d')
      output_tensor = tf.add(output_tensor, bias,
                             name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'MAX_POOL_2D':
      input_tensor = tensors[op['inputs'][0]]
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      options = op['builtin_options']
      output_tensor = tf.nn.max_pool(
          input_tensor,
          ksize=[1, options['filter_height'], options['filter_width'], 1],
          strides=[1, options['stride_h'], options['stride_w'], 1],
          padding=options['padding'],
          name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'PAD':
      input_tensor = tensors[op['inputs'][0]]
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      paddings_detail = interpreter._get_tensor_details(op['inputs'][1])
      paddings_array = interpreter.get_tensor(paddings_detail['index'])
      paddings = tf.Variable(paddings_array, name=paddings_detail['name'])
      output_tensor = tf.pad(input_tensor, paddings,
                             name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'RELU':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      output_tensor = tf.nn.relu(input_tensor, name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'PRELU':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      alpha_detail = interpreter._get_tensor_details(op['inputs'][1])
      alpha_array = interpreter.get_tensor(alpha_detail['index'])
      with tf.variable_scope(name_or_scope=output_detail['name']):
        alphas = tf.Variable(alpha_array, name=alpha_detail['name'])
        output_tensor = tf.maximum(alphas * input_tensor, input_tensor)
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'RELU6':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      output_tensor = tf.nn.relu6(input_tensor, name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'RESHAPE':
      input_tensor = tensors[op['inputs'][0]]
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      options = op['builtin_options']
      output_tensor = tf.reshape(input_tensor, options['new_shape'],
                                 name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'ADD':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor_0 = tensors[op['inputs'][0]]
      try:
        input_tensor_1 = tensors[op['inputs'][1]]
      except KeyError:
        # Second operand is a constant baked into the model.
        param = interpreter._get_tensor_details(op['inputs'][1])
        input_tensor_1 = interpreter.get_tensor(param['index'])
      output_tensor = tf.add(input_tensor_0, input_tensor_1,
                             name=output_detail['name'])
      if output_detail['name'].split('/')[-1] == 'Relu6':
        output_tensor = tf.nn.relu6(output_tensor)
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'CONCATENATION':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor_0 = tensors[op['inputs'][0]]
      input_tensor_1 = tensors[op['inputs'][1]]
      options = op['builtin_options']
      try:
        input_tensor_2 = tensors[op['inputs'][2]]
        output_tensor = tf.concat(
            [input_tensor_0, input_tensor_1, input_tensor_2],
            options['axis'],
            name=output_detail['name'])
      except (KeyError, IndexError):
        # Only two inputs present.
        output_tensor = tf.concat([input_tensor_0, input_tensor_1],
                                  options['axis'],
                                  name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'LOGISTIC':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      output_tensor = tf.math.sigmoid(input_tensor,
                                      name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'TRANSPOSE_CONV':
      input_tensor = tensors[op['inputs'][2]]
      weights_detail = interpreter._get_tensor_details(op['inputs'][1])
      output_shape_detail = interpreter._get_tensor_details(op['inputs'][0])
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      weights_array = interpreter.get_tensor(weights_detail['index'])
      weights_array = np.transpose(weights_array, (1, 2, 0, 3))
      output_shape_array = interpreter.get_tensor(
          output_shape_detail['index'])
      weights = tf.Variable(weights_array, name=weights_detail['name'])
      shape = tf.Variable(output_shape_array,
                          name=output_shape_detail['name'])
      options = op['builtin_options']
      output_tensor = tf.nn.conv2d_transpose(
          input_tensor,
          weights,
          shape,
          [1, options['stride_h'], options['stride_w'], 1],
          padding=options['padding'],
          name=output_detail['name'] + '/conv2d_transpose')
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'MUL':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor_0 = tensors[op['inputs'][0]]
      try:
        input_tensor_1 = tensors[op['inputs'][1]]
      except KeyError:
        # Second operand is a constant baked into the model.
        param = interpreter._get_tensor_details(op['inputs'][1])
        input_tensor_1 = interpreter.get_tensor(param['index'])
      output_tensor = tf.multiply(input_tensor_0, input_tensor_1,
                                  name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'HARD_SWISH':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      output_tensor = optimizing_hardswish_for_edgetpu(
          input_tensor, name=output_detail['name'])
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'AVERAGE_POOL_2D':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      options = op['builtin_options']
      pool_size = [options['filter_height'], options['filter_width']]
      strides = [options['stride_h'], options['stride_w']]
      padding = options['padding']
      output_tensor = tf.keras.layers.AveragePooling2D(
          pool_size=pool_size,
          strides=strides,
          padding=padding,
          name=output_detail['name'])(input_tensor)
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'FULLY_CONNECTED':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      weights = tensors[op['inputs'][1]].transpose(1, 0)
      bias = tensors[op['inputs'][2]]
      output_shape_detail = interpreter._get_tensor_details(op['inputs'][0])
      output_shape_array = interpreter.get_tensor(
          output_shape_detail['index'])
      output_tensor = tf.keras.layers.Dense(
          units=output_shape_array.shape[3],
          use_bias=True,
          kernel_initializer=tf.keras.initializers.Constant(weights),
          bias_initializer=tf.keras.initializers.Constant(bias))(input_tensor)
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'RESIZE_BILINEAR':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      input_tensor = tensors[op['inputs'][0]]
      size_detail = interpreter._get_tensor_details(op['inputs'][1])
      size = interpreter.get_tensor(size_detail['index'])
      size_height = size[0]
      size_width = size[1]

      def upsampling2d_bilinear(x, size_height, size_width):
        if optimizing_for_edgetpu_flg:
          return tf.image.resize_bilinear(x, (size_height, size_width))
        else:
          return tfv2.image.resize(x, [size_height, size_width],
                                   method='bilinear')

      output_tensor = tf.keras.layers.Lambda(
          upsampling2d_bilinear,
          arguments={'size_height': size_height,
                     'size_width': size_width})(input_tensor)
      tensors[output_detail['index']] = output_tensor
    elif op_type == 'DEQUANTIZE':
      output_detail = interpreter._get_tensor_details(op['outputs'][0])
      weights_detail = interpreter._get_tensor_details(op['inputs'][0])
      weights = interpreter.get_tensor(weights_detail['index'])
      output_tensor = weights.astype(np.float32)
      tensors[output_detail['index']] = output_tensor
    else:
      raise ValueError(op_type)

  # Convolution2DTransposeBias
  input_tensor = tensors[241]
  weights = np.load('weights/segment_Kernel').transpose(1, 2, 0, 3).astype(
      np.float32)
  bias = np.load('weights/segment_Bias').astype(np.float32)
  custom_trans = tf.nn.conv2d_transpose(input=input_tensor,
                                        filters=weights,
                                        output_shape=[1, height, width, 2],
                                        strides=[2, 2],
                                        padding='SAME',
                                        dilations=[1, 1])
  output_tensor = tf.math.add(custom_trans, bias, name='segment')
  tensors[999] = output_tensor
def _body(i, posterior, center, wx, activation_biases, sigma_biases,
          input_activation, tile_filter):
  """Body of EM while loop."""
  tf.logging.info('  Wx: %s', wx)
  beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
  posterior = tf.Print(posterior, [
      layer_name, i, h, ih,
      tf.reduce_min(posterior),
      tf.reduce_max(posterior)
  ], message='posterior')
  # route: [outdim, height?, width?, batch, indim]
  with tf.name_scope('vote_conf'):
    vote_conf = posterior * input_activation
    vote_conf = tf.maximum(vote_conf, 0.0)
  # masses: [batch, 1, outdim, 1, height, width, 1, 1]
  with tf.name_scope('masses'):
    masses = tf.reduce_sum(vote_conf, axis=[1, -1, -2], keepdims=True,
                           name='masses_calculation') + 0.0000001
  with tf.name_scope('preactivate_unrolled'):
    preactivate_unrolled = vote_conf * wx
  # center: [batch, 1, outdim, outatom, height, width]
  with tf.name_scope('center'):
    center = .9 * tf.reduce_sum(preactivate_unrolled, axis=[1, -1, -2],
                                keepdims=True) / masses + .1 * center

  # Rematerialization to save GPU memory. (+22ms/-1.6GB)
  # @tf.contrib.layers.recompute_grad
  def compute_noise_and_variance(wx, center, vote_conf, masses):
    noise = tf.squared_difference(wx, center)
    variance = min_var + tf.reduce_sum(
        vote_conf * noise, axis=[1, -1, -2], keepdims=True,
        name='variance_calculation') / masses
    return noise, variance

  with tf.name_scope('compute_noise_and_variance'):
    noise, variance = compute_noise_and_variance(wx, center, vote_conf,
                                                 masses)

  with tf.name_scope('win'):
    log_variance = tf.log(variance)
    p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True)
    log_2pi = tf.log(2 * math.pi)
    sigma_b = tf.log(sigma_biases * sigma_biases + min_var)
    win = masses * (p_i - num_out_atoms * (sigma_b + log_2pi + 1.0))
  with tf.name_scope('logit'):
    logit = beta * (win - activation_biases * 50 * num_out_atoms)
  with tf.name_scope('activation_update'):
    activation_update = tf.minimum(0.0,
                                   logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
  with tf.name_scope('sigma_update'):
    log_det_sigma = -1 * p_i
    sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
  with tf.name_scope('exp_update'):
    # keepdims (the deprecated keep_dims spelling was used here before).
    exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keepdims=True)
  prior_update = tf.subtract(activation_update - sigma_update, exp_update,
                             name='prior_update_sub')
  max_prior_update = tf.reduce_max(prior_update,
                                   axis=[2, 3, 4, 5, 6, 7],
                                   keepdims=True,
                                   name='max_prior_update')
  prior_normal = tf.add(prior_update, -1 * max_prior_update)
  prior_exp = tf.exp(prior_normal)
  prior_exp_out = tf.reduce_sum(prior_exp, axis=2, keepdims=True,
                                name='prior_exp_out')
  prior_exp_reshape = tf.reshape(prior_exp_out, [-1, h, h, k * k],
                                 name='prior_exp_reshape')
  sum_prior = tf.nn.conv2d_transpose(prior_exp_reshape,
                                     tile_filter,
                                     output_shape=[b * c, ih, ih, 1],
                                     strides=[1, s, s, 1],
                                     padding='VALID')
  sum_prior = tf.maximum(1e-6, sum_prior)
  sum_prior_patch = utils.kernel_tile(sum_prior, k, s, 1,
                                      name='sum_prior_patch')
  with utils.maybe_jit_scope(), tf.name_scope('posterior'):
    sum_prior_reshape = tf.reshape(sum_prior_patch,
                                   [-1, input_dim, 1, 1, h, h, k, k])
    posterior = prior_exp / sum_prior_reshape
  return (i + 1, posterior, logit, center, masses)
def prepare_model(self):
  # input rating vector
  self.input_R_U = tf.placeholder(dtype=tf.float32,
                                  shape=[None, self.num_cols],
                                  name="input_R_U")
  self.input_R_I = tf.placeholder(dtype=tf.float32,
                                  shape=[self.num_rows, None],
                                  name="input_R_I")
  self.input_OH_I = tf.placeholder(dtype=tf.float32,
                                   shape=[None, self.num_cols],
                                   name="input_OH_I")
  self.input_P_cor = tf.placeholder(dtype=tf.int32, shape=[None, 2],
                                    name="input_P_cor")
  self.input_N_cor = tf.placeholder(dtype=tf.int32, shape=[None, 2],
                                    name="input_N_cor")

  # input indicator vector
  self.row_idx = tf.placeholder(dtype=tf.int32, shape=[None, 1],
                                name="row_idx")
  self.col_idx = tf.placeholder(dtype=tf.int32, shape=[None, 1],
                                name="col_idx")

  # user component
  # first layer weights
  UV = tf.get_variable(name="UV",
                       initializer=tf.truncated_normal(
                           shape=[self.num_cols, self.U_hidden_neuron],
                           mean=0, stddev=0.03),
                       dtype=tf.float32)
  # second layer weights
  UW = tf.get_variable(name="UW",
                       initializer=tf.truncated_normal(
                           shape=[self.U_hidden_neuron, self.num_cols],
                           mean=0, stddev=0.03),
                       dtype=tf.float32)
  # first layer bias
  Ub1 = tf.get_variable(name="Ub1",
                        initializer=tf.truncated_normal(
                            shape=[1, self.U_hidden_neuron],
                            mean=0, stddev=0.03),
                        dtype=tf.float32)
  # second layer bias
  Ub2 = tf.get_variable(name="Ub2",
                        initializer=tf.truncated_normal(
                            shape=[1, self.num_cols], mean=0, stddev=0.03),
                        dtype=tf.float32)

  # item component
  # first layer weights
  IV = tf.get_variable(name="IV",
                       initializer=tf.truncated_normal(
                           shape=[self.num_rows, self.I_hidden_neuron],
                           mean=0, stddev=0.03),
                       dtype=tf.float32)
  # second layer weights
  IW = tf.get_variable(name="IW",
                       initializer=tf.truncated_normal(
                           shape=[self.I_hidden_neuron, self.num_rows],
                           mean=0, stddev=0.03),
                       dtype=tf.float32)
  # first layer bias
  Ib1 = tf.get_variable(name="Ib1",
                        initializer=tf.truncated_normal(
                            shape=[1, self.I_hidden_neuron],
                            mean=0, stddev=0.03),
                        dtype=tf.float32)
  # second layer bias
  Ib2 = tf.get_variable(name="Ib2",
                        initializer=tf.truncated_normal(
                            shape=[1, self.num_rows], mean=0, stddev=0.03),
                        dtype=tf.float32)

  I_factor_vector = tf.get_variable(
      name="I_factor_vector",
      initializer=tf.random_uniform(shape=[1, self.num_cols]),
      dtype=tf.float32)

  # user component
  U_pre_Encoder = tf.matmul(self.input_R_U, UV) + Ub1  # input to the hidden layer
  self.U_Encoder = self.g_act(U_pre_Encoder)  # output of the hidden layer
  U_pre_Decoder = tf.matmul(self.U_Encoder, UW) + Ub2  # input to the output layer
  self.U_Decoder = self.f_act(U_pre_Decoder)  # output of the output layer

  # item component
  I_pre_mul = tf.transpose(
      tf.matmul(I_factor_vector, tf.transpose(self.input_OH_I)))
  I_pre_Encoder = tf.matmul(tf.transpose(self.input_R_I), IV) + Ib1  # input to the hidden layer
  self.I_Encoder = self.g_act(I_pre_Encoder * I_pre_mul)  # output of the hidden layer
  I_pre_Decoder = tf.matmul(self.I_Encoder, IW) + Ib2  # input to the output layer
  self.I_Decoder = self.f_act(I_pre_Decoder)  # output of the output layer

  # final output
  self.Decoder = ((tf.transpose(
      tf.gather_nd(tf.transpose(self.U_Decoder), self.col_idx))) +
                  tf.gather_nd(tf.transpose(self.I_Decoder),
                               self.row_idx)) / 2.0

  pos_data = tf.gather_nd(self.Decoder, self.input_P_cor)
  neg_data = tf.gather_nd(self.Decoder, self.input_N_cor)

  pre_cost1 = tf.maximum(neg_data - pos_data + self.margin,
                         tf.zeros(tf.shape(neg_data)[0]))
  cost1 = tf.reduce_sum(pre_cost1)  # margin (hinge) ranking loss

  pre_cost2 = tf.square(self.l2_norm(UW)) + tf.square(self.l2_norm(UV)) \
      + tf.square(self.l2_norm(IW)) + tf.square(self.l2_norm(IV)) \
      + tf.square(self.l2_norm(Ib1)) + tf.square(self.l2_norm(Ib2)) \
      + tf.square(self.l2_norm(Ub1)) + tf.square(self.l2_norm(Ub2))
  cost2 = self.lambda_value * 0.5 * pre_cost2  # regularization term

  self.cost = cost1 + cost2  # the loss function

  if self.optimizer_method == "Adam":
    optimizer = tf.train.AdamOptimizer(self.lr)
  elif self.optimizer_method == "Adadelta":
    optimizer = tf.train.AdadeltaOptimizer(self.lr)
  elif self.optimizer_method == "Adagrad":
    # Was AdadeltaOptimizer, which silently ignored the "Adagrad" choice.
    optimizer = tf.train.AdagradOptimizer(self.lr)
  elif self.optimizer_method == "RMSProp":
    optimizer = tf.train.RMSPropOptimizer(self.lr)
  elif self.optimizer_method == "GradientDescent":
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
  elif self.optimizer_method == "Momentum":
    optimizer = tf.train.MomentumOptimizer(self.lr, 0.9)
  else:
    raise ValueError("Optimizer Key ERROR")

  gvs = optimizer.compute_gradients(self.cost)
  self.optimizer = optimizer.apply_gradients(gvs,
                                             global_step=self.global_step)
def __init__(self, environment, summary_dir="./"):
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
  # device_count expects an integer device count, not a boolean.
  config = tf.ConfigProto(log_device_placement=False,
                          device_count={'GPU': 1})
  config.gpu_options.per_process_gpu_memory_fraction = 0.1

  self.state_size = environment.observation_space.shape
  self.action_size = environment.action_space.shape[0]
  self.action_bound_high = environment.action_space.high
  self.action_bound_low = environment.action_space.low
  self.actions = tf.placeholder(tf.float32, [None, self.action_size],
                                'action')
  self.beta = 0.01
  self.learning_rate = 0.0001
  self.minibatch = 32
  self.epsilon = 0.21
  self.critic_coefficient = 0.5
  self.l2_regular = 0.001

  self.sess = tf.Session(config=config)
  self.state = tf.placeholder(tf.float32, [None, 64, 64, 4], 'state')
  self.advantage = tf.placeholder(tf.float32, [None, 1], 'advantage')
  self.rewards = tf.placeholder(tf.float32, [None, 1], 'd_rewards')

  self.dataset = tf.data.Dataset.from_tensor_slices({
      "state": self.state,
      "actions": self.actions,
      "rewards": self.rewards,
      "advantage": self.advantage
  })
  self.dataset = self.dataset.shuffle(buffer_size=10000)
  self.dataset = self.dataset.batch(self.minibatch)
  self.dataset = self.dataset.cache()
  self.dataset = self.dataset.repeat(4)
  self.iterator = self.dataset.make_initializable_iterator()
  batch = self.iterator.get_next()

  old_policy, old_policy_params = self.Actor(batch["state"], 'oldpolicy')
  policy, policy_params = self.Actor(batch["state"], 'policy')
  policy_eval, _ = self.Actor(self.state, 'policy', reuse=True)

  old_value, old_value_params = self.Critic(batch["state"], "oldvalue")
  self.value, value_params = self.Critic(batch["state"], "value")
  self.value_eval, _ = self.Critic(self.state, 'value', reuse=True)

  self.sample_action = tf.squeeze(policy_eval.sample(1), axis=0,
                                  name="sample_action")
  self.global_step = tf.train.get_or_create_global_step()
  self.saver = tf.train.Saver()

  with tf.variable_scope('loss'):
    with tf.variable_scope('actor'):
      ratio = tf.maximum(policy.prob(batch["actions"]), 1e-6) / tf.maximum(
          old_policy.prob(batch["actions"]), 1e-6)
      ratio = tf.clip_by_value(ratio, 0, 10)
      surr1 = batch["advantage"] * ratio
      surr2 = batch["advantage"] * tf.clip_by_value(
          ratio, 1 - self.epsilon, 1 + self.epsilon)
      loss_policy = -tf.reduce_mean(tf.minimum(surr1, surr2))
      tf.summary.scalar("loss", loss_policy)

    with tf.variable_scope('critic'):
      # Value-function loss (previously misnamed loss_actor).
      loss_critic = tf.reduce_mean(
          tf.square(self.value - batch["rewards"])) * 0.5
      tf.summary.scalar("loss", loss_critic)

    with tf.variable_scope('entropy'):
      entropy = policy.entropy()
      pol_entpen = -self.beta * tf.reduce_mean(entropy)

    loss = loss_policy + loss_critic * self.critic_coefficient + pol_entpen
    tf.summary.scalar("total", loss)

  with tf.variable_scope('train'):
    opt = tf.train.AdamOptimizer(self.learning_rate)
    self.trainer = opt.minimize(loss, global_step=self.global_step,
                                var_list=policy_params + value_params)

  with tf.variable_scope('update_old'):
    self.update_old_policy_op = [
        oldp.assign(p) for p, oldp in zip(policy_params, old_policy_params)
    ]
    self.update_old_value_op = [
        oldp.assign(p) for p, oldp in zip(value_params, old_value_params)
    ]

  self.writer = tf.summary.FileWriter(summary_dir, self.sess.graph)
  self.sess.run(tf.global_variables_initializer())

  tf.summary.scalar("value", tf.reduce_mean(self.value))
  tf.summary.scalar("policy_entropy", tf.reduce_mean(entropy))
  tf.summary.scalar("sigma", tf.reduce_mean(policy.stddev()))
  self.board = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
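# Worked example of the clipped surrogate above with epsilon = 0.21:
#   ratio = 1.5, advantage = +1:  surr1 = 1.50,  surr2 = 1.21  -> min = 1.21
#   ratio = 0.5, advantage = -1:  surr1 = -0.50, surr2 = -0.79 -> min = -0.79
# min() keeps the pessimistic bound, so moving the policy outside the clip
# range yields no additional objective improvement.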
def resize_image(image, bboxes=None, min_size=None, max_size=None):
  """Resizes an image and (optionally) its bounding boxes.

  We need to resize the image and (optionally) its bounding boxes when the
  biggest side is bigger than `max_size` or when the smallest side is
  smaller than `min_size`. If no max_size is defined it won't scale down,
  and if no min_size is defined it won't scale up. Then, using the ratio we
  applied, we scale the bounding boxes by the same factor.

  Args:
    image: Tensor with image of shape (H, W, 3).
    bboxes: Optional Tensor with bounding boxes with shape (num_bboxes, 5),
      where we have (x_min, y_min, x_max, y_max, label) for each one.
    min_size: Min size of width or height.
    max_size: Max size of width or height.

  Returns:
    Dictionary containing:
      image: Tensor with scaled image.
      bboxes: Tensor with scaled (using the same factor as the image)
        bounding boxes with shape (num_bboxes, 5).
      scale_factor: Scale factor used to modify the image
        (1.0 means no change).
  """
  image_shape = tf.to_float(tf.shape(image))
  height = image_shape[0]
  width = image_shape[1]

  if min_size is not None:
    # We calculate the upscale factor, the rate we need to use to end up
    # with an image with its lowest dimension at least `min_size`.
    # In case of being big enough the scale factor is 1. (no change)
    min_size = tf.to_float(min_size)
    min_dimension = tf.minimum(height, width)
    upscale_factor = tf.maximum(min_size / min_dimension, 1.0)
  else:
    upscale_factor = tf.constant(1.0)

  if max_size is not None:
    # We do the same calculating the downscale factor, to end up with an
    # image where the biggest dimension is less than `max_size`.
    # When the image is small enough the scale factor is 1. (no change)
    max_size = tf.to_float(max_size)
    max_dimension = tf.maximum(height, width)
    downscale_factor = tf.minimum(max_size / max_dimension, 1.0)
  else:
    downscale_factor = tf.constant(1.0)

  scale_factor = upscale_factor * downscale_factor

  # New size is calculated using the scale factor, rounded to int.
  new_height = height * scale_factor
  new_width = width * scale_factor

  # Resize image using TensorFlow's own `resize_images` utility.
  image = tf.image.resize_images(
      image, tf.stack(tf.to_int32([new_height, new_width])),
      method=tf.image.ResizeMethod.BILINEAR)

  if bboxes is not None:
    bboxes = adjust_bboxes(bboxes,
                           old_height=height, old_width=width,
                           new_height=new_height, new_width=new_width)
    return {
        "image": image,
        "bboxes": bboxes,
        "scale_factor": scale_factor,
    }

  return {
      "image": image,
      "scale_factor": scale_factor,
  }
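# Hypothetical usage (the 600/1024 values are assumptions): keep the short
# side at >= 600 px and the long side at <= 1024 px, scaling the boxes by
# the same factor.
resized = resize_image(image, bboxes=bboxes, min_size=600, max_size=1024)
image, bboxes = resized["image"], resized["bboxes"]
scale = resized["scale_factor"]  # 1.0 means the image was left untouched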
def call(self, x):
  input_image, y_pred, y_true, true_boxes = x

  # adjust the shape of y_pred to [batch, grid_h, grid_w, 3, 4+1+nb_class]
  y_pred = tf.reshape(
      y_pred,
      tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0))

  # initialize the masks
  object_mask = tf.expand_dims(y_true[..., 4], 4)

  # the variable to keep track of the number of batches processed
  batch_seen = tf.Variable(0.)

  # compute grid factor and net factor
  grid_h = tf.shape(input=y_true)[1]
  grid_w = tf.shape(input=y_true)[2]
  grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                           [1, 1, 1, 1, 2])

  net_h = tf.shape(input=input_image)[1]
  net_w = tf.shape(input=input_image)[2]
  net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                          [1, 1, 1, 1, 2])

  """
  Adjust prediction
  """
  pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                 tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
  pred_box_wh = y_pred[..., 2:4]  # t_wh
  pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                 4)  # adjust confidence
  pred_box_class = y_pred[..., 5:]  # adjust class probabilities

  """
  Adjust ground truth
  """
  true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
  true_box_wh = y_true[..., 2:4]  # t_wh
  true_box_conf = tf.expand_dims(y_true[..., 4], 4)
  true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)

  """
  Compare each predicted box to all true boxes
  """
  # initially, drag all objectness of all boxes to 0
  conf_delta = pred_box_conf - 0

  # then, ignore the boxes which have good overlap with some true box
  true_xy = true_boxes[..., 0:2] / grid_factor
  true_wh = true_boxes[..., 2:4] / net_factor

  true_wh_half = true_wh / 2.
  true_mins = true_xy - true_wh_half
  true_maxes = true_xy + true_wh_half

  pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
  pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

  pred_wh_half = pred_wh / 2.
  pred_mins = pred_xy - pred_wh_half
  pred_maxes = pred_xy + pred_wh_half

  intersect_mins = tf.maximum(pred_mins, true_mins)
  intersect_maxes = tf.minimum(pred_maxes, true_maxes)

  intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
  intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

  true_areas = true_wh[..., 0] * true_wh[..., 1]
  pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

  union_areas = pred_areas + true_areas - intersect_areas
  iou_scores = tf.truediv(intersect_areas, union_areas)

  best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
  conf_delta *= tf.expand_dims(
      tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)

  """
  Compute some online statistics
  """
  true_xy = true_box_xy / grid_factor
  true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

  true_wh_half = true_wh / 2.
  true_mins = true_xy - true_wh_half
  true_maxes = true_xy + true_wh_half

  pred_xy = pred_box_xy / grid_factor
  pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

  pred_wh_half = pred_wh / 2.
  pred_mins = pred_xy - pred_wh_half
  pred_maxes = pred_xy + pred_wh_half

  intersect_mins = tf.maximum(pred_mins, true_mins)
  intersect_maxes = tf.minimum(pred_maxes, true_maxes)
  intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
  intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

  true_areas = true_wh[..., 0] * true_wh[..., 1]
  pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

  union_areas = pred_areas + true_areas - intersect_areas
  iou_scores = tf.truediv(intersect_areas, union_areas)
  iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

  count = tf.reduce_sum(input_tensor=object_mask)
  count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
  detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5,
                        dtype=tf.float32)
  class_mask = tf.expand_dims(
      tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1),
                       true_box_class), dtype=tf.float32), 4)
  recall50 = tf.reduce_sum(
      input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) *
      detect_mask * class_mask) / (count + 1e-3)
  recall75 = tf.reduce_sum(
      input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) *
      detect_mask * class_mask) / (count + 1e-3)
  avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
  avg_obj = tf.reduce_sum(
      input_tensor=pred_box_conf * object_mask) / (count + 1e-3)
  avg_noobj = tf.reduce_sum(
      input_tensor=pred_box_conf * (1 - object_mask)) / (count_noobj + 1e-3)
  avg_cat = tf.reduce_sum(
      input_tensor=object_mask * class_mask) / (count + 1e-3)

  """
  Warm-up training
  """
  batch_seen = tf.assign_add(batch_seen, 1.)
  true_box_xy, true_box_wh, xywh_mask = tf.cond(
      pred=tf.less(batch_seen, self.warmup_batches + 1),
      true_fn=lambda: [
          true_box_xy + (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
          (1 - object_mask),
          true_box_wh + tf.zeros_like(true_box_wh) * (1 - object_mask),
          tf.ones_like(object_mask)
      ],
      false_fn=lambda: [true_box_xy, true_box_wh, object_mask])

  """
  Compare each true box to all anchor boxes
  """
  wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
  # the smaller the box, the bigger the scale
  wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4)

  xy_delta = xywh_mask * (pred_box_xy -
                          true_box_xy) * wh_scale * self.xywh_scale
  wh_delta = xywh_mask * (pred_box_wh -
                          true_box_wh) * wh_scale * self.xywh_scale
  conf_delta = (object_mask * (pred_box_conf - true_box_conf) *
                self.obj_scale +
                (1 - object_mask) * conf_delta * self.noobj_scale)
  class_delta = object_mask * tf.expand_dims(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=true_box_class, logits=pred_box_class), 4) * self.class_scale

  loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta),
                          axis=list(range(1, 5)))
  loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta),
                          axis=list(range(1, 5)))
  loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta),
                            axis=list(range(1, 5)))
  loss_class = tf.reduce_sum(input_tensor=class_delta,
                             axis=list(range(1, 5)))

  loss = loss_xy + loss_wh + loss_conf + loss_class

  loss = tf.Print(loss, [grid_h, avg_obj],
                  message='avg_obj \t\t', summarize=1000)
  loss = tf.Print(loss, [grid_h, avg_noobj],
                  message='avg_noobj \t\t', summarize=1000)
  loss = tf.Print(loss, [grid_h, avg_iou],
                  message='avg_iou \t\t', summarize=1000)
  loss = tf.Print(loss, [grid_h, avg_cat],
                  message='avg_cat \t\t', summarize=1000)
  loss = tf.Print(loss, [grid_h, recall50],
                  message='recall50 \t', summarize=1000)
  loss = tf.Print(loss, [grid_h, recall75],
                  message='recall75 \t', summarize=1000)
  loss = tf.Print(loss, [grid_h, count],
                  message='count \t', summarize=1000)
  loss = tf.Print(loss, [
      grid_h,
      tf.reduce_sum(input_tensor=loss_xy),
      tf.reduce_sum(input_tensor=loss_wh),
      tf.reduce_sum(input_tensor=loss_conf),
      tf.reduce_sum(input_tensor=loss_class)
  ], message='loss xy, wh, conf, class: \t', summarize=1000)

  return loss * self.grid_scale
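# Worked example (not from the original source) of the corner-based IoU
# used twice above, for a single pair of boxes.
import numpy as np

a_min, a_max = np.array([1., 1.]), np.array([3., 3.])  # box A, area 4
b_min, b_max = np.array([2., 2.]), np.array([4., 4.])  # box B, area 4
inter_wh = np.maximum(np.minimum(a_max, b_max) - np.maximum(a_min, b_min), 0.)
inter = inter_wh.prod()        # 1.0
union = 4.0 + 4.0 - inter      # 7.0
iou = inter / union            # ~0.143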
def last_value_quantize(self,
                        inputs,
                        per_channel=False,
                        init_min=-6.0,
                        init_max=6.0,
                        name_prefix='FixedValueQuant',
                        reuse=None,
                        is_training=False,
                        num_bits=8,
                        narrow_range=False,
                        relative_quantile=0,
                        freeze=False,
                        quant_delay=False):
  """Adds a layer that collects quantization ranges as last input ranges.

  LastValueQuantize creates variables called 'min' and 'max', representing
  the interval used for quantization and clamping.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel.
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To
      be able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    relative_quantile: Specify the location of quantization min and max
      parameters. relative_quantile = 0 is equivalent to using min and max
      of input; relative_quantile = 1 sets min and max to the optimal
      location assuming the input distribution is uniform. In reality, a
      good value should be in the range [0, 1].
    freeze: If True, the min and max variables are calculated once at the
      beginning of training and then frozen. This is used for quantized
      fine-tuning of a pretrained checkpoint. If False, the min and max are
      calculated and updated every cycle.
    quant_delay: The number of global steps after which the fake
      quantization is turned on. Used for performing fine-tuning experiments
      without starting from a pre-trained checkpoint.

  Returns:
    a tensor containing quantized values.
  """
  with tf.variable_scope(None, default_name=name_prefix, values=[inputs],
                         reuse=reuse) as scope:
    scope.set_partitioner(None)
    input_shape = inputs.get_shape()
    input_dim = len(input_shape)
    if per_channel:
      # Only support quantizing 1-, 2- and 4-dimensional tensors.
      assert input_dim in [1, 2, 4]
      min_max_shape = [input_shape[-1]]
    else:
      min_max_shape = []

    min_var = tf.get_variable('min', min_max_shape, tf.float32,
                              initializer=tf.constant_initializer(init_min),
                              trainable=False)
    max_var = tf.get_variable('max', min_max_shape, tf.float32,
                              initializer=tf.constant_initializer(init_max),
                              trainable=False)
    if not is_training:
      return self.delayed_quant(inputs,
                                min_var,
                                max_var,
                                per_channel=per_channel,
                                num_bits=num_bits,
                                narrow_range=narrow_range,
                                quant_delay=None)

    if per_channel:
      if input_dim == 2:
        reduce_dims = [0]
      elif input_dim == 4:
        reduce_dims = [0, 1, 2]

    if num_bits >= 4:
      quantile = 0
    else:
      quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100

    if per_channel:
      if input_dim >= 2:
        batch_min = tfp.stats.percentile(inputs, q=quantile,
                                         axis=reduce_dims, name='BatchMin')
      else:
        batch_min = inputs
    else:
      batch_min = tfp.stats.percentile(inputs, q=quantile, name='BatchMin')

    if per_channel:
      if input_dim >= 2:
        batch_max = tfp.stats.percentile(inputs, q=100 - quantile,
                                         axis=reduce_dims, name='BatchMax')
      else:
        batch_max = inputs
    else:
      batch_max = tfp.stats.percentile(inputs, q=100 - quantile,
                                       name='BatchMax')

    # With the narrow range the scale is symmetric (multiplier 1.0);
    # otherwise the minimum is stretched by one quantization step.
    # (The original computed `multiplier` twice; folded into one branch.)
    if narrow_range:
      multiplier = 1.0
    else:
      multiplier = 1.0 + 1.0 / (2.0**(num_bits - 1.0) - 1.0)

    batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max))
    batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max)

    batch_abs_max = tf.cast(batch_abs_max, tf.float32)
    batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32)

    if freeze:
      def make_var_op(var):
        def f():
          return var
        return f

      quant_step = common.CreateOrGetQuantizationStep()
      min_max_assign = tf.less_equal(quant_step, 1, name='MinMaxAssign')
      min_value = tf.cond(min_max_assign,
                          make_var_op(batch_adjusted_min),
                          make_var_op(min_var),
                          name='AssignMinCond')
      max_value = tf.cond(min_max_assign,
                          make_var_op(batch_abs_max),
                          make_var_op(max_var),
                          name='AssignMaxCond')
    else:
      min_value = batch_adjusted_min
      max_value = batch_abs_max

    assign_min = tf.assign(min_var, min_value)
    assign_max = tf.assign(max_var, max_value)

    return self.delayed_quant(inputs,
                              assign_min,
                              assign_max,
                              per_channel=per_channel,
                              num_bits=num_bits,
                              narrow_range=narrow_range,
                              quant_delay=quant_delay)
def _compute_model_loss(self, input_sequence, output_sequence,
                        sequence_length, control_sequence):
  """Builds a model with loss for train/eval."""
  hparams = self.hparams
  batch_size = hparams.batch_size

  input_sequence = tf.to_float(input_sequence)
  output_sequence = tf.to_float(output_sequence)

  max_seq_len = tf.minimum(tf.shape(output_sequence)[1],
                           hparams.max_seq_len)

  input_sequence = input_sequence[:, :max_seq_len]

  if control_sequence is not None:
    control_depth = control_sequence.shape[-1]
    control_sequence = tf.to_float(control_sequence)
    control_sequence = control_sequence[:, :max_seq_len]
    # Shouldn't be necessary, but the slice loses shape information when
    # control depth is zero.
    control_sequence.set_shape([batch_size, None, control_depth])

  # The target/expected outputs.
  x_target = output_sequence[:, :max_seq_len]
  # Inputs to be fed to decoder, including zero padding for the initial
  # input.
  x_input = tf.pad(output_sequence[:, :max_seq_len - 1],
                   [(0, 0), (1, 0), (0, 0)])
  x_length = tf.minimum(sequence_length, max_seq_len)

  # Either encode to get `z`, or do unconditional, decoder-only.
  if hparams.z_size:  # vae mode:
    q_z = self.encode(input_sequence, x_length, control_sequence)
    z = q_z.sample()

    # Prior distribution.
    p_z = ds.MultivariateNormalDiag(
        loc=[0.] * hparams.z_size, scale_diag=[1.] * hparams.z_size)

    # KL Divergence (nats)
    kl_div = ds.kl_divergence(q_z, p_z)

    # Concatenate the Z vectors to the inputs at each time step.
  else:  # unconditional, decoder-only generation
    kl_div = tf.zeros([batch_size, 1], dtype=tf.float32)
    z = None

  r_loss, metric_map = self.decoder.reconstruction_loss(
      x_input, x_target, x_length, z, control_sequence)[0:2]

  free_nats = hparams.free_bits * tf.math.log(2.0)
  kl_cost = tf.maximum(kl_div - free_nats, 0)

  beta = ((1.0 - tf.pow(hparams.beta_rate, tf.to_float(self.global_step)))
          * hparams.max_beta)
  self.loss = tf.reduce_mean(r_loss) + beta * tf.reduce_mean(kl_cost)

  scalars_to_summarize = {
      'loss': self.loss,
      'losses/r_loss': r_loss,
      'losses/kl_loss': kl_cost,
      'losses/kl_bits': kl_div / tf.math.log(2.0),
      'losses/kl_beta': beta,
  }
  return metric_map, scalars_to_summarize
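# Sketch of the "free bits" trick above (the free_bits value here is an
# arbitrary assumption): KL below the floor incurs no penalty, so the
# posterior keeps at least that much capacity before the beta term bites.
import numpy as np

free_bits = 48.0
free_nats = free_bits * np.log(2.0)            # ~33.27 nats
kl_div = np.array([20.0, 40.0])                # per-example KL in nats
kl_cost = np.maximum(kl_div - free_nats, 0.0)  # [0.0, ~6.73]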
def tensors_to_item(self, keys_to_tensors):
  unmapped_tensor = super(_ClassTensorHandler,
                          self).tensors_to_item(keys_to_tensors)
  return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
                    self._display_name_to_id_table.lookup(unmapped_tensor))
def lrelu(input_, leak=0.2, name="lrelu"):
  return tf.maximum(input_, leak * input_, name=name)
def _get_final_index(sequence_length, time_major=True):
  indices = [tf.maximum(0, sequence_length - 1),
             tf.range(sequence_length.shape[0])]
  if not time_major:
    indices = indices[-1::-1]
  return tf.stack(indices, axis=1)
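# Hypothetical usage (`outputs` is an assumption): pick the last valid step
# of each sequence from a batch-major [batch, time, depth] tensor.
sequence_length = tf.constant([3, 5])
indices = _get_final_index(sequence_length, time_major=False)
# indices == [[0, 2], [1, 4]] -- (batch, time) pairs for tf.gather_nd
last_steps = tf.gather_nd(outputs, indices)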
def compute_mask_prob_from_yao_schedule(i, n, pmin=0.1, pmax=0.9, alpha=0.7):
  wat = (pmax - pmin) * i / n
  return tf.maximum(pmin, pmax - wat / alpha)
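# The schedule anneals the masking probability from pmax toward pmin as i
# approaches n. Sample values with the defaults (pmin=0.1, pmax=0.9,
# alpha=0.7) and n = 100:
#   i = 0   -> max(0.1, 0.9 - 0.0)                = 0.9
#   i = 35  -> max(0.1, 0.9 - (0.8 * 0.35) / 0.7) = 0.5
#   i = 100 -> max(0.1, 0.9 - 0.8 / 0.7)          = 0.1  (clamped at pmin)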
def _legacy_sqrt_decay(step):
  """Decay like 1 / sqrt(step), multiplied by 500 to normalize."""
  return 500.0 / tf.sqrt(tf.maximum(step, 1.0))
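# Illustration of the decay factor 500 / sqrt(step), clamped below at step 1:
#   step = 1       -> 500.0
#   step = 10000   -> 5.0
#   step = 250000  -> 1.0   (sqrt(250000) = 500, hence the normalization)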
def _resource_apply_dense(self, grad, handle):
  var = handle
  grad = tf.to_float(grad)
  grad_squared = tf.square(grad) + self._epsilon1
  grad_squared_mean = tf.reduce_mean(grad_squared)
  decay_rate = self._decay_rate
  update_scale = self._learning_rate
  old_val = var
  if var.dtype.base_dtype == tf.bfloat16:
    old_val = tf.to_float(self._parameter_encoding.decode(old_val))
  if self._multiply_by_parameter_scale:
    update_scale *= tf.to_float(self._parameter_scale(old_val))
  # HACK: Make things dependent on grad.
  # This confounds the XLA rewriter and keeps it from fusing computations
  # across different variables. This fusion is bad for HBM usage, since
  # it causes the gradients to persist in memory.
  decay_rate += grad_squared_mean * 1e-30
  update_scale += grad_squared_mean * 1e-30
  # END HACK
  mixing_rate = 1.0 - decay_rate
  shape = var.get_shape().as_list()
  updates = []
  if self._should_use_factored_second_moment_estimate(shape):
    grad_squared_row_mean = tf.reduce_mean(grad_squared, -1)
    grad_squared_col_mean = tf.reduce_mean(grad_squared, -2)
    vr = self.get_slot(var, "vr")
    new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
    vc = self.get_slot(var, "vc")
    new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
    vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
    vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
    updates = [vr_update, vc_update]
    long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
    r_factor = tf.rsqrt(new_vr / long_term_mean)
    c_factor = tf.rsqrt(new_vc)
    x = grad * tf.expand_dims(r_factor, -1) * tf.expand_dims(c_factor, -2)
  else:
    v = self.get_slot(var, "v")
    new_v = decay_rate * v + mixing_rate * grad_squared
    v_update = tf.assign(v, new_v, use_locking=self._use_locking)
    updates = [v_update]
    x = grad * tf.rsqrt(new_v)
  if self._clipping_threshold is not None:
    clipping_denom = tf.maximum(1.0,
                                reduce_rms(x) / self._clipping_threshold)
    x /= clipping_denom
  subtrahend = update_scale * x
  if self._beta1:
    m = self.get_slot(var, "m")
    new_m = (self._beta1 * tf.to_float(m) +
             (1.0 - self._beta1) * subtrahend)
    subtrahend = new_m
    new_m = common_layers.cast_like(new_m, var)
    updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
  new_val = tf.to_float(old_val) - subtrahend
  if var.dtype.base_dtype == tf.bfloat16:
    new_val = self._parameter_encoding.encode(new_val,
                                              self._quantization_noise)
  if self._simulated_quantize_bits:
    new_val = quantization.simulated_quantize(
        var - subtrahend, self._simulated_quantize_bits,
        self._quantization_noise)
  var_update = tf.assign(var, new_val, use_locking=self._use_locking)
  updates = [var_update] + updates
  return tf.group(*updates)
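# Sketch of the update clipping above (reduce_rms is assumed to compute
# sqrt(mean(x^2))): with clipping_threshold = 1.0, an update whose RMS is
# 2.5 is divided by max(1.0, 2.5 / 1.0) = 2.5, bringing its RMS back down
# to the threshold, while updates already below the threshold pass through
# unchanged.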
def __init__(self, sess, model, batch_size=1, confidence=CONFIDENCE,
             targeted=TARGETED, learning_rate=LEARNING_RATE,
             binary_search_steps=BINARY_SEARCH_STEPS,
             max_iterations=MAX_ITERATIONS, print_every=100,
             early_stop_iters=0, abort_early=ABORT_EARLY,
             initial_const=INITIAL_CONST, use_log=False, use_tanh=True,
             use_resize=False, adam_beta1=0.9, adam_beta2=0.999,
             reset_adam_after_found=False, solver="adam", save_ckpts="",
             load_checkpoint="", start_iter=0, init_size=32,
             use_importance=True):
  """The L_2 optimized attack.

  This attack is the most efficient and should be used as the primary
  attack to evaluate potential defenses. Returns adversarial examples for
  the supplied model.

  confidence: Confidence of adversarial examples: higher produces examples
    that are farther away, but more strongly classified as adversarial.
  batch_size: Number of gradient evaluations to run simultaneously.
  targeted: True if we should perform a targeted attack, False otherwise.
  learning_rate: The learning rate for the attack algorithm. Smaller values
    produce better results but are slower to converge.
  binary_search_steps: The number of times we perform binary search to find
    the optimal tradeoff-constant between distance and confidence.
  max_iterations: The maximum number of iterations. Larger values are more
    accurate; setting it too small will require a large learning rate and
    will produce poor results.
  abort_early: If true, allows early aborts if gradient descent gets stuck.
  initial_const: The initial tradeoff-constant to use to tune the relative
    importance of distance and confidence. If binary_search_steps is large,
    the initial constant is not important.
  """
  image_size, num_channels, num_labels = (model.image_size,
                                          model.num_channels,
                                          model.num_labels)
  self.model = model
  self.sess = sess
  self.TARGETED = targeted
  self.LEARNING_RATE = learning_rate
  self.MAX_ITERATIONS = max_iterations
  self.print_every = print_every
  self.early_stop_iters = (
      early_stop_iters if early_stop_iters != 0 else max_iterations // 10)
  print("early stop:", self.early_stop_iters)
  self.BINARY_SEARCH_STEPS = binary_search_steps
  self.ABORT_EARLY = abort_early
  self.CONFIDENCE = confidence
  self.initial_const = initial_const
  self.start_iter = start_iter
  self.batch_size = batch_size
  self.num_channels = num_channels
  self.resize_init_size = init_size
  self.use_importance = use_importance
  if use_resize:
    self.small_x = self.resize_init_size
    self.small_y = self.resize_init_size
  else:
    self.small_x = image_size
    self.small_y = image_size
  self.use_tanh = use_tanh
  self.use_resize = use_resize
  self.save_ckpts = save_ckpts
  if save_ckpts:
    os.system("mkdir -p {}".format(save_ckpts))
  self.repeat = binary_search_steps >= 10

  # each batch has a different modifier value (see below) to evaluate
  # small_shape = (None, self.small_x, self.small_y, num_channels)
  shape = (None, image_size, image_size, num_channels)
  single_shape = (image_size, image_size, num_channels)
  small_single_shape = (self.small_x, self.small_y, num_channels)

  # the variable we're going to optimize over
  # support multiple batches
  # support any size image, will be resized to model native size
  if self.use_resize:
    self.modifier = tf.placeholder(tf.float32,
                                   shape=(None, None, None, None))
    # scaled up image
    self.scaled_modifier = tf.image.resize_images(self.modifier,
                                                  [image_size, image_size])
    # operator used for resizing image
    self.resize_size_x = tf.placeholder(tf.int32)
    self.resize_size_y = tf.placeholder(tf.int32)
    self.resize_input = tf.placeholder(tf.float32,
                                       shape=(1, None, None, None))
    self.resize_op = tf.image.resize_images(
        self.resize_input, [self.resize_size_x, self.resize_size_y])
  else:
    self.modifier = tf.placeholder(
        tf.float32, shape=(None, image_size, image_size, num_channels))
    # no resize
    self.scaled_modifier = self.modifier

  # the real variable, initialized to 0
  self.load_checkpoint = load_checkpoint
  if load_checkpoint:
    # if checkpoint is incorrect reshape will fail
    print("Using checkpoint", load_checkpoint)
    self.real_modifier = np.load(load_checkpoint).reshape(
        (1,) + small_single_shape)
  else:
    self.real_modifier = np.zeros((1,) + small_single_shape,
                                  dtype=np.float32)
  # self.real_modifier = np.random.randn(image_size * image_size *
  #     num_channels).astype(np.float32).reshape((1,) + single_shape)
  # self.real_modifier /= np.linalg.norm(self.real_modifier)

  # these are variables to be more efficient in sending data to tf
  # we only work on 1 image at once; the batch is for evaluating loss at
  # different modifiers
  self.timg = tf.Variable(np.zeros(single_shape), dtype=tf.float32)
  self.tlab = tf.Variable(np.zeros(num_labels), dtype=tf.float32)
  self.const = tf.Variable(0.0, dtype=tf.float32)

  # and here's what we use to assign them
  self.assign_timg = tf.placeholder(tf.float32, single_shape)
  self.assign_tlab = tf.placeholder(tf.float32, num_labels)
  self.assign_const = tf.placeholder(tf.float32)

  # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
  # broadcast self.timg to every dimension of modifier
  if use_tanh:
    self.newimg = tf.tanh(self.scaled_modifier + self.timg) / 2
  else:
    self.newimg = self.scaled_modifier + self.timg

  # prediction BEFORE-SOFTMAX of the model
  # now we have output at #batch_size different modifiers
  # the output should have shape (batch_size, num_labels)
  self.output = model.predict(self.newimg)

  # distance to the input data
  if use_tanh:
    self.l2dist = tf.reduce_sum(
        tf.square(self.newimg - tf.tanh(self.timg) / 2), [1, 2, 3])
  else:
    self.l2dist = tf.reduce_sum(tf.square(self.newimg - self.timg),
                                [1, 2, 3])

  # compute the probability of the label class versus the maximum other
  # self.tlab * self.output selects the Z value of the real class,
  # because self.tlab is a one-hot vector
  # the reduce_sum removes extra zeros, now get a vector of size #batch_size
  self.real = tf.reduce_sum((self.tlab) * self.output, 1)
  # (1 - self.tlab) * self.output gets all Z values for other classes.
  # Because soft Z values are negative, it is possible that all Z values are
  # less than 0 and we mistakenly select the real class as the max, so we
  # subtract 10000 from the real class.
  self.other = tf.reduce_max(
      (1 - self.tlab) * self.output - (self.tlab * 10000), 1)

  # If self.targeted is true, then the targets represent the target labels.
  # If self.targeted is false, then targets are the original class labels.
  if self.TARGETED:
    if use_log:
      # loss1 = - tf.log(self.real)
      loss1 = tf.maximum(
          0.0, tf.log(self.other + 1e-30) - tf.log(self.real + 1e-30))
    else:
      # if targeted, optimize to make the target class (`real`) most likely
      loss1 = tf.maximum(0.0, self.other - self.real + self.CONFIDENCE)
  else:
    if use_log:
      # loss1 = tf.log(self.real)
      loss1 = tf.maximum(
          0.0, tf.log(self.real + 1e-30) - tf.log(self.other + 1e-30))
    else:
      # if untargeted, optimize for making this class least likely.
      loss1 = tf.maximum(0.0, self.real - self.other + self.CONFIDENCE)

  # sum up the losses (output is a vector of #batch_size)
  self.loss2 = self.l2dist
  self.loss1 = self.const * loss1
  self.loss = self.loss1 + self.loss2

  # these are the variables to initialize when we run
  self.setup = []
  self.setup.append(self.timg.assign(self.assign_timg))
  self.setup.append(self.tlab.assign(self.assign_tlab))
  self.setup.append(self.const.assign(self.assign_const))

  # prepare the list of all valid variables
  var_size = self.small_x * self.small_y * num_channels
  self.use_var_len = var_size
  self.var_list = np.array(range(0, self.use_var_len), dtype=np.int32)
  self.used_var_list = np.zeros(var_size, dtype=np.int32)
  self.sample_prob = np.ones(var_size, dtype=np.float32) / var_size

  # upper and lower bounds for the modifier
  self.modifier_up = np.zeros(var_size, dtype=np.float32)
  self.modifier_down = np.zeros(var_size, dtype=np.float32)

  # random permutation for coordinate update
  self.perm = np.random.permutation(var_size)
  self.perm_index = 0

  # ADAM status
  self.mt = np.zeros(var_size, dtype=np.float32)
  self.vt = np.zeros(var_size, dtype=np.float32)
  # self.beta1 = 0.8
  # self.beta2 = 0.99
  self.beta1 = adam_beta1
  self.beta2 = adam_beta2
  self.reset_adam_after_found = reset_adam_after_found
  self.adam_epoch = np.ones(var_size, dtype=np.int32)
  self.stage = 0

  # variables used during optimization process
  self.grad = np.zeros(batch_size, dtype=np.float32)
  self.hess = np.zeros(batch_size, dtype=np.float32)
  # for testing
  self.grad_op = tf.gradients(self.loss, self.modifier)

  # compile numba function
  # self.coordinate_ADAM_numba = jit(coordinate_ADAM, nopython=True)
  # self.coordinate_ADAM_numba.recompile()
  # print(self.coordinate_ADAM_numba.inspect_llvm())
  # np.set_printoptions(threshold=np.nan)

  # set solver
  solver = solver.lower()
  self.solver_name = solver
  if solver == "adam":
    self.solver = coordinate_ADAM
  elif solver == "newton":
    self.solver = coordinate_Newton
  elif solver == "adam_newton":
    self.solver = coordinate_Newton_ADAM
  elif solver != "fake_zero":
    print("unknown solver", solver)
    self.solver = coordinate_ADAM
  print("Using", solver, "solver")
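# A numeric sketch of the margin loss above, with hypothetical logits: for a
# targeted attack with confidence kappa, the hinge max(0, other - real + kappa)
# is zero only once the target logit beats the runner-up by at least kappa.
import numpy as np

logits = np.array([2.0, 5.0, 1.0])    # example pre-softmax outputs
target = np.array([0.0, 1.0, 0.0])    # one-hot target class
kappa = 0.5
real = (target * logits).sum()                         # 5.0
other = ((1 - target) * logits - target * 1e4).max()   # 2.0
loss1 = max(0.0, other - real + kappa)                 # max(0, -2.5) = 0.0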
def get_stage_1(dof_feat, simmat_feat, is_training, bn_decay=None):
  batch_size = dof_feat.get_shape()[0].value
  # task1: key_point
  feat1 = tf_util.conv1d(dof_feat, 128, 1, padding='VALID',
                         activation_fn=None, scope='stage1/task1/fc1',
                         bn_decay=bn_decay)
  pred_labels_key_p = tf_util.conv1d(feat1, 2, 1, padding='VALID',
                                     activation_fn=None,
                                     scope='stage1/task1/fc2',
                                     bn_decay=bn_decay)
  # task2_1: labels_direction
  feat2_1 = tf_util.conv1d(dof_feat, 128, 1, padding='VALID',
                           activation_fn=None, scope='stage1/task2_1/fc1',
                           bn_decay=bn_decay)
  pred_labels_direction = tf_util.conv1d(feat2_1, 15, 1, padding='VALID',
                                         activation_fn=None,
                                         scope='stage1/task2_1/fc2',
                                         bn_decay=bn_decay)
  # task2_2: regression_direction
  feat2_2 = tf_util.conv1d(dof_feat, 128, 1, padding='VALID',
                           activation_fn=None, scope='stage1/task2_2/fc1',
                           bn_decay=bn_decay)
  pred_regression_direction = tf_util.conv1d(feat2_2, 3, 1, padding='VALID',
                                             activation_fn=None,
                                             scope='stage1/task2_2/fc2',
                                             bn_decay=bn_decay)
  # task3: position
  feat3 = tf_util.conv1d(dof_feat, 128, 1, padding='VALID',
                         activation_fn=None, scope='stage1/task3/fc1',
                         bn_decay=bn_decay)
  pred_regression_position = tf_util.conv1d(feat3, 3, 1, padding='VALID',
                                            activation_fn=None,
                                            scope='stage1/task3/fc2',
                                            bn_decay=bn_decay)
  # task4: dof_type
  feat4 = tf_util.conv1d(dof_feat, 128, 1, padding='VALID',
                         activation_fn=None, scope='stage1/task4/fc1',
                         bn_decay=bn_decay)
  pred_labels_type = tf_util.conv1d(feat4, 4, 1, padding='VALID',
                                    activation_fn=None,
                                    scope='stage1/task4/fc2',
                                    bn_decay=bn_decay)
  # task5: similarity matrix
  feat5 = tf_util.conv1d(simmat_feat, 128, 1, padding='VALID',
                         activation_fn=None, scope='stage1/task_5/fc1',
                         bn_decay=bn_decay)
  r = tf.reduce_sum(feat5 * feat5, 2)
  r = tf.reshape(r, [batch_size, -1, 1])
  D = r - 2 * tf.matmul(feat5, tf.transpose(feat5, perm=[0, 2, 1])) + \
      tf.transpose(r, perm=[0, 2, 1])
  pred_simmat = tf.maximum(10 * D, 0.)
  # task6: confidence map
  feat6 = tf_util.conv1d(simmat_feat, 128, 1, padding='VALID',
                         activation_fn=None, scope='stage1/task6/fc1',
                         bn_decay=bn_decay)
  conf_logits = tf_util.conv1d(feat6, 1, 1, padding='VALID',
                               activation_fn=None, scope='stage1/task_6/fc2',
                               bn_decay=bn_decay)
  pred_conf_logits = tf.nn.sigmoid(conf_logits,
                                   name='stage1/task_6/confidence')
  return (pred_labels_key_p, pred_labels_direction, pred_regression_direction,
          pred_regression_position, pred_labels_type, pred_simmat,
          pred_conf_logits)
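# A numpy sketch of the pairwise-distance trick used for the similarity
# matrix above: with r_i = ||f_i||^2, the matrix D = r - 2 F F^T + r^T
# satisfies D_ij = ||f_i - f_j||^2. The trailing tf.maximum(..., 0.) clamps
# the small negatives that floating-point error can introduce.
import numpy as np

f = np.random.randn(5, 3)
r = (f * f).sum(axis=1, keepdims=True)              # [5, 1]
d = r - 2 * f @ f.T + r.T                           # [5, 5]
brute = ((f[:, None, :] - f[None, :, :]) ** 2).sum(-1)
assert np.allclose(d, brute)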
def train_rgvn(self, perf_metric):
  """Trains RGVN based on the specified objective function.

  Args:
    perf_metric: 'auc', 'accuracy', 'log-loss' for classification
                 'mae', 'mse', 'rmspe' for regression
  """
  # Generates the selection probability
  est_data_value = self.rpm()

  # Generator loss (REINFORCE algorithm)
  prob = tf.reduce_sum(
      self.s_input * tf.log(est_data_value + self.epsilon) +
      (1 - self.s_input) * tf.log(1 - est_data_value + self.epsilon))
  dve_loss = (
      -self.reward_input * prob +
      1e3 * (tf.maximum(tf.reduce_mean(est_data_value) - self.threshold, 0) +
             tf.maximum((1 - self.threshold) - tf.reduce_mean(est_data_value),
                        0)))

  # Variable
  dve_vars = [
      v for v in tf.trainable_variables()
      if v.name.startswith('data_value_estimator')
  ]

  # Solver
  dve_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(
      dve_loss, var_list=dve_vars)

  LogUtil.log('INFO', "To evaluate x_valid with ori model!")
  # Baseline performance
  print(self.ori_model_path)
  y_valid_hat = eval_sgcn_prediction(
      self.x_valid, window=4, model_path=self.ori_model_path, gpu_id=0,
      y_test=self.y_valid, predict_batch_size=self.batch_size_predictor)

  if perf_metric == 'auc':
    # valid_perf = metrics.roc_auc_score(self.y_valid, y_valid_hat[:, 1])
    valid_perf = metrics.roc_auc_score(self.y_valid_onehot, y_valid_hat)
  elif perf_metric == 'accuracy':
    valid_perf = metrics.accuracy_score(self.y_valid,
                                        np.argmax(y_valid_hat, axis=1))
  elif perf_metric == 'log_loss':
    valid_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
  elif perf_metric == 'rmspe':
    valid_perf = rgvn_metrics.rmspe(self.y_valid, y_valid_hat)
  elif perf_metric == 'mae':
    valid_perf = metrics.mean_absolute_error(self.y_valid, y_valid_hat)
  elif perf_metric == 'mse':
    valid_perf = metrics.mean_squared_error(self.y_valid, y_valid_hat)

  LogUtil.log('INFO', "To evaluate x_train with val model!")
  # Prediction differences
  y_train_valid_pred = eval_sgcn_prediction(
      self.x_train, window=4, model_path=self.val_model_path, gpu_id=0,
      y_test=self.y_train, predict_batch_size=self.batch_size_predictor)

  if self.problem == 'classification':
    y_pred_diff = np.abs(self.y_train_onehot - y_train_valid_pred)
  elif self.problem == 'regression':
    y_pred_diff = (np.abs(self.y_train_onehot - y_train_valid_pred) /
                   self.y_train_onehot)

  # Disable GPU usage
  # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
  # Main session
  session_conf = tf.ConfigProto(allow_soft_placement=False,
                                log_device_placement=False)
  sess = tf.Session(config=session_conf)
  sess.run(tf.global_variables_initializer())
  # Model save at the end
  saver = tf.train.Saver(dve_vars)

  for _ in tqdm.tqdm(range(self.outer_iterations)):
    # Batch selection
    batch_idx = np.random.permutation(len(self.x_train))[:self.batch_size]
    x_batch = self.x_train[batch_idx]
    y_batch_onehot = self.y_train_onehot[batch_idx]
    y_batch = self.y_train[batch_idx]
    y_hat_batch = y_pred_diff[batch_idx]

    x_train_class = SGCNData(self.x_train, self.y_train, 4)
    alias_inputs, A, items, node_masks, targets = x_train_class.get_slice(
        batch_idx)

    LogUtil.log('INFO', 'Start to generate selection probability')
    # Generates selection probability
    est_dv_curr = sess.run(
        est_data_value,
        feed_dict={
            self.A: A,  # Liu Chenxu add
            # self.x_input: x_batch,
            self.items: items,
            self.node_masks: node_masks,
            self.y_input: y_batch_onehot,
            self.y_hat_input: y_hat_batch
        })
    LogUtil.log('INFO', 'End to generate selection probability')

    # Samples the selection probability
    sel_prob_curr = np.random.binomial(1, est_dv_curr, est_dv_curr.shape)

    # Exception (when the selection probability is all zeros)
    if np.sum(sel_prob_curr) == 0:
      est_dv_curr = 0.5 * np.ones(np.shape(est_dv_curr))
      sel_prob_curr = np.random.binomial(1, est_dv_curr, est_dv_curr.shape)

    # Trains predictor
    flatten_sel_prob_curr = sel_prob_curr.flatten()
    weighted_x_batch = x_batch[np.where(flatten_sel_prob_curr > 0)]
    weighted_y_batch = y_batch[np.where(flatten_sel_prob_curr > 0)]

    LogUtil.log('INFO', "Start to train new model.")
    # new_model_batch_size = len(weighted_x_batch)
    new_model_path = train_sgcn(
        self.hidden_dim, self.label_dim, self.n_nodes, 1, weighted_x_batch,
        weighted_y_batch, 50, 'tmp/sgcn_as_predict_new_model',
        step_save_model=8, lr=0.001, epoch=self.inner_iterations)
    LogUtil.log('INFO', "New model training done.")
    LogUtil.log('INFO', new_model_path)

    # Prediction
    y_valid_hat = eval_sgcn_prediction(
        self.x_valid, window=4, model_path=new_model_path, gpu_id=0,
        y_test=self.y_valid, predict_batch_size=self.batch_size_predictor)
    LogUtil.log('INFO', "Evaluate with new model done.")

    # Reward computation
    if perf_metric == 'auc':
      # rgvn_perf = metrics.roc_auc_score(self.y_valid, y_valid_hat[:, 1])
      rgvn_perf = metrics.roc_auc_score(self.y_valid_onehot, y_valid_hat)
    elif perf_metric == 'accuracy':
      rgvn_perf = metrics.accuracy_score(self.y_valid,
                                         np.argmax(y_valid_hat, axis=1))
    elif perf_metric == 'log_loss':
      rgvn_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
    elif perf_metric == 'rmspe':
      rgvn_perf = rgvn_metrics.rmspe(self.y_valid, y_valid_hat)
    elif perf_metric == 'mae':
      rgvn_perf = metrics.mean_absolute_error(self.y_valid, y_valid_hat)
    elif perf_metric == 'mse':
      rgvn_perf = metrics.mean_squared_error(self.y_valid, y_valid_hat)

    if self.problem == 'classification':
      reward_curr = rgvn_perf - valid_perf
    elif self.problem == 'regression':
      reward_curr = valid_perf - rgvn_perf

    LogUtil.log('INFO', 'Start to train the generator')
    # Trains the generator
    _, _ = sess.run(
        [dve_solver, dve_loss],
        feed_dict={
            self.A: A,
            self.items: items,
            self.node_masks: node_masks,
            self.y_input: y_batch_onehot,
            self.y_hat_input: y_hat_batch,
            self.s_input: sel_prob_curr,
            self.reward_input: reward_curr
        })
    LogUtil.log('INFO', 'End to train the generator')

  # Saves trained model
  saver.save(sess, self.checkpoint_file_name)
  LogUtil.log('INFO', "Saved trained rgvn model.")
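# A sketch with hypothetical numbers of the REINFORCE objective above: the
# generator's loss is -reward times the log-likelihood of the sampled
# selection mask under the predicted per-example values, plus a penalty that
# keeps the mean selection probability inside [1 - threshold, threshold].
import numpy as np

est_dv = np.array([0.9, 0.2, 0.6])   # predicted data values
sel = np.array([1, 0, 1])            # sampled selection mask
reward = 0.05                        # new_perf - baseline_perf
eps, threshold = 1e-8, 0.9
log_prob = (sel * np.log(est_dv + eps) +
            (1 - sel) * np.log(1 - est_dv + eps)).sum()
penalty = 1e3 * (max(est_dv.mean() - threshold, 0) +
                 max((1 - threshold) - est_dv.mean(), 0))
dve_loss = -reward * log_prob + penalty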
def get_stage_1_loss(pred_labels_key_p, pred_labels_direction,
                     pred_regression_direction, pred_regression_position,
                     pred_labels_type, labels_key_p, labels_direction,
                     regression_direction, regression_position, labels_type,
                     simmat_pl, neg_simmat_pl, pred_simmat, pred_conf_logits):
  batch_size = pred_labels_key_p.get_shape()[0].value
  num_point = pred_labels_key_p.get_shape()[1].value
  mask = tf.cast(labels_key_p, tf.float32)
  neg_mask = tf.ones_like(mask) - mask
  Np = tf.expand_dims(tf.reduce_sum(mask, axis=1), 1)
  Ng = tf.expand_dims(tf.reduce_sum(neg_mask, axis=1), 1)
  all_mask = tf.ones_like(mask)

  # loss: task1
  task_1_loss = tf.reduce_mean(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=pred_labels_key_p, labels=labels_key_p) *
      (mask * (Ng / Np) + 1))
  task_1_recall = tf.reduce_mean(
      tf.reduce_sum(
          tf.cast(
              tf.equal(
                  tf.argmax(pred_labels_key_p, axis=2, output_type=tf.int32),
                  labels_key_p), tf.float32) * mask, axis=1) /
      tf.reduce_sum(mask, axis=1))
  task_1_acc = tf.reduce_mean(
      tf.reduce_sum(
          tf.cast(
              tf.equal(
                  tf.argmax(pred_labels_key_p, axis=2, output_type=tf.int32),
                  labels_key_p), tf.float32), axis=1) / num_point)

  # loss: task2_1
  task_2_1_loss = tf.reduce_mean(
      tf.reduce_sum(
          tf.nn.sparse_softmax_cross_entropy_with_logits(
              logits=pred_labels_direction, labels=labels_direction) * mask,
          axis=1) / tf.reduce_sum(mask, axis=1))
  task_2_1_acc = tf.reduce_mean(
      tf.reduce_sum(
          tf.cast(
              tf.equal(
                  tf.argmax(pred_labels_direction, axis=2,
                            output_type=tf.int32), labels_direction),
              tf.float32) * mask, axis=1) / tf.reduce_sum(mask, axis=1))

  # loss: task2_2
  task_2_2_loss = tf.reduce_mean(
      tf.reduce_sum(
          tf.reduce_mean(
              smooth_l1_dist(pred_regression_direction -
                             regression_direction), axis=2) * mask,
          axis=1) / tf.reduce_sum(mask, axis=1))

  # loss: task3
  task_3_loss = tf.reduce_mean(
      tf.reduce_sum(
          tf.reduce_mean(
              smooth_l1_dist(pred_regression_position - regression_position),
              axis=2) * mask, axis=1) / tf.reduce_sum(mask, axis=1))

  # loss: task4
  task_4_loss = tf.reduce_mean(
      tf.reduce_sum(
          tf.nn.sparse_softmax_cross_entropy_with_logits(
              logits=pred_labels_type, labels=labels_type) * mask,
          axis=1) / tf.reduce_sum(mask, axis=1))
  task_4_acc = tf.reduce_mean(
      tf.reduce_sum(
          tf.cast(
              tf.equal(
                  tf.argmax(pred_labels_type, axis=2, output_type=tf.int32),
                  labels_type), tf.float32) * mask, axis=1) /
      tf.reduce_sum(mask, axis=1))

  # loss: task5
  pos = pred_simmat * simmat_pl
  neg = tf.maximum(80 - pred_simmat, 0) * neg_simmat_pl
  task_5_loss = tf.reduce_mean(pos + neg)

  # loss: task6
  ng_label = tf.greater(simmat_pl, 0.5)
  ng = tf.less(pred_simmat, 80)
  epsilon = tf.constant(
      np.ones(ng_label.get_shape()[:2]).astype(np.float32) * 1e-6)
  pts_iou = (tf.reduce_sum(
      tf.cast(tf.logical_and(ng, ng_label), tf.float32), axis=2) /
             (tf.reduce_sum(
                 tf.cast(tf.logical_or(ng, ng_label), tf.float32), axis=2) +
              epsilon))
  task_6_loss = tf.reduce_mean(
      tf.squared_difference(pts_iou, tf.squeeze(pred_conf_logits, [2])))

  w1, w2_1, w2_2, w3, w4, w5, w6 = 1, 1, 100, 100, 1, 1, 100
  loss = (task_1_loss * w1 + task_2_1_loss * w2_1 + task_2_2_loss * w2_2 +
          task_3_loss * w3 + task_4_loss * w4 + task_5_loss * w5 +
          task_6_loss * w6)
  tf.summary.scalar('all loss', loss)
  tf.add_to_collection('losses', loss)
  return (task_1_loss, task_1_recall, task_1_acc, task_2_1_loss,
          task_2_1_acc, task_2_2_loss, task_3_loss, task_4_loss, task_4_acc,
          task_5_loss, task_6_loss, loss)
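# A numeric sketch of the task-5 contrastive term above, with hypothetical
# distances: similar pairs (simmat_pl == 1) pay their distance D directly,
# while dissimilar pairs (neg_simmat_pl == 1) pay the hinge max(80 - D, 0),
# which vanishes once the pair is pushed at least 80 apart.
d_similar, d_dissimilar = 5.0, 30.0
pos_term = d_similar * 1.0                       # want D -> 0
neg_term = max(80.0 - d_dissimilar, 0.0) * 1.0   # want D >= 80
print(pos_term + neg_term)                       # 5.0 + 50.0 = 55.0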
def loss_fn(features, mode, params):
  """Computes the training loss for depth and egomotion training.

  This function is written with TPU-friendliness in mind.

  Args:
    features: A dictionary mapping strings to tuples of (tf.Tensor,
      tf.Tensor), representing pairs of frames. The loss will be calculated
      from these tensors. The expected endpoints are 'rgb', 'depth',
      'intrinsics_mat' and 'intrinsics_mat_inv'.
    mode: One of tf.estimator.ModeKeys: TRAIN, PREDICT or EVAL.
    params: A dictionary with hyperparameters that optionally override
      DEFAULT_PARAMS above.

  Returns:
    A dictionary mapping each loss name (see DEFAULT_PARAMS['loss_weights']'s
    keys) to a scalar tf.Tensor representing the respective loss.

  Raises:
    ValueError: If `features` endpoints don't conform with their expected
      structure.
  """
  params = parameter_container.ParameterContainer.from_defaults_and_overrides(
      DEFAULT_PARAMS, params, is_strict=True, strictness_depth=2)

  if len(features['rgb']) != 2 or 'depth' in features and len(
      features['depth']) != 2:
    raise ValueError(
        'RGB and depth endpoints are expected to be a tuple of two'
        ' tensors. Rather, they are %s.' % str(features))

  # On TPU we strive to stack tensors together and perform ops once on the
  # entire stack, to save time and HBM memory. We thus stack the
  # batch-of-first-frames and the batch-of-second-frames, for both depth and
  # RGB. The batch dimension of rgb_stack and gt_depth_stack is thus twice
  # the original batch size.
  rgb_stack = tf.concat(features['rgb'], axis=0)

  depth_predictor = depth_prediction_nets.ResNet18DepthPredictor(
      mode, params.depth_predictor_params.as_dict())
  predicted_depth = depth_predictor.predict_depth(rgb_stack)
  maybe_summary.histogram('PredictedDepth', predicted_depth)

  endpoints = {}
  endpoints['predicted_depth'] = tf.split(predicted_depth, 2, axis=0)
  endpoints['rgb'] = features['rgb']

  # We make the heuristic that depths that are less than 0.2 meters are not
  # accurate. This is a rough placeholder for a confidence map that we're
  # going to have in the future.
  if 'depth' in features:
    endpoints['groundtruth_depth'] = features['depth']

  if params.cascade:
    motion_features = [
        tf.concat([features['rgb'][0], endpoints['predicted_depth'][0]],
                  axis=-1),
        tf.concat([features['rgb'][1], endpoints['predicted_depth'][1]],
                  axis=-1)
    ]
  else:
    motion_features = features['rgb']

  motion_features_stack = tf.concat(motion_features, axis=0)
  flipped_motion_features_stack = tf.concat(motion_features[::-1], axis=0)
  # Unlike `rgb_stack`, here we stacked the frames in reverse order along the
  # batch dimension. By concatenating the two stacks below along the channel
  # axis, we create the following tensor:
  #
  #          Channel dimension (3)
  #   _                                 _
  #  |  Frame1-s batch | Frame2-s batch  |____Batch
  #  |_ Frame2-s batch | Frame1-s batch _|    dimension (0)
  #
  # When we send this tensor to the motion prediction network, the first and
  # second halves of the result represent the camera motion from Frame1 to
  # Frame2 and from Frame2 to Frame1 respectively. Further below we impose a
  # loss that drives these two to be the inverses of one another
  # (cycle-consistency).
  pairs = tf.concat([motion_features_stack, flipped_motion_features_stack],
                    axis=-1)

  rot, trans, residual_translation, intrinsics_mat = (
      object_motion_nets.motion_field_net(
          images=pairs,
          weight_reg=params.motion_prediction_params.weight_reg,
          align_corners=params.motion_prediction_params.align_corners,
          auto_mask=params.motion_prediction_params.auto_mask))

  if params.motion_field_burnin_steps > 0.0:
    step = tf.to_float(tf.train.get_or_create_global_step())
    burnin_steps = tf.to_float(params.motion_field_burnin_steps)
    residual_translation *= tf.clip_by_value(2 * step / burnin_steps - 1,
                                             0.0, 1.0)

  # If using ground-truth egomotion:
  if not params.learn_egomotion:
    egomotion_mat = tf.concat(features['egomotion_mat'], axis=0)
    rot = transform_utils.angles_from_matrix(egomotion_mat[:, :3, :3])
    trans = egomotion_mat[:, :3, 3]
    trans = tf.expand_dims(trans, 1)
    trans = tf.expand_dims(trans, 1)

  if params.use_mask:
    mask = tf.to_float(tf.concat(features['mask'], axis=0) > 0)
    if params.foreground_dilation > 0:
      pool_size = params.foreground_dilation * 2 + 1
      mask = tf.nn.max_pool(mask, [1, pool_size, pool_size, 1], [1] * 4,
                            'SAME')
    residual_translation *= mask

  maybe_summary.histogram('ResidualTranslation', residual_translation)
  maybe_summary.histogram('BackgroundTranslation', trans)
  maybe_summary.histogram('Rotation', rot)
  endpoints['residual_translation'] = tf.split(residual_translation, 2,
                                               axis=0)
  endpoints['background_translation'] = tf.split(trans, 2, axis=0)
  endpoints['rotation'] = tf.split(rot, 2, axis=0)

  if not params.learn_intrinsics.enabled:
    endpoints['intrinsics_mat'] = features['intrinsics_mat']
    endpoints['intrinsics_mat_inv'] = features['intrinsics_mat_inv']
  elif params.learn_intrinsics.per_video:
    int_mat = intrinsics_utils.create_and_fetch_intrinsics_per_video_index(
        features['video_index'][0],
        params.image_preprocessing.image_height,
        params.image_preprocessing.image_width,
        max_video_index=params.learn_intrinsics.max_number_of_videos)
    endpoints['intrinsics_mat'] = tf.concat([int_mat] * 2, axis=0)
    endpoints['intrinsics_mat_inv'] = (
        intrinsics_utils.invert_intrinsics_matrix(int_mat))
  else:
    # The intrinsic matrix should be the same, no matter the order of the
    # images (mat = inv_mat). It's probably a good idea to enforce this by a
    # loss, but for now we just take their average as a prediction for the
    # intrinsic matrix.
    intrinsics_mat = 0.5 * sum(tf.split(intrinsics_mat, 2, axis=0))
    endpoints['intrinsics_mat'] = [intrinsics_mat] * 2
    endpoints['intrinsics_mat_inv'] = [
        intrinsics_utils.invert_intrinsics_matrix(intrinsics_mat)
    ] * 2

  aggregator = loss_aggregator.DepthMotionFieldLossAggregator(
      endpoints, params.loss_weights.as_dict(), params.loss_params.as_dict())

  # Add some more summaries.
  maybe_summary.image('rgb0', features['rgb'][0])
  maybe_summary.image('rgb1', features['rgb'][1])
  disp0, disp1 = tf.split(aggregator.output_endpoints['disparity'], 2, axis=0)
  maybe_summary.image('disparity0/grayscale', disp0)
  maybe_summary.image_with_colormap('disparity0/plasma',
                                    tf.squeeze(disp0, axis=3), 'plasma', 0.0)
  maybe_summary.image('disparity1/grayscale', disp1)
  maybe_summary.image_with_colormap('disparity1/plasma',
                                    tf.squeeze(disp1, axis=3), 'plasma', 0.0)
  if maybe_summary.summaries_enabled():
    if 'depth' in features:
      gt_disp0 = 1.0 / tf.maximum(features['depth'][0], 0.5)
      gt_disp1 = 1.0 / tf.maximum(features['depth'][1], 0.5)
      maybe_summary.image('disparity_gt0', gt_disp0)
      maybe_summary.image('disparity_gt1', gt_disp1)

    depth_proximity_weight0, depth_proximity_weight1 = tf.split(
        aggregator.output_endpoints['depth_proximity_weight'], 2, axis=0)
    maybe_summary.image('consistency_weight0',
                        tf.expand_dims(depth_proximity_weight0, -1))
    maybe_summary.image('consistency_weight1',
                        tf.expand_dims(depth_proximity_weight1, -1))
    maybe_summary.image('trans', aggregator.output_endpoints['trans'])
    maybe_summary.image('trans_inv', aggregator.output_endpoints['inv_trans'])
    maybe_summary.image('trans_res', endpoints['residual_translation'][0])
    maybe_summary.image('trans_res_inv', endpoints['residual_translation'][1])

  return aggregator.losses
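# A numeric sketch of the motion-field burn-in above: the residual
# translation is scaled by clip(2 * step / burnin_steps - 1, 0, 1), so it is
# fully suppressed for the first half of the burn-in and then ramps linearly
# to 1 by the end.
burnin_steps = 1000.0
for step in [0.0, 250.0, 500.0, 750.0, 1000.0]:
  scale = min(max(2 * step / burnin_steps - 1, 0.0), 1.0)
  print(step, scale)  # 0.0, 0.0, 0.0, 0.5, 1.0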
def resize_and_crop_image_v2(image,
                             short_side,
                             long_side,
                             padded_size,
                             aug_scale_min=1.0,
                             aug_scale_max=1.0,
                             seed=1,
                             method=tf.image.ResizeMethod.BILINEAR):
  """Resizes the input image to output size (Faster R-CNN style).

  Resize and pad images given the specified short / long side lengths and the
  padded output size. Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and first try to rescale the
     short side of the original image to `short_side`.
  2. If the scaled image after step 1 has a long side that exceeds
     `long_side`, keep the aspect ratio and instead rescale the long side of
     the image to `long_side`.
  3. Pad the rescaled image to `padded_size`.

  Args:
    image: a `Tensor` of shape [height, width, 3] representing an image.
    short_side: a scalar `Tensor` or `int` representing the desired short
      side to be rescaled to.
    long_side: a scalar `Tensor` or `int` representing the desired long side
      to be rescaled to.
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size. Padding will be
      applied after scaling the image to the desired size.
    aug_scale_min: a `float` with range between [0, 1.0] representing minimum
      random scale applied to the desired size for training scale jittering.
    aug_scale_max: a `float` with range between [1.0, inf] representing
      maximum random scale applied to the desired size for training scale
      jittering.
    seed: seed for random scale jittering.
    method: function to resize input image to scaled image.

  Returns:
    output_image: `Tensor` of shape [height, width, 3] where [height, width]
      equals `padded_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and
      the applied preprocessing. It is in the format of
      [[original_height, original_width], [desired_height, desired_width],
       [y_scale, x_scale], [y_offset, x_offset]], where
      [desired_height, desired_width] is the actual scaled image size, and
      [y_scale, x_scale] is the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
  with tf.name_scope('resize_and_crop_image_v2'):
    image_size = tf.cast(tf.shape(image)[0:2], tf.float32)

    scale_using_short_side = (
        short_side / tf.minimum(image_size[0], image_size[1]))
    scale_using_long_side = (
        long_side / tf.maximum(image_size[0], image_size[1]))

    scaled_size = tf.round(image_size * scale_using_short_side)
    scaled_size = tf.where(
        tf.greater(tf.maximum(scaled_size[0], scaled_size[1]), long_side),
        tf.round(image_size * scale_using_long_side), scaled_size)
    desired_size = scaled_size

    random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)

    if random_jittering:
      random_scale = tf.random_uniform([], aug_scale_min, aug_scale_max,
                                       seed=seed)
      scaled_size = tf.round(random_scale * scaled_size)

    # Computes 2D image_scale.
    image_scale = scaled_size / image_size

    # Selects non-zero random offset (x, y) if scaled image is larger than
    # desired_size.
    if random_jittering:
      max_offset = scaled_size - desired_size
      max_offset = tf.where(tf.less(max_offset, 0),
                            tf.zeros_like(max_offset), max_offset)
      offset = max_offset * tf.random_uniform([2, ], 0, 1, seed=seed)
      offset = tf.cast(offset, tf.int32)
    else:
      offset = tf.zeros((2, ), tf.int32)

    scaled_image = tf.image.resize_images(
        image, tf.cast(scaled_size, tf.int32), method=method)

    if random_jittering:
      scaled_image = scaled_image[offset[0]:offset[0] + desired_size[0],
                                  offset[1]:offset[1] + desired_size[1], :]

    output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
                                                padded_size[0],
                                                padded_size[1])

    image_info = tf.stack([
        image_size,
        tf.cast(desired_size, dtype=tf.float32), image_scale,
        tf.cast(offset, tf.float32)
    ])
    return output_image, image_info
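# A usage sketch under assumed sizes: a 400x600 image with short_side=800
# would first scale to 800x1200; with long_side=1000 the long side wins, so
# the image is rescaled to about 667x1000 instead, then padded to
# `padded_size`.
import numpy as np

h, w, short_side, long_side = 400.0, 600.0, 800.0, 1000.0
scale_short = short_side / min(h, w)    # 2.0 -> (800, 1200)
scale_long = long_side / max(h, w)      # ~1.667 -> (667, 1000)
scaled = np.round(np.array([h, w]) * scale_short)
if scaled.max() > long_side:
  scaled = np.round(np.array([h, w]) * scale_long)
print(scaled)  # [667., 1000.]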
def aggregate_task_losses(hparams, problem_hparams, logits, feature_name,
                          feature):
  """Multiproblem loss function."""
  # If no reweighting, we want the default loss to mimic the LM loss.
  if not hparams.multiproblem_reweight_label_loss:
    return aggregate_task_lm_losses(hparams=hparams,
                                    problem_hparams=problem_hparams,
                                    logits=logits,
                                    feature_name=feature_name,
                                    feature=feature)

  summaries = []
  main_task_id = hparams.problem.task_list[0].task_id
  vocab_size = problem_hparams.vocab_size[feature_name]
  if vocab_size is not None and hasattr(hparams, "vocab_divisor"):
    vocab_size += (-vocab_size) % hparams.vocab_divisor
  modality = problem_hparams.modality[feature_name]
  loss = hparams.loss.get(feature_name, modalities.get_loss(modality))
  weights_fn = hparams.weights_fn.get(feature_name,
                                      modalities.get_weights_fn(modality))

  # Primary task loss
  loss_num, loss_den = loss(
      logits, feature,
      lambda x: common_layers.weights_multi_problem_all(x, main_task_id),
      hparams, vocab_size, weights_fn)

  loss_val = loss_num / tf.maximum(1.0, loss_den)
  summaries.append([hparams.problem.task_list[0].name + "_loss", loss_val])

  # Since the losses may undergo rescaling, they cannot exist as separate
  # numerators and denominators. Set the denominators to 1 in order to
  # facilitate loss averaging.
  loss_num = loss_val
  loss_den = tf.minimum(tf.convert_to_tensor(1, dtype=tf.float32), loss_den)

  for task in hparams.problem.task_list[1:]:
    # Loss only from the input sequence -- the auxiliary LM loss.
    seq_loss_num, seq_loss_den = loss(
        logits, feature,
        lambda x: common_layers.weights_multi_problem_input(x, task.task_id),  # pylint: disable=cell-var-from-loop
        hparams, vocab_size)
    seq_loss_num *= problem_hparams.loss_multiplier

    # Unscaled sequence loss.
    seq_loss = seq_loss_num / tf.maximum(1.0, seq_loss_den)
    summaries.append([task.name + "_seq_loss", seq_loss])

    if hasattr(task, "num_classes"):
      # Loss only from the classification label.
      label_loss_num, label_loss_den = loss(
          logits, feature,
          lambda x: common_layers.weights_multi_problem(x, task.task_id),  # pylint: disable=cell-var-from-loop
          hparams, vocab_size)
      label_loss_num *= problem_hparams.loss_multiplier

      # Unscaled classification label loss.
      label_loss = label_loss_num / tf.maximum(1.0, label_loss_den)
      summaries.append([task.name + "_label_loss", label_loss])

      # Scaling.
      if hparams.multiproblem_reweight_label_loss:
        label_loss *= hparams.multiproblem_label_weight
        seq_loss *= (1 - hparams.multiproblem_label_weight)

      # This is the training loss for the optimizer after scaling.
      task_loss_val = seq_loss + label_loss
      loss_den_ = label_loss_den
    else:
      # Loss only from the target sequence.
      target_loss_num, target_loss_den = loss(
          logits, feature,
          lambda x: common_layers.weights_multi_problem(x, task.task_id),  # pylint: disable=cell-var-from-loop
          hparams, vocab_size)
      target_loss_num *= problem_hparams.loss_multiplier

      # Unscaled target sequence loss.
      target_loss = target_loss_num / tf.maximum(1.0, target_loss_den)
      summaries.append([task.name + "_target_loss", target_loss])

      # Scaling.
      if hparams.multiproblem_reweight_label_loss:
        target_loss *= hparams.multiproblem_label_weight
        seq_loss *= (1 - hparams.multiproblem_label_weight)

      # This is the training loss for the optimizer after all the scaling.
      task_loss_val = seq_loss + target_loss
      loss_den_ = target_loss_den

    summaries.append([task.name + "_loss", task_loss_val])
    # Adding 1 to the loss den for each task leads to averaging task losses.
    # TODO(urvashik): Fix combination with other task losses - weighted
    # average based on the number of examples from that task.
    loss_num += task_loss_val
    loss_den += tf.minimum(tf.convert_to_tensor(1, dtype=tf.float32),
                           loss_den_)

  return loss_num, loss_den, summaries
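# A sketch of the loss bookkeeping above, with hypothetical values and
# assuming every per-task denominator is at least 1 (so min(1, den) == 1):
# each task contributes its scaled loss to the numerator and 1 to the
# denominator, and loss_num / loss_den averages the per-task losses.
main_loss, task_losses = 2.0, [1.5, 0.5]
loss_num, loss_den = main_loss, 1.0
for task_loss in task_losses:
  loss_num += task_loss
  loss_den += 1.0
print(loss_num / loss_den)  # (2.0 + 1.5 + 0.5) / 3 = 4/3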
def multilevel_crop_and_resize(features, boxes, output_size=7):
  """Crop and resize on multilevel feature pyramid.

  Generate the (output_size, output_size) set of pixels for each input box
  by first locating the box into the correct feature level, and then
  cropping and resizing it using the corresponding feature map of that level.

  Args:
    features: A dictionary with key as pyramid level and value as features.
      The features are in shape of [batch_size, height_l, width_l,
      num_filters].
    boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
      represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
    output_size: A scalar to indicate the output crop size.

  Returns:
    A 5-D tensor representing feature crop of shape
    [batch_size, num_boxes, output_size, output_size, num_filters].
  """
  with tf.name_scope('multilevel_crop_and_resize'):
    levels = list(features.keys())
    min_level = min(levels)
    max_level = max(levels)
    batch_size, max_feature_height, max_feature_width, num_filters = (
        features[min_level].get_shape().as_list())
    _, num_boxes, _ = boxes.get_shape().as_list()

    # Stack feature pyramid into a features_all of shape
    # [batch_size, levels, height, width, num_filters].
    features_all = []
    feature_heights = []
    feature_widths = []
    for level in range(min_level, max_level + 1):
      shape = features[level].get_shape().as_list()
      feature_heights.append(shape[1])
      feature_widths.append(shape[2])
      # Concat tensor of [batch_size, height_l * width_l, num_filters] for
      # each level.
      features_all.append(
          tf.reshape(features[level], [batch_size, -1, num_filters]))

    features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters])

    # Calculate height_l * width_l for each level.
    level_dim_sizes = [
        feature_widths[i] * feature_heights[i]
        for i in range(len(feature_widths))
    ]
    # level_dim_offsets is accumulated sum of level_dim_size.
    level_dim_offsets = [0]
    for i in range(len(feature_widths) - 1):
      level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i])
    batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
    level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
    height_dim_sizes = tf.constant(feature_widths, tf.int32)

    # Assigns boxes to the right level.
    box_width = boxes[:, :, 3] - boxes[:, :, 1]
    box_height = boxes[:, :, 2] - boxes[:, :, 0]
    areas_sqrt = tf.sqrt(box_height * box_width)
    levels = tf.cast(
        tf.floordiv(tf.log(tf.div(areas_sqrt, 224.0)), tf.log(2.0)) + 4.0,
        dtype=tf.int32)
    # Maps levels between [min_level, max_level].
    levels = tf.minimum(max_level, tf.maximum(levels, min_level))

    # Projects box location and sizes to corresponding feature levels.
    scale_to_level = tf.cast(
        tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)),
        dtype=boxes.dtype)
    boxes /= tf.expand_dims(scale_to_level, axis=2)
    box_width /= scale_to_level
    box_height /= scale_to_level
    boxes = tf.concat([
        boxes[:, :, 0:2],
        tf.expand_dims(box_height, -1),
        tf.expand_dims(box_width, -1)
    ], axis=-1)

    # Maps levels to [0, max_level - min_level].
    levels -= min_level
    level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
    boundary = tf.cast(
        tf.concat([
            tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] /
                           level_strides - 1, axis=-1),
            tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] /
                           level_strides - 1, axis=-1),
        ], axis=-1), boxes.dtype)

    # Compute grid positions.
    kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
        boxes, boundary, output_size, sample_offset=0.5)

    x_indices = tf.cast(
        tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
        dtype=tf.int32)
    y_indices = tf.cast(
        tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
        dtype=tf.int32)

    batch_size_offset = tf.tile(
        tf.reshape(tf.range(batch_size) * batch_dim_size,
                   [batch_size, 1, 1, 1]),
        [1, num_boxes, output_size * 2, output_size * 2])
    # Get level offset for each box. Each box belongs to one level.
    levels_offset = tf.tile(
        tf.reshape(tf.gather(level_dim_offsets, levels),
                   [batch_size, num_boxes, 1, 1]),
        [1, 1, output_size * 2, output_size * 2])
    y_indices_offset = tf.tile(
        tf.reshape(
            y_indices * tf.expand_dims(tf.gather(height_dim_sizes, levels),
                                       -1),
            [batch_size, num_boxes, output_size * 2, 1]),
        [1, 1, 1, output_size * 2])
    x_indices_offset = tf.tile(
        tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
        [1, 1, output_size * 2, 1])
    indices = tf.reshape(
        batch_size_offset + levels_offset + y_indices_offset +
        x_indices_offset, [-1])

    # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get
    # similar performance.
    features_per_box = tf.reshape(
        tf.gather(features_r2, indices),
        [batch_size, num_boxes, output_size * 2, output_size * 2,
         num_filters])

    # Bilinear interpolation.
    features_per_box = feature_bilinear_interpolation(features_per_box,
                                                      kernel_y, kernel_x)
    return features_per_box
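# A sketch of the FPN level assignment used above: a box is routed to level
# floor(log2(sqrt(area) / 224)) + 4, clipped to the available levels (the
# pyramid range 2..5 below is an assumption for illustration), so a 224x224
# box lands on level 4 and a 448x448 box on level 5.
import math

for side in [112.0, 224.0, 448.0]:
  level = math.floor(math.log2(side / 224.0)) + 4
  print(side, max(2, min(5, level)))  # 3, 4, 5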