def estimator_metric_fn(self, detections, groundtruth_data):
    """Builds the (metric_op, update_op) pairs consumed by tf.TPUEstimator.

    A single update op, wrapping `self.update_state`, is shared by every
    metric entry. The metric tensor wraps `self.result` and is meant to be
    evaluated once all examples have been fed through the update op.

    API details:
    https://www.tensorflow.org/api_docs/python/tf/contrib/learn/MetricSpec

    Args:
      detections: Detection results in a tensor with each row representing
        [image_id, x, y, width, height, score, class].
      groundtruth_data: Groundtruth annotations in a tensor with each row
        representing [y1, x1, y2, x2, is_crowd, area, class].

    Returns:
      A dictionary mapping an evaluation name to a tuple
      (`metric_op`, `update_op`). When `self.testdev_dir` is set the only
      key is 'AP' and its metric op is the whole result tensor; otherwise
      there is one key per entry of `self.metric_names`, each paired with
      the matching element of the result tensor.
    """
    with tf.name_scope('coco_metric'):
        # Both modes create exactly the same two ops; only the way the
        # result tensor is exposed in the dictionary differs.
        update_op = tf.numpy_function(
            self.update_state, [groundtruth_data, detections], [])
        metrics = tf.numpy_function(self.result, [], tf.float32)

        if self.testdev_dir:
            return {'AP': (metrics, update_op)}

        return {
            name: (metrics[i], update_op)
            for i, name in enumerate(self.metric_names)
        }
def loss(summarizer_output, y_truth):
    """Sums the weighted SSIM, discriminator and colour-histogram loss terms.

    The summarizer output is run through `generator` and then
    `discriminator` (both read from the enclosing scope, as are the weights
    `alpha`, `beta`, `gamma` and the `hists` helper).

    Args:
      summarizer_output: Tensor produced by the summarizer network.
      y_truth: Target tensor compared against the generated output.

    Returns:
      `beta * (1 - SSIM) + alpha * (1 - fake_score) + gamma * sum((h1 - h2)^2)`
      where h1/h2 are `hists` of the summarizer output and of `y_truth`.
    """
    with summary_graph.as_default():
        synthesized = generator(summarizer_output)["output_1"]
        critic_score = discriminator(synthesized)["output_1"]

        # Histograms are computed in numpy, so no gradient flows through them.
        output_hist = tf_v1.numpy_function(
            hists, [summarizer_output], tf_v1.float32)
        truth_hist = tf_v1.numpy_function(hists, [y_truth], tf_v1.float32)
        squared_gap = (output_hist - truth_hist) ** 2
        color_loss = gamma * tf_v1.reduce_sum(squared_gap)

        similarity = tf_v2.image.ssim(synthesized, y_truth, 255)
        summarizer_loss = beta * (1 - similarity)

        discriminator_loss = alpha * (1 - critic_score)
        return summarizer_loss + discriminator_loss + color_loss
def _build_reward_op(self):
    """Builds the per-head, importance-weighted multi-step reward op.

    The op combines trajectory data read from `self._replay.transition`
    with the greedy policy of `self.target_convnet` (made epsilon-soft
    through `self.epsilon_eval` unless `self.qlambda` is set):

      1. the probability the epsilon-greedy target policy gives to each
         logged action is divided by the logging propensity `p`;
      2. the ratio of the first horizon step is overwritten with 1 and the
         ratios are cumulatively multiplied along the horizon axis;
      3. `self.iiwlbmommulti.tfhook` is called (through
         `tf.numpy_function`) on the discounts, cumulative weights and
         rewards (shifted by `self.rmin` when it is non-zero);
      4. the hook result multiplies the sum over the horizon of
         discount * weight * reward.

    When `self.summary_writer` is set, four scalar summaries ('v', 'alpha',
    'beta', 'kappa') are also registered under the 'Duals' scope.

    Returns:
      A tensor; per the inline shape comments it is b x num_heads.
    """
    # Probability the epsilon-greedy policy gives to a non-greedy (`off`)
    # and to the greedy (`on`) action.
    off = self.epsilon_eval / self.num_actions
    on = (1 - self.epsilon_eval) + off
    s = self._replay.transition['traj_state']
    a = self._replay.transition['traj_action']
    r = self._replay.transition['traj_reward']
    # `p` is the propensity the ratios are divided by: constant 1 (with a
    # fully greedy target policy) for qlambda, uniform 1/num_actions, or the
    # logged probability from the replay buffer.
    if self.qlambda:
        p = tf.constant(1.0, shape=r.shape, dtype=r.dtype)
        off, on = 0.0, 1.0
    elif self.uniform_propensities:
        p = tf.constant(1.0 / self.num_actions, shape=r.shape, dtype=r.dtype)
    else:
        p = self._replay.transition['traj_prob']
    gamma = self._replay.transition['traj_discount']
    state_shape = self.observation_shape + (self.stack_size,)
    flat_s = tf.reshape(s, shape=(-1,) + state_shape) # b*h x 84 x 84 x 4
    flat_qs = tf.stop_gradient(self.target_convnet(flat_s).q_heads) # b*h x num_actions x num_heads
    flat_qmax = tf.argmax(flat_qs, axis=1) # b*h x num_heads
    flat_pi = tf.one_hot(flat_qmax, depth=self.num_actions, axis=1, on_value=on, off_value=off) # b*h x num_actions x num_heads
    flat_a = tf.reshape(a, (-1,)) # b*h
    action_mask = tf.one_hot(flat_a, depth=self.num_actions, dtype=tf.bool, on_value=True, off_value=False) # b*h x num_actions
    # Despite the name, this selects the target-policy probability of the
    # action that was actually taken.
    flat_behavior_probs = tf.boolean_mask(flat_pi, action_mask) #b*h x num_heads
    behavior_probs = tf.reshape(flat_behavior_probs, (-1, self.update_horizon, self.num_heads)) #b x h x num_heads
    p_heads = tf.expand_dims(p, axis=-1) # b x h x 1
    flassimp = behavior_probs / p_heads # b x h x num_heads

    # NB: tensorflow sucks
    # (slice assignment is done in numpy: the first horizon step's weight is
    # forced to 1.)
    # NOTE(review): assign_ones writes into the array handed over by
    # tf.numpy_function -- confirm that mutating the input buffer is safe.
    def assign_ones(w):
        w[:, 0, :] = 1
        return w

    importance_weights = tf.numpy_function(assign_ones, [ flassimp ], tf.float32) # b x h x num_heads
    w = tf.math.cumprod(importance_weights, axis=1) #b x h x num_heads
    if self.rmin == 0:
        q = tf.numpy_function(lambda *args: self.iiwlbmommulti.tfhook(*args), [ gamma, w, r ], tf.float32) # b x num_heads
    else:
        # Rewards are shifted by rmin before being passed to the hook.
        # NOTE(review): the final return below uses the unshifted `r` --
        # confirm this is intended.
        q = tf.numpy_function(lambda *args: self.iiwlbmommulti.tfhook(*args), [ gamma, w, r - self.rmin ], tf.float32) # b x num_heads
    if self.summary_writer is not None:
        duals = tf.numpy_function(lambda *args: self.iiwlbmommulti.dualstfhook(*args), [ ], tf.float32) # 4 x num_heads
        # Average each dual quantity over the heads before logging it.
        meanduals = tf.reduce_mean(duals, axis=-1)
        with tf.compat.v1.variable_scope('Duals'):
            tf.compat.v1.summary.scalar('v', meanduals[0])
            tf.compat.v1.summary.scalar('alpha', meanduals[1])
            tf.compat.v1.summary.scalar('beta', meanduals[2])
            tf.compat.v1.summary.scalar('kappa', meanduals[3])
    biggamma = tf.expand_dims(gamma, axis=-1) # b x h x 1
    bigr = tf.expand_dims(r, axis=-1) # b x h x 1
    return q * tf.reduce_sum(biggamma * w * bigr, axis=1)
def complete_augmenter(img_tf, ann_tf, output_height, output_width):
    """Chains every augmentation stage over an image/annotation pair.

    The crop, padding and horizontal-flip stages are numpy functions wrapped
    in `tf.numpy_function`; colour, hue and resize are called directly on
    the tensors.

    Args:
      img_tf: Image tensor.
      ann_tf: Annotation tensor belonging to `img_tf`.
      output_height: Height passed to the final resize stage.
      output_width: Width passed to the final resize stage.

    Returns:
      The augmented `(image, annotation)` pair.
    """
    numpy_stages = (crop_augmenter, padding_augmenter, horizontal_flip_augmenter)
    for stage in numpy_stages:
        img_tf, ann_tf = tf.numpy_function(
            func=stage, inp=[img_tf, ann_tf], Tout=[tf.float32, tf.float32])

    for tensor_stage in (color_augmenter, hue_augmenter):
        img_tf, ann_tf = tensor_stage(img_tf, ann_tf)

    target_shape = (output_height, output_width)
    img_tf, ann_tf = resize_augmenter(img_tf, ann_tf, target_shape)
    return img_tf, ann_tf
def decode_importance_sample(sample_index, p_loc, p_scale, seed, use_index=False):
    """Regenerates the proposal samples and picks the one that was coded.

    Args:
      sample_index: With `use_index=True`, the 1-based sample index itself;
        otherwise the encoded index, decoded through `elias_delta_decode`.
      p_loc: Location of the proposal distribution.
      p_scale: Scale of the proposal distribution.
      seed: Seed handed to `stateless_normal_sample`.
      use_index: Selects how `sample_index` is interpreted (see above).

    Returns:
      With `use_index=True`, only the last drawn sample (leading dimension
      kept). Otherwise the tuple
      `(last_sample, code_length, zero_based_index, all_samples)`.
    """
    if use_index:
        index = sample_index - 1
    else:
        index, code_length = tf.numpy_function(
            elias_delta_decode, [sample_index], (tf.int64, tf.int64))
        index = index - 1

    # Drawing index + 1 samples makes the wanted sample the last one.
    samples = stateless_normal_sample(
        loc=p_loc,
        scale=p_scale,
        num_samples=tf.cast(index, tf.int32) + 1,
        seed=seed)
    chosen = samples[-1:, ...]

    if use_index:
        return chosen
    return chosen, code_length, index, samples
def metric_fn(**kwargs):
    """Returns a dictionary that has the evaluation metrics.

    Detections are produced either image by image with the numpy NMS
    (`params['nms_configs']['pyfunc']`, default True) or with the graph-mode
    `postprocess.per_class_nms`. They are then handed to the COCO
    evaluation metric, and the mean class/box losses are added.

    Args:
      **kwargs: Evaluation tensors; the keys read here are 'boxes',
        'scores', 'classes', 'image_ids', 'image_scales',
        'groundtruth_data' (validation only), 'cls_loss_repeat' and
        'box_loss_repeat'.

    Returns:
      A dict of metric name -> (metric_op, update_op).
    """
    if params['nms_configs'].get('pyfunc', True):
        detections_bs = []
        batch_size = kwargs['boxes'].shape[0]
        for index in range(batch_size):
            nms_configs = params['nms_configs']
            nms_fn = functools.partial(
                nms_np.per_class_nms, nms_configs=nms_configs)
            nms_inputs = [
                kwargs['boxes'][index],
                kwargs['scores'][index],
                kwargs['classes'][index],
                tf.slice(kwargs['image_ids'], [index], [1]),
                tf.slice(kwargs['image_scales'], [index], [1]),
                params['num_classes'],
                nms_configs['max_output_size'],
            ]
            detections_bs.append(
                tf.numpy_function(nms_fn, nms_inputs, tf.float32))
    else:
        # These two branches should be equivalent, but currently they are not.
        # TODO(tanmingxing): enable the non_pyfun path after bug fix.
        nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
            params, kwargs['boxes'], kwargs['scores'], kwargs['classes'],
            kwargs['image_scales'])
        img_ids = tf.cast(
            tf.expand_dims(kwargs['image_ids'], -1), nms_scores.dtype)
        # Columns: [image_id, x, y, width, height, score, class].
        detection_columns = [
            img_ids * tf.ones_like(nms_scores),
            nms_boxes[:, :, 1],
            nms_boxes[:, :, 0],
            nms_boxes[:, :, 3] - nms_boxes[:, :, 1],
            nms_boxes[:, :, 2] - nms_boxes[:, :, 0],
            nms_scores,
            nms_classes,
        ]
        detections_bs = tf.stack(
            detection_columns, axis=-1, name='detnections')

    if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        eval_metric = coco_metric.EvaluationMetric(
            testdev_dir=params['testdev_dir'])
        # No groundtruth is available for test-dev; pass a placeholder.
        coco_metrics = eval_metric.estimator_metric_fn(
            detections_bs, tf.zeros([1]))
    else:
        logging.info('Eval val with groudtruths %s.', params['val_json_file'])
        eval_metric = coco_metric.EvaluationMetric(
            filename=params['val_json_file'])
        coco_metrics = eval_metric.estimator_metric_fn(
            detections_bs, kwargs['groundtruth_data'])

    # Add metrics to output.
    output_metrics = {
        'cls_loss': tf.metrics.mean(kwargs['cls_loss_repeat']),
        'box_loss': tf.metrics.mean(kwargs['box_loss_repeat']),
    }
    output_metrics.update(coco_metrics)
    return output_metrics
def numpy_wrapper(inputs):
    """Wraps `linear_sum_assignment` matching in a `tf.numpy_function` op.

    Args:
      inputs: List of tensors passed to the numpy function; the helper takes
        a single cost matrix.

    Returns:
      A one-element list holding an int32 tensor with one entry per matrix
      column: the row index assigned to that column, or -1 when the column
      was left unmatched.
    """

    def numpy_matching(input_matrix):
        matched_rows, matched_cols = linear_sum_assignment(input_matrix)
        num_columns = input_matrix.shape[1]
        # Every column starts unmatched (-1).
        column_to_row = np.full(num_columns, -1)
        column_to_row[matched_cols] = matched_rows
        return column_to_row.astype(np.int32)

    return tf.numpy_function(numpy_matching, inputs, Tout=[tf.int32])
def index_loop_step(i, indices):
    """Loop body: decodes one chunk of the bit string and appends its index.

    `sample_index`, `n_bits_per_step` and `from_bit_string` come from the
    enclosing scope.

    Args:
      i: Current offset into `sample_index`.
      indices: 1-D int32 tensor of the indices decoded so far.

    Returns:
      `[i + n_bits_per_step, indices with the new index appended]`.
    """
    bit_chunk = tf.strings.substr(sample_index, i, n_bits_per_step)
    decoded = tf.numpy_function(from_bit_string, [bit_chunk], tf.int64)
    decoded = tf.cast(decoded, tf.int32)
    next_offset = i + n_bits_per_step
    extended = tf.concat((indices, [decoded]), axis=0)
    return [next_offset, extended]
def complete_augmenter(img_tf, ann_tf, seed_tf, alpha_tf, output_height, output_width):
    """Applies the seeded numpy augmentations, then colour, hue and resize.

    Args:
      img_tf: Image tensor.
      ann_tf: Annotation tensor belonging to `img_tf`.
      seed_tf: Seed tensor forwarded to `numpy_augmenter`.
      alpha_tf: Tensor whose elements [0:3] go to the colour stage and
        [3:4] to the hue stage.
      output_height: Height passed to the resize stage.
      output_width: Width passed to the resize stage.

    Returns:
      The augmented `(image, annotation)` pair.
    """
    img_tf, ann_tf = tf.numpy_function(
        func=numpy_augmenter,
        inp=[img_tf, ann_tf, seed_tf],
        Tout=[tf.float32, tf.float32],
    )

    color_alpha = alpha_tf[0:3]
    img_tf, ann_tf = color_augmenter(img_tf, ann_tf, color_alpha)

    hue_alpha = alpha_tf[3:4]
    img_tf, ann_tf = hue_augmenter(img_tf, ann_tf, hue_alpha)

    target_shape = (output_height, output_width)
    img_tf, ann_tf = resize_augmenter(img_tf, ann_tf, target_shape)
    return img_tf, ann_tf
def loss(y_synth, y_truth):
    """Weighted average of the SSIM, adversarial, colour and NMSE losses.

    Both images are compared at 64x64. The discriminator receives the
    synthesized image after it has been resized back up to 256x256 from
    that 64x64 version. `alpha`, `beta`, `gamma`, `delta`, `discriminator`
    and `hists` are read from the enclosing scope.

    Args:
      y_synth: Synthesized image tensor.
      y_truth: Ground-truth image tensor; it is mapped with
        `(y + 1) * 127.5` before use.

    Returns:
      `(ssim_loss + discriminator_loss + color_loss + nmse_loss)` divided by
      `(alpha + beta + gamma + delta)`.
    """
    synth_small = tf_v1.image.resize(y_synth, (64, 64))
    truth_rescaled = (y_truth + 1.0) * 127.5
    synth_full = tf_v1.image.resize(synth_small, (256, 256))
    truth_small = tf_v1.image.resize(truth_rescaled, (64, 64))

    fake_score = discriminator(synth_full)["discriminator_output"]

    # Colour term: Wasserstein distance between the two histograms.
    synth_hist = tf_v1.numpy_function(hists, [synth_small], tf_v1.float32)
    truth_hist = tf_v1.numpy_function(hists, [truth_small], tf_v1.float32)
    wasserstein = summarizer.wasserstein.Wasserstein(1, synth_hist, truth_hist)
    color_loss = gamma * wasserstein.dist(C=.1, nsteps=10, reset=True)

    similarity = tf_v2.image.ssim(synth_small, truth_small, 255)
    summarizer_loss = beta * (1 - similarity)

    discriminator_loss = alpha * (1 - tf_v1.nn.sigmoid(fake_score))

    # MSE normalised by the product of the two image means.
    mse_loss = tf_v1.losses.mean_squared_error(synth_small, truth_small)
    mean_product = (tf_v1.math.reduce_mean(synth_small)
                    * tf_v1.math.reduce_mean(truth_small))
    nmse_loss = delta * mse_loss / mean_product

    weighted_sum = summarizer_loss + discriminator_loss + color_loss + nmse_loss
    return weighted_sum / (alpha + beta + gamma + delta)
def _parse_apply_preprocessing(self, images, labels):
    """Loads an image and its heatmap label for the input pipeline.

    Args:
      images: Image reference forwarded to `self._parse_read_images` (or to
        the parent implementation).
      labels: Label reference; loaded with `self._parse_load_heatmap_binary`
        unless the labels are themselves image files.

    Returns:
      The `(images, labels)` pair after loading.
    """
    if self.__label_from_image_file:
        # Heatmaps stored as images are handled like regular images by the
        # parent class.
        return super()._parse_apply_preprocessing(images, labels)

    # Heatmaps generated from points are stored as binaries, so the loader
    # has to be wrapped in a numpy function.
    images = self._parse_read_images(images, channels=self._image_depth)
    labels = tf.numpy_function(
        self._parse_load_heatmap_binary, [labels], tf.float32)
    return images, labels
def draw_boxes(image_and_detections):
    """Draws boxes on image.

    `true_image_shape`, `original_image_spatial_shape` and
    `visualize_boxes_fn` are read from the enclosing scope.

    Args:
      image_and_detections: Mutable sequence whose element 0 is the true
        image shape, element 1 the original spatial shape, element 2 the
        image, and the remaining elements the detection tensors passed to
        `visualize_boxes_fn`. Element 2 is replaced in place when the image
        is resized.

    Returns:
      A uint8 tensor produced by `visualize_boxes_fn`.
    """
    true_shape = image_and_detections[0]
    original_shape = image_and_detections[1]
    # Start from the raw image so the resize below always has an input.
    # Previously `image` was unbound (UnboundLocalError) when
    # `true_image_shape` was None but `original_image_spatial_shape` was not.
    image = image_and_detections[2]
    if true_image_shape is not None:
        image = shape_utils.pad_or_clip_nd(
            image, [true_shape[0], true_shape[1], 3])
    if original_image_spatial_shape is not None:
        image_and_detections[2] = _resize_original_image(image, original_shape)

    image_with_boxes = tf.numpy_function(
        visualize_boxes_fn, image_and_detections[2:], tf.uint8)
    return image_with_boxes
def _build_reward_op(self):
    """Builds the importance-weighted multi-step reward op.

    The op combines trajectory data read from `self._replay.transition`
    with the epsilon-greedy (`self.epsilon_eval`) policy derived from
    `self.target_convnet`:

      1. the probability that policy gives to each logged action is divided
         by the logged propensity `traj_prob`;
      2. the ratio of the first horizon step is overwritten with 1 and the
         ratios are cumulatively multiplied along the horizon axis;
      3. `self.incriwlb.tfhook` is called (through `tf.numpy_function`) on
         the discounts, cumulative weights and rewards;
      4. the hook result multiplies the sum over the horizon of
         discount * weight * reward.

    When `self.summary_writer` is set, three scalar summaries ('v', 'alpha',
    'kappa') are also registered under the 'Duals' scope.

    Returns:
      A tensor; per the inline shape comment it has one entry per batch
      element.
    """
    # Probability the epsilon-greedy policy gives to a non-greedy (`off`)
    # and to the greedy (`on`) action.
    off = self.epsilon_eval / self.num_actions
    on = (1 - self.epsilon_eval) + off
    s = self._replay.transition['traj_state']
    a = self._replay.transition['traj_action']
    r = self._replay.transition['traj_reward']
    p = self._replay.transition['traj_prob']
    gamma = self._replay.transition['traj_discount']
    state_shape = self.observation_shape + (self.stack_size,)
    flat_s = tf.reshape(s, shape=(-1,) + state_shape) # b*h x 84 x 84 x 4
    flat_qs = tf.stop_gradient(self.target_convnet(flat_s).q_values) # b*h x num_actions
    flat_qmax = tf.argmax(flat_qs, axis=1) # b*h
    flat_pi = tf.one_hot(flat_qmax, depth=self.num_actions, axis=-1, on_value=on, off_value=off) # b*h x num_actions
    flat_a = tf.reshape(a, (-1,))
    action_mask = tf.one_hot(flat_a, depth=self.num_actions, dtype=tf.bool, on_value=True, off_value=False)
    # Despite the name, this selects the target-policy probability of the
    # action that was actually taken.
    flat_behavior_probs = tf.boolean_mask(flat_pi, action_mask) #b*h
    behavior_probs = tf.reshape(flat_behavior_probs, (-1, self.update_horizon)) #b x h
    flassimp = behavior_probs / p #b x h

    # NB: tensorflow sucks
    # (slice assignment is done in numpy: the first horizon step's weight is
    # forced to 1.)
    # NOTE(review): assign_ones writes into the array handed over by
    # tf.numpy_function -- confirm that mutating the input buffer is safe.
    def assign_ones(w):
        w[:, 0] = 1
        return w

    importance_weights = tf.numpy_function(assign_ones, [ flassimp ], tf.float32)
    w = tf.math.cumprod(importance_weights, axis=1) #b x h
    # Alternative hooks kept for reference; only `incriwlb` is active.
    #q = tf.numpy_function(lambda *args: self.mle.tfhook(*args), [ gamma, w, r ], tf.float32)
    #q = tf.numpy_function(lambda *args: self.ib.tfhook(*args), [ gamma, w, r ], tf.float32)
    #q = tf.numpy_function(lambda *args: self.iwlb.tfhook(*args), [ gamma, w, r ], tf.float32)
    q = tf.numpy_function(lambda *args: self.incriwlb.tfhook(*args), [ gamma, w, r ], tf.float32)
    if self.summary_writer is not None:
        duals = tf.numpy_function(lambda *args: self.incriwlb.dualstfhook(*args), [ ], tf.float32)
        with tf.compat.v1.variable_scope('Duals'):
            tf.compat.v1.summary.scalar('v', duals[0])
            tf.compat.v1.summary.scalar('alpha', duals[1])
            tf.compat.v1.summary.scalar('kappa', duals[2])
    return q * tf.reduce_sum(gamma * w * r, axis=1) #b
def code_importance_sample(t_loc, t_scale, p_loc, p_scale, n_coding_bits, seed, return_index_only=False):
    """Picks the proposal sample with the largest importance weight.

    `ceil(exp(total KL))` samples are drawn from the proposal with
    `stateless_normal_sample`. The one maximising
    `sum(log target - log proposal)` over axis 1 is selected.

    Args:
      t_loc: Location of the target Normal.
      t_scale: Scale of the target Normal.
      p_loc: Location of the proposal Normal.
      p_scale: Scale of the proposal Normal.
      n_coding_bits: Unused; kept so existing callers keep working.
      seed: Seed handed to `stateless_normal_sample`.
      return_index_only: When True, return the 1-based index instead of its
        encoded form.

    Returns:
      `(best_sample, index + 1)` when `return_index_only` is True, otherwise
      `(best_sample, bitcode)` where `bitcode` is a string tensor produced
      by `elias_delta_code(index + 1)`. `best_sample` keeps a leading
      dimension of size 1.
    """
    target = tfd.Normal(loc=t_loc, scale=t_scale)
    proposal = tfd.Normal(loc=p_loc, scale=p_scale)

    kls = tfd.kl_divergence(target, proposal)
    total_kl = tf.reduce_sum(kls)
    # The sample count grows exponentially with the KL (in nats).
    num_samples = tf.cast(tf.math.ceil(tf.exp(total_kl)), tf.int32)

    samples = stateless_normal_sample(
        loc=p_loc, scale=p_scale, num_samples=num_samples, seed=seed)

    importance_weights = tf.reduce_sum(
        target.log_prob(samples) - proposal.log_prob(samples), axis=1)
    index = tf.argmax(importance_weights)
    best_sample = samples[index:index + 1, :]

    if return_index_only:
        return best_sample, index + 1

    # The index is shifted to 1-based before being encoded.
    bitcode = tf.numpy_function(elias_delta_code, [index + 1], tf.string)
    return best_sample, bitcode
def kendall_tau_metric(predictions, ratings, weights=None):
    """Builds the streaming Kendall Tau metric ops.

    Args:
      predictions: Tensor of predicted scores.
      ratings: Tensor of reference ratings.
      weights: Optional boolean mask; when given, only the entries it
        selects are kept in both `predictions` and `ratings`.

    Returns:
      `(metric_value, update_op)`: a scalar float32 tensor with the Kendall
      Tau of everything gathered by `concat_tensors`, and that helper's
      update op.
    """

    def _kendall_tau(x, y):
        correlation = stats.kendalltau(x, y)[0]
        return np.array(correlation).astype(np.float32)

    if weights is not None:
        predictions = tf.boolean_mask(predictions, weights)
        ratings = tf.boolean_mask(ratings, weights)

    with tf.variable_scope("kendall_tau"):
        all_predictions, all_ratings, update_op = concat_tensors(
            predictions, ratings)
        tau = tf.numpy_function(
            _kendall_tau, [all_predictions, all_ratings], tf.float32)
        # The numpy function loses static shape information; pin it to a scalar.
        metric_value = tf.reshape(tau, shape=[])
        return metric_value, update_op
def train_on_dataset(self):
    """Trains on every file of the training directory, epoch after epoch.

    Files are listed once, then shuffled and loaded through a `tf.data`
    pipeline that calls `self.load_data_npy` inside `tf.numpy_function`.
    Each loaded file is passed to `self.train_a_file` together with the dev
    set. Training runs for `FLAGS.num_epochs` epochs, or forever when that
    flag is 0.
    """
    self.best_dev_auc = None
    X_dev, Y_dev = self.load_data_npy(self.FLAGS.dev_data_file, False)

    train_files = []
    for entry in listdir(self.FLAGS.training_data_dir):
        candidate = join(self.FLAGS.training_data_dir, entry)
        if isfile(candidate):
            train_files.append(candidate)

    def load_file(filename):
        arrays = tf.numpy_function(
            self.load_data_npy, [filename, True], [tf.float64, tf.float64])
        return tuple(arrays)

    dataset = tf.data.Dataset.from_tensor_slices(train_files)
    dataset = dataset.shuffle(buffer_size=len(train_files))
    dataset = dataset.map(load_file, num_parallel_calls=2)
    dataset = dataset.prefetch(buffer_size=1)
    iterator = dataset.make_initializable_iterator()
    X_iter, Y_iter = iterator.get_next()

    epoch = 0
    while self.FLAGS.num_epochs == 0 or epoch < self.FLAGS.num_epochs:
        # Re-initialising restarts (and reshuffles) the file iterator.
        self.sess.run(iterator.initializer)
        epoch_start = time.time()
        data_time = 0
        while True:
            try:
                load_start = time.time()
                X_train, Y_train = self.sess.run([X_iter, Y_iter])
                load_seconds = time.time() - load_start
                logging.debug('Load a batch took %.2f seconds' % load_seconds)
                data_time += load_seconds
                # batch_size == 0 means "use the whole file as one batch".
                if self.FLAGS.batch_size == 0:
                    batch_size = X_train.shape[0]
                else:
                    batch_size = self.FLAGS.batch_size
                self.train_a_file(X_train, Y_train, batch_size, X_dev, Y_dev,
                                  epoch)
            except tf.errors.OutOfRangeError:
                # All files of this epoch have been consumed.
                break
        epoch_seconds = time.time() - epoch_start
        logging.info(
            'Epoch %d took %.2f seconds: %.2f seconds for loading data' %
            (epoch, epoch_seconds, data_time))
        epoch += 1
def train(args):
    """Trains the model.

    Builds the input pipeline from the files matching `args.train_glob`,
    creates the model and its optimisation ops, writes the run arguments to
    the checkpoint directory, optionally registers summaries, and runs the
    training loop in a `MonitoredTrainingSession` until one of the hooks
    stops it.

    Args:
      args: Parsed command-line namespace. The attributes read here are
        verbose, train_glob, preprocess_threads, patchsize, batchsize,
        likelihood_variance, learned_prior, prior_lr, last_step,
        checkpoint_dir, log_dir, save_summary_secs and save_checkpoint_secs;
        the whole namespace is also passed to `create_model` and
        `get_runname`. `args.prior_lr` is set to 1e-3 in place when
        `learned_prior` is on and no value was given.

    Returns:
      The model created by `create_model`.

    Raises:
      RuntimeError: If `args.train_glob` matches no file.
    """
    import glob
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)
    else:
        tf.logging.set_verbosity(tf.logging.WARN)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        if 'npy' in args.train_glob:  # reading numpy arrays directly instead of from images
            train_dataset = train_dataset.map(  # https://stackoverflow.com/a/49459838
                lambda item: tuple(
                    tf.numpy_function(read_npy_file_helper, [item], [
                        tf.float32,
                    ])),
                num_parallel_calls=args.preprocess_threads)
        else:
            train_dataset = train_dataset.map(
                read_img, num_parallel_calls=args.preprocess_threads)
        # Train on random square patches of side `patchsize` with 3 channels.
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    model = create_model(args)
    loss_dict = model.compute_loss(x, args.likelihood_variance)
    train_loss = loss_dict['loss']
    x_hat = loss_dict['x_hat']
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_hat))

    step = tf.train.create_global_step()
    # The encoder/decoder variables are optimised separately from the prior;
    # only this optimiser increments the global step.
    nn_variables = model.inference_net.variables + model.generative_net.variables
    main_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        train_loss, global_step=step, var_list=nn_variables)
    if args.learned_prior:
        if not args.prior_lr:
            args.prior_lr = 1e-3
        prior_step = tf.train.AdamOptimizer(
            learning_rate=args.prior_lr).minimize(
                train_loss, var_list=model.prior.variables)
        train_op = tf.group(prior_step, main_step)
    else:
        train_op = main_step

    # aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    # aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])
    # train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]

    # Checkpoints and argument records live in a per-run sub-directory.
    runname = get_runname(vars(args))
    save_dir = os.path.join(args.checkpoint_dir, runname)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    import json
    import datetime
    with open(os.path.join(save_dir, 'record.txt'), 'a') as f:  # keep more detailed record in text file
        f.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '\n')
        f.write(json.dumps(vars(args), indent=4, sort_keys=True) + '\n')
        f.write('\n')
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:  # will overwrite existing
        json.dump(vars(args), f, indent=4, sort_keys=True)

    # Summaries are only built when a log directory was given.
    if args.log_dir != '':
        tf.summary.scalar("loss", train_loss)
        tf.summary.scalar("elbo", loss_dict['elbo'])
        tf.summary.scalar("likelihood", loss_dict['likelihood'])
        tf.summary.scalar("kl", loss_dict['kl'])
        tf.summary.scalar("mse", train_mse)
        tf.summary.scalar(
            "psnr", -10 * (tf.log(train_mse) / np.log(10)))  # note MSE was computed on float images
        tf.summary.image("original", convert_float_to_uint8(x), max_outputs=2)
        tf.summary.image("reconstruction", convert_float_to_uint8(x_hat), max_outputs=2)
        summary_op = tf.summary.merge_all()
        tf_log_dir = os.path.join(args.log_dir, runname)
        summary_hook = tf.train.SummarySaverHook(
            save_secs=args.save_summary_secs, output_dir=tf_log_dir, summary_op=summary_op)
        hooks.append(summary_hook)

    with tf.train.MonitoredTrainingSession(
            hooks=hooks, checkpoint_dir=save_dir,
            save_checkpoint_secs=args.save_checkpoint_secs,
            save_summaries_secs=args.save_summary_secs) as sess:
        while not sess.should_stop():
            sess.run(train_op)

    return model
def tf_np_load(inputs):
    """Wraps `np_load` in a `tf.numpy_function` op with float32 output.

    Args:
      inputs: Tensor handed to `np_load` as its single argument.

    Returns:
      The float32 tensor produced by `np_load`.
    """
    return tf.numpy_function(np_load, [inputs], tf.float32)
def resize_augmenter(image, annotation, output_shape):
    """Resizes the image to `output_shape` and clips both outputs to [0, 1].

    The backend is chosen by `_DEFAULT_AUG_PARAMS["resize_method"]`:
    "tensorflow" (graph-mode bilinear resize), "PIL" or "turicreate" (both
    run in numpy through `tf.numpy_function`).

    Args:
      image: Image tensor; the numpy backends scale it by 255 and cast it to
        uint8 before resizing.
      annotation: Annotation tensor; only clipped, since the coordinates are
        relative.
      output_shape: `(height, width)` of the resized image.

    Returns:
      `(image_clipped, annotation)`.

    Raises:
      Exception: If the configured resize method is not supported.
    """
    resize_method = _DEFAULT_AUG_PARAMS["resize_method"]

    def resize_PIL_image(image, output_shape):
        image *= 255.0
        as_bytes = image.astype("uint8")
        # PIL expects (width, height).
        target_size = (output_shape[1], output_shape[0])
        resized = Image.fromarray(as_bytes).resize(
            target_size, resample=Image.BILINEAR)
        scaled = np.array(resized).astype(np.float32)
        scaled /= 255.0
        return scaled

    def resize_turicreate_image(image, output_shape):
        image *= 255.0
        image = image.astype("uint8")
        FORMAT_RAW = 2
        height, width, channels = image.shape[0], image.shape[1], image.shape[2]
        tc_image = tc.Image(
            _image_data=image.tobytes(),
            _width=width,
            _height=height,
            _channels=channels,
            _format_enum=FORMAT_RAW,
            _image_data_size=image.size,
        )
        tc_image = tc.image_analysis.resize(
            tc_image, output_shape[1], output_shape[0], resample="bilinear"
        )
        scaled = tc_image.pixel_data.astype(np.float32)
        scaled /= 255.0
        return scaled

    numpy_resizers = {
        "PIL": resize_PIL_image,
        "turicreate": resize_turicreate_image,
    }

    if resize_method == "tensorflow":
        new_height = tf.cast(output_shape[0], dtype=tf.int32)
        new_width = tf.cast(output_shape[1], dtype=tf.int32)
        # resize_bilinear works on batches: add a batch axis, then drop it.
        batched = tf.expand_dims(image, 0)
        resized = tf.image.resize_bilinear(batched, [new_height, new_width])
        image_scaled = tf.squeeze(resized, [0])
    elif resize_method in numpy_resizers:
        image_scaled = tf.numpy_function(
            func=numpy_resizers[resize_method],
            inp=[image, output_shape],
            Tout=[tf.float32],
        )
    else:
        raise Exception("Non-supported resize method.")

    image_clipped = tf.clip_by_value(image_scaled, 0.0, 1.0)
    # No geometry changes (because of relative co-ordinate system)
    annotation = tf.clip_by_value(annotation, 0.0, 1.0)
    return image_clipped, annotation
def bleurt_preprocessing_ops(references, candidates):
    """Builds the graph op that tokenizes and encodes BLEURT inputs.

    Args:
      references: Tensor of reference sentences.
      candidates: Tensor of candidate sentences.

    Returns:
      The three int64 tensors returned by `_py_encode`.
    """
    output_types = (tf.int64, tf.int64, tf.int64)
    encoded = tf.numpy_function(
        func=_py_encode, inp=[references, candidates], Tout=output_types)
    return encoded
def group_pairwise_accuracy(predictions, ratings, group):
    """Builds the computation graph for the group-level pairwise accuracy.

    Data points are bucketed by their `group` value and each bucket is
    reduced to its mean prediction and mean rating. Within every 'year_lp'
    (looked up in the module-level `group_hash_dict`), all pairs of buckets
    are compared. The metric is the fraction of pairs whose mean
    predictions are ordered the same way as their mean ratings.

    Args:
      predictions: Tensor of predicted scores.
      ratings: Tensor of reference ratings.
      group: Tensor of group identifiers, one per data point; every value
        must be a key of `group_hash_dict`.

    Returns:
      `(metric_value, update_op)`: a scalar float32 tensor and the update op
      returned by `concat_tensors`. The metric is NaN when there is no pair
      to compare.
    """

    def pairwise_accuracy(predictions, ratings, group):
        def to_components(index):
            # Indices of `index` sorted by value, split at every value
            # change; the final split is always empty.
            return np.split(
                np.argsort(index),
                np.cumsum(np.unique(index, return_counts=True)[1]))

        split_groups = to_components(group)

        # Collect one row per group and build the frame once:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.
        rows = []
        for group_idx in split_groups[:-1]:  # last array is empty
            group_x, group_y = predictions[group_idx], ratings[group_idx]
            group_hash = group[group_idx[0]]
            rows.append({
                'bleurt_score': np.mean(group_x),
                'score': np.mean(group_y),
                'year_lp': group_hash_dict[group_hash],
                'group_name': group_hash,
            })
        grouped_scores = pd.DataFrame(
            rows, columns=['bleurt_score', 'score', 'year_lp', 'group_name'])

        logging.info("Identified year, lp for all data points...\n")
        logging.info(str(grouped_scores.head()))

        # Count the pairs, within one year/lp, that both scores order alike.
        total_pairs = 0
        bleurt_accuracy = 0.
        for _, year_lp_rows in grouped_scores.groupby('year_lp'):
            for (_, row), (_, row_) in itertools.combinations(
                    year_lp_rows.iterrows(), r=2):
                total_pairs += 1
                predicted_order = np.sign(
                    row['bleurt_score'] - row_['bleurt_score'])
                rated_order = np.sign(row['score'] - row_['score'])
                if predicted_order == rated_order:
                    bleurt_accuracy += 1

        if total_pairs == 0:
            # Nothing to compare (no data, or one group per year/lp):
            # report NaN instead of failing with ZeroDivisionError.
            logging.info("Pairwise accuracy undefined: no pairs to compare.")
            return np.array(np.nan).astype(np.float32)

        accuracy = bleurt_accuracy / total_pairs
        logging.info(
            "Pairwise accuracy computed. Total language pairs evaluated: {}, total correctly assessed: {}"
            .format(str(total_pairs), str(bleurt_accuracy)))
        return np.array(accuracy).astype(np.float32)

    with tf.variable_scope("group_pairwise_accuracy"):
        # concat_tensors gathers the values seen across evaluation steps;
        # the metric is computed over everything gathered so far.
        concat_predictions_value, concat_labels_value, concat_groups_value, update_op = (
            concat_tensors(predictions, ratings, group))
        metric_value = tf.reshape(
            tf.numpy_function(pairwise_accuracy, [
                concat_predictions_value, concat_labels_value,
                concat_groups_value
            ], tf.float32),
            shape=[])
        return metric_value, update_op
def code_grouped_importance_sample(sess, target, proposal, seed, n_bits_per_group, max_group_size_bits=4, dim_kl_bit_limit=12, return_group_indices_only=False, return_indices=False, return_indices_only=False):
    """Codes a sample of `target` group by group with importance sampling.

    Steps, as performed below:
      1. Standardise the target by the proposal, so the proposal becomes a
         unit Normal.
      2. Dimensions whose KL exceeds `dim_kl_bit_limit` bits ("outliers")
         are replaced by the proposal for sampling purposes; a quantized
         sample of the target is kept for them instead.
      3. Partition the dimensions into consecutive groups bounded by a KL
         budget derived from `n_bits_per_group` and by a size limit derived
         from `max_group_size_bits`.
      4. Run `code_importance_sample` once per group, seeding group i with
         `seed + i`.
      5. Concatenate the group samples, undo the standardisation and put
         target samples back in the outlier dimensions.

    Args:
      sess: Session used to evaluate intermediate tensors and the result.
      target: Target distribution; must be float32 and expose `loc`/`scale`.
      proposal: Proposal distribution; must be float32 and expose
        `loc`/`scale`.
      seed: Base integer seed.
      n_bits_per_group: Bit budget per group.
      max_group_size_bits: A group is closed once log2(size + 1) exceeds
        this value.
      dim_kl_bit_limit: Per-dimension KL (in bits) above which a dimension
        is treated as an outlier.
      return_group_indices_only: Return right after the grouping step.
      return_indices: Return sample indices instead of a bit code.
      return_indices_only: Return only the tuple of per-group indices.

    Returns:
      Depending on the flags:
        * `return_group_indices_only`: `(group_start_indices, group_kls)`;
        * `return_indices_only`: tuple of per-group indices;
        * `return_indices`: `(sample, indices, group_start_indices,
          outlier_extras)`;
        * otherwise: `(sample, bitcode, group_start_indices,
          outlier_extras)`.

    Raises:
      Exception: If `target` or `proposal` is not float32.
    """
    # Make sure the distributions have the correct type
    if target.dtype is not tf.float32:
        raise Exception("Target datatype must be float32!")
    if proposal.dtype is not tf.float32:
        raise Exception("Proposal datatype must be float32!")

    num_dimensions = sess.run(tf.reduce_prod(tf.shape(proposal.loc)))

    # rescale proposal by the proposal
    p_loc = sess.run(tf.zeros_like(proposal.loc))
    p_scale = sess.run(tf.ones_like(proposal.scale))

    # rescale target by the proposal
    t_loc = (target.loc - proposal.loc) / proposal.scale
    t_scale = target.scale / proposal.scale

    # If we're going to do importance sampling, separate out dimensions with large KL,
    # we'll deal with them separately.
    kl_bits = tfd.kl_divergence(target, proposal) / np.log(2)
    t_loc = sess.run(tf.where(kl_bits <= dim_kl_bit_limit, t_loc, p_loc))
    t_scale = sess.run(tf.where(kl_bits <= dim_kl_bit_limit, t_scale, p_scale))

    # We'll send the quantized samples for dimensions with high KL
    outlier_indices = tf.where(kl_bits > dim_kl_bit_limit)
    target_samples = target.sample()

    # Select only the bits of the sample that are relevant
    outlier_samples = tf.gather_nd(target_samples, outlier_indices)

    # Halve precision
    outlier_samples = tfq.quantize(outlier_samples, -30, 30, tf.quint16).output
    outlier_extras = (tf.reshape(outlier_indices, [-1]), outlier_samples)

    # KL of the standardised problem; outlier dimensions now match the
    # proposal exactly.
    # NOTE(review): kl_divs is indexed with a flat idx below, so loc/scale
    # are presumably 1-D here -- confirm against the callers.
    kl_divergences = tfd.kl_divergence(tfd.Normal(loc=t_loc, scale=t_scale),
                                       tfd.Normal(loc=p_loc, scale=p_scale))
    kl_divs = sess.run(kl_divergences)

    group_start_indices = [0]
    group_kls = []
    total_kl_bits = np.sum(kl_divs) / np.log(2)
    print("Total KL to split up: {:.2f} bits, "
          "maximum bits per group: {}, "
          "estimated number of groups: {},"
          "coding {} dimensions".format(total_kl_bits, n_bits_per_group, total_kl_bits // n_bits_per_group + 1, num_dimensions))

    current_group_size = 0
    current_group_kl = 0
    # Per-group KL budget in nats, with one nat of slack subtracted.
    n_nats_per_group = n_bits_per_group * np.log(2) - 1

    # Greedy grouping: close the current group when it gets too large, when
    # adding this dimension would exceed the KL budget, or at the last
    # dimension.
    for idx in range(num_dimensions):
        group_bits = np.log(current_group_size + 1) / np.log(2)
        if group_bits > max_group_size_bits or \
                current_group_kl + kl_divs[idx] > n_nats_per_group or \
                idx == num_dimensions - 1:
            group_start_indices.append(idx)
            group_kls.append(current_group_kl / np.log(2))
            current_group_size = 1
            current_group_kl = kl_divs[idx]
        else:
            current_group_kl += kl_divs[idx]
            current_group_size += 1

    print("Maximum group KL: {:.3f}".format(np.max(group_kls)))
    group_start_indices += [num_dimensions]
    group_start_indices = np.array(group_start_indices)

    if return_group_indices_only:
        return group_start_indices, group_kls

    # ======================================================================
    # Sample each group
    # ======================================================================
    results = []

    # Get the importance sampling op before looping it to avoid graph construction cost
    # The length is variable, hence the shape is [None]
    target_loc = tf.placeholder(tf.float32, shape=[None])
    target_scale = tf.placeholder(tf.float32, shape=[None])
    prop_loc = tf.placeholder(tf.float32, shape=[None])
    prop_scale = tf.placeholder(tf.float32, shape=[None])
    seed_feed = tf.placeholder(tf.int32)
    result_ops = code_importance_sample(t_loc=target_loc, t_scale=target_scale, p_loc=prop_loc, p_scale=prop_scale, seed=seed_feed, n_coding_bits=n_bits_per_group, return_index_only=return_indices_only or return_indices)

    for i in tqdm(range(len(group_start_indices) - 1)):
        start_idx = group_start_indices[i]
        end_idx = group_start_indices[i + 1]
        # Each group gets its own seed so its samples can be regenerated.
        result = sess.run(result_ops, feed_dict={
            target_loc: t_loc[start_idx:end_idx],
            target_scale: t_scale[start_idx:end_idx],
            prop_loc: p_loc[start_idx:end_idx],
            prop_scale: p_scale[start_idx:end_idx],
            seed_feed: seed + i
        })
        results.append(result)

    # To build probability distribution we return the indices only
    if return_indices_only:
        samples, indices = zip(*results)
        return indices

    if return_indices:
        samples, indices = zip(*results)
    else:
        samples, codes = zip(*results)
        # Join the per-group code words into one bit string.
        bitcode = tf.numpy_function(
            lambda code_words: ''.join(
                [cw.decode("utf-8") for cw in code_words]), [codes], tf.string)

    sample = tf.concat(samples, axis=1)

    # Rescale the sample
    sample = proposal.scale * sample + proposal.loc
    # Outlier dimensions take the target sample drawn earlier.
    sample = tf.where(kl_bits <= dim_kl_bit_limit, tf.squeeze(sample), target_samples)

    if return_indices:
        sample, outlier_extras = sess.run([sample, outlier_extras])
        return sample, indices, group_start_indices, outlier_extras
    else:
        sample, bitcode, outlier_extras = sess.run(
            [sample, bitcode, outlier_extras])
        return sample, bitcode, group_start_indices, outlier_extras
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

    Builds the detection network, its losses and, depending on `mode`, either
    the training op or the evaluation metrics, then wraps everything in an
    estimator spec.

    Side effects: this function mutates `params` (it sets
    `params['is_training_bn']` and, in EVAL mode,
    `params['nms_configs']['max_nms_inputs']`). When
    `params['device']['grad_ckpting']` is set it also replaces the `gradients`
    attribute of the `tensorflow.python.ops.gradients` module.

    Args:
      features: the input image tensor with shape [batch_size, height, width, 3].
        The height and width are fixed and equal.
      labels: the input labels in a dictionary. The labels include class targets
        and box targets which are dense label maps. The labels are generated from
        get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN and EVAL.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the model outputs class logits and box regression outputs.
      variable_filter_fn: the filter function that takes trainable_variables and
        returns the variable list after applying the filter rule.

    Returns:
      A `TPUEstimatorSpec` when `params['strategy'] == 'tpu'`, otherwise an
      `EstimatorSpec`, to run training or evaluation.

    Raises:
      RuntimeError: if both ckpt and backbone_ckpt are set (checked in TRAIN
        mode only).
      ValueError: if `params['optimizer']` is neither 'sgd' nor 'adam' (checked
        in TRAIN mode only).
    """
    is_tpu = params['strategy'] == 'tpu'
    if params['img_summary_steps']:
        utils.image('input_image', features, is_tpu)
    training_hooks = []
    params['is_training_bn'] = (mode == tf.estimator.ModeKeys.TRAIN)

    if params['use_keras_model']:

        def model_fn(inputs):
            # Note: this local `model` shadows the `model` argument, which is
            # ignored on the keras path.
            model = efficientdet_keras.EfficientDetNet(
                config=hparams_config.Config(params))
            cls_out_list, box_out_list = model(inputs, params['is_training_bn'])
            # Re-key the per-level output lists by feature level.
            cls_outputs, box_outputs = {}, {}
            for i in range(params['min_level'], params['max_level'] + 1):
                cls_outputs[i] = cls_out_list[i - params['min_level']]
                box_outputs[i] = box_out_list[i - params['min_level']]
            return cls_outputs, box_outputs
    else:
        model_fn = functools.partial(model, config=hparams_config.Config(params))

    precision = utils.get_precision(params['strategy'], params['mixed_precision'])
    cls_outputs, box_outputs = utils.build_model_with_precision(
        precision, model_fn, features, params['is_training_bn'])

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
        cls_outputs, box_outputs, labels, params)
    reg_l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + reg_l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate, is_tpu)
        utils.scalar('trainloss/cls_loss', cls_loss, is_tpu)
        utils.scalar('trainloss/box_loss', box_loss, is_tpu)
        utils.scalar('trainloss/det_loss', det_loss, is_tpu)
        utils.scalar('trainloss/reg_l2_loss', reg_l2loss, is_tpu)
        utils.scalar('trainloss/loss', total_loss, is_tpu)
        if params['iou_loss_type']:
            utils.scalar('trainloss/box_iou_loss', box_iou_loss, is_tpu)
        train_epochs = tf.cast(global_step,
                               tf.float32) / params['steps_per_epoch']
        utils.scalar('train_epochs', train_epochs, is_tpu)

    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(
            decay=moving_average_decay, num_updates=global_step)
        ema_vars = utils.get_ema_vars()

    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['optimizer'].lower() == 'sgd':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, momentum=params['momentum'])
        elif params['optimizer'].lower() == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        else:
            raise ValueError('optimizers should be adam or sgd')

        if is_tpu:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)
        if params['device']['grad_ckpting']:
            # pylint: disable=g-import-not-at-top,g-direct-tensorflow-import
            from third_party.grad_checkpoint import grad
            from tensorflow.python.ops import gradients

            # pylint: enable=g-import-not-at-top,g-direct-tensorflow-import

            # monkey patch tf.gradients to point to our custom version,
            # with automatic checkpoint selection
            def gradients_(ys, xs, grad_ys=None, **kwargs):
                return grad.gradients(
                    ys,
                    xs,
                    grad_ys,
                    checkpoints=params['device']['grad_ckpting_list'],
                    **kwargs)

            gradients.__dict__['gradients'] = gradients_

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list)

        if params.get('clip_gradients_norm', None):
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                # First clip each variable's norm, then clip global norm.
                clip_norm = abs(params['clip_gradients_norm'])
                clipped_grads = [
                    tf.clip_by_norm(g, clip_norm) if g is not None else None
                    for g in grads
                ]
                clipped_grads, _ = tf.clip_by_global_norm(clipped_grads,
                                                          clip_norm)
                utils.scalar('gradient_norm',
                             tf.linalg.global_norm(clipped_grads), is_tpu)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(
                    total_loss, global_step, var_list=var_list)

        if moving_average_decay:
            # The EMA update becomes the final train op and runs after the
            # optimizer step.
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            if params['nms_configs'].get('pyfunc', True):
                # Run the numpy NMS implementation one image at a time.
                detections_bs = []
                for index in range(kwargs['boxes'].shape[0]):
                    nms_configs = params['nms_configs']
                    detections = tf.numpy_function(
                        functools.partial(
                            nms_np.per_class_nms, nms_configs=nms_configs),
                        [
                            kwargs['boxes'][index],
                            kwargs['scores'][index],
                            kwargs['classes'][index],
                            tf.slice(kwargs['image_ids'], [index], [1]),
                            tf.slice(kwargs['image_scales'], [index], [1]),
                            params['num_classes'],
                            nms_configs['max_output_size'],
                        ], tf.float32)
                    detections_bs.append(detections)
                detections_bs = postprocess.transform_detections(
                    tf.stack(detections_bs))
            else:
                # These two branches should be equivalent, but currently they are not.
                # TODO(tanmingxing): enable the non_pyfun path after bug fix.
                nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
                    params, kwargs['boxes'], kwargs['scores'],
                    kwargs['classes'], kwargs['image_scales'])
                img_ids = tf.cast(
                    tf.expand_dims(kwargs['image_ids'], -1), nms_scores.dtype)
                # Columns: image id, box columns 1 and 0, the differences
                # (col 3 - col 1) and (col 2 - col 0), score, class.
                detections_bs = [
                    img_ids * tf.ones_like(nms_scores),
                    nms_boxes[:, :, 1],
                    nms_boxes[:, :, 0],
                    nms_boxes[:, :, 3] - nms_boxes[:, :, 1],
                    nms_boxes[:, :, 2] - nms_boxes[:, :, 0],
                    nms_scores,
                    nms_classes,
                ]
                detections_bs = tf.stack(
                    detections_bs, axis=-1, name='detnections')

            if params.get('testdev_dir', None):
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                eval_metric = coco_metric.EvaluationMetric(
                    testdev_dir=params['testdev_dir'])
                # No groundtruth on this path; a dummy tensor is passed.
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, tf.zeros([1]))
            else:
                logging.info('Eval val with groudtruths %s.',
                             params['val_json_file'])
                eval_metric = coco_metric.EvaluationMetric(
                    filename=params['val_json_file'],
                    label_map=params['label_map'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, kwargs['groundtruth_data'])

            # Add metrics to output.
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # Tile the scalar losses to shape [batch_size, 1] so they can be
        # passed to metric_fn alongside the other per-batch inputs.
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])

        cls_outputs = postprocess.to_list(cls_outputs)
        box_outputs = postprocess.to_list(box_outputs)
        params['nms_configs']['max_nms_inputs'] = anchors.MAX_DETECTION_POINTS
        boxes, scores, classes = postprocess.pre_nms(params, cls_outputs,
                                                     box_outputs)
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'image_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
            'boxes': boxes,
            'scores': scores,
            'classes': classes,
        }
        eval_metrics = (metric_fn, metric_fn_inputs)

    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')

        if params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)

            var_map = utils.get_ckpt_var_map(
                ckpt_path=checkpoint,
                ckpt_scope=ckpt_scope,
                var_scope=var_scope,
                skip_mismatch=params['skip_mismatch'])

            tf.train.init_from_checkpoint(checkpoint, var_map)
            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    if is_tpu:
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metrics,
            host_call=utils.get_tpu_host_call(global_step, params),
            scaffold_fn=scaffold_fn,
            training_hooks=training_hooks)
    else:
        # Profile every 1K steps.
        if params.get('profile', False):
            profile_hook = tf.estimator.ProfilerHook(
                save_steps=1000,
                output_dir=params['model_dir'],
                show_memory=True)
            training_hooks.append(profile_hook)

            # Report memory allocation if OOM; it will slow down the running.
            class OomReportingHook(tf.estimator.SessionRunHook):

                def before_run(self, run_context):
                    return tf.estimator.SessionRunArgs(
                        fetches=[],
                        options=tf.RunOptions(
                            report_tensor_allocations_upon_oom=True))

            training_hooks.append(OomReportingHook())

        logging_hook = tf.estimator.LoggingTensorHook(
            {
                'step': global_step,
                'det_loss': det_loss,
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            },
            every_n_iter=params.get('iterations_per_loop', 100),
        )
        training_hooks.append(logging_hook)

        if params['device']['nvgpu_logging']:
            try:
                from third_party.tools import nvgpu  # pylint: disable=g-import-not-at-top
                mem_message = tf.numpy_function(
                    nvgpu.gpu_memory_util_message, [], [tf.string])[0]
                logging_hook_nvgpu = tf.estimator.LoggingTensorHook(
                    tensors={'mem_message': mem_message},
                    every_n_iter=params.get('iterations_per_loop', 100),
                    formatter=lambda x: x['mem_message'].decode('utf-8'),
                )
                training_hooks.append(logging_hook_nvgpu)
            except Exception:  # pylint: disable=broad-except
                # GPU memory logging is optional: any failure while importing
                # or wiring up nvgpu is logged and the build continues.
                # `Exception` (not a bare except) is caught so that
                # KeyboardInterrupt and SystemExit still propagate.
                logging.error('nvgpu error: nvidia-smi format not recognized.')

        # The plain Estimator wants the metric ops themselves, not the
        # (metric_fn, inputs) pair used by TPUEstimator.
        eval_metric_ops = (
            eval_metrics[0](**eval_metrics[1]) if eval_metrics else None)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            scaffold=scaffold_fn() if scaffold_fn else None,
            training_hooks=training_hooks)
def train(args, build_train_graph):
    """Builds the input pipeline and the training graph, then runs training.

    Args:
      args: parsed command-line namespace. The attributes read here are
        `verbose`, `train_glob`, `preprocess_threads`, `patchsize`,
        `batchsize`, `checkpoint_dir`, `last_step`, `save_summary_secs`,
        `logdir` and `save_checkpoint_secs`.
      build_train_graph: callable invoked as `build_train_graph(args, x)` with
        the batched input tensor. Its result is indexed with the keys
        'train_loss', 'train_op' and 'model_name'; other keys are only used
        for summaries.

    Raises:
      RuntimeError: if `args.train_glob` matches no files.
    """
    import datetime
    import json
    from shutil import copy

    verbosity = tf.logging.INFO if args.verbose else tf.logging.ERROR
    tf.logging.set_verbosity(verbosity)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        file_paths = glob.glob(args.train_glob)
        if not file_paths:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(args.train_glob))

        def _load_npy(item):
            # The numpy_function outputs are wrapped in a tuple before being
            # handed back to Dataset.map; see
            # https://stackoverflow.com/a/49459838
            return tuple(tf.numpy_function(read_npy_file_helper, [item], [tf.float32, ]))

        def _random_patch(x):
            return tf.random_crop(x, (args.patchsize, args.patchsize, 3))

        train_dataset = tf.data.Dataset.from_tensor_slices(file_paths)
        train_dataset = train_dataset.shuffle(buffer_size=len(file_paths)).repeat()
        if 'npy' in args.train_glob:
            # reading numpy arrays directly instead of from images
            decode_fn = _load_npy
        else:
            decode_fn = read_png
        train_dataset = (
            train_dataset
            .map(decode_fn, num_parallel_calls=args.preprocess_threads)
            .map(_random_patch)
            .batch(args.batchsize)
            .prefetch(32)
        )

    # num_pixels = args.batchsize * args.patchsize ** 2

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    res = build_train_graph(args, x)
    train_loss = res['train_loss']
    train_op = res['train_op']
    model_name = res['model_name']

    # boiler plate code for logging
    arg_values = vars(args)
    runname = get_runname(arg_values,
                          record_keys=('num_filters', 'num_hfilters', 'lmbda'),
                          prefix=model_name)
    save_dir = os.path.join(args.checkpoint_dir, runname)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # keep more detailed record in text file (appended to on every run)
    with open(os.path.join(save_dir, 'record.txt'), 'a') as record_file:
        record_file.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '\n')
        record_file.write(json.dumps(arg_values, indent=4, sort_keys=True) + '\n')
        record_file.write('\n')
    # will overwrite existing
    with open(os.path.join(save_dir, 'args.json'), 'w') as args_file:
        json.dump(arg_values, args_file, indent=4, sort_keys=True)

    # save a copy of the script that defined the model
    copied_path = copy(model_name + '.py', save_dir)
    print('Saved a copy of %s.py to %s' % (model_name, copied_path))

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]

    save_summary_secs = args.save_summary_secs
    if args.logdir != '':
        # Register summaries for the recognised result keys before merging.
        for key in res:
            wants_scalar = 'bpp' in key or 'loss' in key or key in ('mse', 'psnr')
            if wants_scalar:
                tf.summary.scalar(key, res[key])
            elif key in ('original', 'reconstruction'):
                tf.summary.image(key, res[key], max_outputs=2)

        summary_op = tf.summary.merge_all()
        hooks.append(
            tf.train.SummarySaverHook(
                save_secs=save_summary_secs,
                output_dir=os.path.join(args.logdir, runname),
                summary_op=summary_op))

    session = tf.train.MonitoredTrainingSession(
        hooks=hooks, checkpoint_dir=save_dir,
        save_checkpoint_secs=args.save_checkpoint_secs,
        save_summaries_secs=save_summary_secs)
    with session as sess:
        # StopAtStepHook ends the loop once `args.last_step` is reached.
        while not sess.should_stop():
            sess.run(train_op)
def code_greedy_sample(t_loc,
                       t_scale,
                       p_loc,
                       p_scale,
                       n_bits_per_step,
                       n_steps,
                       seed,
                       rho=1.):
    """Greedily accumulates a sample over `n_steps` steps and encodes the choices.

    At every step, `2**n_bits_per_step` perturbations are drawn from a sharded
    proposal `Normal(p_loc / n_steps, rho * p_scale / sqrt(n_steps))` using the
    seed `1000 * seed + i`. Each perturbation is added to the running sample,
    and the candidate with the highest target log-probability (summed over
    axis 1) becomes the new running sample.

    Args:
      t_loc: location of the target normal distribution.
      t_scale: scale of the target normal distribution.
      p_loc: location of the proposal normal distribution; its shape also
        determines the shape of the running sample.
      p_scale: scale of the proposal normal distribution.
      n_bits_per_step: number of bits per step; `2**n_bits_per_step` candidates
        are drawn at each step.
      n_steps: number of greedy steps.
      seed: base seed for the stateless sampler.
      rho: multiplier applied to the proposal shard scale.

    Returns:
      A pair `(best_sample, sample_index)`: the final running sample, and a
      string tensor obtained by joining the `to_bit_string` encoding of every
      chosen index.
    """
    num_candidates = int(2**n_bits_per_step)

    # The scale divisor needs to be square rooted because
    # we are dealing with standard deviations and not variances
    scale_divisor = np.sqrt(n_steps)

    proposal_shard = tfd.Normal(loc=p_loc / n_steps,
                                scale=rho * p_scale / scale_divisor)
    target = tfd.Normal(loc=t_loc, scale=t_scale)

    # Setup greedy sampler for loop
    def _greedy_step(step, chosen, current):
        perturbations = stateless_normal_sample(loc=proposal_shard.loc,
                                                scale=proposal_shard.scale,
                                                num_samples=num_candidates,
                                                seed=1000 * seed + step)
        stacked_current = tf.tile(tf.expand_dims(current, 0),
                                  [num_candidates, 1])
        candidates = stacked_current + perturbations

        candidate_scores = tf.reduce_sum(target.log_prob(candidates), axis=1)
        winner = tf.argmax(candidate_scores)

        return [step + 1,
                tf.concat((chosen, [winner]), axis=0),
                candidates[winner, :]]

    def _keep_going(step, chosen, current):
        return step < n_steps

    i = tf.constant(0)
    best_sample = tf.zeros(tf.shape(p_loc), dtype=tf.float32)
    sample_index = tf.cast([], tf.int32)

    # The index vector grows by one entry per step, hence the [None] invariant.
    loop_result = tf.while_loop(
        cond=_keep_going,
        body=_greedy_step,
        loop_vars=[i, sample_index, best_sample],
        shape_invariants=[
            i.get_shape(),
            tf.TensorShape([None]),
            best_sample.get_shape()
        ])
    _, sample_index, best_sample = loop_result

    def _encode(x):
        return tf.numpy_function(to_bit_string, [x, n_bits_per_step], tf.string)

    def _join(indices):
        return ''.join([ind.decode('utf-8') for ind in indices])

    # Encode each chosen index separately, then join the pieces into one string.
    sample_index = tf.map_fn(_encode, sample_index, dtype=tf.string)
    sample_index = tf.numpy_function(_join, [sample_index], tf.string)

    return best_sample, sample_index
def loss(summarizer_output, y_truth):
    """Returns the `gamma`-weighted sum of squared histogram differences.

    Both inputs are passed through `hists` (wrapped in a numpy function that
    yields float32), and the ops are created inside `summary_graph`.

    Args:
      summarizer_output: tensor whose histogram is compared to the reference.
      y_truth: reference tensor.

    Returns:
      `gamma * sum((hists(summarizer_output) - hists(y_truth)) ** 2)`.
    """
    with summary_graph.as_default():
        output_hist = tf_v1.numpy_function(
            hists, [summarizer_output], tf_v1.float32)
        truth_hist = tf_v1.numpy_function(hists, [y_truth], tf_v1.float32)
        squared_diff = (output_hist - truth_hist) ** 2
        return gamma * tf_v1.reduce_sum(squared_diff)
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all levels.

    When `params['sumrule']` is set, the one-hot class targets are rewritten
    before the focal loss is computed: for every class id that appears as a key
    of the sum rule, the target mass is moved from that class to the classes
    listed for it.

    Args:
      cls_outputs: an OrderDict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: an OrderDict with keys representing levels and values
        representing box regression targets in [batch_size, height, width,
        num_anchors * 4].
      labels: the dictionary that returned from dataloader that includes
        groundtruth targets.
      params: the dictionary including training parameters specified in
        default_haprams function in this file. The optional 'sumrule' entry
        maps a class id to a list of class ids; both keys and values are
        shifted down by one here to match the class targets.

    Returns:
      total_loss: a scalar tensor representing total loss reducing from class
        and box losses from all levels.
      cls_loss: a scalar float tensor representing total class loss.
      box_loss: a scalar tensor representing total box regression loss, or the
        integer 0 when `params['box_loss_weight']` is falsy.
      box_iou_loss: a tensor representing total box iou loss, or the integer 0
        when `params['iou_loss_type']` is falsy.
    """
    # Sum all positives in a batch for normalization and avoid zero
    # num_positives_sum, which would lead to inf loss during training
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    levels = cls_outputs.keys()

    cls_losses = []
    box_losses = []
    sumrule = {}
    if params.get('sumrule'):
        sumrule = params['sumrule']
        # because of cls_targets -= 1 (so that bg class becomes -1, actual class
        # then starts from 0) we need to subtract 1 from sumrule as well.
        _sumrule = {}
        for k, v in sumrule.items():
            _sumrule[k - 1] = [vv - 1 for vv in v]
        sumrule = _sumrule

    def table_lookup(values, old_onehot, cls_targets_at_level):
        """Redistributes one-hot targets according to `sumrule` (numpy side).

        Args:
          values: the class ids present in `cls_targets_at_level`.
          old_onehot: one-hot encoding of `cls_targets_at_level`.
          cls_targets_at_level: class targets; locations equal to a class id
            select the one-hot rows to rewrite.

        Returns:
          A new array with the rewritten one-hot targets.
        """
        # The arrays handed to a numpy_function may share memory with the
        # TensorFlow tensors they came from, so never modify the input in
        # place: work on a private copy instead.
        new_onehot = np.array(old_onehot, copy=True)
        for val in values:
            if sumrule.get(val):
                new_val = sumrule[val]
                # Sigmoid cross entropy is used, so each target class gets 0.5;
                # with softmax this should be 1.0 / len(new_val).
                prob = 0.5
                if len(new_val) == 1:
                    # leaf node, prob = 1.0
                    prob = 1.0
                matching = np.where(cls_targets_at_level == val)
                _matching_onehot = new_onehot[matching]
                _matching_onehot[:, new_val] = prob
                _matching_onehot[:, val] = 0
                new_onehot[matching] = _matching_onehot
        return new_onehot

    for level in levels:
        # Onehot encoding for classification labels.
        _cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                           params['num_classes'])

        if params.get('sumrule'):
            unique_labels, _ = tf.unique(
                tf.reshape(labels['cls_targets_%d' % level], [-1]))
            # refine one-hot labels so that we map each label to its finest
            # leaves
            cls_targets_at_level = tf.numpy_function(
                table_lookup, [
                    unique_labels, _cls_targets_at_level,
                    labels['cls_targets_%d' % level]
                ], _cls_targets_at_level.dtype)
            # numpy_function loses static shape information; restore it.
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              _cls_targets_at_level.shape)
        else:
            cls_targets_at_level = _cls_targets_at_level

        # Fold the trailing class dimension of the 5-D one-hot tensor into the
        # anchor dimension so the targets become 4-D.
        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list()
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list()
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        if params['data_format'] == 'channels_first':
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])
        # Zero out the loss wherever the class target equals -2.
        cls_loss *= tf.cast(
            tf.expand_dims(
                tf.not_equal(labels['cls_targets_%d' % level], -2), -1),
            tf.float32)
        cls_losses.append(tf.reduce_sum(cls_loss))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
        box_outputs = tf.concat(box_output_list, axis=0)
        box_target_list = [
            tf.reshape(labels['box_targets_%d' % level], [-1, 4])
            for level in levels
        ]
        box_targets = tf.concat(box_target_list, axis=0)
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
        box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(box_outputs, box_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0
    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)

    return total_loss, cls_loss, box_loss, box_iou_loss
def main(argv):
    """Writes visualizations of the first 10 training batches as JPEG files.

    Builds the config from `FLAGS`, reads batches of size 1 from the training
    input function, pastes the groundtruth instance masks, draws groundtruth
    boxes, classes and masks on the denormalized images, and writes each image
    to `data/visualizations/aug_<n>.jpg` (resolved through `root_dir`).

    NOTE(review): the dataset and tensors are iterated directly in Python, so
    this presumably relies on eager execution - confirm.

    Args:
      argv: unused command-line arguments.
    """
    del argv  # Unused.

    params = factory.config_generator(FLAGS.model)
    if FLAGS.config_file:
        params = params_dict.override_params_dict(
            params, FLAGS.config_file, is_strict=True)
    params = params_dict.override_params_dict(
        params, FLAGS.params_override, is_strict=True)
    params.train.input_partition_dims = None
    params.train.num_cores_per_replica = None
    params.architecture.use_bfloat16 = False
    # params.maskrcnn_parser.use_autoaugment = False
    params.validate()
    params.lock()

    # Prepares input functions for train and eval.
    train_input_fn = input_reader.InputFnTest(
        params.train.train_file_pattern,
        params,
        mode=ModeKeys.TRAIN,
        dataset_type=params.train.train_dataset_type)

    batch_size = 1
    dataset = train_input_fn({'batch_size': batch_size})

    # Placeholder display names for class ids 0..49.
    category_index = {
        class_id: {'name': 'test_%d' % class_id} for class_id in range(50)
    }

    for batch_idx, (image_batch, labels_batch) in enumerate(dataset.take(10)):
        # Move the last axis to the front before denormalizing per image.
        image_batch = tf.transpose(image_batch, [3, 0, 1, 2])
        image_batch = tf.map_fn(denormalize_image, image_batch,
                                dtype=tf.uint8, back_prop=False)
        image_shape = tf.shape(image_batch)[1:3]

        gt_boxes = labels_batch['gt_boxes']
        gt_classes = labels_batch['gt_classes']

        pasted_masks = []
        for _, bboxes, masks in zip(image_batch, gt_boxes,
                                    labels_batch['gt_masks']):
            # extract masks
            xywh_boxes = tf.numpy_function(box_utils.yxyx_to_xywh,
                                           [bboxes], tf.float32)
            pasted_masks.append(
                tf.numpy_function(
                    mask_utils.paste_instance_masks,
                    [masks, xywh_boxes, image_shape[0], image_shape[1]],
                    tf.uint8))
        masks_batch = tf.stack(pasted_masks, axis=0)

        # Entries whose class is not greater than -1 get a score of 0.
        scores_mask = tf.cast(tf.greater(gt_classes, -1), tf.float32)
        scores = tf.ones_like(gt_classes, dtype=tf.float32) * scores_mask

        images = draw_bounding_boxes_on_image_tensors(
            image_batch,
            gt_boxes,
            gt_classes,
            scores,
            category_index,
            instance_masks=masks_batch,
            use_normalized_coordinates=False)

        for offset, image in enumerate(images):
            image_bytes = tf.io.encode_jpeg(image)
            out_path = root_dir(
                'data/visualizations/aug_%d.jpg' % (batch_idx * batch_size + offset))
            tf.io.write_file(out_path, image_bytes)