def test_readme_example(self):
    """Check that range-encoding then decoding random symbols is lossless.

    Random integer symbols are drawn, their histogram is turned into the
    cumulative table handed to the coder ops, and the decoded tensor is
    compared element-wise against the (int16-cast) input.
    """
    num_symbols = 10
    symbols = tf.random.uniform((128, 128), 0, num_symbols, dtype=tf.int32)

    # Inclusive running sum of per-symbol counts, prefixed with one zero
    # and reshaped to rank 3 before being passed to the coder ops.
    counts = tf.bincount(symbols, minlength=num_symbols, maxlength=num_symbols)
    table = tf.cumsum(counts, exclusive=False)
    table = tf.reshape(tf.pad(table, [[1, 0]]), [1, 1, -1])

    symbols = tf.cast(symbols, tf.int16)
    bitstring = range_coding_ops.range_encode(symbols, table, precision=14)
    restored = range_coding_ops.range_decode(
        bitstring, tf.shape(symbols), table, precision=14)

    with self.cached_session() as sess:
        expected, actual = sess.run((symbols, restored))
        self.assertAllEqual(expected, actual)
def _compute_w_per_class_vector_for_xentr(self, num_classes, y_gt, eps = 1e-6):
    """Build the per-class weight vector used to re-weight the cross-entropy.

    The behaviour is driven by ``self._reweight_classes_in_cost``:

    * ``None`` or ``{"type": None}``: no re-weighting, a vector of ones.
    * ``"type" == "freq"``: initial weights are inverse class frequencies
      measured on ``y_gt``: ``(N / num_classes) / (count_c + eps)``.
    * ``"type" == "per_c"``: initial weights are taken verbatim from
      ``["prms"]`` (one float per class).

    For the two re-weighting types the initial weights ``y1`` are linearly
    interpolated towards 1.0 between the two epochs in ``["schedule"]``.
    The current epoch is read from ``self._num_epochs_trained_tfv`` and is
    clamped to the schedule, so the weights are ``y1`` before the first
    epoch and 1.0 after the last.

    Args:
        num_classes: Number of classes; length of the returned vector.
        y_gt: Integer label tensor. Only used for ``"freq"``.
        eps: Added to the per-class counts to avoid division by zero.

    Returns:
        A float32 tensor with ``num_classes`` entries.

    Raises:
        ValueError: If ``["type"]`` is neither ``None``, ``"freq"`` nor
            ``"per_c"``. Previously this surfaced later as an unbound
            local variable error.
        AssertionError: If ``["prms"]`` does not hold one value per class,
            or the schedule's start epoch is not below its end epoch.
    """
    cfg = self._reweight_classes_in_cost
    if cfg is None or cfg["type"] is None:
        # No re-weighting.
        return tf.ones( shape=[num_classes], dtype='float32' )

    reweight_type = cfg["type"]
    if reweight_type == "freq":
        # Frequency re-weighting.
        num_lbls_in_ygt = tf.cast( tf.reduce_prod(tf.shape(y_gt)), dtype="float32" )
        # Without the min/max, the length of the count vector can change.
        num_lbls_in_ygt_per_c = tf.bincount(
            arr = y_gt, minlength=num_classes, maxlength=num_classes, dtype="float32" )
        y1 = (1./(num_lbls_in_ygt_per_c + eps)) * (num_lbls_in_ygt / num_classes)
    elif reweight_type == "per_c":
        # cfg["prms"] should be a list, with one float per class.
        assert len(cfg["prms"]) == num_classes
        y1 = tf.constant(cfg["prms"], dtype="float32")
    else:
        raise ValueError(
            "Unknown class re-weighting type %r; expected None, 'freq' or 'per_c'."
            % (reweight_type,))

    # Linear schedule:
    lin_schedule_min_max_epoch = cfg["schedule"]
    assert lin_schedule_min_max_epoch[0] < lin_schedule_min_max_epoch[1]

    # yx - y1 = (x - x1) * (y2 - y1)/(x2 - x1)
    # yx = the multiplier currently wanted, y1 = the multiplier at the
    # beginning, y2 = the multiplier at the end.
    # x = current epoch, x1 = epoch where the linear decrease starts,
    # x2 = epoch where the linear decrease ends.
    y2 = 1.  # Where the weight should be after the end of the schedule.
    x1 = tf.cast(lin_schedule_min_max_epoch[0], dtype="float32")
    x2 = tf.cast(lin_schedule_min_max_epoch[1], dtype="float32")
    x = tf.cast(self._num_epochs_trained_tfv, dtype="float32")
    # Clamp x to [x1, x2] so epochs outside the schedule give the same
    # weights as its end points.
    x = tf.maximum(x1, x)
    x = tf.minimum(x, x2)
    yx = (x - x1) * (y2 - y1)/(x2 - x1) + y1

    return yx
def model_fun(features, labels, mode, params):
    """Estimator model function summing per-atom network outputs into energies.

    One ``BPAtomicNN`` is built per entry of ``params['atom_types']`` (with
    the matching entries of ``params['layers']``, ``params['offsets']`` and
    ``params['act_funs']``). Their flattened outputs are scattered, using the
    ``'<type>_indices'`` features, into a tensor shaped like ``labels``.

    Args:
        features: Dict holding ``'<type>_input'`` and ``'<type>_indices'``
            entries for every atom type.
        labels: Target energies; also defines the shape of the prediction.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict with ``'atom_types'``, ``'layers'``, ``'offsets'`` and
            ``'act_funs'``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    species = params['atom_types']

    # One sub-network per atom type, each in its own re-usable variable scope.
    nets_by_type = {}
    per_type_settings = zip(
        species, params['layers'], params['offsets'], params['act_funs'])
    for kind, layer_sizes, offsets, activations in per_type_settings:
        with _tf.variable_scope('{}_ANN'.format(kind), reuse = _tf.AUTO_REUSE):
            net_input = features['%s_input'%kind]
            nets_by_type[kind] = BPAtomicNN(
                net_input, layer_sizes, offsets, activations)

    # Scatter-add every atomic contribution into its target slot.
    scatter_indices = _tf.concat(
        [features['%s_indices'%kind] for kind in species], 0)
    flat_contributions = _tf.concat(
        [_tf.reshape(nets_by_type[kind].output, [-1]) for kind in species], 0)
    predicted_energies = _tf.scatter_nd(
        scatter_indices, flat_contributions, _tf.shape(labels),
        name = 'E_prediction')

    if mode == _tf.estimator.ModeKeys.PREDICT:
        return _tf.estimator.EstimatorSpec(
            mode, predictions={'energies': predicted_energies})

    # The result is not used below; the op is still created under the
    # name 'NumberOfAtoms', exactly as before.
    _tf.reduce_sum(
        [_tf.bincount(features['%s_indices'%kind]) for kind in species],
        axis = 0, name = 'NumberOfAtoms')

    # Compute loss.
    loss = _tf.losses.mean_squared_error(
        labels=labels, predictions=predicted_energies)
    rmse = _tf.metrics.root_mean_squared_error(labels, predicted_energies)
    _tf.summary.scalar('rmse', rmse[1])

    if mode == _tf.estimator.ModeKeys.EVAL:
        return _tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'rmse': rmse})

    assert mode == _tf.estimator.ModeKeys.TRAIN
    adagrad = _tf.train.AdagradOptimizer(learning_rate=0.1)
    step_op = adagrad.minimize(loss, global_step=_tf.train.get_global_step())
    return _tf.estimator.EstimatorSpec(mode, loss=loss, train_op=step_op)
def _bincount_2d(values, num_values):
    """Count, independently for every row, how often each id occurs.

    Args:
        values: 2D integer tensor of ids to count.
        num_values: Number of columns of the result. Ids outside
            ``[0, num_values)`` are dropped before counting.

    Returns:
        Tensor of shape ``(values.shape[0], num_values)`` whose ``i``th row
        equals ``tf.bincount(values[i, :], maxlength=num_values)`` padded to
        ``num_values`` entries.
    """
    row_count = tf.shape(values)[0]

    # Shift row r by r * num_values so that every row owns a private,
    # non-overlapping range of ids.
    row_offsets = tf.range(row_count)[:, None] * num_values
    shifted = values + row_offsets

    # Ids outside [0, num_values) would land in another row's range.
    in_range = (0 <= values) & (values < num_values)
    kept = tf.boolean_mask(shifted, in_range)

    total_bins = row_count * num_values
    flat_counts = tf.bincount(kept, minlength=total_bins, maxlength=total_bins)
    return tf.reshape(flat_counts, [row_count, num_values])
def _compute_w_per_class_vector_for_xentr(self, num_classes, y_gt):
    """Per-class cross-entropy weights that counter class imbalance.

    ``self._weight_c_in_xentr_and_release_between_eps`` holds two epochs.
    When the first is >= 0 and the second is > 0, the weights start at the
    inverse class frequencies measured on ``y_gt`` and move linearly to 1.0
    between those two epochs. The current epoch comes from
    ``self._num_epochs_trained_tfv`` and is clamped to that range. Otherwise
    no re-weighting is applied and a vector of ones is returned.

    Args:
        num_classes: Number of classes; length of the returned vector.
        y_gt: Integer label tensor whose class frequencies are measured.

    Returns:
        A float32 tensor with ``num_classes`` entries.
    """
    release_eps = self._weight_c_in_xentr_and_release_between_eps
    if not (release_eps[0] >= 0 and release_eps[1] > 0):
        # Negative given. We are not re-weighting.
        return tf.ones(shape=[num_classes], dtype='float32')

    assert release_eps[0] < release_eps[1]

    tiny = 1e-6
    n_labels = tf.cast(tf.reduce_prod(tf.shape(y_gt)), dtype="float32")
    # Without the min/max, the length of the count vector can change.
    n_labels_per_class = tf.bincount(
        arr=y_gt, minlength=num_classes, maxlength=num_classes,
        dtype="float32")

    # Straight line through (ep_start, w_start) and (ep_end, w_end):
    #   w(ep) = (ep - ep_start) * (w_end - w_start) / (ep_end - ep_start) + w_start
    w_start = (1. / (n_labels_per_class + tiny)) * (n_labels / num_classes)
    w_end = 1.
    ep_start = tf.cast(release_eps[0], dtype="float32")
    ep_end = tf.cast(release_eps[1], dtype="float32")
    ep_now = tf.cast(self._num_epochs_trained_tfv, dtype="float32")
    # Clamp to the schedule so epochs outside it reuse the end-point weights.
    ep_now = tf.minimum(tf.maximum(ep_start, ep_now), ep_end)

    return (ep_now - ep_start) * (w_end - w_start) / (ep_end - ep_start) + w_start
def sparse_balanced_crossentropy(logits, labels):
    """Calculate a class-frequency-balanced cross-entropy from sparse labels.

    Every class ``c`` is weighted by ``N / (C * (count_c + 1e-8))``, where
    ``N`` is the number of labels, ``C`` the number of classes (last
    dimension of ``logits``) and ``count_c`` how often ``c`` occurs in
    ``labels``. No gradient flows through the counts.

    Args:
        logits (tf.Tensor): logits prediction for which to calculate the
            cross-entropy error; classes are on the last axis.
        labels (tf.Tensor): sparse integer labels used for the
            cross-entropy error calculation. Its static rank must be known.

    Returns:
        tf.Tensor: Tensor scalar representing the mean loss
    """
    epsilon = tf.constant(np.finfo(np.float32).tiny)
    num_classes = tf.cast(tf.shape(logits)[-1], tf.int32)

    # Lift probabilities below the smallest normal float32 before the log.
    probs = tf.nn.softmax(logits)
    probs += tf.cast(tf.less(probs, epsilon), tf.float32) * epsilon
    log = -1. * tf.log(probs)

    onehot_labels = tf.one_hot(labels, num_classes)

    # Pin both min and max length: without maxlength a label >= num_classes
    # would lengthen the count vector and break the reshape below.
    class_frequencies = tf.stop_gradient(tf.bincount(
        labels, minlength=num_classes, maxlength=num_classes,
        dtype=tf.float32))

    weights = (1. / (class_frequencies + tf.constant(1e-8)))
    weights *= (tf.cast(tf.reduce_prod(tf.shape(labels)), tf.float32)
                / tf.cast(num_classes, tf.float32))

    # Shape [1, ..., 1, C] so the weights broadcast over the label axes.
    # -1 lets TensorFlow infer C even when logits' static last dim is None.
    new_shape = [1] * len(labels.get_shape().as_list()) + [-1]
    weights = tf.reshape(weights, new_shape)

    loss = tf.reduce_mean(tf.reduce_sum(onehot_labels * log * weights,
                                        axis=-1))
    return loss
def build_stochastic_layer(self, layer):
    """Build the two-level stochastic layer and return the flattened samples.

    Two dense heads on ``layer`` produce parameters ``a`` and ``b``; uniform
    noise is transformed with them into stick-breaking fractions ``vs``,
    which are unrolled into ``cfg["L2_truncation_level"]`` stick weights
    (``self.L2_z_3d``). A third head produces ``self.phi_prob``, which is
    passed through ``gumbel_softmax`` and combined with the stick weights
    into ``self.z_3d``.

    Besides the return value this sets, among others, ``self.a``,
    ``self.b``, ``self.vs``, ``self.L2_z_3d``, ``self.phi_logits``,
    ``self.phi_prob``, ``self.soft_z_3d``, ``self.c_4d``,
    ``self.gumbel_tau``, ``self.z_3d``, and, depending on
    ``cfg["effective_indicator"]``, ``self.average_used_dims`` and
    ``self.effective_dims``.

    Args:
        layer: Input tensor to the dense heads (assumed to be
            (batch size) x (features) - TODO confirm).

    Returns:
        ``self.z_3d`` reshaped to ``[-1, self.topic_dim]``.
    """
    # Posterior parameter heads; they differ only in bias initialiser and name.
    self.a = tf.layers.dense(
        layer, self.cfg["L2_truncation_level"]-1,
        activation=self.cfg["dirichlet_ab_fct"],
        use_bias=self.cfg["dirichlet_ab_use_bias"],
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer(),
        name="posterior_a_output")
    self.b = tf.layers.dense(
        layer, self.cfg["L2_truncation_level"]-1,
        activation=self.cfg["dirichlet_ab_fct"],
        use_bias=self.cfg["dirichlet_ab_use_bias"],
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.constant_initializer(self.cfg["b_init"]),
        name="posterior_b_output")
    # Noise of shape (MC samples) x (batch size) x (L2 truncation level - 1),
    # kept away from exactly 0 and 1.
    uniform_samples = tf.random_uniform(
        (self.cfg["MC_samples"], tf.shape(self.x)[0],
         self.cfg["L2_truncation_level"]-1),
        minval=0.01, maxval=0.99, dtype=tf.floatX)
    # Small offset, presumably to keep 1 / a and 1 / b finite - TODO confirm.
    self.a = self.a + 1e-5
    self.b = self.b + 1e-5
    # Transform of the uniform noise into stick fractions; this has the form
    # of an inverse-CDF sample of a Kumaraswamy(a, b) variable - confirm intent.
    self.vs = (1 - uniform_samples ** (1 / self.b)) ** (1 / self.a)

    # Stick breaking: segment i takes the fraction vs[..., i] of whatever is
    # left of the stick; the final remainder becomes the last component.
    stick_segments_lst = []
    remaining_sticks = tf.ones(
        (self.cfg["MC_samples"], tf.shape(self.x)[0]), dtype=tf.floatX)
    for i in range(self.cfg["L2_truncation_level"] - 1):
        stick_segments_lst.append(remaining_sticks * self.vs[:, :, i])
        remaining_sticks = remaining_sticks * (1 - self.vs[:, :, i])
    # (L2 truncation level - 1) x (MC samples) x (batch size)
    stick_segments = tf.stack(stick_segments_lst)
    # After the transpose: (MC samples) x (batch size) x (L2 truncation level)
    self.L2_z_3d = tf.transpose(
        tf.concat(
            (stick_segments, tf.expand_dims(remaining_sticks, axis=0)),
            axis=0),
        (1, 2, 0))

    if not self.cfg["posterior_one_c"]:
        # Multinomial logits: a separate distribution over topics for every
        # L2 component.
        self.phi_logits = tf.layers.dense(
            layer, self.cfg["L2_truncation_level"] * self.topic_dim,
            activation=None,
            use_bias=self.cfg["dirichlet_phi_use_bias"],
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.zeros_initializer(),
            name="posterior_phi_output")
        self.phi_prob = tf.nn.softmax(
            tf.reshape(
                self.phi_logits,
                [-1, self.cfg["L2_truncation_level"], self.topic_dim]))
    else:
        # A single distribution over topics, tiled across all L2 components.
        self.phi_logits = tf.layers.dense(
            layer, self.topic_dim,
            activation=None,
            use_bias=self.cfg["dirichlet_phi_use_bias"],
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.zeros_initializer(),
            name="posterior_phi_output")
        self.phi_prob = tf.tile(
            tf.expand_dims(
                tf.nn.softmax(
                    tf.reshape(self.phi_logits, [-1, self.topic_dim])),
                axis=1),
            [1, self.cfg["L2_truncation_level"], 1])

    # c_4d: (MC samples) x (batch size) x (L2 truncation level) x (L1 truncation level/topic dim)
    # soft_z_3d mixes with the probabilities directly, z_3d with the
    # gumbel_softmax output; both sum out the L2 axis.
    self.soft_z_3d = tf.reduce_sum(
        tf.expand_dims(self.L2_z_3d, axis=-1) * self.phi_prob, axis=2)
    self.c_4d, self.gumbel_tau = gumbel_softmax(
        self.phi_prob, self.training_placeholder,
        tau_init=self.cfg["gumbel_tau_init"],
        tau_trainable=self.cfg["gumbel_tau_trainable"],
        MC_samples=self.cfg["MC_samples"],
        straight_through=True)
    self.z_3d = tf.reduce_sum(
        tf.expand_dims(self.L2_z_3d, axis=-1) * self.c_4d, axis=2)

    # Bookkeeping of which topic dimensions count as "effective".
    # NOTE(review): for any other indicator value neither attribute is set.
    if self.cfg["effective_indicator"] == "average":
        # Mean weight per topic over samples and batch, scaled by batch size.
        self.average_of_every_topic = tf.reduce_mean(
            self.z_3d, axis=(0, 1)) * tf.cast(
                tf.shape(self.x)[0], tf.floatX)
        effective_dims = self.average_of_every_topic > self.cfg["effective_threshold"]
        self.average_used_dims = tf.reduce_sum(
            tf.cast(effective_dims, tf.floatX))
        self.effective_dims = tf.squeeze(tf.where(effective_dims))
    elif self.cfg["effective_indicator"] == "assignment" or self.cfg["effective_indicator"] == "ratio":
        # Count how often each topic is the arg-max of a sample.
        self.assignment_of_every_topic = tf.bincount(
            tf.cast(tf.argmax(self.z_3d, axis=-1), tf.int32),
            minlength=self.topic_dim)
        # FIXME: for now, if MC_sample is not 1. This is not correct.
        effective_dims_bool = tf.cast(
            self.assignment_of_every_topic,
            tf.floatX) > self.cfg["assignment_threshold"] * tf.cast(
                tf.shape(self.x)[0], tf.floatX) * self.cfg["MC_samples"]
        self.average_used_dims = tf.reduce_sum(
            tf.cast(effective_dims_bool, tf.floatX))
        self.effective_dims = tf.squeeze(tf.where(effective_dims_bool))

    # Merge the sample and batch axes.
    z = tf.reshape(self.z_3d, [-1, self.topic_dim])
    return z
def build_stochastic_layer(self, layer):
    """Build the stick-breaking stochastic layer and return flattened samples.

    Two dense heads on ``layer`` produce parameters ``a`` and ``b`` with
    ``topic_dim - 1`` units each. Uniform noise is transformed with them
    into stick fractions ``vs``, which are unrolled into ``topic_dim``
    stick weights stored in ``self.z_3d``.

    Besides the return value this sets ``self.a``, ``self.b``, ``self.vs``,
    ``self.z_3d``, optionally ``self.prior_a`` / ``self.prior_b``, and,
    depending on ``cfg["effective_indicator"]``, ``self.average_used_dims``
    and ``self.effective_dims``.

    Args:
        layer: Input tensor to the dense heads (assumed to be
            (batch size) x (features) - TODO confirm).

    Returns:
        ``self.z_3d`` reshaped to ``[-1, self.topic_dim]``.
    """
    # Posterior parameter heads; they differ only in bias initialiser and name.
    self.a = tf.layers.dense(
        layer,
        self.topic_dim - 1,
        activation=self.cfg["dirichlet_ab_fct"],
        use_bias=self.cfg["dirichlet_ab_use_bias"],
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer(),
        name="posterior_a_output")
    self.b = tf.layers.dense(
        layer,
        self.topic_dim - 1,
        activation=self.cfg["dirichlet_ab_fct"],
        use_bias=self.cfg["dirichlet_ab_use_bias"],
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.constant_initializer(self.cfg["b_init"]),
        name="posterior_b_output")
    # Noise of shape (MC samples) x (batch size) x (topic_dim - 1), kept away
    # from exactly 0 and 1.
    uniform_samples = tf.random_uniform(
        (self.cfg["MC_samples"], tf.shape(self.x)[0], self.topic_dim - 1),
        minval=0.01,
        maxval=0.99,
        dtype=tf.floatX)
    if self.cfg.get("bias_on_prior", False):
        # Shift the network outputs by the configured prior parameters.
        self.prior_a = np.floatX(self.cfg["prior_alpha"])
        self.prior_b = np.floatX(self.cfg["prior_beta"])
        if self.cfg["pitman_yor"]:
            # prior_b becomes a vector that grows linearly with the stick index.
            self.prior_b = np.floatX(
                np.arange(self.topic_dim - 1) * (1 - self.prior_a) +
                self.prior_b)
        self.b = self.b + self.prior_b
        self.a = self.a + self.prior_a
    else:
        # Small offset, presumably to keep 1 / a and 1 / b finite - TODO confirm.
        self.a = self.a + 1e-5
        self.b = self.b + 1e-5
    # Transform of the uniform noise into stick fractions; this has the form
    # of an inverse-CDF sample of a Kumaraswamy(a, b) variable - confirm intent.
    self.vs = (1 - uniform_samples**(1 / self.b))**(1 / self.a)

    # Construct the topic vector by a stick-breaking process: segment i takes
    # the fraction vs[..., i] of whatever is left of the stick; the final
    # remainder becomes the last component.
    stick_segments_lst = []
    remaining_sticks = tf.ones(
        (self.cfg["MC_samples"], tf.shape(self.x)[0]), dtype=tf.floatX)
    for i in range(self.topic_dim - 1):
        stick_segments_lst.append(remaining_sticks * self.vs[:, :, i])
        remaining_sticks = remaining_sticks * (1 - self.vs[:, :, i])
    stick_segments = tf.stack(
        stick_segments_lst
    )  # (topic_dim - 1) x (MC samples) x (batch size)
    # After the transpose: (MC samples) x (batch size) x (topic_dim)
    self.z_3d = tf.transpose(
        tf.concat(
            (stick_segments, tf.expand_dims(remaining_sticks, axis=0)),
            axis=0), (1, 2, 0))

    # Bookkeeping of which topic dimensions count as "effective".
    # NOTE(review): for any other indicator value neither attribute is set.
    if self.cfg["effective_indicator"] == "average":
        # Mean weight per topic over samples and batch, scaled by batch size.
        self.average_of_every_topic = tf.reduce_mean(
            self.z_3d, axis=(0, 1)) * tf.cast(
                tf.shape(self.x)[0], tf.floatX)
        effective_dims = self.average_of_every_topic > self.cfg[
            "effective_threshold"]
        self.average_used_dims = tf.reduce_sum(
            tf.cast(effective_dims, tf.floatX))
        self.effective_dims = tf.squeeze(tf.where(effective_dims))
    elif self.cfg["effective_indicator"] == "assignment" or self.cfg[
            "effective_indicator"] == "ratio":
        # Count how often each topic is the arg-max of a sample.
        self.assignment_of_every_topic = tf.bincount(
            tf.cast(tf.argmax(self.z_3d, axis=-1), tf.int32),
            minlength=self.topic_dim)
        # FIXME: for now, if MC_sample is not 1. This is not correct.
        effective_dims_bool = tf.cast(
            self.assignment_of_every_topic,
            tf.floatX) > self.cfg["assignment_threshold"] * tf.cast(
                tf.shape(self.x)[0], tf.floatX) * self.cfg["MC_samples"]
        self.average_used_dims = tf.reduce_sum(
            tf.cast(effective_dims_bool, tf.floatX))
        self.effective_dims = tf.squeeze(tf.where(effective_dims_bool))

    # Merge the sample and batch axes.
    z = tf.reshape(self.z_3d, [-1, self.topic_dim])
    return z
def tensor_operations(num_classes):
    """Create the tensor operations used in training and testing.

    Six models are built (indices 0-2 via ``conv_model``, 3-5 via ``model``),
    each with its own input placeholder, loss, Adam train op and accuracy.
    Their (optionally weighted) softmax outputs are then combined into an
    ensemble prediction according to the module-level ``AGGREGATE_METHOD``.

    Reads the module-level names ``num_features``, ``window_size``,
    ``USE_WEIGHTS``, ``ALPHA`` and ``AGGREGATE_METHOD``.

    Args:
        num_classes: Number of output classes handed to the model builders.

    Returns:
        dict with the keys ``'ph'`` (placeholders), ``'loss_arr'``,
        ``'train_op_arr'``, ``'predictions_arr'``, ``'accuracy_arr'``
        (one entry per model), ``'weights'`` (per-model scaling variables,
        empty unless ``USE_WEIGHTS``), ``'logits'`` (logits of the *last*
        model only), ``'all_preds'``, ``'model_preds'``,
        ``'model_top_10_values'``, ``'model_top_10_indices'``,
        ``'test_prob'``, ``'test_class'`` and ``'test_correct_pred'``.

    Raises:
        SystemExit: If ``AGGREGATE_METHOD == 'most_common'``, which is not
            implemented (a message is printed first).
        ValueError: If ``AGGREGATE_METHOD`` has any other unsupported value.
            Previously this surfaced as an unbound local variable error.
    """
    num_models = 6

    # Placeholders: labels, a training flag, and one feature input per model.
    ph = {
        "y": tf.placeholder(tf.int32, shape=(None)),
        "train": tf.placeholder(tf.bool),
    }
    for c3d_depth in range(num_models):
        ph["x_" + str(c3d_depth)] = tf.placeholder(
            tf.float32,
            shape=(None, num_features[c3d_depth], window_size[c3d_depth]))

    # Tensor operations, one entry per model.
    loss_arr = []
    train_op_arr = []
    predictions_arr = []
    accuracy_arr = []
    weights = {}

    for c3d_depth in range(num_models):
        # Logits: the first three models use the convolutional builder.
        if c3d_depth < 3:
            logits = conv_model(ph, c3d_depth, num_classes)
        else:
            logits = model(ph, c3d_depth, num_classes)

        # Probabilities, optionally scaled by a per-model variable.
        probabilities = tf.nn.softmax(logits, name="softmax_tensor")
        if USE_WEIGHTS:
            weights[c3d_depth] = tf.get_variable(
                "weight_%s" % c3d_depth,
                shape=[1],
                initializer=tf.ones_initializer())
            probabilities = tf.multiply(probabilities, weights[c3d_depth],
                                        "probability_weight")

        # Class prediction.
        predictions = {
            "classes": tf.argmax(input=logits, axis=1, output_type=tf.int32),
            "probabilities": probabilities,
        }
        predictions_arr.append(predictions)

        # Training / optimisation.
        loss = tf.losses.sparse_softmax_cross_entropy(labels=ph["y"],
                                                      logits=logits)
        optimizer = tf.train.AdamOptimizer(learning_rate=ALPHA)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        loss_arr.append(loss)
        train_op_arr.append(train_op)

        # Evaluation.
        correct_pred = tf.equal(predictions["classes"], ph["y"])
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        accuracy_arr.append(accuracy)

    # Combine all of the models together for the ensemble.
    # Stacking puts the model axis first; the transposes move it last for
    # all_preds and to the middle for model_preds.
    all_preds = tf.stack([x["probabilities"] for x in predictions_arr])
    all_preds = tf.transpose(all_preds, [1, 2, 0])
    model_preds = tf.transpose(all_preds, [0, 2, 1])
    model_top_10_values, model_top_10_indices = tf.nn.top_k(model_preds, k=10)
    model_preds = tf.argmax(model_preds, axis=2, output_type=tf.int32)

    if AGGREGATE_METHOD == 'average':
        # Average over the models' softmaxes.
        test_prob = tf.reduce_mean(all_preds, axis=2)
        test_class = tf.argmax(test_prob, axis=1, output_type=tf.int32)
    elif AGGREGATE_METHOD == 'most_common':
        # Majority voting was never finished. The old draft after the exit
        # was unreachable and referenced an undefined name, so it is gone.
        print("Aggregate method most_common not implemented")
        sys.exit(1)
    else:
        raise ValueError(
            "Unknown AGGREGATE_METHOD: %r" % (AGGREGATE_METHOD,))

    # Verify if the ensemble prediction is correct.
    test_correct_pred = tf.equal(test_class, ph["y"])

    operations = {
        'ph': ph,
        'loss_arr': loss_arr,
        'train_op_arr': train_op_arr,
        'predictions_arr': predictions_arr,
        'accuracy_arr': accuracy_arr,
        'weights': weights,
        'logits': logits,
        'all_preds': all_preds,
        'model_preds': model_preds,
        'model_top_10_values': model_top_10_values,
        'model_top_10_indices': model_top_10_indices,
        'test_prob': test_prob,
        'test_class': test_class,
        'test_correct_pred': test_correct_pred,
    }
    return operations
def domi(rv):
    """Return the index of the largest entry of ``tf.bincount(rv)``.

    In other words, the value that occurs most often in ``rv``.
    """
    occurrences = tf.bincount(rv)
    return tf.argmax(occurrences)
def crnn_fn(features, labels, mode, params):
    """Estimator model function: CNN + bidirectional LSTM trained with CTC.

    :param features: dict with 'images' and 'images_widths'; 'filenames' is
        optional and copied into the predictions when present.
    :param labels: labels. flattened (1D) array of label strings; each is
        split into characters and mapped to codes with ``parameters.alphabet``
        / ``parameters.alphabet_codes``. Not used in PREDICT mode.
    :param mode: a ``tf.estimator.ModeKeys`` value.
    :param params: dict whose 'Params' entry is a ``Params`` instance.
        NOTE: ``keep_prob_dropout`` is overwritten on that instance.
    :return: a ``tf.estimator.EstimatorSpec`` with predictions ('prob',
        'raw_predictions', 'score', 'words' and optionally 'filenames'),
        plus loss / train op / eval metrics where the mode requires them.
    """
    parameters = params.get('Params')
    assert isinstance(parameters, Params)

    # Dropout is only active while training.
    if mode == tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 0.7
    else:
        parameters.keep_prob_dropout = 1.0

    conv = deep_cnn(features['images'],
                    (mode == tf.estimator.ModeKeys.TRAIN),
                    summaries=False)
    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                params=parameters,
                                                summaries=False)

    # Compute seq_len from image width
    n_pools = CONST.DIMENSION_REDUCTION_W_POOLING  # 2x2 pooling in dimension W on layer 1 and 2
    seq_len_inputs = tf.divide(
        features['images_widths'], n_pools, name='seq_len_input_op') - 1

    predictions_dict = {
        'prob': logprob,
        'raw_predictions': raw_pred,
    }
    # 'filenames' is optional in the input features.
    try:
        predictions_dict['filenames'] = features['filenames']
    except KeyError:
        pass

    if not mode == tf.estimator.ModeKeys.PREDICT:
        # Alphabet and codes
        keys = [c for c in parameters.alphabet]
        values = parameters.alphabet_codes

        # Convert string label to code label; unknown characters map to -1.
        with tf.name_scope('str2code_conversion'):
            table_str2int = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
            splited = tf.string_split(
                labels, delimiter=''
            )  # TODO change string split to utf8 split in next tf version
            codes = table_str2int.lookup(splited.values)
            sparse_code_target = tf.SparseTensor(splited.indices, codes,
                                                 splited.dense_shape)

        # Number of characters per label: count how often each row index
        # occurs among the sparse entries. minlength is dimension 1 of
        # 'prob' (presumably the batch size, given time_major=True below -
        # confirm).
        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0], tf.int32),
            minlength=tf.shape(predictions_dict['prob'])[1])

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error
        # NOTE(review): the dependency is a plain comparison op, not an
        # assertion, so it looks like nothing is raised when the check is
        # false - confirm whether tf.Assert was intended.
        with tf.control_dependencies([
                tf.less_equal(sparse_code_target.dense_shape[1],
                              tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=
                True,  # returns zero gradient in case it happens -> ema loss = NaN
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)
            loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

        global_step = tf.train.get_or_create_global_step()
        # Create an ExponentialMovingAverage object
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        # Create the shadow variables, and add op to maintain moving averages
        maintain_averages_op = ema.apply([loss_ctc])
        # NOTE(review): loss_ema is not used anywhere below.
        loss_ema = ema.average(loss_ctc)

        # Train op
        # --------
        learning_rate = tf.train.exponential_decay(
            parameters.learning_rate,
            global_step,
            parameters.learning_decay_steps,
            parameters.learning_decay_rate,
            staircase=True)

        # NOTE(review): any other optimizer name leaves `optimizer` unbound
        # and fails at the minimize() call below.
        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)

        # The train op is the moving-average update, forced to run after the
        # collected update ops and the optimizer step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc, global_step=global_step)
        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # Summaries
        # ---------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    # Decoding happens whenever `mode` is one of the three listed keys.
    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            # Codes missing from the decoding alphabet are rendered as '?'.
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=2)
            # Score: gap between the two returned paths' log-probabilities.
            predictions_dict['score'] = tf.subtract(
                log_probability[:, 0], log_probability[:, 1]
            )  # around 10.0 -> seems pretty sure, less than 5.0 bit unsure, some errors/challenging images

            # Keep only the best path from here on.
            sparse_code_pred = sparse_code_pred[0]

            # Characters per predicted sequence, counted like the label
            # lengths above.
            sequence_lengths_pred = tf.bincount(
                tf.cast(sparse_code_pred.indices[:, 0], tf.int32),
                minlength=tf.shape(predictions_dict['prob'])[1])

            pred_chars = table_int2str.lookup(sparse_code_pred)
            predictions_dict['words'] = get_words_from_chars(
                pred_chars.values, sequence_lengths=sequence_lengths_pred)

            tf.summary.text('predicted_words', predictions_dict['words'][:10])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred, tf.cast(sparse_code_target,
                                          dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to decoding alphabet to compare predicted and ground-truth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            # NOTE(review): these rebindings happen after eval_metric_ops was
            # built and the new names are not used afterwards.
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy],
                                message='-- Accuracy : ')
    else:
        eval_metric_ops = None

    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss_ctc,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=tf.train.Scaffold()
        # scaffold=tf.train.Scaffold(init_fn=None)  # Specify init_fn to restore from previous model
    )
def distribuited_k_means(data_batch, K, GPU_names, n_max_iters):
    """Run k-means on ``data_batch`` with the points split across devices.

    The data is split into ``len(GPU_names)`` chunks. Each device assigns
    its chunk to the nearest centroid and computes per-cluster means; the
    CPU combines them, weighted by cluster counts, into new centroids.
    Exactly ``n_max_iters`` update steps are run (there is no convergence
    test).

    Args:
        data_batch: 2D array of observations (rows) by dimensions (columns).
        K: Number of clusters.
        GPU_names: Device strings; one data chunk is placed on each.
        n_max_iters: Number of update iterations.
            NOTE(review): with 0 iterations ``result``, ``centroids`` and
            ``i`` are never bound and building the result dict fails.

    Returns:
        dict with 'end_center', 'cluster_idx', 'centroids', 'init_center',
        'setup_time', 'initialization_time', 'computation_time' and
        'n_iter'.
    """
    setup_ts = time.time()
    # NOTE(review): number_of_gpus and partial_results are never read.
    number_of_gpus = len(GPU_names)

    # Chunk sizes matching np.array_split, reused for tf.split below.
    sizes = [len(arg) for arg in np.array_split( data_batch, len(GPU_names))]

    # Per-device distance matrices; replaced by one argmin tensor further down.
    result_matrix = [[] for _ in GPU_names]
    partial_directions = []
    partial_values = []
    partial_results = []

    # NOTE(review): relies on a private scikit-learn helper.
    initial_centers = k_means_._init_centroids(data_batch, K, init='k-means++')

    tf.reset_default_graph()
    with tf.name_scope('global'):
        with tf.device('/cpu:0'):
            all_data = tf.placeholder(data_batch.dtype, shape=(data_batch.shape), name='all_data')
            parts = tf.split(all_data, sizes, 0)
            global_centroids = tf.Variable(initial_centers)

    for GPU_num in range(len(GPU_names)):
        GPU_name = GPU_names[GPU_num]
        (X_mat) = parts[GPU_num]
        (N, M) = X_mat.get_shape().as_list()
        with tf.name_scope('scope_' + str(GPU_num)):
            with tf.device(GPU_name) :
                ####
                # In the comments we denote :
                # => N = Number of Observations
                # => M = Number of Dimensions
                # => K = Number of Centers
                ####

                # Data for GPU GPU_num to clusterize. The variable is
                # initialised from the placeholder, which is why the
                # initializer run below needs the feed.
                X = tf.Variable(X_mat)

                # Reshapes rep_centroids and rep_points to format N x K x M so that
                # the 2 matrices have the same size
                rep_centroids = tf.reshape(tf.tile(global_centroids, [N, 1]), [N, K, M])
                rep_points = tf.reshape(tf.tile(X, [1, K]), [N, K, M])

                # Calculates sum_squares, a matrix of size N x K
                # This matrix is not sqrt((X-Y)^2), it is just (X-Y)^2
                # Since we need just the argmin(sqrt((X-Y)^2)) which is equal to
                # argmin((X-Y)^2), taking the root would be a waste of computation
                subtraction = tf.subtract(rep_points, rep_centroids)
                square = tf.square(subtraction)
                sum_squares = tf.reduce_sum(square, axis = 2)

                # Use argmin to select the lowest-distance centroid for
                # every point (one index per observation).
                best_centroids = tf.argmin(sum_squares, axis = 1)
                result_matrix[GPU_num] = sum_squares

                # Mean of the points assigned to each centroid.
                # NOTE(review): presumably a cluster with no points on this
                # device yields a NaN mean here - confirm.
                means = []
                for c in range(K):
                    aux_points = tf.gather(X, tf.reshape(tf.where(tf.equal(best_centroids, c)), [1,-1]))
                    means.append(tf.reduce_mean(aux_points, axis=[1]))
                new_centroids = tf.concat(means, 0)

            with tf.device('/cpu:0'):
                # Points per cluster on this device, used to weight its means.
                y_count = tf.cast( tf.bincount(tf.to_int32(best_centroids), maxlength = K, minlength = K), dtype = tf.float64)
                partial_mu = tf.multiply( tf.transpose(new_centroids), y_count )
                partial_directions.append( y_count )
                partial_values.append( partial_mu )

    with tf.name_scope('global') :
        with tf.device('/cpu:0') :
            # Cluster index of every point, over all devices.
            result_matrix = tf.argmin(tf.concat(result_matrix, 0), axis = 1)
            # New centre = sum of count-weighted device means / total count.
            sum_direction = tf.add_n( partial_directions )
            sum_mu = tf.add_n( partial_values )
            # M here is the value left over from the last loop iteration.
            rep_sum_direction = tf.reshape(tf.tile(sum_direction, [M]), [M, K])
            new_centers = tf.transpose( tf.div(sum_mu, rep_sum_direction) )
            update_centroid = tf.group( global_centroids.assign(new_centers) )

    setup_time = float( time.time() - setup_ts )

    config = tf.ConfigProto( allow_soft_placement = True )
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session( config = config ) as sess:
        initialization_ts = time.time()
        sess.run(tf.global_variables_initializer(), feed_dict={all_data: data_batch})
        initialization_time = float( time.time() - initialization_ts )

        computation_time = 0.0
        for i in range(n_max_iters):
            aux_ts = time.time()
            # NOTE(review): `best_centroids` is the tensor from the LAST
            # device only, so `centroids` holds that chunk's assignments.
            # `result` is read in the same run as the assign; whether it is
            # the pre- or post-update value is not fixed here - confirm.
            [result, centroids, _] = sess.run([global_centroids, best_centroids, update_centroid])
            computation_time += float(time.time() - aux_ts)

        cluster_idx = sess.run(result_matrix, feed_dict={all_data: data_batch})

    end_resut = {
        'end_center' : result ,
        'cluster_idx' : cluster_idx ,
        'centroids' : centroids ,
        'init_center' : initial_centers ,
        'setup_time' : setup_time ,
        'initialization_time' : initialization_time,
        'computation_time' : computation_time ,
        'n_iter' : i+1
    }
    return end_resut
def approx_kl_divergence(p, logits, partitions, partitions_dist, scope=None,
                         partial_loss=True, partitions_dist_scale=1.0,
                         skip_normalization=False):
    """Per-element terms of an importance-weighted cross-entropy estimate.

    Each element is weighted by ``partitions_dist[k] / count[k]``, where
    ``k`` is the element's partition and ``count[k]`` is the number of
    sampled elements of that row falling into partition ``k``. The weights
    are used both for normalising ``p`` and ``exp(logits)`` and for scaling
    the returned terms.

    Args:
        p: Tensor with shape [..., N] whose elements are samples from the
            (unnormalized) target distribution.
        logits: Tensor with the same type and shape as ``p`` whose elements
            are samples from the predicted logits.
        partitions: Integer tensor giving, for every element of ``p`` and
            ``logits``, the index of the partition it comes from. All
            elements of one partition are assumed to have the same value.
            ``max(partitions) < M``.
        partitions_dist: Tensor with shape [M] giving the relative size of
            each of the M partitions.
        scope: Optional variable scope name.
        partial_loss: If True the result is scaled by
            ``partitions_dist_scale``, otherwise by 1.0.
        partitions_dist_scale: Scale applied as described above; its log is
            also added to the log-ratio term.
        skip_normalization: If True, all weights are replaced by ones and
            ``partitions_dist_scale`` is forced to 1.0.

    Returns:
        Tensor of per-element loss terms (not reduced), shaped like the
        flattened ``p`` produced by ``_flatt_bach``.
    """
    # Fix: the scope's `values` previously listed `partitions` twice and
    # left out `partitions_dist`.
    with tf.variable_scope(scope, 'approx_kl_divergence',
                           values=[p, logits, partitions, partitions_dist]):
        util.add_extra_tensor('logits', logits)
        util.add_extra_tensor('partitions', partitions)
        util.add_extra_tensor('partitions_dist', partitions_dist)

        partitions_dist = tf.convert_to_tensor(partitions_dist)
        p, logits, partitions = [
            _flatt_bach(t) for t in [p, logits, partitions]
        ]
        m = partitions_dist.shape[0].value

        ## Count the number of elements in each partition
        count = tf.map_fn(
            lambda arr: tf.bincount(arr, minlength=m, maxlength=m),
            partitions)
        ## count shape = [B, M]
        count.set_shape([partitions.shape[0].value, m])

        ## Adjust the weights based on the counts.
        ## Empty partitions give inf here, but they are never gathered, so
        ## inf values won't show up in the weights.
        partitions_dist2 = tf.truediv(partitions_dist[tf.newaxis],
                                      tf.cast(count, tf.float32))
        weights = util.batched_gather(partitions, partitions_dist2)

        if skip_normalization:
            weights = tf.ones_like(weights)
            partitions_dist_scale = 1.0

        ## Subtract the (finite) row maximum before exponentiating;
        ## see the tf.reduce_logsumexp implementation.
        raw_max = tf.reduce_max(logits, axis=-1)
        my_max = tf.stop_gradient(
            tf.where(tf.is_finite(raw_max), raw_max, tf.zeros_like(raw_max)))
        logits = logits - my_max[..., tf.newaxis]

        q_normalizer = tf.reduce_sum(weights * tf.exp(logits),
                                     keep_dims=True, axis=-1)
        p_normalizer = tf.reduce_sum(weights * p, keep_dims=True, axis=-1)
        p = tf.truediv(p, p_normalizer)

        loss_scale = partitions_dist_scale if partial_loss else 1.0

        p = p * weights
        return -loss_scale * p * (logits - tf.log(q_normalizer) +
                                  tf.log(partitions_dist_scale))
def _compute_word_overlap(context_ids, context_len, question_ids,
                          question_len, reduce_type, weighted, vocab_df):
    """Score every context by its word overlap with the question.

    Args:
      context_ids: int32 tensor [batch_size, num_contexts, max_context_len].
      context_len: int32 tensor [batch_size, num_contexts].
      question_ids: int32 tensor [batch_size, max_question_len].
      question_len: int32 tensor [batch_size].
      reduce_type: how matches are reduced per question word. "max" allows at
        most one match per question word; "sum" adds up all matches.
      weighted: whether to divide each match by the document frequency of the
        question word (IDF weighting).
      vocab_df: tensor [vocab_size] of word frequencies. When None, the
        frequencies are counted from `context_ids` of the current batch.

    Returns:
      float32 tensor [batch_size, num_contexts] of overlap scores.

    Raises:
      Exception: if `reduce_type` is neither "max" nor "sum".
    """
    max_question_len = tf.shape(question_ids)[1]
    max_context_len = tf.shape(context_ids)[2]

    # float32 [batch_size, num_contexts, question_len, context_len]
    matches = tf.to_float(
        _word_overlap_helper(question_ids=question_ids,
                             context_ids=context_ids))

    # Padding positions on either side must not count as matches.
    # float32 [batch_size, question_len]
    q_mask = tf.sequence_mask(question_len, max_question_len,
                              dtype=tf.float32)
    # float32 [batch_size, num_contexts, context_len]
    c_mask = tf.sequence_mask(context_len, max_context_len,
                              dtype=tf.float32)
    matches = matches * tf.expand_dims(tf.expand_dims(q_mask, 1), -1)
    matches = matches * tf.expand_dims(c_mask, 2)

    if weighted:
        if vocab_df is None:
            # Document-level frequencies with respect to the current batch.
            all_context_ids = tf.to_int32(tf.reshape(context_ids, [-1]))
            # minlength guarantees every question id is a valid index below.
            vocab_df = tf.bincount(
                all_context_ids,
                minlength=tf.reduce_max(question_ids) + 1,
                dtype=tf.float32)
            # Unseen words get frequency 1 so the division stays finite.
            unseen = tf.equal(vocab_df, 0)
            vocab_df = tf.where(unseen, x=tf.ones_like(vocab_df), y=vocab_df)

        # float32 [batch_size, question_len] expanded to
        # float32 [batch_size, 1, question_len, 1]
        q_df = tf.gather(vocab_df, question_ids)
        q_df = tf.expand_dims(tf.expand_dims(q_df, 1), -1)

        # float32 [batch_size, num_contexts, question_len, context_len]
        matches = tf.divide(tf.to_float(matches), q_df)

    if reduce_type == "max":
        best_per_question_word = tf.reduce_max(matches, axis=[3])
        overlap = tf.reduce_sum(best_per_question_word, axis=[2])
    elif reduce_type == "sum":
        overlap = tf.reduce_sum(matches, axis=[2, 3])
    else:
        raise Exception("Reduce type %s is invalid." % reduce_type)

    # float32 [batch_size, num_contexts]
    return overlap
def expected_calibration_error(y_true, y_pred, nbins=20):
    """Streaming Expected Calibration Error (ECE) metric.

    ECE is a scalar summary of calibration error: the sample-weighted average,
    over uniformly spaced confidence bins on [0, 1], of the gap between the
    mean predicted confidence and the observed rate of positives.

    Reference:
      Guo et al., "On Calibration of Modern Neural Networks", page 2,
      Expected Calibration Error (ECE). https://arxiv.org/pdf/1706.04599.pdf

    Three local metric variables (`bin_counts`, `true_sum`, `preds_sum`) hold
    the running per-bin statistics. `update_op` adds the current batch to
    them and evaluates to the refreshed ECE.

    Args:
      y_true: 1-D tf.int64 tensor of binarized ground truth, aligned with
        `y_pred`.
      y_pred: 1-D tf.float32 tensor of confidence scores in [0.0, 1.0].
      nbins: number of uniformly spaced bins used to bucket `y_pred`.

    Returns:
      value_op: op returning the ECE computed from the accumulated variables.
      update_op: op that updates the accumulators and returns the ECE.

    Raises:
      InvalidArgumentError: if y_pred is not in [0.0, 1.0].
    """
    # Running per-bin totals: sample count, sum of labels, sum of confidences.
    accumulators = [
        metrics_impl.metric_variable([nbins], tf.float32, name=var_name)
        for var_name in ('bin_counts', 'true_sum', 'preds_sum')
    ]

    range_checks = [
        tf.assert_greater_equal(y_pred, 0.0),
        tf.assert_less_equal(y_pred, 1.0),
    ]
    with tf.control_dependencies(range_checks):
        bin_ids = tf.histogram_fixed_width_bins(
            y_pred, [0.0, 1.0], nbins=nbins)

    with tf.control_dependencies([bin_ids]):
        batch_totals = [
            tf.bincount(bin_ids, minlength=nbins),
            tf.bincount(bin_ids, weights=y_true, minlength=nbins),
            tf.bincount(bin_ids, weights=y_pred, minlength=nbins),
        ]
        update_ops = [
            tf.assign_add(variable, tf.to_float(total))
            for variable, total in zip(accumulators, batch_totals)
        ]

    ece_update_op = _ece_from_bins(*update_ops, name='update_op')
    ece = _ece_from_bins(*accumulators, name='value')
    return ece, ece_update_op
def __init__(self, corpus_file, staffline_extractor, **kwargs):
    """Build a k-nearest-neighbor glyph classifier from labeled patches.

    Args:
      corpus_file: Path to the TFRecords of Examples with patch (cluster)
        values in the "patch" feature, and the glyph label in the "label"
        feature.
      staffline_extractor: The staffline extractor.
      **kwargs: Passed through to `Convolutional1DGlyphClassifier`.
    """
    super(NearestNeighborGlyphClassifier, self).__init__(**kwargs)

    patch_height, patch_width = corpus.get_patch_shape(corpus_file)
    centroids, labels = corpus.parse_corpus(corpus_file, patch_height,
                                            patch_width)
    centroids_shape = tf.shape(centroids)
    flattened_centroids = tf.reshape(
        centroids,
        [centroids_shape[0], centroids_shape[1] * centroids_shape[2]])

    self.staffline_extractor = staffline_extractor
    stafflines = staffline_extractor.extract_staves()
    # Remember the full staffline width so the output can be padded back.
    width = tf.shape(stafflines)[-1]

    # Shape (num_staves, num_stafflines, num_patches, height, patch_width).
    staffline_patches = patches.patches_1d(stafflines, patch_width)
    staffline_patches_shape = tf.shape(staffline_patches)
    flattened_patches = tf.reshape(staffline_patches, [
        staffline_patches_shape[0] * staffline_patches_shape[1] *
        staffline_patches_shape[2],
        staffline_patches_shape[3] * staffline_patches_shape[4]
    ])
    distance_matrix = _squared_euclidean_distance_matrix(
        flattened_patches, flattened_centroids)

    # Take the k centroids with the lowest distance to each patch. Wrap the k
    # constant in a tf.identity, which tests can use to feed in another value.
    k_value = tf.identity(tf.constant(K_NEAREST_VALUE),
                          name='k_nearest_value')
    nearest_centroid_inds = tf.nn.top_k(-distance_matrix, k=k_value)[1]

    # Get the label corresponding to each nearby centroid, and reshape the
    # labels back to the original shape.
    nearest_labels = tf.reshape(
        tf.gather(labels, tf.reshape(nearest_centroid_inds, [-1])),
        tf.shape(nearest_centroid_inds))

    # Make a histogram of counts for each glyph type in the nearest
    # centroids, for each row (patch). Both bounds are given so every row has
    # exactly NUM_GLYPHS entries, which tf.map_fn needs in order to stack the
    # rows (same call as in knn_kmeans_model).
    bins = tf.map_fn(
        lambda row: tf.bincount(
            row, minlength=NUM_GLYPHS, maxlength=NUM_GLYPHS),
        tf.to_int32(nearest_labels))

    # Take the argmax of the histogram to get the top prediction. Discard
    # glyph type 1 (NONE) for now; the "+ 2" maps the sliced column index back
    # to the glyph id.
    mode_out_of_k = tf.argmax(
        bins[:, musicscore_pb2.Glyph.NONE + 1:], axis=1) + 2
    # Force predictions to NONE only if all k nearby centroids were NONE.
    # Otherwise, the non-NONE nearby centroids will contribute to the
    # prediction.
    mode_out_of_k = tf.where(
        tf.equal(bins[:, musicscore_pb2.Glyph.NONE], k_value),
        tf.fill(
            tf.shape(mode_out_of_k), tf.to_int64(musicscore_pb2.Glyph.NONE)),
        mode_out_of_k)
    predictions = tf.reshape(mode_out_of_k, staffline_patches_shape[:3])

    # Pad the output on both sides up to the staffline width.
    predictions_width = tf.shape(predictions)[-1]
    pad_before = (width - predictions_width) // 2
    pad_shape_before = tf.concat(
        [staffline_patches_shape[:2], [pad_before]], axis=0)
    pad_shape_after = tf.concat(
        [staffline_patches_shape[:2],
         [width - predictions_width - pad_before]],
        axis=0)
    self.output = tf.concat(
        [
            # NONE has value 1.
            tf.ones(pad_shape_before, tf.int64),
            predictions,
            tf.ones(pad_shape_after, tf.int64),
        ],
        axis=-1)
def knn_kmeans_model(centroids, labels, patches=None):
    """Build the KNN classifier graph over k-means centroids.

    Args:
      centroids: The k-means centroids NumPy array. Shape
        `(num_centroids, patch_height, patch_width)`.
      labels: The centroid labels NumPy array. Vector with length
        `num_centroids`.
      patches: Optional input tensor for the patches. If None, a placeholder
        will be used.

    Returns:
      The predictions (class ids) tensor determined from the input patches.
      Vector with the same length as `patches`.
    """
    none_glyph = musicscore_pb2.Glyph.NONE

    with tf.name_scope('knn_model'):
        centroids = tf.identity(
            _to_float(tf.constant(_to_uint8(centroids))), name='centroids')
        labels = tf.constant(labels, name='labels')

        dynamic_shape = tf.shape(centroids)
        num_centroids, patch_height, patch_width = (
            dynamic_shape[0], dynamic_shape[1], dynamic_shape[2])
        flattened_centroids = tf.reshape(
            centroids, [num_centroids, patch_height * patch_width],
            name='flattened_centroids')

        if patches is None:
            placeholder_shape = (None, centroids.shape[1], centroids.shape[2])
            patches = tf.placeholder(
                tf.float32, placeholder_shape, name='patches')
        patches_shape = tf.shape(patches)
        flattened_patches = tf.reshape(
            patches, [patches_shape[0], patches_shape[1] * patches_shape[2]],
            name='flattened_patches')

        with tf.name_scope('distance_matrix'):
            distance_matrix = _squared_euclidean_distance_matrix(
                flattened_patches, flattened_centroids)

        # k is wrapped in a named identity so tests can feed another value.
        k_value = tf.identity(
            tf.constant(K_NEAREST_VALUE), name='k_nearest_value')
        # top_k on the negated distances yields the k closest centroids.
        _, nearest_centroid_inds = tf.nn.top_k(-distance_matrix, k=k_value)

        # Look up each neighbor's label, keeping the (patch, k) layout.
        flat_neighbor_labels = tf.gather(
            labels, tf.reshape(nearest_centroid_inds, [-1]))
        nearest_labels = tf.reshape(
            flat_neighbor_labels, tf.shape(nearest_centroid_inds),
            name='nearest_labels')

        # Per-patch histogram over glyph types; fixed length NUM_GLYPHS.
        length = NUM_GLYPHS

        def _row_histogram(row):
            return tf.bincount(row, minlength=length, maxlength=length)

        bins = tf.map_fn(
            _row_histogram, tf.to_int32(nearest_labels), name='bins')

        with tf.name_scope('mode_out_of_k'):
            # Most frequent glyph among the neighbors, ignoring glyph type 1
            # (NONE) for now; "+ 2" restores the glyph id after slicing.
            non_none_bins = bins[:, none_glyph + 1:]
            mode_out_of_k = tf.argmax(non_none_bins, axis=1) + 2
            # Predict NONE only when all k neighbors were NONE; otherwise the
            # non-NONE neighbors decide the prediction.
            all_neighbors_none = tf.equal(bins[:, none_glyph], k_value)
            none_predictions = tf.fill(
                tf.shape(mode_out_of_k), tf.to_int64(none_glyph))
            mode_out_of_k = tf.where(
                all_neighbors_none, none_predictions, mode_out_of_k)

        return tf.identity(mode_out_of_k, name='predictions')
# Combine all of the models together for the ensemble.
# After the transpose the last axis of `all_preds` indexes the models.
all_preds = tf.stack([x["probabilities"] for x in predictions_arr])
all_preds = tf.transpose(all_preds, [1, 2, 0])

# Per-model view (models on axis 1) used for per-model top-k and argmax.
model_preds = tf.transpose(all_preds, [0, 2, 1])
model_top_10_values, model_top_10_indices = tf.nn.top_k(model_preds, k=10)
model_preds = tf.argmax(model_preds, axis=2, output_type=tf.int32)

if aggregate_method == 'average':
    # average over softmaxes
    test_prob = tf.reduce_mean(all_preds, axis=2)
    test_class = tf.argmax(test_prob, axis=1, output_type=tf.int32)
elif aggregate_method == 'most_common':
    # Each model votes with its argmax class; the most frequent vote wins.
    # The vote tensor is `test_prob`; the previous code read an undefined
    # name (`test_prob_max`) here.
    # NOTE(review): tf.bincount flattens its input, so votes are pooled over
    # the whole batch - confirm this branch is only used with batch size 1.
    test_prob = tf.argmax(all_preds, axis=1, output_type=tf.int32)
    test_class = tf.argmax(tf.bincount(test_prob), output_type=tf.int32)

# verify if prediction is correct
test_correct_pred = tf.equal(test_class, ph["y"])


##############################################
# File IO
##############################################
def read_file(filename_list):
    all_data, all_labels = [], []
    for file in filename_list:
        infile = np.load(file)
        data, labels = infile["data"], infile["label"]
def set_metrics(label, predicted):
    """Count the confusion-matrix cells of a binary prediction.

    Each (label, predicted) pair is encoded as `label + 2 * predicted`, so for
    0/1 inputs the returned vector is indexed as
    0: true negative, 1: false negative, 2: false positive, 3: true positive
    (the order spelled out in the scope name).

    Args:
      label: tensor of ground-truth values, cast to int32.
      predicted: tensor of predicted values, cast to int32.

    Returns:
      int32 vector of counts with at least four entries.
    """
    with tf.name_scope("metrics_tn_fn_fp_tp"):
        label = tf.cast(label, tf.int32)
        predicted = tf.cast(predicted, tf.int32)
        # minlength keeps all four cells present even when the batch contains
        # no false/true positives; without it the vector would be shorter and
        # indexing the trailing cells would fail.
        return tf.bincount(label + 2 * predicted, minlength=4)
def main(_):
    """Train, validate and test a genre classifier on GZTan spectrogram samples.

    Builds the graph selected by FLAGS.net_depth, minimizes softmax
    cross-entropy plus an L1 penalty with Adam, logs loss/accuracy summaries
    under FLAGS.log_dir, then evaluates every test track with three scores
    (raw per-sample accuracy, summed-output prediction, majority vote) and
    writes the confusion matrix to "confusion.csv".

    Args:
      _: unused positional argument.
    """
    tf.reset_default_graph()
    # Import data
    gztan = GZTan(FLAGS.num_batches, mel=(FLAGS.repr_func == 'mel'))
    # print('num train tracks: {}'.format(gztan.nTrainTracks))
    with tf.variable_scope('inputs'):
        # Create the model
        x = tf.placeholder(tf.float32, [None, 80, 80, 1])
        # Define loss and optimizer
        y_ = tf.placeholder(tf.float32, [None, FLAGS.num_classes])
        train_flag = tf.placeholder(tf.bool, [1])
        # Label vector of a single track, used by the track-level predictions.
        label = tf.placeholder(tf.int32, [FLAGS.num_classes])
    # Build the graph for the deep net
    if FLAGS.net_depth == 'shallow':
        print('SHALLOW')
        y_conv = shallownn(x, train_flag)
    elif FLAGS.net_depth == 'deep':
        print('DEEP')
        y_conv = deepnn(x, train_flag)
    else:
        print("Error: Unrecognised depth.")
        return
    # Define loss function - softmax_cross_entropy + L1 regularisation
    with tf.name_scope("regularized_loss"):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.0001)
        weights = tf.trainable_variables()
        regularization_penalty = tf.contrib.layers.apply_regularization(l1_regularizer, weights)
        regularized_cross_entropy = tf.add(cross_entropy, regularization_penalty, name='reg_loss')
    # Define AdamOptimiser, using FLAGS.learning_rate to minimize the loss function
    if FLAGS.decay == 'const':
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(regularized_cross_entropy)
    else:
        # Exponentially decayed learning rate: multiplied by 0.9 every 3000 optimizer steps.
        batch_number = tf.Variable(0, trainable=False)
        our_learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, batch_number, 3000, 0.9)
        optimizer = tf.train.AdamOptimizer(our_learning_rate).minimize(regularized_cross_entropy, global_step=batch_number)
    # Calculate the prediction and the accuracy
    # Raw: per-sample argmax compared against the per-sample label.
    raw_prediction = tf.argmax(y_conv, 1)
    raw_prediction_correct = tf.cast(tf.equal(raw_prediction, tf.argmax(y_, 1)), tf.float32)
    raw_accuracy = tf.reduce_mean(raw_prediction_correct)
    # "Max prob": sum the network outputs over all fed samples, then take the argmax.
    # NOTE(review): y_conv is passed as `logits` to the loss above, so this sums
    # un-normalized outputs rather than probabilities - confirm that is intended.
    max_prob_prediction = tf.argmax(tf.reduce_sum(y_conv, 0), 0)
    max_prob_prediction_correct = tf.cast(tf.equal(max_prob_prediction, tf.argmax(label)), tf.int32)
    # Majority vote: the class predicted for the largest number of fed samples.
    vote_count = tf.bincount(tf.cast(raw_prediction, tf.int32))
    maj_vote_prediction = tf.argmax(vote_count)
    maj_vote_prediction_correct = tf.cast(tf.equal(maj_vote_prediction, tf.argmax(label)), tf.int32)
    av_confidence = tf.reduce_mean(y_conv, 0)
    # saver for checkpoints
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir + '/_train', sess.graph)
        summary_writer_validation = tf.summary.FileWriter(FLAGS.log_dir + '/_validate', sess.graph, flush_secs=5)
        sess.run(tf.global_variables_initializer())
        # Training and validation
        for step in range(FLAGS.max_steps):
            # Training: Backpropagation using train set
            total_loss = 0
            for batchNum in range(FLAGS.num_batches):
                (train_samples, train_labels) = gztan.getTrainBatch(batchNum)
                _, batch_loss = sess.run([optimizer, regularized_cross_entropy], feed_dict={x: train_samples, train_flag: [True], y_: train_labels})
                total_loss += batch_loss
            # NOTE(review): this period is log_frequency + 1 while validation
            # below uses log_frequency - confirm the mismatch is deliberate.
            if step % (FLAGS.log_frequency + 1) == 0:
                loss_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Regularized_Loss", simple_value=total_loss),
                ])
                summary_writer.add_summary(loss_summary, step)
            # Validation: Monitoring accuracy using validation set
            if step % FLAGS.log_frequency == 0:
                total_accuracy = 0.0
                for batchNum in range(FLAGS.num_batches):
                    (test_samples, test_labels) = gztan.getTestBatch(batchNum)
                    validation_accuracy = sess.run(raw_accuracy, feed_dict={x: test_samples, train_flag: [False], y_: test_labels})
                    total_accuracy += validation_accuracy
                # Mean of the per-batch accuracies.
                total_accuracy = total_accuracy / FLAGS.num_batches
                print('step %d, accuracy on validation batch: %g' % (step, total_accuracy))
                tot_acc_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Total_Raw_Accuracy", simple_value=total_accuracy),
                ])
                summary_writer_validation.add_summary(tot_acc_summary, step)
            # # Save the model checkpoint periodically.
            # if step % FLAGS.save_model == 0 or (step + 1) == FLAGS.max_steps:
            #     checkpoint_path = FLAGS.log_dir + '/_train' + '/model.ckpt'
            #     saver.save(sess, checkpoint_path, global_step=step)
            gztan.shuffle()
        # Testing
        mp_pred_correct = []
        mv_pred_correct = []
        raw_pred_acc = []
        # Set once the first "interesting" misclassified track has been dumped.
        done = False
        print('num test tracks: {}'.format(gztan.nTracks))
        # Rows: true class, columns: max-prob prediction.
        confusion_matrix = np.zeros((FLAGS.num_classes, FLAGS.num_classes), dtype=np.int32)
        for track_id in range(gztan.nTracks):
            (track_samples, track_labels) = gztan.getTrackSamples(track_id)
            # The first sample's label is taken as the label of the whole track.
            track_label = track_labels[0]
            test_raw_acc = sess.run(raw_accuracy, feed_dict={x: track_samples, train_flag: [False], y_: track_labels})
            test_mp_prediction = sess.run(max_prob_prediction, feed_dict={x: track_samples, train_flag: [False]})
            test_mp_prediction_correct = (test_mp_prediction == np.argmax(track_label))
            test_mv_prediction = sess.run(maj_vote_prediction, feed_dict={x: track_samples, train_flag: [False], label: track_label})
            test_mv_prediction_correct = (test_mv_prediction == np.argmax(track_label))
            confusion_matrix[int(np.argmax(track_label)), int(test_mp_prediction)] += 1
            mp_pred_correct.append(test_mp_prediction_correct)
            mv_pred_correct.append(test_mv_prediction_correct)
            raw_pred_acc.append(test_raw_acc)
            # Find interesting examples and output them
            if not test_mv_prediction_correct and not test_mp_prediction_correct and not done:
                test_raw_confidences = sess.run(y_conv, feed_dict={x: track_samples, train_flag: [False]})
                test_raw_predictions = np.argmax(test_raw_confidences, axis=1)
                # Mean output per sample (averaged over the class axis).
                test_av_conf_vals = np.mean(test_raw_confidences, axis=1)
                # Samples whose output for the true class is below that sample's mean output.
                low_correct_confidences = np.where(test_raw_confidences[:,np.argmax(track_label)] < test_av_conf_vals)[0]
                if len(low_correct_confidences) > 0:
                    done = True
                    # np.where outputs a 1-tuple so do [0] on this to get actual result
                    print('test_mp_prediction: {} test_mv_prediction: {} true label: {}'.format(test_mp_prediction, test_mv_prediction, np.argmax(track_label)))
                    incorrect_pred_idxs = np.where(test_raw_predictions != np.argmax(track_label))[0]
                    print('found at track_id: {}!'.format(track_id))
                    for idx in low_correct_confidences:
                        print('Incorrectly classified sample {} with as {} with confidences {}. Should be {}.'.format(idx, test_raw_predictions[idx], test_raw_confidences[idx], np.argmax(track_label)))
                        gztan.outputSample(track_id, idx)
                        # Save the sample's spectrogram as an image for inspection.
                        sample_spec = track_samples[idx]
                        specshow(sample_spec.reshape([80, 80]), y_axis=FLAGS.repr_func)
                        pylab.savefig('incorrect_{r}_track{t}_example{e}.png'.format(r=FLAGS.repr_func, t=track_id, e=idx), bbox_inches=None, pad_inches=0)
                        pylab.close()
        # Track-level accuracies: fraction of correctly predicted tracks.
        test_mp_accuracy = sum(mp_pred_correct) / len(mp_pred_correct)
        test_mv_accuracy = sum(mv_pred_correct) / len(mv_pred_correct)
        test_raw_accuracy = sum(raw_pred_acc) / len(raw_pred_acc)
        print('test set: raw accuracy on test set: %0.3f' % test_raw_accuracy)
        print('test set: max prob accuracy on test set: %0.3f' % test_mp_accuracy)
        print('test set: maj vote accuracy on test set: %0.3f' % test_mv_accuracy)
        np.savetxt("confusion.csv", confusion_matrix, delimiter=",")
# shape_top_k_xvals = tf.shape(top_k_xvals) # shape_top_k_indices = tf.shape(top_k_indices) # x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1) # x_sums_repeated = tf.matmul(x_sums, tf.ones([1, k], tf.float32)) # shape = [k,k] # x_val_weights = tf.expand_dims(tf.div(top_k_xvals, x_sums_repeated), 1) # # # count = tf.unique_with_counts(tf.cast(top_k_yvals, dtype=tf.int32)) # pre = tf.argmax(top_k_xvals, axis=1) # In[167]: top_k_yvals = tf.gather(y_input, top_k_indices) count = tf.bincount(tf.cast(top_k_yvals, dtype=tf.int32)) pre_y = tf.argmax(count) # In[169]: # Initialize the variables (i.e. assign their default value) init = tf.global_variables_initializer() sess.run(init) batch_size = 1 target_size = 5000 num_loops = int(np.ceil(len(x_vals_test) / batch_size)) num_loops = 1000 # pre_dic = [] # target_dict=[] acc = []
def compute_loss(self, binary_seg_logits, binary_label,
                 instance_seg_logits, instance_label, name, reuse):
    """Compute the LaneNet training loss.

    The total loss is the sum of a class-weighted segmentation loss, a
    discriminative instance-embedding loss and an L2 penalty on the trainable
    variables.

    :param binary_seg_logits: segmentation logits
    :param binary_label: segmentation labels; must have a fully defined
        static 4-D shape because it is reshaped using that shape
    :param instance_seg_logits: features fed to the pixel-embedding head
    :param instance_label: instance labels for the discriminative loss
    :param name: name of the enclosing variable scope
    :param reuse: whether variables in the scope are reused
    :return: dict with keys 'total_loss', 'binary_seg_logits',
        'instance_seg_logits' (the pixel embedding), 'binary_seg_loss' and
        'discriminative_loss'
    :raises NotImplementedError: if the configured binary loss type is
        neither 'cross_entropy' nor 'focal'
    """
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        # calculate class weighted binary seg loss
        with tf.variable_scope(name_or_scope='binary_seg'):
            label_shape = binary_label.get_shape().as_list()
            binary_label_onehot = tf.one_hot(
                tf.reshape(
                    tf.cast(binary_label, tf.int32),
                    shape=label_shape[:3]),
                depth=self._class_nums,
                axis=-1)

            binary_label_plain = tf.reshape(
                binary_label,
                shape=[label_shape[0] * label_shape[1] *
                       label_shape[2] * label_shape[3]])
            binary_label_plain = tf.cast(binary_label_plain, tf.int32)

            # One count per class. The length follows the one-hot depth so
            # the weight vector always lines up with the one-hot labels,
            # instead of being pinned to a literal class count.
            counts = tf.bincount(binary_label_plain,
                                 minlength=self._class_nums)
            # Classes absent from the batch are treated as having one pixel.
            counts = tf.where(tf.equal(counts, 0),
                              tf.ones_like(counts), counts)
            counts = tf.cast(counts, tf.float32)
            # Bounded inverse-frequency weighting:
            # w_c = 1 / log(1.02 + frequency_c).
            inverse_weights = tf.divide(
                1.0,
                tf.log(tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                              tf.constant(1.02))))

            if self._binary_loss_type == 'cross_entropy':
                binary_segmenatation_loss = \
                    self._compute_class_weighted_cross_entropy_loss(
                        onehot_labels=binary_label_onehot,
                        logits=binary_seg_logits,
                        classes_weights=inverse_weights)
            elif self._binary_loss_type == 'focal':
                binary_segmenatation_loss = self._multi_category_focal_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)
            else:
                raise NotImplementedError

        # calculate class weighted instance seg loss
        with tf.variable_scope(name_or_scope='instance_seg'):
            pix_bn = self.layerbn(
                inputdata=instance_seg_logits,
                is_training=self._is_training,
                name='pix_bn')
            pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
            pix_embedding = self.conv2d(
                inputdata=pix_relu,
                out_channel=self._embedding_dims,
                kernel_size=1,
                use_bias=False,
                name='pix_embedding_conv')
            embedding_shape = pix_embedding.get_shape().as_list()
            pix_image_shape = (embedding_shape[1], embedding_shape[2])
            instance_segmentation_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, self._embedding_dims,
                    pix_image_shape, 0.4, 3.0, 1.0, 1.0, 0.001)

        # L2 penalty over all trainable variables except those whose name
        # contains 'bn' or 'gn'.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name or 'gn' in vv.name:
                continue
            l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001

        total_loss = (binary_segmenatation_loss +
                      instance_segmentation_loss + l2_reg_loss)

        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': binary_seg_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': instance_segmentation_loss
        }

    return ret
def lstm_layers(self, feature_maps, features):
    """Run the bidirectional LSTM head and decode its output with CTC beam search.

    Args:
      feature_maps: input passed to `deep_bidirectional_lstm`.
      features: dict; the keys 'corpus' and 'image_width' are read here.

    Returns:
      predictions_dict: dict with 'prob', 'raw_predictions' and
        'seq_len_inputs', plus 'score' and 'words' when decoding runs.
      eval_metric_ops: dict of accuracy/CER metrics in EVAL mode, else None.
    """
    parameters = self.parameters
    # NOTE(review): `mode` is read from an attribute named `_is_training` but is
    # compared against tf.estimator.ModeKeys values below - confirm what this
    # attribute actually holds.
    mode = self.detection_model._is_training
    logprob, raw_pred = deep_bidirectional_lstm(feature_maps,
                                                features['corpus'],
                                                params=parameters,
                                                summaries=False)
    # Compute seq_len from image width
    # n_pools = CONST.DIMENSION_REDUCTION_W_POOLING # 2x2 pooling in dimension W on layer 1 and 2
    # seq_len_inputs = tf.divide(features['image_width'], n_pools, name='seq_len_input_op') - 1
    # The pooling correction above is disabled; the raw width is used as-is.
    seq_len_inputs = features['image_width']
    # Static size of axis 1 of `logprob` (presumably time-major output - TODO confirm).
    batch_size = logprob.shape[1]
    predictions_dict = {
        'prob': logprob,
        'raw_predictions': raw_pred,
        'seq_len_inputs': seq_len_inputs
    }
    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            # Lookup table from integer codes to characters; unknown codes map to '?'.
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')
            # Every sequence in the batch is given the same length, seq_len_inputs.
            # NOTE(review): the decoder is asked for `parameters.nb_logprob` paths
            # while the loops below iterate `parameters.top_paths` - verify the
            # two settings agree.
            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast([seq_len_inputs] * batch_size,
                                        tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=parameters.nb_logprob)
            # confidence value
            predictions_dict['score'] = log_probability
            # Number of decoded symbols per batch element: count how often each
            # batch index occurs in the sparse result's row indices.
            sequence_lengths_pred = [
                tf.bincount(
                    tf.cast(sparse_code_pred[i].indices[:, 0], tf.int32),
                    minlength=tf.shape(predictions_dict['prob'])[1])
                for i in range(parameters.top_paths)
            ]
            pred_chars = [
                table_int2str.lookup(sparse_code_pred[i])
                for i in range(parameters.top_paths)
            ]
            list_preds = [
                get_words_from_chars(
                    pred_chars[i].values,
                    sequence_lengths=sequence_lengths_pred[i])
                for i in range(parameters.top_paths)
            ]
            predictions_dict['words'] = tf.stack(list_preds)
            tf.summary.text('predicted_words',
                            predictions_dict['words'][0][:10])
    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            # NOTE(review): `sparse_code_target` and `seq_lengths_labels` are not
            # assigned anywhere in this method - they must come from an
            # enclosing/module scope or this branch raises NameError; verify.
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred[0],
                tf.cast(sparse_code_target, dtype=tf.int64)),
                                  name='CER')
            # Convert label codes to decoding alphabet to compare predicted and ground-truth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'][0],
                                           name='accuracy')
            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            # The Print-wrapped values are rebound after the dict was built, so
            # the dict still holds the unwrapped metrics.
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy],
                                message='-- Accuracy : ')
    else:
        eval_metric_ops = None
    return predictions_dict, eval_metric_ops
def _tf_bincount_histogram(image, source_range, sess=None, as_tensor=False):
    """Efficient histogram calculation for an image of integers.

    This function is significantly more efficient than
    tf.histogram_fixed_width but works only on images of integers. It is
    based on tf.bincount.

    Args
    ---------------
    image: tensor
        A tensor of an image.
    source_range : string
        'image' determines the range from the input image.
        'dtype' determines the range from the expected range of the images
        of that data type.
    sess: tf.Session, optional
        Session used to evaluate tensors. When omitted, a temporary
        interactive session is opened and closed again before returning.
    as_tensor: bool, optional
        Returns the result as tensors if True, otherwise as evaluated
        values. Default is False.

    Returns
    ---------------
    hist : array/tensor
        The values of the histogram.
    bin_centers : array/tensor
        The values at the center of the bins.

    Raises
    ---------------
    ValueError
        If `source_range` is neither 'image' nor 'dtype'.
    """
    # Validate before touching TensorFlow so that a bad argument never leaves
    # a freshly opened session behind.
    if source_range not in ['image', 'dtype']:
        raise ValueError(
            'Incorrect value for `source_range` argument: {}'.format(
                source_range))

    owns_session = sess is None
    if owns_session:
        sess = tf.InteractiveSession()

    try:
        tf_image = image

        if source_range == 'image':
            # Value range from the image itself; evaluated in the session the
            # caller handed in rather than whatever session is the default.
            image_min = tf.cast(tf.math.reduce_min(tf_image),
                                tf.int64).eval(session=sess)
            image_max = tf.cast(tf.math.reduce_max(tf_image),
                                tf.int64).eval(session=sess)
        else:
            # Value range from the image datatype.
            image_min, image_max = tf_dtype_limits(tf_image,
                                                   clip_negative=False)

        # Offset the image array to get the low value boundary to zero.
        # NOTE(review): the offset result is not used below - the histogram
        # is computed from the un-offset `tf_image`. Kept as in the original;
        # confirm whether the offset image was meant to be counted.
        image, offset = _tf_offset_array(array=image,
                                         low_boundary=image_min,
                                         high_boundary=image_max,
                                         sess=sess,
                                         as_tensor=True)

        # Flatten the image and count occurrences of every value.
        tf_image = tf.cast(tf.reshape(tensor=tf_image, shape=[-1]),
                           dtype=tf.int32)
        hist = tf.bincount(arr=tf_image,
                           minlength=image_max - image_min + 1,
                           dtype=tf.int32)
        bin_centers = tf.range(start=image_min, limit=image_max + 1)

        if source_range == 'image':
            # Drop the leading bins below the smallest value in the image.
            idx = tf.maximum(image_min, 0)
            hist = hist[idx:]

        if as_tensor:
            return hist, bin_centers

        hist_values, bin_center_values = sess.run([hist, bin_centers])
        return hist_values, bin_center_values
    finally:
        # Only close a session this function opened itself.
        if owns_session:
            sess.close()
def postprocess(self, prediction_dict):
    """Decode raw predictions into final detections via NMS.

    Args:
      prediction_dict: dict that must contain 'box_encodings' and
        'class_predictions_with_background'; 'preprocessed_inputs',
        'anchors', 'feature_maps' and 'feature_masks' are read as well.

    Returns:
      dict with 'detection_boxes', 'detection_scores', 'detection_classes'
      and 'num_detections' (cast to float).

    Raises:
      ValueError: if one of the two required entries is missing.
    """
    has_required_entries = (
        'box_encodings' in prediction_dict and
        'class_predictions_with_background' in prediction_dict)
    if not has_required_entries:
        raise ValueError(
            'prediction_dict does not contain expected entries.')

    with tf.name_scope('Postprocessor'):
        preprocessed_images = prediction_dict['preprocessed_inputs']
        anchors = prediction_dict['anchors']
        box_encodings = prediction_dict['box_encodings']
        class_predictions = prediction_dict[
            'class_predictions_with_background']
        feature_maps = prediction_dict['feature_maps']
        feature_masks = prediction_dict['feature_masks']

        # Decode box encodings relative to the anchors.
        detection_boxes = self.batch_decode(anchors, box_encodings)
        tf.logging.info('detection_boxes: %s', detection_boxes)

        # Convert logits to scores and drop the background column (index 0).
        scores_with_background = self._tf_score_converter_fn(
            class_predictions, name='convert_scores')
        detection_scores = tf.slice(
            scores_with_background, [0, 0, 1], [-1, -1, -1])
        tf.logging.info('detection_scores: %s', detection_scores)
        debug_detection_scores = tf.slice(
            detection_scores, [0, 0, 0], [-1, 1, 1])
        #self.tensors_to_log[debug_detection_scores.op.name] = debug_detection_scores

        # Non-max suppression.
        nms_outputs = self.batch_non_max_suppression(
            detection_boxes, detection_scores)
        (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_indices,
         num_detections) = nms_outputs
        tf.logging.info('nmsed boxes: %s', nmsed_boxes)
        tf.logging.info('nmsed scores: %s', nmsed_scores)
        tf.logging.info('nmsed classes: %s', nmsed_classes)
        tf.logging.info('nmsed indices: %s', nmsed_indices)

        # How many surviving detections carry each feature-map id.
        nmsed_masks = tf.gather(feature_masks, nmsed_indices)
        per_feature_counts = [
            tf.reduce_sum(tf.to_int32(tf.equal(nmsed_masks, feature_idx)))
            for feature_idx in range(len(feature_maps))
        ]
        self.tensors_to_log['nmsed_feature'] = tf.to_int32(
            per_feature_counts)

        # Histogram of the detected class ids.
        #y, _, count = tf.unique_with_counts(tf.reshape(tf.to_int32(nmsed_classes), [-1]))
        #classes_distribute = tf.stack([y, count])
        flat_classes = tf.reshape(tf.to_int32(nmsed_classes), [-1])
        self.tensors_to_log['top_classes_count'] = tf.bincount(flat_classes)

        # Log the first (top) score and class of every image.
        top_score = tf.slice(nmsed_scores, [0, 0], [-1, 1])
        max_scores = tf.squeeze(top_score, name='max_scores')
        #max_scores = tf.slice(nmsed_scores,
        #                      [0, 0], [-1, 5], name='max_scores')
        top_class = tf.slice(nmsed_classes, [0, 0], [-1, 1])
        max_classes = tf.squeeze(top_class, name='max_classes')
        #max_classes = tf.slice(nmsed_classes,
        #                       [0, 0], [-1, 5], name='max_classes')
        for logged in (max_scores, max_classes):
            self.tensors_to_log[logged.op.name] = logged

        detection_dict = {
            'detection_boxes': nmsed_boxes,
            'detection_scores': nmsed_scores,
            'detection_classes': nmsed_classes,
            'num_detections': tf.to_float(num_detections)
        }
        for key, tensor in detection_dict.items():
            tf.logging.info('[detection] %s: %s', key, tensor)

    return detection_dict
def loss(self, prediction_dict, labels, scope=None):
    """Build the localization and classification losses for a batch.

    Per-feature-map and per-class loss statistics are additionally stored
    in ``self.tensors_to_log`` and a 'normalizer' scalar summary is emitted.

    Args:
        prediction_dict: mapping read for the keys 'anchors',
            'box_encodings', 'class_predictions_with_background',
            'feature_maps' and 'feature_masks'.
        labels: mapping read for the keys 'groundtruth_boxes',
            'groundtruth_classes' and 'groundtruth_labels'; each value is
            iterated per image.
        scope: optional name scope; 'Loss' is used when it is None.

    Returns:
        dict mapping the op names of the weighted, normalized localization
        and classification losses to the corresponding tensors.
    """

    def masked_total(mask, values):
        # Sum of `values` over the positions where `mask` is True.
        return tf.reduce_sum(tf.where(mask, values, tf.zeros_like(values)))

    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
        gt_boxes_per_image = labels['groundtruth_boxes']
        gt_classes_per_image = labels['groundtruth_classes']
        gt_labels_per_image = labels['groundtruth_labels']
        anchors = prediction_dict['anchors']
        box_encodings = prediction_dict['box_encodings']
        class_predictions = prediction_dict[
            'class_predictions_with_background']
        feature_maps = prediction_dict['feature_maps']
        feature_masks = prediction_dict['feature_masks']

        # Prepend a zero column (the background slot) to each encoding.
        gt_classes_with_background = [
            tf.pad(encoding, [[0, 0], [1, 0]], mode='CONSTANT')
            for encoding in gt_classes_per_image
        ]

        # --- target assignment, one image at a time ---
        assignments = []
        for boxes, classes, label_ids in zip(gt_boxes_per_image,
                                             gt_classes_with_background,
                                             gt_labels_per_image):
            (cls_targets, cls_weights, reg_targets, reg_weights,
             cls_labels, matches) = self.assign_targets(
                 anchors, boxes, classes, label_ids)
            assignments.append((cls_targets, cls_weights, reg_targets,
                                reg_weights, cls_labels, matches))

        def column(position):
            # Gather one field of every per-image assignment tuple.
            return [row[position] for row in assignments]

        matches_list = column(5)
        batch_cls_targets = tf.stack(column(0))
        batch_cls_weights = tf.stack(column(1))
        batch_reg_targets = tf.stack(column(2))
        batch_reg_weights = tf.stack(column(3))
        batch_cls_labels = tf.stack(column(4))
        tf.logging.info('batch_cls_targets: %s', batch_cls_targets)
        tf.logging.info('batch_cls_weights: %s', batch_cls_weights)
        tf.logging.info('batch_reg_targets: %s', batch_reg_targets)
        tf.logging.info('batch_reg_weights: %s', batch_reg_weights)
        tf.logging.info('batch_cls_labels: %s', batch_cls_labels)
        self._summarize_target_assignment(gt_boxes_per_image, matches_list)

        # --- raw losses ---
        location_losses = self._localization_loss(
            box_encodings,
            batch_reg_targets,
            ignore_nan_targets=True,
            weights=batch_reg_weights)
        raw_cls_losses = self._classification_loss(
            class_predictions, batch_cls_targets, weights=batch_cls_weights)
        cls_losses = ops.reduce_sum_trailing_dimensions(
            raw_cls_losses, ndims=2)
        tf.logging.info('location_losses: %s', location_losses)
        tf.logging.info('cls_losses: %s', cls_losses)

        if not self._hard_example_miner:
            # Without mining every entry counts as selected.
            selected_masks = tf.ones_like(location_losses, dtype=bool)
            localization_loss = tf.reduce_sum(location_losses)
            classification_loss = tf.reduce_sum(cls_losses)
        else:
            (localization_loss, classification_loss,
             selected_masks) = self.apply_hard_mining(
                 location_losses, cls_losses, prediction_dict, matches_list)
            self._hard_example_miner.summarize()
            self.tensors_to_log.update(
                self._hard_example_miner.tensors_to_log())

        # Total regression weight, floored at 1.0.
        normalizer = tf.maximum(
            tf.to_float(tf.reduce_sum(batch_reg_weights)), 1.0)

        # --- diagnostics: statistics per feature map ---
        loc_loss_by_anchor = tf.reduce_sum(
            tf.where(selected_masks, location_losses,
                     tf.zeros_like(location_losses)),
            axis=0)
        cls_loss_by_anchor = tf.reduce_sum(
            tf.where(selected_masks, cls_losses, tf.zeros_like(cls_losses)),
            axis=0)
        matches_by_anchor = tf.reduce_sum(batch_reg_weights, axis=0)
        selected_by_anchor = tf.reduce_sum(
            tf.to_int32(selected_masks), axis=0)

        per_map_loc = []
        per_map_cls = []
        per_map_matches = []
        per_map_selected = []
        for map_index, _ in enumerate(feature_maps):
            on_this_map = tf.equal(feature_masks, map_index)
            n_selected = masked_total(on_this_map, selected_by_anchor)
            per_map_selected.append(n_selected)
            # NOTE: a zero n_selected yields NaN/inf in these logged means.
            per_map_loc.append(
                masked_total(on_this_map, loc_loss_by_anchor) /
                tf.to_float(n_selected))
            per_map_cls.append(
                masked_total(on_this_map, cls_loss_by_anchor) /
                tf.to_float(n_selected))
            per_map_matches.append(
                masked_total(on_this_map, matches_by_anchor))

        feature_losses = tf.stack(
            [tf.to_float(per_map_loc), tf.to_float(per_map_cls)],
            name='feature_losses')
        counts = tf.stack(
            [tf.to_int32(per_map_matches), tf.to_int32(per_map_selected)],
            name='counts')
        self.tensors_to_log[feature_losses.op.name] = feature_losses
        self.tensors_to_log[counts.op.name] = counts

        # --- diagnostics: statistics per class label ---
        per_class_counts = []
        per_class_loc = []
        per_class_cls = []
        for class_id in range(self.num_classes + 1):
            in_class = tf.logical_and(selected_masks,
                                      tf.equal(batch_cls_labels, class_id))
            n_selected = tf.reduce_sum(tf.to_int32(in_class))
            per_class_counts.append(n_selected)
            per_class_loc.append(
                masked_total(in_class, location_losses) /
                tf.to_float(n_selected))
            per_class_cls.append(
                masked_total(in_class, cls_losses) /
                tf.to_float(n_selected))
        label_count_loss = tf.stack([
            tf.to_float(per_class_counts),
            tf.to_float(per_class_loc),
            tf.to_float(per_class_cls),
        ], name='label_count_loss')
        self.tensors_to_log[label_count_loss.op.name] = label_count_loss

        # Histogram of all ground-truth label ids in the batch.
        all_gt_labels = tf.reshape(
            tf.to_int32(tf.concat(gt_labels_per_image, 0)), [-1])
        self.tensors_to_log['gt_classes_count'] = tf.bincount(all_gt_labels)

        # --- final losses: weight / normalizer * summed loss ---
        tf.summary.scalar('normalizer', normalizer)
        localization_loss = tf.multiply(
            self._localization_loss_weight / normalizer,
            localization_loss,
            name='localization_loss')
        classification_loss = tf.multiply(
            self._classification_loss_weight / normalizer,
            classification_loss,
            name='classification_loss')
        return {
            str(localization_loss.op.name): localization_loss,
            str(classification_loss.op.name): classification_loss
        }
def crnn_fn(features, labels, mode, params):
    """Estimator model function: ResNet features -> bidirectional LSTM -> CTC.

    :param features: dict { 'images' 'images_widths' 'filenames' }
        ('filenames' is optional and copied into the predictions if present)
    :param labels: string labels; each one is split on '$' and every piece
        is parsed as a numeric character code
    :param mode: a tf.estimator.ModeKeys value
    :param params: dict { 'Params' } holding a Params instance
    :return: tf.estimator.EstimatorSpec
    :raises ValueError: if parameters.optimizer is not 'ada', 'adam' or 'rms'
        (checked in every mode except PREDICT)
    """
    parameters = params.get('Params')
    # Fail early if it is not a Params instance.
    assert isinstance(parameters, Params)

    # Dropout keep probability: 0.7 while training, disabled otherwise.
    if mode == tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 0.7
    else:
        parameters.keep_prob_dropout = 1.0

    # CNN stage: 128*32*304*3 -> 128*75*512
    conv = resnet(features['images'],
                  (mode == tf.estimator.ModeKeys.TRAIN),
                  summaries=False)
    # RNN stage: 128*75*512 -> 75*128*3851
    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                params=parameters,
                                                summaries=False)

    # Sequence length fed to the RNN, derived from the image width.
    # 2x2 pooling in dimension W on layer 1 and 2
    n_pools = CONST.DIMENSION_REDUCTION_W_POOLING
    seq_len_inputs = tf.divide(features['images_widths'], n_pools) - 1

    # Output dictionary.
    predictions_dict = {
        'prob': logprob,
        'raw_predictions': raw_pred,
    }
    try:
        predictions_dict['filenames'] = features['filenames']
    except KeyError:
        pass

    if mode != tf.estimator.ModeKeys.PREDICT:
        # Convert string labels to code labels, i.e. the index of every
        # character in the alphabet.
        # ------------------------------ start ------------------------------
        # string_split in the TensorFlow version in use does not support
        # UTF-8 characters. As a compromise the labels are stored as
        # '$'-separated indices. Re-enable the code below once supported:
        # keys = [c for c in parameters.alphabet]
        # values = parameters.alphabet_codes
        #
        # # Convert string label to code label
        # with tf.name_scope('str2code_conversion'):
        #     table_str2int = tf.contrib.lookup.HashTable(
        #         tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
        #     # TODO change string split to utf8 split in next tf version
        #     splited = tf.string_split(labels, delimiter='')
        #     codes = table_str2int.lookup(splited.values)
        #     sparse_code_target = tf.SparseTensor(
        #         splited.indices, codes, splited.dense_shape)
        # ------------------------------- end -------------------------------

        # ---------------------- temporary workaround -----------------------
        with tf.name_scope('str2code_conversion'):
            splited = tf.string_split(labels, delimiter='$')
            sparse_code_target = tf.SparseTensor(
                splited.indices,
                tf.cast(tf.string_to_number(splited.values), tf.int32),
                splited.dense_shape)

        # Number of label codes per sample (count of entries per row index).
        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0], tf.int32),
            minlength=tf.shape(predictions_dict['prob'])[1])
        # -------------------- end of temporary workaround ------------------

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error
        # NOTE(review): tf.less_equal only produces a boolean tensor; used as
        # a control dependency it does not raise when the check is False.
        with tf.control_dependencies([
                tf.less_equal(
                    sparse_code_target.dense_shape[1],
                    tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                # returns zero gradient in case it happens -> ema loss = NaN
                ignore_longer_outputs_than_inputs=True,
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)

        global_step = tf.train.get_or_create_global_step()

        # Exponential moving average of the CTC loss (this is not a
        # learning-rate schedule). apply() creates the shadow variable and
        # returns the op that maintains the average.
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        maintain_averages_op = ema.apply([loss_ctc])

        # Train op and optimization strategy
        # ----------------------------------
        learning_rate = tf.train.exponential_decay(
            parameters.learning_rate,
            global_step,
            parameters.learning_decay_steps,
            parameters.learning_decay_rate,
            staircase=True)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            # Without this branch an unknown name would only surface later
            # as an UnboundLocalError on `optimizer`.
            raise ValueError(
                "Unsupported optimizer {!r}; expected 'ada', 'adam' or "
                "'rms'".format(parameters.optimizer))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc, global_step=global_step)
        # The train op refreshes the loss average only after the update ops
        # and the optimizer step have run.
        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # TensorBoard summaries
        # ---------------------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        # Convert the predicted codes back to characters.
        with tf.name_scope('code2str_conversion'):
            # Lookup table code -> character; unknown codes map to '?'.
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values),
                '?')

            sparse_code_pred, log_probability = \
                tf.nn.ctc_beam_search_decoder(
                    predictions_dict['prob'],
                    sequence_length=tf.cast(seq_len_inputs, tf.int32),
                    merge_repeated=False,
                    beam_width=100,
                    top_paths=2)

            # Score: gap between the two best paths.
            # around 10.0 -> seems pretty sure, less than 5.0 bit unsure,
            # some errors/challenging images
            predictions_dict['score'] = tf.subtract(log_probability[:, 0],
                                                    log_probability[:, 1])

            sparse_code_pred = sparse_code_pred[0]
            sequence_lengths_pred = tf.bincount(
                tf.cast(sparse_code_pred.indices[:, 0], tf.int32),
                minlength=tf.shape(predictions_dict['prob'])[1])

            pred_chars = table_int2str.lookup(sparse_code_pred)
            predictions_dict['words'] = get_words_from_chars(
                pred_chars.values, sequence_lengths=sequence_lengths_pred)

            tf.summary.text('predicted_words',
                            predictions_dict['words'][:10])

        # Training accuracy
        if mode == tf.estimator.ModeKeys.TRAIN:
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred, tf.cast(sparse_code_target,
                                          dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to decoding alphabet to compare predicted
            # and groundtruth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'],
                                           name='accuracy')
            # accuracy[1] is the metric's update op; expose it by name.
            tf.identity(accuracy[1], name='train_accuracy')
            tf.summary.scalar('train_accuracy', accuracy[1])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred, tf.cast(sparse_code_target,
                                          dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to decoding alphabet to compare predicted
            # and groundtruth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
    else:
        eval_metric_ops = None

    # Outputs to export.
    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss_ctc,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=tf.train.Scaffold()
        # scaffold=tf.train.Scaffold(init_fn=None)  # Specify init_fn to restore from previous model
    )
def crnn_fn(features, labels, mode, params):
    """Estimator model function: deep CNN -> bidirectional LSTM -> CTC.

    :param features: dict { 'image' 'image_width' 'corpus' }
    :param labels: string labels; each one is split into single characters
        which are mapped to codes through the alphabet lookup table
    :param mode: a tf.estimator.ModeKeys value
    :param params: dict { 'Params' } holding a Params instance
    :return: tf.estimator.EstimatorSpec
    :raises ValueError: if parameters.optimizer is not 'ada', 'momentum',
        'adam' or 'rms' (checked in every mode except PREDICT)
    """
    parameters = params.get('Params')
    assert isinstance(parameters, Params)

    # Load pre-trained cnn model
    if parameters.cnn_pretained_ckpt_path:
        # NOTE(review): this runs before deep_cnn() below builds the network;
        # confirm that variables_to_restore is non-empty at this point.
        exclude = ['deep_bidirectional_lstm']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(
            exclude=exclude)
        tf.train.init_from_checkpoint(
            parameters.cnn_pretained_ckpt_path,
            {v.name.split(':')[0]: v for v in variables_to_restore})

    # Dropout is disabled outside of training.
    if mode != tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 1.0

    conv = deep_cnn(features['image'],
                    (mode == tf.estimator.ModeKeys.TRAIN),
                    parameters.cnn_model,
                    summaries=False)
    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                features['corpus'],
                                                params=parameters,
                                                summaries=False)

    # Compute seq_len from image width
    n_pools = parameters.width_down_sampling
    seq_len_inputs = tf.divide(
        features['image_width'], n_pools, name='seq_len_input_op') - 1

    predictions_dict = {'prob': logprob, 'raw_predictions': raw_pred}

    if mode != tf.estimator.ModeKeys.PREDICT:
        # Alphabet and codes: the table is keyed by the latin1 byte values
        # of the alphabet characters.
        keys = list(parameters.alphabet.encode('latin1'))
        values = parameters.alphabet_codes

        # Convert string label to code label
        with tf.name_scope('str2code_conversion'):
            table_str2int = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(
                    keys, values, key_dtype=tf.int64, value_dtype=tf.int64),
                -1)
            splitted = tf.string_split(labels, delimiter='')
            values_int = tf.cast(
                tf.squeeze(tf.decode_raw(splitted.values, tf.uint8)),
                tf.int64)
            codes = table_str2int.lookup(values_int)
            codes = tf.cast(codes, tf.int32)
            sparse_code_target = tf.SparseTensor(splitted.indices, codes,
                                                 splitted.dense_shape)

        # Array of label lengths (count of entries per row index).
        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0], tf.int32),
            minlength=tf.shape(predictions_dict['prob'])[1])

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error
        # NOTE(review): tf.less_equal only produces a boolean tensor; used as
        # a control dependency it does not raise when the check is False.
        with tf.control_dependencies([
                tf.less_equal(
                    sparse_code_target.dense_shape[1],
                    tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                # returns zero gradient in case it happens -> ema loss = NaN
                ignore_longer_outputs_than_inputs=True,
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)
            loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

        global_step = tf.train.get_or_create_global_step()

        # Exponential moving average of the loss. apply() creates the shadow
        # variable and returns the op that maintains the average.
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        maintain_averages_op = ema.apply([loss_ctc])

        # Train op
        # --------
        if parameters.learning_rate_decay:
            learning_rate = tf.train.exponential_decay(
                parameters.learning_rate,
                global_step,
                parameters.learning_rate_steps,
                parameters.learning_rate_decay,
                staircase=True)
        else:
            learning_rate = tf.constant(parameters.learning_rate)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   momentum=0.9)
        elif parameters.optimizer == 'adam':
            # at 1e-08 sometimes exploding gradient
            optimizer = tf.train.AdamOptimizer(learning_rate,
                                               beta1=0.5,
                                               epsilon=1e-07)
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            # Without this branch an unknown name would only surface later
            # as an UnboundLocalError on `optimizer`.
            raise ValueError(
                "Unsupported optimizer {!r}; expected 'ada', 'momentum', "
                "'adam' or 'rms'".format(parameters.optimizer))

        # Optionally freeze the CNN and train the LSTM variables only.
        if not parameters.train_cnn:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          'deep_bidirectional_lstm')
            print('Training LSTM only')
        else:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc,
                                    global_step=global_step,
                                    var_list=trainable)
        # The train op refreshes the loss average only after the update ops
        # and the optimizer step have run.
        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # Summaries
        # ---------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            # Lookup table code -> character; unknown codes map to '?'.
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values),
                '?')

            # NOTE(review): the decoder is asked for parameters.nb_logprob
            # paths but the loops below iterate parameters.top_paths --
            # confirm that the two settings always agree.
            sparse_code_pred, log_probability = \
                tf.nn.ctc_beam_search_decoder(
                    predictions_dict['prob'],
                    sequence_length=tf.cast(seq_len_inputs, tf.int32),
                    merge_repeated=False,
                    beam_width=100,
                    top_paths=parameters.nb_logprob)

            # likelihoood. For future rename it as confidence and take
            # softmax of log_probability
            predictions_dict['score'] = log_probability

            sequence_lengths_pred = [
                tf.bincount(
                    tf.cast(sparse_code_pred[i].indices[:, 0], tf.int32),
                    minlength=tf.shape(predictions_dict['prob'])[1])
                for i in range(parameters.top_paths)
            ]

            pred_chars = [
                table_int2str.lookup(sparse_code_pred[i])
                for i in range(parameters.top_paths)
            ]

            list_preds = [
                get_words_from_chars(
                    pred_chars[i].values,
                    sequence_lengths=sequence_lengths_pred[i])
                for i in range(parameters.top_paths)
            ]

            predictions_dict['words'] = tf.stack(list_preds)

            tf.summary.text('predicted_words',
                            predictions_dict['words'][0][:10])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred[0],
                tf.cast(sparse_code_target, dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to decoding alphabet to compare predicted
            # and groundtruth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'][0],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            # NOTE(review): eval_metric_ops above already holds the
            # un-wrapped metrics and these wrapped values are not used
            # afterwards in this function.
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy],
                                message='-- Accuracy : ')
    else:
        eval_metric_ops = None

    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions_dict,
                                      loss=loss_ctc,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops,
                                      export_outputs=export_outputs,
                                      scaffold=tf.train.Scaffold())
import os

import tensorflow as tf

# Expose only the CUDA device with index 1 to this process. This is set
# before the session is created, i.e. before any GPU is touched.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

a = [0, 1, 1, 2, 2, 10]

# tf.bincount counts how often each non-negative integer value occurs in `a`.
# The context manager closes the session (and frees its resources) on exit,
# which the plain `sess = tf.Session()` form never did.
with tf.Session() as sess:
    print(sess.run(tf.bincount(a)))
def _shadownet_fun(features, labels, mode, params):
    """Multi-tower Estimator model function for the shadownet model.

    One tower is built per device through ``_tower_fn``; the tower gradients
    are averaged per variable and applied with Adadelta.

    Besides its arguments the function reads the names ``num_gpus``,
    ``variable_strategy``, ``num_workers``, ``local_device_setter`` and
    ``_tower_fn``, which are expected to be defined elsewhere in the module.

    Args:
        features: indexable by tower number; ``features[i]`` feeds tower i.
        labels: indexable by tower number; ``labels[i]`` feeds tower i.
        mode: a tf.estimator.ModeKeys value.
        params: object read for ``batch_size``, ``l_size``,
            ``learning_rate``, ``decay_steps``, ``decay_rate``, ``sync``
            and, when ``sync`` is set, ``is_chief``.

    Returns:
        tf.estimator.EstimatorSpec carrying loss, train op and training
        hooks.
    """
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    tower_features = features
    tower_labels = labels

    tower_losses = []
    tower_gradvars = []
    tower_preds = []
    tower_tensor_dict = []
    tower_seq_len = []

    num_devices = num_gpus
    device_type = 'gpu'
    tower_batch_size = int(params.batch_size / num_devices)

    for tower_id in range(num_devices):
        device_name = '/%s:%s' % (device_type, tower_id)
        setter = local_device_setter(worker_device=device_name)
        # Every tower after the first one reuses the shadownet variables.
        with tf.variable_scope('shadownet', reuse=(tower_id != 0)):
            with tf.name_scope('tower_%d' % tower_id) as name_scope:
                with tf.device(setter):
                    tower_out = _tower_fn(training,
                                          tower_features[tower_id],
                                          tower_labels[tower_id],
                                          tower_batch_size, params.l_size)
                    (loss, gradvars, preds, tensor_dict,
                     seq_len) = tower_out
                    tower_losses.append(loss)
                    tower_gradvars.append(gradvars)
                    tower_preds.append(preds)
                    tower_tensor_dict.append(tensor_dict)
                    tower_seq_len.append(seq_len)
                    if tower_id == 0:
                        # Only trigger batch_norm moving mean and variance
                        # update from the 1st tower. Ideally, we should grab
                        # the updates from all towers but these stats
                        # accumulate extremely fast so we can ignore the
                        # other stats from the other towers without
                        # significant detriment.
                        update_ops = tf.get_collection(
                            tf.GraphKeys.UPDATE_OPS, name_scope)

    # Now compute global loss and gradients.
    gradvars = []
    with tf.name_scope('gradient_averaging'):
        # variable -> list of the gradients contributed by the towers
        grads_by_var = {}
        for grad, var in itertools.chain.from_iterable(tower_gradvars):
            if grad is None:
                continue
            grads_by_var.setdefault(var, []).append(grad)

        for var in grads_by_var:
            grads = grads_by_var[var]
            # Average gradients on the same device as the variables
            with tf.device(var.device):
                if len(grads) != 1:
                    avg_grad = tf.multiply(tf.add_n(grads), 1. / len(grads))
                else:
                    avg_grad = grads[0]
            gradvars.append((avg_grad, var))

    # Device that runs the ops to apply global gradient updates.
    if variable_strategy == 'GPU':
        consolidation_device = '/gpu:0'
    else:
        consolidation_device = '/cpu:0'

    with tf.device(consolidation_device):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.exponential_decay(params.learning_rate,
                                                   global_step,
                                                   params.decay_steps,
                                                   params.decay_rate,
                                                   staircase=True)
        loss = tf.reduce_mean(tower_losses, name='loss')

        # Diagnostics are computed from the first tower only.
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(
            tower_preds[0],
            tower_seq_len[0] * np.ones(tower_batch_size),
            merge_repeated=False)
        sequence_dist = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32),
                             tower_labels[0]))
        sequence_lengths_pred = tf.bincount(
            tf.cast(decoded[0].indices[:, 0], tf.int32),
            minlength=tf.shape(tower_labels[0])[1])
        label_lengths_pred = tf.bincount(
            tf.cast(labels[0].indices[:, 0], tf.int32),
            minlength=tf.shape(tower_labels[0])[1])

        tensors_to_log = {
            'global_step': global_step,
            'learning_rate': learning_rate,
            'loss': loss
        }
        dist_to_log = dict(tensors_to_log)
        dist_to_log.update({
            'train_seq_dist': sequence_dist,
            'sequence_lengths_pred': sequence_lengths_pred,
            'label_lengths_pred': label_lengths_pred
        })

        train_hooks = [
            tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                       every_n_iter=10),
            tf.train.LoggingTensorHook(tensors=dist_to_log,
                                       every_n_iter=1000),
        ]

        summaries = [
            tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist),
            tf.summary.scalar(name='Learning_rate', tensor=learning_rate),
        ]
        # NOTE(review): this hook is constructed but never appended to
        # train_hooks -- confirm whether that is intentional.
        summary_hook = tf.train.SummarySaverHook(
            save_steps=1000,
            output_dir='/data/output/',
            summary_op=summaries)

        optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
        if params.sync:
            optimizer = tf.train.SyncReplicasOptimizer(
                optimizer, replicas_to_aggregate=num_workers)
            train_hooks.append(
                optimizer.make_session_run_hook(params.is_chief))

        # Create single grouped train op
        apply_op = optimizer.apply_gradients(
            gradvars, global_step=tf.train.get_global_step())
        train_op = tf.group(apply_op, *update_ops)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=train_hooks)
def should_continue_translating(self, model, stack):
    """Decide, per hypothesis, whether beam search should keep expanding it.

    A hypothesis is dropped once it contains the end-of-sequence token, or
    when one of the enabled pruning rules fires: maximum length
    (``self.max_len``), score spread (``self.beam_spread``), raw-score
    spread (``self.beam_spread_raw``) or beam size (``self.beam_size``).

    Note: the beam-size rule keeps the first ``beam_size`` survivors of each
    input sample, so hypotheses of a sample are assumed to be sorted by
    score, best first.

    :param model: object read for ``out_voc.eos``
    :param stack: object read for ``out``, ``best_out``, ``slices``,
        ``out_len``, ``scores``, ``best_scores``, ``raw_scores`` and
        ``best_raw_scores``
    :return: boolean tensor, True where the hypothesis should be kept
    """

    def keep_unless(keep_mask, pruned_mask):
        # keep AND (NOT pruned)
        return tf.logical_and(keep_mask, tf.logical_not(pruned_mask))

    # Finished hypotheses (those that already produced EOS) are dropped.
    has_eos = tf.reduce_any(tf.equal(stack.out, model.out_voc.eos), axis=-1)
    keep = tf.logical_not(has_eos)

    n_hypos = tf.shape(stack.out)[0]
    batch_size = tf.shape(stack.best_out)[0]
    # Index of the input sample every hypothesis belongs to.
    sample_of_hypo = hypo_to_batch_index(n_hypos, stack.slices)

    # --- prune by length ---
    if self.max_len is not None:
        short_enough = tf.less_equal(stack.out_len, self.max_len)
        if not is_scalar(self.max_len):
            # One max_len per input sentence: expand to one value per hypo.
            # NOTE(review): the comparison above happens before this gather;
            # confirm the shapes of stack.out_len and self.max_len line up.
            short_enough = tf.gather(short_enough, sample_of_hypo)
        keep = tf.logical_and(keep, short_enough)

    # --- prune by beam spread ---
    if self.beam_spread is not None:
        best_for_hypo = tf.gather(stack.best_scores, sample_of_hypo)
        too_far = tf.less(stack.scores + self.beam_spread, best_for_hypo)
        keep = keep_unless(keep, too_far)

    # Unlike the checks above this one is a truthiness test, so a
    # beam_spread_raw of 0 disables the rule as well.
    if self.beam_spread_raw:
        best_raw_for_hypo = tf.gather(stack.best_raw_scores, sample_of_hypo)
        too_far_raw = tf.less(stack.raw_scores + self.beam_spread_raw,
                              best_raw_for_hypo)
        keep = keep_unless(keep, too_far_raw)

    # --- prune everything beyond beam_size ---
    if self.beam_size is not None:
        # 1. Global running index of every surviving hypothesis, e.g.
        #    keep = [1,1,1,0,1,1,1,1,0,1] -> [0,1,2,3,3,4,5,6,7,7].
        survivor_rank = tf.cumsum(tf.cast(keep, 'int32'), exclusive=True)
        # 2. Number of survivors per input sample.
        survivors_per_sample = tf.bincount(sample_of_hypo,
                                           weights=tf.cast(keep, 'int32'),
                                           minlength=batch_size,
                                           maxlength=batch_size)
        # 3. Start offset of every sample when only survivors are counted.
        first_rank_of_sample = tf.cumsum(survivors_per_sample,
                                         exclusive=True)
        # 4. Rank of each hypothesis among the survivors of its own sample.
        rank_in_sample = survivor_rank - tf.gather(first_rank_of_sample,
                                                   sample_of_hypo)
        # 5. Drop whatever does not fit into the beam.
        beyond_beam = tf.greater_equal(rank_in_sample, self.beam_size)
        keep = keep_unless(keep, beyond_beam)

    return keep
def expected_calibration_error(y_true, y_pred, nbins=20):
    """Streaming Expected Calibration Error (ECE) metric.

    ECE summarizes calibration error in a single scalar: confidences are
    bucketed into uniformly spaced bins over [0, 1] and the gap between
    predicted and true positive rate is averaged over the bins, weighted by
    the number of samples per bin.

    Reference:
      Guo, et. al, "On Calibration of Modern Neural Networks"
      Page 2, Expected Calibration Error (ECE).
      https://arxiv.org/pdf/1706.04599.pdf

    Three local metric variables (`bin_counts`, `bin_true_sum` and
    `bin_preds_sum`, each of length `nbins`) accumulate the statistics. The
    returned `update_op` adds the current batch to them and evaluates to the
    resulting ECE.

    Args:
      y_true: 1-D tf.int64 Tensor of binarized ground truth, one entry per
        prediction in y_pred.
      y_pred: 1-D tf.float32 tensor of model confidence scores in range
        [0.0, 1.0].
      nbins: int, number of uniformly-spaced bins used to bucket y_pred.

    Returns:
      value_op: A value metric op that returns ece.
      update_op: An operation that increments the `bin_counts`,
        `bin_true_sum`, and `bin_preds_sum` variables appropriately and
        whose value matches `ece`.

    Raises:
      InvalidArgumentError: if y_pred is not in [0.0, 1.0].
    """
    bin_counts = metrics_impl.metric_variable([nbins],
                                              tf.float32,
                                              name='bin_counts')
    bin_true_sum = metrics_impl.metric_variable([nbins],
                                                tf.float32,
                                                name='true_sum')
    bin_preds_sum = metrics_impl.metric_variable([nbins],
                                                 tf.float32,
                                                 name='preds_sum')

    # Bucketing only runs once the range checks on y_pred have passed.
    range_checks = [
        tf.assert_greater_equal(y_pred, 0.0),
        tf.assert_less_equal(y_pred, 1.0),
    ]
    with tf.control_dependencies(range_checks):
        bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0],
                                                nbins=nbins)

    def accumulate(variable, weights=None):
        # Add the (optionally weighted) per-bin totals of this batch to
        # `variable`.
        per_bin = tf.bincount(bin_ids, weights=weights, minlength=nbins)
        return tf.assign_add(variable, tf.cast(per_bin, dtype=tf.float32))

    with tf.control_dependencies([bin_ids]):
        counts_update = accumulate(bin_counts)
        true_sum_update = accumulate(bin_true_sum, weights=y_true)
        preds_sum_update = accumulate(bin_preds_sum, weights=y_pred)

    ece_update_op = _ece_from_bins(counts_update,
                                   true_sum_update,
                                   preds_sum_update,
                                   name='update_op')
    ece = _ece_from_bins(bin_counts,
                         bin_true_sum,
                         bin_preds_sum,
                         name='value')
    return ece, ece_update_op
def __init__(self, preds, labels, model, num_nodes, pos_weight, norm,
             target_list, global_step, new_learning_rate, if_drop_edge=True):
    """Build the generator / community-detection losses and their train ops.

    :param preds: not used in the model (kept for interface compatibility)
    :param labels: not used in the model (kept for interface compatibility)
    :param model: the model built from cdattack.py; this method reads its
        ``vaeD_tilde``, ``realD_tilde`` and ``adj_ori_dense`` tensors, plus
        ``x_tilde_deleted`` when edges are dropped
    :param num_nodes: the number of nodes
    :param pos_weight: not used in the model
    :param norm: not used in the model
    :param target_list: groups of target node indices (the core members)
    :param global_step: the global step passed to every ``minimize`` call
    :param new_learning_rate: the learning rate of both RMSProp optimizers
    :param if_drop_edge: whether edges are dropped when learning the model.
        When truthy, ``G_min_op``, ``D_mincut_loss``, ``D_mincut_loss_clean``
        and ``D_min_op_clean`` are built; otherwise ``D_mincut_loss_test``
        is built. ``D_min_op`` is built in both cases.
    """
    self.opt_op = 0  # this is the minimize function
    self.cost = 0  # this is the loss
    self.accuracy = 0  # this is the accuracy
    self.G_comm_loss = 0
    self.G_comm_loss_KL = 0
    self.num_nodes = num_nodes
    self.if_drop_edge = if_drop_edge

    # Two optimizers: one for the generator (VAE) part, one for the
    # community-detection part. Variables are routed by name.
    self.generate_optimizer = tf.train.RMSPropOptimizer(
        learning_rate=new_learning_rate)
    self.community_optimizer = tf.train.RMSPropOptimizer(
        learning_rate=new_learning_rate)
    generate_varlist = [
        var for var in tf.trainable_variables()
        if ('generate' in var.name) or ('encoder' in var.name)
    ]
    community_varlist = [
        var for var in tf.trainable_variables() if 'community' in var.name
    ]

    # ---- KL term of the generator loss --------------------------------
    # NOTE(review): the accumulator is re-assigned on the first pair
    # (i == 0, j == 1) of every group, so only the last group with at least
    # two targets contributes. Behaviour kept as-is; confirm whether
    # summing over all groups was intended.
    for targets in target_list:
        targets_indices = [[x] for x in targets]
        self.G_target_pred = tf.gather_nd(model.vaeD_tilde, targets_indices)
        for i in range(len(targets)):
            for j in range(i + 1, len(targets)):
                # to maximize the KL is to minimize the neg KL
                neg_kl = -1 * tf.reduce_sum(
                    self.G_target_pred[i]
                    * tf.log(self.G_target_pred[i] / self.G_target_pred[j]))
                if (i == 0) and (j == 1):
                    self.G_comm_loss_KL = neg_kl
                else:
                    self.G_comm_loss_KL += neg_kl

    # ---- generator loss (only when edges are dropped) -----------------
    # NOTE(review): nesting reconstructed; the generator loss needs
    # ``self.mu`` which only exists on this branch -- confirm.
    if if_drop_edge:
        self.mu = 0
        max_other_clusters = np.max([FLAGS.n_clusters - 1, 1])
        for targets in target_list:
            target_pred = tf.gather(model.vaeD_tilde, targets)
            max_index = tf.cast(tf.argmax(target_pred, axis=1), tf.int32)
            # tf.unique returns a (values, indices) pair; taking len() of
            # that pair is always 2, so count the unique values instead.
            distinct_labels, _ = tf.unique(max_index)
            num_distinct = tf.size(distinct_labels)
            largest_community = tf.reduce_max(tf.bincount(max_index))
            self.mu += ((num_distinct - 1)
                        / (max_other_clusters * largest_community))
        self.mu = tf.cast(self.mu, tf.float32)

        eij = tf.gather_nd(model.x_tilde_deleted,
                           tf.where(model.x_tilde_deleted > 0))
        eij = tf.reduce_sum(tf.log(eij))
        self.G_comm_loss = (
            (-1) * self.mu * eij + FLAGS.G_KL_r * self.G_comm_loss_KL)
        # because the generate part is only inner product, there is no
        # variable to optimize, we should change the format and try again
        self.G_min_op = self.generate_optimizer.minimize(
            self.G_comm_loss, global_step=global_step,
            var_list=generate_varlist)

    # ---- min-cut loss for community detection -------------------------
    def mincut_objective(assignments):
        """Min-cut loss of `assignments` plus the weighted orthogonal loss."""
        adj = model.adj_ori_dense
        a_pool = tf.matmul(
            tf.transpose(tf.matmul(adj, assignments)), assignments)
        num = tf.diag_part(a_pool)
        degree = tf.matrix_diag(tf.reduce_sum(adj, axis=-1))
        d_pooled = tf.matmul(
            tf.transpose(tf.matmul(degree, assignments)), assignments)
        den = tf.diag_part(d_pooled)
        cut_loss = tf.reduce_sum(-(1 / FLAGS.n_clusters) * (num / den))
        # the orthogonal part loss; tf.eye(...) here is I_k
        st_s = (FLAGS.n_clusters / self.num_nodes) * tf.matmul(
            tf.transpose(assignments), assignments)
        ortho_loss = tf.square(tf.norm(st_s - tf.eye(FLAGS.n_clusters)))
        return cut_loss + FLAGS.mincut_r * ortho_loss

    if if_drop_edge:
        # the modified model
        self.D_mincut_loss = mincut_objective(model.vaeD_tilde)
        # at first we need to train the community detection with clean one
        self.D_mincut_loss_clean = mincut_objective(model.realD_tilde)
        self.D_min_op_clean = self.community_optimizer.minimize(
            self.D_mincut_loss_clean, global_step=global_step,
            var_list=community_varlist)
        community_loss = self.D_mincut_loss
    else:
        # the clean community detection model loss
        self.D_mincut_loss_test = mincut_objective(model.realD_tilde)
        community_loss = self.D_mincut_loss_test

    self.D_min_op = self.community_optimizer.minimize(
        community_loss, global_step=global_step,
        var_list=community_varlist)

    ## this part is not correct now
    self.correct_prediction = tf.equal(
        tf.cast(tf.greater_equal(tf.sigmoid(model.realD_tilde), 0.5),
                tf.int32),
        tf.cast(tf.ones_like(model.realD_tilde), tf.int32))
    self.D_accuracy = tf.reduce_mean(
        tf.cast(self.correct_prediction, tf.float32))
def ctc_loss(prob, labels, input_shape, alphabet, alphabet_codes, batch_size,
             n_pools=2 * 2, decode=True):
    """Build the CTC loss and, optionally, beam-search decoding and metrics.

    Args:
        prob: network output fed to ``tf.nn.ctc_loss`` as time-major
            ``inputs``. ``tf.shape(prob)[1]`` is used as the number of
            sequences when counting characters per sample.
        labels: string tensor of target texts; split into single characters
            with ``tf.string_split(delimiter='')``.
        input_shape: indexable whose element ``[1]`` is divided by
            ``n_pools`` to obtain the input sequence length (the image width,
            per the original comment).
        alphabet: iterable of characters.
        alphabet_codes: integer codes paired element-wise with ``alphabet``.
        batch_size: Python int, number of sequences in the batch.
        n_pools: total down-sampling factor along the width (2x2 pooling on
            layers 1 and 2 -> 2 * 2).
        decode: when true, also build decoding and evaluation ops.

    Returns:
        Tuple ``(loss_ctc, words, pred_score, CER, accuracy)``. ``loss_ctc``
        is the mean CTC loss. When ``decode`` is false the other four
        entries are ``None``; otherwise ``words`` are the decoded strings,
        ``pred_score`` is the log-probability gap between the two best beam
        paths, ``CER`` is the mean edit distance to the targets and
        ``accuracy`` is the result of ``tf.metrics.accuracy`` on words.
    """
    # Compute seq_len from image width
    # 2x2 pooling in dimension W on layer 1 and 2 -> n-pools = 2*2
    seq_len_inputs = tf.divide(
        [input_shape[1]] * batch_size, n_pools, name='seq_len_input_op') - 1

    # Alphabet letters and the integer codes standing in for them.
    letters = list(alphabet)

    # Convert string labels to sparse integer-code labels.
    with tf.name_scope('str2code_conversion'):
        table_str2int = tf.contrib.lookup.HashTable(
            tf.contrib.lookup.KeyValueTensorInitializer(
                letters, alphabet_codes), -1)
        # TODO change string split to utf8 split in next tf version
        splited = tf.string_split(labels, delimiter='')
        codes = table_str2int.lookup(splited.values)
        sparse_code_target = tf.SparseTensor(
            splited.indices, codes, splited.dense_shape)

    # Number of label characters per sample (row index of each sparse entry).
    seq_lengths_labels = tf.bincount(
        tf.cast(sparse_code_target.indices[:, 0], tf.int32),
        minlength=tf.shape(prob)[1])

    # Use ctc loss on probabilities from lstm output.
    # NOTE(review): the original comment says longer labels than predictions
    # are an error, but tf.less_equal only produces a boolean; as a control
    # dependency it forces evaluation and asserts nothing. Kept unchanged.
    labels_fit = tf.less_equal(
        sparse_code_target.dense_shape[1],
        tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
    with tf.control_dependencies([labels_fit]):
        loss_ctc = tf.nn.ctc_loss(
            labels=sparse_code_target,
            inputs=prob,
            sequence_length=tf.cast(seq_len_inputs, tf.int32),
            preprocess_collapse_repeated=False,
            ctc_merge_repeated=True,
            # returns zero gradient in case it happens -> ema loss = NaN
            ignore_longer_outputs_than_inputs=True,
            time_major=True)
        loss_ctc = tf.reduce_mean(loss_ctc)

    # Defaults so that the return statement is valid when decode is false
    # (previously `words` and `pred_score` were unbound on that path).
    words = None
    pred_score = None
    CER = None
    accuracy = None

    if decode:
        with tf.name_scope('code2str_conversion'):
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(
                    tf.cast(alphabet_codes, tf.int64), list(alphabet)), '?')

            sparse_code_pred, log_probability = (
                tf.nn.ctc_beam_search_decoder(
                    prob,
                    sequence_length=tf.cast(seq_len_inputs, tf.int32),
                    merge_repeated=False,
                    beam_width=100,
                    top_paths=2))

            # Score: gap between the best and the second-best path.
            pred_score = tf.subtract(
                log_probability[:, 0], log_probability[:, 1])

            sparse_code_pred = sparse_code_pred[0]
            sequence_lengths_pred = tf.bincount(
                tf.cast(sparse_code_pred.indices[:, 0], tf.int32),
                minlength=tf.shape(prob)[1])

            pred_chars = table_int2str.lookup(sparse_code_pred)
            words = get_words_from_chars(
                pred_chars.values, sequence_lengths=sequence_lengths_pred)

        with tf.name_scope('evaluation'):
            # NOTE(review): this streaming metric was immediately overwritten
            # in the original and its result is never used. The call is kept
            # only so that graph op / local-variable names stay unchanged;
            # remove once nothing depends on them.
            tf.metrics.mean(
                tf.edit_distance(
                    sparse_code_pred,
                    tf.cast(sparse_code_target, dtype=tf.int64)),
                name='CER')
            CER = tf.reduce_mean(
                tf.edit_distance(
                    sparse_code_pred,
                    tf.cast(sparse_code_target, dtype=tf.int64)),
                name='CER')

            # Convert label codes to the decoding alphabet to compare
            # predicted and ground-truth words.
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(
                target_chars.values, seq_lengths_labels)
            accuracy = tf.metrics.accuracy(
                target_words, words, name='accuracy')

    return loss_ctc, words, pred_score, CER, accuracy