def to_weighted_sum(self,
                    input_tensor,
                    num_outputs=1,
                    weight_collections=None,
                    trainable=True):
  """Returns a Tensor as linear predictions and a list of created Variable.

  The bucket ids in `input_tensor` are flattened and wrapped in a
  `SparseTensor` holding one entry per (example, source dimension) pair. That
  sparse tensor is handed to `_create_embedding_lookup` with a "sum" combiner.

  Args:
    input_tensor: Tensor of bucket ids. Its first dimension is used as the
      batch size and it is flattened to 1-D before the lookup.
    num_outputs: Forwarded unchanged to `_create_embedding_lookup`.
    weight_collections: Passed through `_add_variable_collection` before being
      forwarded to `_create_embedding_lookup`.
    trainable: Forwarded unchanged to `_create_embedding_lookup`.

  Returns:
    The result of `_create_embedding_lookup`.
  """
  dimension = self.source_column.dimension
  batch_size = array_ops.shape(input_tensor)[0]

  if dimension > 1:
    # i1 repeats every example index `dimension` times; i2 cycles through the
    # dimension index for every example.
    i1 = array_ops.reshape(
        array_ops.tile(
            array_ops.expand_dims(math_ops.range(0, batch_size), 1),
            [1, dimension]),
        [-1])
    i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
    # Flatten the bucket indices and unique them across dimensions
    # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
    # TODO(chapelle): move that logic to insert_transformed_feature to ensure
    # unique buckets across dimensions after crossing.
    bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2
  else:
    # Simpler indices when dimension=1
    i1 = math_ops.range(0, batch_size)
    i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
    bucket_indices = array_ops.reshape(input_tensor, [-1])

  indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2))))
  # The column index i2 takes values in [0, dimension), so the dense shape
  # needs `dimension` columns. A hard-coded width of 1 would leave the indices
  # out of bounds whenever dimension > 1 (and is identical when dimension=1).
  shape = math_ops.to_int64(array_ops.pack([batch_size, dimension]))
  sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
  vocab_size = self.length * self.source_column.dimension

  return _create_embedding_lookup(
      sparse_id_values, vocab_size, num_outputs,
      _add_variable_collection(weight_collections), 0., "sum", trainable,
      self.name + "_weights")
def _get_eval_ops(self, features, targets, metrics):
  """Builds the evaluation ops for this estimator.

  Args:
    features: Input features, parsed with `data_ops.ParseDataTensorOrDict`.
    targets: Labels, parsed with `data_ops.ParseLabelTensorOrDict`.
    metrics: Dict mapping a name to a callable taking
      `(probabilities, labels)`, or None to use only `self.accuracy_metric`.

  Returns:
    A dict mapping each metric name to the value returned by its callable.
  """
  features, _, spec = data_ops.ParseDataTensorOrDict(features)
  labels = data_ops.ParseLabelTensorOrDict(targets)
  for parsed in (features, labels):
    _assert_float32(parsed)

  builder = self.graph_builder_class(
      self.params,
      device_assigner=self.device_assigner,
      training=False,
      **self.construction_args)
  probabilities = builder.inference_graph(features, data_spec=spec)

  # One-hot the labels.
  if not self.params.regression:
    class_ids = math_ops.to_int64(array_ops.squeeze(labels))
    one_hot_labels = array_ops.one_hot(
        class_ids, self.params.num_classes, 1, 0)
    labels = math_ops.to_int64(one_hot_labels)

  if metrics is None:
    default_name = self.accuracy_metric
    metrics = {default_name: eval_metrics.get_metric(self.accuracy_metric)}

  return {
      name: metric(probabilities, labels)
      for name, metric in six.iteritems(metrics)
  }
def generate_sequence_output(num_encoder_symbols,
                             encoder_outputs,
                             encoder_state,
                             targets,
                             sequence_length,
                             num_decoder_symbols,
                             weights,
                             buckets,
                             softmax_loss_function=None,
                             per_example_loss=False,
                             name=None,
                             use_attention=False):
  """Runs `attention_RNN` over the encoder outputs and computes its loss.

  Args:
    num_encoder_symbols: Unused by this function.
    encoder_outputs: List passed to `attention_RNN`; also concatenated with
      `targets` and `weights` to form the op-scope inputs.
    encoder_state: Passed to `attention_RNN`.
    targets: List of target tensors; each is cast to int64 and flattened.
    sequence_length: Passed to `attention_RNN`.
    num_decoder_symbols: Passed to `attention_RNN`.
    weights: List of weights passed to the loss function.
    buckets: Sequence of pairs; only `buckets[-1][1]` is read, as the minimum
      required number of targets.
    softmax_loss_function: Forwarded to the loss function.
    per_example_loss: If true, the loss is computed with
      `sequence_loss_by_example`; if false, with `sequence_loss`. `None` is
      still accepted and treated as true, matching the historical behaviour of
      this function.
    name: Optional name for the op scope.
    use_attention: Passed to `attention_RNN`.

  Returns:
    A pair `(logits, crossent)`.

  Raises:
    ValueError: If there are fewer targets than the last bucket requires.
  """
  if len(targets) < buckets[-1][1]:
    raise ValueError("Length of targets (%d) must be at least that of last "
                     "bucket (%d)." % (len(targets), buckets[-1][1]))

  all_inputs = encoder_outputs + targets + weights
  with ops.op_scope(all_inputs, name, "model_with_buckets"):
    with variable_scope.variable_scope("decoder_sequence_output", reuse=None):
      logits, _ = attention_RNN(
          encoder_outputs,
          encoder_state,
          num_decoder_symbols,
          sequence_length,
          use_attention=use_attention)
      assert len(logits) == len(targets)

      # We need to make target an int64-tensor and set its shape.
      bucket_target = [
          array_ops.reshape(math_ops.to_int64(x), [-1]) for x in targets
      ]

      # The flag used to be tested with `is None`, so passing True silently
      # produced the aggregated loss. Honour truthy values, and keep None as
      # an alias for the per-example loss so existing callers are unaffected.
      if per_example_loss or per_example_loss is None:
        loss_fn = sequence_loss_by_example
      else:
        loss_fn = sequence_loss
      crossent = loss_fn(
          logits,
          bucket_target,
          weights,
          softmax_loss_function=softmax_loss_function)

  return logits, crossent
def _call_cell(self,
               inputs,
               initial_cell_state=None,
               initial_output=None,
               dtype=None,
               sequence_length=None):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`.
    initial_cell_state: Initial value for cell state, shape
      `[batch_size, self._num_units]`.
    initial_output: Initial value of cell output, shape
      `[batch_size, self._num_units]`.
    dtype: The data type for the initial state and expected output. Used here
      only for the zero peephole weights when peepholes are disabled.
    sequence_length: Specifies the length of each sequence in inputs. An
      `int32` or `int64` vector (tensor) of size `[batch_size]`, or None to
      use the full `time_len`.

  Returns:
    A pair containing:

    - Cell state (cs): A `3-D` tensor of shape
      `[time_len, batch_size, output_size]`
    - Output (h): A `3-D` tensor of shape
      `[time_len, batch_size, output_size]`
  """
  static_shape = inputs.get_shape().with_rank(3)
  num_steps = static_shape[0].value
  if num_steps is None:
    # Time dimension is not known statically; read it at run time.
    num_steps = array_ops.shape(inputs)[0]

  if self._use_peephole:
    wci, wco, wcf = self._w_i_diag, self._w_o_diag, self._w_f_diag
  else:
    # Without peepholes the op still takes the three vectors; feed zeros.
    wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)

  if sequence_length is None:
    longest = num_steps
  else:
    longest = math_ops.reduce_max(sequence_length)
  max_seq_len = math_ops.to_int64(longest)

  # Only the cell-state and output sequences of the op's results are used.
  _, cs, _, _, _, _, h = gen_lstm_ops.block_lstm(
      seq_len_max=max_seq_len,
      x=inputs,
      cs_prev=initial_cell_state,
      h_prev=initial_output,
      w=self._kernel,
      wci=wci,
      wcf=wcf,
      wco=wco,
      b=self._bias,
      forget_bias=self._forget_bias,
      cell_clip=self._cell_clip,
      use_peephole=self._use_peephole)
  return cs, h
def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0,
                         name=None):
  """Crosses a list of Tensor or SparseTensor objects.

  See sparse_cross_op.cc for more details.

  Args:
    inputs: List of `SparseTensor` or `Tensor` to be crossed.
    hashed_output: If true, returns the hash of the cross instead of the
      string. This avoids string manipulations.
    num_buckets: It is used if hashed_output is true.
      output = hashed_value%num_buckets if num_buckets > 0 else hashed_value.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `SparseTensor` with the crossed features.
    Return type is string if hashed_output=False, int64 otherwise.

  Raises:
    TypeError: If `inputs` is not a list, or if any element is neither a
      `SparseTensor` nor a `Tensor`.
  """
  if not isinstance(inputs, list):
    raise TypeError("Inputs must be a list")
  if not all(isinstance(i, (ops.SparseTensor, ops.Tensor)) for i in inputs):
    # Dense tensors are accepted too, so say so in the message.
    raise TypeError("All inputs must be either SparseTensor or Tensor")

  sparse_inputs = [i for i in inputs if isinstance(i, ops.SparseTensor)]
  dense_inputs = [i for i in inputs if not isinstance(i, ops.SparseTensor)]

  indices = [sp_input.indices for sp_input in sparse_inputs]
  values = [sp_input.values for sp_input in sparse_inputs]
  shapes = [sp_input.shape for sp_input in sparse_inputs]
  out_type = dtypes.int64 if hashed_output else dtypes.string

  # Any non-string input is cast to int64, and its presence switches the
  # internal type of the op to int64 as well.
  internal_type = dtypes.string
  for tensors in (values, dense_inputs):
    for pos, tensor in enumerate(tensors):
      if tensor.dtype != dtypes.string:
        tensors[pos] = math_ops.to_int64(tensor)
        internal_type = dtypes.int64

  indices_out, values_out, shape_out = (
      _sparse_feature_cross_op.sparse_feature_cross(
          indices,
          values,
          shapes,
          dense_inputs,
          hashed_output,
          num_buckets,
          out_type=out_type,
          internal_type=internal_type,
          name=name))
  return ops.SparseTensor(indices_out, values_out, shape_out)
def make_splits(self, stamp_token, next_stamp_token, class_id):
  """Create the best split using the accumulated stats and flush the state.

  Args:
    stamp_token: Passed to the stats accumulator's `flush`.
    next_stamp_token: Passed to the stats accumulator's `flush`.
    class_id: Forwarded to `build_categorical_equality_splits`.

  Returns:
    A tuple `(are_splits_ready, partition_ids, gains, split_infos)`, where
    `are_splits_ready` is always a constant True.
  """
  # Get the aggregated gradients and hessians per <partition_id, feature_id>
  # pair.
  flushed = self._stats_accumulator.flush(stamp_token, next_stamp_token)
  num_minibatches, partition_ids, feature_ids, gradients, hessians = flushed

  # For sum_reduction, we don't need to divide by number of minibatches.
  uses_sum_reduction = ops.convert_to_tensor(self._loss_uses_sum_reduction)
  num_minibatches = control_flow_ops.cond(
      uses_sum_reduction,
      lambda: math_ops.to_int64(1),
      lambda: num_minibatches)

  split_outputs = split_handler_ops.build_categorical_equality_splits(
      num_minibatches=num_minibatches,
      partition_ids=partition_ids,
      feature_ids=feature_ids,
      gradients=gradients,
      hessians=hessians,
      class_id=class_id,
      feature_column_group_id=self._feature_column_group_id,
      l1_regularization=self._l1_regularization,
      l2_regularization=self._l2_regularization,
      tree_complexity_regularization=self._tree_complexity_regularization,
      min_node_weight=self._min_node_weight,
      bias_feature_id=_BIAS_FEATURE_ID,
      multiclass_strategy=self._multiclass_strategy,
      weak_learner_type=self._weak_learner_type)
  partition_ids, gains, split_infos = split_outputs

  # There are no warm-up rounds needed in the equality column handler. So we
  # always return ready.
  return (constant_op.constant(True), partition_ids, gains, split_infos)
def Backward(*args):
  """Backward pass for the recurrent net.

  Args:
    *args: Flat argument list, unpacked according to `backward_sig`.

  Returns:
    The flattened list
    `[d_theta, d_state0, d_inputs, d_max_input_length, acc_extras]`.
  """
  # theta, state0, inputs are Forward's inputs.
  # acc_state is the accumulated 1st output of Forward.
  # acc_extras is the accumulated 2nd output of Forward.
  # d_acc_state is the gradient for acc_state.
  # d_state1 is the gradient for the final state computed by Forward.
  unpacked = _Pack(args, backward_sig)
  (theta, state0, inputs, max_input_length, acc_state, acc_extras,
   d_acc_state, d_state1) = unpacked

  # Accumulators for gradients.
  d_theta = _EmptyLike(theta)
  d_inputs = _EmptyLike(inputs)

  # Loop backwards. Note the loop's limit is open-ended, so goes through
  # t=0.
  t = max_input_length - 1
  if use_tpu:
    dev_t = math_ops.to_int32(t)
  else:
    dev_t = math_ops.to_int64(t)

  loop_state = _Flatten([
      theta, state0, inputs, acc_state, acc_extras, d_theta, d_state1,
      d_inputs, d_acc_state
  ])
  run = functional_ops.For(
      start=t,
      limit=-1,
      delta=-1,
      inputs=[dev_t] + loop_state,
      body=BackwardLoopBody,
      rewrite_with_while=compiled)

  # run[0] is the device-side loop counter; the rest mirrors the loop state.
  (theta, state0, inputs, acc_state, acc_extras, d_theta, d_state0, d_inputs,
   d_acc_state) = _Pack(run[1:], bakloop_sig)

  # The length input gets a constant zero as its gradient slot.
  d_max_input_length = array_ops.constant(0, dtype=max_input_length.dtype)
  return _Flatten(
      [d_theta, d_state0, d_inputs, d_max_input_length, acc_extras])
def index_to_string_table_from_tensor(vocabulary_list,
                                      default_value="UNK",
                                      name=None):
  """Returns a lookup table that maps a `Tensor` of indices into strings.

  This operation constructs a lookup table to map int64 indices into string
  values. The table is initialized from the 1-D string tensor
  `vocabulary_list`: each element is a value, and its position within the
  tensor is the key.

  Any input which does not have a corresponding index in `vocabulary_list`
  (an out-of-vocabulary entry) is assigned the `default_value`.

  The underlying table must be initialized by calling
  `tf.tables_initializer().run()` or `table.init.run()` once.

  Elements in `vocabulary_list` cannot have duplicates, otherwise when
  executing the table initializer op, it will throw a
  `FailedPreconditionError`.

  Sample Usages:

  ```python
  vocabulary_list = tf.constant(["emerson", "lake", "palmer"])
  indices = tf.constant([1, 5], tf.int64)
  table = tf.contrib.lookup.index_to_string_table_from_tensor(
      vocabulary_list, default_value="UNKNOWN")
  values = table.lookup(indices)
  ...
  tf.tables_initializer().run()

  values.eval() ==> ["lake", "UNKNOWN"]
  ```

  Args:
    vocabulary_list: A 1-D string `Tensor` that specifies the strings to map
      from indices.
    default_value: The value to use for out-of-vocabulary indices.
    name: A name for this op (optional).

  Returns:
    The lookup table mapping `int64` index `Tensors` to their associated
    string values.

  Raises:
    ValueError: when `vocabulary_list` is not set.
  """
  if vocabulary_list is None:
    raise ValueError("vocabulary_list must be specified.")

  with ops.name_scope(name, "index_to_string") as scope:
    vocabulary_list = ops.convert_to_tensor(vocabulary_list, dtypes.string)
    vocab_size = array_ops.size(vocabulary_list)
    # Keys are simply the positions 0..size-1 of the vocabulary entries.
    index_keys = math_ops.to_int64(math_ops.range(vocab_size))

    initializer = KeyValueTensorInitializer(
        index_keys,
        vocabulary_list,
        dtypes.int64,
        dtypes.string,
        name="table_init")
    # TODO(yleon): Use a more efficient structure.
    return HashTable(initializer, default_value, shared_name="", name=scope)
def split(self, value, lengths, name=None):
  """See TensorArray.

  Splits `value` with `list_ops.tensor_list_split` and returns a new
  `TensorArray` that wraps the resulting flow and copies this array's
  shape-inference and colocation settings.
  """
  with ops.name_scope(name, "TensorArraySplit", [self._flow, value, lengths]):
    value = ops.convert_to_tensor(value, name="value")
    lengths_64 = math_ops.to_int64(lengths)

    if self._infer_shape and not context.executing_eagerly():
      clengths = tensor_util.constant_value(lengths_64)
      # The element shape can only be inferred when all split lengths are
      # statically known and equal.
      if (value.shape.dims is not None and clengths is not None and
          clengths.max() == clengths.min()):
        leading = tensor_shape.TensorShape([clengths[0]])
        self._merge_element_shape(leading.concatenate(value.shape[1:]))

    if self._element_shape:
      element_shape = self._element_shape[0]
    else:
      element_shape = None
    flow_out = list_ops.tensor_list_split(
        tensor=value,
        lengths=lengths_64,
        element_shape=element_shape,
        name=name)

    result = TensorArray(
        dtype=self._dtype,
        handle=self.handle,
        flow=flow_out,
        colocate_with_first_write_call=self._colocate_with_first_write_call)
    result._infer_shape = self._infer_shape
    result._element_shape = self._element_shape
    result._colocate_with = self._colocate_with
    return result
def split(self, value, lengths, name=None):
  """See TensorArray.

  Splits `value` with the `_tensor_array_split_v3` op and returns a new
  `TensorArray` sharing this array's handle, with the op's output as its flow.
  """
  with ops.name_scope(name, "TensorArraySplit",
                      [self._handle, value, lengths]):
    value = ops.convert_to_tensor(value, name="value")
    with self._maybe_colocate_with(value):
      lengths_64 = math_ops.to_int64(lengths)

      if self._infer_shape and context.in_graph_mode():
        clengths = tensor_util.constant_value(lengths_64)
        # The element shape can only be inferred when all split lengths are
        # statically known and equal.
        if (value.shape.dims is not None and clengths is not None and
            clengths.max() == clengths.min()):
          leading = tensor_shape.TensorShape([clengths[0]])
          self._merge_element_shape(leading.concatenate(value.shape[1:]))

      flow_out = gen_data_flow_ops._tensor_array_split_v3(
          handle=self._handle,
          value=value,
          lengths=lengths_64,
          flow_in=self._flow,
          name=name)

    result = TensorArray(
        dtype=self._dtype,
        handle=self._handle,
        flow=flow_out,
        colocate_with_first_write_call=self._colocate_with_first_write_call)
    result._infer_shape = self._infer_shape
    result._element_shape = self._element_shape
    result._colocate_with = self._colocate_with
    return result
def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
               sequence_length):
  """Run this LSTM on inputs, starting from the given state.

  Args:
    inputs: `3-D` tensor with shape `[time_len x batch_size x input_size]`
    initial_cell_state: initial value for cell state, shape `[batch_size,
      self._num_units]`
    initial_output: initial value of cell output, shape `[batch_size,
      self._num_units]`
    dtype: The data type for the initial state and expected output.
    sequence_length: Specifies the length of each sequence in inputs. An int32
      or int64 vector (tensor) size [batch_size], or None to use the full
      `time_len`.

  Returns:
    A pair containing:

    - Cell state (cs): A `3-D` tensor of shape `[time_len x batch_size x
                       output_size]`
    - Output (h): A `3-D` tensor of shape `[time_len x batch_size x
                  output_size]`
  """
  inputs_shape = inputs.get_shape().with_rank(3)
  time_len = inputs_shape[0].value
  if time_len is None:
    # Time dimension is not known statically; read it at run time.
    time_len = array_ops.shape(inputs)[0]
  input_size = inputs_shape[2].value

  w = vs.get_variable(
      "W_0", [input_size + self._num_units, self._num_units * 4],
      dtype=dtype)
  b = vs.get_variable(
      "B", [w.get_shape().with_rank(2)[1]],
      initializer=init_ops.constant_initializer(0.0),
      dtype=dtype)

  if self._use_peephole:
    wci = vs.get_variable("W_I_diag", [self._num_units], dtype=dtype)
    wco = vs.get_variable("W_O_diag", [self._num_units], dtype=dtype)
    wcf = vs.get_variable("W_F_diag", [self._num_units], dtype=dtype)
  else:
    # Without peepholes the op still takes the three vectors; feed zeros.
    wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)

  if sequence_length is None:
    # Cast here as well: a dynamic `time_len` comes from `array_ops.shape`,
    # and the other branch always hands the op an int64 value.
    max_seq_len = math_ops.to_int64(time_len)
  else:
    max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))

  # Only the cell-state and output sequences of the op's results are used.
  _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
      seq_len_max=max_seq_len,
      x=inputs,
      cs_prev=initial_cell_state,
      h_prev=initial_output,
      w=w,
      wci=wci,
      wco=wco,
      wcf=wcf,
      b=b,
      forget_bias=self._forget_bias,
      cell_clip=self._cell_clip,
      use_peephole=self._use_peephole)
  return cs, h
def generate_single_output(encoder_state,
                           attention_states,
                           sequence_length,
                           targets,
                           num_classes,
                           buckets,
                           use_mean_attention=False,
                           softmax_loss_function=None,
                           per_example_loss=False,
                           name=None,
                           use_attention=False):
  """Runs the single-output attention decoder and computes its mean loss.

  Args:
    encoder_state: Passed to `attention_single_output_decoder`.
    attention_states: Passed to `attention_single_output_decoder`.
    sequence_length: Passed to `attention_single_output_decoder`.
    targets: List holding exactly one target tensor.
    num_classes: Used as the decoder's `output_size`.
    buckets: Unused by this function.
    use_mean_attention: Unused by this function.
    softmax_loss_function: Optional callable `(logits, targets) -> loss`. When
      None, sparse softmax cross entropy is used on the int64-cast targets.
    per_example_loss: Unused by this function.
    name: Optional name for the op scope.
    use_attention: Passed to `attention_single_output_decoder`.

  Returns:
    A pair `(bucket_outputs, loss)`, where `loss` is the summed cross entropy
    divided by the size of the first dimension of `targets[0]`.
  """
  all_inputs = targets
  with ops.op_scope(all_inputs, name, "model_with_buckets"):
    with variable_scope.variable_scope(
        variable_scope.get_variable_scope(), reuse=None):
      # Only the decoder outputs are needed; the other three results are
      # unpacked to keep the arity check and then ignored.
      (unused_attention_states, unused_attn_weights, unused_attns,
       bucket_outputs) = attention_single_output_decoder(
           encoder_state,
           attention_states,
           output_size=num_classes,
           num_heads=1,
           sequence_length=sequence_length,
           initial_state_attention=True,
           use_attention=use_attention)

      assert len(bucket_outputs) == len(targets) == 1
      decoder_logits = bucket_outputs[0]
      if softmax_loss_function is not None:
        crossent = softmax_loss_function(decoder_logits, targets[0])
      else:
        # We need to make target an int64-tensor and set its shape.
        bucket_target = array_ops.reshape(
            math_ops.to_int64(targets[0]), [-1])
        crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
            logits=decoder_logits, labels=bucket_target)

      batch_size = array_ops.shape(targets[0])[0]
      total = tf.reduce_sum(crossent)
      loss = total / math_ops.cast(batch_size, dtypes.float32)

  return bucket_outputs, loss
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) lengths: A tensor of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) input_shape = tensor_shape.unknown_shape(ndims=input_seq[0].get_shape().ndims) for input_ in input_seq: input_shape.merge_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) s_joined = array_ops.pack(input_seq) # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32 if lengths is not None: lengths = math_ops.to_int64(lengths) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unpack(s_reversed) for r in result: r.set_shape(input_shape) return result
def crf_unary_score(tag_indices, sequence_lengths, inputs):
  """Computes the unary scores of tag sequences.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials.

  Returns:
    unary_scores: A [batch_size] vector of unary scores.
  """
  batch_size = array_ops.shape(inputs)[0]
  max_seq_len = array_ops.shape(inputs)[1]
  num_tags = array_ops.shape(inputs)[2]

  flat_potentials = array_ops.reshape(inputs, [-1])

  # Offset of every (example, step) pair in the flattened potentials: a
  # per-example column vector plus a per-step row vector.
  example_offsets = math_ops.range(batch_size) * max_seq_len * num_tags
  offsets = array_ops.expand_dims(example_offsets, 1)
  step_offsets = math_ops.range(max_seq_len) * num_tags
  offsets += array_ops.expand_dims(step_offsets, 0)

  # Use int32 or int64 based on tag_indices' dtype.
  if tag_indices.dtype == dtypes.int64:
    offsets = math_ops.to_int64(offsets)
  flat_positions = array_ops.reshape(offsets + tag_indices, [-1])

  gathered = array_ops.gather(flat_potentials, flat_positions)
  unary_scores = array_ops.reshape(gathered, [batch_size, max_seq_len])

  # Zero out the scores of steps past each sequence's true length.
  masks = array_ops.sequence_mask(
      sequence_lengths,
      maxlen=array_ops.shape(tag_indices)[1],
      dtype=dtypes.float32)
  return math_ops.reduce_sum(unary_scores * masks, 1)
def _process_labels(self, labels): if labels is None: raise ValueError( 'You must provide a labels Tensor. Given: None. ' 'Suggested troubleshooting steps: Check that your data contain ' 'your label feature. Check that your input_fn properly parses and ' 'returns labels.') if isinstance(labels, sparse_tensor.SparseTensor): if labels.dtype == dtypes.string: label_ids_values = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup').lookup(labels.values) label_ids = sparse_tensor.SparseTensor( indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) else: label_ids = labels return math_ops.to_int64( sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) msg = ('labels shape must be [batch_size, {}]. ' 'Given: ').format(self._n_classes) labels_shape = array_ops.shape(labels) check_rank_op = control_flow_ops.Assert( math_ops.equal(array_ops.rank(labels), 2), data=[msg, labels_shape]) check_label_dim = control_flow_ops.Assert( math_ops.equal(labels_shape[-1], self._n_classes), data=[msg, labels_shape]) with ops.control_dependencies([check_rank_op, check_label_dim]): return array_ops.identity(labels)
def _select_last_activations(activations, sequence_lengths):
  """Selects the nth set of activations for each n in `sequence_length`.

  Returns a `Tensor` of shape `[batch_size, k]`. If `sequence_length` is not
  `None`, then `output[i, :] = activations[i, sequence_length[i] - 1, :]`. If
  `sequence_length` is `None`, then `output[i, :] = activations[i, -1, :]`.

  Args:
    activations: A `Tensor` with shape `[batch_size, padded_length, k]`.
    sequence_lengths: A `Tensor` with shape `[batch_size]` or `None`.

  Returns:
    A `Tensor` of shape `[batch_size, k]`.
  """
  with ops.name_scope(
      'select_last_activations', values=[activations, sequence_lengths]):
    dynamic_shape = array_ops.shape(activations)
    batch_size = dynamic_shape[0]
    padded_length = dynamic_shape[1]
    output_units = dynamic_shape[2]

    if sequence_lengths is None:
      sequence_lengths = padded_length

    # Activations are flattened to [batch * time, k], so example i starts at
    # row i * padded_length.
    row_starts = math_ops.to_int64(math_ops.range(batch_size) * padded_length)
    last_rows = row_starts + sequence_lengths - 1

    flat_activations = array_ops.reshape(
        activations, [batch_size * padded_length, output_units])
    selected = array_ops.gather(flat_activations, last_rows)
    # Restore the static batch and unit dimensions on the result.
    selected.set_shape([activations.shape[0], activations.shape[2]])
    return selected
def Forward(*args):
  """Forward pass of the recurrent net.

  Args:
    *args: Flat argument list, unpacked according to `forward_sig`.

  Returns:
    The flattened list `[acc_state, state1, acc_extras]`.
  """
  theta, state0, inputs, max_input_length, extras = _Pack(args, forward_sig)

  slen_dim = _SeqLenDim(inputs)

  # Creates accumulators for state0 and extras.
  acc_state = _EmptyAcc(slen_dim, state0)
  acc_extras = _EmptyAcc(slen_dim, extras)

  # With aligned-end inputs the loop covers the last `max_input_length` steps
  # of the sequence dimension; otherwise it covers the first ones.
  if self._aligned_end:
    t = slen_dim - max_input_length
  else:
    t = 0
  if use_tpu:
    dev_t = math_ops.to_int32(t)
  else:
    dev_t = math_ops.to_int64(t)

  loop_state = _Flatten([theta, state0, inputs, acc_state, acc_extras])
  run = functional_ops.For(
      start=t,
      limit=slen_dim if self._aligned_end else max_input_length,
      delta=1,
      inputs=[dev_t] + loop_state,
      body=ForwardLoopBody,
      rewrite_with_while=compiled)

  # run[0] is the device-side loop counter; the rest mirrors the loop state.
  loop_sig = [
      self._theta, self._state, self._inputs, self._state, self._extras
  ]
  _, state1, _, acc_state, acc_extras = _Pack(run[1:], loop_sig)

  return _Flatten([acc_state, state1, acc_extras])
def testDistribution(self, initial_known):
  """Checks that rejection resampling reaches the target class distribution.

  Args:
    initial_known: If true, the uniform initial distribution is given to the
      resampler; otherwise None is passed.
  """
  target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
  classes = np.random.randint(5, size=(20000,))  # Uniformly sampled
  initial_dist = [0.2] * 5 if initial_known else None
  classes = math_ops.to_int64(classes)  # needed for Windows build.

  # Each element is a (class, string-of-class) pair.
  dataset = dataset_ops.Dataset.from_tensor_slices(classes)
  dataset = dataset.shuffle(200, seed=21)
  dataset = dataset.map(lambda c: (c, string_ops.as_string(c)))
  dataset = dataset.repeat()

  resampled = dataset.apply(
      resampling.rejection_resample(
          target_dist=target_dist,
          initial_dist=initial_dist,
          class_func=lambda c, _: c,
          seed=27))
  get_next = resampled.make_one_shot_iterator().get_next()

  with self.cached_session() as sess:
    returned = []
    while len(returned) < 4000:
      returned.append(sess.run(get_next))

  returned_classes, returned_classes_and_data = zip(*returned)
  _, returned_data = zip(*returned_classes_and_data)
  # The resampler must keep each element paired with its own class.
  expected_data = [compat.as_bytes(str(c)) for c in returned_classes]
  self.assertAllEqual(expected_data, returned_data)

  total_returned = len(returned_classes)
  class_counts = np.array([
      sum(1 for v in returned_classes if v == c) for c in range(5)
  ])
  returned_dist = class_counts / total_returned
  self.assertAllClose(target_dist, returned_dist, atol=1e-2)
def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False):
  """Run an LSTM, either forward or backward.

  This is a 1D LSTM implementation using dynamic_rnn and
  the TensorFlow LSTM op.

  Args:
    inputs: input sequence (length, batch_size, ninput)
    noutput: depth of output
    scope: optional scope name
    reverse: run LSTM in reverse

  Returns:
    Output sequence (length, batch_size, noutput)
  """
  with variable_scope.variable_scope(scope, "SeqLstm", [inputs]):
    # TODO(tmb) make batch size, sequence_length dynamic
    # example: sequence_length = tf.shape(inputs)[0]
    unused_length, batch_size, unused_depth = _shape(inputs)
    lstm_cell = core_rnn_cell_impl.BasicLSTMCell(noutput, state_is_tuple=False)
    state = array_ops.zeros([batch_size, lstm_cell.state_size])

    # Every example in the batch uses the full static sequence length.
    sequence_length = int(inputs.get_shape()[0])
    per_example_lengths = array_ops.fill([batch_size], sequence_length)
    sequence_lengths = math_ops.to_int64(per_example_lengths)

    if reverse:
      inputs = array_ops.reverse_v2(inputs, [0])
    outputs, _ = rnn.dynamic_rnn(
        lstm_cell, inputs, sequence_lengths, state, time_major=True)
    if not reverse:
      return outputs
    # Undo the time reversal so outputs line up with the original inputs.
    return array_ops.reverse_v2(outputs, [0])
def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
  """Common code for SparseDenseCwise{Mul,Div} gradients.

  Args:
    op: The forward op; its inputs are
      (sp_indices, sp_vals, sp_shape, dense).
    grad: Gradient with respect to the op's output values.
    is_mul: True for the multiplication op, False for division.

  Returns:
    A tuple `(None, dx, None, dy)` matching the op's four inputs.
  """
  sp_indices = op.inputs[0]
  sp_values = op.inputs[1]
  sp_shape = op.inputs[2]
  dense = op.inputs[3]

  dense_shape = math_ops.to_int64(array_ops.shape(dense))
  # Number of leading dimensions the sparse operand has beyond the dense one.
  num_added_dims = array_ops.expand_dims(
      array_ops.size(sp_shape) - array_ops.size(dense_shape), 0)
  leading_ones = array_ops.ones(num_added_dims, ops.dtypes.int64)
  augmented_dense_shape = array_ops.concat([leading_ones, dense_shape], 0)

  scaling = sp_shape // augmented_dense_shape
  scaled_indices = sp_indices // scaling
  # Drop the index columns of the added leading dimensions.
  slice_begin = array_ops.concat([[0], num_added_dims], 0)
  scaled_indices = array_ops.slice(scaled_indices, slice_begin, [-1, -1])
  dense_vals = array_ops.gather_nd(dense, scaled_indices)

  if is_mul:
    dx = grad * dense_vals
    dy_val = grad * sp_values
  else:
    dx = grad / dense_vals
    dy_val = grad * (-sp_values / math_ops.square(dense_vals))

  # indices can repeat after scaling, so we can't use sparse_to_dense().
  sparse_dy = sparse_tensor.SparseTensor(scaled_indices, dy_val, dense_shape)
  dy = sparse_ops.sparse_add(array_ops.zeros_like(dense), sparse_dy)

  # (sp_indices, sp_vals, sp_shape, dense)
  return (None, dx, None, dy)
def _process_labels(self, labels): if isinstance(labels, sparse_tensor.SparseTensor): if labels.dtype == dtypes.string: label_ids_values = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup').lookup(labels.values) label_ids = sparse_tensor.SparseTensor( indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) else: label_ids = labels return math_ops.to_int64( sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) msg = ('labels shape must be [batch_size, {}]. ' 'Given: ').format(self._n_classes) labels_shape = array_ops.shape(labels) check_rank_op = control_flow_ops.Assert( math_ops.equal(array_ops.rank(labels), 2), data=[msg, labels_shape]) check_label_dim = control_flow_ops.Assert( math_ops.equal(labels_shape[-1], self._n_classes), data=[msg, labels_shape]) with ops.control_dependencies([check_rank_op, check_label_dim]): return array_ops.identity(labels)
def per_example_maxent_loss(labels, weights, logits, num_classes, eps=1e-15):
  """Maximum entropy loss for multiclass problems.

  Maximum entropy is a generalization of logistic loss for the case when more
  than 2 classes are present.

  Args:
    labels: Rank 2 (N, 1) or Rank 1 (N) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights, or None for an
      unweighted loss.
    logits: Rank 2 (N, K) tensor of per-example predictions, K - num of
      classes.
    num_classes: number of classes in classification task. Used to expand
      label indices into one-hot encodings.
    eps: tolerance, used as a minimum possible value.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example maxent loss
    update_op: An update operation to update the loss's internal state.
      This is a no-op here.
  """
  labels = math_ops.to_int64(labels)
  # If labels are of rank 1, make them rank 2.
  if len(labels.get_shape()) != 2:
    labels = array_ops.expand_dims(labels, 1)

  # Labels are indices of classes, convert them to one hot encodings.
  target_one_hot = array_ops.one_hot(indices=labels, depth=num_classes)
  labels = math_ops.reduce_sum(
      input_tensor=target_one_hot, reduction_indices=[1])
  labels = math_ops.to_float(labels)

  # Calculate softmax probabilities for each class.
  unnormalized_probs = math_ops.exp(logits)
  normalizers = math_ops.reduce_sum(unnormalized_probs, 1, keepdims=True)
  softmax_predictions = math_ops.divide(
      unnormalized_probs, math_ops.add(normalizers, eps))

  # Pull out the probabilities for real label.
  true_class_probs = math_ops.reduce_sum(labels * softmax_predictions, 1)

  # Add handling for values near 0 and 1.
  lower_bound = array_ops.zeros_like(
      true_class_probs, dtype=logits.dtype) + eps
  upper_bound = array_ops.ones_like(
      true_class_probs, dtype=logits.dtype) - eps

  # Take maximum(eps, pred)
  at_least_eps = (true_class_probs >= eps)
  true_class_probs = array_ops.where(
      at_least_eps, true_class_probs, lower_bound)

  # Take minimum(1-eps, pred)
  at_most_one_minus_eps = (true_class_probs <= 1 - eps)
  true_class_probs = array_ops.where(
      at_most_one_minus_eps, true_class_probs, upper_bound)

  unweighted_loss = array_ops.expand_dims(-math_ops.log(true_class_probs), 1)
  if weights is None:
    loss = unweighted_loss
  else:
    loss = unweighted_loss * weights
  return loss, control_flow_ops.no_op()
def _lengths_to_masks(lengths, max_length):
  """Creates a binary matrix that can be used to mask away padding.

  Args:
    lengths: A vector of integers representing lengths.
    max_length: An integer indicating the maximum length. All values in
      lengths should be less than max_length.

  Returns:
    masks: Masks that can be used to get rid of padding.
  """
  # One row of positions 0..max_length-1, repeated once per entry of lengths.
  position_row = array_ops.expand_dims(math_ops.range(max_length), 0)
  num_rows = array_ops.shape(lengths)[0]
  tiled_ranges = array_ops.tile(position_row, [num_rows, 1])

  lengths = array_ops.expand_dims(lengths, 1)
  # A position is kept (1.0) when it lies strictly before the row's length.
  in_range = math_ops.to_int64(tiled_ranges) < math_ops.to_int64(lengths)
  return math_ops.to_float(in_range)
def to_dnn_input_layer(self,
                       input_tensor,
                       weight_collections=None,
                       trainable=True):
  """Returns a one-hot encoding of the bucket ids, flattened per example.

  Args:
    input_tensor: Tensor of bucket ids; cast to int64 before encoding.
    weight_collections: Unused by this method.
    trainable: Unused by this method.

  Returns:
    A tensor reshaped to
    `[-1, self.length * self.source_column.dimension]`.
  """
  bucket_ids = math_ops.to_int64(input_tensor)
  one_hot = array_ops.one_hot(bucket_ids, self.length, 1., 0.)
  flat_width = self.length * self.source_column.dimension
  return array_ops.reshape(one_hot, [-1, flat_width])
def _make_dense_split(quantile_accumulator_handle, stats_accumulator_handle,
                      stamp_token, next_stamp_token, multiclass_strategy,
                      class_id, feature_column_id, l1_regularization,
                      l2_regularization, tree_complexity_regularization,
                      min_node_weight, is_multi_dimentional,
                      loss_uses_sum_reduction, weak_learner_type):
  """Function that builds splits for a dense feature column.

  Reads the bucket boundaries from the quantile accumulator, flushes both the
  quantile and the stats accumulators, and feeds the flushed stats to
  `build_dense_inequality_splits`.

  Returns:
    A tuple `(are_splits_ready, partition_ids, gains, split_infos)`.
  """
  # Get the bucket boundaries
  ready_per_handle, buckets_per_handle = (
      gen_quantile_ops.quantile_accumulator_get_buckets(
          quantile_accumulator_handles=[quantile_accumulator_handle],
          stamp_token=stamp_token))
  # quantile_accumulator_get_buckets returns a list of results per handle that
  # we pass to it. In this case we're getting results just for one resource.
  are_splits_ready = ready_per_handle[0]
  buckets = buckets_per_handle[0]

  # After we receive the boundaries from previous iteration we can flush
  # the quantile accumulator.
  with ops.control_dependencies([buckets]):
    flush_quantiles = gen_quantile_ops.quantile_accumulator_flush(
        quantile_accumulator_handle=quantile_accumulator_handle,
        stamp_token=stamp_token,
        next_stamp_token=next_stamp_token)

  # Tensor-valued and scalar-valued stats use different flush ops that share
  # one signature.
  if is_multi_dimentional:
    flush_stats = gen_stats_accumulator_ops.stats_accumulator_tensor_flush
  else:
    flush_stats = gen_stats_accumulator_ops.stats_accumulator_scalar_flush
  num_minibatches, partition_ids, bucket_ids, gradients, hessians = (
      flush_stats(stats_accumulator_handle, stamp_token, next_stamp_token))

  # For sum_reduction, we don't need to divide by number of minibatches.
  num_minibatches = control_flow_ops.cond(
      loss_uses_sum_reduction,
      lambda: math_ops.to_int64(1),
      lambda: num_minibatches)

  # Put quantile and stats accumulator flushing in the dependency path.
  with ops.control_dependencies([flush_quantiles, partition_ids]):
    are_splits_ready = array_ops.identity(are_splits_ready)

  partition_ids, gains, split_infos = (
      split_handler_ops.build_dense_inequality_splits(
          num_minibatches=num_minibatches,
          bucket_boundaries=buckets,
          partition_ids=partition_ids,
          bucket_ids=bucket_ids,
          gradients=gradients,
          hessians=hessians,
          class_id=class_id,
          feature_column_group_id=feature_column_id,
          l1_regularization=l1_regularization,
          l2_regularization=l2_regularization,
          tree_complexity_regularization=tree_complexity_regularization,
          min_node_weight=min_node_weight,
          multiclass_strategy=multiclass_strategy,
          weak_learner_type=weak_learner_type))
  return are_splits_ready, partition_ids, gains, split_infos
def shortlist_insert():
  """Inserts the candidates selected by `larger_scores` into the shortlist.

  Returns:
    An op grouping the scatter updates of `self.sl_ids` and `self.sl_scores`.
  """
  candidate_ids = array_ops.boolean_mask(math_ops.to_int64(ids), larger_scores)
  candidate_scores = array_ops.boolean_mask(scores, larger_scores)

  shortlist_ids, new_ids, new_scores = tensor_forest_ops.top_n_insert(
      self.sl_ids, self.sl_scores, candidate_ids, candidate_scores)

  # Write the new ids and scores at the shortlist slots chosen by the op.
  updates = [
      state_ops.scatter_update(self.sl_ids, shortlist_ids, new_ids),
      state_ops.scatter_update(self.sl_scores, shortlist_ids, new_scores),
  ]
  return control_flow_ops.group(*updates)
def _SparseReduceSumGrad(op, out_grad):
    """Gradient for SparseReduceSum, analogous to that of tf.reduce_sum().

    Only the sparse values (second op input) receive a gradient; the indices,
    shape and reduction axes get `None`.
    """
    indices = op.inputs[0]
    dense_shape = op.inputs[2]
    reduction_axes = op.inputs[3]

    kept_dims_shape = math_ops.reduced_shape(dense_shape, reduction_axes)
    grad_kept_dims = array_ops.reshape(out_grad, kept_dims_shape)
    # Integer-dividing an input index by `scale` maps it onto the matching
    # position of the reshaped output gradient.
    scale = dense_shape // math_ops.to_int64(kept_dims_shape)
    values_grad = array_ops.gather_nd(grad_kept_dims, indices // scale)

    # (sparse_indices, sparse_values, sparse_shape, reduction_axes)
    return (None, values_grad, None, None)
def func_body_augmented_pam(iteration, chosen_ids):
    """Loop body: runs update_medoid_per_cluster for cluster `iteration`.

    Reads `predictions`, `pairwise_distances`, `labels`, `margin_multiplier`
    and `margin_type` from the enclosing scope.

    Returns:
      `(iteration + 1, chosen_ids)` with `chosen_ids` as returned by
      `update_medoid_per_cluster`.
    """
    in_this_cluster = math_ops.equal(
        math_ops.to_int64(predictions), math_ops.to_int64(iteration))
    this_cluster_ids = array_ops.where(in_this_cluster)

    # Select the cluster's rows, then (via transpose) its columns, and
    # transpose back.
    cluster_rows = array_ops.gather(pairwise_distances, this_cluster_ids)
    cluster_rows_and_cols = array_ops.gather(
        array_ops.transpose(cluster_rows), this_cluster_ids)
    pairwise_distances_subset = array_ops.transpose(cluster_rows_and_cols)

    updated_ids = update_medoid_per_cluster(
        pairwise_distances, pairwise_distances_subset, labels, chosen_ids,
        this_cluster_ids, iteration, margin_multiplier, margin_type)
    return iteration + 1, updated_ids
def _logits_to_prediction(self, logits=None):
    """Builds the predictions dict (logits, probabilities, classes).

    When `logits_dimension` is 1, a LOGISTIC entry is added and a zero column
    is prepended to the logits before probabilities and classes are derived.
    """
    predictions = {PredictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
        predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
        zero_logits = array_ops.zeros_like(logits)
        logits = array_ops.concat(1, [zero_logits, logits])

    probabilities = math_ops.sigmoid(logits)
    is_positive = math_ops.greater(logits, 0)
    predictions[PredictionKey.PROBABILITIES] = probabilities
    predictions[PredictionKey.CLASSES] = math_ops.to_int64(is_positive)
    return predictions
def _dense_to_sparse_tensor(dense_tensor):
    """Converts `dense_tensor` to a SparseTensor holding its non-zero entries."""
    ignore_value = math_ops.cast(0.0, dense_tensor.dtype)
    is_kept = math_ops.not_equal(dense_tensor, ignore_value)
    indices = array_ops.where(is_kept)
    values = array_ops.gather_nd(dense_tensor, indices)
    # SparseTensor needs the shape to be converted to int64.
    dense_shape = math_ops.to_int64(array_ops.shape(dense_tensor))
    return ops.SparseTensor(indices, values, shape=dense_shape)
def _my_metric_op(predictions, targets):
    """Returns `predictions` (cast to int64) plus `targets`."""
    int64_predictions = math_ops.to_int64(predictions)
    return tf.add(int64_predictions, targets)
def __init__(self, is_training, config, num_steps, model_name,
             flag_with_saver=False,
             model_root='./cache/models/mscoco',
             flag_reset_state=False):
    """Builds the captioning model graph.

    Creates the model/variable directories, the input placeholders, the word
    RNN, the fusion of text and visual features selected by
    `config.multimodal_type` ('mrnn' or 'init'), the masked cross-entropy
    cost and, when `is_training` is true, the optimizer and train op.

    Args:
      is_training: Enables dropout and the construction of the training ops.
      config: Object providing the hyper-parameters read below (batch_size,
        rnn_size, emb_size, vocab_size, vf_size, seq_len, hidden_img,
        rnn_type, num_rnn_layers, keep_prob_*, multimodal_type,
        max_grad_norm and optionally mm_size / optimizer).
      num_steps: Number of unrolled time steps of the word RNN.
      model_name: Sub-directory name under `model_root`.
      flag_with_saver: If true, a `tf.train.Saver` is created.
      model_root: Root directory for model files.
      flag_reset_state: If true, the initial RNN state is a placeholder
        instead of being computed.

    Raises:
      NameError: On an unknown `rnn_type`, `multimodal_type` or `optimizer`.
    """
    # Set up paths and dirs
    self.cu = CommonUtiler()
    self.model_dir = os.path.join(model_root, model_name)
    self.variable_dir = os.path.join(self.model_dir, 'variables')
    self.cu.create_dir_if_not_exists(self.model_dir)
    self.cu.create_dir_if_not_exists(self.variable_dir)

    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps
    rnn_size = config.rnn_size
    emb_size = config.emb_size
    vocab_size = config.vocab_size
    vf_size = config.vf_size
    seq_len = config.seq_len
    hidden_img = config.hidden_img

    # Inputs to the model
    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])
    # Visual features are 3-D: [batch_size, sequence length, input size].
    # The earlier 2-D variants are kept for reference:
    #self._visual_features = tf.placeholder(tf.float32, [batch_size, vf_size])
    self.input_vf = tf.placeholder(tf.float32, [batch_size, seq_len, vf_size])
    #self.input_vf = tf.placeholder(tf.float32, [batch_size, vf_size])
    self._valid_flags = tf.placeholder(tf.float32, [batch_size, num_steps])
    self._seq_lens = tf.placeholder(tf.int32, [batch_size])

    # Create rnn cell
    if config.rnn_type == 'GRU':
        rnn_cell_basic = tf.nn.rnn_cell.GRUCell(rnn_size)
    elif config.rnn_type == 'LSTM':
        rnn_cell_basic = tf.nn.rnn_cell.LSTMCell(rnn_size, input_size=emb_size,
            use_peepholes=True)
    else:
        raise NameError("Unknown rnn type %s!" % config.rnn_type)
    if is_training and config.keep_prob_rnn < 1:
        rnn_cell_basic = tf.nn.rnn_cell.DropoutWrapper(
            rnn_cell_basic, output_keep_prob=config.keep_prob_rnn)
    # The same cell object is repeated for every layer.
    cell = tf.nn.rnn_cell.MultiRNNCell([rnn_cell_basic] * config.num_rnn_layers)
    state_size = cell.state_size

    # Create word embeddings
    self._embedding = embedding = tf.get_variable("embedding",
        [vocab_size, emb_size])
    inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob_emb < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob_emb)

    # Different ways to fuse text and visual information
    if config.multimodal_type == 'mrnn':
        mm_size = config.mm_size
        # Run RNNs
        if flag_reset_state:
            self._initial_state = initial_state = tf.placeholder(tf.float32,
                [batch_size, state_size])
        else:
            self._initial_state = initial_state = cell.zero_state(
                batch_size, tf.float32)
        # Split the embedded inputs into a list of num_steps per-step tensors.
        inputs = [tf.squeeze(input_, [1])
            for input_ in tf.split(1, num_steps, inputs)]
        outputs_rnn, state = tf.nn.rnn(cell, inputs,
            initial_state=initial_state,
            sequence_length=self._seq_lens)
        self._final_state = state
        output_rnn = tf.reshape(tf.concat(1, outputs_rnn), [-1, rnn_size])

        # Map RNN output to multimodal space
        w_r2m = tf.get_variable("w_r2m", [rnn_size, mm_size])
        b_r2m = tf.get_variable("b_r2m", [mm_size])
        multimodal_l = tf.nn.relu(tf.matmul(output_rnn, w_r2m) + b_r2m)

        # Map Visual feature to multimodal space
        #---------------------------------------------------------------
        # An LSTM over the visual feature sequence was added here; the slice
        # taken at the last index of its (transposed) output is used as the
        # visual feature vector.
        # NOTE(review): the source had lost its indentation, so the extent of
        # this variable scope is inferred; confirm that it ends before the
        # `w_vf2m` / `b_vf2m` variables are created.
        with tf.variable_scope('scope_limited'):
            cell_img = tf.nn.rnn_cell.LSTMCell(hidden_img,state_is_tuple=True)
            val_img, state_img = tf.nn.dynamic_rnn(cell_img, self.input_vf, dtype=tf.float32)
            val_img = tf.transpose(val_img, [1, 0, 2])
            last_img = tf.gather(val_img, int(val_img.get_shape()[0]) - 1)
        self._visual_features = last_img
        #self._visual_features = tf.constant(np.random.rand(batch_size,vf_size), shape=[batch_size, vf_size], dtype="float32")
        #self._visual_features = self.input_vf
        #----------------------------------------------------------------
        w_vf2m = tf.get_variable("w_vf2m", [hidden_img, mm_size])
        b_vf2m = tf.get_variable("b_vf2m", [mm_size])
        mm_vf_single = tf.nn.relu(
            tf.matmul(self._visual_features, w_vf2m) + b_vf2m)
        # Tile the per-example visual embedding num_steps times and flatten it
        # to two dimensions before adding it to `multimodal_l`.
        mm_vf = tf.reshape(tf.tile(mm_vf_single, [1, num_steps]), [-1, mm_size])
        multimodal_l = multimodal_l + mm_vf
        if is_training and config.keep_prob_mm < 1:
            multimodal_l = tf.nn.dropout(multimodal_l, config.keep_prob_mm)

        # Map multimodal space to word space
        w_m2w = tf.get_variable("w_m2w", [mm_size, emb_size])
        b_m2w = tf.get_variable("b_m2w", [emb_size])
        output = tf.nn.relu(tf.matmul(multimodal_l, w_m2w) + b_m2w)

    elif config.multimodal_type == 'init':
        # Mapping visual feature to the RNN state
        w_vf2state = tf.get_variable("w_vf2state", [vf_size, state_size])
        b_vf2state = tf.get_variable("b_vf2state", [state_size])
        if flag_reset_state:
            self._initial_state = initial_state = tf.placeholder(tf.float32,
                [batch_size, state_size])
        else:
            # NOTE(review): within this constructor `self._visual_features` is
            # only assigned in the 'mrnn' branch (the placeholder above is
            # commented out), so this read looks like it would fail unless the
            # attribute is provided elsewhere - confirm.
            self._initial_state = initial_state = tf.nn.relu(
                tf.matmul(self._visual_features, w_vf2state) + b_vf2state)

        # Run RNNs
        inputs = [tf.squeeze(input_, [1])
            for input_ in tf.split(1, num_steps, inputs)]
        outputs_rnn, state = tf.nn.rnn(cell, inputs,
            initial_state=initial_state,
            sequence_length=self._seq_lens)
        self._final_state = state
        output_rnn = tf.reshape(tf.concat(1, outputs_rnn), [-1, rnn_size])

        # Map RNN output to word space
        w_m2w = tf.get_variable("w_m2w", [rnn_size, emb_size])
        b_m2w = tf.get_variable("b_m2w", [emb_size])
        output = tf.nn.relu(tf.matmul(output_rnn, w_m2w) + b_m2w)

    else:
        raise NameError("Unknown multimodal type %s!" % config.multimodal_type)

    # Build the softmax loss (computed over the full vocabulary).
    # The output projection shares its weights with the word embedding
    # (transposed).
    w_loss = tf.transpose(embedding)
    b_loss = tf.get_variable("b_loss", [vocab_size])
    self._logit = logit = tf.matmul(output, w_loss) + b_loss

    target = tf.reshape(math_ops.to_int64(self._targets), [-1])
    valid_flag = tf.reshape(self._valid_flags, [-1])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logit, target)
    # Average of the per-position losses weighted by `valid_flag`; the small
    # epsilon avoids a division by zero when the flags sum to zero.
    self._cost = cost = tf.reduce_sum(loss * valid_flag) / (
        tf.reduce_sum(valid_flag) + 1e-12)

    # Create saver if necessary
    if flag_with_saver:
        self.saver = tf.train.Saver(max_to_keep=None)
    else:
        self.saver = None

    # Return the model if it is just for inference
    if not is_training:
        return

    # Create learning rate and gradients optimizer
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
        config.max_grad_norm)
    # NOTE(review): `self.lr` is not defined in this block; presumably a
    # property exposing `self._lr` - confirm.
    if hasattr(config, 'optimizer'):
        if config.optimizer == 'ori':
            optimizer = tf.train.GradientDescentOptimizer(self.lr)
        elif config.optimizer == 'ada':  # No GPU
            optimizer = tf.train.AdagradOptimizer(self.lr)
        elif config.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(self.lr)
        elif config.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(self.lr)
        else:
            raise NameError("Unknown optimizer type %s!" % config.optimizer)
    else:
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
               sequence_length):
    """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      initial_cell_state: initial value for cell state, shape `[batch_size,
        self._num_units]`
      initial_output: initial value of cell output, shape `[batch_size,
        self._num_units]`
      dtype: The data type for the initial state and expected output.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in
        `[0, time_len)` or None.

    Returns:
      A pair containing:
      - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
        output_size]`
      - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
        output_size]`
    """
    inputs_shape = inputs.get_shape().with_rank(3)
    time_len = inputs_shape[0].value
    if time_len is None:
        # Time dimension is not statically known; fall back to the dynamic
        # shape.
        time_len = array_ops.shape(inputs)[0]
    input_size = inputs_shape[2].value

    w = vs.get_variable(
        "weights",
        [input_size + self._num_units, self._num_units * 4],
        dtype=dtype)
    b = vs.get_variable(
        "biases", [w.get_shape().with_rank(2)[1]],
        initializer=init_ops.constant_initializer(0.0),
        dtype=dtype)
    if self._use_peephole:
        wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype)
        wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype)
        wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype)
    else:
        # The op always takes peephole weights; pass zeros when unused.
        wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)

    # Cast in both branches: with a dynamic time dimension `time_len` is an
    # int32 tensor, whereas the op is fed an int64 `seq_len_max` in the other
    # branch.
    if sequence_length is None:
        max_seq_len = math_ops.to_int64(time_len)
    else:
        max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))

    _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
        seq_len_max=max_seq_len,
        x=inputs,
        cs_prev=initial_cell_state,
        h_prev=initial_output,
        w=w,
        wci=wci,
        wco=wco,
        wcf=wcf,
        b=b,
        forget_bias=self._forget_bias,
        cell_clip=self._cell_clip,
        use_peephole=self._use_peephole)
    return cs, h
def _call_cell(self, inputs, initial_cell_state=None, initial_output=None,
               dtype=None, sequence_length=None):
    """Run this LSTM on inputs, starting from the given state.

    Dropout (when `self._dropout > 0`) is applied to the inputs before the
    fused op, and the residual connection (when enabled) adds the original,
    pre-dropout inputs to the output; the fused op itself is called with
    `use_residue=False` and `use_dropout=False`.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      initial_cell_state: initial value for cell state, shape `[batch_size,
        self._num_units]`
      initial_output: initial value of cell output, shape `[batch_size,
        self._num_units]`
      dtype: The data type for the initial state and expected output.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in
        `[0, time_len)` or None.

    Returns:
      A pair containing:
      - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
        output_size]`
      - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
        output_size]`
    """
    inputs_shape = inputs.get_shape().with_rank(3)
    time_len = inputs_shape[0].value
    if time_len is None:
        time_len = array_ops.shape(inputs)[0]

    if self._use_peephole:
        wci = self._w_i_diag
        wco = self._w_o_diag
        wcf = self._w_f_diag
    else:
        # The op always takes peephole weights; pass zeros when unused.
        wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)

    if sequence_length is None:
        max_seq_len = math_ops.to_int64(time_len)
    else:
        max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))

    print(" Xsmm LSTM Fused Cell: dropout = %.3f, Residue = %s" %
          (self._dropout, self._residual_connection))

    # Keep the pre-dropout inputs for the residual connection.
    orig_inputs = inputs
    if self._dropout > 0.0:
        inputs = tf.nn.dropout(inputs, 1 - self._dropout)

    _, cs, _, _, _, _, h = xsmm_lstm.xsmm_fused_lstm(
        seq_len_max=max_seq_len,
        x=inputs,
        cs_prev=initial_cell_state,
        h_prev=initial_output,
        w=self._kernel,
        wci=wci,
        wcf=wcf,
        wco=wco,
        b=self._bias,
        forget_bias=self._forget_bias,
        cell_clip=self._cell_clip,
        use_peephole=self._use_peephole,
        use_residue=False,
        use_dropout=False)

    if self._residual_connection:
        with tf.name_scope("fused_residual_connection"):
            h = h + orig_inputs
    return cs, h
def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores,
                                      tp_tensor, fp_tensor,
                                      remove_zero_labels=True,
                                      metrics_collections=None,
                                      updates_collections=None,
                                      name=None):
    """Streaming computation of precision / recall arrays.

    Accumulates, over batches, the number of groundtruth boxes, the number of
    detections and the flattened score / true-positive / false-positive
    arrays in local variables, and derives precision / recall from them with
    `_precision_recall`.

    Returns:
      A pair `(value, update_op)`, both as returned by `_precision_recall`.
    """
    with variable_scope.variable_scope(
            name, 'stream_precision_recall',
            [n_gbboxes, rclasses, tp_tensor, fp_tensor]):
        num_objects = math_ops.to_int64(n_gbboxes)
        classes = math_ops.to_int64(rclasses)
        scores = math_ops.to_float(rscores)
        stype = tf.int32
        true_pos = tf.cast(tp_tensor, stype)
        false_pos = tf.cast(fp_tensor, stype)

        # Flatten everything to 1-D.
        classes = tf.reshape(classes, [-1])
        scores = tf.reshape(scores, [-1])
        true_pos = tf.reshape(true_pos, [-1])
        false_pos = tf.reshape(false_pos, [-1])
        if remove_zero_labels:
            # Drop the entries whose class is 0.
            keep = tf.greater(classes, 0)
            classes = tf.boolean_mask(classes, keep)
            scores = tf.boolean_mask(scores, keep)
            true_pos = tf.boolean_mask(true_pos, keep)
            false_pos = tf.boolean_mask(false_pos, keep)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_ndetections', shape=[],
                                      dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)

        def _append(variable, batch_values):
            # The variable grows with every batch, hence no shape validation.
            grown = tf.concat([variable, batch_values], axis=0)
            return state_ops.assign(variable, grown, validate_shape=False)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(num_objects))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(scores, out_type=tf.int32))
        scores_op = _append(v_scores, scores)
        tp_op = _append(v_tp, true_pos)
        fp_op = _append(v_fp, false_pos)

        # Precision and recall computations.
        r = _precision_recall(v_nobjects, v_ndetections, v_scores,
                              v_tp, v_fp, 'value')

        update_deps = [nobjects_op, ndetections_op, scores_op, tp_op, fp_op]
        with ops.control_dependencies(update_deps):
            update_op = _precision_recall(nobjects_op, ndetections_op,
                                          scores_op, tp_op, fp_op,
                                          'update_op')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, r)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return r, update_op
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single step of Beam Search Decoding.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search.
        An instance of `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int.  The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.

    Returns:
      A pair `(output, next_state)`: a `BeamSearchDecoderOutput` and the new
      `BeamSearchDecoderState`.
    """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Lengths and finished flags of the current hypotheses.
    current_lengths = beam_state.lengths
    finished_before = beam_state.finished

    # Total log probs of the new hypotheses.
    # Final Shape: [batch_size, beam_width, vocab_size]
    masked_step_log_probs = _mask_probs(
        nn_ops.log_softmax(logits), end_token, finished_before)
    total_probs = (
        array_ops.expand_dims(beam_state.log_probs, 2) + masked_step_log_probs)

    # Continuation lengths: only unfinished beams grow, and choosing
    # `end_token` adds nothing (one_hot puts 0 there, 1 elsewhere).
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    end_token_grid = array_ops.fill([batch_size, beam_width], end_token)
    continuation_increment = array_ops.one_hot(
        indices=end_token_grid,
        depth=vocab_size,
        on_value=np.int64(0),
        off_value=np.int64(1),
        dtype=dtypes.int64)
    unfinished_mask = math_ops.to_int64(math_ops.logical_not(finished_before))
    continuation_increment = (
        continuation_increment * array_ops.expand_dims(unfinished_mask, 2))
    candidate_lengths = (
        continuation_increment + array_ops.expand_dims(current_lengths, 2))

    # Scores of every candidate continuation.
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=candidate_lengths,
        length_penalty_weight=length_penalty_weight)

    # `time` and `scores_shape` are not used below; they are kept so that the
    # same ops are created as before.
    time = ops.convert_to_tensor(time, name="time")
    scores_shape = array_ops.shape(scores)
    scores_flat = array_ops.reshape(scores, [batch_size, -1])

    # Keep the `beam_width` best candidates of every batch entry.
    next_beam_size = ops.convert_to_tensor(
        beam_width, dtype=dtypes.int32, name="beam_width")
    next_beam_scores, word_indices = nn_ops.top_k(scores_flat,
                                                  k=next_beam_size)
    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen
    # predictions.
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=total_probs,
        batch_size=batch_size,
        range_size=beam_width * vocab_size,
        gather_shape=[-1],
        name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results
    # of the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                     name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Finished flags of the parent beam of every surviving hypothesis.
    parent_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=finished_before,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(
        parent_finished,
        math_ops.equal(next_word_ids, end_token),
        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    length_increment = math_ops.to_int64(math_ops.logical_not(parent_finished))
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len = next_prediction_len + length_increment

    def _gather_cell_state(gather_from):
        # A different gather_shape is used here because the cell_state
        # tensors all have dimension greater than two and those dimensions
        # must be preserved.
        return _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1])

    # Pick out the cell_states according to the next_beam_ids.
    next_cell_state = nest.map_structure(_gather_cell_state, next_cell_state)

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished)
    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)
    return output, next_state
def _assert_integer_form(x):
    """Asserts that `x` is unchanged by a round trip through int64.

    Passes for integers and for floats that are equal to integers.
    """
    x = ops.convert_to_tensor(x, name='x')
    as_int64 = math_ops.to_int64(x)
    round_tripped = math_ops.cast(math_ops.round(as_int64), x.dtype)
    return check_ops.assert_equal(x, round_tripped)
def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0,
                         name=None, hash_key=None):
    """Crosses a list of Tensor or SparseTensor objects.

    See sparse_feature_cross_kernel.cc for more details.

    Args:
      inputs: List of `SparseTensor` or `Tensor` to be crossed.
      hashed_output: If true, returns the hash of the cross instead of the
        string. This will allow us avoiding string manipulations.
      num_buckets: It is used if hashed_output is true.
        output = hashed_value%num_buckets if num_buckets > 0 else
        hashed_value.
      name: A name prefix for the returned tensors (optional).
      hash_key: Specify the hash_key that will be used by the
        `FingerprintCat64` function to combine the crosses fingerprints on
        SparseFeatureCrossOp. When falsy (the default is None), the op
        variant without a hash key is used (optional).

    Returns:
      A `SparseTensor` with the crossed features.
      Return type is string if hashed_output=False, int64 otherwise.

    Raises:
      TypeError: If `inputs` is not a list, or if any of its elements is
        neither a SparseTensor nor a Tensor.
    """
    if not isinstance(inputs, list):
        raise TypeError("Inputs must be a list")
    if not all(isinstance(i, (sparse_tensor.SparseTensor, ops.Tensor))
               for i in inputs):
        raise TypeError("All inputs must be either SparseTensor or Tensor")

    sparse_inputs = [
        i for i in inputs if isinstance(i, sparse_tensor.SparseTensor)]
    dense_inputs = [
        i for i in inputs if not isinstance(i, sparse_tensor.SparseTensor)]

    indices = [sp_input.indices for sp_input in sparse_inputs]
    values = [sp_input.values for sp_input in sparse_inputs]
    shapes = [sp_input.dense_shape for sp_input in sparse_inputs]
    out_type = dtypes.int64 if hashed_output else dtypes.string

    # Every non-string input is cast to int64; a single one of them switches
    # the op's internal type to int64.
    internal_type = dtypes.string
    for i, value in enumerate(values):
        if value.dtype != dtypes.string:
            values[i] = math_ops.to_int64(value)
            internal_type = dtypes.int64
    for i, dense_input in enumerate(dense_inputs):
        if dense_input.dtype != dtypes.string:
            dense_inputs[i] = math_ops.to_int64(dense_input)
            internal_type = dtypes.int64

    if hash_key:
        indices_out, values_out, shape_out = (
            _sparse_feature_cross_op.sparse_feature_cross_v2(
                indices,
                values,
                shapes,
                dense_inputs,
                hashed_output,
                num_buckets,
                hash_key=hash_key,
                out_type=out_type,
                internal_type=internal_type,
                name=name))
    else:
        indices_out, values_out, shape_out = (
            _sparse_feature_cross_op.sparse_feature_cross(
                indices,
                values,
                shapes,
                dense_inputs,
                hashed_output,
                num_buckets,
                out_type=out_type,
                internal_type=internal_type,
                name=name))

    return sparse_tensor.SparseTensor(indices_out, values_out, shape_out)
def confusion_matrix(labels, predictions, num_classes=None,
                     dtype=dtypes.int32, name=None, weights=None):
    """Computes the confusion matrix from predictions and labels.

    Calculate the Confusion Matrix for a pair of prediction and
    label 1-D int arrays.

    The matrix columns represent the prediction labels and the rows represent
    the real labels. The confusion matrix is always a 2-D array of shape
    `[n, n]`, where `n` is the number of valid labels for a given
    classification task. Both prediction and labels must be 1-D arrays of the
    same shape in order for this function to work.

    If `num_classes` is None, then `num_classes` will be set to one plus the
    maximum value in either predictions or labels.
    Class labels are expected to start at 0. E.g., if `num_classes` was
    three, then the possible labels would be `[0, 1, 2]`.

    If `weights` is not `None`, then each prediction contributes its
    corresponding weight to the total value of the confusion matrix cell.

    For example:

    ```python
      tf.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
          [[0 0 0 0 0]
           [0 0 1 0 0]
           [0 0 1 0 0]
           [0 0 0 0 0]
           [0 0 0 0 1]]
    ```

    Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`,
    resulting in a 5x5 confusion matrix.

    Args:
      labels: 1-D `Tensor` of real labels for the classification task.
      predictions: 1-D `Tensor` of predictions for a given classification.
      num_classes: The possible number of labels the classification task can
        have. If this value is not provided, it will be calculated using both
        predictions and labels array.
      dtype: Data type of the confusion matrix.
      name: Scope name.
      weights: An optional `Tensor` (or value convertible to one) whose shape
        matches `predictions`.

    Returns:
      A k X k matrix representing the confusion matrix, where k is the number
      of possible labels in the classification task.

    Raises:
      ValueError: If both predictions and labels are not 1-D vectors and have
        mismatched shapes, or if `weights` is not `None` and its shape
        doesn't match `predictions`.
    """
    with ops.name_scope(name, 'confusion_matrix',
                        (predictions, labels, num_classes, weights)) as name:
        labels, predictions = remove_squeezable_dimensions(
            ops.convert_to_tensor(labels, name='labels'),
            ops.convert_to_tensor(predictions, name='predictions'))
        predictions = math_ops.cast(predictions, dtypes.int64)
        labels = math_ops.cast(labels, dtypes.int64)

        # Sanity checks - underflow or overflow can cause memory corruption.
        labels = control_flow_ops.with_dependencies(
            [check_ops.assert_non_negative(
                labels, message='`labels` contains negative values')],
            labels)
        predictions = control_flow_ops.with_dependencies(
            [check_ops.assert_non_negative(
                predictions,
                message='`predictions` contains negative values')],
            predictions)

        if num_classes is None:
            num_classes = math_ops.maximum(
                math_ops.reduce_max(predictions),
                math_ops.reduce_max(labels)) + 1
        else:
            num_classes_int64 = math_ops.cast(num_classes, dtypes.int64)
            labels = control_flow_ops.with_dependencies(
                [check_ops.assert_less(
                    labels, num_classes_int64,
                    message='`labels` out of bound')],
                labels)
            predictions = control_flow_ops.with_dependencies(
                [check_ops.assert_less(
                    predictions, num_classes_int64,
                    message='`predictions` out of bound')],
                predictions)

        if weights is not None:
            # Convert first so that lists / arrays are accepted as well;
            # `get_shape()` only exists on tensors.
            weights = ops.convert_to_tensor(weights, name='weights')
            predictions.get_shape().assert_is_compatible_with(
                weights.get_shape())
            weights = math_ops.cast(weights, dtype)

        shape = array_ops.stack([num_classes, num_classes])
        # One (label, prediction) index pair per example.
        indices = array_ops.transpose(array_ops.stack([labels, predictions]))
        values = (array_ops.ones_like(predictions, dtype)
                  if weights is None else weights)
        cm_sparse = sparse_tensor.SparseTensor(
            indices=indices, values=values,
            dense_shape=math_ops.to_int64(shape))
        zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

        # Added onto a dense zero matrix to obtain a dense result.
        return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def index_table_from_tensor(vocabulary_list, num_oov_buckets=0,
                            default_value=-1, hasher_spec=FastHashSpec,
                            dtype=dtypes.string, name=None):
    """Returns a lookup table that converts a string tensor into int64 IDs.

    The table maps every element of the 1-D `vocabulary_list` to its index
    within that list. An out-of-vocabulary key is assigned a bucket ID based
    on its hash if `num_oov_buckets` is greater than zero, and
    `default_value` otherwise. The bucket ID range is
    `[vocabulary list size, vocabulary list size + num_oov_buckets - 1]`.

    The underlying table must be initialized by calling
    `tf.tables_initializer().run()` or `table.init.run()` once.

    Elements in `vocabulary_list` cannot have duplicates, otherwise when
    executing the table initializer op, it will throw a
    `FailedPreconditionError`.

    Sample Usages:

    ```python
    vocabulary_list = tf.constant(["emerson", "lake", "palmer"])
    table = tf.lookup.index_table_from_tensor(
        vocabulary_list=vocabulary_list, num_oov_buckets=1, default_value=-1)
    features = tf.constant(["emerson", "lake", "and", "palmer"])
    ids = table.lookup(features)
    ...
    tf.tables_initializer().run()

    ids.eval()  ==> [0, 1, 4, 2]
    ```

    Args:
      vocabulary_list: A 1-D `Tensor` that specifies the mapping of keys to
        indices. The type of this object must be castable to `dtype`.
      num_oov_buckets: The number of out-of-vocabulary buckets.
      default_value: The value to use for out-of-vocabulary feature values.
        Defaults to -1.
      hasher_spec: A `HasherSpec` to specify the hash function to use for
        assignment of out-of-vocabulary buckets.
      dtype: The type of values passed to `lookup`. Only string and integers
        are supported.
      name: A name for this op (optional).

    Returns:
      The lookup table to map an input `Tensor` to index `int64` `Tensor`.

    Raises:
      ValueError: If `vocabulary_list` is invalid.
      ValueError: If `num_oov_buckets` is negative.
      TypeError: If `dtype` is neither an integer type nor string.
    """
    if vocabulary_list is None:
        raise ValueError("vocabulary_list must be specified.")
    if num_oov_buckets < 0:
        raise ValueError(
            "num_oov_buckets must be greater or equal than 0, got %d." %
            num_oov_buckets)

    wants_integer_keys = dtype.is_integer
    if (not wants_integer_keys) and (dtypes.string != dtype.base_dtype):
        raise TypeError("Only integer and string keys are supported.")

    with ops.name_scope(name, "string_to_index") as feat_to_id_scope:
        keys = ops.convert_to_tensor(vocabulary_list)
        if keys.dtype.is_integer != wants_integer_keys:
            expected_kind = "integer" if wants_integer_keys else "non-integer"
            raise ValueError("Expected %s, got %s." %
                             (expected_kind, keys.dtype))
        if (not wants_integer_keys) and (keys.dtype.base_dtype != dtype):
            raise ValueError("Expected %s, got %s." % (dtype, keys.dtype))

        # The value of a key is its position in the vocabulary.
        vocab_size = array_ops.size(keys)
        values = math_ops.to_int64(math_ops.range(vocab_size))

        shared_name = ""
        with ops.name_scope(None, "hash_table") as hash_table_scope:
            if context.executing_eagerly():
                # Ensure a unique name when eager execution is enabled to
                # avoid spurious sharing issues.
                shared_name += str(ops.uid())
            # Integer keys are always stored as int64.
            if keys.dtype.is_integer:
                table_keys = math_ops.to_int64(keys)
            else:
                table_keys = keys
            init = KeyValueTensorInitializer(
                table_keys,
                values,
                table_keys.dtype.base_dtype,
                dtypes.int64,
                name="table_init")
            table = HashTable(
                init,
                default_value,
                shared_name=shared_name,
                name=hash_table_scope)
        if num_oov_buckets:
            table = IdTableWithHashBuckets(
                table,
                num_oov_buckets=num_oov_buckets,
                hasher_spec=hasher_spec,
                name=feat_to_id_scope,
                key_dtype=dtype)
        return table
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Accumulates true/false positive flags and scores across batches.

    Besides the TP/FP flags, the metric also tracks the detection scores, the
    number of detections kept and the number of groundtruth objects.

    When `scores` or `fp` is a dict, the function is applied once per key of
    `num_gbboxes` (each under the scope name `streaming_tp_fp_<key>`) and two
    dicts are returned instead of two tuples.

    Args:
      num_gbboxes: Groundtruth object counts (or a dict of them); summed and
        added to the running total.
      tp: True positive flags (or a dict of them); cast to bool and flattened.
      fp: False positive flags (or a dict of them); cast to bool and flattened.
      scores: Detection scores (or a dict of them); cast to float and
        flattened.
      remove_zero_scores: If true, drop entries whose TP and FP flags are both
        false or whose score is not greater than 1e-4.
      metrics_collections: Optional collections the value tuple is added to.
      updates_collections: Optional collections the update tuple is added to.
      name: Optional variable scope name.

    Returns:
      A pair `(val, update_op)`. `val` is the tuple of local variables
      `(num objects, num detections, tp, fp, scores)`; `update_op` is the
      tuple `(nobjects_op, ndetections_op, tp_op, fp_op, scores_op)`. For dict
      inputs, both are dicts keyed like `num_gbboxes`.
    """
    # Dict inputs: recurse per key and hand back dicts of streaming metrics.
    if any(isinstance(arg, dict) for arg in (scores, fp)):
        per_key_values = {}
        per_key_updates = {}
        for key in num_gbboxes.keys():
            key_scope = 'streaming_tp_fp_%s' % key
            per_key_values[key], per_key_updates[key] = streaming_tp_fp_arrays(
                num_gbboxes[key], tp[key], fp[key], scores[key],
                remove_zero_scores,
                metrics_collections,
                updates_collections,
                name=key_scope)
        return per_key_values, per_key_updates

    # Tensor inputs.
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp, scores]):
        gt_counts = math_ops.to_int64(num_gbboxes)
        det_scores = math_ops.to_float(scores)
        flag_dtype = tf.bool
        tp_flags = tf.cast(tp, flag_dtype)
        fp_flags = tf.cast(fp, flag_dtype)

        # Work on flat vectors from here on.
        det_scores = tf.reshape(det_scores, [-1])
        tp_flags = tf.reshape(tp_flags, [-1])
        fp_flags = tf.reshape(fp_flags, [-1])

        # Keep only entries flagged as TP or FP.
        keep = tf.logical_or(tp_flags, fp_flags)
        # NOTE(review): the filtering below only happens when
        # `remove_zero_scores` is set; confirm that entries with both flags
        # false are meant to be kept otherwise.
        if remove_zero_scores:
            rm_threshold = 1e-4
            keep = tf.logical_and(keep, tf.greater(det_scores, rm_threshold))
            det_scores = tf.boolean_mask(det_scores, keep)
            tp_flags = tf.boolean_mask(tp_flags, keep)
            fp_flags = tf.boolean_mask(fp_flags, keep)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_num_detections', shape=[],
                                      dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=flag_dtype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=flag_dtype)

        def _append(variable, new_values):
            # Grow `variable` by concatenating this batch's values; the shape
            # changes on every update, hence validate_shape=False.
            return state_ops.assign(
                variable,
                tf.concat([variable, new_values], axis=0),
                validate_shape=False)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(gt_counts))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(det_scores, out_type=tf.int32))
        scores_op = _append(v_scores, det_scores)
        tp_op = _append(v_tp, tp_flags)
        fp_op = _append(v_fp, fp_flags)

        # Value and update ops.
        val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
        with ops.control_dependencies(
                [nobjects_op, ndetections_op, scores_op, tp_op, fp_op]):
            update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)

        if metrics_collections:
            ops.add_to_collections(metrics_collections, val)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return val, update_op
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single step of Beam Search Decoding.

    NOTE(review): besides the usual scoring, this step adds constant masks to
    the scores, splits the beams into two groups using hard-coded sizes
    (`2`, `5`, `9`), calls `get_words()` / `prior_scores()` and prints tensors
    while the graph is built. The hard-coded sizes presumably require
    `beam_width == 10` -- TODO confirm before reusing with other settings.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search.
        An instance of `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int.  The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.

    Returns:
      A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
      selected scores, predicted ids and parent ids, and the
      `BeamSearchDecoderState` to use for the next step.
    """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    # step_log_probs: Tensor shape=(?, 10, 56136)
    step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
    # step_log_probs (masked): shape=(?, 10, 56136)
    total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs
    # total_probs: shape=(?, 10, 56136)

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    # One-hot over the vocabulary with 0 at `end_token` and 1 elsewhere, so
    # that only continuations other than the end token add to the length.
    lengths_to_add = array_ops.one_hot(
        indices=array_ops.tile(
            array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
        depth=vocab_size,
        on_value=constant_op.constant(0, dtype=dtypes.int64),
        off_value=constant_op.constant(1, dtype=dtypes.int64),
        dtype=dtypes.int64)
    # lengths_to_add: shape=(?, 10, 56136)
    add_mask = (1 - math_ops.to_int64(previously_finished))
    # add_mask: shape=(?, 10), dtype=int64
    lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
    # lengths_to_add: shape=(?, 10, 56136)
    new_prediction_lengths = (
        lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))
    # new_prediction_lengths: shape=(?, 10, 56136)

    # Calculate the scores for each beam
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight)

    # Add constant masks over the vocabulary axis to the scores.
    # NOTE(review): the value lists are shorter than `vocab_size`; presumably
    # `tf.constant` pads with the last listed value (0), which would push
    # vocabulary ids 0 and 5 to the dtype minimum -- TODO confirm, and confirm
    # that `vocab_size` is a static int here since it is used as a shape.
    scores_mask = tf.constant([step_log_probs.dtype.min,0],dtype=dtypes.float32,shape=[vocab_size],name='mask')
    scores_masked =tf.add(scores,scores_mask)
    scores_mask2 = tf.constant([0,0,0,0,0,step_log_probs.dtype.min,0],dtype=dtypes.float32,shape=[vocab_size],name='mask2')
    scores_masked =tf.add(scores_mask2,scores_masked)

    def new_scores(scores_masked):
        # Same kind of mask, this time for vocabulary id 2 (presumably the
        # stop token, going by the op name 'no_stop' -- TODO confirm).
        scores_no_stop = tf.constant([0,0,step_log_probs.dtype.min,0],dtype=dtypes.float32,shape=[vocab_size],name='no_stop')
        scores=tf.add(scores_masked,scores_no_stop)
        return scores

    # Constrain the length.
    # NOTE(review): `time` is compared here before it is converted to a tensor
    # below, and with the predicate `time < 0` the `new_scores` branch looks
    # unreachable for non-negative time steps -- confirm this is intended (the
    # commented-out predicate was `time < 9`).
    scores = control_flow_ops.cond(
        #time <9 ,
        time <0,
        lambda: new_scores(scores_masked),
        lambda: scores_masked)
    # scores: shape=(?, 10, 56136), i.e. [batch_size, beam_width, vocab_size]
    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_shape = array_ops.shape(scores)
    # scores_shape: shape=(3,)
    # Regroup the scores into two halves along axis 1.
    # NOTE(review): assumes the beams split evenly into two groups -- confirm.
    scores_to_flat_1 =array_ops.reshape(scores, [batch_size,2, -1])
    # Debug output.
    print ("scores_to_flat_1",scores_to_flat_1)
    # Scores of the first and of the last beam.
    scores_to_0 = scores[:, 0]
    scores_to_1 = scores[:, -1]
    scores_to_flat_2=tf.concat([scores_to_0,scores_to_1],1)
    # NOTE(review): `scores_flat` is not read again in this function.
    scores_flat = control_flow_ops.cond(
        time > 0,
        lambda: scores_to_flat_1,
        lambda: array_ops.reshape(scores_to_flat_2, [batch_size,2, -1]))
    num_available_beam = control_flow_ops.cond(
        time > 0,
        lambda: math_ops.reduce_prod(scores_shape[1:]),
        lambda: math_ops.reduce_prod(scores_shape[2:]))
    # scores_flat: shape=(?, ?)
    # num_available_beam: shape=()
    # Pick the next beams according to the specified successors function
    # NOTE(review): `next_beam_size` is only passed to `cal_scores_indices_t1`
    # below, which does not use it; the `top_k` calls use a fixed k=5.
    next_beam_size = math_ops.minimum(
        ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"),
        num_available_beam)
    #scores_t = tf.reshape(scores_flat,[batch_size,2,-1])
    ############################
    #input_words=['entrencheds01', 'entrencheds02', 'forgev01', 'forgev04', \
    # 'hitn02', 'hitn03', 'vaultn02', 'vaultn04', 'deepa03', \
    # 'deeps02', 'admitv01', 'admitv02', 'plantn01', 'plantn02',\
    # 'squaren01', 'squaren05', 'drawv05', 'drawv06', 'spellv03', \
    # 'spellv02', 'shotn02', 'shotn04', 'coachv01', 'coachv02', 'casen05',\
    # 'casen09', 'focusn01', 'focusn02', 'tasten01', 'tasten04', 'footn01', \
    # 'footv01']
    # NOTE(review): `get_words` and `prior_scores` are defined elsewhere; they
    # presumably provide per-word prior scores -- confirm. The resulting
    # `prior_mask` is not read again in this function (its only uses are in
    # the commented-out code below).
    input_words=get_words()
    return_list=prior_scores(input_words)
    return_array=np.array(return_list)
    return_tensor=tf.convert_to_tensor(return_array)
    tiling = [1, 5, 1]
    prior_mask=tf.tile(tf.expand_dims(return_tensor, 1), tiling)
    prior_mask=tf.cast(prior_mask, tf.float32)
    prior_mask=array_ops.reshape(prior_mask, [batch_size, -1])
    #print ("prior_mask",prior_mask)
    # Sum the two halves element-wise.
    scores_sum= tf.reduce_sum(scores_to_flat_1,1)
    #print ("scores_sum_1",scores_sum)
    #def cal_scores_sum(scores_sum,prior_mask):
    #    return tf.add(scores_sum,prior_mask)
    #scores_sum = control_flow_ops.cond(
    #    time > 0,
    #    lambda: cal_scores_sum(scores_sum,prior_mask),
    #    lambda: scores_sum)
    #scores_sum=tf.add(scores_sum,prior_mask)
    #print ("scores_sum_2",scores_sum)
    ############################
    #scores_final=tf.concat([scores_sum, scores_sum],1)

    def cal_scores_indices(scores_to_0,scores_to_1):
        # First time step: take the top 5 continuations of the first beam and
        # the top 5 of the last beam. Offsetting the second set of indices by
        # 9 * vocab_size makes the later division by vocab_size report parent
        # beam 9 for them.
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_to_0, k=5)
        print ("ori next_beam_scores_1,word_indices_1",next_beam_scores_1)
        print ("ori word_indices_1",word_indices_1)
        next_beam_scores_2, word_indices_2 = nn_ops.top_k(scores_to_1, k=5)
        next_beam_scores=tf.concat([next_beam_scores_1,next_beam_scores_2],1)
        word_indices=tf.concat([word_indices_1,word_indices_2+9*vocab_size],1)
        return next_beam_scores,word_indices

    def cal_scores_indices_t1(scores_final,next_beam_size):
        # Later time steps: take the top 5 entries of the summed scores and
        # use them twice; the second copy of the indices is offset by
        # 5 * vocab_size, presumably to address the matching entries in the
        # second half of the beams -- TODO confirm.
        next_beam_scores_1, word_indices_1=nn_ops.top_k(scores_final, k=5)
        #next_beam_scores_1, word_indices_1=sample(next_beam_scores_1,word_indices_1)
        print ("next_beam_scores_1", next_beam_scores_1)
        print ("word_indices_1",word_indices_1)
        next_beam_scores=tf.concat([next_beam_scores_1,next_beam_scores_1],1)
        word_indices=tf.concat([word_indices_1,word_indices_1+5*vocab_size],1)
        return next_beam_scores, word_indices

    next_beam_scores, word_indices=control_flow_ops.cond(
        time > 0,
        lambda: cal_scores_indices_t1(scores_sum,next_beam_size),
        lambda: cal_scores_indices(scores_to_0,scores_to_1))
    # NOTE(review): both branches above produce 5 + 5 columns, while the
    # static shape set here has `beam_width` columns.
    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])
    # shape=(?, ?)
    # Pick out the probs, beam_ids, and states according to the chosen predictions
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=total_probs,
        batch_size=batch_size,
        range_size=beam_width * vocab_size,
        gather_shape=[-1],
        name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                     name="next_beam_word_ids")
    # raw_next_word_ids: shape=(?, 10)
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids, end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged
    # 2. Beams that are now finished (EOS predicted) remain unchanged
    # 3. Beams that are not yet finished have their length increased by 1
    lengths_to_add = math_ops.to_int64(
        math_ops.not_equal(next_word_ids, end_token))
    lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]),
        next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished)
    # Debug output.
    print ('next_beam_probs',next_beam_probs)
    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)

    return output, next_state
def _call_cell(self, inputs, initial_cell_state=None, initial_output=None,
               dtype=None, sequence_length=None):
    """Runs the fused block LSTM op over a whole input sequence.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      initial_cell_state: initial value for cell state, shape
        `[batch_size, self._num_units]`
      initial_output: initial value of cell output, shape
        `[batch_size, self._num_units]`
      dtype: The data type for the initial state and expected output. It is
        also the dtype of the zero peephole weights used when peepholes are
        disabled.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in
        `[0, time_len)` or None.

    Returns:
      A pair containing:

      - Cell state (cs): A `3-D` tensor of shape
        `[time_len, batch_size, output_size]`
      - Output (h): A `3-D` tensor of shape
        `[time_len, batch_size, output_size]`
    """
    # Prefer the static time dimension; fall back to the dynamic shape.
    static_shape = inputs.get_shape().with_rank(3)
    time_len = static_shape.dims[0].value
    if time_len is None:
        time_len = array_ops.shape(inputs)[0]

    # Without peepholes the op still expects the three diagonal weight
    # vectors, so a single zero vector is shared between them.
    if not self._use_peephole:
        wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)
    else:
        wci, wco, wcf = self._w_i_diag, self._w_o_diag, self._w_f_diag

    # The op processes `seq_len_max` steps: the full length when no
    # per-example lengths are given, otherwise the longest sequence.
    if sequence_length is None:
        longest = time_len
    else:
        longest = math_ops.reduce_max(sequence_length)
    max_seq_len = math_ops.to_int64(longest)

    # Only the cell state and the output are kept from the seven results.
    _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm_fused_our(
        seq_len_max=max_seq_len,
        x=inputs,
        cs_prev=initial_cell_state,
        h_prev=initial_output,
        w=self._kernel,
        wci=wci,
        wcf=wcf,
        wco=wco,
        b=self._bias,
        group_size_attr=self._group_size,
        forget_bias=self._forget_bias,
        cell_clip=self._cell_clip,
        use_peephole=self._use_peephole)
    return cs, h
def _dnn_sampled_softmax_classifier_model_fn(features, targets, mode, params): """model_fn that uses candidate sampling. Args: features: Single Tensor or dict of Tensor (depends on data passed to `fit`) targets: A single Tensor of shape [batch_size, n_labels] containing the target indices. mode: Represents if this training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters that are listed below. hidden_units- List of hidden units per layer. All layers are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second one has 32. feature_columns- An iterable containing all the feature columns used by the model. All items in the set should be instances of classes derived from `FeatureColumn`. n_classes- number of target classes. It must be greater than 2. n_samples- number of sample target classes. Needs to be tuned - A good starting point could be 2% of n_classes. n_labels- number of labels in each example. top_k- The number of classes to predict. optimizer- An instance of `tf.Optimizer` used to train the model. If `None`, will use an Adagrad optimizer. dropout- When not `None`, the probability we will drop out a given coordinate. gradient_clip_norm- A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. See tf.clip_by_global_norm for more details. num_ps_replicas- The number of parameter server replicas. Returns: predictions: A single Tensor or a dict of Tensors. loss: A scalar containing the loss of the step. train_op: The op for training. 
""" hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] n_classes = params["n_classes"] n_samples = params["n_samples"] n_labels = params["n_labels"] top_k = params["top_k"] optimizer = params["optimizer"] dropout = params["dropout"] gradient_clip_norm = params["gradient_clip_norm"] num_ps_replicas = params["num_ps_replicas"] parent_scope = "dnn_ss" features = _get_feature_dict(features) targets = _reshape_targets(targets) # Setup the input layer partitioner. input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Create the input layer. with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( features, feature_columns, weight_collections=[parent_scope], scope=scope) # Setup the hidden layer partitioner. hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) final_hidden_layer_dim = None # Create hidden layers using fully_connected. for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, [net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected(net, num_hidden_units, variables_collections=[parent_scope], scope=scope) final_hidden_layer_dim = num_hidden_units # Add dropout if it is enabled. if dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) # Create the weights and biases for the logit layer. 
with variable_scope.variable_scope( parent_scope + "/logits", [net], partitioner=hidden_layer_partitioner) as scope: dtype = net.dtype.base_dtype weights_shape = [n_classes, final_hidden_layer_dim] weights = variables.model_variable( "weights", shape=weights_shape, dtype=dtype, initializer=initializers.xavier_initializer(), trainable=True, collections=[parent_scope]) biases = variables.model_variable( "biases", shape=[ n_classes, ], dtype=dtype, initializer=init_ops.zeros_initializer, trainable=True, collections=[parent_scope]) if mode == estimator.ModeKeys.TRAIN: # Call the candidate sampling APIs and calculate the loss. sampled_values = nn.learned_unigram_candidate_sampler( true_classes=math_ops.to_int64(targets), num_true=n_labels, num_sampled=n_samples, unique=True, range_max=n_classes) sampled_softmax_loss = nn.sampled_softmax_loss( weights=weights, biases=biases, inputs=net, labels=math_ops.to_int64(targets), num_sampled=n_samples, num_classes=n_classes, num_true=n_labels, sampled_values=sampled_values) loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss") train_op = optimizers.optimize_loss( loss=loss, global_step=contrib_framework.get_global_step(), learning_rate=_DEFAULT_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope) return None, loss, train_op elif mode == estimator.ModeKeys.EVAL: logits = nn.bias_add( standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) # Since the targets have multiple labels, setup the target probabilities # as 1.0/n_labels for each of the labels. 
target_one_hot = array_ops.one_hot(indices=targets, depth=n_classes, on_value=1.0 / n_labels) target_one_hot = math_ops.reduce_sum(input_tensor=target_one_hot, reduction_indices=[1]) loss = math_ops.reduce_mean( nn.softmax_cross_entropy_with_logits(logits, target_one_hot)) return predictions, loss, None elif mode == estimator.ModeKeys.INFER: logits = nn.bias_add( standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) return predictions, None, None
def evaluate(self, X, Y, metric, batch_size=None):
    """ evaluate.

    Evaluate the forest model with the given data and metric. The metric op
    is run once per batch and the per-batch values are averaged.

    Arguments:
        X: `2-D Array` of shape (n_samples, n_features).
            The input data to evaluate on.
        Y: `1-D Array` of shape (n_samples). The labels/targets data.
        metric: `func` returning a `Tensor`. The metric function.
        batch_size: `int`. If specified, process the data by batch.
            Otherwise all samples are evaluated in a single batch.

    Return:
        The metric value.

    """
    with self.graph.as_default():
        # Verify data dimension
        validate_dim(X, max_dim=2, min_dim=2, var_name='X')
        if self.regression:
            validate_dim(Y, min_dim=1, var_name='Y')
        else:
            validate_dim(Y, max_dim=1, min_dim=1, var_name='Y')

        # Get data size; without a batch size everything goes in one batch.
        num_samples = get_num_sample(X)
        if batch_size is None:
            batch_size, capacity = num_samples, 1
        else:
            capacity = None

        # Build Tree Graph
        self._build_estimator(X, Y)

        # The data tensors and the eval op are (re)built whenever the input
        # arrays, the batch size or the metric differ from the previous call,
        # or when no eval op is ready yet. Be aware that every eval with
        # different data will re-create a data tensor.
        needs_rebuild = (
            self._eval.get_params('X') != hex(id(X))
            or self._eval.get_params('Y') != hex(id(Y))
            or self._eval.get_params('batch_size') != batch_size
            or self._eval.get_params('metric') != metric
            or not self._eval.is_ready)
        if needs_rebuild:
            x_batch, y_batch, coord = generate_data_tensor(
                X, Y,
                batch_size=batch_size,
                shuffle=False,
                num_threads=8,
                capacity=capacity)
            x_batch, _, spec = data_ops.ParseDataTensorOrDict(x_batch)
            y_batch = data_ops.ParseLabelTensorOrDict(y_batch)
            if not self.params.regression:
                # One-hot encode the class labels as floats.
                class_ids = math_ops.to_int64(array_ops.squeeze(y_batch))
                y_batch = math_ops.to_float(
                    array_ops.one_hot(class_ids, self.params.n_classes, 1, 0))
                y_batch = tf.reshape(y_batch, [-1, self.n_classes])
            pred, _, _ = self.forest_graph.inference_graph(x_batch)
            self._eval_op = metric(pred, y_batch)
            self._build_eval(x_batch, y_batch, metric, batch_size)

            # Start QueueRunners
            tf.train.start_queue_runners(sess=self.session)
            if coord:
                coord.launch_threads(self.session)

        n_batches = int(math.ceil(float(num_samples) / batch_size))

        # Average of the per-batch metric values.
        mean_metric = 0.
        for _ in range(n_batches):
            mean_metric += self.session.run(self._eval_op) / n_batches
        return mean_metric
def index_to_string_table_from_tensor(vocabulary_list,
                                      default_value="UNK",
                                      name=None):
    """Creates a lookup table that maps `int64` indices to strings.

    The table is initialized from the 1-D string `Tensor` `vocabulary_list`:
    each element is a value and its position within the tensor is the key.
    Any index without an entry in `vocabulary_list` (an out-of-vocabulary
    index) is mapped to `default_value`.

    The underlying table must be initialized by calling
    `tf.tables_initializer().run()` or `table.init.run()` once.

    Elements in `vocabulary_list` cannot have duplicates, otherwise the table
    initializer op throws a `FailedPreconditionError` when it is executed.

    Sample Usages:

    ```python
    vocabulary_list = tf.constant(["emerson", "lake", "palmer"])
    indices = tf.constant([1, 5], tf.int64)
    table = tf.contrib.lookup.index_to_string_table_from_tensor(
        vocabulary_list, default_value="UNKNOWN")
    values = table.lookup(indices)
    ...
    tf.tables_initializer().run()

    values.eval() ==> ["lake", "UNKNOWN"]
    ```

    Args:
      vocabulary_list: A 1-D string `Tensor` that specifies the strings to map
        from indices.
      default_value: The value to use for out-of-vocabulary indices.
      name: A name for this op (optional).

    Returns:
      The lookup table that maps an `int64` index `Tensor` to the associated
      string values.

    Raises:
      ValueError: when `vocabulary_list` is not set.
    """
    if vocabulary_list is None:
        raise ValueError("vocabulary_list must be specified.")

    with ops.name_scope(name, "index_to_string") as scope:
        strings = ops.convert_to_tensor(vocabulary_list, dtypes.string)
        # Keys are the positions 0..size-1 of the vocabulary entries.
        positions = math_ops.to_int64(
            math_ops.range(array_ops.size(strings)))
        initializer = KeyValueTensorInitializer(
            positions, strings, dtypes.int64, dtypes.string,
            name="table_init")
        # TODO(yleon): Use a more efficient structure.
        return HashTable(
            initializer, default_value, shared_name="", name=scope)