def as_tensor_or_sparse_tensor(value):
    """Turn ``value`` into a ``tf.Tensor`` or a ``tf.SparseTensor``.

    Inputs recognised by ``_utils.is_sparse`` are first passed through
    ``_utils.as_tf_sparse``; the result (or the untouched input) is then
    handed to ``tf.convert_to_tensor_or_sparse_tensor``.

    Args:
      value (any-tensor-like): A SparseTensor, SparseTensorValue, scipy
        sparse matrix, pydata sparse array, or an object whose type has a
        registered Tensor conversion function.

    Returns:
      A SparseTensor or Tensor based on value.

    Examples:

    >>> import awesomeml as aml
    >>> import tensorflow as tf
    >>> import scipy.sparse
    >>> sess = tf.Session()
    >>> a = scipy.sparse.random(3, 4, 0.8)
    >>> b = as_tensor_or_sparse_tensor(a)
    >>> isinstance(b, tf.SparseTensor)
    True
    >>> a2 = aml.utils.as_numpy_array(sess.run(b))
    >>> np.testing.assert_array_almost_equal(a.todense(), a2)
    """
    tf_ready = _utils.as_tf_sparse(value) if _utils.is_sparse(value) else value
    return tf.convert_to_tensor_or_sparse_tensor(tf_ready)
def as_sparse_tensor(value):
    """Coerce ``value`` towards a ``tf.SparseTensor``.

    Args:
      value: A ``tf.SparseTensor`` (returned unchanged) or any object that
        ``_utils.as_tf_sparse`` accepts.

    Returns:
      ``value`` itself when it already is a ``tf.SparseTensor``; otherwise
      the result of ``tf.convert_to_tensor_or_sparse_tensor`` applied to
      ``_utils.as_tf_sparse(value)``.
    """
    if not isinstance(value, tf.SparseTensor):
        value = tf.convert_to_tensor_or_sparse_tensor(
            _utils.as_tf_sparse(value))
    return value
def cos_simi(class_centers, v):
    """Return the index of the class centre most cosine-similar to ``v``.

    The previous implementation opened a ``tf.Session`` it never closed,
    evaluated the graph at construction time to drive a Python loop, and
    indexed into a scalar variable. This version builds the same
    comparison as a single vectorised graph with no session and no
    variable.

    Args:
      class_centers: Tensor-like of shape ``[n_centers, dim]``; converted to
        ``tf.float32``.
      v: Tensor-like of shape ``[dim]`` or ``[..., dim]``; converted to
        ``tf.float32`` and clipped element-wise to ``[1e-10, 1.0]`` before
        the comparison.

    Returns:
      An ``int64`` tensor of shape ``v.shape[:-1]`` holding, for each vector
      in ``v``, the row index of the most similar class centre.
    """
    class_centers = tf.convert_to_tensor(class_centers, dtype=tf.float32)
    v = tf.convert_to_tensor(v, dtype=tf.float32)
    v = tf.clip_by_value(v, 1e-10, 1.0)

    # [..., 1, dim] * [n_centers, dim] -> [..., n_centers, dim]; summing the
    # last axis yields the dot product of every vector with every centre.
    dots = tf.reduce_sum(tf.expand_dims(v, -2) * class_centers, axis=-1)

    # [..., 1] * [n_centers] -> [..., n_centers]
    v_norm = tf.expand_dims(tf.norm(v, axis=-1), -1)
    center_norms = tf.norm(class_centers, axis=-1)
    # Guard against an all-zero centre producing a 0/0 = NaN similarity.
    denom = tf.maximum(v_norm * center_norms, 1e-10)

    similarity = tf.clip_by_value(dots / denom, 1e-10, 1.0)
    return tf.argmax(similarity, axis=-1)
def moveaxis(a, axis_src, axis_dst):
    """Relocate axes of a tensor, in the spirit of ``np.moveaxis``.

    Axes that are not moved keep their original relative order.

    Args:
      a (Tensor): the tensor whose axes should be reordered
      axis_src (int, Seq[int]): Original positions of the axes to move.
        These must be unique.
      axis_dst (int, Seq[int]): Destination position for each of the
        original axes. These must also be unique.

    Returns:
      The transposed tensor: ``tf.transpose`` is used when the converted
      input is a ``tf.Tensor``, ``tf.sparse_transpose`` otherwise.

    Raises:
      ValueError: If ``axis_src`` and ``axis_dst`` do not contain the same
        number of axes.

    Examples:

    >>> a = np.zeros((3, 4, 5))
    >>> moveaxis(a, 0, -1).get_shape().as_list()
    [4, 5, 3]
    >>> moveaxis(a, -1, 0).get_shape().as_list()
    [5, 3, 4]
    >>> moveaxis(a, [0, 1], [-1, -2]).get_shape().as_list()
    [5, 4, 3]
    >>> moveaxis(a, [0, 1, 2], [-1, -2, -3]).get_shape().as_list()
    [5, 4, 3]
    >>> moveaxis(a, [0, 1], [-1, -2, -3])
    Traceback (most recent call last):
      ...
    ValueError: ...

    >>> sa = scipy.sparse.random(3, 4)
    >>> moveaxis(sa, [0, 1], [-1, -2]).get_shape().as_list()
    [4, 3]
    """
    a = tf.convert_to_tensor_or_sparse_tensor(a)
    rank = a.get_shape().ndims

    sources = _utils.validate_axis2(axis_src, rank)
    targets = _utils.validate_axis2(axis_dst, rank)
    if len(sources) != len(targets):
        raise ValueError('`axis_src` and `axis_dst` arguments must have the '
                         'same number of elements')

    # Start from the axes that stay put, then slot every moved axis into its
    # destination, lowest destination first.
    perm = [axis for axis in range(rank) if axis not in sources]
    for target, source in sorted(zip(targets, sources)):
        perm.insert(target, source)

    transpose = (tf.transpose if isinstance(a, tf.Tensor)
                 else tf.sparse_transpose)
    return transpose(a, perm)
def test_map_per_key_reductions(self, key, key_vocab, reductions, x,
                                expected_results):
    """Check ``tf_utils.map_per_key_reductions`` against expected values.

    ``key`` and ``x`` may each be a ``SparseTensorValue`` (converted with
    ``tf.convert_to_tensor_or_sparse_tensor``) or any value accepted by
    ``tf.constant``. Every output is compared to the matching entry of
    ``expected_results`` with ``assertAllEqual``.
    """

    def _as_graph_input(raw):
        # Sparse values need the sparse-aware converter; the rest are
        # wrapped as constants.
        if isinstance(raw, tf.compat.v1.SparseTensorValue):
            return tf.convert_to_tensor_or_sparse_tensor(raw)
        return tf.constant(raw)

    key = _as_graph_input(key)
    key_vocab = tf.constant(key_vocab)
    reductions = tuple(tf.constant(reduction) for reduction in reductions)
    x = _as_graph_input(x)
    expected_results = tuple(
        tf.constant(expected) for expected in expected_results)

    results = tf_utils.map_per_key_reductions(reductions, key, key_vocab, x)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.tables_initializer())
        actual = sess.run(results)
        for got, want in zip(actual, expected_results):
            self.assertAllEqual(got, want)
def dictionary_from_list(vocab, char_string):
    """Encode a list of words as a sparse tensor of character indices.

    Args:
      vocab: Sequence of strings; one sparse row is produced per entry.
      char_string: String used as the index reference; each character of a
        word is stored as ``char_string.index(character)``.

    Returns:
      The result of ``tf.convert_to_tensor_or_sparse_tensor`` applied to a
      ``tf.SparseTensorValue`` with dense shape
      ``[len(vocab), length of the longest word]``. An empty ``vocab`` (or
      one holding only empty strings) yields an empty sparse tensor instead
      of raising.

    Raises:
      ValueError: If a word contains a character absent from
        ``char_string`` (raised by ``str.index``).
    """
    # inds are all non-zero char values. The reshape keeps the (n, 2) layout
    # even when there are no characters at all, where np.array would
    # otherwise produce shape (0,).
    inds = np.array(
        [[row, col]
         for row, word in enumerate(vocab)
         for col in range(len(word))],
        dtype=np.int32).reshape(-1, 2)

    # parse each character label using char_string as index reference
    vals = np.array(
        [char_string.index(c) for word in vocab for c in word],
        dtype=np.int32)

    # max() of an empty sequence raises, so treat "no words" as width 0.
    n_words = len(vocab)
    max_len = max(len(word) for word in vocab) if n_words else 0
    dims = np.array([n_words, max_len], dtype=np.int32)

    tensor = tf.SparseTensorValue(indices=inds, values=vals, dense_shape=dims)
    return tf.convert_to_tensor_or_sparse_tensor(tensor)
def _serialize_feature(values):
    """Serialize a Tensor or SparseTensor as `Feature` protos.

    `values` should be a Tensor of rank >=1 or SparseTensor of rank 2. We
    will refer to the size of the first dimension as batch_size.

    This function encodes each row of the `Tensor` as a list of values
    (flattening the other dimensions) and each row of the `SparseTensor` as
    a list of values, where the indices within each row are ignored and
    assumed to be 0, 1, ....

    Args:
      values: A `Tensor` or `SparseTensor`.

    Returns:
      A tensor of shape (batch_size,) and type `tf.string` where each
      element is a serialized `Feature` proto.

    Raises:
      ValueError: If the dtype of `values` is not `tf.string`, `tf.float32`
        or `tf.int64`.
    """
    values = tf.convert_to_tensor_or_sparse_tensor(values)

    # (dtype, Feature field name, proto message carrying the value list)
    supported = (
        (tf.string, 'bytes_list', 'tensorflow.BytesList'),
        (tf.float32, 'float_list', 'tensorflow.FloatList'),
        (tf.int64, 'int64_list', 'tensorflow.Int64List'),
    )
    for dtype, field_name, message_type in supported:
        if values.dtype == dtype:
            feature_fields = {
                field_name: _encode_proto({'value': values}, message_type)
            }
            return _encode_proto(feature_fields, 'tensorflow.Feature')

    raise ValueError('Cannot encode values of dtype {}'.format(
        values.dtype))
def _model_fn(features, labels, mode, params):
    """Build TF graph based on canned DNN classifier.

    The key column is popped from ``features`` (mutating the caller's dict)
    before ``_dnn_model_fn`` is invoked. When the resulting spec carries
    predictions, the key is added to them and the same predictions are
    exposed as the ``'classes'`` export output.

    Raises:
      ValueError: If ``constants.KEY_COLUMN`` is not present in ``features``.
    """
    key_tensor = features.pop(constants.KEY_COLUMN, None)
    if key_tensor is None:
        raise ValueError('Key is missing from features.')

    estimator_spec = _dnn_model_fn(
        features=features, labels=labels, mode=mode, **params)

    outputs = estimator_spec.predictions
    if not outputs:
        return estimator_spec

    outputs[constants.KEY_COLUMN] = tf.convert_to_tensor_or_sparse_tensor(
        key_tensor)
    return estimator_spec._replace(
        predictions=outputs,
        export_outputs={
            'classes': tf.estimator.export.PredictOutput(outputs)
        })
def dictionary_from_list(vocab, charset):
    """Create a label-indexed version of the lexicon from a list of strings.

    Parameters:
      vocab   : list of strings in the lexicon
      charset : string containing one instance of each valid character
    Returns:
      tensor_dict : A tf.SparseTensor with one row per lexicon entry and
                    columns containing indices of corresponding characters
                    in charset. An empty lexicon (or one holding only empty
                    strings) yields an empty sparse tensor instead of
                    raising.
    Raises:
      ValueError : If a word contains a character absent from charset
                   (raised by ``str.index``).
    """
    # inds are locations of valid character values. The reshape keeps the
    # (n, 2) layout even when there are no characters at all, where np.array
    # would otherwise produce shape (0,).
    inds = np.array(
        [[row, col]
         for row, word in enumerate(vocab)
         for col in range(len(word))],
        dtype=np.int32).reshape(-1, 2)

    # parse each character label using charset as index reference
    vals = np.array(
        [charset.index(ch) for word in vocab for ch in word],
        dtype=np.int32)

    # max() of an empty sequence raises, so treat "no words" as width 0.
    n_words = len(vocab)
    max_len = max(len(word) for word in vocab) if n_words else 0
    dims = np.array([n_words, max_len], dtype=np.int32)

    tensor = tf.SparseTensorValue(indices=inds, values=vals, dense_shape=dims)
    return tf.convert_to_tensor_or_sparse_tensor(tensor)
def __init__(self, inner_model, dummy_batch, loss_fn, metrics):
    """Wrap a Keras model together with its loss and metrics.

    Args:
      inner_model: Callable Keras model; it is invoked once on
        ``dummy_batch['x']`` so that its variables get created.
      dummy_batch: Mapping (or object exposing ``_asdict``) of example
        inputs. Each value is converted with
        ``tf.convert_to_tensor_or_sparse_tensor``.
      loss_fn: Callable ``(y_true, y_pred)``; expected to return the scalar
        mean loss of a batch.
      metrics: List of metrics, or ``None`` for an empty list.
    """
    # TODO(b/124477598): the following set_session() should be removed in the
    # future. This is a workaround for Keras' caching sessions in a way that
    # isn't compatible with TFF. This is already fixed in TF master, but not
    # as of v1.13.1.
    #
    # We do not use .clear_session() because it blows away the graph stack by
    # resetting the default graph.
    tf.keras.backend.set_session(None)

    if hasattr(dummy_batch, '_asdict'):
        dummy_batch = dummy_batch._asdict()
    # Convert input to tensors, possibly from nested lists that need to be
    # converted to a single top-level tensor.
    dummy_tensors = collections.OrderedDict(
        (name, tf.convert_to_tensor_or_sparse_tensor(raw))
        for name, raw in six.iteritems(dummy_batch))
    # NOTE: sub-classed `tf.keras.Model`s do not have fully initialized
    # variables until they are called on input. We force that here.
    inner_model(dummy_tensors['x'])

    def _tensor_spec_with_undefined_batch_dim(tensor):
        # Remove the batch dimension and leave it unspecified.
        trailing_dims = tensor.shape.dims[1:]
        return tf.TensorSpec(shape=[None] + trailing_dims, dtype=tensor.dtype)

    self._input_spec = nest.map_structure(
        _tensor_spec_with_undefined_batch_dim, dummy_tensors)

    self._keras_model = inner_model
    self._loss_fn = loss_fn
    self._metrics = [] if metrics is None else metrics

    # This is defined here so that it closes over the `loss_fn`.
    class _WeightedMeanLossMetric(keras_metrics.Metric):
        """A `tf.keras.metrics.Metric` wrapper for the loss function."""

        def __init__(self, name='loss', dtype=tf.float32):
            super(_WeightedMeanLossMetric, self).__init__(name, dtype)
            self._total_loss = self.add_weight(
                'total_loss', initializer='zeros')
            self._total_weight = self.add_weight(
                'total_weight', initializer='zeros')
            self._loss_fn = loss_fn

        def update_state(self, y_true, y_pred, sample_weight=None):
            y_true = tf.cast(y_true, self._dtype)
            y_pred = tf.cast(y_pred, self._dtype)

            # _loss_fn is expected to return the scalar mean loss, so we
            # multiply by the batch_size to get back to total loss.
            n_examples = tf.cast(tf.shape(y_pred)[0], self._dtype)
            summed_loss = self._loss_fn(y_true, y_pred) * n_examples

            # The weight update is sequenced after the loss update.
            loss_update = self._total_loss.assign_add(summed_loss)
            with tf.control_dependencies([loss_update]):
                return self._total_weight.assign_add(n_examples)

        def result(self):
            return tf.div_no_nan(self._total_loss, self._total_weight)

    self._loss_metric = _WeightedMeanLossMetric()

    metric_variable_type_dict = nest.map_structure(
        tf.TensorSpec.from_tensor, self.report_local_outputs())
    federated_local_outputs_type = tff.FederatedType(
        metric_variable_type_dict, tff.CLIENTS, all_equal=False)

    def federated_output(local_outputs):
        # One aggregated entry per metric, keyed by the metric's name.
        return collections.OrderedDict(
            (metric.name,
             federated_aggregate_keras_metric(
                 type(metric), metric.get_config(), variables))
            for metric, variables in zip(self.get_metrics(), local_outputs))

    self._federated_output_computation = tff.federated_computation(
        federated_output, federated_local_outputs_type)

    # Keras creates variables that are not added to any collection, making it
    # impossible for TFF to extract them and create the appropriate
    # initializer before calling a tff.Computation. Here we store them in a
    # TFF specific collection so that they can be retrieved later.
    # TODO(b/122081673): this likely goes away in TF2.0
    all_variables = itertools.chain(self.trainable_variables,
                                    self.non_trainable_variables,
                                    self.local_variables)
    for variable in all_variables:
        tf.add_to_collection(graph_keys.GraphKeys.VARS_FOR_TFF_TO_INITIALIZE,
                             variable)
def _tensor_spec_with_undefined_batch_dim(tensor):
    """Build a ``tf.TensorSpec`` for ``tensor`` with an open first dimension.

    ``tensor`` is converted with ``tf.convert_to_tensor_or_sparse_tensor``;
    the returned spec keeps its dtype and every dimension after the first,
    while the first dimension is replaced by ``None``.
    """
    converted = tf.convert_to_tensor_or_sparse_tensor(tensor)
    # Remove the batch dimension and leave it unspecified.
    trailing_dims = converted.shape.dims[1:]
    return tf.TensorSpec(shape=[None] + trailing_dims, dtype=converted.dtype)
# Script section: builds a sparse "RI" tensor for a whole vocabulary and
# gathers from it using a small batch of dummy word indices.
sess = tf.InteractiveSession()

vocab_size = 2000  # number of generated RI entries
k = 1000  # first Generator argument; also passed as the RI layer size
s = 4  # second Generator argument -- presumably active entries per RI; confirm
embed_size = 128  # not used within this section
nce_samples = 1000  # not used within this section

# *************************************
#   RI VOCAB GENERATION
# *************************************
generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
# ris_to_sp_tensor_value is assumed to return a SparseTensorValue (per its
# name) -- confirm; the result is then converted to a graph tensor.
ri_tensor = ris_to_sp_tensor_value(ris, k)
ri_tensor = tf.convert_to_tensor_or_sparse_tensor(ri_tensor)

# *************************************
#   DUMMY INPUT DATA
# *************************************
# batch of word sequence indices
ctx_size = 3  # not used within this section; matches the width of input_data rows
input_data = np.array([[0, 1, 2], [0, 2, 2], [1, 3, 5], [3, 0, 2]])

input_labels = tf.constant(np.array([[3], [1], [10], [25]], dtype=np.int64))
input_labels = tx.TensorLayer(input_labels, n_units=1)

input_layer = tx.TensorLayer(input_data, n_units=3, dtype=tf.int64)

ri_layer = tx.TensorLayer(ri_tensor, k)
# Gather from the RI tensor using the input index tensor.
ri_inputs = tx.gather_sparse(ri_layer.tensor, input_layer.tensor)
def __init__(self, inner_model, dummy_batch, loss_func, metrics):
    """Wrap a Keras model together with its loss and metrics.

    Args:
      inner_model: Callable Keras model; it is invoked once on
        ``dummy_batch['x']`` so that its variables get created.
      dummy_batch: Mapping (or object exposing ``_asdict``) of example
        inputs. Each value is converted with
        ``tf.convert_to_tensor_or_sparse_tensor``.
      loss_func: Callable ``(y_true, y_pred)``; expected to return the
        scalar mean loss of a batch.
      metrics: List of metrics, or ``None`` for an empty list.
    """
    if hasattr(dummy_batch, '_asdict'):
        dummy_batch = dummy_batch._asdict()
    # Convert input to tensors, possibly from nested lists that need to be
    # converted to a single top-level tensor.
    dummy_tensors = collections.OrderedDict(
        (name, tf.convert_to_tensor_or_sparse_tensor(raw))
        for name, raw in six.iteritems(dummy_batch))
    # NOTE: sub-classed `tf.keras.Model`s do not have fully initialized
    # variables until they are called on input. We force that here.
    inner_model(dummy_tensors['x'])

    def _tensor_spec_with_undefined_batch_dim(tensor):
        # Remove the batch dimension and leave it unspecified.
        trailing_dims = tensor.shape.dims[1:]
        return tf.TensorSpec(shape=[None] + trailing_dims, dtype=tensor.dtype)

    self._input_spec = nest.map_structure(
        _tensor_spec_with_undefined_batch_dim, dummy_tensors)

    self._keras_model = inner_model
    self._loss_fn = loss_func
    self._metrics = [] if metrics is None else metrics

    # This is defined here so that it closes over the `loss_func`.
    class _WeightedMeanLossMetric(keras_metrics.Metric):
        """A `tf.keras.metrics.Metric` wrapper for the loss function."""

        def __init__(self, name='loss', dtype=tf.float32):
            super(_WeightedMeanLossMetric, self).__init__(name, dtype)
            self._total_loss = self.add_weight(
                'total_loss', initializer='zeros')
            self._total_weight = self.add_weight(
                'total_weight', initializer='zeros')
            self._loss_fn = loss_func

        def update_state(self, y_true, y_pred, sample_weight=None):
            y_true = tf.cast(y_true, self._dtype)
            y_pred = tf.cast(y_pred, self._dtype)

            # _loss_fn is expected to return the scalar mean loss, so we
            # multiply by the batch_size to get back to total loss.
            n_examples = tf.cast(tf.shape(y_pred)[0], self._dtype)
            summed_loss = self._loss_fn(y_true, y_pred) * n_examples

            # The weight update is sequenced after the loss update.
            loss_update = self._total_loss.assign_add(summed_loss)
            with tf.control_dependencies([loss_update]):
                return self._total_weight.assign_add(n_examples)

        def result(self):
            return tf.div_no_nan(self._total_loss, self._total_weight)

    self._loss_metric = _WeightedMeanLossMetric()

    # Keras creates variables that are not added to any collection, making it
    # impossible for TFF to extract them and create the appropriate
    # initializer before calling a tff.Computation. Here we store them in a
    # TFF specific collection so that they can be retrieved later.
    # TODO(b/122081673): this likely goes away in TF2.0
    all_variables = itertools.chain(self.trainable_variables,
                                    self.non_trainable_variables,
                                    self.local_variables)
    for variable in all_variables:
        tf.add_to_collection(
            graph_keys.GraphKeys.VARS_FOR_TFF_TO_INITIALIZE, variable)
tf.clip_by_average_norm() tf.clip_by_norm() tf.clip_by_value() tf.colocate_with() tf.complex() tf.cond() tf.confusion_matrix() tf.conj() tf.cross() tf.cumprod() tf.cumsum() tf.constant() tf.convert_to_tensor() tf.convert_to_tensor_or_indexed_slices() tf.convert_to_tensor_or_sparse_tensor() tf.decode_base64() tf.decode_csv() tf.decode_json_example() tf.decode_raw() tf.device() tf.diag() tf.diag_part() tf.div() tf.divide() tf.batch_to_space_nd() tf.space_to_batch_nd() tf.batch_to_space() tf.space_to_batch() tf.depth_to_space()
def normalize_adj(A, method='sym', *, axis1=-2, axis2=-1, eps=0.0,
                  assume_symmetric_input=False):
    """Normalize adjacency matrix defined by axis1 and axis2 in a tensor

    Args:

      A (any-tensor): Input adjacency matrix or matrices. Non-floating
        inputs are cast to ``tf.float32``.
        NOTE(review): the input is converted with
        ``tf.convert_to_tensor_or_sparse_tensor`` but every branch calls
        ``tf.reduce_sum``, so SparseTensor inputs likely fail -- confirm.

      method (str): Normalization method, could be:

        * 'sym', 'symmetric': Symmetric normalization, i.e. A' =
          D^-0.5 * A * D^-0.5

        * 'row': Row normalization, i.e. A' = D^-1 * A

        * 'col', 'column': Column normalization, i.e. A' = A * D^-1

        * 'dsm', 'ds', 'doubly_stochastic': Row-normalize A into P, then
          return (P * C^-1) @ P^T where C holds the column sums of P.

      axis1 (int): Specify the first axis of the adjacency matrices. Note
        that the input A could be a batch of matrices.

      axis2 (int): Specify the second axis of the adjacency matrices.

      eps (float): Regularization small value to avoid dividing by
        zero. Default to 0.0. For symmetric normalization it is added after
        the square root.

      assume_symmetric_input (bool): Whether assume the input adjacency
        matrices are symmetric or not. It affects results of symmetric
        normalization only. When it is True, it will reuse the row sum as
        col sum, which will avoid the computation of column sum. Will need
        to be set as False when the inputs is not symmetric, otherwise the
        result will be incorrect. Default to False.

    Returns:

      any-tensor: Normalized adjacency matrix

    Raises:

      ValueError: If ``method`` is not one of the names listed above.
    """
    A = tf.convert_to_tensor_or_sparse_tensor(A)
    if not A.dtype.is_floating:
        A = tf.cast(A, tf.float32)

    if method in ['row', 'col', 'column']:
        # Row normalization divides by sums along axis2, column
        # normalization by sums along axis1.
        axis_to_sum = axis2 if method == 'row' else axis1
        norm = tf.reduce_sum(A, axis_to_sum, keepdims=True)
        norm = 1.0 / (norm + eps)
        res = A * norm
    elif method in ['sym', 'symmetric']:
        norm1 = tf.reduce_sum(A, axis=axis2, keepdims=True)
        norm1 = 1.0 / (tf.sqrt(norm1) + eps)
        if assume_symmetric_input:
            # Row sums equal column sums for symmetric input, so reuse them.
            norm2 = _tf_ops.swapaxes(norm1, axis1, axis2)
        else:
            norm2 = tf.reduce_sum(A, axis=axis1, keepdims=True)
            norm2 = 1.0 / (tf.sqrt(norm2) + eps)
        res = A * norm1 * norm2
    elif method in ['dsm', 'ds', 'doubly_stochastic']:
        # step 1: row normalize
        norm = tf.reduce_sum(A, axis=axis2, keepdims=True)
        norm = 1.0 / (norm + eps)
        P = A * norm
        # step 2: P @ P^T / column_sum
        # Move the matrix axes to the last two positions for tf.matmul.
        P = _tf_ops.swapaxes(P, axis2, -1)
        P = _tf_ops.swapaxes(P, axis1, -2)
        norm = tf.reduce_sum(P, axis=-2, keepdims=True)
        norm = 1.0 / (norm + eps)
        # The transpose is taken before P is scaled by the column sums.
        PT = _tf_ops.swapaxes(P, -1, -2)
        P = P * norm
        T = tf.matmul(P, PT)
        # Undo the two axis swaps in reverse order.
        T = _tf_ops.swapaxes(T, axis1, -2)
        T = _tf_ops.swapaxes(T, axis2, -1)
        res = T
    else:
        # Explicit error instead of `assert`, which is stripped under -O and
        # would let an unknown method silently fall into the branch above.
        raise ValueError(
            'Unknown normalization method: {!r}'.format(method))
    return res