Beispiel #1
0
    def one_step(self, new_chain_state, current_reducer_state,
                 previous_kernel_results):  # pylint: disable=unused-argument
        """Tick the progress bar forward once for a newly produced result.

        None of the arguments are inspected; they exist only to satisfy the
        reducer interface.

        Args:
          new_chain_state: A (possibly nested) structure of incoming chain
            state(s) with shape and dtype compatible with those used to
            initialize the `TracingState`.
          current_reducer_state: `TracingState`s representing all previously
            traced results.
          previous_kernel_results: A (possibly nested) structure of `Tensor`s
            representing internal calculations made in a related
            `TransitionKernel`.

        Returns:
          new_reducer_state: an empty list.
        """
        def _tick():
            # The default value absorbs the StopIteration raised by a bar that
            # has already been exhausted.
            next(self.bar, None)

        # The bar is a plain Python object, so it is advanced through a
        # py_function that takes no inputs and produces no outputs.
        tf.py_function(_tick, (), ())
        return []
    def __init__(self):
        """Load the pt-to-en TED corpus, build tokenizers, encode both splits.

        Sets the following attributes:
          data_train: the 'train' split, loaded as_supervised and then mapped
            through `self.tf_encode`.
          data_valid: the 'validation' split, prepared the same way.
          tokenizer_pt: first tokenizer returned by `tokenize_dataset`.
          tokenizer_en: second tokenizer returned by `tokenize_dataset`.
        """
        corpus = 'ted_hrlr_translate/pt_to_en'

        def encode_pair(pt_sentence, en_sentence):
            # Runs the instance encoder on one sentence pair, yielding two
            # int64 tensors.
            return tf.py_function(self.tf_encode,
                                  [pt_sentence, en_sentence],
                                  (tf.int64, tf.int64))

        # Both raw splits are stored on the instance before the tokenizers
        # are built from the training split.
        self.data_train = tfds.load(corpus, split='train', as_supervised=True)
        self.data_valid = tfds.load(corpus,
                                    split='validation',
                                    as_supervised=True)

        self.tokenizer_pt, self.tokenizer_en = self.tokenize_dataset(
            self.data_train)

        self.data_train = self.data_train.map(encode_pair)
        self.data_valid = self.data_valid.map(encode_pair)
 def __init__(self, batch_size, max_len):
     """Build the padded, batched pt-to-en training and validation pipelines.

     Args:
         batch_size: number of examples in each padded batch.
         max_len: maximum length allowed for either encoded sequence of a
             pair; pairs exceeding it are filtered out.
     """
     corpus = 'ted_hrlr_translate/pt_to_en'
     padded_shapes = ([None], [None])

     def encode_pair(pt_sentence, en_sentence):
         # Encode one sentence pair into two int64 tensors.
         return tf.py_function(self.tf_encode,
                               [pt_sentence, en_sentence],
                               (tf.int64, tf.int64))

     def short_enough(x, y):
         # Keep a pair only when both sequences fit within max_len.
         return len(x) <= max_len and len(y) <= max_len

     self.data_train = tfds.load(corpus, split='train', as_supervised=True)
     self.data_valid = tfds.load(corpus,
                                 split='validation',
                                 as_supervised=True)
     self.tokenizer_pt, self.tokenizer_en = self.tokenize_dataset(
         self.data_train)

     # Training: encode, filter, cache, shuffle, pad-batch and prefetch.
     self.data_train = (
         self.data_train
         .map(encode_pair)
         .filter(short_enough)
         .cache()
         .shuffle(10000000)
         .padded_batch(batch_size, padded_shapes)
         .prefetch(tf.data.experimental.AUTOTUNE))

     # Validation: encode, filter and pad-batch only.
     self.data_valid = (
         self.data_valid
         .map(encode_pair)
         .filter(short_enough)
         .padded_batch(batch_size, padded_shapes))
    def shard_train_batch(images, labels):
        """Converts 4D input into 5D input whose first dimension denotes cores.

        Args:
          images: batch of images handed to `split_a_tensor_or_a_dict`.
          labels: dict holding the `ssd_constants.BOXES`,
            `ssd_constants.CLASSES` and `ssd_constants.NUM_MATCHED_BOXES`
            entries.

        Returns:
          A `(images, labels)` tuple where `labels` is a dict with the same
          three keys, every entry split across the local replicas.
        """
        boxes = labels[ssd_constants.BOXES]
        classes = labels[ssd_constants.CLASSES]
        num_matched_boxes = labels[ssd_constants.NUM_MATCHED_BOXES]

        replica_count = params['local_num_replicas']

        images = split_a_tensor_or_a_dict(images, replica_count)
        boxes = split_a_tensor_or_a_dict(boxes, replica_count)
        classes = split_a_tensor_or_a_dict(classes, replica_count)

        # The matched-box counts are flattened before being split.
        flat_matched = tf.reshape(num_matched_boxes, [-1])
        num_matched_boxes = split_a_tensor_or_a_dict(flat_matched,
                                                     replica_count)

        if params['conv0_space_to_depth']:
            images = fused_transpose_and_space_to_depth(
                images,
                block_size=ssd_constants.SPACE_TO_DEPTH_BLOCK_SIZE,
                transpose_input=transpose_input)
            if transpose_input:
                boxes = tf.transpose(boxes, [0, 2, 3, 1])
            images = pad_images_if_uneven(images)
        elif transpose_input:

            # numpy's 5D transpose is faster than tf 5D transpose.
            def _numpy_transposer(perm):
                def _apply(x):
                    # pylint: disable=protected-access
                    return tf.convert_to_tensor(x._numpy().transpose(perm))
                    # pylint: enable=protected-access

                return _apply

            # The axis order depends on the per-replica batch size.
            if host_batch_size // replica_count > 8:
                images = tf.py_function(_numpy_transposer([0, 2, 3, 4, 1]),
                                        [images],
                                        Tout=images.dtype)
            else:
                images = tf.py_function(_numpy_transposer([0, 2, 3, 1, 4]),
                                        [images],
                                        Tout=images.dtype)
            # Use tf transpose on the 4D tensor.
            boxes = tf.transpose(boxes, [0, 2, 3, 1])

        sharded_labels = {
            ssd_constants.BOXES: boxes,
            ssd_constants.CLASSES: classes,
            ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes
        }
        return images, sharded_labels
    def parse_and_select_from_tfrecord2(self, raw_proto):
        """Dataset map function that parses a TFRecord example and selects fields.

        Side effects: stores the preprocessor lists it builds on
        `self._in1_preprocessors`, `self._in2_preprocessors` (only when
        `self.in2_fields` is truthy) and `self._out_preprocessors`.

        Args:
          raw_proto: a single serialized example; it is wrapped in a list
            before being handed to `tf.io.parse_example`.

        Returns:
          A tuple `(in_data, in2_data, out_data, attended_data)`:
            in_data: flattened float32 concatenation of the processed
              `self.in1_fields`.
            in2_data: the same for `self.in2_fields`, or `in_data[0:1]` when
              no second input is configured.
            out_data: flattened float32 processed `self.out_field`.
            attended_data: flattened `self.attended_direction` feature, or
              `None` when no attended direction is configured.
        """
        # https://stackoverflow.com/questions/41951433/tensorflow-valueerror-shape-must-be-rank-1-but-is-rank-0-for-parseexample-pa
        parsed_features = tf.io.parse_example([raw_proto], self.features)

        def _process_and_flatten(preprocessors, reshape_name):
            # Run every preprocessor on its own parsed feature, join the
            # results along axis 1 and flatten to a vector.
            # pylint: disable=g-complex-comprehension
            joined = tf.concat([
                tf.py_function(
                    pp.process, inp=[parsed_features[pp.name]], Tout=tf.float32)
                for pp in preprocessors
            ],
                               axis=1)
            return tf.reshape(joined, (-1, ), name=reshape_name)

        self._in1_preprocessors = self.preprocess_list(self.in1_fields,
                                                       self.frame_rate)
        in_data = _process_and_flatten(self._in1_preprocessors,
                                       'input1_reshape')

        if self.in2_fields:
            self._in2_preprocessors = self.preprocess_list(
                self.in2_fields, self.frame_rate)
            in2_data = _process_and_flatten(self._in2_preprocessors,
                                            'input2_reshape')
        else:
            # No second input configured: emit a one-element slice of the
            # first input as a placeholder.
            in2_data = in_data[0:1]

        self._out_preprocessors = self.preprocess_list([self.out_field],
                                                       self.frame_rate)
        out_data = _process_and_flatten(self._out_preprocessors,
                                        'output_reshape')

        if self.attended_direction:
            attended_data = parsed_features[self.attended_direction]
            # `(-1, )` is a real 1-D shape; the previous `(-1)` was just the
            # scalar -1 and relied on legacy scalar-shape handling.
            attended_data = tf.reshape(attended_data, (-1, ),
                                       name='attended_reshape')
        else:
            attended_data = None
        return in_data, in2_data, out_data, attended_data
Beispiel #6
0
 def tf_encode(x):
     """Encode the 'inputs' and 'targets' entries of an example dict.

     Both entries are passed to `encode` through `tf.py_function`; the two
     int64 results are declared rank 1 with unknown length and returned in
     a dict under the same keys.
     """
     encoded = tf.py_function(encode,
                              [x['inputs'], x['targets']],
                              [tf.int64, tf.int64])
     encoded_inputs = encoded[0]
     encoded_targets = encoded[1]
     encoded_inputs.set_shape([None])
     encoded_targets.set_shape([None])
     return {'inputs': encoded_inputs, 'targets': encoded_targets}
Beispiel #7
0
 def tf_encode(self, pt, en):
     """Run `self.encode` through `tf.py_function` and pin the output shapes.

     Returns a `(pt, en)` tuple of int64 tensors, each declared as rank 1
     with unknown length.
     """
     encoded = tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
     for tokens in encoded:
         # Declare every output as a 1-D sequence of unknown length.
         tokens.set_shape([None])
     pt_ids, en_ids = encoded
     return pt_ids, en_ids
    def shard_batch(batch):
        """Reshape a batch so its leading axis indexes the local devices.

        Reads `batch['image']` and `batch['label']`, drops trailing examples
        that do not divide evenly across the local devices, reshapes both to
        `[local_device_count, -1, ...]` and writes them back into `batch`.

        Returns:
          The same `batch` dict, updated in place.
        """
        images, labels = batch['image'], batch['label']
        num_devices = jax.local_device_count()
        num_examples = tf.shape(images)[0]
        if num_examples % num_devices != 0:
            # We don't need all hosts to have the same batch size but (for
            # now) we need all devices on each host to have the same batch
            # size.
            usable = (num_examples // num_devices) * num_devices
            images = images[:usable]
            labels = labels[:usable]
        if space_to_depth:
            half = image_size // 2
            # Split each spatial axis into (half, 2), then fold the two
            # block axes into the channel axis.
            images = tf.reshape(images,
                                [num_devices, -1, half, 2, half, 2 * 3])
            images = tf.transpose(images, [0, 1, 2, 4, 3, 5])
            images = tf.reshape(images,
                                [num_devices, -1, half, half, 4 * 3])
        else:
            per_device_shape = [num_devices, -1, image_size, image_size, 3]
            images = tf.reshape(images, per_device_shape)
        if transpose_images:

            def _move_axis_one_last(x):
                # pylint: disable=protected-access
                return tf.convert_to_tensor(
                    x._numpy().transpose([0, 2, 3, 4, 1]))

            images = tf.py_function(_move_axis_one_last, [images], dtype)
            # apparently [0, 2, 3, 1, 4] is better for per-device batch <= 8?
        labels = tf.reshape(labels, [num_devices, -1])
        batch.update(image=images, label=labels)
        return batch
Beispiel #9
0
 def augment(self, image, label):
     """Optionally apply a random horizontal flip and a random shift.

     When `self.augmentation` is falsy the pair is returned untouched.
     Otherwise the image is flipped at random left/right and then passed
     through `self.random_shift` via `tf.py_function`; the label is never
     modified.
     """
     if not self.augmentation:
         return image, label
     flipped = tf.image.random_flip_left_right(image)
     shifted = tf.py_function(self.random_shift,
                              inp=[flipped],
                              Tout=self.dtype)
     return shifted, label
 def tf_encode(self, pt, en):
     """TensorFlow-side wrapper around the `encode` instance method.

     Returns two int64 tensors (Portuguese, English) whose static shape is
     set to `[None]`.
     """
     pt_ids, en_ids = tf.py_function(func=self.encode,
                                     inp=[pt, en],
                                     Tout=[tf.int64, tf.int64])
     pt_ids.set_shape([None])
     en_ids.set_shape([None])
     return pt_ids, en_ids
 def tf_encode(self, pt, en):
     """Call `self.encode` via `tf.py_function` and fix the result shapes.

     Both returned tensors are int64 and declared 1-D with unknown length.
     """
     output_types = [tf.int64, tf.int64]
     encoded_pt, encoded_en = tf.py_function(self.encode, [pt, en],
                                             output_types)
     encoded_pt.set_shape([None])
     encoded_en.set_shape([None])
     return encoded_pt, encoded_en
Beispiel #12
0
 def tf_encode(self, pt, en):
     """Encode a sentence pair and return the result as shaped tensors.

     `self.encode` is executed through `tf.py_function`; each int64 output
     is then declared as a vector of unknown length.
     """
     outputs = tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
     pt_tensor, en_tensor = outputs
     pt_tensor.set_shape([None])
     en_tensor.set_shape([None])
     return pt_tensor, en_tensor
def load_linear_probe_train_data(model,
                                 layer_idx,
                                 input_shape,
                                 batch_size,
                                 data_path=None):
    """Loads train data for linear probe experiments.

    Args:
      model: forwarded to `preprocess_linear_probe_data`.
      layer_idx: forwarded to `preprocess_linear_probe_data`.
      input_shape: unused; kept so existing callers keep working.
      batch_size: number of examples per batch; incomplete final batches are
        dropped.
      data_path: optional path string. When it contains 'tiny', only the
        first 6% of the CIFAR-10 train split is loaded and the shuffle
        buffer is shrunk 16-fold. `None` (the default) selects the full
        split.

    Returns:
      A shuffled, preprocessed, batched and prefetched `tf.data.Dataset`.
    """
    # `data_path` defaults to None, and `'tiny' in None` raises TypeError,
    # so the substring test must be guarded.
    use_tiny_subset = data_path is not None and 'tiny' in data_path

    buffer_size = 50000
    split = 'train'
    if use_tiny_subset:
        split = 'train[:6%]'
        buffer_size //= 16

    train_dataset = tfds.load(name='cifar10', split=split, as_supervised=True)
    train_dataset = train_dataset.shuffle(buffer_size=buffer_size)

    def processing_fn(x, y):
        # Preprocessing runs as Python code, hence the py_function wrapper.
        return tf.py_function(
            inp=(x, y),
            func=functools.partial(preprocess_linear_probe_data,
                                   model=model,
                                   layer_idx=layer_idx,
                                   is_training=True,
                                   pooling=FLAGS.pooling),
            Tout=[tf.float32, tf.int64])

    train_dataset = train_dataset.map(processing_fn)
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return train_dataset
def pitch_shift(audio, semitones):
  """Shift the pitch of `audio` by `semitones` steps using librosa.

  Args:
    audio: audio tensor; converted to a NumPy array before the librosa call.
    semitones: number of steps to shift, forwarded as `n_steps`.

  Returns:
    A float32 tensor produced by `librosa.effects.pitch_shift` at
    `SAMPLE_RATE_HZ`.
  """

  def librosa_pitch_shift(x, semitones):
    # `sr` is passed by keyword: recent librosa releases make it
    # keyword-only, and older releases accept the keyword form too.
    return librosa.effects.pitch_shift(
        x.numpy(), sr=SAMPLE_RATE_HZ, n_steps=semitones)

  return tf.py_function(
      func=librosa_pitch_shift, inp=[audio, semitones], Tout=tf.float32)
 def tf_encode(self, pt, en):
     """Wrap the `encode` instance method for use inside TensorFlow.

     Returns the encoded Portuguese and English int64 tensors, each with
     static shape `[None]`.
     """
     encoded_pt, encoded_en = tf.py_function(
         self.encode, [pt, en], [tf.int64, tf.int64])
     encoded_pt.set_shape([None])
     encoded_en.set_shape([None])
     return encoded_pt, encoded_en
Beispiel #16
0
 def tf_encode(self, pt, en):
     """Encode a sentence pair through `tf.py_function`.

     `self.encode` produces two int64 outputs; both are declared rank 1
     with unknown length before being returned as a tuple.
     """
     results = tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
     for encoded in results:
         encoded.set_shape([None])
     pt_out, en_out = results
     return pt_out, en_out
Beispiel #17
0
    def tf_encode(self, pt, en):
        """Run `self.encode` as a TensorFlow op and set the output shapes.

        Returns a `(pt, en)` tuple of int64 tensors with shape `[None]`.
        """
        pt_tokens, en_tokens = tf.py_function(func=self.encode,
                                              inp=[pt, en],
                                              Tout=[tf.int64, tf.int64])
        pt_tokens.set_shape([None])
        en_tokens.set_shape([None])
        return pt_tokens, en_tokens
    def _decode_tf(self, ids):
        """Decode token ids to a string from within TensorFlow.

        Args:
          ids: a 1d tf.Tensor with dtype tf.int32

        Returns:
          a tf Scalar with dtype tf.string
        """
        # `self.decode` is ordinary Python, so it is run via py_function.
        decoded = tf.py_function(self.decode, [ids], tf.string)
        return decoded
 def __init__(self):
     """Load both pt-to-en splits, create the tokenizers and encode the data.

     After construction `data_train` and `data_valid` hold the encoded
     'train' and 'validation' splits, and `tokenizer_pt` / `tokenizer_en`
     hold the tokenizers returned by `tokenize_dataset`.
     """
     dataset_name = 'ted_hrlr_translate/pt_to_en'
     int64_pair = (tf.int64, tf.int64)

     def to_token_ids(pt_text, en_text):
         # Delegate the encoding of one sentence pair to the instance.
         return tf.py_function(self.tf_encode, [pt_text, en_text],
                               int64_pair)

     self.data_train = tfds.load(dataset_name,
                                 split='train', as_supervised=True)
     self.data_valid = tfds.load(dataset_name,
                                 split='validation', as_supervised=True)

     tokenizers = self.tokenize_dataset(self.data_train)
     self.tokenizer_pt, self.tokenizer_en = tokenizers

     self.data_train = self.data_train.map(to_token_ids)
     self.data_valid = self.data_valid.map(to_token_ids)
Beispiel #20
0
def include_spectrogram(tensor, hparams=None):
    """Build a new dict that adds a spectrogram to the example's fields.

    The spectrogram is computed from `tensor['audio']` by
    `create_timbre_spectrogram` (bound to `hparams`) inside a
    `tf.py_function` with float32 output. The 'note_croppings' and
    'instrument_families' entries are copied over unchanged.
    """
    spectrogram_fn = functools.partial(create_timbre_spectrogram,
                                       hparams=hparams)
    spectrogram = tf.py_function(spectrogram_fn, [tensor['audio']],
                                 tf.float32)
    return {
        'spec': spectrogram,
        'note_croppings': tensor['note_croppings'],
        'instrument_families': tensor['instrument_families'],
    }
Beispiel #21
0
 def tf_encode(self, pt, en):
     """TensorFlow wrapper for `self.encode`.

     Returns the Portuguese and English token tensors (int64), each
     declared as a vector of unknown length.
     """
     encoded_pair = tf.py_function(self.encode, [pt, en],
                                   [tf.int64, tf.int64])
     pt_ids, en_ids = encoded_pair
     pt_ids.set_shape([None])
     en_ids.set_shape([None])
     return pt_ids, en_ids
Beispiel #22
0
    def tf_encode(self, pt, en):
        '''A TensorFlow wrapper for the encode instance method.

        Args:
            pt: the Portuguese sentence forwarded to `self.encode`
                (presumably a tf.Tensor when called from a tf.data
                pipeline; confirm against the caller)
            en: the corresponding English sentence, forwarded the same way

        Returns: a tuple `(pt_tokens, en_tokens)` of int64 tensors, each
            with its static shape set to `[None]`

        '''
        pt_tokens, en_tokens = tf.py_function(self.encode, [pt, en],
                                              [tf.int64, tf.int64])
        # Declare both outputs as 1-D sequences of unknown length so that
        # downstream shape-dependent ops (e.g. padded batching) can use them.
        pt_tokens.set_shape([None])
        en_tokens.set_shape([None])
        # A tuple rather than a list, so tf.data treats the pair as two
        # separate components.
        return pt_tokens, en_tokens
def encode_map_fn(text, label):
    """Encode one (text, label) example for use in a dataset map.

    The label is cast to int64 and passed together with the text to
    `encode` through `tf.py_function`. The encoded text is declared 1-D
    with unknown length and the label is declared a scalar.
    """
    label_int64 = tf.cast(label, tf.int64)
    outputs = tf.py_function(encode,
                             inp=[text, label_int64],
                             Tout=(tf.int64, tf.int64))
    encoded_text, encoded_label = outputs

    encoded_text.set_shape([None])
    encoded_label.set_shape([])

    return encoded_text, encoded_label
    def tf_encode(self, pt, en):
        """
        Wrap `self.encode` in `tf.py_function` and set the shape of the
        returned pt and en tensors to `[None]`.
        """
        sequence_shape = [None]
        pt_encoded, en_encoded = tf.py_function(func=self.encode,
                                                inp=[pt, en],
                                                Tout=[tf.int64, tf.int64])
        pt_encoded.set_shape(sequence_shape)
        en_encoded.set_shape(sequence_shape)
        return (pt_encoded, en_encoded)
Beispiel #25
0
 def tf_encode(self, pt, en):
     """TensorFlow wrapper for encoding.

     Uses `tf.py_function` to call `self.encode`, then declares both int64
     outputs as rank 1 with unknown length.
     """
     encoded = tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
     pt_ids = encoded[0]
     en_ids = encoded[1]
     pt_ids.set_shape([None])
     en_ids.set_shape([None])
     return pt_ids, en_ids
Beispiel #26
0
    def tf_encode(self, pt, en):
        '''Acts as a TensorFlow wrapper for the encode instance method.

        Args:
            pt: tf.Tensor containing the Portuguese sentence
            en: tf.Tensor containing the corresponding English sentence

        Returns:
            Tuple of two int64 tensors (Portuguese, English), each with
            static shape `[None]`.
        '''
        pt_ids, en_ids = tf.py_function(func=self.encode,
                                        inp=[pt, en],
                                        Tout=[tf.int64, tf.int64])
        pt_ids.set_shape([None])
        en_ids.set_shape([None])
        return pt_ids, en_ids
Beispiel #27
0
    def tf_encode(self, pt, en):
        """Wrapper for the encode instance method.

        Calls `self.encode` through `tf.py_function` and sets the shape of
        the returned pt and en tensors to `[None]`.
        """
        encoded = tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
        for tokens in encoded:
            tokens.set_shape([None])
        result_pt, result_en = encoded
        return result_pt, result_en
Beispiel #28
0
    def __init__(self, batch_size, max_len):
        """Prepare the pt-to-en training and validation pipelines.

        Args:
            batch_size: the batch size for training/validation.
            max_len: the maximum number of tokens allowed per example
                sentence.

        Attributes set:
            data_train: encoded, filtered, cached, shuffled, padded-batched
                and prefetched 'train' split (loaded as_supervised).
            data_valid: encoded, filtered and padded-batched 'validation'
                split (loaded as_supervised).
            tokenizer_pt: the Portuguese tokenizer created from the
                training set.
            tokenizer_en: the English tokenizer created from the training
                set.
        """
        dataset_name = 'ted_hrlr_translate/pt_to_en'

        def encode_pair(pt_sentence, en_sentence):
            # Turn one raw sentence pair into two int64 token tensors.
            return tf.py_function(self.tf_encode,
                                  [pt_sentence, en_sentence],
                                  (tf.int64, tf.int64))

        def within_limit(x, y):
            # A pair survives only if neither side exceeds max_len.
            return len(x) <= max_len and len(y) <= max_len

        def pad_and_batch(dataset):
            return dataset.padded_batch(batch_size, ([None], [None]))

        # The raw splits are stored on the instance before the tokenizers
        # are built from the training split.
        self.data_train = tfds.load(dataset_name,
                                    split='train',
                                    as_supervised=True)
        self.data_valid = tfds.load(dataset_name,
                                    split='validation',
                                    as_supervised=True)
        self.tokenizer_pt, self.tokenizer_en = self.tokenize_dataset(
            self.data_train)

        train = self.data_train.map(encode_pair).filter(within_limit)
        valid = self.data_valid.map(encode_pair).filter(within_limit)

        # Only the training split is cached, shuffled and prefetched.
        train = pad_and_batch(train.cache().shuffle(10000000))
        self.data_train = train.prefetch(tf.data.experimental.AUTOTUNE)
        self.data_valid = pad_and_batch(valid)
Beispiel #29
0
    def tf_encode(self, pt, en):
        """Act as a TensorFlow wrapper for the encode instance method.

        Args:
            pt: Portuguese input forwarded to `self.encode`.
            en: English input forwarded to `self.encode`.

        Returns:
            Tuple of two int64 tensors, each with static shape `[None]`.
        """
        pt_ids, en_ids = tf.py_function(func=self.encode,
                                        inp=[pt, en],
                                        Tout=[tf.int64, tf.int64])
        pt_ids.set_shape([None])
        en_ids.set_shape([None])
        return pt_ids, en_ids
  def initialize(self, initial_chain_state, initial_kernel_results=None):  # pylint: disable=unused-argument
    """Create the progress bar.

    Neither argument is inspected; both exist only to satisfy the reducer
    interface.

    Args:
      initial_chain_state: A (possibly nested) structure of `Tensor`s or Python
        `list`s of `Tensor`s representing the current state(s) of the Markov
        chain(s). It is used to infer the structure of future trace results.
      initial_kernel_results: A (possibly nested) structure of `Tensor`s
        representing internal calculations made in a related `TransitionKernel`.
        It is used to infer the structure of future trace results.

    Returns:
      state: empty list.
    """
    def _create_bar(total):
      # `total` arrives as a tensor; the bar factory is given a Python int.
      self.bar = self.progress_bar_fn(int(total))

    total_results = tf.convert_to_tensor(self.num_results)
    tf.py_function(_create_bar, (total_results,), ())
    return []